roc-lang · lukewilliamboswell · Jul 4, 2025 · Jul 3, 2025 · Jul 4, 2025
diff --git a/src/base/SExpr.zig b/src/base/SExpr.zig
@@ -22,7 +22,6 @@ pub const Color = enum {
     node_name,
     string,
     number,
-    region,
     punctuation,
 };
 
@@ -52,10 +51,10 @@ const PlainTextSExprWriter = struct {
         // No-op for plain text
     }
 
-    pub fn beginSourceRange(self: *@This(), start_token: u32, end_token: u32) !void {
+    pub fn beginSourceRange(self: *@This(), start_byte: u32, end_byte: u32) !void {
         _ = self;
-        _ = start_token;
-        _ = end_token;
+        _ = start_byte;
+        _ = end_byte;
         // No-op for plain text
     }
 
@@ -92,7 +91,6 @@ const HtmlSExprWriter = struct {
                 .node_name => "token-keyword", // Node names are like keywords in S-expressions
                 .string => "token-string",
                 .number => "token-number",
-                .region => "token-comment", // Regions are metadata, similar to comments
                 .punctuation => "token-punctuation",
             };
             try self.writer.print("<span class=\"{s}\">", .{css_class});
@@ -102,8 +100,8 @@ const HtmlSExprWriter = struct {
         self.current_color = color;
     }
 
-    pub fn beginSourceRange(self: *@This(), start_token: u32, end_token: u32) !void {
-        try self.writer.print("<span class=\"source-range\" data-start-token=\"{d}\" data-end-token=\"{d}\" >", .{ start_token, end_token });
+    pub fn beginSourceRange(self: *@This(), start_byte: u32, end_byte: u32) !void {
+        try self.writer.print("<span class=\"source-range\" data-start-byte=\"{d}\" data-end-byte=\"{d}\" >", .{ start_byte, end_byte });
     }
 
     pub fn endSourceRange(self: *@This()) !void {
@@ -125,10 +123,10 @@ pub const AttributeValue = union(enum) {
     node_idx: u32,
     region: RegionInfo,
     raw_string: []const u8, // for unquoted strings
-    tokens_range: struct {
+    bytes_range: struct {
         region: RegionInfo,
-        start_token: u32,
-        end_token: u32,
+        start_byte: u32,
+        end_byte: u32,
     },
 };
 
@@ -168,7 +166,7 @@ pub fn deinit(self: *SExpr, gpa: Allocator) void {
                     gpa.free(r.line_text);
                 }
             },
-            .tokens_range => |tr| {
+            .bytes_range => |tr| {
                 // Free the region line text if it's not empty
                 if (tr.region.line_text.len > 0) {
                     gpa.free(tr.region.line_text);
@@ -242,18 +240,18 @@ pub fn appendBoolAttr(self: *SExpr, gpa: Allocator, key: []const u8, value: bool
 }
 
 /// Append a token range attribute with region information
-pub fn appendTokenRange(self: *SExpr, gpa: Allocator, region: RegionInfo, start_token: u32, end_token: u32) void {
+pub fn appendByteRange(self: *SExpr, gpa: Allocator, region: RegionInfo, start_byte: u32, end_byte: u32) void {
     const owned_value = gpa.dupe(u8, region.line_text) catch |err| exitOnOom(err);
-    self.addAttribute(gpa, "tokens", .{ .tokens_range = .{
+    self.addAttribute(gpa, "tokens", .{ .bytes_range = .{
         .region = RegionInfo{
             .start_line_idx = region.start_line_idx,
             .start_col_idx = region.start_col_idx,
             .end_line_idx = region.end_line_idx,
             .end_col_idx = region.end_col_idx,
             .line_text = owned_value,
         },
-        .start_token = start_token,
-        .end_token = end_token,
+        .start_byte = start_byte,
+        .end_byte = end_byte,
     } });
 }
 
@@ -325,28 +323,24 @@ fn toStringImpl(node: SExpr, writer_impl: anytype, indent: usize) !void {
             },
             .region => |r| {
                 try writer_impl.print(" ", .{});
-                try writer_impl.setColor(.region);
                 try writer_impl.print("@{d}.{d}-{d}.{d}", .{
                     // add one to display numbers instead of index
                     r.start_line_idx + 1,
                     r.start_col_idx + 1,
                     r.end_line_idx + 1,
                     r.end_col_idx + 1,
                 });
-                try writer_impl.setColor(.default);
             },
-            .tokens_range => |tr| {
+            .bytes_range => |tr| {
                 try writer_impl.print(" ", .{});
-                try writer_impl.beginSourceRange(tr.start_token, tr.end_token);
-                try writer_impl.setColor(.region);
+                try writer_impl.beginSourceRange(tr.start_byte, tr.end_byte);
                 try writer_impl.print("@{d}.{d}-{d}.{d}", .{
                     // add one to display numbers instead of index
                     tr.region.start_line_idx + 1,
                     tr.region.start_col_idx + 1,
                     tr.region.end_line_idx + 1,
                     tr.region.end_col_idx + 1,
                 });
-                try writer_impl.setColor(.default);
                 try writer_impl.endSourceRange();
             },
         }

diff --git a/src/check/canonicalize/CIR.zig b/src/check/canonicalize/CIR.zig
@@ -930,25 +930,19 @@ pub const IntValue = struct {
     }
 };
 
-/// Helper function to convert the entire Canonical IR to a string in S-expression format
-/// and write it to the given writer.
-///
-/// If a single expression is provided we only print that expression
-pub fn toSExprStr(ir: *CIR, writer: std.io.AnyWriter, maybe_expr_idx: ?Expr.Idx, source: []const u8) !void {
+/// Helper function to generate the S-expression node for the entire Canonical IR.
+/// If a single expression is provided, only that expression is returned.
+pub fn toSExpr(ir: *CIR, maybe_expr_idx: ?Expr.Idx, source: []const u8) SExpr {
     // Set temporary source for region info calculation during SExpr generation
     ir.temp_source_for_sexpr = source;
     defer ir.temp_source_for_sexpr = null;
     const gpa = ir.env.gpa;
 
     if (maybe_expr_idx) |expr_idx| {
         // Get the expression from the store
-        var expr_node = ir.store.getExpr(expr_idx).toSExpr(ir);
-        defer expr_node.deinit(gpa);
-
-        expr_node.toStringPretty(writer);
+        return ir.store.getExpr(expr_idx).toSExpr(ir);
     } else {
         var root_node = SExpr.init(gpa, "can-ir");
-        defer root_node.deinit(gpa);
 
         // Iterate over all the definitions in the file and convert each to an S-expression
         const defs_slice = ir.store.sliceDefs(ir.all_defs);
@@ -968,10 +962,21 @@ pub fn toSExprStr(ir: *CIR, writer: std.io.AnyWriter, maybe_expr_idx: ?Expr.Idx,
             root_node.appendNode(gpa, &stmt_node);
         }
 
-        root_node.toStringPretty(writer);
+        return root_node;
     }
 }
 
+/// Convert the entire Canonical IR (or only `maybe_expr_idx`, when provided) to a string in
+/// S-expression format and write it to `writer`. The node built by `toSExpr` is freed on return.
+/// `env` is kept in the signature for callers but is not needed: allocation uses `ir.env.gpa`.
+pub fn toSExprStr(ir: *CIR, env: *ModuleEnv, writer: std.io.AnyWriter, maybe_expr_idx: ?Expr.Idx, source: []const u8) !void {
+    _ = env;
+    const gpa = ir.env.gpa;
+    var node = toSExpr(ir, maybe_expr_idx, source);
+    defer node.deinit(gpa);
+    node.toStringPretty(writer);
+}
+
 test "NodeStore - init and deinit" {
     var store = CIR.NodeStore.init(testing.allocator);
     defer store.deinit();

diff --git a/src/check/parse/AST.zig b/src/check/parse/AST.zig
@@ -80,11 +80,20 @@ pub fn calcRegionInfo(self: *AST, region: TokenizedRegion, line_starts: []const
 
 /// Append region information to an S-expression node for diagnostics
 pub fn appendRegionInfoToSexprNode(self: *AST, env: *base.ModuleEnv, node: *SExpr, region: TokenizedRegion) void {
-    node.appendTokenRange(
+    const start = self.tokens.resolve(region.start);
+    const end = self.tokens.resolve(region.end);
+    const info: base.RegionInfo = base.RegionInfo.position(self.source, env.line_starts.items, start.start.offset, end.end.offset) catch .{
+        .start_line_idx = 0,
+        .start_col_idx = 0,
+        .end_line_idx = 0,
+        .end_col_idx = 0,
+        .line_text = "",
+    };
+    node.appendByteRange(
         env.gpa,
-        self.calcRegionInfo(region, env.line_starts.items),
-        region.start,
-        region.end,
+        info,
+        start.start.offset,
+        end.end.offset,
     );
 }
 
@@ -756,13 +765,13 @@ pub const Statement = union(enum) {
                     const header_node = ast.store.nodes.get(@enumFromInt(@intFromEnum(a.header)));
                     if (header_node.tag == .malformed) {
                         // Handle malformed type header by creating a placeholder
-                        header.appendRegion(env.gpa, ast.calcRegionInfo(header_node.region, env.line_starts.items));
+                        ast.appendRegionInfoToSexprNode(env, &header, header_node.region);
                         header.appendStringAttr(env.gpa, "name", "<malformed>");
                         var args_node = SExpr.init(env.gpa, "args");
                         header.appendNode(env.gpa, &args_node);
                     } else {
                         const ty_header = ast.store.getTypeHeader(a.header);
-                        header.appendRegion(env.gpa, ast.calcRegionInfo(ty_header.region, env.line_starts.items));
+                        ast.appendRegionInfoToSexprNode(env, &header, ty_header.region);
                         header.appendStringAttr(env.gpa, "name", ast.resolve(ty_header.name));
 
                         var args_node = SExpr.init(env.gpa, "args");
@@ -830,7 +839,7 @@ pub const Statement = union(enum) {
             .@"return" => |a| {
                 var node = SExpr.init(env.gpa, "s-return");
 
-                node.appendRegion(env.gpa, ast.calcRegionInfo(a.region, env.line_starts.items));
+                ast.appendRegionInfoToSexprNode(env, &node, a.region);
 
                 var child = ast.store.getExpr(a.expr).toSExpr(env, ast);
                 node.appendNode(env.gpa, &child);
@@ -1074,7 +1083,7 @@ pub const Pattern = union(enum) {
             },
             .as => |a| {
                 var node = SExpr.init(env.gpa, "p-as");
-                node.appendRegion(env.gpa, ast.calcRegionInfo(a.region, env.line_starts.items));
+                ast.appendRegionInfoToSexprNode(env, &node, a.region);
 
                 var pattern_node = ast.store.getPattern(a.pattern).toSExpr(env, ast);
                 node.appendStringAttr(env.gpa, "name", ast.resolve(a.name));
@@ -1419,7 +1428,7 @@ pub const ExposedItem = union(enum) {
             .malformed => |m| {
                 var node = SExpr.init(env.gpa, "exposed-malformed");
                 node.appendStringAttr(env.gpa, "reason", @tagName(m.reason));
-                node.appendRegion(env.gpa, ast.calcRegionInfo(m.region, env.line_starts.items));
+                ast.appendRegionInfoToSexprNode(env, &node, m.region);
                 return node;
             },
         }

diff --git a/src/snapshot.css b/src/snapshot.css
@@ -100,26 +100,40 @@ body {
     background-color: #f0f0f0;
 }
 .highlighted {
-    background-color: #ffffcc !important;
-    outline: 2px solid #ffd700 !important;
+    background-color: #ffffcc;
+    outline: 2px solid #ffd700;
+}
+
+/* Source range highlighting for the PARSE tree: pointer cursor, plus a grey underlined hover state */
+.source-range {
+    cursor: pointer;
+    transition: background-color 0.2s ease;
+}
+.source-range:hover {
+    background-color: #f0f0f0;
+    text-decoration: underline;
+}
+
+/* Byte range highlighting: pale yellow background with a solid gold bottom border */
+.highlight {
+    background-color: #ffffcc;
+    border-bottom: 2px solid #ffd700;
 }
 
 /* Flash animation for click highlighting */
-@keyframes flash {
+@keyframes flash-underline {
     0%,
-    50%,
     100% {
         background-color: #ffffcc;
-        outline: 2px solid #ffd700;
+        border-bottom-color: #ffd700;
     }
-    25%,
-    75% {
+    50% {
         background-color: #ffeb3b;
-        outline: 2px solid #ff9800;
+        border-bottom-color: #ff9800;
     }
 }
 .flash-highlight {
-    animation: flash 0.3s ease-in-out 2;
+    animation: flash-underline 0.3s ease-in-out 2;
 }
 
 /* Syntax highlighting */
@@ -153,6 +167,10 @@ body {
 .token-default {
     color: #000000;
 }
+.source-range {
+    color: #008000;
+    font-style: italic;
+}
 
 /* Hidden data storage */
 .hidden {