diff --git a/src/base.zig b/src/base.zig
index 5c1b162ad8..ba37f74fa4 100644
--- a/src/base.zig
+++ b/src/base.zig
@@ -12,6 +12,7 @@ pub const ModuleImport = @import("base/ModuleImport.zig");
 pub const StringLiteral = @import("base/StringLiteral.zig");
 pub const RegionInfo = @import("base/RegionInfo.zig");
 pub const Scratch = @import("base/Scratch.zig").Scratch;
+pub const parallel = @import("base/parallel.zig");

 /// Whether a function calls itself.
 pub const Recursive = enum {
diff --git a/src/base/parallel.zig b/src/base/parallel.zig
new file mode 100644
index 0000000000..d7923fd96c
--- /dev/null
+++ b/src/base/parallel.zig
@@ -0,0 +1,186 @@
+const std = @import("std");
+const Allocator = std.mem.Allocator;
+const Thread = std.Thread;
+
+/// Atomic type for thread-safe usize operations
+pub const AtomicUsize = std.atomic.Value(usize);
+
+/// Processing options for parallel execution
+pub const ProcessOptions = struct {
+    max_threads: usize,
+    use_per_thread_arenas: bool,
+};
+
+/// Worker thread function signature
+/// Takes: allocator, context, item_id -> void
+pub fn WorkerFn(comptime T: type) type {
+    return *const fn (allocator: Allocator, context: *T, item_id: usize) void;
+}
+
+/// Internal worker thread context
+fn WorkerContext(comptime T: type) type {
+    return struct {
+        work_item_count: usize,
+        index: *AtomicUsize,
+        worker_fn: WorkerFn(T),
+        context: *T,
+        base_allocator: Allocator,
+        options: ProcessOptions,
+    };
+}
+
+/// Worker thread loop: claims work items from a shared atomic index until none remain
+fn workerThread(comptime T: type, ctx: WorkerContext(T)) void {
+    if (ctx.options.use_per_thread_arenas) {
+        // Use a per-thread arena allocator, backed by the base allocator,
+        // that is reset between work items.
+        var arena = std.heap.ArenaAllocator.init(ctx.base_allocator);
+        defer arena.deinit();
+
+        while (true) {
+            const i = ctx.index.fetchAdd(1, .monotonic);
+            if (i >= ctx.work_item_count) break;
+
+            // Clear arena between work items
+            _ = arena.reset(.retain_capacity);
+
+            ctx.worker_fn(arena.allocator(), ctx.context, i);
+        }
+    } else {
+        // Use the base allocator directly
+        while (true) {
+            const i = ctx.index.fetchAdd(1, .monotonic);
+            if (i >= ctx.work_item_count) break;
+            ctx.worker_fn(ctx.base_allocator, ctx.context, i);
+        }
+    }
+}
+
+/// Process work items in parallel across multiple threads
+///
+/// Generic function that:
+/// 1. Takes a count of work items
+/// 2. Spawns worker threads to process items
+///
+/// Example usage:
+/// ```
+/// const MyContext = struct { paths: []const []const u8, ok: []bool };
+///
+/// fn processItem(allocator: Allocator, context: *MyContext, item_id: usize) void {
+///     // Process one work item; record the outcome on the shared context
+///     std.log.info("processing {s}", .{context.paths[item_id]});
+///     context.ok[item_id] = true;
+/// }
+///
+/// try process(MyContext, &my_context, processItem, gpa, my_context.paths.len, .{ .max_threads = 0, .use_per_thread_arenas = true });
+/// ```
+pub fn process(
+    comptime T: type,
+    context: *T,
+    worker_fn: WorkerFn(T),
+    allocator: Allocator,
+    work_item_count: usize,
+    options: ProcessOptions,
+) !void {
+    if (work_item_count == 0) {
+        return;
+    }
+
+    if (options.max_threads == 1) {
+        // Process everything on the main thread
+        var index = AtomicUsize.init(0);
+        const ctx = WorkerContext(T){
+            .work_item_count = work_item_count,
+            .index = &index,
+            .worker_fn = worker_fn,
+            .context = context,
+            .base_allocator = allocator,
+            .options = options,
+        };
+        workerThread(T, ctx);
+    } else {
+        const thread_count = @min(
+            if (options.max_threads == 0) std.Thread.getCpuCount() catch 1 else options.max_threads,
+            work_item_count,
+        );
+
+        var index = AtomicUsize.init(0);
+        const fixed_stack_thread_count: usize = 16;
+        var threads: [fixed_stack_thread_count]Thread = undefined;
+        var extra_threads: std.ArrayList(Thread) = undefined;
+
+        if (thread_count > fixed_stack_thread_count) {
+            extra_threads = std.ArrayList(Thread).init(allocator);
+        }
+
+        // Start worker threads
+        for (0..thread_count) |i| {
+            const ctx = WorkerContext(T){
+                .work_item_count = work_item_count,
+                .index = &index,
+                .worker_fn = worker_fn,
+                .context = context,
+                .base_allocator = allocator,
+                .options = options,
+            };
+            if (i < threads.len) {
+                threads[i] = try Thread.spawn(.{}, workerThread, .{ T, ctx });
+            } else {
+                try extra_threads.append(try Thread.spawn(.{}, workerThread, .{ T, ctx }));
+            }
+        }
+
+        // Wait for all threads to complete
+        for (threads[0..@min(thread_count, fixed_stack_thread_count)]) |thread| {
+            thread.join();
+        }
+        if (thread_count > fixed_stack_thread_count) {
+            for (extra_threads.items) |thread| {
+                thread.join();
+            }
+            extra_threads.deinit();
+        }
+    }
+}
+
+test "process basic functionality" {
+    const testing = std.testing;
+    const allocator = testing.allocator;
+
+    const MyContext = struct {
+        items: []const i32,
+        outputs: []i32,
+    };
+
+    const TestWorker = struct {
+        fn worker(worker_allocator: Allocator, item: *MyContext, item_id: usize) void {
+            _ = worker_allocator; // unused in this test
+            const value = item.items[item_id];
+            if (value < 0) {
+                item.outputs[item_id] = -1;
+            } else {
+                item.outputs[item_id] = value * value;
+            }
+        }
+    };
+
+    var outputs: [5]i32 = undefined; // Preallocate output array
+
+    var context = MyContext{
+        .items = &[_]i32{ 1, 2, -3, 4, 5 },
+        .outputs = &outputs,
+    };
+
+    try process(
+        MyContext,
+        &context,
+        TestWorker.worker,
+        allocator,
+        outputs.len,
+        .{ .max_threads = 1, .use_per_thread_arenas = false },
+    );
+    try testing.expectEqual(
+        outputs,
+        [_]i32{ 1, 4, -1, 16, 25 },
+    );
+}
diff --git a/src/snapshot.zig b/src/snapshot.zig
index 9142b42ad1..5fd1aa5f90 100644
--- a/src/snapshot.zig
+++ b/src/snapshot.zig
@@ -2,6 +2,7 @@ const std = @import("std");
 const testing = std.testing;
 const Allocator = std.mem.Allocator;
 const base = @import("base.zig");
+const parallel = base.parallel;
 const canonicalize = @import("check/canonicalize.zig");
 const types_mod = @import("types.zig");
 const types_problem_mod = @import("check/check_types/problem.zig");
@@ -94,8 +95,10 @@ fn
warn(comptime fmt_str: []const u8, args: anytype) void { /// cli entrypoint for snapshot tool pub fn main() !void { - // Use c_allocator for argument parsing - const gpa = std.heap.c_allocator; + // Use GeneralPurposeAllocator for command-line parsing and general work + var gpa_impl = std.heap.GeneralPurposeAllocator(.{}){}; + defer _ = gpa_impl.deinit(); + const gpa = gpa_impl.allocator(); const args = try std.process.argsAlloc(gpa); defer std.process.argsFree(gpa, args); @@ -107,6 +110,8 @@ pub fn main() !void { var expect_fuzz_corpus_path: bool = false; var generate_html: bool = false; var debug_mode: bool = false; + var max_threads: usize = 0; + var expect_threads: bool = false; for (args[1..]) |arg| { if (std.mem.eql(u8, arg, "--verbose")) { @@ -115,6 +120,12 @@ pub fn main() !void { generate_html = true; } else if (std.mem.eql(u8, arg, "--debug")) { debug_mode = true; + } else if (std.mem.eql(u8, arg, "--threads")) { + if (max_threads != 0) { + std.log.err("`--threads` should only be specified once.", .{}); + std.process.exit(1); + } + expect_threads = true; } else if (std.mem.eql(u8, arg, "--fuzz-corpus")) { if (maybe_fuzz_corpus_path != null) { std.log.err("`--fuzz-corpus` should only be specified once.", .{}); @@ -124,6 +135,12 @@ pub fn main() !void { } else if (expect_fuzz_corpus_path) { maybe_fuzz_corpus_path = arg; expect_fuzz_corpus_path = false; + } else if (expect_threads) { + max_threads = std.fmt.parseInt(usize, arg, 10) catch |err| { + std.log.err("Invalid thread count '{s}': {s}", .{ arg, @errorName(err) }); + std.process.exit(1); + }; + expect_threads = false; } else if (std.mem.eql(u8, arg, "--help")) { const usage = \\Usage: roc snapshot [options] [snapshot_paths...] @@ -132,6 +149,7 @@ pub fn main() !void { \\ --verbose Enable verbose logging \\ --html Generate HTML output files \\ --debug Use GeneralPurposeAllocator for debugging (default: c_allocator) + \\ --threads Number of threads to use (0 = auto-detect, 1 = single-threaded). Default: 0. 
\\ --fuzz-corpus Specify the path to the fuzz corpus \\ \\Arguments: @@ -149,43 +167,59 @@ pub fn main() !void { std.process.exit(1); } - // Choose allocator for snapshot processing based on debug mode - var gpa_impl: ?std.heap.GeneralPurposeAllocator(.{}) = null; - defer if (gpa_impl) |*impl| { - _ = impl.deinit(); - }; - - const snapshot_allocator = if (debug_mode) blk: { - gpa_impl = std.heap.GeneralPurposeAllocator(.{}){}; - break :blk gpa_impl.?.allocator(); - } else std.heap.c_allocator; + if (expect_threads) { + std.log.err("Expected thread count, but none was provided", .{}); + std.process.exit(1); + } - if (maybe_fuzz_corpus_path != null) { - log("copying SOURCE from snapshots to: {s}", .{maybe_fuzz_corpus_path.?}); - try std.fs.cwd().makePath(maybe_fuzz_corpus_path.?); + // Force single-threaded mode in debug mode + if (debug_mode and max_threads == 0) { + max_threads = 1; } + const config = Config{ + .maybe_fuzz_corpus_path = maybe_fuzz_corpus_path, + .generate_html = generate_html, + }; + + if (config.maybe_fuzz_corpus_path != null) { + log("copying SOURCE from snapshots to: {s}", .{config.maybe_fuzz_corpus_path.?}); + try std.fs.cwd().makePath(config.maybe_fuzz_corpus_path.?); + } const snapshots_dir = "src/snapshots"; - var total_success: usize = 0; - var total_failed: usize = 0; var timer = std.time.Timer.start() catch unreachable; + // Stage 1: Collect work items + var work_list = WorkList.init(gpa); + defer { + // Clean up any remaining work items + for (work_list.items) |work_item| { + gpa.free(work_item.path); + } + work_list.deinit(); + } + if (snapshot_paths.items.len > 0) { for (snapshot_paths.items) |path| { - const result = try processPath(snapshot_allocator, path, maybe_fuzz_corpus_path, generate_html); - total_success += result.success; - total_failed += result.failed; + try collectWorkItems(gpa, path, &work_list); } } else { // process all files in snapshots_dir - const result = try processPath(snapshot_allocator, snapshots_dir, maybe_fuzz_corpus_path, generate_html); - total_success = result.success; - total_failed = result.failed; + try collectWorkItems(gpa, snapshots_dir, &work_list); } + const collect_duration_ms = timer.read() / std.time.ns_per_ms; + log("collected {d} work items in {d} ms", .{ work_list.items.len, collect_duration_ms }); + + // Stage 2: Process work items (in parallel or single-threaded) + const result = try processWorkItems(gpa, work_list, max_threads, debug_mode, config); + const duration_ms = timer.read() / std.time.ns_per_ms; - std.log.info("processed {d} snapshots in {d} ms.", .{ total_success, duration_ms }); + std.log.info( + "collected {d} items in {d} ms, processed {d} snapshots in {d} ms.", + .{ work_list.items.len, collect_duration_ms, result.success, duration_ms }, + ); } /// Check if a file has a valid snapshot extension @@ -366,19 +400,26 @@ fn processRocFileAsSnapshot(allocator: Allocator, output_path: []const u8, roc_c try processRocFileAsSnapshotWithExpected(allocator, output_path, roc_content, meta, expected_content, generate_html); } -fn processRocFileAsSnapshotWithExpected(allocator: Allocator, output_path: []const u8, roc_content: []const u8, meta: Meta, expected_content: ?[]const u8, generate_html: bool) !void { +fn processSnapshotContent(allocator: Allocator, content: Content, output_path: []const u8, generate_html: bool) !void { log("Generating snapshot for: {s}", .{output_path}); // Process the content through the compilation pipeline var module_env = base.ModuleEnv.init(allocator); defer module_env.deinit(); - // 
Parse the content - var ast = parse.parse(&module_env, roc_content); - defer ast.deinit(allocator); + // Parse the source code based on node type + var parse_ast = switch (content.meta.node_type) { + .file => parse.parse(&module_env, content.source), + .header => parse.parseHeader(&module_env, content.source), + .expr => parse.parseExpr(&module_env, content.source), + .statement => parse.parseStatement(&module_env, content.source), + .package => parse.parse(&module_env, content.source), + .platform => parse.parse(&module_env, content.source), + .app => parse.parse(&module_env, content.source), + }; + defer parse_ast.deinit(allocator); - // Try canonicalization - ast.store.emptyScratch(); + parse_ast.store.emptyScratch(); // Extract module name from output path const basename = std.fs.path.basename(output_path); @@ -389,37 +430,90 @@ fn processRocFileAsSnapshotWithExpected(allocator: Allocator, output_path: []con var can_ir = CIR.init(&module_env, module_name); defer can_ir.deinit(); - var can = canonicalize.init(&can_ir, &ast, null) catch |err| { - warn("Canonicalization init failed: {}", .{err}); - return; - }; + var can = try canonicalize.init(&can_ir, &parse_ast, null); defer can.deinit(); - const maybe_expr_idx: ?CIR.Expr.Idx = null; + var maybe_expr_idx: ?CIR.Expr.Idx = null; - can.canonicalizeFile() catch |err| { - warn("Canonicalization failed: {}", .{err}); - return; - }; + switch (content.meta.node_type) { + .file => try can.canonicalizeFile(), + .header => { + // TODO: implement canonicalize_header when available + }, + .expr => { + const expr_idx: AST.Expr.Idx = @enumFromInt(parse_ast.root_node_idx); + maybe_expr_idx = try can.canonicalizeExpr(expr_idx); + }, + .statement => { + // Manually track scratch statements because we aren't using the file entrypoint + const stmt_idx: AST.Statement.Idx = @enumFromInt(parse_ast.root_node_idx); + const scratch_statements_start = can_ir.store.scratch_statements.top(); + _ = try can.canonicalizeStatement(stmt_idx); + can_ir.all_statements = can_ir.store.statementSpanFrom(scratch_statements_start); + }, + .package => try can.canonicalizeFile(), + .platform => try can.canonicalizeFile(), + .app => try can.canonicalizeFile(), + } - // Types (ONCE) + // Types const empty_modules: []const *CIR = &.{}; - var solver = Solver.init(allocator, &can_ir.env.types, &can_ir, empty_modules) catch |err| { - warn("Type solver init failed: {}", .{err}); - return; - }; + var solver = try Solver.init(allocator, &can_ir.env.types, &can_ir, empty_modules); defer solver.deinit(); - try solver.checkDefs(); + if (maybe_expr_idx) |expr_idx| { + _ = try solver.checkExpr(expr_idx); + } else { + try solver.checkDefs(); + } - // Create content structure - const content = Content{ - .meta = meta, - .source = roc_content, - .expected = expected_content, - .formatted = null, - .has_canonicalize = true, - }; + // Cache round-trip validation - ensure ModuleCache serialization/deserialization works + { + // Generate original S-expression for comparison + var original_tree = SExprTree.init(allocator); + defer original_tree.deinit(); + CIR.pushToSExprTree(&can_ir, null, &original_tree, content.source); + + var original_sexpr = std.ArrayList(u8).init(allocator); + defer original_sexpr.deinit(); + original_tree.toStringPretty(original_sexpr.writer().any()); + + // Create and serialize MmapCache + const cache_data = try cache.CacheModule.create(allocator, &module_env, &can_ir, 0, 0); + defer allocator.free(cache_data); + + // Deserialize back + var loaded_cache = try 
cache.CacheModule.fromMappedMemory(cache_data); + + // Restore ModuleEnv and CIR + const restored = try loaded_cache.restore(allocator, module_name); + var restored_module_env = restored.module_env; + defer restored_module_env.deinit(); + var restored_cir = restored.cir; + defer restored_cir.deinit(); + + // Fix env pointer after struct move + restored_cir.env = &restored_module_env; + + // Generate S-expression from restored CIR + var restored_tree = SExprTree.init(allocator); + defer restored_tree.deinit(); + CIR.pushToSExprTree(&restored_cir, null, &restored_tree, content.source); + + var restored_sexpr = std.ArrayList(u8).init(allocator); + defer restored_sexpr.deinit(); + restored_tree.toStringPretty(restored_sexpr.writer().any()); + + // Compare S-expressions - crash if they don't match + if (!std.mem.eql(u8, original_sexpr.items, restored_sexpr.items)) { + std.log.err("Cache round-trip validation failed for snapshot: {s}", .{output_path}); + std.log.err("Original and restored CIR S-expressions don't match!", .{}); + std.log.err("This indicates a bug in MmapCache serialization/deserialization.", .{}); + std.log.err("Original S-expression:\n{s}", .{original_sexpr.items}); + std.log.err("Restored S-expression:\n{s}", .{restored_sexpr.items}); + return error.CacheRoundTripValidationFailed; + } + } // Buffer all output in memory before writing files var md_buffer = std.ArrayList(u8).init(allocator); @@ -437,10 +531,10 @@ fn processRocFileAsSnapshotWithExpected(allocator: Allocator, output_path: []con try generateMetaSection(&output, &content); try generateSourceSection(&output, &content); try generateExpectedSection(&output, &content); - try generateProblemsSection(&output, &ast, &can_ir, &solver, &content, output_path, &module_env); - try generateTokensSection(&output, &ast, &content, &module_env); - try generateParseSection(&output, &content, &ast, &module_env); - try generateFormattedSection(&output, &content, &ast); + try generateProblemsSection(&output, &parse_ast, &can_ir, &solver, &content, output_path, &module_env); + try generateTokensSection(&output, &parse_ast, &content, &module_env); + try generateParseSection(&output, &content, &parse_ast, &module_env); + try generateFormattedSection(&output, &content, &parse_ast); try generateCanonicalizeSection(&output, &content, &can_ir, maybe_expr_idx); try generateTypesSection(&output, &content, &can_ir, maybe_expr_idx); @@ -455,33 +549,112 @@ fn processRocFileAsSnapshotWithExpected(allocator: Allocator, output_path: []con try md_file.writeAll(md_buffer.items); - // Write HTML file if (html_buffer) |*buf| { - const html_path = try std.fmt.allocPrint(allocator, "{s}.html", .{output_path[0 .. 
output_path.len - 3]}); - defer allocator.free(html_path); - - const html_file = std.fs.cwd().createFile(html_path, .{}) catch |err| { - warn("Failed to create {s}: {}", .{ html_path, err }); - return; + writeHtmlFile(allocator, output_path, buf) catch |err| { + warn("Failed to write HTML file for {s}: {}", .{ output_path, err }); }; - defer html_file.close(); - - try html_file.writeAll(buf.items); } } +fn processRocFileAsSnapshotWithExpected(allocator: Allocator, output_path: []const u8, roc_content: []const u8, meta: Meta, expected_content: ?[]const u8, generate_html: bool) !void { + // Create content structure + const content = Content{ + .meta = meta, + .source = roc_content, + .expected = expected_content, + .formatted = null, + .has_canonicalize = true, + }; + + try processSnapshotContent(allocator, content, output_path, generate_html); +} + +const Config = struct { + maybe_fuzz_corpus_path: ?[]const u8, + generate_html: bool, +}; + const ProcessResult = struct { success: usize, failed: usize, }; -fn processPath(gpa: Allocator, path: []const u8, maybe_fuzz_corpus_path: ?[]const u8, generate_html: bool) !ProcessResult { - var processed_count: usize = 0; - var failed_count: usize = 0; +const WorkItem = struct { + path: []const u8, + kind: enum { + snapshot_file, + multi_file_snapshot, + }, +}; + +const WorkList = std.ArrayList(WorkItem); + +const ProcessContext = struct { + work_list: *WorkList, + config: Config, + success_count: parallel.AtomicUsize, + failed_count: parallel.AtomicUsize, +}; + +/// Worker function that processes a single work item +fn processWorkItem(allocator: Allocator, context: *ProcessContext, item_id: usize) void { + const work_item = context.work_list.items[item_id]; + const success = switch (work_item.kind) { + .snapshot_file => processSnapshotFile(allocator, work_item.path, context.config.maybe_fuzz_corpus_path, context.config.generate_html) catch false, + .multi_file_snapshot => blk: { + processMultiFileSnapshot(allocator, work_item.path, context.config.generate_html) catch { + break :blk false; + }; + break :blk true; + }, + }; + + if (success) { + _ = context.success_count.fetchAdd(1, .monotonic); + } else { + _ = context.failed_count.fetchAdd(1, .monotonic); + } +} + +/// Stage 2: Process work items in parallel using the parallel utility +fn processWorkItems(gpa: Allocator, work_list: WorkList, max_threads: usize, debug: bool, config: Config) !ProcessResult { + if (work_list.items.len == 0) { + return ProcessResult{ .success = 0, .failed = 0 }; + } + + var context = ProcessContext{ + .work_list = @constCast(&work_list), + .config = config, + .success_count = parallel.AtomicUsize.init(0), + .failed_count = parallel.AtomicUsize.init(0), + }; + + // Use per-thread arena allocators for snapshot processing + const options = parallel.ProcessOptions{ + .max_threads = max_threads, + .use_per_thread_arenas = !debug, + }; + + try parallel.process( + ProcessContext, + &context, + processWorkItem, + gpa, + work_list.items.len, + options, + ); + + return ProcessResult{ + .success = context.success_count.load(.monotonic), + .failed = context.failed_count.load(.monotonic), + }; +} +/// Stage 1: Walk directory tree and collect work items +fn collectWorkItems(gpa: Allocator, path: []const u8, work_list: *WorkList) !void { const canonical_path = std.fs.cwd().realpathAlloc(gpa, path) catch |err| { std.log.err("failed to resolve path '{s}': {s}", .{ path, @errorName(err) }); - return .{ .success = 0, .failed = 1 }; + return; }; defer gpa.free(canonical_path); @@ -492,9 
+665,11 @@ fn processPath(gpa: Allocator, path: []const u8, maybe_fuzz_corpus_path: ?[]cons // It's a directory if (isMultiFileSnapshot(canonical_path)) { - try processMultiFileSnapshot(gpa, canonical_path, generate_html); - processed_count += 1; - return .{ .success = processed_count, .failed = failed_count }; + const path_copy = try gpa.dupe(u8, canonical_path); + try work_list.append(WorkItem{ + .path = path_copy, + .kind = .multi_file_snapshot, + }); } else { var dir_iterator = dir.iterate(); while (try dir_iterator.next()) |entry| { @@ -505,16 +680,13 @@ fn processPath(gpa: Allocator, path: []const u8, maybe_fuzz_corpus_path: ?[]cons defer gpa.free(full_path); if (entry.kind == .directory) { - const result = try processPath(gpa, full_path, maybe_fuzz_corpus_path, generate_html); - processed_count += result.success; - failed_count += result.failed; + try collectWorkItems(gpa, full_path, work_list); } else if (entry.kind == .file and isSnapshotFile(entry.name)) { - if (try processSnapshotFile(gpa, full_path, maybe_fuzz_corpus_path, generate_html)) { - processed_count += 1; - } else { - log("skipped file (not a valid snapshot): {s}", .{full_path}); - failed_count += 1; - } + const path_copy = try gpa.dupe(u8, full_path); + try work_list.append(WorkItem{ + .path = path_copy, + .kind = .snapshot_file, + }); } } } @@ -522,23 +694,18 @@ fn processPath(gpa: Allocator, path: []const u8, maybe_fuzz_corpus_path: ?[]cons // Not a directory, try as file if (dir_err == error.NotDir) { if (isSnapshotFile(canonical_path)) { - if (try processSnapshotFile(gpa, canonical_path, maybe_fuzz_corpus_path, generate_html)) { - processed_count += 1; - } else { - std.log.err("failed to process snapshot file: {s}", .{canonical_path}); - std.log.err("make sure the file starts with '~~~META' and has valid snapshot format", .{}); - failed_count += 1; - } + const path_copy = try gpa.dupe(u8, canonical_path); + try work_list.append(WorkItem{ + .path = path_copy, + .kind = .snapshot_file, + }); } else { std.log.err("file '{s}' is not a snapshot file (must end with .md)", .{canonical_path}); } } else { std.log.err("failed to access path '{s}': {s}", .{ canonical_path, @errorName(dir_err) }); - return .{ .success = 0, .failed = 1 }; } } - - return .{ .success = processed_count, .failed = failed_count }; } /// Represents the different sections of a snapshot file. 
@@ -1559,166 +1726,11 @@ fn processSnapshotFileUnified(gpa: Allocator, snapshot_path: []const u8, maybe_f } }; - var module_env = base.ModuleEnv.init(gpa); - defer module_env.deinit(); - - // Parse the source code (ONCE) - var parse_ast = switch (content.meta.node_type) { - .file => parse.parse(&module_env, content.source), - .header => parse.parseHeader(&module_env, content.source), - .expr => parse.parseExpr(&module_env, content.source), - .statement => parse.parseStatement(&module_env, content.source), - .package => parse.parse(&module_env, content.source), - .platform => parse.parse(&module_env, content.source), - .app => parse.parse(&module_env, content.source), - }; - defer parse_ast.deinit(gpa); - - parse_ast.store.emptyScratch(); - - // Canonicalize the source code (ONCE) - // Extract module name from snapshot path - const basename = std.fs.path.basename(snapshot_path); - const module_name = if (std.mem.lastIndexOfScalar(u8, basename, '.')) |dot_idx| - basename[0..dot_idx] - else - basename; - var can_ir = CIR.init(&module_env, module_name); - defer can_ir.deinit(); - - var can = try canonicalize.init(&can_ir, &parse_ast, null); - defer can.deinit(); - - var maybe_expr_idx: ?CIR.Expr.Idx = null; - - switch (content.meta.node_type) { - .file => try can.canonicalizeFile(), - .header => { - // TODO: implement canonicalize_header when available - }, - .expr => { - const expr_idx: AST.Expr.Idx = @enumFromInt(parse_ast.root_node_idx); - maybe_expr_idx = try can.canonicalizeExpr(expr_idx); - }, - .statement => { - // Manually track scratch statements because we aren't using the file entrypoint - const stmt_idx: AST.Statement.Idx = @enumFromInt(parse_ast.root_node_idx); - const scratch_statements_start = can_ir.store.scratch_statements.top(); - _ = try can.canonicalizeStatement(stmt_idx); - can_ir.all_statements = can_ir.store.statementSpanFrom(scratch_statements_start); - }, - .package => try can.canonicalizeFile(), - .platform => try can.canonicalizeFile(), - .app => try can.canonicalizeFile(), - } - - // Types (ONCE) - const empty_modules: []const *CIR = &.{}; - var solver = try Solver.init(gpa, &can_ir.env.types, &can_ir, empty_modules); - defer solver.deinit(); - - if (maybe_expr_idx) |expr_idx| { - _ = try solver.checkExpr(expr_idx); - } else { - try solver.checkDefs(); - } - - // Cache round-trip validation - ensure ModuleCache serialization/deserialization works - { - // Generate original S-expression for comparison - var original_tree = SExprTree.init(gpa); - defer original_tree.deinit(); - CIR.pushToSExprTree(&can_ir, null, &original_tree, content.source); - - var original_sexpr = std.ArrayList(u8).init(gpa); - defer original_sexpr.deinit(); - original_tree.toStringPretty(original_sexpr.writer().any()); - - // Create and serialize MmapCache - const cache_data = try cache.CacheModule.create(gpa, &module_env, &can_ir, 0, 0); - defer gpa.free(cache_data); - - // Deserialize back - var loaded_cache = try cache.CacheModule.fromMappedMemory(cache_data); - - // Restore ModuleEnv and CIR - // Extract module name from snapshot path - const cache_basename = std.fs.path.basename(snapshot_path); - const cache_module_name = if (std.mem.lastIndexOfScalar(u8, cache_basename, '.')) |dot_idx| - cache_basename[0..dot_idx] - else - cache_basename; - const restored = try loaded_cache.restore(gpa, cache_module_name); - var restored_module_env = restored.module_env; - defer restored_module_env.deinit(); - var restored_cir = restored.cir; - defer restored_cir.deinit(); - - // Fix env pointer after 
struct move - restored_cir.env = &restored_module_env; - - // Generate S-expression from restored CIR - var restored_tree = SExprTree.init(gpa); - defer restored_tree.deinit(); - CIR.pushToSExprTree(&restored_cir, null, &restored_tree, content.source); - - var restored_sexpr = std.ArrayList(u8).init(gpa); - defer restored_sexpr.deinit(); - restored_tree.toStringPretty(restored_sexpr.writer().any()); - - // Compare S-expressions - crash if they don't match - if (!std.mem.eql(u8, original_sexpr.items, restored_sexpr.items)) { - std.log.err("Cache round-trip validation failed for snapshot: {s}", .{snapshot_path}); - std.log.err("Original and restored CIR S-expressions don't match!", .{}); - std.log.err("This indicates a bug in MmapCache serialization/deserialization.", .{}); - std.log.err("Original S-expression:\n{s}", .{original_sexpr.items}); - std.log.err("Restored S-expression:\n{s}", .{restored_sexpr.items}); - return false; - } - } - - // Buffer all output in memory before writing files - var md_buffer = std.ArrayList(u8).init(gpa); - defer md_buffer.deinit(); - - var html_buffer = if (generate_html) std.ArrayList(u8).init(gpa) else null; - defer if (html_buffer) |*buf| buf.deinit(); - - var output = DualOutput.init(gpa, &md_buffer, if (html_buffer) |*buf| buf else null); - - // Generate HTML wrapper - try generateHtmlWrapper(&output, &content); - - // Generate all sections simultaneously - try generateMetaSection(&output, &content); - try generateSourceSection(&output, &content); - try generateExpectedSection(&output, &content); - try generateProblemsSection(&output, &parse_ast, &can_ir, &solver, &content, snapshot_path, &module_env); - try generateTokensSection(&output, &parse_ast, &content, &module_env); - - // Generate remaining sections - try generateParseSection(&output, &content, &parse_ast, &module_env); - try generateFormattedSection(&output, &content, &parse_ast); - try generateCanonicalizeSection(&output, &content, &can_ir, maybe_expr_idx); - try generateTypesSection(&output, &content, &can_ir, maybe_expr_idx); - // TODO: Include to emit entire types store. Can be helpful for debugging - // try generateTypesStoreSection(gpa, &output, &can_ir); - - // Generate HTML closing - try generateHtmlClosing(&output); - - // Write markdown file - var md_file = std.fs.cwd().createFile(snapshot_path, .{}) catch |err| { - log("failed to create file '{s}': {s}", .{ snapshot_path, @errorName(err) }); + // Process the content through the shared compilation pipeline + processSnapshotContent(gpa, content, snapshot_path, generate_html) catch |err| { + log("failed to process snapshot content: {s}", .{@errorName(err)}); return false; }; - defer md_file.close(); - try md_file.writer().writeAll(md_buffer.items); - - // Write HTML file - if (html_buffer) |*buf| { - try writeHtmlFile(gpa, snapshot_path, buf); - } // If flag --fuzz-corpus is passed, write the SOURCE to our corpus if (maybe_fuzz_corpus_path != null) {
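For reference, here is a minimal sketch (not part of the diff) of how a caller drives the new `base.parallel.process` API, following the same pattern the snapshot tool uses above: a shared context with read-only inputs, disjoint per-item output slots, and an atomic counter for failures. The `Jobs` and `runJob` names and the toy workload are illustrative only, and the sketch assumes it lives next to `base.zig` under `src/`.

```zig
const std = @import("std");
const base = @import("base.zig");
const parallel = base.parallel;

/// Shared state handed to every worker. Inputs are read-only and each item
/// writes to its own result slot, so the only contended state is the atomic
/// failure counter.
const Jobs = struct {
    inputs: []const u32,
    results: []u64,
    failed: parallel.AtomicUsize,
};

/// Matches parallel.WorkerFn(Jobs): allocator (a per-thread arena when
/// use_per_thread_arenas is set), shared context pointer, and the item id.
fn runJob(allocator: std.mem.Allocator, jobs: *Jobs, item_id: usize) void {
    _ = allocator; // per-item scratch allocations would use this
    const n = jobs.inputs[item_id];
    if (n == 0) {
        _ = jobs.failed.fetchAdd(1, .monotonic);
        return;
    }
    jobs.results[item_id] = @as(u64, n) * n;
}

pub fn main() !void {
    var gpa_impl = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa_impl.deinit();
    const gpa = gpa_impl.allocator();

    const inputs = [_]u32{ 3, 0, 7, 12 };
    var results: [inputs.len]u64 = undefined;

    var jobs = Jobs{
        .inputs = &inputs,
        .results = &results,
        .failed = parallel.AtomicUsize.init(0),
    };

    // max_threads = 0 auto-detects the CPU count; 1 runs everything on the
    // calling thread, which is what the snapshot tool does under --debug.
    try parallel.process(Jobs, &jobs, runJob, gpa, inputs.len, .{
        .max_threads = 0,
        .use_per_thread_arenas = true,
    });

    std.debug.print("failed: {d}, results: {any}\n", .{ jobs.failed.load(.monotonic), results });
}
```

For the snapshot tool itself, the thread count is chosen on the command line: `roc snapshot --threads 4` pins it to four workers, `--threads 1` (or `--debug`, which also disables the per-thread arenas) processes everything on the main thread, and the default of `0` auto-detects the CPU count.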