diff --git a/src/base.zig b/src/base.zig
index 5c1b162ad8..ba37f74fa4 100644
--- a/src/base.zig
+++ b/src/base.zig
@@ -12,6 +12,7 @@ pub const ModuleImport = @import("base/ModuleImport.zig");
 pub const StringLiteral = @import("base/StringLiteral.zig");
 pub const RegionInfo = @import("base/RegionInfo.zig");
 pub const Scratch = @import("base/Scratch.zig").Scratch;
+pub const parallel = @import("base/parallel.zig");

 /// Whether a function calls itself.
 pub const Recursive = enum {
diff --git a/src/base/parallel.zig b/src/base/parallel.zig
new file mode 100644
index 0000000000..d7923fd96c
--- /dev/null
+++ b/src/base/parallel.zig
@@ -0,0 +1,186 @@
+const std = @import("std");
+const Allocator = std.mem.Allocator;
+const Thread = std.Thread;
+
+/// Atomic type for thread-safe usize operations
+pub const AtomicUsize = std.atomic.Value(usize);
+
+/// Processing options for parallel execution
+pub const ProcessOptions = struct {
+    max_threads: usize,
+    use_per_thread_arenas: bool,
+};
+
+/// Worker thread function signature
+/// Takes: allocator, context, item_id -> void
+pub fn WorkerFn(comptime T: type) type {
+    return *const fn (allocator: Allocator, context: *T, item_id: usize) void;
+}
+
+/// Internal worker thread context
+fn WorkerContext(comptime T: type) type {
+    return struct {
+        work_item_count: usize,
+        index: *AtomicUsize,
+        worker_fn: WorkerFn(T),
+        context: *T,
+        base_allocator: Allocator,
+        options: ProcessOptions,
+    };
+}
+
+/// Worker thread loop: claims work items from a shared atomic index until none remain
+fn workerThread(comptime T: type, ctx: WorkerContext(T)) void {
+    if (ctx.options.use_per_thread_arenas) {
+        // Use a per-thread arena allocator, backed by the base allocator,
+        // that is reset between work items.
+        var arena = std.heap.ArenaAllocator.init(ctx.base_allocator);
+        defer arena.deinit();
+
+        while (true) {
+            const i = ctx.index.fetchAdd(1, .monotonic);
+            if (i >= ctx.work_item_count) break;
+
+            // Clear arena between work items
+            _ = arena.reset(.retain_capacity);
+
+            ctx.worker_fn(arena.allocator(), ctx.context, i);
+        }
+    } else {
+        // Use the base allocator directly
+        while (true) {
+            const i = ctx.index.fetchAdd(1, .monotonic);
+            if (i >= ctx.work_item_count) break;
+            ctx.worker_fn(ctx.base_allocator, ctx.context, i);
+        }
+    }
+}
+
+/// Process work items in parallel across multiple threads
+///
+/// Generic function that:
+/// 1. Takes a count of work items
+/// 2. Spawns worker threads to process items
+///
+/// Example usage:
+/// ```
+/// const MyContext = struct { paths: []const []const u8, ok: []bool };
+///
+/// fn processItem(allocator: Allocator, context: *MyContext, item_id: usize) void {
+///     // Process one work item; record the outcome on the shared context
+///     std.log.info("processing {s}", .{context.paths[item_id]});
+///     context.ok[item_id] = true;
+/// }
+///
+/// try process(MyContext, &my_context, processItem, gpa, my_context.paths.len, .{ .max_threads = 0, .use_per_thread_arenas = true });
+/// ```
+pub fn process(
+    comptime T: type,
+    context: *T,
+    worker_fn: WorkerFn(T),
+    allocator: Allocator,
+    work_item_count: usize,
+    options: ProcessOptions,
+) !void {
+    if (work_item_count == 0) {
+        return;
+    }
+
+    if (options.max_threads == 1) {
+        // Process everything on the main thread
+        var index = AtomicUsize.init(0);
+        const ctx = WorkerContext(T){
+            .work_item_count = work_item_count,
+            .index = &index,
+            .worker_fn = worker_fn,
+            .context = context,
+            .base_allocator = allocator,
+            .options = options,
+        };
+        workerThread(T, ctx);
+    } else {
+        const thread_count = @min(
+            if (options.max_threads == 0) std.Thread.getCpuCount() catch 1 else options.max_threads,
+            work_item_count,
+        );
+
+        var index = AtomicUsize.init(0);
+        const fixed_stack_thread_count: usize = 16;
+        var threads: [fixed_stack_thread_count]Thread = undefined;
+        var extra_threads: std.ArrayList(Thread) = undefined;
+
+        if (thread_count > fixed_stack_thread_count) {
+            extra_threads = std.ArrayList(Thread).init(allocator);
+        }
+
+        // Start worker threads
+        for (0..thread_count) |i| {
+            const ctx = WorkerContext(T){
+                .work_item_count = work_item_count,
+                .index = &index,
+                .worker_fn = worker_fn,
+                .context = context,
+                .base_allocator = allocator,
+                .options = options,
+            };
+            if (i < threads.len) {
+                threads[i] = try Thread.spawn(.{}, workerThread, .{ T, ctx });
+            } else {
+                try extra_threads.append(try Thread.spawn(.{}, workerThread, .{ T, ctx }));
+            }
+        }
+
+        // Wait for all threads to complete
+        for (threads[0..@min(thread_count, fixed_stack_thread_count)]) |thread| {
+            thread.join();
+        }
+        if (thread_count > fixed_stack_thread_count) {
+            for (extra_threads.items) |thread| {
+                thread.join();
+            }
+            extra_threads.deinit();
+        }
+    }
+}
+
+test "process basic functionality" {
+    const testing = std.testing;
+    const allocator = testing.allocator;
+
+    const MyContext = struct {
+        items: []const i32,
+        outputs: []i32,
+    };
+
+    const TestWorker = struct {
+        fn worker(worker_allocator: Allocator, item: *MyContext, item_id: usize) void {
+            _ = worker_allocator; // unused in this test
+            const value = item.items[item_id];
+            if (value < 0) {
+                item.outputs[item_id] = -1;
+            } else {
+                item.outputs[item_id] = value * value;
+            }
+        }
+    };
+
+    var outputs: [5]i32 = undefined; // Preallocate output array
+
+    var context = MyContext{
+        .items = &[_]i32{ 1, 2, -3, 4, 5 },
+        .outputs = &outputs,
+    };
+
+    try process(
+        MyContext,
+        &context,
+        TestWorker.worker,
+        allocator,
+        outputs.len,
+        .{ .max_threads = 1, .use_per_thread_arenas = false },
+    );
+    try testing.expectEqual(
+        outputs,
+        [_]i32{ 1, 4, -1, 16, 25 },
+    );
+}
diff --git a/src/snapshot.zig b/src/snapshot.zig
index 9142b42ad1..5fd1aa5f90 100644
--- a/src/snapshot.zig
+++ b/src/snapshot.zig
@@ -2,6 +2,7 @@ const std = @import("std");
 const testing = std.testing;
 const Allocator = std.mem.Allocator;
 const base = @import("base.zig");
+const parallel = base.parallel;
 const canonicalize = @import("check/canonicalize.zig");
 const types_mod = @import("types.zig");
 const types_problem_mod = @import("check/check_types/problem.zig");
@@ -94,8 +95,10 @@ fn
warn(comptime fmt_str: []const u8, args: anytype) void { /// cli entrypoint for snapshot tool pub fn main() !void { - // Use c_allocator for argument parsing - const gpa = std.heap.c_allocator; + // Use GeneralPurposeAllocator for command-line parsing and general work + var gpa_impl = std.heap.GeneralPurposeAllocator(.{}){}; + defer _ = gpa_impl.deinit(); + const gpa = gpa_impl.allocator(); const args = try std.process.argsAlloc(gpa); defer std.process.argsFree(gpa, args); @@ -107,6 +110,8 @@ pub fn main() !void { var expect_fuzz_corpus_path: bool = false; var generate_html: bool = false; var debug_mode: bool = false; + var max_threads: usize = 0; + var expect_threads: bool = false; for (args[1..]) |arg| { if (std.mem.eql(u8, arg, "--verbose")) { @@ -115,6 +120,12 @@ pub fn main() !void { generate_html = true; } else if (std.mem.eql(u8, arg, "--debug")) { debug_mode = true; + } else if (std.mem.eql(u8, arg, "--threads")) { + if (max_threads != 0) { + std.log.err("`--threads` should only be specified once.", .{}); + std.process.exit(1); + } + expect_threads = true; } else if (std.mem.eql(u8, arg, "--fuzz-corpus")) { if (maybe_fuzz_corpus_path != null) { std.log.err("`--fuzz-corpus` should only be specified once.", .{}); @@ -124,6 +135,12 @@ pub fn main() !void { } else if (expect_fuzz_corpus_path) { maybe_fuzz_corpus_path = arg; expect_fuzz_corpus_path = false; + } else if (expect_threads) { + max_threads = std.fmt.parseInt(usize, arg, 10) catch |err| { + std.log.err("Invalid thread count '{s}': {s}", .{ arg, @errorName(err) }); + std.process.exit(1); + }; + expect_threads = false; } else if (std.mem.eql(u8, arg, "--help")) { const usage = \\Usage: roc snapshot [options] [snapshot_paths...] @@ -132,6 +149,7 @@ pub fn main() !void { \\ --verbose Enable verbose logging \\ --html Generate HTML output files \\ --debug Use GeneralPurposeAllocator for debugging (default: c_allocator) + \\ --threads Number of threads to use (0 = auto-detect, 1 = single-threaded). Default: 0. 
\\ --fuzz-corpus Specify the path to the fuzz corpus \\ \\Arguments: @@ -149,43 +167,59 @@ pub fn main() !void { std.process.exit(1); } - // Choose allocator for snapshot processing based on debug mode - var gpa_impl: ?std.heap.GeneralPurposeAllocator(.{}) = null; - defer if (gpa_impl) |*impl| { - _ = impl.deinit(); - }; - - const snapshot_allocator = if (debug_mode) blk: { - gpa_impl = std.heap.GeneralPurposeAllocator(.{}){}; - break :blk gpa_impl.?.allocator(); - } else std.heap.c_allocator; + if (expect_threads) { + std.log.err("Expected thread count, but none was provided", .{}); + std.process.exit(1); + } - if (maybe_fuzz_corpus_path != null) { - log("copying SOURCE from snapshots to: {s}", .{maybe_fuzz_corpus_path.?}); - try std.fs.cwd().makePath(maybe_fuzz_corpus_path.?); + // Force single-threaded mode in debug mode + if (debug_mode and max_threads == 0) { + max_threads = 1; } + const config = Config{ + .maybe_fuzz_corpus_path = maybe_fuzz_corpus_path, + .generate_html = generate_html, + }; + + if (config.maybe_fuzz_corpus_path != null) { + log("copying SOURCE from snapshots to: {s}", .{config.maybe_fuzz_corpus_path.?}); + try std.fs.cwd().makePath(config.maybe_fuzz_corpus_path.?); + } const snapshots_dir = "src/snapshots"; - var total_success: usize = 0; - var total_failed: usize = 0; var timer = std.time.Timer.start() catch unreachable; + // Stage 1: Collect work items + var work_list = WorkList.init(gpa); + defer { + // Clean up any remaining work items + for (work_list.items) |work_item| { + gpa.free(work_item.path); + } + work_list.deinit(); + } + if (snapshot_paths.items.len > 0) { for (snapshot_paths.items) |path| { - const result = try processPath(snapshot_allocator, path, maybe_fuzz_corpus_path, generate_html); - total_success += result.success; - total_failed += result.failed; + try collectWorkItems(gpa, path, &work_list); } } else { // process all files in snapshots_dir - const result = try processPath(snapshot_allocator, snapshots_dir, maybe_fuzz_corpus_path, generate_html); - total_success = result.success; - total_failed = result.failed; + try collectWorkItems(gpa, snapshots_dir, &work_list); } + const collect_duration_ms = timer.read() / std.time.ns_per_ms; + log("collected {d} work items in {d} ms", .{ work_list.items.len, collect_duration_ms }); + + // Stage 2: Process work items (in parallel or single-threaded) + const result = try processWorkItems(gpa, work_list, max_threads, debug_mode, config); + const duration_ms = timer.read() / std.time.ns_per_ms; - std.log.info("processed {d} snapshots in {d} ms.", .{ total_success, duration_ms }); + std.log.info( + "collected {d} items in {d} ms, processed {d} snapshots in {d} ms.", + .{ work_list.items.len, collect_duration_ms, result.success, duration_ms }, + ); } /// Check if a file has a valid snapshot extension @@ -366,19 +400,26 @@ fn processRocFileAsSnapshot(allocator: Allocator, output_path: []const u8, roc_c try processRocFileAsSnapshotWithExpected(allocator, output_path, roc_content, meta, expected_content, generate_html); } -fn processRocFileAsSnapshotWithExpected(allocator: Allocator, output_path: []const u8, roc_content: []const u8, meta: Meta, expected_content: ?[]const u8, generate_html: bool) !void { +fn processSnapshotContent(allocator: Allocator, content: Content, output_path: []const u8, generate_html: bool) !void { log("Generating snapshot for: {s}", .{output_path}); // Process the content through the compilation pipeline var module_env = base.ModuleEnv.init(allocator); defer module_env.deinit(); - // 
Parse the content - var ast = parse.parse(&module_env, roc_content); - defer ast.deinit(allocator); + // Parse the source code based on node type + var parse_ast = switch (content.meta.node_type) { + .file => parse.parse(&module_env, content.source), + .header => parse.parseHeader(&module_env, content.source), + .expr => parse.parseExpr(&module_env, content.source), + .statement => parse.parseStatement(&module_env, content.source), + .package => parse.parse(&module_env, content.source), + .platform => parse.parse(&module_env, content.source), + .app => parse.parse(&module_env, content.source), + }; + defer parse_ast.deinit(allocator); - // Try canonicalization - ast.store.emptyScratch(); + parse_ast.store.emptyScratch(); // Extract module name from output path const basename = std.fs.path.basename(output_path); @@ -389,37 +430,90 @@ fn processRocFileAsSnapshotWithExpected(allocator: Allocator, output_path: []con var can_ir = CIR.init(&module_env, module_name); defer can_ir.deinit(); - var can = canonicalize.init(&can_ir, &ast, null) catch |err| { - warn("Canonicalization init failed: {}", .{err}); - return; - }; + var can = try canonicalize.init(&can_ir, &parse_ast, null); defer can.deinit(); - const maybe_expr_idx: ?CIR.Expr.Idx = null; + var maybe_expr_idx: ?CIR.Expr.Idx = null; - can.canonicalizeFile() catch |err| { - warn("Canonicalization failed: {}", .{err}); - return; - }; + switch (content.meta.node_type) { + .file => try can.canonicalizeFile(), + .header => { + // TODO: implement canonicalize_header when available + }, + .expr => { + const expr_idx: AST.Expr.Idx = @enumFromInt(parse_ast.root_node_idx); + maybe_expr_idx = try can.canonicalizeExpr(expr_idx); + }, + .statement => { + // Manually track scratch statements because we aren't using the file entrypoint + const stmt_idx: AST.Statement.Idx = @enumFromInt(parse_ast.root_node_idx); + const scratch_statements_start = can_ir.store.scratch_statements.top(); + _ = try can.canonicalizeStatement(stmt_idx); + can_ir.all_statements = can_ir.store.statementSpanFrom(scratch_statements_start); + }, + .package => try can.canonicalizeFile(), + .platform => try can.canonicalizeFile(), + .app => try can.canonicalizeFile(), + } - // Types (ONCE) + // Types const empty_modules: []const *CIR = &.{}; - var solver = Solver.init(allocator, &can_ir.env.types, &can_ir, empty_modules) catch |err| { - warn("Type solver init failed: {}", .{err}); - return; - }; + var solver = try Solver.init(allocator, &can_ir.env.types, &can_ir, empty_modules); defer solver.deinit(); - try solver.checkDefs(); + if (maybe_expr_idx) |expr_idx| { + _ = try solver.checkExpr(expr_idx); + } else { + try solver.checkDefs(); + } - // Create content structure - const content = Content{ - .meta = meta, - .source = roc_content, - .expected = expected_content, - .formatted = null, - .has_canonicalize = true, - }; + // Cache round-trip validation - ensure ModuleCache serialization/deserialization works + { + // Generate original S-expression for comparison + var original_tree = SExprTree.init(allocator); + defer original_tree.deinit(); + CIR.pushToSExprTree(&can_ir, null, &original_tree, content.source); + + var original_sexpr = std.ArrayList(u8).init(allocator); + defer original_sexpr.deinit(); + original_tree.toStringPretty(original_sexpr.writer().any()); + + // Create and serialize MmapCache + const cache_data = try cache.CacheModule.create(allocator, &module_env, &can_ir, 0, 0); + defer allocator.free(cache_data); + + // Deserialize back + var loaded_cache = try 
cache.CacheModule.fromMappedMemory(cache_data); + + // Restore ModuleEnv and CIR + const restored = try loaded_cache.restore(allocator, module_name); + var restored_module_env = restored.module_env; + defer restored_module_env.deinit(); + var restored_cir = restored.cir; + defer restored_cir.deinit(); + + // Fix env pointer after struct move + restored_cir.env = &restored_module_env; + + // Generate S-expression from restored CIR + var restored_tree = SExprTree.init(allocator); + defer restored_tree.deinit(); + CIR.pushToSExprTree(&restored_cir, null, &restored_tree, content.source); + + var restored_sexpr = std.ArrayList(u8).init(allocator); + defer restored_sexpr.deinit(); + restored_tree.toStringPretty(restored_sexpr.writer().any()); + + // Compare S-expressions - crash if they don't match + if (!std.mem.eql(u8, original_sexpr.items, restored_sexpr.items)) { + std.log.err("Cache round-trip validation failed for snapshot: {s}", .{output_path}); + std.log.err("Original and restored CIR S-expressions don't match!", .{}); + std.log.err("This indicates a bug in MmapCache serialization/deserialization.", .{}); + std.log.err("Original S-expression:\n{s}", .{original_sexpr.items}); + std.log.err("Restored S-expression:\n{s}", .{restored_sexpr.items}); + return error.CacheRoundTripValidationFailed; + } + } // Buffer all output in memory before writing files var md_buffer = std.ArrayList(u8).init(allocator); @@ -437,10 +531,10 @@ fn processRocFileAsSnapshotWithExpected(allocator: Allocator, output_path: []con try generateMetaSection(&output, &content); try generateSourceSection(&output, &content); try generateExpectedSection(&output, &content); - try generateProblemsSection(&output, &ast, &can_ir, &solver, &content, output_path, &module_env); - try generateTokensSection(&output, &ast, &content, &module_env); - try generateParseSection(&output, &content, &ast, &module_env); - try generateFormattedSection(&output, &content, &ast); + try generateProblemsSection(&output, &parse_ast, &can_ir, &solver, &content, output_path, &module_env); + try generateTokensSection(&output, &parse_ast, &content, &module_env); + try generateParseSection(&output, &content, &parse_ast, &module_env); + try generateFormattedSection(&output, &content, &parse_ast); try generateCanonicalizeSection(&output, &content, &can_ir, maybe_expr_idx); try generateTypesSection(&output, &content, &can_ir, maybe_expr_idx); @@ -455,33 +549,112 @@ fn processRocFileAsSnapshotWithExpected(allocator: Allocator, output_path: []con try md_file.writeAll(md_buffer.items); - // Write HTML file if (html_buffer) |*buf| { - const html_path = try std.fmt.allocPrint(allocator, "{s}.html", .{output_path[0 .. 
output_path.len - 3]}); - defer allocator.free(html_path); - - const html_file = std.fs.cwd().createFile(html_path, .{}) catch |err| { - warn("Failed to create {s}: {}", .{ html_path, err }); - return; + writeHtmlFile(allocator, output_path, buf) catch |err| { + warn("Failed to write HTML file for {s}: {}", .{ output_path, err }); }; - defer html_file.close(); - - try html_file.writeAll(buf.items); } } +fn processRocFileAsSnapshotWithExpected(allocator: Allocator, output_path: []const u8, roc_content: []const u8, meta: Meta, expected_content: ?[]const u8, generate_html: bool) !void { + // Create content structure + const content = Content{ + .meta = meta, + .source = roc_content, + .expected = expected_content, + .formatted = null, + .has_canonicalize = true, + }; + + try processSnapshotContent(allocator, content, output_path, generate_html); +} + +const Config = struct { + maybe_fuzz_corpus_path: ?[]const u8, + generate_html: bool, +}; + const ProcessResult = struct { success: usize, failed: usize, }; -fn processPath(gpa: Allocator, path: []const u8, maybe_fuzz_corpus_path: ?[]const u8, generate_html: bool) !ProcessResult { - var processed_count: usize = 0; - var failed_count: usize = 0; +const WorkItem = struct { + path: []const u8, + kind: enum { + snapshot_file, + multi_file_snapshot, + }, +}; + +const WorkList = std.ArrayList(WorkItem); + +const ProcessContext = struct { + work_list: *WorkList, + config: Config, + success_count: parallel.AtomicUsize, + failed_count: parallel.AtomicUsize, +}; + +/// Worker function that processes a single work item +fn processWorkItem(allocator: Allocator, context: *ProcessContext, item_id: usize) void { + const work_item = context.work_list.items[item_id]; + const success = switch (work_item.kind) { + .snapshot_file => processSnapshotFile(allocator, work_item.path, context.config.maybe_fuzz_corpus_path, context.config.generate_html) catch false, + .multi_file_snapshot => blk: { + processMultiFileSnapshot(allocator, work_item.path, context.config.generate_html) catch { + break :blk false; + }; + break :blk true; + }, + }; + + if (success) { + _ = context.success_count.fetchAdd(1, .monotonic); + } else { + _ = context.failed_count.fetchAdd(1, .monotonic); + } +} + +/// Stage 2: Process work items in parallel using the parallel utility +fn processWorkItems(gpa: Allocator, work_list: WorkList, max_threads: usize, debug: bool, config: Config) !ProcessResult { + if (work_list.items.len == 0) { + return ProcessResult{ .success = 0, .failed = 0 }; + } + + var context = ProcessContext{ + .work_list = @constCast(&work_list), + .config = config, + .success_count = parallel.AtomicUsize.init(0), + .failed_count = parallel.AtomicUsize.init(0), + }; + + // Use per-thread arena allocators for snapshot processing + const options = parallel.ProcessOptions{ + .max_threads = max_threads, + .use_per_thread_arenas = !debug, + }; + + try parallel.process( + ProcessContext, + &context, + processWorkItem, + gpa, + work_list.items.len, + options, + ); + + return ProcessResult{ + .success = context.success_count.load(.monotonic), + .failed = context.failed_count.load(.monotonic), + }; +} +/// Stage 1: Walk directory tree and collect work items +fn collectWorkItems(gpa: Allocator, path: []const u8, work_list: *WorkList) !void { const canonical_path = std.fs.cwd().realpathAlloc(gpa, path) catch |err| { std.log.err("failed to resolve path '{s}': {s}", .{ path, @errorName(err) }); - return .{ .success = 0, .failed = 1 }; + return; }; defer gpa.free(canonical_path); @@ -492,9 
+665,11 @@ fn processPath(gpa: Allocator, path: []const u8, maybe_fuzz_corpus_path: ?[]cons // It's a directory if (isMultiFileSnapshot(canonical_path)) { - try processMultiFileSnapshot(gpa, canonical_path, generate_html); - processed_count += 1; - return .{ .success = processed_count, .failed = failed_count }; + const path_copy = try gpa.dupe(u8, canonical_path); + try work_list.append(WorkItem{ + .path = path_copy, + .kind = .multi_file_snapshot, + }); } else { var dir_iterator = dir.iterate(); while (try dir_iterator.next()) |entry| { @@ -505,16 +680,13 @@ fn processPath(gpa: Allocator, path: []const u8, maybe_fuzz_corpus_path: ?[]cons defer gpa.free(full_path); if (entry.kind == .directory) { - const result = try processPath(gpa, full_path, maybe_fuzz_corpus_path, generate_html); - processed_count += result.success; - failed_count += result.failed; + try collectWorkItems(gpa, full_path, work_list); } else if (entry.kind == .file and isSnapshotFile(entry.name)) { - if (try processSnapshotFile(gpa, full_path, maybe_fuzz_corpus_path, generate_html)) { - processed_count += 1; - } else { - log("skipped file (not a valid snapshot): {s}", .{full_path}); - failed_count += 1; - } + const path_copy = try gpa.dupe(u8, full_path); + try work_list.append(WorkItem{ + .path = path_copy, + .kind = .snapshot_file, + }); } } } @@ -522,23 +694,18 @@ fn processPath(gpa: Allocator, path: []const u8, maybe_fuzz_corpus_path: ?[]cons // Not a directory, try as file if (dir_err == error.NotDir) { if (isSnapshotFile(canonical_path)) { - if (try processSnapshotFile(gpa, canonical_path, maybe_fuzz_corpus_path, generate_html)) { - processed_count += 1; - } else { - std.log.err("failed to process snapshot file: {s}", .{canonical_path}); - std.log.err("make sure the file starts with '~~~META' and has valid snapshot format", .{}); - failed_count += 1; - } + const path_copy = try gpa.dupe(u8, canonical_path); + try work_list.append(WorkItem{ + .path = path_copy, + .kind = .snapshot_file, + }); } else { std.log.err("file '{s}' is not a snapshot file (must end with .md)", .{canonical_path}); } } else { std.log.err("failed to access path '{s}': {s}", .{ canonical_path, @errorName(dir_err) }); - return .{ .success = 0, .failed = 1 }; } } - - return .{ .success = processed_count, .failed = failed_count }; } /// Represents the different sections of a snapshot file. 
@@ -1559,166 +1726,11 @@ fn processSnapshotFileUnified(gpa: Allocator, snapshot_path: []const u8, maybe_f } }; - var module_env = base.ModuleEnv.init(gpa); - defer module_env.deinit(); - - // Parse the source code (ONCE) - var parse_ast = switch (content.meta.node_type) { - .file => parse.parse(&module_env, content.source), - .header => parse.parseHeader(&module_env, content.source), - .expr => parse.parseExpr(&module_env, content.source), - .statement => parse.parseStatement(&module_env, content.source), - .package => parse.parse(&module_env, content.source), - .platform => parse.parse(&module_env, content.source), - .app => parse.parse(&module_env, content.source), - }; - defer parse_ast.deinit(gpa); - - parse_ast.store.emptyScratch(); - - // Canonicalize the source code (ONCE) - // Extract module name from snapshot path - const basename = std.fs.path.basename(snapshot_path); - const module_name = if (std.mem.lastIndexOfScalar(u8, basename, '.')) |dot_idx| - basename[0..dot_idx] - else - basename; - var can_ir = CIR.init(&module_env, module_name); - defer can_ir.deinit(); - - var can = try canonicalize.init(&can_ir, &parse_ast, null); - defer can.deinit(); - - var maybe_expr_idx: ?CIR.Expr.Idx = null; - - switch (content.meta.node_type) { - .file => try can.canonicalizeFile(), - .header => { - // TODO: implement canonicalize_header when available - }, - .expr => { - const expr_idx: AST.Expr.Idx = @enumFromInt(parse_ast.root_node_idx); - maybe_expr_idx = try can.canonicalizeExpr(expr_idx); - }, - .statement => { - // Manually track scratch statements because we aren't using the file entrypoint - const stmt_idx: AST.Statement.Idx = @enumFromInt(parse_ast.root_node_idx); - const scratch_statements_start = can_ir.store.scratch_statements.top(); - _ = try can.canonicalizeStatement(stmt_idx); - can_ir.all_statements = can_ir.store.statementSpanFrom(scratch_statements_start); - }, - .package => try can.canonicalizeFile(), - .platform => try can.canonicalizeFile(), - .app => try can.canonicalizeFile(), - } - - // Types (ONCE) - const empty_modules: []const *CIR = &.{}; - var solver = try Solver.init(gpa, &can_ir.env.types, &can_ir, empty_modules); - defer solver.deinit(); - - if (maybe_expr_idx) |expr_idx| { - _ = try solver.checkExpr(expr_idx); - } else { - try solver.checkDefs(); - } - - // Cache round-trip validation - ensure ModuleCache serialization/deserialization works - { - // Generate original S-expression for comparison - var original_tree = SExprTree.init(gpa); - defer original_tree.deinit(); - CIR.pushToSExprTree(&can_ir, null, &original_tree, content.source); - - var original_sexpr = std.ArrayList(u8).init(gpa); - defer original_sexpr.deinit(); - original_tree.toStringPretty(original_sexpr.writer().any()); - - // Create and serialize MmapCache - const cache_data = try cache.CacheModule.create(gpa, &module_env, &can_ir, 0, 0); - defer gpa.free(cache_data); - - // Deserialize back - var loaded_cache = try cache.CacheModule.fromMappedMemory(cache_data); - - // Restore ModuleEnv and CIR - // Extract module name from snapshot path - const cache_basename = std.fs.path.basename(snapshot_path); - const cache_module_name = if (std.mem.lastIndexOfScalar(u8, cache_basename, '.')) |dot_idx| - cache_basename[0..dot_idx] - else - cache_basename; - const restored = try loaded_cache.restore(gpa, cache_module_name); - var restored_module_env = restored.module_env; - defer restored_module_env.deinit(); - var restored_cir = restored.cir; - defer restored_cir.deinit(); - - // Fix env pointer after 
struct move - restored_cir.env = &restored_module_env; - - // Generate S-expression from restored CIR - var restored_tree = SExprTree.init(gpa); - defer restored_tree.deinit(); - CIR.pushToSExprTree(&restored_cir, null, &restored_tree, content.source); - - var restored_sexpr = std.ArrayList(u8).init(gpa); - defer restored_sexpr.deinit(); - restored_tree.toStringPretty(restored_sexpr.writer().any()); - - // Compare S-expressions - crash if they don't match - if (!std.mem.eql(u8, original_sexpr.items, restored_sexpr.items)) { - std.log.err("Cache round-trip validation failed for snapshot: {s}", .{snapshot_path}); - std.log.err("Original and restored CIR S-expressions don't match!", .{}); - std.log.err("This indicates a bug in MmapCache serialization/deserialization.", .{}); - std.log.err("Original S-expression:\n{s}", .{original_sexpr.items}); - std.log.err("Restored S-expression:\n{s}", .{restored_sexpr.items}); - return false; - } - } - - // Buffer all output in memory before writing files - var md_buffer = std.ArrayList(u8).init(gpa); - defer md_buffer.deinit(); - - var html_buffer = if (generate_html) std.ArrayList(u8).init(gpa) else null; - defer if (html_buffer) |*buf| buf.deinit(); - - var output = DualOutput.init(gpa, &md_buffer, if (html_buffer) |*buf| buf else null); - - // Generate HTML wrapper - try generateHtmlWrapper(&output, &content); - - // Generate all sections simultaneously - try generateMetaSection(&output, &content); - try generateSourceSection(&output, &content); - try generateExpectedSection(&output, &content); - try generateProblemsSection(&output, &parse_ast, &can_ir, &solver, &content, snapshot_path, &module_env); - try generateTokensSection(&output, &parse_ast, &content, &module_env); - - // Generate remaining sections - try generateParseSection(&output, &content, &parse_ast, &module_env); - try generateFormattedSection(&output, &content, &parse_ast); - try generateCanonicalizeSection(&output, &content, &can_ir, maybe_expr_idx); - try generateTypesSection(&output, &content, &can_ir, maybe_expr_idx); - // TODO: Include to emit entire types store. Can be helpful for debugging - // try generateTypesStoreSection(gpa, &output, &can_ir); - - // Generate HTML closing - try generateHtmlClosing(&output); - - // Write markdown file - var md_file = std.fs.cwd().createFile(snapshot_path, .{}) catch |err| { - log("failed to create file '{s}': {s}", .{ snapshot_path, @errorName(err) }); + // Process the content through the shared compilation pipeline + processSnapshotContent(gpa, content, snapshot_path, generate_html) catch |err| { + log("failed to process snapshot content: {s}", .{@errorName(err)}); return false; }; - defer md_file.close(); - try md_file.writer().writeAll(md_buffer.items); - - // Write HTML file - if (html_buffer) |*buf| { - try writeHtmlFile(gpa, snapshot_path, buf); - } // If flag --fuzz-corpus is passed, write the SOURCE to our corpus if (maybe_fuzz_corpus_path != null) {
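For reference, here is a minimal sketch (not part of the diff) of how a caller drives the new `base.parallel.process` API, following the same pattern the snapshot tool uses above: a shared context with read-only inputs, disjoint per-item output slots, and an atomic counter for failures. The `Jobs` and `runJob` names and the toy workload are illustrative only, and the sketch assumes it lives next to `base.zig` under `src/`.

```zig
const std = @import("std");
const base = @import("base.zig");
const parallel = base.parallel;

/// Shared state handed to every worker. Inputs are read-only and each item
/// writes to its own result slot, so the only contended state is the atomic
/// failure counter.
const Jobs = struct {
    inputs: []const u32,
    results: []u64,
    failed: parallel.AtomicUsize,
};

/// Matches parallel.WorkerFn(Jobs): allocator (a per-thread arena when
/// use_per_thread_arenas is set), shared context pointer, and the item id.
fn runJob(allocator: std.mem.Allocator, jobs: *Jobs, item_id: usize) void {
    _ = allocator; // per-item scratch allocations would use this
    const n = jobs.inputs[item_id];
    if (n == 0) {
        _ = jobs.failed.fetchAdd(1, .monotonic);
        return;
    }
    jobs.results[item_id] = @as(u64, n) * n;
}

pub fn main() !void {
    var gpa_impl = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa_impl.deinit();
    const gpa = gpa_impl.allocator();

    const inputs = [_]u32{ 3, 0, 7, 12 };
    var results: [inputs.len]u64 = undefined;

    var jobs = Jobs{
        .inputs = &inputs,
        .results = &results,
        .failed = parallel.AtomicUsize.init(0),
    };

    // max_threads = 0 auto-detects the CPU count; 1 runs everything on the
    // calling thread, which is what the snapshot tool does under --debug.
    try parallel.process(Jobs, &jobs, runJob, gpa, inputs.len, .{
        .max_threads = 0,
        .use_per_thread_arenas = true,
    });

    std.debug.print("failed: {d}, results: {any}\n", .{ jobs.failed.load(.monotonic), results });
}
```

For the snapshot tool itself, the thread count is chosen on the command line: `roc snapshot --threads 4` pins it to four workers, `--threads 1` (or `--debug`, which also disables the per-thread arenas) processes everything on the main thread, and the default of `0` auto-detects the CPU count.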