diff --git a/src/base/Ident.zig b/src/base/Ident.zig
index befc225cfd..ccaea09a38 100644
--- a/src/base/Ident.zig
+++ b/src/base/Ident.zig
@@ -9,6 +9,7 @@ const std = @import("std");
 const collections = @import("../collections.zig");
 const Region = @import("Region.zig");
 const ModuleImport = @import("ModuleImport.zig");
+const serialization = @import("../serialization/mod.zig");
 
 const SmallStringInterner = collections.SmallStringInterner;
 const exitOnOom = collections.utils.exitOnOom;
@@ -162,7 +163,7 @@ pub const Store = struct {
             digit_index -= 1;
         }
 
-        const name = str_buffer[digit_index..];
+        const name = str_buffer[digit_index + 1 ..];
         const idx = self.interner.insert(gpa, name, Region.zero());
 
         self.exposing_modules.append(gpa, @enumFromInt(0)) catch |err| exitOnOom(err);
@@ -201,6 +202,220 @@ pub const Store = struct {
         return self.interner.getText(@enumFromInt(@as(u32, idx.idx)));
     }
 
+    /// Calculate the size needed to serialize this Ident.Store
+    pub fn serializedSize(self: *const Store) usize {
+        var size: usize = 0;
+
+        // SmallStringInterner components
+        size += @sizeOf(u32); // bytes_len
+        size += self.interner.bytes.items.len; // bytes data
+        size = std.mem.alignForward(usize, size, @alignOf(u32)); // align for next u32
+
+        size += @sizeOf(u32); // outer_indices_len
+        size += self.interner.outer_indices.items.len * @sizeOf(@TypeOf(self.interner.outer_indices.items[0])); // outer_indices data
+        size = std.mem.alignForward(usize, size, @alignOf(u32)); // align for next u32
+
+        size += @sizeOf(u32); // regions_len
+        size += self.interner.regions.items.len * @sizeOf(@TypeOf(self.interner.regions.items[0])); // regions data
+        size = std.mem.alignForward(usize, size, @alignOf(u32)); // align for next u32
+
+        // Store components
+        size += @sizeOf(u32); // exposing_modules_len
+        size += self.exposing_modules.items.len * @sizeOf(@TypeOf(self.exposing_modules.items[0])); // exposing_modules data
+        size = std.mem.alignForward(usize, size, @alignOf(u32)); // align for next u32
+
+        size += @sizeOf(u32); // attributes_len
+        size += self.attributes.items.len * @sizeOf(u8); // attributes data (packed as bytes)
+        size = std.mem.alignForward(usize, size, @alignOf(u32)); // align for next u32
+
+        size += @sizeOf(u32); // next_unique_name
+
+        // Align to SERIALIZATION_ALIGNMENT to maintain alignment for subsequent data
+        return std.mem.alignForward(usize, size, serialization.SERIALIZATION_ALIGNMENT);
+    }
+
+    /// Serialize this Ident.Store into the provided buffer
+    pub fn serializeInto(self: *const Store, buffer: []u8, gpa: std.mem.Allocator) ![]u8 {
+        // gpa is accepted for a uniform serialization interface but unused here.
+        _ = gpa;
+
+        const size = self.serializedSize();
+        if (buffer.len < size) return error.BufferTooSmall;
+
+        var offset: usize = 0;
+
+        // Serialize interner bytes
+        const bytes_len = @as(u32, @intCast(self.interner.bytes.items.len));
+        @as(*u32, @ptrCast(@alignCast(buffer.ptr + offset))).* = bytes_len;
+        offset += @sizeOf(u32);
+        if (bytes_len > 0) {
+            @memcpy(buffer[offset .. offset + bytes_len], self.interner.bytes.items);
+            offset += bytes_len;
+        }
+        offset = std.mem.alignForward(usize, offset, @alignOf(u32));
+
+        // Serialize interner outer_indices
+        const outer_indices_len = @as(u32, @intCast(self.interner.outer_indices.items.len));
+        @as(*u32, @ptrCast(@alignCast(buffer.ptr + offset))).* = outer_indices_len;
+        offset += @sizeOf(u32);
+        if (outer_indices_len > 0) {
+            const outer_indices_bytes = outer_indices_len * @sizeOf(@TypeOf(self.interner.outer_indices.items[0]));
+            @memcpy(buffer[offset .. offset + outer_indices_bytes], std.mem.sliceAsBytes(self.interner.outer_indices.items));
+            offset += outer_indices_bytes;
+        }
+        offset = std.mem.alignForward(usize, offset, @alignOf(u32));
+
+        // Serialize interner regions
+        const regions_len = @as(u32, @intCast(self.interner.regions.items.len));
+        @as(*u32, @ptrCast(@alignCast(buffer.ptr + offset))).* = regions_len;
+        offset += @sizeOf(u32);
+        if (regions_len > 0) {
+            const regions_bytes = regions_len * @sizeOf(@TypeOf(self.interner.regions.items[0]));
+            @memcpy(buffer[offset .. offset + regions_bytes], std.mem.sliceAsBytes(self.interner.regions.items));
+            offset += regions_bytes;
+        }
+        offset = std.mem.alignForward(usize, offset, @alignOf(u32));
+
+        // Serialize exposing_modules
+        const exposing_modules_len = @as(u32, @intCast(self.exposing_modules.items.len));
+        @as(*u32, @ptrCast(@alignCast(buffer.ptr + offset))).* = exposing_modules_len;
+        offset += @sizeOf(u32);
+        if (exposing_modules_len > 0) {
+            const exposing_modules_bytes = exposing_modules_len * @sizeOf(@TypeOf(self.exposing_modules.items[0]));
+            @memcpy(buffer[offset .. offset + exposing_modules_bytes], std.mem.sliceAsBytes(self.exposing_modules.items));
+            offset += exposing_modules_bytes;
+        }
+        offset = std.mem.alignForward(usize, offset, @alignOf(u32));
+
+        // Serialize attributes
+        const attributes_len = @as(u32, @intCast(self.attributes.items.len));
+        @as(*u32, @ptrCast(@alignCast(buffer.ptr + offset))).* = attributes_len;
+        offset += @sizeOf(u32);
+        if (attributes_len > 0) {
+            // Serialize each Attributes as a single byte to avoid padding
+            for (self.attributes.items) |attr| {
+                const attr_bits: u3 = @bitCast(attr);
+                buffer[offset] = @as(u8, attr_bits);
+                offset += 1;
+            }
+        }
+        offset = std.mem.alignForward(usize, offset, @alignOf(u32));
+
+        // Serialize next_unique_name
+        @as(*u32, @ptrCast(@alignCast(buffer.ptr + offset))).* = self.next_unique_name;
+        offset += @sizeOf(u32);
+
+        // Zero out any padding bytes
+        if (offset < size) {
+            @memset(buffer[offset..size], 0);
+        }
+
+        return buffer[0..size];
+    }
+
+    /// Deserialize an Ident.Store from the provided buffer
+    pub fn deserializeFrom(buffer: []const u8, gpa: std.mem.Allocator) !Store {
+        var offset: usize = 0;
+
+        // Deserialize interner bytes
+        if (offset + @sizeOf(u32) > buffer.len) return error.BufferTooSmall;
+        const bytes_len = @as(*const u32, @ptrCast(@alignCast(buffer.ptr + offset))).*;
+        offset += @sizeOf(u32);
+        var bytes = std.ArrayListUnmanaged(u8){};
+        errdefer bytes.deinit(gpa);
+        if (bytes_len > 0) {
+            if (offset + bytes_len > buffer.len) return error.BufferTooSmall;
+            try bytes.appendSlice(gpa, buffer[offset .. offset + bytes_len]);
+            offset += bytes_len;
+        }
+        offset = std.mem.alignForward(usize, offset, @alignOf(u32));
+
+        // Deserialize interner outer_indices
+        if (offset + @sizeOf(u32) > buffer.len) return error.BufferTooSmall;
+        const outer_indices_len = @as(*const u32, @ptrCast(@alignCast(buffer.ptr + offset))).*;
+        offset += @sizeOf(u32);
+        var outer_indices = std.ArrayListUnmanaged(SmallStringInterner.StringIdx){};
+        errdefer outer_indices.deinit(gpa);
+        if (outer_indices_len > 0) {
+            const outer_indices_bytes = outer_indices_len * @sizeOf(SmallStringInterner.StringIdx);
+            if (offset + outer_indices_bytes > buffer.len) return error.BufferTooSmall;
+            const outer_indices_data = @as([*]const SmallStringInterner.StringIdx, @ptrCast(@alignCast(buffer.ptr + offset)));
+            try outer_indices.appendSlice(gpa, outer_indices_data[0..outer_indices_len]);
+            offset += outer_indices_bytes;
+        }
+        offset = std.mem.alignForward(usize, offset, @alignOf(u32));
+
+        // Deserialize interner regions
+        if (offset + @sizeOf(u32) > buffer.len) return error.BufferTooSmall;
+        const regions_len = @as(*const u32, @ptrCast(@alignCast(buffer.ptr + offset))).*;
+        offset += @sizeOf(u32);
+        var regions = std.ArrayListUnmanaged(Region){};
+        errdefer regions.deinit(gpa);
+        if (regions_len > 0) {
+            const regions_bytes = regions_len * @sizeOf(Region);
+            if (offset + regions_bytes > buffer.len) return error.BufferTooSmall;
+            const regions_data = @as([*]const Region, @ptrCast(@alignCast(buffer.ptr + offset)));
+            try regions.appendSlice(gpa, regions_data[0..regions_len]);
+            offset += regions_bytes;
+        }
+        offset = std.mem.alignForward(usize, offset, @alignOf(u32));
+
+        // Deserialize exposing_modules
+        if (offset + @sizeOf(u32) > buffer.len) return error.BufferTooSmall;
+        const exposing_modules_len = @as(*const u32, @ptrCast(@alignCast(buffer.ptr + offset))).*;
+        offset += @sizeOf(u32);
+        var exposing_modules = std.ArrayListUnmanaged(ModuleImport.Idx){};
+        errdefer exposing_modules.deinit(gpa);
+        if (exposing_modules_len > 0) {
+            const exposing_modules_bytes = exposing_modules_len * @sizeOf(ModuleImport.Idx);
+            if (offset + exposing_modules_bytes > buffer.len) return error.BufferTooSmall;
+            const exposing_modules_data = @as([*]const ModuleImport.Idx, @ptrCast(@alignCast(buffer.ptr + offset)));
+            try exposing_modules.appendSlice(gpa, exposing_modules_data[0..exposing_modules_len]);
+            offset += exposing_modules_bytes;
+        }
+        offset = std.mem.alignForward(usize, offset, @alignOf(u32));
+
+        // Deserialize attributes
+        if (offset + @sizeOf(u32) > buffer.len) return error.BufferTooSmall;
+        const attributes_len = @as(*const u32, @ptrCast(@alignCast(buffer.ptr + offset))).*;
+        offset += @sizeOf(u32);
+        var attributes = std.ArrayListUnmanaged(Attributes){};
+        errdefer attributes.deinit(gpa);
+        if (attributes_len > 0) {
+            if (offset + attributes_len > buffer.len) return error.BufferTooSmall;
+            try attributes.ensureTotalCapacity(gpa, attributes_len);
+            // Deserialize each Attributes from a single byte to avoid padding
+            for (0..attributes_len) |_| {
+                const attr_bits: u3 = @truncate(buffer[offset]);
+                const attr: Attributes = @bitCast(attr_bits);
+                attributes.appendAssumeCapacity(attr);
+                offset += 1;
+            }
+        }
+        offset = std.mem.alignForward(usize, offset, @alignOf(u32));
+
+        // Deserialize next_unique_name
+        if (offset + @sizeOf(u32) > buffer.len) return error.BufferTooSmall;
+        const next_unique_name = @as(*const u32, @ptrCast(@alignCast(buffer.ptr + offset))).*;
+
+        // Rebuild the strings hash table
+        var strings = SmallStringInterner.StringIdx.Table{};
+        try strings.ensureTotalCapacityContext(gpa, @intCast(outer_indices.items.len), SmallStringInterner.StringIdx.TableContext{ .bytes = &bytes });
+
+        // Re-populate the hash table
+        for (outer_indices.items) |string_idx| {
+            const string_bytes = std.mem.sliceTo(bytes.items[@intFromEnum(string_idx)..], 0);
+            const entry = strings.getOrPutContextAdapted(gpa, string_bytes, SmallStringInterner.StringIdx.TableAdapter{ .bytes = &bytes }, SmallStringInterner.StringIdx.TableContext{ .bytes = &bytes }) catch |err| exitOnOom(err);
+            entry.key_ptr.* = string_idx;
+        }
+
+        // Construct the interner
+        const interner = SmallStringInterner{
+            .bytes = bytes,
+            .strings = strings,
+            .outer_indices = outer_indices,
+            .regions = regions,
+        };
+
+        return Store{
+            .interner = interner,
+            .exposing_modules = exposing_modules,
+            .attributes = attributes,
+            .next_unique_name = next_unique_name,
+        };
+    }
+
     /// Get the region for an identifier.
     pub fn getRegion(self: *const Store, idx: Idx) Region {
         return self.interner.getRegion(@enumFromInt(@as(u32, idx.idx)));
@@ -273,3 +488,87 @@ test "from_bytes creates ignored identifier" {
     try std.testing.expect(result.attributes.ignored == true);
     try std.testing.expect(result.attributes.reassignable == false);
 }
+
+test "Ident.Store serialization round-trip" {
+    const gpa = std.testing.allocator;
+
+    // Create original store and add some identifiers
+    var original_store = Store.initCapacity(gpa, 16);
+    defer original_store.deinit(gpa);
+
+    const ident1 = Ident.for_text("hello");
+    const ident2 = Ident.for_text("world!");
+    const ident3 = Ident.for_text("_ignored");
+
+    const idx1 = original_store.insert(gpa, ident1, Region.zero());
+    const idx2 = original_store.insert(gpa, ident2, Region.zero());
+    const idx3 = original_store.insert(gpa, ident3, Region.zero());
+
+    // Serialize
+    const serialized_size = original_store.serializedSize();
+    const buffer = try gpa.alignedAlloc(u8, @alignOf(u32), serialized_size);
+    defer gpa.free(buffer);
+
+    const serialized = try original_store.serializeInto(buffer, gpa);
+    try std.testing.expectEqual(serialized_size, serialized.len);
+
+    // Deserialize
+    var restored_store = try Store.deserializeFrom(serialized, gpa);
+    defer restored_store.deinit(gpa);
+
+    // Verify the identifiers are identical
+    try std.testing.expectEqualStrings("hello", restored_store.getText(idx1));
+    try std.testing.expectEqualStrings("world!", restored_store.getText(idx2));
+    try std.testing.expectEqualStrings("_ignored", restored_store.getText(idx3));
+
+    // Verify attributes are preserved
+    try std.testing.expect(restored_store.getText(idx1)[0] != '_'); // not ignored
+    try std.testing.expect(restored_store.getText(idx2)[restored_store.getText(idx2).len - 1] == '!'); // effectful
+    try std.testing.expect(restored_store.getText(idx3)[0] == '_'); // ignored
+
+    // Verify next_unique_name is preserved
+    try std.testing.expectEqual(original_store.next_unique_name, restored_store.next_unique_name);
+
+    // Verify structural integrity
+    try std.testing.expectEqual(original_store.exposing_modules.items.len, restored_store.exposing_modules.items.len);
+    try std.testing.expectEqual(original_store.attributes.items.len, restored_store.attributes.items.len);
+    try std.testing.expectEqual(original_store.interner.bytes.items.len, restored_store.interner.bytes.items.len);
+    try std.testing.expectEqual(original_store.interner.outer_indices.items.len, restored_store.interner.outer_indices.items.len);
+}
+
+test "Ident.Store serialization comprehensive" {
+    const gpa = std.testing.allocator;
+
+    var store = Store.initCapacity(gpa, 8);
+    defer store.deinit(gpa);
+
+    // Test various identifier types and edge cases
+    const ident1 = Ident.for_text("hello");
+    const ident2 = Ident.for_text("world!");
+    const ident3 = Ident.for_text("_ignored");
+    const ident4 = Ident.for_text("a"); // single character
+    const ident5 = Ident.for_text("very_long_identifier_name_that_might_cause_issues"); // long name
+    const region = Region.zero();
+
+    _ = store.insert(gpa, ident1, region);
+    _ = store.insert(gpa, ident2, region);
+    _ = store.insert(gpa, ident3, region);
+    _ = store.insert(gpa, ident4, region);
+    _ = store.insert(gpa, ident5, region);
+
+    // Add some unique names
+    _ = store.genUnique(gpa);
+    _ = store.genUnique(gpa);
+
+    // Test serialization
+    try serialization.testing.testSerialization(Store, &store, gpa);
+}
+
+test "Ident.Store empty store serialization" {
+    const gpa = std.testing.allocator;
+
+    var empty_store = Store.initCapacity(gpa, 0);
+    defer empty_store.deinit(gpa);
+
+    try serialization.testing.testSerialization(Store, &empty_store, gpa);
+}
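The `Ident.Store` format above is a flat sequence of sections, each a `u32` count followed by raw element bytes and padded so the next count lands on a `u32` boundary. A minimal sketch of a reader for one byte-counted section, assuming only the layout the diff establishes (the helper name is illustrative):

```zig
const std = @import("std");
const native_endian = @import("builtin").target.cpu.arch.endian();

/// Read one length-prefixed, u32-aligned section and advance `offset`.
fn readSection(buffer: []const u8, offset: *usize) ![]const u8 {
    if (offset.* + @sizeOf(u32) > buffer.len) return error.BufferTooSmall;
    const len = std.mem.readInt(u32, buffer[offset.*..][0..4], native_endian);
    offset.* += @sizeOf(u32);
    if (offset.* + len > buffer.len) return error.BufferTooSmall;
    const data = buffer[offset.*..][0..len];
    offset.* += len;
    // Skip the padding that keeps the next section's count u32-aligned.
    offset.* = std.mem.alignForward(usize, offset.*, @alignOf(u32));
    return data;
}
```

For the typed sections (`outer_indices`, `regions`, `exposing_modules`) the stored `u32` is an element count, so the byte length is the count multiplied by the element size before the same copy-and-align step.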
Ident.for_text("hello"); + const ident2 = Ident.for_text("world!"); + const ident3 = Ident.for_text("_ignored"); + const ident4 = Ident.for_text("a"); // single character + const ident5 = Ident.for_text("very_long_identifier_name_that_might_cause_issues"); // long name + const region = Region.zero(); + + _ = store.insert(gpa, ident1, region); + _ = store.insert(gpa, ident2, region); + _ = store.insert(gpa, ident3, region); + _ = store.insert(gpa, ident4, region); + _ = store.insert(gpa, ident5, region); + + // Add some unique names + _ = store.genUnique(gpa); + _ = store.genUnique(gpa); + + // Test serialization + try serialization.testing.testSerialization(Store, &store, gpa); +} + +test "Ident.Store empty store serialization" { + const gpa = std.testing.allocator; + + var empty_store = Store.initCapacity(gpa, 0); + defer empty_store.deinit(gpa); + + try serialization.testing.testSerialization(Store, &empty_store, gpa); +} diff --git a/src/base/ModuleEnv.zig b/src/base/ModuleEnv.zig index 51eae21e44..d5cda84cc8 100644 --- a/src/base/ModuleEnv.zig +++ b/src/base/ModuleEnv.zig @@ -22,15 +22,15 @@ strings: StringLiteral.Store, types: types_mod.Store, /// Map of exposed items by their string representation (not interned) /// This is built during canonicalization and preserved for later use -exposed_by_str: std.StringHashMapUnmanaged(void) = .{}, +exposed_by_str: collections.SafeStringHashMap(void), /// Map of exposed item names to their CIR node indices (stored as u16) /// This is populated during canonicalization to allow cross-module lookups -exposed_nodes: std.StringHashMapUnmanaged(u16) = .{}, +exposed_nodes: collections.SafeStringHashMap(u16), /// Line starts for error reporting. We retain only start and offset positions in the IR /// and then use these line starts to calculate the line number and column number as required. /// this is a more compact representation at the expense of extra computation only when generating error diagnostics. -line_starts: std.ArrayList(u32), +line_starts: collections.SafeList(u32), /// Initialize the module environment. 
pub fn init(gpa: std.mem.Allocator) Self { @@ -42,7 +42,9 @@ pub fn init(gpa: std.mem.Allocator) Self { .ident_ids_for_slicing = collections.SafeList(Ident.Idx).initCapacity(gpa, 256), .strings = StringLiteral.Store.initCapacityBytes(gpa, 4096), .types = types_mod.Store.initCapacity(gpa, 2048, 512), - .line_starts = std.ArrayList(u32).init(gpa), + .exposed_by_str = collections.SafeStringHashMap(void).init(), + .exposed_nodes = collections.SafeStringHashMap(u16).init(), + .line_starts = collections.SafeList(u32).initCapacity(gpa, 256), }; } @@ -52,14 +54,16 @@ pub fn deinit(self: *Self) void { self.ident_ids_for_slicing.deinit(self.gpa); self.strings.deinit(self.gpa); self.types.deinit(); - self.line_starts.deinit(); + self.line_starts.deinit(self.gpa); self.exposed_by_str.deinit(self.gpa); self.exposed_nodes.deinit(self.gpa); } /// Calculate and store line starts from the source text pub fn calcLineStarts(self: *Self, source: []const u8) !void { - self.line_starts.clearRetainingCapacity(); + // Reset line_starts by creating a new SafeList + self.line_starts.deinit(self.gpa); + self.line_starts = collections.SafeList(u32).initCapacity(self.gpa, 256); // if the source is empty, we're done if (source.len == 0) { @@ -67,14 +71,14 @@ pub fn calcLineStarts(self: *Self, source: []const u8) !void { } // the first line starts at offset 0 - try self.line_starts.append(0); + _ = self.line_starts.append(self.gpa, 0); // find all newlines in the source, save their offset var pos: u32 = 0; for (source) |c| { if (c == '\n') { // next line starts after the newline in the current position - try self.line_starts.append(pos + 1); + _ = self.line_starts.append(self.gpa, pos + 1); } pos += 1; } @@ -82,5 +86,5 @@ pub fn calcLineStarts(self: *Self, source: []const u8) !void { /// Get diagnostic position information for a given range pub fn calcRegionInfo(self: *const Self, source: []const u8, begin: u32, end: u32) !RegionInfo { - return RegionInfo.position(source, self.line_starts.items, begin, end); + return RegionInfo.position(source, self.line_starts.items.items, begin, end); } diff --git a/src/base/RegionInfo.zig b/src/base/RegionInfo.zig index 00460ffac3..a995f244f8 100644 --- a/src/base/RegionInfo.zig +++ b/src/base/RegionInfo.zig @@ -5,6 +5,7 @@ //! as this is more compact, and then when we need to we can calculate the line and column information //! using line_starts and the offsets. 
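The `std.ArrayList(u32)` to `collections.SafeList(u32)` migration changes the calling convention in two ways that explain the mechanical edits above: `append` now takes the allocator per call and returns the new element's index (hence the `_ =` discards), and the raw slice sits one field deeper, since `SafeList` appears to wrap an `ArrayListUnmanaged` in an `items` field. A side-by-side sketch under those assumptions:

```zig
// Before: a managed ArrayList remembers its allocator.
var old_list = std.ArrayList(u32).init(gpa);
defer old_list.deinit();
try old_list.append(42);
const old_slice: []u32 = old_list.items;

// After: SafeList is unmanaged; append takes gpa and returns an index.
var new_list = collections.SafeList(u32).initCapacity(gpa, 256);
defer new_list.deinit(gpa);
_ = new_list.append(gpa, 42); // index discarded, as in calcLineStarts
const new_slice: []u32 = new_list.items.items; // hence `.items.items` at call sites
```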
const std = @import("std"); +const collections = @import("../collections.zig"); const Allocator = std.mem.Allocator; // byte indexes into the source text @@ -54,8 +55,8 @@ fn getLineText(source: []const u8, line_starts: []const u32, start_line_idx: u32 } /// Record the offsets for the start of each line in the source code -pub fn findLineStarts(gpa: Allocator, source: []const u8) !std.ArrayList(u32) { - var line_starts = std.ArrayList(u32).init(gpa); +pub fn findLineStarts(gpa: Allocator, source: []const u8) !collections.SafeList(u32) { + var line_starts = collections.SafeList(u32).initCapacity(gpa, 256); // if the source is empty, return an empty list of line starts if (source.len == 0) { @@ -63,14 +64,14 @@ pub fn findLineStarts(gpa: Allocator, source: []const u8) !std.ArrayList(u32) { } // the first line starts at offset 0 - try line_starts.append(0); + _ = line_starts.append(gpa, 0); // find all newlines in the source, save their offset var pos: u32 = 0; for (source) |c| { if (c == '\n') { // next line starts after the newline in the current position - try line_starts.append(pos + 1); + _ = line_starts.append(gpa, pos + 1); } pos += 1; } @@ -107,81 +108,81 @@ pub fn position(source: []const u8, line_starts: []const u32, begin: u32, end: u test "lineIdx" { const gpa = std.testing.allocator; - var line_starts = std.ArrayList(u32).init(gpa); - defer line_starts.deinit(); + var line_starts = collections.SafeList(u32).initCapacity(gpa, 256); + defer line_starts.deinit(gpa); // Simple test case with lines at positions 0, 10, 20 - try line_starts.append(0); - try line_starts.append(10); - try line_starts.append(20); - try line_starts.append(30); - - try std.testing.expectEqual(0, lineIdx(line_starts.items, 0)); - try std.testing.expectEqual(0, lineIdx(line_starts.items, 5)); - try std.testing.expectEqual(0, lineIdx(line_starts.items, 9)); - try std.testing.expectEqual(1, lineIdx(line_starts.items, 10)); - try std.testing.expectEqual(1, lineIdx(line_starts.items, 15)); - try std.testing.expectEqual(1, lineIdx(line_starts.items, 19)); - try std.testing.expectEqual(2, lineIdx(line_starts.items, 20)); - try std.testing.expectEqual(2, lineIdx(line_starts.items, 25)); - try std.testing.expectEqual(2, lineIdx(line_starts.items, 29)); - try std.testing.expectEqual(3, lineIdx(line_starts.items, 30)); - try std.testing.expectEqual(3, lineIdx(line_starts.items, 35)); + _ = line_starts.append(gpa, 0); + _ = line_starts.append(gpa, 10); + _ = line_starts.append(gpa, 20); + _ = line_starts.append(gpa, 30); + + try std.testing.expectEqual(0, lineIdx(line_starts.items.items, 0)); + try std.testing.expectEqual(0, lineIdx(line_starts.items.items, 5)); + try std.testing.expectEqual(0, lineIdx(line_starts.items.items, 9)); + try std.testing.expectEqual(1, lineIdx(line_starts.items.items, 10)); + try std.testing.expectEqual(1, lineIdx(line_starts.items.items, 15)); + try std.testing.expectEqual(1, lineIdx(line_starts.items.items, 19)); + try std.testing.expectEqual(2, lineIdx(line_starts.items.items, 20)); + try std.testing.expectEqual(2, lineIdx(line_starts.items.items, 25)); + try std.testing.expectEqual(2, lineIdx(line_starts.items.items, 29)); + try std.testing.expectEqual(3, lineIdx(line_starts.items.items, 30)); + try std.testing.expectEqual(3, lineIdx(line_starts.items.items, 35)); } test "columnIdx" { const gpa = std.testing.allocator; - var line_starts = std.ArrayList(u32).init(gpa); - defer line_starts.deinit(); + var line_starts = collections.SafeList(u32).initCapacity(gpa, 256); + defer 
line_starts.deinit(gpa); - try line_starts.append(0); - try line_starts.append(10); - try line_starts.append(20); + _ = line_starts.append(gpa, 0); + _ = line_starts.append(gpa, 10); + _ = line_starts.append(gpa, 20); - try std.testing.expectEqual(0, columnIdx(line_starts.items, 0, 0)); - try std.testing.expectEqual(5, columnIdx(line_starts.items, 0, 5)); - try std.testing.expectEqual(9, columnIdx(line_starts.items, 0, 9)); + try std.testing.expectEqual(0, columnIdx(line_starts.items.items, 0, 0)); + try std.testing.expectEqual(5, columnIdx(line_starts.items.items, 0, 5)); + try std.testing.expectEqual(9, columnIdx(line_starts.items.items, 0, 9)); - try std.testing.expectEqual(0, columnIdx(line_starts.items, 1, 10)); - try std.testing.expectEqual(5, columnIdx(line_starts.items, 1, 15)); + try std.testing.expectEqual(0, columnIdx(line_starts.items.items, 1, 10)); + try std.testing.expectEqual(5, columnIdx(line_starts.items.items, 1, 15)); } test "getLineText" { const gpa = std.testing.allocator; - var line_starts = std.ArrayList(u32).init(gpa); - defer line_starts.deinit(); + var line_starts = collections.SafeList(u32).initCapacity(gpa, 256); + defer line_starts.deinit(gpa); const source = "line0\nline1\nline2"; - try line_starts.append(0); - try line_starts.append(6); - try line_starts.append(12); + _ = line_starts.append(gpa, 0); + _ = line_starts.append(gpa, 6); + _ = line_starts.append(gpa, 12); - try std.testing.expectEqualStrings("line0", getLineText(source, line_starts.items, 0, 0)); - try std.testing.expectEqualStrings("line1", getLineText(source, line_starts.items, 1, 1)); - try std.testing.expectEqualStrings("line0\nline1", getLineText(source, line_starts.items, 0, 1)); - try std.testing.expectEqualStrings("line2", getLineText(source, line_starts.items, 2, 2)); + try std.testing.expectEqualStrings("line0", getLineText(source, line_starts.items.items, 0, 0)); + try std.testing.expectEqualStrings("line1", getLineText(source, line_starts.items.items, 1, 1)); + try std.testing.expectEqualStrings("line0\nline1", getLineText(source, line_starts.items.items, 0, 1)); + try std.testing.expectEqualStrings("line2", getLineText(source, line_starts.items.items, 2, 2)); } test "get" { const gpa = std.testing.allocator; - var line_starts = std.ArrayList(u32).init(gpa); - defer line_starts.deinit(); + var line_starts = collections.SafeList(u32).initCapacity(gpa, 256); + defer line_starts.deinit(gpa); const source = "line0\nline1\nline2"; - try line_starts.append(0); - try line_starts.append(6); - try line_starts.append(12); + _ = line_starts.append(gpa, 0); + _ = line_starts.append(gpa, 6); + _ = line_starts.append(gpa, 12); - const info1 = try position(source, line_starts.items, 2, 4); + const info1 = try position(source, line_starts.items.items, 2, 4); try std.testing.expectEqual(0, info1.start_line_idx); try std.testing.expectEqual(2, info1.start_col_idx); try std.testing.expectEqual(0, info1.end_line_idx); try std.testing.expectEqual(4, info1.end_col_idx); try std.testing.expectEqualStrings("line0", info1.line_text); - const info2 = try position(source, line_starts.items, 8, 10); + const info2 = try position(source, line_starts.items.items, 8, 10); try std.testing.expectEqual(1, info2.start_line_idx); try std.testing.expectEqual(2, info2.start_col_idx); try std.testing.expectEqual(1, info2.end_line_idx); diff --git a/src/base/StringLiteral.zig b/src/base/StringLiteral.zig index 880af5b01e..8be53ea003 100644 --- a/src/base/StringLiteral.zig +++ b/src/base/StringLiteral.zig @@ -2,6 +2,7 @@ 
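What `lineIdx` and `columnIdx` recover from `line_starts` can be stated directly: a byte offset's line is the index of the last line start at or before it, and its column is the distance from that start. A standalone sketch (a linear scan here; the real implementation may binary-search the sorted offsets):

```zig
const std = @import("std");

fn lineAndCol(line_starts: []const u32, offset: u32) struct { line: u32, col: u32 } {
    var line: u32 = 0;
    for (line_starts, 0..) |start, i| {
        if (start > offset) break;
        line = @intCast(i);
    }
    return .{ .line = line, .col = offset - line_starts[line] };
}

test "agrees with the lineIdx/columnIdx expectations above" {
    const starts = [_]u32{ 0, 10, 20, 30 };
    try std.testing.expectEqual(@as(u32, 1), lineAndCol(&starts, 15).line);
    try std.testing.expectEqual(@as(u32, 5), lineAndCol(&starts, 15).col);
}
```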
const std = @import("std"); const collections = @import("../collections.zig"); +const serialization = @import("../serialization/mod.zig"); const exitOnOom = collections.utils.exitOnOom; const testing = std.testing; @@ -66,6 +67,61 @@ pub const Store = struct { const str_len = std.mem.bytesAsValue(u32, self.buffer.items[idx_u32 - 4 .. idx_u32]).*; return self.buffer.items[idx_u32 .. idx_u32 + str_len]; } + + /// Calculate the size needed to serialize this StringLiteral.Store + pub fn serializedSize(self: *const Store) usize { + // Header: 4 bytes for buffer length + // Data: buffer.items.len bytes + const raw_size = @sizeOf(u32) + self.buffer.items.len; + // Align to SERIALIZATION_ALIGNMENT to maintain alignment for subsequent data + return std.mem.alignForward(usize, raw_size, serialization.SERIALIZATION_ALIGNMENT); + } + + /// Serialize this StringLiteral.Store into the provided buffer + /// Buffer must be at least serializedSize() bytes + pub fn serializeInto(self: *const Store, buffer: []u8) ![]u8 { + const size = self.serializedSize(); + if (buffer.len < size) return error.BufferTooSmall; + + // Write buffer length + const len_ptr = @as(*u32, @ptrCast(@alignCast(buffer.ptr))); + len_ptr.* = @intCast(self.buffer.items.len); + + // Write buffer data + if (self.buffer.items.len > 0) { + @memcpy(buffer[@sizeOf(u32) .. @sizeOf(u32) + self.buffer.items.len], self.buffer.items); + } + + // Zero out any padding bytes + const actual_size = @sizeOf(u32) + self.buffer.items.len; + if (actual_size < size) { + @memset(buffer[actual_size..size], 0); + } + + return buffer[0..size]; + } + + /// Deserialize a StringLiteral.Store from the provided buffer + pub fn deserializeFrom(buffer: []const u8, gpa: std.mem.Allocator) !Store { + if (buffer.len < @sizeOf(u32)) return error.BufferTooSmall; + + // Read buffer length + const buffer_len = @as(*const u32, @ptrCast(@alignCast(buffer.ptr))).*; + + const expected_size = @sizeOf(u32) + buffer_len; + if (buffer.len < expected_size) return error.BufferTooSmall; + + // Create store with exact capacity + var store = Store.initCapacityBytes(gpa, buffer_len); + + // Copy buffer data + if (buffer_len > 0) { + const data_start = @sizeOf(u32); + store.buffer.appendSliceAssumeCapacity(buffer[data_start .. 
data_start + buffer_len]); + } + + return store; + } }; test "insert" { @@ -82,3 +138,33 @@ test "insert" { try testing.expectEqualStrings("abc", interner.get(idx_1)); try testing.expectEqualStrings("defg", interner.get(idx_2)); } + +test "StringLiteral.Store serialization comprehensive" { + const gpa = testing.allocator; + + var store = Store{}; + defer store.deinit(gpa); + + // Add various test strings including edge cases + _ = store.insert(gpa, "hello"); + _ = store.insert(gpa, "world"); + _ = store.insert(gpa, "test string with 🦎 unicode"); + _ = store.insert(gpa, ""); // empty string + _ = store.insert(gpa, "\x00\x01\x02"); // binary data + _ = store.insert(gpa, "🦎🚀✨"); // emoji + _ = store.insert(gpa, "日本語"); // non-latin script + _ = store.insert(gpa, "test\n\r\t"); // control characters + _ = store.insert(gpa, "very very very very very very long string that exceeds normal buffer sizes and might cause issues with memory management"); + + // Test serialization + try serialization.testing.testSerialization(Store, &store, gpa); +} + +test "StringLiteral.Store empty store serialization" { + const gpa = testing.allocator; + + var empty_store = Store{}; + defer empty_store.deinit(gpa); + + try serialization.testing.testSerialization(Store, &empty_store, gpa); +} diff --git a/src/cache.zig b/src/cache.zig deleted file mode 100644 index d68c4b73b0..0000000000 --- a/src/cache.zig +++ /dev/null @@ -1,340 +0,0 @@ -//! Exposes the readCacheInto and writeToCache functions for -//! serializing IR to and from disk. The caller is responsible for: -//! - Determining the base directory where the cache files should go. -//! - Determining what hash should be used as the cache key. -//! - Providing either the data to write to disk, or a buffer to read into. -const std = @import("std"); -const builtin = @import("builtin"); -const base = @import("base.zig"); -const canonicalize = @import("check/canonicalize.zig"); -const assert = std.debug.assert; -const Filesystem = @import("coordinate/Filesystem.zig"); -const Package = base.Package; -const Allocator = std.mem.Allocator; - -const hash_encoder = std.base64.url_safe_no_pad.Encoder; -const file_ext = ".rcir"; - -/// The header that gets written to disk right before the cached data. -/// Having this header makes it possible to read the entire cached file -/// into a buffer in one syscall, because the header provides all the -/// information necessary to process the remainder of the information -/// (e.g. rehydrating pointers). -pub const CacheHeader = struct { - total_cached_bytes: u32, - - /// Error specific to initializing a CacheHeader from bytes. - /// Returned when the buffer is too small to contain a complete header - /// or the complete data that the header specifies. - pub const InitError = error{ - PartialRead, - }; - - /// Verify that the given buffer begins with a valid CacheHeader, - /// and also that it has a valid number of bytes in it. Returns - /// a pointer to the CacheHeader within the buffer. - pub fn initFromBytes(buf: []align(@alignOf(CacheHeader)) u8) InitError!*CacheHeader { - if (buf.len == 0) { - return InitError.PartialRead; - } - - // The buffer might not contain a complete header. - if (buf.len < @sizeOf(CacheHeader)) { - return InitError.PartialRead; - } - - const header = @as(*CacheHeader, @ptrCast(buf.ptr)); - const data_start = @sizeOf(CacheHeader); - const data_end = data_start + header.total_cached_bytes; - - // The buffer might not contain complete data after the header. 
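The `StringLiteral.Store` wire format is the simplest of the three: one `u32` holding the interner buffer's byte length, then the raw buffer (which itself stores every string as a `u32` length followed by its bytes, per `get`), then zero padding up to `SERIALIZATION_ALIGNMENT`. A round-trip sketch using the API exactly as added above:

```zig
test "sketch: StringLiteral.Store round-trip" {
    const gpa = std.testing.allocator;

    var store = Store{};
    defer store.deinit(gpa);
    const idx = store.insert(gpa, "abc");

    // serializedSize already includes padding; u32 alignment suffices
    // for the length header at offset 0.
    const buf = try gpa.alignedAlloc(u8, @alignOf(u32), store.serializedSize());
    defer gpa.free(buf);
    const bytes = try store.serializeInto(buf);

    var restored = try Store.deserializeFrom(bytes, gpa);
    defer restored.deinit(gpa);
    try std.testing.expectEqualStrings("abc", restored.get(idx));
}
```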
diff --git a/src/cache.zig b/src/cache.zig
deleted file mode 100644
index d68c4b73b0..0000000000
--- a/src/cache.zig
+++ /dev/null
@@ -1,340 +0,0 @@
-//! Exposes the readCacheInto and writeToCache functions for
-//! serializing IR to and from disk. The caller is responsible for:
-//! - Determining the base directory where the cache files should go.
-//! - Determining what hash should be used as the cache key.
-//! - Providing either the data to write to disk, or a buffer to read into.
-const std = @import("std");
-const builtin = @import("builtin");
-const base = @import("base.zig");
-const canonicalize = @import("check/canonicalize.zig");
-const assert = std.debug.assert;
-const Filesystem = @import("coordinate/Filesystem.zig");
-const Package = base.Package;
-const Allocator = std.mem.Allocator;
-
-const hash_encoder = std.base64.url_safe_no_pad.Encoder;
-const file_ext = ".rcir";
-
-/// The header that gets written to disk right before the cached data.
-/// Having this header makes it possible to read the entire cached file
-/// into a buffer in one syscall, because the header provides all the
-/// information necessary to process the remainder of the information
-/// (e.g. rehydrating pointers).
-pub const CacheHeader = struct {
-    total_cached_bytes: u32,
-
-    /// Error specific to initializing a CacheHeader from bytes.
-    /// Returned when the buffer is too small to contain a complete header
-    /// or the complete data that the header specifies.
-    pub const InitError = error{
-        PartialRead,
-    };
-
-    /// Verify that the given buffer begins with a valid CacheHeader,
-    /// and also that it has a valid number of bytes in it. Returns
-    /// a pointer to the CacheHeader within the buffer.
-    pub fn initFromBytes(buf: []align(@alignOf(CacheHeader)) u8) InitError!*CacheHeader {
-        if (buf.len == 0) {
-            return InitError.PartialRead;
-        }
-
-        // The buffer might not contain a complete header.
-        if (buf.len < @sizeOf(CacheHeader)) {
-            return InitError.PartialRead;
-        }
-
-        const header = @as(*CacheHeader, @ptrCast(buf.ptr));
-        const data_start = @sizeOf(CacheHeader);
-        const data_end = data_start + header.total_cached_bytes;
-
-        // The buffer might not contain complete data after the header.
-        if (buf.len < data_end) {
-            return InitError.PartialRead;
-        }
-
-        return header;
-    }
-};
-
-/// Reads the canonical IR for a given file hash and Roc version into the given buffer.
-///
-/// If this succeeds, then it's the caller's responsibility to:
-/// - Verify that there are bytes left over in the buffer. (If the buffer is now full,
-///   then this was a partial read and the caller needs to call this again with a bigger buffer).
-/// - Cast the bytes to a CacheHeader
-/// - Truncate the buffer's length based on the total_cached_bytes field of the CacheHeader.
-///
-/// Returns the number of bytes read or an error if file operations fail.
-pub fn readCacheInto(
-    dest: []align(@alignOf(CacheHeader)) u8,
-    abs_cache_dir: []const u8,
-    hash: []const u8,
-    fs: Filesystem,
-    allocator: Allocator,
-) (Filesystem.ReadError || Allocator.Error)!usize {
-    const path_result = try createCachePath(allocator, abs_cache_dir, hash);
-    defer allocator.free(path_result.path);
-    return try fs.readFileInto(path_result.path, dest);
-}
-
-/// Writes the given content to a cache file for the specified hash.
-/// Creates any missing intermediate directories as necessary.
-pub fn writeToCache(
-    cache_dir_path: []const u8,
-    hash: []const u8,
-    header: *const CacheHeader, // Must be followed in memory by the contents of the header
-    fs: Filesystem,
-    allocator: Allocator,
-) (Filesystem.WriteError || Filesystem.MakePathError || Allocator.Error)!void {
-    const cache_path = try createCachePath(allocator, cache_dir_path, hash);
-    defer allocator.free(cache_path.path);
-
-    // Create enclosing directories as needed.
-    const hash_start = cache_dir_path.len + 1; // +1 for path separator
-    const hash_sep_pos = hash_start + cache_path.half_encoded_len;
-    try fs.makePath(cache_path.path[0..hash_sep_pos]);
-
-    // Write to the file both the header and the cache data immediately following it in memory.
-    const total_bytes = @sizeOf(CacheHeader) + header.total_cached_bytes;
-    const header_and_content = @as([*]const u8, @ptrCast(header))[0..total_bytes];
-    try fs.writeFile(cache_path.path, header_and_content);
-}
-
-/// TODO: implement
-pub fn getPackageRootAbsDir(url_data: Package.Url, gpa: Allocator, fs: Filesystem) []const u8 {
-    _ = url_data;
-    _ = gpa;
-    _ = fs;
-
-    @panic("not implemented");
-}
-
-/// TODO: implement
-pub fn getCanIrForHashAndRocVersion(file_hash: []const u8, roc_version: []const u8, fs: Filesystem, allocator: Allocator) ?canonicalize.CIR {
-    _ = file_hash;
-    _ = roc_version;
-    _ = fs;
-    _ = allocator;
-    return null;
-}
-
-/// Allocates and returns the full path to the cache file for the given hash.
-/// Also returns the length of the hash path part.
-///
-/// The path format is: abs_cache_dir + "/" + first_half_of_hash + "/" + second_half_of_hash + file_ext
-///
-/// All other path-related values can be derived from the returned values.
-///
-/// Returns a tuple containing:
-/// - The full path as a null-terminated string
-/// - The hash path length
-fn createCachePath(allocator: Allocator, abs_cache_dir: []const u8, hash: []const u8) Allocator.Error!struct { path: [:0]u8, half_encoded_len: usize } {
-    // Calculate required space: abs_cache_dir + "/" + hash_path + file_ext + null terminator
-    // We need hash_encoder.calcSize(hash.len) + 1 bytes for the hash path (+1 for the separator)
-    const required_bytes = abs_cache_dir.len + 1 + hash_encoder.calcSize(hash.len) + 1 + file_ext.len + 1;
-
-    var path_buf = try allocator.allocSentinel(u8, required_bytes - 1, 0);
-    errdefer allocator.free(path_buf);
-
-    // abs_cache_dir + "/" + first_half_of_hash + "/" + second_half_of_hash + file_ext
-    @memcpy(path_buf[0..abs_cache_dir.len], abs_cache_dir);
-    path_buf[abs_cache_dir.len] = std.fs.path.sep;
-    const hash_start = abs_cache_dir.len + 1; // +1 for the path separator
-
-    // Inline the writeHashToPath function here with the hash bytes split in half
-    const half_hash_len = hash.len / 2;
-    const half_encoded_len = hash_encoder.calcSize(half_hash_len);
-
-    // Encode the first half of the hash
-    _ = hash_encoder.encode(path_buf[hash_start .. hash_start + half_encoded_len], hash[0..half_hash_len]);
-
-    // Add path separator
-    path_buf[hash_start + half_encoded_len] = std.fs.path.sep;
-
-    // Encode the second half of the hash
-    _ = hash_encoder.encode(path_buf[hash_start + half_encoded_len + 1 ..], hash[half_hash_len..hash.len]);
-
-    const hash_path_len = (half_encoded_len * 2) + 1;
-
-    const ext_start = hash_start + hash_path_len;
-    const ext_end = ext_start + file_ext.len;
-    @memcpy(path_buf[ext_start..ext_end], file_ext);
-
-    return .{ .path = path_buf, .half_encoded_len = half_encoded_len };
-}
-
-test "CacheHeader.initFromBytes - valid data" {
-    const test_data = "This is test data for our cache!";
-    const test_data_len = test_data.len;
-
-    var buffer: [1024]u8 align(@alignOf(CacheHeader)) = .{0} ** 1024;
-
-    var header = @as(*CacheHeader, @ptrCast(&buffer[0]));
-    header.total_cached_bytes = test_data_len;
-
-    const data_start = @sizeOf(CacheHeader);
-    @memcpy(buffer[data_start .. data_start + test_data_len], test_data);
-
-    const parsed_header = try CacheHeader.initFromBytes(&buffer);
-    try std.testing.expectEqual(header.total_cached_bytes, parsed_header.total_cached_bytes);
-}
-
-test "CacheHeader.initFromBytes - buffer too small" {
-    // Create a buffer smaller than CacheHeader size
-    var small_buffer: [4]u8 align(@alignOf(CacheHeader)) = undefined;
-
-    // Test that it returns PartialRead error
-    const result = CacheHeader.initFromBytes(&small_buffer);
-    try std.testing.expectError(CacheHeader.InitError.PartialRead, result);
-}
-
-test "CacheHeader.initFromBytes - insufficient data bytes" {
-    var buffer: [128]u8 align(@alignOf(CacheHeader)) = .{0} ** 128;
-
-    var header = @as(*CacheHeader, @ptrCast(&buffer[0]));
-
-    // Set header to request more data than is available in the buffer
-    const available_data_space = buffer.len - @sizeOf(CacheHeader);
-    header.total_cached_bytes = available_data_space + 1;
-
-    const result = CacheHeader.initFromBytes(&buffer);
-    try std.testing.expectError(CacheHeader.InitError.PartialRead, result);
-}
-
-test "readCacheInto - file too big" {
-    var mock_fs = Filesystem.testing();
-    const err = error.FileTooBig;
-
-    mock_fs.readFileInto = struct {
-        fn readFileInto(path: []const u8, buf: []u8) Filesystem.ReadError!usize {
-            _ = path;
-            _ = buf;
-            return err;
-        }
-    }.readFileInto;
-
-    var read_buffer: [1024]u8 align(@alignOf(CacheHeader)) = undefined;
-    const result = readCacheInto(&read_buffer, "/fake/cache/dir", "not-a-hash", mock_fs, std.testing.allocator);
-
-    try std.testing.expectError(err, result);
-}
-
-test "readCacheInto after writeToCache" {
-    var tmp_dir = std.testing.tmpDir(.{});
-    defer tmp_dir.cleanup();
-
-    // Get absolute path of tmp_dir to use as cache directory
-    var abs_path_buf: [std.fs.max_path_bytes]u8 = undefined;
-    const abs_cache_dir = try tmp_dir.dir.realpath(".", &abs_path_buf);
-
-    const fs = Filesystem.default();
-    const hash = "0123456789abcdef";
-    const test_data = "Test data for caching!";
-    const test_data_len = test_data.len;
-
-    // Create buffer with header and data
-    const buffer_size = @sizeOf(CacheHeader) + test_data_len;
-    var write_buffer: []align(@alignOf(CacheHeader)) u8 = try std.testing.allocator.alignedAlloc(u8, @alignOf(CacheHeader), buffer_size);
-    defer std.testing.allocator.free(write_buffer);
-    var header = @as(*CacheHeader, @ptrCast(write_buffer.ptr));
-    header.total_cached_bytes = test_data_len;
-    const data_start = @sizeOf(CacheHeader);
-    @memcpy(write_buffer[data_start .. data_start + test_data_len], test_data);
-
-    // Write to cache
-    try writeToCache(abs_cache_dir, hash, header, fs, std.testing.allocator);
-
-    // Read it back
-    var read_buffer: [1024]u8 align(@alignOf(CacheHeader)) = undefined;
-    const bytes_read = try readCacheInto(&read_buffer, abs_cache_dir, hash, fs, std.testing.allocator);
-
-    // Verify header was read correctly
-    try std.testing.expect(bytes_read >= @sizeOf(CacheHeader));
-    const parsed_header = try CacheHeader.initFromBytes(read_buffer[0..bytes_read]);
-    try std.testing.expectEqual(header.total_cached_bytes, parsed_header.total_cached_bytes);
-
-    // Verify data was read correctly
-    const expected_total_bytes = @sizeOf(CacheHeader) + parsed_header.total_cached_bytes;
-    try std.testing.expectEqual(expected_total_bytes, bytes_read);
-
-    const data_bytes = read_buffer[@sizeOf(CacheHeader)..expected_total_bytes];
-    try std.testing.expectEqualStrings(test_data, data_bytes);
-}
-
-// TODO expand this test gradually to more of our Can IR until
-// we can round-trip a whole type-checked module from cache
-test "NodeStore cache round-trip" {
-    const NodeStore = @import("check/canonicalize/NodeStore.zig");
-    const Node = @import("check/canonicalize/Node.zig");
-
-    var tmp_dir = std.testing.tmpDir(.{});
-    defer tmp_dir.cleanup();
-
-    var abs_path_buf: [std.fs.max_path_bytes]u8 = undefined;
-    const abs_cache_dir = try tmp_dir.dir.realpath(".", &abs_path_buf);
-
-    const fs = Filesystem.default();
-    const allocator = std.testing.allocator;
-    const test_hash = "0123456789abcdef";
-
-    var store = NodeStore.initCapacity(allocator, 10);
-    defer store.deinit();
-
-    const expr_node = Node{
-        .data_1 = 42,
-        .data_2 = 100,
-        .data_3 = 200,
-        .tag = .expr_string,
-    };
-    const expr_idx = store.nodes.append(store.gpa, expr_node);
-    const region = base.Region{ .start = .{ .offset = 0 }, .end = .{ .offset = 10 } };
-    _ = store.regions.append(store.gpa, region);
-
-    try store.extra_data.append(store.gpa, 1234);
-    try store.extra_data.append(store.gpa, 5678);
-
-    const store_size = store.serializedSize();
-    const store_buffer = try allocator.alignedAlloc(u8, @alignOf(Node), store_size);
-    defer allocator.free(store_buffer);
-    const serialized = try store.serializeInto(store_buffer);
-    try std.testing.expectEqual(store_size, serialized.len);
-
-    const header_size = @sizeOf(CacheHeader);
-    const aligned_header_size = std.mem.alignForward(usize, header_size, @alignOf(Node));
-    const total_size = aligned_header_size + store_size;
-    var write_buffer = try allocator.alignedAlloc(u8, @alignOf(Node), total_size);
-    defer allocator.free(write_buffer);
-
-    const header = @as(*CacheHeader, @ptrCast(write_buffer.ptr));
-    header.* = .{
-        .total_cached_bytes = @intCast(store_size),
-    };
-
-    @memcpy(write_buffer[aligned_header_size..total_size], serialized);
-
-    try writeToCache(abs_cache_dir, test_hash, header, fs, allocator);
-
-    var read_buffer: [4096]u8 align(@alignOf(Node)) = undefined;
-    const bytes_read = try readCacheInto(&read_buffer, abs_cache_dir, test_hash, fs, allocator);
-
-    const parsed_header = try CacheHeader.initFromBytes(read_buffer[0..bytes_read]);
-    try std.testing.expectEqual(header.total_cached_bytes, parsed_header.total_cached_bytes);
-
-    const data_start = std.mem.alignForward(usize, @sizeOf(CacheHeader), @alignOf(Node));
-    const data_end = data_start + parsed_header.total_cached_bytes;
-
-    var restored_store = try NodeStore.deserializeFrom(@as([]align(@alignOf(Node)) const u8, @alignCast(read_buffer[data_start..data_end])), allocator);
-    defer restored_store.deinit();
-
-    try std.testing.expectEqual(store.nodes.len(), restored_store.nodes.len());
-    try std.testing.expectEqual(store.extra_data.items.len, restored_store.extra_data.items.len);
-
-    const restored_node = restored_store.nodes.get(expr_idx);
-    try std.testing.expectEqual(expr_node.data_1, restored_node.data_1);
-    try std.testing.expectEqual(expr_node.data_2, restored_node.data_2);
-    try std.testing.expectEqual(expr_node.data_3, restored_node.data_3);
-    try std.testing.expectEqual(expr_node.tag, restored_node.tag);
-
-    try std.testing.expectEqual(@as(u32, 1234), restored_store.extra_data.items[0]);
-    try std.testing.expectEqual(@as(u32, 5678), restored_store.extra_data.items[1]);
-}
diff --git a/src/cache/CacheConfig.zig b/src/cache/CacheConfig.zig
new file mode 100644
index 0000000000..3e4a8ce3ac
--- /dev/null
+++ b/src/cache/CacheConfig.zig
@@ -0,0 +1,263 @@
+//! Cache configuration and statistics tracking for the Roc compiler cache system.
+
+const std = @import("std");
+
+const Allocator = std.mem.Allocator;
+
+/// Configuration for the Roc cache system.
+///
+/// This struct controls cache behavior including storage location,
+/// size limits, and cleanup policies.
+pub const CacheConfig = struct {
+    enabled: bool = true,
+    cache_dir: ?[]const u8 = null, // null = use default
+    max_size_mb: u32 = 1024, // 1GB default
+    max_age_days: u32 = 30, // 30 days default
+    verbose: bool = false, // Print cache statistics
+
+    const Self = @This();
+
+    /// Get the default cache directory for the current platform.
+    ///
+    /// Uses platform-specific cache directories:
+    /// - Linux: ~/.cache/roc
+    /// - macOS: ~/Library/Caches/roc
+    /// - Windows: %LOCALAPPDATA%\roc\cache
+    pub fn getDefaultCacheDir(allocator: Allocator) ![]u8 {
+        const env_var = switch (@import("builtin").target.os.tag) {
+            .windows => "USERPROFILE",
+            else => "HOME",
+        };
+
+        const home_dir = std.process.getEnvVarOwned(allocator, env_var) catch {
+            return error.NoHomeDirectory;
+        };
+        defer allocator.free(home_dir);
+
+        const cache_subdir = switch (@import("builtin").target.os.tag) {
+            .linux => ".cache/roc",
+            .macos => "Library/Caches/roc",
+            .windows => "AppData/Local/roc/cache",
+            else => ".cache/roc", // fallback to Linux style
+        };
+
+        return std.fs.path.join(allocator, &[_][]const u8{ home_dir, cache_subdir });
+    }
+
+    /// Get the effective cache directory, using default if none specified.
+    pub fn getEffectiveCacheDir(self: Self, allocator: Allocator) ![]u8 {
+        if (self.cache_dir) |dir| {
+            return allocator.dupe(u8, dir);
+        } else {
+            return getDefaultCacheDir(allocator);
+        }
+    }
+
+    /// Get the version-specific cache directory.
+    ///
+    /// This isolates cache entries by compiler version to prevent
+    /// conflicts when switching between compiler versions.
+    pub fn getVersionCacheDir(self: Self, allocator: Allocator) ![]u8 {
+        const base_dir = try self.getEffectiveCacheDir(allocator);
+        defer allocator.free(base_dir);
+
+        // Create a simple version hash for directory isolation
+        const version_hash = comptime blk: {
+            const zig_version = @import("builtin").zig_version;
+            const version_info = std.fmt.comptimePrint("roc-{d}.{d}.{d}-{s}", .{
+                zig_version.major,
+                zig_version.minor,
+                zig_version.patch,
+                @tagName(@import("builtin").mode),
+            });
+
+            var hasher = std.crypto.hash.sha2.Sha256.init(.{});
+            hasher.update(version_info);
+            const hash = hasher.finalResult();
+
+            // Use the first 16 bytes of the hash (32 hex chars) for the directory name
+            var hex_buf: [32]u8 = undefined;
+            _ = std.fmt.bufPrint(&hex_buf, "{}", .{std.fmt.fmtSliceHexLower(hash[0..16])}) catch unreachable;
+            break :blk hex_buf;
+        };
+
+        return std.fs.path.join(allocator, &[_][]const u8{ base_dir, &version_hash });
+    }
+
+    /// Get the cache entries directory.
+    pub fn getCacheEntriesDir(self: Self, allocator: Allocator) ![]u8 {
+        const version_dir = try self.getVersionCacheDir(allocator);
+        defer allocator.free(version_dir);
+
+        return std.fs.path.join(allocator, &[_][]const u8{ version_dir, "entries" });
+    }
+
+    /// Get the temporary directory for cache operations.
+    pub fn getTempDir(self: Self, allocator: Allocator) ![]u8 {
+        const version_dir = try self.getVersionCacheDir(allocator);
+        defer allocator.free(version_dir);
+
+        return std.fs.path.join(allocator, &[_][]const u8{ version_dir, "temp" });
+    }
+
+    /// Get maximum cache size in bytes.
+    pub fn getMaxSizeBytes(self: Self) u64 {
+        return @as(u64, self.max_size_mb) * 1024 * 1024;
+    }
+
+    /// Get maximum age in nanoseconds.
+    pub fn getMaxAgeNanos(self: Self) i64 {
+        return @as(i64, self.max_age_days) * 24 * 60 * 60 * 1_000_000_000;
+    }
+};
+
+/// Statistics tracking for cache operations.
+///
+/// This struct tracks cache performance metrics that can be
+/// displayed with the --verbose flag.
+pub const CacheStats = struct {
+    hits: u64 = 0,
+    misses: u64 = 0,
+    invalidations: u64 = 0,
+    stores: u64 = 0,
+    store_failures: u64 = 0,
+    bytes_read: u64 = 0,
+    bytes_written: u64 = 0,
+    time_saved_ns: u64 = 0, // Time saved by cache hits
+
+    const Self = @This();
+
+    /// Record a cache hit.
+    pub fn recordHit(self: *Self, bytes_read: u64, time_saved_ns: u64) void {
+        self.hits += 1;
+        self.bytes_read += bytes_read;
+        self.time_saved_ns += time_saved_ns;
+    }
+
+    /// Record a cache miss.
+    pub fn recordMiss(self: *Self) void {
+        self.misses += 1;
+    }
+
+    /// Record a cache invalidation.
+    pub fn recordInvalidation(self: *Self) void {
+        self.invalidations += 1;
+    }
+
+    /// Record a successful cache store.
+    pub fn recordStore(self: *Self, bytes_written: u64) void {
+        self.stores += 1;
+        self.bytes_written += bytes_written;
+    }
+
+    /// Record a failed cache store.
+    pub fn recordStoreFailure(self: *Self) void {
+        self.store_failures += 1;
+    }
+
+    /// Get total cache operations.
+    pub fn getTotalOps(self: Self) u64 {
+        return self.hits + self.misses;
+    }
+
+    /// Get cache hit rate as a percentage.
+    pub fn getHitRate(self: Self) f64 {
+        const total = self.getTotalOps();
+        if (total == 0) return 0.0;
+        return (@as(f64, @floatFromInt(self.hits)) / @as(f64, @floatFromInt(total))) * 100.0;
+    }
+
+    /// Get time saved in milliseconds.
+    pub fn getTimeSavedMs(self: Self) f64 {
+        return @as(f64, @floatFromInt(self.time_saved_ns)) / 1_000_000.0;
+    }
+
+    /// Print cache statistics to the given writer.
+    pub fn print(self: Self, writer: anytype) !void {
+        const total_ops = self.getTotalOps();
+        if (total_ops == 0) {
+            try writer.print("Cache: No operations performed\n", .{});
+            return;
+        }
+
+        try writer.print("Cache Statistics:\n", .{});
+        try writer.print("  Operations: {} total ({} hits, {} misses)\n", .{ total_ops, self.hits, self.misses });
+        try writer.print("  Hit rate: {d:.1}%\n", .{self.getHitRate()});
+        try writer.print("  Data: {d:.1} MB read, {d:.1} MB written\n", .{
+            @as(f64, @floatFromInt(self.bytes_read)) / (1024.0 * 1024.0),
+            @as(f64, @floatFromInt(self.bytes_written)) / (1024.0 * 1024.0),
+        });
+        try writer.print("  Time saved: {d:.1} ms\n", .{self.getTimeSavedMs()});
+        try writer.print("  Stores: {} successful, {} failed\n", .{ self.stores, self.store_failures });
+        if (self.invalidations > 0) {
+            try writer.print("  Invalidations: {}\n", .{self.invalidations});
+        }
+    }
+};
+
+// Tests
+const testing = std.testing;
+
+test "CacheConfig default values" {
+    const config = CacheConfig{};
+
+    try testing.expect(config.enabled == true);
+    try testing.expect(config.cache_dir == null);
+    try testing.expect(config.max_size_mb == 1024);
+    try testing.expect(config.max_age_days == 30);
+    try testing.expect(config.verbose == false);
+}
+
+test "CacheConfig getMaxSizeBytes" {
+    const config = CacheConfig{ .max_size_mb = 100 };
+
+    try testing.expectEqual(@as(u64, 100 * 1024 * 1024), config.getMaxSizeBytes());
+}
+
+test "CacheConfig getMaxAgeNanos" {
+    const config = CacheConfig{ .max_age_days = 7 };
+
+    const expected = @as(i64, 7) * 24 * 60 * 60 * 1_000_000_000;
+    try testing.expectEqual(expected, config.getMaxAgeNanos());
+}
+
+test "CacheConfig getEffectiveCacheDir with explicit dir" {
+    const allocator = testing.allocator;
+    const config = CacheConfig{ .cache_dir = "/custom/cache" };
+
+    const dir = try config.getEffectiveCacheDir(allocator);
+    defer allocator.free(dir);
+
+    try testing.expectEqualStrings("/custom/cache", dir);
+}
+
+test "CacheStats basic operations" {
+    var stats = CacheStats{};
+
+    // Record some operations
+    stats.recordHit(1024, 1000000); // 1KB read, 1ms saved
+    stats.recordMiss();
+    stats.recordStore(2048); // 2KB written
+
+    try testing.expectEqual(@as(u64, 1), stats.hits);
+    try testing.expectEqual(@as(u64, 1), stats.misses);
+    try testing.expectEqual(@as(u64, 1), stats.stores);
+    try testing.expectEqual(@as(u64, 2), stats.getTotalOps());
+    try testing.expectEqual(@as(f64, 50.0), stats.getHitRate());
+    try testing.expectEqual(@as(f64, 1.0), stats.getTimeSavedMs());
+}
+
+test "CacheStats hit rate calculation" {
+    var stats = CacheStats{};
+
+    // No operations - should be 0%
+    try testing.expectEqual(@as(f64, 0.0), stats.getHitRate());
+
+    // 3 hits, 1 miss = 75%
+    stats.recordHit(100, 1000);
+    stats.recordHit(200, 2000);
+    stats.recordHit(300, 3000);
+    stats.recordMiss();
+
+    try testing.expectEqual(@as(f64, 75.0), stats.getHitRate());
+}
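Putting the two types together: a caller resolves the entries directory once, feeds per-operation outcomes into `CacheStats`, and prints the summary when `verbose` is set. A usage sketch under the API above (the directory lookup needs a HOME or USERPROFILE environment variable, so this is illustrative rather than hermetic):

```zig
test "sketch: CacheConfig and CacheStats usage" {
    const gpa = std.testing.allocator;
    const config = CacheConfig{ .max_size_mb = 256, .verbose = true };

    // Resolves to <platform cache dir>/<version hash>/entries.
    const entries_dir = try config.getCacheEntriesDir(gpa);
    defer gpa.free(entries_dir);

    var stats = CacheStats{};
    stats.recordHit(4096, 2_000_000); // 4 KB read, 2 ms saved
    stats.recordMiss();
    stats.recordStore(8192);

    if (config.verbose) {
        try stats.print(std.io.getStdErr().writer());
    }
}
```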
diff --git a/src/cache/CacheKey.zig b/src/cache/CacheKey.zig
new file mode 100644
index 0000000000..21b647962a
--- /dev/null
+++ b/src/cache/CacheKey.zig
@@ -0,0 +1,242 @@
+//! Cache key generation and management for uniquely identifying cached compilation results.
+
+const std = @import("std");
+const Filesystem = @import("../coordinate/Filesystem.zig");
+
+const Allocator = std.mem.Allocator;
+
+/// Cache key that uniquely identifies a cached compilation result.
+///
+/// The cache key captures all factors that affect compilation output:
+/// - Source content hash: Invalidates when file content changes
+/// - File modification time: Additional validation layer
+/// - Compiler version: Invalidates when compiler changes
+///
+/// Future extensions could include dependency hashes for import tracking.
+pub const CacheKey = struct {
+    content_hash: [32]u8, // SHA-256 of source content
+    file_mtime: i128, // File modification time (nanoseconds since epoch)
+    compiler_version: [32]u8, // Hash of compiler version/build info
+    source_path: []const u8, // Path to the source file
+
+    const Self = @This();
+
+    /// Generate a cache key for the given source content and file path.
+    ///
+    /// This function computes all necessary hashes and retrieves file metadata
+    /// to create a comprehensive cache key.
+    pub fn generate(
+        source: []const u8,
+        file_path: []const u8,
+        fs: Filesystem,
+        allocator: Allocator,
+    ) !Self {
+        // Hash the source content
+        var content_hasher = std.crypto.hash.sha2.Sha256.init(.{});
+        content_hasher.update(source);
+        const content_hash = content_hasher.finalResult();
+
+        // Get file modification time
+        const file_mtime = getFileModTime(file_path, fs) catch |err| switch (err) {
+            error.FileNotFound => 0, // Use 0 for non-existent files (e.g., in-memory sources)
+            else => return err,
+        };
+
+        // Get compiler version hash
+        const compiler_version = getCompilerVersionHash();
+
+        return Self{
+            .content_hash = content_hash,
+            .file_mtime = file_mtime,
+            .compiler_version = compiler_version,
+            .source_path = try allocator.dupe(u8, file_path),
+        };
+    }
+
+    /// Convert cache key to a filesystem-safe filename.
+    ///
+    /// Returns a hex string representation that can be used as a cache filename.
+    /// The filename includes enough information to avoid collisions while being
+    /// filesystem-safe across different platforms.
+    pub fn toCacheFileName(self: Self, allocator: Allocator) ![]u8 {
+        // Create a combined hash of all key components
+        var hasher = std.crypto.hash.sha2.Sha256.init(.{});
+        hasher.update(&self.content_hash);
+        hasher.update(std.mem.asBytes(&self.file_mtime));
+        hasher.update(&self.compiler_version);
+        const combined_hash = hasher.finalResult();
+
+        // Convert to hex string
+        const filename = try allocator.alloc(u8, combined_hash.len * 2);
+        _ = std.fmt.bufPrint(filename, "{}", .{std.fmt.fmtSliceHexLower(&combined_hash)}) catch unreachable;
+
+        return filename;
+    }
+
+    /// Check if this cache key is equal to another.
+    pub fn eql(self: Self, other: Self) bool {
+        return std.mem.eql(u8, &self.content_hash, &other.content_hash) and
+            self.file_mtime == other.file_mtime and
+            std.mem.eql(u8, &self.compiler_version, &other.compiler_version);
+    }
+
+    /// Format cache key for debugging output.
+    pub fn format(
+        self: Self,
+        comptime fmt: []const u8,
+        options: std.fmt.FormatOptions,
+        writer: anytype,
+    ) !void {
+        _ = fmt;
+        _ = options;
+
+        try writer.print("CacheKey{{ content: {}, mtime: {}, compiler: {} }}", .{
+            std.fmt.fmtSliceHexLower(self.content_hash[0..8]), // First 8 bytes for readability
+            self.file_mtime,
+            std.fmt.fmtSliceHexLower(self.compiler_version[0..8]), // First 8 bytes for readability
+        });
+    }
+
+    /// Get the source file path from the cache key.
+    pub fn getSourcePath(self: Self, allocator: Allocator) ![]u8 {
+        return allocator.dupe(u8, self.source_path);
+    }
+
+    /// Free the source path when the key is no longer needed.
+    pub fn deinit(self: *Self, allocator: Allocator) void {
+        allocator.free(self.source_path);
+    }
+};
+
+/// Get file modification time in nanoseconds since epoch.
+///
+/// This provides a quick validation that the file hasn't changed since caching.
+/// While the content hash is the primary validation, mtime provides an additional
+/// layer of validation and can help detect file system-level changes.
+fn getFileModTime(file_path: []const u8, fs: Filesystem) !i128 {
+    const file_info = fs.getFileInfo(file_path) catch |err| switch (err) {
+        error.FileNotFound => return 0, // Use 0 for non-existent files (e.g., in-memory sources)
+        else => return err,
+    };
+
+    return file_info.mtime_ns;
+}
+
+/// Get a hash representing the current compiler version.
+///
+/// This ensures cache invalidation when the compiler version changes.
+/// The hash should include version info, build flags, and other factors
+/// that could affect compilation output.
+fn getCompilerVersionHash() [32]u8 {
+    // For now, we'll create a simple version hash based on compile-time information
+    // In a real implementation, this would include version numbers, git hashes, etc.
+
+    const version_info = comptime blk: {
+        // Include Zig version and build mode as factors
+        const zig_version = @import("builtin").zig_version;
+        const build_mode = @import("builtin").mode;
+
+        break :blk std.fmt.comptimePrint("roc-zig-{d}.{d}.{d}-{s}", .{
+            zig_version.major,
+            zig_version.minor,
+            zig_version.patch,
+            @tagName(build_mode),
+        });
+    };
+
+    var hasher = std.crypto.hash.sha2.Sha256.init(.{});
+    hasher.update(version_info);
+
+    // Add additional compile-time factors that could affect output
+    hasher.update(@tagName(@import("builtin").target.cpu.arch));
+    hasher.update(@tagName(@import("builtin").target.os.tag));
+
+    return hasher.finalResult();
+}
+
+// Tests
+const testing = std.testing;
+
+test "CacheKey generation" {
+    const allocator = testing.allocator;
+
+    // Mock filesystem for testing
+    const fs = Filesystem.testing();
+
+    const source1 = "module [foo]\n\nfoo = 42";
+    const source2 = "module [bar]\n\nbar = 24";
+
+    var key1 = try CacheKey.generate(source1, "test1.roc", fs, allocator);
+    defer key1.deinit(allocator);
+    var key2 = try CacheKey.generate(source2, "test2.roc", fs, allocator);
+    defer key2.deinit(allocator);
+    var key1_again = try CacheKey.generate(source1, "test1.roc", fs, allocator);
+    defer key1_again.deinit(allocator);
+
+    // Different sources should produce different keys
+    try testing.expect(!key1.eql(key2));
+
+    // Same source should produce same key
+    try testing.expect(key1.eql(key1_again));
+}
+
+test "CacheKey to filename conversion" {
+    const allocator = testing.allocator;
+
+    const fs = Filesystem.testing();
+
+    const source = "module [test]\n\ntest = 123";
+    var key = try CacheKey.generate(source, "test.roc", fs, allocator);
+    defer key.deinit(allocator);
+
+    const filename = try key.toCacheFileName(allocator);
+    defer allocator.free(filename);
+
+    // Should be a hex string
+    try testing.expect(filename.len == 64); // SHA-256 hex = 64 chars
+
+    // Should only contain hex characters
+    for (filename) |char| {
+        try testing.expect(std.ascii.isHex(char));
+    }
+}
+
+test "CacheKey equality" {
+    const allocator = testing.allocator;
+
+    const fs = Filesystem.testing();
+
+    const source = "module [test]\n\ntest = 456";
+    var key1 = try CacheKey.generate(source, "test.roc", fs, allocator);
+    defer key1.deinit(allocator);
+    var key2 = try CacheKey.generate(source, "test.roc", fs, allocator);
+    defer key2.deinit(allocator);
+
+    try testing.expect(key1.eql(key2));
+
+    // Different content should produce different keys
+    const different_source = "module [test]\n\ntest = 789";
+    var key3 = try CacheKey.generate(different_source, "test.roc", fs, allocator);
+    defer key3.deinit(allocator);
+
+    try testing.expect(!key1.eql(key3));
+}
+
+test "CacheKey format" {
+    const allocator = testing.allocator;
+
+    const fs = Filesystem.testing();
+
+    const source = "module [format_test]\n\nformat_test = 1";
+    var key = try CacheKey.generate(source, "format_test.roc", fs, allocator);
+    defer key.deinit(allocator);
+
+    var buffer: [256]u8 = undefined;
+    const formatted = try std.fmt.bufPrint(&buffer, "{}", .{key});
+
+    // Should contain expected format elements
+    try testing.expect(std.mem.containsAtLeast(u8, formatted, 1, "CacheKey"));
+    try testing.expect(std.mem.containsAtLeast(u8, formatted, 1, "content"));
+    try testing.expect(std.mem.containsAtLeast(u8, formatted, 1, "mtime"));
+    try testing.expect(std.mem.containsAtLeast(u8, formatted, 1, "compiler"));
+}
+ pub fn lookup(self: *Self, key: CacheKey) !CacheResult { + if (!self.config.enabled) { + return CacheResult.miss; + } + + const start_time = std.time.nanoTimestamp(); + + const cache_filename = key.toCacheFileName(self.allocator) catch { + return CacheResult.miss; + }; + defer self.allocator.free(cache_filename); + + const entries_dir = self.config.getCacheEntriesDir(self.allocator) catch { + return CacheResult.miss; + }; + defer self.allocator.free(entries_dir); + + const cache_path = std.fs.path.join(self.allocator, &[_][]const u8{ entries_dir, cache_filename }) catch { + return CacheResult.miss; + }; + defer self.allocator.free(cache_path); + + // Store the original file path from the key for later use + const source_path = try key.getSourcePath(self.allocator); + defer self.allocator.free(source_path); + + // Check if cache file exists + const exists = self.filesystem.fileExists(cache_path) catch false; + if (!exists) { + self.stats.recordMiss(); + return CacheResult.miss; + } + + // Read cache data using memory mapping for better performance + const mapped_cache = cache_mod.CacheModule.readFromFileMapped(self.allocator, cache_path, self.filesystem) catch |err| { + if (self.config.verbose) { + std.log.debug("Failed to read cache file {s}: {}", .{ cache_path, err }); + } + self.stats.recordMiss(); + return CacheResult.miss; + }; + defer mapped_cache.deinit(self.allocator); + + // Validate and restore from cache + const result = self.restoreFromCache(mapped_cache.data(), key, source_path) catch |err| { + if (self.config.verbose) { + std.log.debug("Failed to restore from cache {s}: {}", .{ cache_path, err }); + } + self.stats.recordInvalidation(); + return CacheResult.invalid; + }; + + const end_time = std.time.nanoTimestamp(); + const time_saved = end_time - start_time; + + self.stats.recordHit(mapped_cache.data().len, @as(u64, @intCast(time_saved))); + + return CacheResult{ .hit = result }; + } + + /// Store a cache entry. + /// + /// Serializes the ProcessResult and stores it in the cache. + /// Failures are logged but don't propagate to avoid breaking compilation. 
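The `lookup` above degrades every failure into a miss or an invalid entry rather than an error, so a broken cache can never break compilation. A hedged sketch of the intended call-site shape follows; the import paths and the use of `Filesystem.testing()` are assumptions for illustration, and a real caller would pass the production filesystem:

    const std = @import("std");
    const cache = @import("cache/mod.zig"); // assumed import path
    const Filesystem = @import("coordinate/Filesystem.zig"); // assumed import path

    fn processWithCache(gpa: std.mem.Allocator, source: []const u8, path: []const u8) !void {
        var manager = cache.CacheManager.init(gpa, cache.CacheConfig{}, Filesystem.testing());

        var key = try cache.CacheKey.generate(source, path, manager.filesystem, gpa);
        defer key.deinit(gpa);

        switch (try manager.lookup(key)) {
            // Hit: the returned ProcessResult owns all of its data.
            .hit => |process_result| {
                _ = process_result;
            },
            // Miss or invalid entry: compile from scratch, then store best-effort:
            // try manager.store(key, &process_result);
            .miss, .invalid => {},
        }
    }
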
+ pub fn store(self: *Self, key: CacheKey, result: *const coordinate_simple.ProcessResult) !void { + if (!self.config.enabled) { + return; + } + + const start_time = std.time.nanoTimestamp(); + + // Ensure cache directories exist + self.ensureCacheDir() catch |err| { + if (self.config.verbose) { + std.log.debug("Failed to create cache directory: {}", .{err}); + } + self.stats.recordStoreFailure(); + return; + }; + + // Serialize the result + const cache_data = self.serializeResult(result) catch |err| { + if (self.config.verbose) { + std.log.debug("Failed to serialize cache data: {}", .{err}); + } + self.stats.recordStoreFailure(); + return; + }; + defer self.allocator.free(cache_data); + + // Get cache file path + const cache_filename = key.toCacheFileName(self.allocator) catch { + self.stats.recordStoreFailure(); + return; + }; + defer self.allocator.free(cache_filename); + + const entries_dir = self.config.getCacheEntriesDir(self.allocator) catch { + self.stats.recordStoreFailure(); + return; + }; + defer self.allocator.free(entries_dir); + + const cache_path = std.fs.path.join(self.allocator, &[_][]const u8{ entries_dir, cache_filename }) catch { + self.stats.recordStoreFailure(); + return; + }; + defer self.allocator.free(cache_path); + + // Write to temporary file first, then rename for atomicity + const temp_dir = self.config.getTempDir(self.allocator) catch { + self.stats.recordStoreFailure(); + return; + }; + defer self.allocator.free(temp_dir); + + const temp_filename = std.fmt.allocPrint(self.allocator, "{s}.tmp", .{cache_filename}) catch { + self.stats.recordStoreFailure(); + return; + }; + defer self.allocator.free(temp_filename); + + const temp_path = std.fs.path.join(self.allocator, &[_][]const u8{ temp_dir, temp_filename }) catch { + self.stats.recordStoreFailure(); + return; + }; + defer self.allocator.free(temp_path); + + // Write to temp file + self.filesystem.writeFile(temp_path, cache_data) catch |err| { + if (self.config.verbose) { + std.log.debug("Failed to write cache temp file {s}: {}", .{ temp_path, err }); + } + self.stats.recordStoreFailure(); + return; + }; + + // Move temp file to final location (atomic operation) + self.filesystem.rename(temp_path, cache_path) catch |err| { + if (self.config.verbose) { + std.log.debug("Failed to rename cache file {s} -> {s}: {}", .{ temp_path, cache_path, err }); + } + self.stats.recordStoreFailure(); + return; + }; + + const end_time = std.time.nanoTimestamp(); + self.stats.recordStore(cache_data.len); + + if (self.config.verbose) { + const time_ms = @as(f64, @floatFromInt(end_time - start_time)) / 1_000_000.0; + std.log.debug("Stored cache entry {s} ({d:.1} MB in {d:.1} ms)", .{ + cache_path, + @as(f64, @floatFromInt(cache_data.len)) / (1024.0 * 1024.0), + time_ms, + }); + } + } + + /// Ensure cache directories exist. + pub fn ensureCacheDir(self: *Self) !void { + const entries_dir = try self.config.getCacheEntriesDir(self.allocator); + defer self.allocator.free(entries_dir); + + const temp_dir = try self.config.getTempDir(self.allocator); + defer self.allocator.free(temp_dir); + + // Create directories + self.filesystem.makePath(entries_dir) catch |err| switch (err) { + error.PathAlreadyExists => {}, // OK + else => return err, + }; + + self.filesystem.makePath(temp_dir) catch |err| switch (err) { + error.PathAlreadyExists => {}, // OK + else => return err, + }; + } + + /// Get cache statistics. 
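`store` above gets its crash-safety from the write-to-temp-then-rename dance: a concurrent reader sees either the old entry or the complete new one, never a torn write, because a rename within one filesystem is atomic on POSIX. The same pattern in miniature, using `std.fs` directly (names are illustrative, not from this diff):

    const std = @import("std");

    fn atomicWrite(dir: std.fs.Dir, final_name: []const u8, bytes: []const u8) !void {
        // Write the full payload to a sibling temp file first...
        try dir.writeFile(.{ .sub_path = "entry.tmp", .data = bytes });
        // ...then atomically move it into place.
        try dir.rename("entry.tmp", final_name);
    }

    test "atomic write sketch" {
        var tmp = std.testing.tmpDir(.{});
        defer tmp.cleanup();

        try atomicWrite(tmp.dir, "entry.bin", "cached bytes");

        const read_back = try tmp.dir.readFileAlloc(std.testing.allocator, "entry.bin", 1024);
        defer std.testing.allocator.free(read_back);
        try std.testing.expectEqualStrings("cached bytes", read_back);
    }
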
+ pub fn getStats(self: *const Self) CacheStats { + return self.stats; + } + + /// Print cache statistics if verbose mode is enabled. + pub fn printStats(self: *const Self) void { + if (!self.config.verbose) return; + + const stderr = std.io.getStdErr().writer(); + self.stats.print(stderr) catch { + // If we can't print stats, just continue + }; + } + + /// Serialize a ProcessResult to cache data. + fn serializeResult(self: *Self, result: *const coordinate_simple.ProcessResult) ![]u8 { + // Note: We don't cache reports - they can be recomputed if needed + // Create cache data using the ModuleEnv from the CIR + const cache_data = try Cache.create(self.allocator, result.cir.env, result.cir); + + return cache_data; + } + + /// Restore a ProcessResult from cache data. + fn restoreFromCache(self: *Self, cache_data: []align(SERIALIZATION_ALIGNMENT) const u8, key: CacheKey, source_path: []const u8) !coordinate_simple.ProcessResult { + // Load cache using existing Cache functionality + var cache = cache_mod.CacheModule.fromMappedMemory(cache_data) catch return error.InvalidCache; + + // Validate cache + cache.validate() catch return error.InvalidCache; + + // Restore the data + const restored = cache.restore(self.allocator) catch return error.RestoreError; + + // Reports are not cached - they need to be recomputed if needed + // Users can use --no-cache to see diagnostic reports + std.log.info("Loaded from cache - diagnostic reports not shown. Use --no-cache to see Errors and Warnings for this module.", .{}); + const reports = try self.allocator.alloc(reporting.Report, 0); + + // Allocate and copy ModuleEnv to heap for ownership + const module_env = try self.allocator.create(ModuleEnv); + module_env.* = restored.module_env; + + // Allocate CIR to heap for ownership + const cir = try self.allocator.create(CIR); + + // Copy CIR but don't copy the invalid env pointer + cir.* = restored.cir; + // Immediately fix env pointer to point to our heap-allocated module_env + cir.env = module_env; + + // Re-read the source file - we need it for any potential error reporting + const source = self.filesystem.readFile(source_path, self.allocator) catch |err| blk: { + // If we can't read the source, provide a fallback + if (self.config.verbose) { + std.log.debug("Failed to read source file {s}: {}", .{ source_path, err }); + } + break :blk try self.allocator.dupe(u8, "# Source file not available"); + }; + + // Create ProcessResult with proper ownership + return coordinate_simple.ProcessResult{ + .cir = cir, + .reports = reports, + .source = source, + .cache_key = key, + .was_cached = true, + }; + } +}; + +// Tests +const testing = std.testing; + +test "CacheManager initialization" { + const allocator = testing.allocator; + const config = CacheConfig{}; + const filesystem = Filesystem.testing(); + + var manager = CacheManager.init(allocator, config, filesystem); + + try testing.expect(manager.config.enabled == true); + try testing.expect(manager.stats.getTotalOps() == 0); +} + +test "CacheManager lookup miss" { + const allocator = testing.allocator; + const config = CacheConfig{}; + var filesystem = Filesystem.testing(); + + // Mock fileExists to return false + const TestFS = struct { + fn fileExists(path: []const u8) Filesystem.OpenError!bool { + _ = path; + return false; + } + }; + filesystem.fileExists = TestFS.fileExists; + + var manager = CacheManager.init(allocator, config, filesystem); + + const key = CacheKey{ + .content_hash = [_]u8{0} ** 32, + .file_mtime = 0, + .compiler_version = [_]u8{0} ** 32, + 
.source_path = "test.roc", + }; + + const result = try manager.lookup(key); + try testing.expect(result == .miss); + try testing.expect(manager.stats.misses == 1); +} + +test "CacheManager disabled" { + const allocator = testing.allocator; + const config = CacheConfig{ .enabled = false }; + const filesystem = Filesystem.testing(); + + var manager = CacheManager.init(allocator, config, filesystem); + + const key = CacheKey{ + .content_hash = [_]u8{0} ** 32, + .file_mtime = 0, + .compiler_version = [_]u8{0} ** 32, + .source_path = "test.roc", + }; + + const result = try manager.lookup(key); + try testing.expect(result == .miss); + try testing.expect(manager.stats.getTotalOps() == 0); // No stats recorded when disabled +} diff --git a/src/cache/CacheModule.zig b/src/cache/CacheModule.zig new file mode 100644 index 0000000000..fed5934342 --- /dev/null +++ b/src/cache/CacheModule.zig @@ -0,0 +1,783 @@ +//! Module cache for Roc files +//! +//! This module provides memory-mapped caching for compiled Roc modules, +//! allowing fast serialization and deserialization of ModuleEnv and CIR data. + +const std = @import("std"); +const base = @import("../base.zig"); +const canonicalize = @import("../check/canonicalize.zig"); +const collections = @import("../collections.zig"); +const types = @import("../types.zig"); +const parse = @import("../check/parse.zig").parse; +const SExprTree = @import("../base/SExprTree.zig"); +const Filesystem = @import("../coordinate/Filesystem.zig"); +const SERIALIZATION_ALIGNMENT = @import("../serialization/mod.zig").SERIALIZATION_ALIGNMENT; + +const Allocator = std.mem.Allocator; +const TypeStore = types.Store; +const CIR = canonicalize.CIR; +const Node = CIR.Node; +const NodeStore = CIR.NodeStore; +const SafeList = collections.SafeList; +const SafeStringHashMap = collections.SafeStringHashMap; + +/// Magic number for cache validation +const CACHE_MAGIC: u32 = 0x524F4343; // "ROCC" in ASCII +const CACHE_VERSION: u32 = 1; + +/// Component metadata for locating data in the cache +const ComponentInfo = struct { + offset: u32, + length: u32, +}; + +/// Cache header that gets written to disk before the cached data +pub const Header = struct { + /// Magic number for validation + magic: u32, + + /// Version for compatibility checking + version: u32, + + /// Total size of the data section (excluding this header) + data_size: u32, + + // TODO implement this properly.. just stubbed out for now. 
+ // CRC32 checksum of the data section + checksum: u32, + + /// Component locations in the data section + node_store: ComponentInfo, + string_store: ComponentInfo, + ident_ids_for_slicing: ComponentInfo, + ident_store: ComponentInfo, + line_starts: ComponentInfo, + types_store: ComponentInfo, + exposed_by_str: ComponentInfo, + exposed_nodes: ComponentInfo, + external_decls: ComponentInfo, + + /// Spans can be stored directly since they're small + all_defs: CIR.Def.Span, + all_statements: CIR.Statement.Span, + + /// Fixed padding to ensure alignment + _padding: [24]u8 = [_]u8{0} ** 24, + + /// Error specific to initializing a Header from bytes + pub const InitError = error{ + PartialRead, + InvalidMagic, + InvalidVersion, + ChecksumMismatch, + }; + + /// Verify that the given buffer begins with a valid Header + pub fn initFromBytes(buf: []align(@alignOf(Header)) u8) InitError!*Header { + if (buf.len < @sizeOf(Header)) { + return InitError.PartialRead; + } + + const header = @as(*Header, @ptrCast(buf.ptr)); + const data_start = @sizeOf(Header); + const data_end = data_start + header.data_size; + + // The buffer might not contain complete data after the header + if (buf.len < data_end) { + return InitError.PartialRead; + } + + // Validate magic and version + if (header.magic != CACHE_MAGIC) return InitError.InvalidMagic; + if (header.version != CACHE_VERSION) return InitError.InvalidVersion; + + return header; + } +}; + +/// Memory-mapped cache that can be read directly from disk +pub const CacheModule = struct { + header: *const Header, + data: []align(SERIALIZATION_ALIGNMENT) const u8, + + /// Create a cache by serializing ModuleEnv and CIR data + pub fn create( + allocator: Allocator, + module_env: *const base.ModuleEnv, + cir: *const CIR, + ) ![]align(SERIALIZATION_ALIGNMENT) u8 { + // Calculate component sizes + const node_store_size = cir.store.serializedSize(); + const string_store_size = module_env.strings.serializedSize(); + const ident_ids_size = module_env.ident_ids_for_slicing.serializedSize(); + const ident_store_size = module_env.idents.serializedSize(); + const line_starts_size = module_env.line_starts.serializedSize(); + const types_store_size = module_env.types.serializedSize(); + const exposed_by_str_size = module_env.exposed_by_str.serializedSize(); + const exposed_nodes_size = module_env.exposed_nodes.serializedSize(); + const external_decls_size = cir.external_decls.serializedSize(); + + // Calculate aligned offsets + var offset: u32 = 0; + + // Ensure each offset is aligned to SERIALIZATION_ALIGNMENT + const node_store_offset = offset; + offset += @intCast(node_store_size); + + offset = @intCast(std.mem.alignForward(usize, offset, SERIALIZATION_ALIGNMENT)); + const string_store_offset = offset; + offset += @intCast(string_store_size); + + offset = @intCast(std.mem.alignForward(usize, offset, SERIALIZATION_ALIGNMENT)); + const ident_ids_offset = offset; + offset += @intCast(ident_ids_size); + + offset = @intCast(std.mem.alignForward(usize, offset, SERIALIZATION_ALIGNMENT)); + const ident_store_offset = offset; + offset += @intCast(ident_store_size); + + offset = @intCast(std.mem.alignForward(usize, offset, SERIALIZATION_ALIGNMENT)); + const line_starts_offset = offset; + offset += @intCast(line_starts_size); + + offset = @intCast(std.mem.alignForward(usize, offset, SERIALIZATION_ALIGNMENT)); + const types_store_offset = offset; + offset += @intCast(types_store_size); + + offset = @intCast(std.mem.alignForward(usize, offset, SERIALIZATION_ALIGNMENT)); + const 
exposed_by_str_offset = offset; + offset += @intCast(exposed_by_str_size); + + offset = @intCast(std.mem.alignForward(usize, offset, SERIALIZATION_ALIGNMENT)); + const exposed_nodes_offset = offset; + offset += @intCast(exposed_nodes_size); + + offset = @intCast(std.mem.alignForward(usize, offset, SERIALIZATION_ALIGNMENT)); + const external_decls_offset = offset; + offset += @intCast(external_decls_size); + offset = @intCast(std.mem.alignForward(usize, offset, SERIALIZATION_ALIGNMENT)); + + const total_data_size = offset; + + // Allocate buffer for header + data + const header_size = std.mem.alignForward(usize, @sizeOf(Header), SERIALIZATION_ALIGNMENT); + const total_size = header_size + total_data_size; + const buffer = try allocator.alignedAlloc(u8, SERIALIZATION_ALIGNMENT, total_size); + + // Zero-initialize buffer for proper CRC calculation + @memset(buffer, 0); + + // Initialize header + const header = @as(*Header, @ptrCast(buffer.ptr)); + header.* = Header{ + .magic = CACHE_MAGIC, + .version = CACHE_VERSION, + .data_size = total_data_size, + .checksum = 0, // Will be calculated after data is written + .node_store = .{ .offset = node_store_offset, .length = @intCast(node_store_size) }, + .string_store = .{ .offset = string_store_offset, .length = @intCast(string_store_size) }, + .ident_ids_for_slicing = .{ .offset = ident_ids_offset, .length = @intCast(ident_ids_size) }, + .ident_store = .{ .offset = ident_store_offset, .length = @intCast(ident_store_size) }, + .line_starts = .{ .offset = line_starts_offset, .length = @intCast(line_starts_size) }, + .types_store = .{ .offset = types_store_offset, .length = @intCast(types_store_size) }, + .exposed_by_str = .{ .offset = exposed_by_str_offset, .length = @intCast(exposed_by_str_size) }, + .exposed_nodes = .{ .offset = exposed_nodes_offset, .length = @intCast(exposed_nodes_size) }, + .external_decls = .{ .offset = external_decls_offset, .length = @intCast(external_decls_size) }, + .all_defs = cir.all_defs, + .all_statements = cir.all_statements, + }; + + // Get data section (must be aligned) + const data_section = @as([]align(SERIALIZATION_ALIGNMENT) u8, @alignCast(buffer[header_size..])); + + // Assert all offsets are aligned (in debug mode) + std.debug.assert(node_store_offset % SERIALIZATION_ALIGNMENT == 0); + std.debug.assert(string_store_offset % SERIALIZATION_ALIGNMENT == 0); + std.debug.assert(ident_ids_offset % SERIALIZATION_ALIGNMENT == 0); + std.debug.assert(ident_store_offset % SERIALIZATION_ALIGNMENT == 0); + std.debug.assert(line_starts_offset % SERIALIZATION_ALIGNMENT == 0); + std.debug.assert(types_store_offset % SERIALIZATION_ALIGNMENT == 0); + std.debug.assert(exposed_by_str_offset % SERIALIZATION_ALIGNMENT == 0); + std.debug.assert(exposed_nodes_offset % SERIALIZATION_ALIGNMENT == 0); + std.debug.assert(external_decls_offset % SERIALIZATION_ALIGNMENT == 0); + + // Serialize each component + // Since we've ensured all offsets are aligned, we can safely alignCast the slices + _ = try cir.store.serializeInto(@as([]align(SERIALIZATION_ALIGNMENT) u8, @alignCast(data_section[node_store_offset .. node_store_offset + node_store_size]))); + _ = try module_env.strings.serializeInto(data_section[string_store_offset .. string_store_offset + string_store_size]); + _ = try module_env.ident_ids_for_slicing.serializeInto(@as([]align(SERIALIZATION_ALIGNMENT) u8, @alignCast(data_section[ident_ids_offset .. ident_ids_offset + ident_ids_size]))); + _ = try module_env.idents.serializeInto(data_section[ident_store_offset .. 
ident_store_offset + ident_store_size], allocator); + _ = try module_env.line_starts.serializeInto(@as([]align(SERIALIZATION_ALIGNMENT) u8, @alignCast(data_section[line_starts_offset .. line_starts_offset + line_starts_size]))); + _ = try module_env.types.serializeInto(data_section[types_store_offset .. types_store_offset + types_store_size], allocator); + _ = try module_env.exposed_by_str.serializeInto(data_section[exposed_by_str_offset .. exposed_by_str_offset + exposed_by_str_size]); + _ = try module_env.exposed_nodes.serializeInto(data_section[exposed_nodes_offset .. exposed_nodes_offset + exposed_nodes_size]); + _ = try cir.external_decls.serializeInto(@as([]align(SERIALIZATION_ALIGNMENT) u8, @alignCast(data_section[external_decls_offset .. external_decls_offset + external_decls_size]))); + + // TODO Calculate and store checksum + // header.checksum = std.hash.Crc32.hash(data_section[0..total_data_size]); + + return buffer; + } + + /// Load a cache from memory-mapped data + pub fn fromMappedMemory(mapped_data: []align(SERIALIZATION_ALIGNMENT) const u8) !CacheModule { + if (mapped_data.len < @sizeOf(Header)) { + return error.BufferTooSmall; + } + + const header = @as(*const Header, @ptrCast(mapped_data.ptr)); + + // Validate magic number and version + if (header.magic != CACHE_MAGIC) return error.InvalidMagicNumber; + if (header.version != CACHE_VERSION) return error.InvalidVersion; + + // Validate data size + const expected_total_size = @sizeOf(Header) + header.data_size; + if (mapped_data.len < expected_total_size) return error.BufferTooSmall; + + // Get data section (must be aligned) + const header_size = std.mem.alignForward(usize, @sizeOf(Header), SERIALIZATION_ALIGNMENT); + const data = mapped_data[header_size .. header_size + header.data_size]; + + // TODO Validate checksum + // const calculated_checksum = std.hash.Crc32.hash(data); + // if (header.checksum != calculated_checksum) return error.ChecksumMismatch; + + return CacheModule{ + .header = header, + .data = @as([]align(SERIALIZATION_ALIGNMENT) const u8, @alignCast(data)), + }; + } + + /// Restored data from cache + pub const RestoredData = struct { + module_env: base.ModuleEnv, + cir: CIR, + }; + + /// Restore ModuleEnv and CIR from the cached data + pub fn restore(self: *const CacheModule, allocator: Allocator) !RestoredData { + // Deserialize each component + const node_store = try NodeStore.deserializeFrom( + @as([]align(@alignOf(Node)) const u8, @alignCast(self.getComponentData(.node_store))), + allocator, + ); + + const strings = try base.StringLiteral.Store.deserializeFrom(self.getComponentData(.string_store), allocator); + const ident_ids_for_slicing = try SafeList(base.Ident.Idx).deserializeFrom( + @as([]align(@alignOf(base.Ident.Idx)) const u8, @alignCast(self.getComponentData(.ident_ids_for_slicing))), + allocator, + ); + const idents = try base.Ident.Store.deserializeFrom(self.getComponentData(.ident_store), allocator); + const line_starts = try SafeList(u32).deserializeFrom( + @as([]align(@alignOf(u32)) const u8, @alignCast(self.getComponentData(.line_starts))), + allocator, + ); + const types_store = try TypeStore.deserializeFrom(self.getComponentData(.types_store), allocator); + const exposed_by_str = try SafeStringHashMap(void).deserializeFrom(self.getComponentData(.exposed_by_str), allocator); + const exposed_nodes = try SafeStringHashMap(u16).deserializeFrom(self.getComponentData(.exposed_nodes), allocator); + + // Create ModuleEnv from deserialized components + var module_env = base.ModuleEnv{ + .gpa = 
allocator,
+            .idents = idents,
+            .ident_ids_for_slicing = ident_ids_for_slicing,
+            .strings = strings,
+            .types = types_store,
+            .exposed_by_str = exposed_by_str,
+            .exposed_nodes = exposed_nodes,
+            .line_starts = line_starts,
+        };
+        errdefer module_env.deinit();
+
+        // Deserialize external_decls
+        const external_decls = try CIR.ExternalDecl.SafeList.deserializeFrom(
+            @as([]align(@alignOf(CIR.ExternalDecl)) const u8, @alignCast(self.getComponentData(.external_decls))),
+            allocator,
+        );
+
+        // Create result struct
+        var result = RestoredData{
+            .module_env = module_env,
+            .cir = CIR{
+                .env = undefined, // Will be set below
+                .store = node_store,
+                .temp_source_for_sexpr = null,
+                .all_defs = self.header.all_defs,
+                .all_statements = self.header.all_statements,
+                .external_decls = external_decls,
+                .imports = CIR.Import.Store.init(),
+            },
+        };
+
+        // Point env at the module_env inside this RestoredData. Note that this
+        // pointer is only valid at the struct's current address: callers that
+        // move the returned RestoredData (restoreFromCache above, the tests
+        // below) must re-point cir.env after the move.
+        result.cir.env = &result.module_env;
+
+        return result;
+    }
+
+    /// Get the raw data for a specific component
+    pub fn getComponentData(self: *const CacheModule, comptime component: ComponentType) []const u8 {
+        const info = switch (component) {
+            .node_store => self.header.node_store,
+            .string_store => self.header.string_store,
+            .ident_ids_for_slicing => self.header.ident_ids_for_slicing,
+            .ident_store => self.header.ident_store,
+            .line_starts => self.header.line_starts,
+            .types_store => self.header.types_store,
+            .exposed_by_str => self.header.exposed_by_str,
+            .exposed_nodes => self.header.exposed_nodes,
+            .external_decls => self.header.external_decls,
+        };
+        return self.data[info.offset .. info.offset + info.length];
+    }
+
+    /// Get diagnostic information about the cache
+    pub fn getDiagnostics(self: *const CacheModule) Diagnostics {
+        return Diagnostics{
+            .total_size = @sizeOf(Header) + self.header.data_size,
+            .header_size = @sizeOf(Header),
+            .data_size = self.header.data_size,
+            .checksum = self.header.checksum,
+            .component_sizes = .{
+                .node_store = self.header.node_store.length,
+                .string_store = self.header.string_store.length,
+                .ident_ids_for_slicing = self.header.ident_ids_for_slicing.length,
+                .ident_store = self.header.ident_store.length,
+                .line_starts = self.header.line_starts.length,
+                .types_store = self.header.types_store.length,
+                .exposed_by_str = self.header.exposed_by_str.length,
+                .exposed_nodes = self.header.exposed_nodes.length,
+                .external_decls = self.header.external_decls.length,
+            },
+        };
+    }
+
+    /// Validate the cache structure and integrity
+    pub fn validate(self: *const CacheModule) !void {
+        // Validate component offsets are within bounds
+        inline for (std.meta.fields(ComponentType)) |field| {
+            const component = @field(ComponentType, field.name);
+            const info = switch (component) {
+                .node_store => self.header.node_store,
+                .string_store => self.header.string_store,
+                .ident_ids_for_slicing => self.header.ident_ids_for_slicing,
+                .ident_store => self.header.ident_store,
+                .line_starts => self.header.line_starts,
+                .types_store => self.header.types_store,
+                .exposed_by_str => self.header.exposed_by_str,
+                .exposed_nodes => self.header.exposed_nodes,
+                .external_decls => self.header.external_decls,
+            };
+
+            if (info.offset + info.length > self.header.data_size) {
+                return error.ComponentOutOfBounds;
+            }
+        }
+    }
+
+    /// Convenience function for writing cache files
+    pub fn writeToFile(
+        allocator: Allocator,
+        cache_data: []const u8,
+        file_path: []const u8,
+        filesystem: anytype,
+    ) !void {
+        _ = allocator;
+        try filesystem.writeFile(file_path, cache_data);
+    }
+
+    /// Convenience function for reading cache files
+    pub fn readFromFile(
+        allocator: Allocator,
+        file_path: []const u8,
+        filesystem: anytype,
+    ) ![]align(SERIALIZATION_ALIGNMENT) u8 {
+        const file_data = try filesystem.readFile(file_path, allocator);
+        defer allocator.free(file_data);
+
+        const buffer = try allocator.alignedAlloc(u8, SERIALIZATION_ALIGNMENT, file_data.len);
+        @memcpy(buffer, file_data);
+
+        return buffer;
+    }
+
+    /// Tagged union to represent cache data that can be either memory-mapped or heap-allocated
+    pub const CacheData = union(enum) {
+        mapped: struct {
+            ptr: [*]align(SERIALIZATION_ALIGNMENT) const u8,
+            len: usize,
+            unaligned_ptr: [*]const u8,
+            unaligned_len: usize,
+        },
+        allocated: []align(SERIALIZATION_ALIGNMENT) const u8,
+
+        pub fn data(self: CacheData) []align(SERIALIZATION_ALIGNMENT) const u8 {
+            return switch (self) {
+                .mapped => |m| m.ptr[0..m.len],
+                .allocated => |a| a,
+            };
+        }
+
+        pub fn deinit(self: CacheData, allocator: Allocator) void {
+            switch (self) {
+                .mapped => |m| {
+                    // Use the unaligned pointer for munmap
+                    if (comptime @hasDecl(std.posix, "munmap") and @import("builtin").target.os.tag != .windows) {
+                        const page_aligned_ptr = @as([*]align(std.heap.page_size_min) const u8, @alignCast(m.unaligned_ptr));
+                        std.posix.munmap(page_aligned_ptr[0..m.unaligned_len]);
+                    }
+                },
+                .allocated => |a| allocator.free(a),
+            }
+        }
+    };
+
+    /// Read cache file using memory mapping for better performance when available
+    pub fn readFromFileMapped(
+        allocator: Allocator,
+        file_path: []const u8,
+        filesystem: anytype,
+    ) !CacheData {
+        // Try to use memory mapping on supported platforms
+        if (comptime @hasDecl(std.posix, "mmap") and @import("builtin").target.os.tag != .windows) {
+            // Open the file
+            const file = std.fs.cwd().openFile(file_path, .{ .mode = .read_only }) catch {
+                // Fall back to regular reading on open error
+                const data = try readFromFile(allocator, file_path, filesystem);
+                return CacheData{ .allocated = data };
+            };
+            defer file.close();
+
+            // Get file size
+            const stat = try file.stat();
+            const file_size = stat.size;
+
+            // Check if file size exceeds usize limits on 32-bit systems
+            if (file_size > std.math.maxInt(usize)) {
+                // Fall back to regular reading for very large files
+                const data = try readFromFile(allocator, file_path, filesystem);
+                return CacheData{ .allocated = data };
+            }
+
+            const file_size_usize = @as(usize, @intCast(file_size));
+
+            // Memory map the file; the same flags work on every supported POSIX target
+            const mapped_memory = std.posix.mmap(
+                null,
+                file_size_usize,
+                std.posix.PROT.READ,
+                .{ .TYPE = .PRIVATE },
+                file.handle,
+                0,
+            );
+
+            const result = mapped_memory catch {
+                // Fall back to regular reading on mmap error
+                const data = try readFromFile(allocator, file_path, filesystem);
+                return CacheData{ .allocated = data };
+            };
+
+            // Find the aligned portion within the mapped memory
+            const unaligned_ptr = @as([*]const u8, @ptrCast(result.ptr));
+            const addr = @intFromPtr(unaligned_ptr);
+            const aligned_addr = std.mem.alignForward(usize, addr, SERIALIZATION_ALIGNMENT);
+            const offset = aligned_addr - addr;
+
+            if (offset >= file_size_usize) {
+                // File is too small to contain
aligned data + if (comptime @hasDecl(std.posix, "munmap") and @import("builtin").target.os.tag != .windows) { + std.posix.munmap(result); + } + const data = try readFromFile(allocator, file_path, filesystem); + return CacheData{ .allocated = data }; + } + + const aligned_ptr = @as([*]align(SERIALIZATION_ALIGNMENT) const u8, @ptrFromInt(aligned_addr)); + const aligned_len = file_size_usize - offset; + + return CacheData{ + .mapped = .{ + .ptr = aligned_ptr, + .len = aligned_len, + .unaligned_ptr = unaligned_ptr, + .unaligned_len = file_size_usize, + }, + }; + } else { + // Platform doesn't support mmap, use regular file reading + const data = try readFromFile(allocator, file_path, filesystem); + return CacheData{ .allocated = data }; + } + } +}; + +/// Enum for component types +const ComponentType = enum { + node_store, + string_store, + ident_ids_for_slicing, + ident_store, + line_starts, + types_store, + exposed_by_str, + exposed_nodes, + external_decls, +}; + +/// Diagnostic information about a cache +pub const Diagnostics = struct { + total_size: u32, + header_size: u32, + data_size: u32, + checksum: u32, + component_sizes: struct { + node_store: u32, + string_store: u32, + ident_ids_for_slicing: u32, + ident_store: u32, + line_starts: u32, + types_store: u32, + exposed_by_str: u32, + exposed_nodes: u32, + external_decls: u32, + }, +}; + +test "Header alignment" { + // Verify the header is properly aligned + try std.testing.expect(@sizeOf(Header) % SERIALIZATION_ALIGNMENT == 0); +} + +test "create and restore cache" { + const gpa = std.testing.allocator; + + // Real Roc module source for comprehensive testing + const source = + \\module [foo] + \\ + \\foo : U64 -> Str + \\foo = |num| + \\ when num is + \\ 42 -> "forty-two" + \\ _ -> Num.toStr num + \\ + ; + + // Parse the source + var module_env = base.ModuleEnv.init(gpa); + defer module_env.deinit(); + + var cir = CIR.init(&module_env); + defer cir.deinit(); + + // Parse and canonicalize + var ast = parse(&module_env, source); + defer ast.deinit(gpa); + + var canonicalizer = try canonicalize.init(&cir, &ast, null); + defer canonicalizer.deinit(); + try canonicalizer.canonicalizeFile(); + + // Generate original S-expression for comparison + var original_tree = SExprTree.init(gpa); + defer original_tree.deinit(); + CIR.pushToSExprTree(&cir, null, &original_tree, source); + + var original_sexpr = std.ArrayList(u8).init(gpa); + defer original_sexpr.deinit(); + original_tree.toStringPretty(original_sexpr.writer().any()); + + // Create cache from real data + const cache_data = try CacheModule.create(gpa, &module_env, &cir); + defer gpa.free(cache_data); + + // Load cache + var cache = try CacheModule.fromMappedMemory(cache_data); + + // Validate cache + try cache.validate(); + + // Restore ModuleEnv and CIR + const restored = try cache.restore(gpa); + + var restored_module_env = restored.module_env; + defer restored_module_env.deinit(); + var restored_cir = restored.cir; + defer restored_cir.deinit(); + + // Fix env pointer after struct move + restored_cir.env = &restored_module_env; + + // Generate S-expression from restored CIR + var restored_tree = SExprTree.init(gpa); + defer restored_tree.deinit(); + + CIR.pushToSExprTree(&restored_cir, null, &restored_tree, source); + + var restored_sexpr = std.ArrayList(u8).init(gpa); + defer restored_sexpr.deinit(); + + restored_tree.toStringPretty(restored_sexpr.writer().any()); + + // Verify round-trip integrity + try std.testing.expect(std.mem.eql(u8, original_sexpr.items, restored_sexpr.items)); 
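The round-trip tests in this file compare pretty-printed S-expressions rather than doing field-by-field struct comparison, which keeps the check stable as CIR internals evolve. The repeated buffer boilerplate could be factored into a helper along these lines (hypothetical, assuming the `SExprTree` import above; `expectEqualStrings` also prints a diff on failure, unlike `expect(std.mem.eql(...))`):

    fn expectSameSExpr(gpa: std.mem.Allocator, expected: *SExprTree, actual: *SExprTree) !void {
        var expected_buf = std.ArrayList(u8).init(gpa);
        defer expected_buf.deinit();
        expected.toStringPretty(expected_buf.writer().any());

        var actual_buf = std.ArrayList(u8).init(gpa);
        defer actual_buf.deinit();
        actual.toStringPretty(actual_buf.writer().any());

        try std.testing.expectEqualStrings(expected_buf.items, actual_buf.items);
    }
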
+ + // Get diagnostics + const diagnostics = cache.getDiagnostics(); + try std.testing.expect(diagnostics.total_size > 0); +} + +test "cache filesystem roundtrip with in-memory storage" { + const gpa = std.testing.allocator; + + // Real Roc module source for comprehensive testing + const source = + \\module [foo] + \\ + \\foo : U64 -> Str + \\foo = |num| num.to_str() + ; + + // Parse the source + var module_env = base.ModuleEnv.init(gpa); + defer module_env.deinit(); + + var cir = CIR.init(&module_env); + defer cir.deinit(); + + // Parse and canonicalize + var ast = parse(&module_env, source); + defer ast.deinit(gpa); + + var canonicalizer = try canonicalize.init(&cir, &ast, null); + defer canonicalizer.deinit(); + try canonicalizer.canonicalizeFile(); + + // Generate original S-expression for comparison + var original_tree = SExprTree.init(gpa); + defer original_tree.deinit(); + CIR.pushToSExprTree(&cir, null, &original_tree, source); + + var original_sexpr = std.ArrayList(u8).init(gpa); + defer original_sexpr.deinit(); + original_tree.toStringPretty(original_sexpr.writer().any()); + + // Create cache from real data + const cache_data = try CacheModule.create(gpa, &module_env, &cir); + defer gpa.free(cache_data); + + // In-memory file storage for comprehensive mock filesystem + var file_storage = std.StringHashMap([]const u8).init(gpa); + defer { + var iterator = file_storage.iterator(); + while (iterator.next()) |entry| { + gpa.free(entry.value_ptr.*); + } + file_storage.deinit(); + } + + // Create comprehensive mock filesystem with proper storage using static variables + var filesystem = Filesystem.testing(); + + const MockFS = struct { + var storage: ?*std.StringHashMap([]const u8) = null; + var allocator: ?Allocator = null; + + fn writeFile(path: []const u8, contents: []const u8) Filesystem.WriteError!void { + const store = storage orelse return error.SystemResources; + const alloc = allocator orelse return error.SystemResources; + + // Store a copy of the contents in our storage + const stored_contents = alloc.dupe(u8, contents) catch return error.SystemResources; + + // Free existing content if path already exists + if (store.get(path)) |existing| { + alloc.free(existing); + } + + // Store the new content + store.put(path, stored_contents) catch { + alloc.free(stored_contents); + return error.SystemResources; + }; + } + + fn readFile(path: []const u8, alloc: Allocator) Filesystem.ReadError![]const u8 { + const store = storage orelse return error.FileNotFound; + + if (store.get(path)) |contents| { + return alloc.dupe(u8, contents) catch return error.OutOfMemory; + } else { + return error.FileNotFound; + } + } + }; + + // Initialize the static variables + MockFS.storage = &file_storage; + MockFS.allocator = gpa; + + filesystem.writeFile = MockFS.writeFile; + filesystem.readFile = MockFS.readFile; + + // Test full roundtrip: write cache to mock filesystem + const test_path = "comprehensive_test_cache.bin"; + try CacheModule.writeToFile(gpa, cache_data, test_path, filesystem); + + // Verify the data was stored + try std.testing.expect(file_storage.contains(test_path)); + + // Read the cache back from mock filesystem + const read_cache_data = try CacheModule.readFromFile(gpa, test_path, filesystem); + defer gpa.free(read_cache_data); + + // Verify the read data matches the original + try std.testing.expectEqualSlices(u8, cache_data, read_cache_data); + + // Load and validate the cache from the roundtrip data + var roundtrip_cache = try CacheModule.fromMappedMemory(read_cache_data); + try 
roundtrip_cache.validate(); + + // Restore from the roundtrip cache + const restored = try roundtrip_cache.restore(gpa); + + var restored_module_env = restored.module_env; + defer restored_module_env.deinit(); + var restored_cir = restored.cir; + defer restored_cir.deinit(); + + // Fix env pointer after struct move + restored_cir.env = &restored_module_env; + + // Generate S-expression from restored CIR + var restored_tree = SExprTree.init(gpa); + defer restored_tree.deinit(); + + CIR.pushToSExprTree(&restored_cir, null, &restored_tree, source); + + var restored_sexpr = std.ArrayList(u8).init(gpa); + defer restored_sexpr.deinit(); + + restored_tree.toStringPretty(restored_sexpr.writer().any()); + + // Verify complete roundtrip integrity + try std.testing.expect(std.mem.eql(u8, original_sexpr.items, restored_sexpr.items)); + + // Get diagnostics to ensure they're preserved + const diagnostics = roundtrip_cache.getDiagnostics(); + try std.testing.expect(diagnostics.total_size > 0); +} diff --git a/src/cache/mod.zig b/src/cache/mod.zig new file mode 100644 index 0000000000..1a6aed388e --- /dev/null +++ b/src/cache/mod.zig @@ -0,0 +1,105 @@ +//! Cache module for the Roc compiler +//! +//! This module provides memory-mapped caching for compiled Roc modules, +//! allowing fast serialization and deserialization of ModuleEnv and CIR data. + +const std = @import("std"); + +// Re-export the unified cache +pub const CacheModule = @import("CacheModule.zig").CacheModule; +pub const Header = @import("CacheModule.zig").Header; +pub const Diagnostics = @import("CacheModule.zig").Diagnostics; + +// Re-export new cache management components +pub const CacheManager = @import("CacheManager.zig").CacheManager; +pub const CacheResult = @import("CacheManager.zig").CacheResult; +pub const CacheKey = @import("CacheKey.zig").CacheKey; +pub const CacheConfig = @import("CacheConfig.zig").CacheConfig; +pub const CacheStats = @import("CacheConfig.zig").CacheStats; + +/// Cache configuration constants +pub const Config = struct { + /// Default cache directory name + pub const DEFAULT_CACHE_DIR = ".roc_cache"; + + /// Default file extension for cache files + pub const CACHE_FILE_EXT = ".rcache"; + + /// Maximum cache file size (256MB) + pub const MAX_CACHE_SIZE = 256 * 1024 * 1024; + + /// Cache format version + pub const CACHE_VERSION = 1; +}; + +/// Statistics for cache operations (optional, for debugging) +pub const Stats = struct { + hits: u32 = 0, + misses: u32 = 0, + writes: u32 = 0, + errors: u32 = 0, + total_bytes_written: u64 = 0, + total_bytes_read: u64 = 0, + + pub fn reset(self: *Stats) void { + self.* = .{}; + } + + pub fn hitRate(self: *const Stats) f64 { + const total = self.hits + self.misses; + if (total == 0) return 0.0; + return @as(f64, @floatFromInt(self.hits)) / @as(f64, @floatFromInt(total)); + } + + pub fn print(self: *const Stats, writer: std.io.AnyWriter) !void { + try writer.print("Cache Stats:\n", .{}); + try writer.print(" Hits: {d}\n", .{self.hits}); + try writer.print(" Misses: {d}\n", .{self.misses}); + try writer.print(" Writes: {d}\n", .{self.writes}); + try writer.print(" Errors: {d}\n", .{self.errors}); + try writer.print(" Hit Rate: {d:.2}%\n", .{self.hitRate() * 100.0}); + try writer.print(" Bytes Written: {d}\n", .{self.total_bytes_written}); + try writer.print(" Bytes Read: {d}\n", .{self.total_bytes_read}); + } +}; + +/// Global cache statistics (optional, for debugging) +var global_stats: Stats = .{}; + +/// Get reference to global stats +pub fn getGlobalStats() *Stats { + return 
&global_stats; +} + +/// Reset global stats +pub fn resetGlobalStats() void { + global_stats.reset(); +} + +/// Print global stats to stderr +pub fn printGlobalStats() !void { + const stderr = std.io.getStdErr().writer(); + try global_stats.print(stderr.any()); +} + +test "cache module" { + // Basic test to ensure module compiles and types are accessible + const allocator = std.testing.allocator; + + // Test that we can access the main types + _ = CacheModule; + _ = Header; + _ = Diagnostics; + + // Test stats functionality + var stats = Stats{}; + stats.hits = 10; + stats.misses = 5; + try std.testing.expectEqual(@as(f64, 2.0 / 3.0), stats.hitRate()); + + // Test config constants + try std.testing.expect(std.mem.eql(u8, Config.DEFAULT_CACHE_DIR, ".roc_cache")); + try std.testing.expect(std.mem.eql(u8, Config.CACHE_FILE_EXT, ".rcache")); + + _ = allocator; // Suppress unused variable warning +} diff --git a/src/check/canonicalize/CIR.zig b/src/check/canonicalize/CIR.zig index 2ab38d04f8..b19060f42b 100644 --- a/src/check/canonicalize/CIR.zig +++ b/src/check/canonicalize/CIR.zig @@ -8,6 +8,7 @@ const tracy = @import("../../tracy.zig"); const types = @import("../../types.zig"); const collections = @import("../../collections.zig"); const reporting = @import("../../reporting.zig"); +const serialization = @import("../../serialization/mod.zig"); const exitOnOom = collections.utils.exitOnOom; const SExpr = base.SExpr; const Scratch = base.Scratch; @@ -20,10 +21,10 @@ const StringLiteral = base.StringLiteral; const CalledVia = base.CalledVia; const SExprTree = base.SExprTree; const TypeVar = types.Var; -const NodeStore = @import("NodeStore.zig"); pub const RocDec = @import("../../builtins/dec.zig").RocDec; pub const Node = @import("Node.zig"); +pub const NodeStore = @import("NodeStore.zig"); pub const Expr = @import("Expression.zig").Expr; pub const Pattern = @import("Pattern.zig").Pattern; pub const Statement = @import("Statement.zig").Statement; @@ -58,7 +59,7 @@ all_defs: Def.Span, /// All the top-level statements in the module, populated by calling `canonicalize_file` all_statements: Statement.Span, /// All external declarations referenced in this module -external_decls: std.ArrayList(ExternalDecl), +external_decls: ExternalDecl.SafeList, /// Store for interned module imports imports: Import.Store, @@ -78,7 +79,20 @@ pub fn init(env: *ModuleEnv) CIR { .store = NodeStore.initCapacity(env.gpa, NODE_STORE_CAPACITY), .all_defs = .{ .span = .{ .start = 0, .len = 0 } }, .all_statements = .{ .span = .{ .start = 0, .len = 0 } }, - .external_decls = std.ArrayList(ExternalDecl).init(env.gpa), + .external_decls = ExternalDecl.SafeList.initCapacity(env.gpa, 16), + .imports = Import.Store.init(), + }; +} + +/// Create a CIR from cached data, completely rehydrating from cache +pub fn fromCache(env: *ModuleEnv, cached_store: NodeStore, all_defs: Def.Span, all_statements: Statement.Span) CIR { + return CIR{ + .env = env, + .store = cached_store, + .temp_source_for_sexpr = null, + .all_defs = all_defs, + .all_statements = all_statements, + .external_decls = ExternalDecl.SafeList.initCapacity(env.gpa, 16), .imports = Import.Store.init(), }; } @@ -86,7 +100,7 @@ pub fn init(env: *ModuleEnv) CIR { /// Deinit the IR's memory. pub fn deinit(self: *CIR) void { self.store.deinit(); - self.external_decls.deinit(); + self.external_decls.deinit(self.env.gpa); self.imports.deinit(self.env.gpa); } @@ -552,28 +566,29 @@ pub fn setTypeVarAt(self: *CIR, at_idx: Node.Idx, content: types.Content) types. 
/// Adds an external declaration to the CIR and returns its index pub fn pushExternalDecl(self: *CIR, decl: ExternalDecl) ExternalDecl.Idx { - const idx = @as(u32, @intCast(self.external_decls.items.len)); - self.external_decls.append(decl) catch |err| exitOnOom(err); + const idx = @as(u32, @intCast(self.external_decls.len())); + _ = self.external_decls.append(self.env.gpa, decl); return @enumFromInt(idx); } /// Retrieves an external declaration by its index pub fn getExternalDecl(self: *const CIR, idx: ExternalDecl.Idx) *const ExternalDecl { - return &self.external_decls.items[@intFromEnum(idx)]; + return self.external_decls.get(@as(ExternalDecl.SafeList.Idx, @enumFromInt(@intFromEnum(idx)))); } /// Adds multiple external declarations and returns a span pub fn pushExternalDecls(self: *CIR, decls: []const ExternalDecl) ExternalDecl.Span { - const start = @as(u32, @intCast(self.external_decls.items.len)); + const start = @as(u32, @intCast(self.external_decls.len())); for (decls) |decl| { - self.external_decls.append(decl) catch |err| exitOnOom(err); + _ = self.external_decls.append(self.env.gpa, decl); } - return .{ .span = .{ .start = start, .len = @as(u32, @intCast(decls.len)) } }; + return ExternalDecl.Span{ .span = .{ .start = start, .len = @as(u32, @intCast(decls.len)) } }; } /// Gets a slice of external declarations from a span pub fn sliceExternalDecls(self: *const CIR, span: ExternalDecl.Span) []const ExternalDecl { - return self.external_decls.items[span.span.start .. span.span.start + span.span.len]; + const range = ExternalDecl.SafeList.Range{ .start = @enumFromInt(span.span.start), .end = @enumFromInt(span.span.start + span.span.len) }; + return self.external_decls.rangeToSlice(range); } /// Retrieves the text of an identifier by its index @@ -589,7 +604,108 @@ fn formatPatternIdxNode(gpa: std.mem.Allocator, pattern_idx: Pattern.Idx) SExpr } test "Node is 16 bytes" { - try testing.expectEqual(16, @sizeOf(Node)); + try std.testing.expectEqual(16, @sizeOf(Node)); +} + +test "ExternalDecl serialization round-trip" { + const gpa = std.testing.allocator; + + // Create original external declaration + const original = ExternalDecl{ + .qualified_name = @bitCast(@as(u32, 123)), + .module_name = @bitCast(@as(u32, 456)), + .local_name = @bitCast(@as(u32, 789)), + .type_var = @enumFromInt(999), + .kind = .value, + .region = Region{ + .start = .{ .offset = 10 }, + .end = .{ .offset = 20 }, + }, + }; + + // Serialize + const serialized_size = original.serializedSize(); + const buffer = try gpa.alloc(u8, serialized_size); + defer gpa.free(buffer); + + const serialized = try original.serializeInto(buffer); + try std.testing.expectEqual(serialized_size, serialized.len); + + // Deserialize + const restored = try ExternalDecl.deserializeFrom(serialized); + + // Verify all fields are identical + try std.testing.expectEqual(original.qualified_name, restored.qualified_name); + try std.testing.expectEqual(original.module_name, restored.module_name); + try std.testing.expectEqual(original.local_name, restored.local_name); + try std.testing.expectEqual(original.type_var, restored.type_var); + try std.testing.expectEqual(original.kind, restored.kind); + try std.testing.expectEqual(original.region.start.offset, restored.region.start.offset); + try std.testing.expectEqual(original.region.end.offset, restored.region.end.offset); +} + +test "ExternalDecl serialization comprehensive" { + const gpa = std.testing.allocator; + + // Test various external declarations including edge cases + const decl1 = 
ExternalDecl{ + .qualified_name = @bitCast(@as(u32, 0)), // minimum value + .module_name = @bitCast(@as(u32, 1)), + .local_name = @bitCast(@as(u32, 2)), + .type_var = @enumFromInt(0), + .kind = .value, + .region = Region{ + .start = .{ .offset = 0 }, + .end = .{ .offset = 1 }, + }, + }; + + const decl2 = ExternalDecl{ + .qualified_name = @bitCast(@as(u32, 0xFFFFFFFF)), // maximum value + .module_name = @bitCast(@as(u32, 0xFFFFFFFE)), + .local_name = @bitCast(@as(u32, 0xFFFFFFFD)), + .type_var = @enumFromInt(0xFFFFFFFF), + .kind = .type, + .region = Region{ + .start = .{ .offset = 0xFFFFFFFF }, + .end = .{ .offset = 0xFFFFFFFE }, + }, + }; + + // Test serialization using the testing framework + try serialization.testing.testSerialization(ExternalDecl, &decl1, gpa); + try serialization.testing.testSerialization(ExternalDecl, &decl2, gpa); +} + +test "ExternalDecl different kinds serialization" { + const gpa = std.testing.allocator; + + const value_decl = ExternalDecl{ + .qualified_name = @bitCast(@as(u32, 100)), + .module_name = @bitCast(@as(u32, 200)), + .local_name = @bitCast(@as(u32, 300)), + .type_var = @enumFromInt(400), + .kind = .value, + .region = Region{ + .start = .{ .offset = 50 }, + .end = .{ .offset = 75 }, + }, + }; + + const type_decl = ExternalDecl{ + .qualified_name = @bitCast(@as(u32, 100)), + .module_name = @bitCast(@as(u32, 200)), + .local_name = @bitCast(@as(u32, 300)), + .type_var = @enumFromInt(400), + .kind = .type, + .region = Region{ + .start = .{ .offset = 50 }, + .end = .{ .offset = 75 }, + }, + }; + + try serialization.testing.testSerialization(ExternalDecl, &value_decl, gpa); + try serialization.testing.testSerialization(ExternalDecl, &type_decl, gpa); } /// A working representation of a record field @@ -989,6 +1105,9 @@ pub const ExternalDecl = struct { pub const Idx = enum(u32) { _ }; pub const Span = struct { span: DataSpan }; + /// A safe list of external declarations + pub const SafeList = collections.SafeList(ExternalDecl); + pub fn pushToSExprTree(self: *const @This(), ir: *const CIR, tree: *SExprTree) void { const begin = tree.beginNode(); tree.pushStaticAtom("ext-decl"); @@ -1024,6 +1143,103 @@ pub const ExternalDecl = struct { const attrs = tree.beginNode(); tree.endNode(begin, attrs); } + + /// Calculate the serialized size of this external declaration + pub fn serializedSize(self: *const @This()) usize { + _ = self; + return @sizeOf(u32) + // qualified_name + @sizeOf(u32) + // module_name + @sizeOf(u32) + // local_name + @sizeOf(u32) + // type_var + @sizeOf(u8) + // kind (enum) + @sizeOf(u32) + // region.start.offset + @sizeOf(u32); // region.end.offset + } + + /// Serialize this external declaration into the provided buffer + pub fn serializeInto(self: *const @This(), buffer: []u8) ![]const u8 { + const size = self.serializedSize(); + if (buffer.len < size) return error.BufferTooSmall; + + var offset: usize = 0; + + // Serialize qualified_name + std.mem.writeInt(u32, buffer[offset .. offset + 4][0..4], @bitCast(self.qualified_name), .little); + offset += 4; + + // Serialize module_name + std.mem.writeInt(u32, buffer[offset .. offset + 4][0..4], @bitCast(self.module_name), .little); + offset += 4; + + // Serialize local_name + std.mem.writeInt(u32, buffer[offset .. offset + 4][0..4], @bitCast(self.local_name), .little); + offset += 4; + + // Serialize type_var + std.mem.writeInt(u32, buffer[offset .. 
offset + 4][0..4], @intFromEnum(self.type_var), .little); + offset += 4; + + // Serialize kind + buffer[offset] = switch (self.kind) { + .value => 0, + .type => 1, + }; + offset += 1; + + // Serialize region + std.mem.writeInt(u32, buffer[offset .. offset + 4][0..4], self.region.start.offset, .little); + offset += 4; + std.mem.writeInt(u32, buffer[offset .. offset + 4][0..4], self.region.end.offset, .little); + offset += 4; + + return buffer[0..offset]; + } + + /// Deserialize an external declaration from the provided buffer + pub fn deserializeFrom(buffer: []const u8) !@This() { + var offset: usize = 0; + const needed_size = @sizeOf(u32) * 6 + @sizeOf(u8); + if (buffer.len < needed_size) return error.BufferTooSmall; + + const qualified_name: Ident.Idx = @bitCast(std.mem.readInt(u32, buffer[offset .. offset + 4][0..4], .little)); + offset += 4; + + const module_name: Ident.Idx = @bitCast(std.mem.readInt(u32, buffer[offset .. offset + 4][0..4], .little)); + offset += 4; + + const local_name: Ident.Idx = @bitCast(std.mem.readInt(u32, buffer[offset .. offset + 4][0..4], .little)); + offset += 4; + + const type_var: TypeVar = @enumFromInt(std.mem.readInt(u32, buffer[offset .. offset + 4][0..4], .little)); + offset += 4; + + const kind_byte = buffer[offset]; + offset += 1; + const kind: @TypeOf(@as(@This(), undefined).kind) = switch (kind_byte) { + 0 => .value, + 1 => .type, + else => return error.InvalidKind, + }; + + const start_offset = std.mem.readInt(u32, buffer[offset .. offset + 4][0..4], .little); + offset += 4; + const end_offset = std.mem.readInt(u32, buffer[offset .. offset + 4][0..4], .little); + offset += 4; + + const region = Region{ + .start = .{ .offset = start_offset }, + .end = .{ .offset = end_offset }, + }; + + return @This(){ + .qualified_name = qualified_name, + .module_name = module_name, + .local_name = local_name, + .type_var = type_var, + .kind = kind, + .region = region, + }; + } }; /// Tracks type variables introduced during annotation canonicalization @@ -1163,7 +1379,7 @@ pub fn pushToSExprTree(ir: *CIR, maybe_expr_idx: ?Expr.Idx, tree: *SExprTree, so const defs_slice = ir.store.sliceDefs(ir.all_defs); const statements_slice = ir.store.sliceStatements(ir.all_statements); - if (defs_slice.len == 0 and statements_slice.len == 0 and ir.external_decls.items.len == 0) { + if (defs_slice.len == 0 and statements_slice.len == 0 and ir.external_decls.len() == 0) { tree.pushBoolPair("empty", true); } const attrs = tree.beginNode(); @@ -1176,7 +1392,8 @@ pub fn pushToSExprTree(ir: *CIR, maybe_expr_idx: ?Expr.Idx, tree: *SExprTree, so ir.store.getStatement(stmt_idx).pushToSExprTree(ir, tree, stmt_idx); } - for (ir.external_decls.items) |*external_decl| { + for (0..ir.external_decls.len()) |i| { + const external_decl = ir.external_decls.get(@enumFromInt(i)); external_decl.pushToSExprTree(ir, tree); } @@ -1211,7 +1428,7 @@ pub fn calcRegionInfo(self: *const CIR, region: Region) base.RegionInfo { return empty; }; - const info = base.RegionInfo.position(source, self.env.line_starts.items, region.start.offset, region.end.offset) catch { + const info = base.RegionInfo.position(source, self.env.line_starts.items.items, region.start.offset, region.end.offset) catch { // Return a zero position if we can't calculate it return empty; }; @@ -1282,7 +1499,7 @@ pub fn pushTypesToSExprTree(ir: *CIR, maybe_expr_idx: ?Expr.Idx, tree: *SExprTre ir.appendRegionInfoToSExprTree(tree, expr_idx); - if (@intFromEnum(expr_var) > ir.env.types.slots.backing.items.len) { + if (@intFromEnum(expr_var) > 
ir.env.types.slots.backing.len()) { const unknown_begin = tree.beginNode(); tree.pushStaticAtom("unknown"); const unknown_attrs = tree.beginNode(); @@ -1447,7 +1664,7 @@ pub fn pushTypesToSExprTree(ir: *CIR, maybe_expr_idx: ?Expr.Idx, tree: *SExprTre const expr_region = ir.store.getExprRegion(def.expr); ir.appendRegionInfoToSExprTreeFromRegion(tree, expr_region); - if (@intFromEnum(expr_var) > ir.env.types.slots.backing.items.len) { + if (@intFromEnum(expr_var) > ir.env.types.slots.backing.len()) { const unknown_begin = tree.beginNode(); tree.pushStaticAtom("unknown"); const unknown_attrs = tree.beginNode(); diff --git a/src/check/canonicalize/NodeStore.zig b/src/check/canonicalize/NodeStore.zig index 6b8a0a5405..1f8f88d24d 100644 --- a/src/check/canonicalize/NodeStore.zig +++ b/src/check/canonicalize/NodeStore.zig @@ -8,6 +8,7 @@ const Node = @import("Node.zig"); const CIR = @import("CIR.zig"); const RocDec = @import("../../builtins/dec.zig").RocDec; const PackedDataSpan = @import("../../base/PackedDataSpan.zig"); +const SERIALIZATION_ALIGNMENT = @import("../../serialization/mod.zig").SERIALIZATION_ALIGNMENT; const DataSpan = base.DataSpan; const Region = base.Region; @@ -2787,28 +2788,28 @@ pub fn matchBranchPatternSpanFrom(store: *NodeStore, start: u32) CIR.Expr.Match. /// Calculate the size needed to serialize this NodeStore pub fn serializedSize(self: *const NodeStore) usize { // We only serialize nodes, regions, and extra_data (the scratch arrays are transient) - return self.nodes.serializedSize() + + const raw_size = self.nodes.serializedSize() + self.regions.serializedSize() + @sizeOf(u32) + // extra_data length (self.extra_data.items.len * @sizeOf(u32)); + // Align to SERIALIZATION_ALIGNMENT to maintain alignment for subsequent data + return std.mem.alignForward(usize, raw_size, SERIALIZATION_ALIGNMENT); } /// Serialize this NodeStore into the provided buffer /// Buffer must be at least serializedSize() bytes and properly aligned -pub fn serializeInto(self: *const NodeStore, buffer: []align(@alignOf(Node)) u8) ![]u8 { +pub fn serializeInto(self: *const NodeStore, buffer: []align(SERIALIZATION_ALIGNMENT) u8) ![]u8 { const size = self.serializedSize(); if (buffer.len < size) return error.BufferTooSmall; var offset: usize = 0; // Serialize nodes - cast to proper alignment for Node type - const nodes_buffer = @as([]align(@alignOf(Node)) u8, @alignCast(buffer[offset..])); - const nodes_slice = try self.nodes.serializeInto(nodes_buffer); + const nodes_slice = try self.nodes.serializeInto(@as([]align(SERIALIZATION_ALIGNMENT) u8, @alignCast(buffer[offset..]))); offset += nodes_slice.len; // Serialize regions - const regions_buffer = @as([]align(@alignOf(Region)) u8, @alignCast(buffer[offset..])); - const regions_slice = try self.regions.serializeInto(regions_buffer); + const regions_slice = try self.regions.serializeInto(@as([]align(SERIALIZATION_ALIGNMENT) u8, @alignCast(buffer[offset..]))); offset += regions_slice.len; // Serialize extra_data length @@ -2823,7 +2824,12 @@ pub fn serializeInto(self: *const NodeStore, buffer: []align(@alignOf(Node)) u8) offset += self.extra_data.items.len * @sizeOf(u32); } - return buffer[0..offset]; + // Zero out any padding bytes + if (offset < size) { + @memset(buffer[offset..size], 0); + } + + return buffer[0..size]; } /// Deserialize a NodeStore from the provided buffer diff --git a/src/check/canonicalize/test/exposed_shadowing_test.zig b/src/check/canonicalize/test/exposed_shadowing_test.zig index ca7884f9d6..bc26ff67b8 100644 --- 
a/src/check/canonicalize/test/exposed_shadowing_test.zig +++ b/src/check/canonicalize/test/exposed_shadowing_test.zig @@ -1,3 +1,5 @@ +//! TODO + const std = @import("std"); const testing = std.testing; const base = @import("../../../base.zig"); diff --git a/src/check/canonicalize/test/import_validation_test.zig b/src/check/canonicalize/test/import_validation_test.zig index 3819efccef..e8bef28ed4 100644 --- a/src/check/canonicalize/test/import_validation_test.zig +++ b/src/check/canonicalize/test/import_validation_test.zig @@ -1,3 +1,5 @@ +//! TODO + const std = @import("std"); const testing = std.testing; const base = @import("../../../base.zig"); diff --git a/src/check/check_types/problem.zig b/src/check/check_types/problem.zig index b50802b9dd..4fb257af91 100644 --- a/src/check/check_types/problem.zig +++ b/src/check/check_types/problem.zig @@ -368,7 +368,7 @@ pub const ReportBuilder = struct { const overall_region_info = base.RegionInfo.position( self.source, - self.module_env.line_starts.items, + self.module_env.line_starts.items.items, overall_start_offset, overall_end_offset, ) catch return report; @@ -376,14 +376,14 @@ pub const ReportBuilder = struct { // Get region info for both elements const actual_region_info = base.RegionInfo.position( self.source, - self.module_env.line_starts.items, + self.module_env.line_starts.items.items, actual_region.start.offset, actual_region.end.offset, ) catch return report; const expected_region_info = base.RegionInfo.position( self.source, - self.module_env.line_starts.items, + self.module_env.line_starts.items.items, expected_region.start.offset, expected_region.end.offset, ) catch return report; @@ -478,7 +478,7 @@ pub const ReportBuilder = struct { const actual_region = self.can_ir.store.getNodeRegion(@enumFromInt(@intFromEnum(types.actual_var))); const actual_region_info = base.RegionInfo.position( self.source, - self.module_env.line_starts.items, + self.module_env.line_starts.items.items, actual_region.start.offset, actual_region.end.offset, ) catch return report; @@ -590,7 +590,7 @@ pub const ReportBuilder = struct { const overall_region_info = base.RegionInfo.position( self.source, - self.module_env.line_starts.items, + self.module_env.line_starts.items.items, overall_start_offset, overall_end_offset, ) catch return report; @@ -598,7 +598,7 @@ pub const ReportBuilder = struct { // Get region info for invalid branch const actual_region_info = base.RegionInfo.position( self.source, - self.module_env.line_starts.items, + self.module_env.line_starts.items.items, actual_region.start.offset, actual_region.end.offset, ) catch return report; @@ -725,7 +725,7 @@ pub const ReportBuilder = struct { const match_expr_region = self.can_ir.store.getNodeRegion(@enumFromInt(@intFromEnum(data.match_expr))); const overall_region_info = base.RegionInfo.position( self.source, - self.module_env.line_starts.items, + self.module_env.line_starts.items.items, match_expr_region.start.offset, match_expr_region.end.offset, ) catch return report; @@ -734,7 +734,7 @@ pub const ReportBuilder = struct { const invalid_var_region = self.can_ir.store.getNodeRegion(@enumFromInt(@intFromEnum(types.actual_var))); const invalid_var_region_info = base.RegionInfo.position( self.source, - self.module_env.line_starts.items, + self.module_env.line_starts.items.items, invalid_var_region.start.offset, invalid_var_region.end.offset, ) catch return report; @@ -847,7 +847,7 @@ pub const ReportBuilder = struct { const overall_region_info = base.RegionInfo.position( self.source, - 
self.module_env.line_starts.items, + self.module_env.line_starts.items.items, overall_start_offset, overall_end_offset, ) catch return report; @@ -866,7 +866,7 @@ pub const ReportBuilder = struct { // Create underline regions const this_branch_region_info = base.RegionInfo.position( self.source, - self.module_env.line_starts.items, + self.module_env.line_starts.items.items, this_branch_region.start.offset, this_branch_region.end.offset, ) catch return report; @@ -949,7 +949,7 @@ pub const ReportBuilder = struct { const overall_region_info = base.RegionInfo.position( self.source, - self.module_env.line_starts.items, + self.module_env.line_starts.items.items, overall_start_offset, overall_end_offset, ) catch return report; @@ -968,7 +968,7 @@ pub const ReportBuilder = struct { // Create underline regions const this_branch_region_info = base.RegionInfo.position( self.source, - self.module_env.line_starts.items, + self.module_env.line_starts.items.items, problem_side_region.start.offset, problem_side_region.end.offset, ) catch return report; @@ -1047,7 +1047,7 @@ pub const ReportBuilder = struct { const overall_region_info = base.RegionInfo.position( self.source, - self.module_env.line_starts.items, + self.module_env.line_starts.items.items, overall_start_offset, overall_end_offset, ) catch return report; @@ -1066,7 +1066,7 @@ pub const ReportBuilder = struct { // Create underline regions const this_branch_region_info = base.RegionInfo.position( self.source, - self.module_env.line_starts.items, + self.module_env.line_starts.items.items, problem_side_region.start.offset, problem_side_region.end.offset, ) catch return report; diff --git a/src/check/parse/AST.zig b/src/check/parse/AST.zig index 0c53b1525f..d86ed067f1 100644 --- a/src/check/parse/AST.zig +++ b/src/check/parse/AST.zig @@ -83,7 +83,7 @@ pub fn calcRegionInfo(self: *AST, region: TokenizedRegion, line_starts: []const pub fn appendRegionInfoToSexprTree(self: *AST, env: *base.ModuleEnv, tree: *SExprTree, region: TokenizedRegion) void { const start = self.tokens.resolve(region.start); const end = self.tokens.resolve(region.end); - const info: base.RegionInfo = base.RegionInfo.position(self.source, env.line_starts.items, start.start.offset, end.end.offset) catch .{ + const info: base.RegionInfo = base.RegionInfo.position(self.source, env.line_starts.items.items, start.start.offset, end.end.offset) catch .{ .start_line_idx = 0, .start_col_idx = 0, .end_line_idx = 0, @@ -531,18 +531,18 @@ pub fn parseDiagnosticToReport(self: *AST, diagnostic: Diagnostic, allocator: st // Add source context if we have a valid region if (region.start.offset <= region.end.offset and region.end.offset <= self.source.len) { // Compute line_starts from source for proper region info calculation - var line_starts = std.ArrayList(u32).init(allocator); - defer line_starts.deinit(); + var line_starts = collections.SafeList(u32).initCapacity(allocator, 256); + defer line_starts.deinit(allocator); - try line_starts.append(0); // First line starts at 0 + _ = line_starts.append(allocator, 0); // First line starts at 0 for (self.source, 0..) 
|char, i| { if (char == '\n') { - try line_starts.append(@intCast(i + 1)); + _ = line_starts.append(allocator, @intCast(i + 1)); } } // Use proper region info calculation with converted region - const region_info = base.RegionInfo.position(self.source, line_starts.items, region.start.offset, region.end.offset) catch { + const region_info = base.RegionInfo.position(self.source, line_starts.items.items, region.start.offset, region.end.offset) catch { return report; // Return report without source context if region calculation fails }; diff --git a/src/cli_args.zig b/src/cli_args.zig index f281d79e88..baa4f2e474 100644 --- a/src/cli_args.zig +++ b/src/cli_args.zig @@ -68,6 +68,8 @@ pub const RunArgs = struct { pub const CheckArgs = struct { path: []const u8, // the path of the roc file to be checked main: ?[]const u8, // the path to a roc file with an app header to be used to resolved dependencies + no_cache: bool = false, // disable cache + verbose: bool = false, // enable verbose output }; /// Arguments for `roc build` @@ -145,6 +147,8 @@ const main_help = fn parseCheck(args: []const []const u8) CliArgs { var path: ?[]const u8 = null; var main: ?[]const u8 = null; + var no_cache: bool = false; + var verbose: bool = false; for (args) |arg| { if (isHelpFlag(arg)) { return CliArgs{ .help = @@ -157,6 +161,8 @@ fn parseCheck(args: []const []const u8) CliArgs { \\ \\Options: \\ --main=
The .roc file of the main app/package module to resolve dependencies from + \\ --no-cache Disable caching + \\ --verbose Enable verbose output including cache statistics \\ -h, --help Print help \\ }; @@ -166,6 +172,10 @@ fn parseCheck(args: []const []const u8) CliArgs { } else { return CliArgs{ .problem = CliProblem{ .missing_flag_value = .{ .flag = "--main" } } }; } + } else if (mem.eql(u8, arg, "--no-cache")) { + no_cache = true; + } else if (mem.eql(u8, arg, "--verbose")) { + verbose = true; } else { if (path != null) { return CliArgs{ .problem = CliProblem{ .unexpected_argument = .{ .cmd = "check", .arg = arg } } }; @@ -173,7 +183,7 @@ fn parseCheck(args: []const []const u8) CliArgs { path = arg; } } - return CliArgs{ .check = CheckArgs{ .path = path orelse "main.roc", .main = main } }; + return CliArgs{ .check = CheckArgs{ .path = path orelse "main.roc", .main = main, .no_cache = no_cache, .verbose = verbose } }; } fn parseBuild(args: []const []const u8) CliArgs { diff --git a/src/collections.zig b/src/collections.zig index c279b31cbd..8d0eb8ce82 100644 --- a/src/collections.zig +++ b/src/collections.zig @@ -12,6 +12,8 @@ pub const SafeList = @import("collections/safe_list.zig").SafeList; pub const SafeMultiList = @import("collections/safe_list.zig").SafeMultiList; +pub const SafeStringHashMap = @import("collections/safe_hash_map.zig").SafeStringHashMap; + /// A range that must have at least one element pub const NonEmptyRange = struct { /// Starting index (inclusive) diff --git a/src/collections/SmallStringInterner.zig b/src/collections/SmallStringInterner.zig index 68b08b8d15..e95d1b38a5 100644 --- a/src/collections/SmallStringInterner.zig +++ b/src/collections/SmallStringInterner.zig @@ -104,16 +104,17 @@ pub fn getRegion(self: *const Self, idx: Idx) Region { return self.regions.items[@as(usize, @intFromEnum(idx))]; } -const StringIdx = enum(u32) { +/// TODO +pub const StringIdx = enum(u32) { _, // This uses an unmanaged hash map due to context management requirements. // It enables us to ensure that an update context is always used with the newest pointer to the underlying bytes allocation. - const Table = std.HashMapUnmanaged(StringIdx, void, TableContext, std.hash_map.default_max_load_percentage); + pub const Table = std.HashMapUnmanaged(StringIdx, void, TableContext, std.hash_map.default_max_load_percentage); /// These are copied straight out of the zig standard library. /// They are simply modified to give us control over the hash function and bytes allocation. - const TableContext = struct { + pub const TableContext = struct { bytes: *const std.ArrayListUnmanaged(u8), pub fn eql(_: @This(), a: StringIdx, b: StringIdx) bool { @@ -125,7 +126,7 @@ const StringIdx = enum(u32) { } }; - const TableAdapter = struct { + pub const TableAdapter = struct { bytes: *const std.ArrayListUnmanaged(u8), pub fn eql(ctx: @This(), a: []const u8, b: StringIdx) bool { diff --git a/src/collections/safe_hash_map.zig b/src/collections/safe_hash_map.zig new file mode 100644 index 0000000000..8b9ade7596 --- /dev/null +++ b/src/collections/safe_hash_map.zig @@ -0,0 +1,364 @@ +//! Safe hash map collections with serialization support +//! +//! These collections provide type-safe serialization/deserialization for hash maps +//! commonly used in the Roc compiler. 
+ +const std = @import("std"); +const testing = std.testing; +const Allocator = std.mem.Allocator; +const exitOnOom = @import("utils.zig").exitOnOom; +const serialization = @import("../serialization/mod.zig"); + +/// A type-safe string hash map with serialization support +pub fn SafeStringHashMap(comptime V: type) type { + return struct { + map: std.StringHashMapUnmanaged(V) = .{}, + + const Self = @This(); + + /// Initialize the hash map + pub fn init() Self { + return Self{}; + } + + /// Initialize with capacity + pub fn initCapacity(gpa: Allocator, capacity: usize) Self { + var map = std.StringHashMapUnmanaged(V){}; + map.ensureTotalCapacity(gpa, @intCast(capacity)) catch |err| exitOnOom(err); + return Self{ .map = map }; + } + + /// Deinitialize the hash map + pub fn deinit(self: *Self, gpa: Allocator) void { + // Free all the keys (which are owned strings) + var iter = self.map.iterator(); + while (iter.next()) |entry| { + gpa.free(entry.key_ptr.*); + } + self.map.deinit(gpa); + } + + /// Put a key-value pair (the map stores and owns a duplicate of the key; the caller retains ownership of the slice it passed in) + pub fn put(self: *Self, gpa: Allocator, key: []const u8, value: V) Allocator.Error!void { + // Check if key already exists and get the old key if so + const result = try self.map.getOrPut(gpa, key); + if (result.found_existing) { + // Free the old key before replacing + gpa.free(result.key_ptr.*); + // Replace with new key + result.key_ptr.* = try gpa.dupe(u8, key); + } else { + // New key, just duplicate it + result.key_ptr.* = try gpa.dupe(u8, key); + } + result.value_ptr.* = value; + } + + /// Get a value by key + pub fn get(self: *const Self, key: []const u8) ?V { + return self.map.get(key); + } + + /// Get the number of entries + pub fn count(self: *const Self) usize { + return self.map.count(); + } + + /// Check if a key exists in the map + pub fn contains(self: *const Self, key: []const u8) bool { + return self.map.contains(key); + } + + /// Calculate the size needed to serialize this hash map + pub fn serializedSize(self: *const Self) usize { + var size: usize = @sizeOf(u32); // count + + var iter = self.map.iterator(); + while (iter.next()) |entry| { + size += @sizeOf(u32); // key length + size += entry.key_ptr.len; // key bytes + if (V != void) { + size += @sizeOf(V); // value bytes + } + } + + return size; + } + + /// Serialize this hash map into the provided buffer + pub fn serializeInto(self: *const Self, buffer: []u8) ![]u8 { + const size = self.serializedSize(); + if (buffer.len < size) return error.BufferTooSmall; + + var offset: usize = 0; + + // Write count + std.mem.writeInt(u32, buffer[offset..][0..4], @intCast(self.map.count()), .little); + offset += @sizeOf(u32); + + // Write entries + var iter = self.map.iterator(); + while (iter.next()) |entry| { + // Write key length + const key_len: u32 = @intCast(entry.key_ptr.len); + std.mem.writeInt(u32, buffer[offset..][0..4], key_len, .little); + offset += @sizeOf(u32); + + // Write key bytes + @memcpy(buffer[offset .. offset + entry.key_ptr.len], entry.key_ptr.*); + offset += entry.key_ptr.len; + + // Write value bytes (if not void) + if (V != void) { + @memcpy(buffer[offset .. 
offset + @sizeOf(V)], std.mem.asBytes(entry.value_ptr)); + offset += @sizeOf(V); + } + } + + return buffer[0..offset]; + } + + /// Deserialize a hash map from the provided buffer + pub fn deserializeFrom(buffer: []const u8, allocator: Allocator) !Self { + if (buffer.len < @sizeOf(u32)) return error.BufferTooSmall; + + var offset: usize = 0; + + // Read count + const entry_count = std.mem.readInt(u32, buffer[offset..][0..4], .little); + offset += @sizeOf(u32); + + // Create hash map with capacity + var result = Self.initCapacity(allocator, entry_count); + errdefer result.deinit(allocator); + + // Read entries + for (0..entry_count) |_| { + // Read key length + if (offset + @sizeOf(u32) > buffer.len) return error.BufferTooSmall; + const key_len = std.mem.readInt(u32, buffer[offset..][0..4], .little); + offset += @sizeOf(u32); + + // Read key bytes + if (offset + key_len > buffer.len) return error.BufferTooSmall; + const key = buffer[offset .. offset + key_len]; + offset += key_len; + + // Read value (if not void) + const value = if (V != void) blk: { + if (offset + @sizeOf(V) > buffer.len) return error.BufferTooSmall; + const value_bytes = buffer[offset .. offset + @sizeOf(V)]; + offset += @sizeOf(V); + break :blk std.mem.bytesAsValue(V, value_bytes).*; + } else {}; + + // Insert into map + try result.put(allocator, key, value); + } + + return result; + } + + /// Get an iterator over the hash map + pub fn iterator(self: *const Self) std.StringHashMapUnmanaged(V).Iterator { + return self.map.iterator(); + } + }; +} + +// Tests +test "SafeStringHashMap(void) basic operations" { + const gpa = testing.allocator; + + var map = SafeStringHashMap(void).init(); + defer map.deinit(gpa); + + try testing.expectEqual(@as(usize, 0), map.count()); + + try map.put(gpa, "hello", {}); + try map.put(gpa, "world", {}); + + try testing.expectEqual(@as(usize, 2), map.count()); + try testing.expect(map.get("hello") != null); + try testing.expect(map.get("world") != null); + try testing.expect(map.get("missing") == null); +} + +test "SafeStringHashMap(u16) basic operations" { + const gpa = testing.allocator; + + var map = SafeStringHashMap(u16).init(); + defer map.deinit(gpa); + + try map.put(gpa, "foo", 42); + try map.put(gpa, "bar", 123); + + try testing.expectEqual(@as(usize, 2), map.count()); + try testing.expectEqual(@as(u16, 42), map.get("foo").?); + try testing.expectEqual(@as(u16, 123), map.get("bar").?); + try testing.expect(map.get("missing") == null); +} + +test "SafeStringHashMap(void) serialization round-trip" { + const gpa = testing.allocator; + + var original = SafeStringHashMap(void).init(); + defer original.deinit(gpa); + + try original.put(gpa, "first", {}); + try original.put(gpa, "second", {}); + try original.put(gpa, "third", {}); + + // Serialize + const size = original.serializedSize(); + const buffer = try gpa.alloc(u8, size); + defer gpa.free(buffer); + + const serialized = try original.serializeInto(buffer); + try testing.expectEqual(size, serialized.len); + + // Deserialize + var deserialized = try SafeStringHashMap(void).deserializeFrom(serialized, gpa); + defer deserialized.deinit(gpa); + + // Verify + try testing.expectEqual(original.count(), deserialized.count()); + try testing.expect(deserialized.get("first") != null); + try testing.expect(deserialized.get("second") != null); + try testing.expect(deserialized.get("third") != null); + try testing.expect(deserialized.get("missing") == null); +} + +test "SafeStringHashMap(u16) serialization round-trip" { + const gpa = 
testing.allocator; + + var original = SafeStringHashMap(u16).init(); + defer original.deinit(gpa); + + try original.put(gpa, "alpha", 100); + try original.put(gpa, "beta", 200); + try original.put(gpa, "gamma", 300); + + // Serialize + const size = original.serializedSize(); + const buffer = try gpa.alloc(u8, size); + defer gpa.free(buffer); + + const serialized = try original.serializeInto(buffer); + + // Deserialize + var deserialized = try SafeStringHashMap(u16).deserializeFrom(serialized, gpa); + defer deserialized.deinit(gpa); + + // Verify + try testing.expectEqual(original.count(), deserialized.count()); + try testing.expectEqual(@as(u16, 100), deserialized.get("alpha").?); + try testing.expectEqual(@as(u16, 200), deserialized.get("beta").?); + try testing.expectEqual(@as(u16, 300), deserialized.get("gamma").?); +} + +test "SafeStringHashMap empty serialization" { + const gpa = testing.allocator; + + var empty = SafeStringHashMap(u32).init(); + defer empty.deinit(gpa); + + // Serialize empty map + const size = empty.serializedSize(); + try testing.expectEqual(@sizeOf(u32), size); // Just the count + + const buffer = try gpa.alloc(u8, size); + defer gpa.free(buffer); + + const serialized = try empty.serializeInto(buffer); + + // Deserialize + var deserialized = try SafeStringHashMap(u32).deserializeFrom(serialized, gpa); + defer deserialized.deinit(gpa); + + try testing.expectEqual(@as(usize, 0), deserialized.count()); +} + +test "SafeStringHashMap deserialization buffer too small error" { + const gpa = testing.allocator; + + // Buffer too small to even contain count + var tiny_buffer: [2]u8 = undefined; + try testing.expectError(error.BufferTooSmall, SafeStringHashMap(void).deserializeFrom(&tiny_buffer, gpa)); + + // Buffer with count but insufficient data + var partial_buffer: [8]u8 = undefined; + std.mem.writeInt(u32, partial_buffer[0..4], 1, .little); // Claims 1 item + std.mem.writeInt(u32, partial_buffer[4..8], 10, .little); // Claims key length 10, but no space for key + try testing.expectError(error.BufferTooSmall, SafeStringHashMap(void).deserializeFrom(&partial_buffer, gpa)); +} + +test "SafeStringHashMap duplicate key handling" { + const gpa = testing.allocator; + + var map = SafeStringHashMap(u32).init(); + defer map.deinit(gpa); + + // Insert first value + try map.put(gpa, "test_key", 42); + try testing.expectEqual(@as(usize, 1), map.count()); + try testing.expectEqual(@as(u32, 42), map.get("test_key").?); + + // Insert same key with different value - should replace without leaking + try map.put(gpa, "test_key", 99); + try testing.expectEqual(@as(usize, 1), map.count()); + try testing.expectEqual(@as(u32, 99), map.get("test_key").?); + + // Insert same key again + try map.put(gpa, "test_key", 123); + try testing.expectEqual(@as(usize, 1), map.count()); + try testing.expectEqual(@as(u32, 123), map.get("test_key").?); + + // Add a different key to make sure normal operation still works + try map.put(gpa, "other_key", 456); + try testing.expectEqual(@as(usize, 2), map.count()); + try testing.expectEqual(@as(u32, 123), map.get("test_key").?); + try testing.expectEqual(@as(u32, 456), map.get("other_key").?); +} + +test "SafeStringHashMap comprehensive serialization framework test" { + const gpa = testing.allocator; + + var map = SafeStringHashMap(u32).init(); + defer map.deinit(gpa); + + // Add various test data including edge cases + try map.put(gpa, "key1", 0); // minimum value + try map.put(gpa, "key2", 42); + try map.put(gpa, "longer_key_name_test", 123); + try 
map.put(gpa, "k", 0xFFFFFFFF); // maximum value, short key + try map.put(gpa, "", 999); // empty key + try map.put(gpa, "🦎🚀", 777); // unicode key + + // Test serialization using the testing framework + try serialization.testing.testSerialization(SafeStringHashMap(u32), &map, gpa); +} + +test "SafeStringHashMap empty map serialization framework test" { + const gpa = testing.allocator; + + var empty_map = SafeStringHashMap(u16).init(); + defer empty_map.deinit(gpa); + + try serialization.testing.testSerialization(SafeStringHashMap(u16), &empty_map, gpa); +} + +test "SafeStringHashMap void value serialization framework test" { + const gpa = testing.allocator; + + var map = SafeStringHashMap(void).init(); + defer map.deinit(gpa); + + // Add various keys (values are void) + try map.put(gpa, "first", {}); + try map.put(gpa, "second", {}); + try map.put(gpa, "third", {}); + + // Test serialization using the testing framework + try serialization.testing.testSerialization(SafeStringHashMap(void), &map, gpa); +} diff --git a/src/collections/safe_list.zig b/src/collections/safe_list.zig index f651c034af..05511614b8 100644 --- a/src/collections/safe_list.zig +++ b/src/collections/safe_list.zig @@ -2,10 +2,12 @@ const std = @import("std"); const utils = @import("utils.zig"); +const serialization = @import("../serialization/mod.zig"); const testing = std.testing; -const Allocator = std.mem.Allocator; const exitOnOom = utils.exitOnOom; +const Allocator = std.mem.Allocator; +const SERIALIZATION_ALIGNMENT = serialization.SERIALIZATION_ALIGNMENT; /// Represents a type safe range in a list; [start, end) /// @@ -164,13 +166,29 @@ pub fn SafeList(comptime T: type) type { /// Returns the size needed to serialize this list pub fn serializedSize(self: *const SafeList(T)) usize { // Header: 4 bytes for count - // Data: items.len * @sizeOf(T) - return @sizeOf(u32) + (self.items.items.len * @sizeOf(T)); + var total_size: usize = @sizeOf(u32); + + // Check if T has custom serialization + if (comptime switch (@typeInfo(T)) { + .@"struct", .@"union", .@"enum", .@"opaque" => @hasDecl(T, "serializedSize"), + else => false, + }) { + // Use custom serialization for each item + for (self.items.items) |*item| { + total_size += item.serializedSize(); + } + } else { + // Use fixed size for POD types + total_size += self.items.items.len * @sizeOf(T); + } + + // Align to SERIALIZATION_ALIGNMENT to maintain alignment for subsequent data + return std.mem.alignForward(usize, total_size, SERIALIZATION_ALIGNMENT); } /// Serialize this list into the provided buffer /// Returns the slice of buffer that was written to - pub fn serializeInto(self: *const SafeList(T), buffer: []align(@alignOf(T)) u8) ![]align(@alignOf(T)) const u8 { + pub fn serializeInto(self: *const SafeList(T), buffer: []align(SERIALIZATION_ALIGNMENT) u8) ![]align(SERIALIZATION_ALIGNMENT) const u8 { const size = self.serializedSize(); if (buffer.len < size) return error.BufferTooSmall; @@ -178,12 +196,33 @@ pub fn SafeList(comptime T: type) type { const count_ptr = @as(*u32, @ptrCast(@alignCast(buffer.ptr))); count_ptr.* = @intCast(self.items.items.len); - // If T is a POD type, just memcpy the data - if (@typeInfo(T) == .@"struct" or @typeInfo(T) == .int or @typeInfo(T) == .float) { - const data_ptr = @as([*]T, @ptrCast(@alignCast(buffer.ptr + @sizeOf(u32)))); - @memcpy(data_ptr[0..self.items.items.len], self.items.items); + var offset: usize = @sizeOf(u32); + + // Check if T has custom serialization + if (comptime switch (@typeInfo(T)) { + .@"struct", .@"union", 
.@"enum", .@"opaque" => @hasDecl(T, "serializeInto"), + else => false, + }) { + // Use custom serialization for each item + for (self.items.items) |*item| { + const item_buffer = buffer[offset..]; + const item_slice = try item.serializeInto(item_buffer); + offset += item_slice.len; + } } else { - @compileError("Cannot serialize non-POD type " ++ @typeName(T)); + // Use memcpy for POD types + if (@typeInfo(T) == .@"struct" or @typeInfo(T) == .int or @typeInfo(T) == .float or @typeInfo(T) == .@"enum") { + const data_ptr = @as([*]T, @ptrCast(@alignCast(buffer.ptr + @sizeOf(u32)))); + @memcpy(data_ptr[0..self.items.items.len], self.items.items); + offset += self.items.items.len * @sizeOf(T); + } else { + @compileError("Cannot serialize non-POD type " ++ @typeName(T) ++ " without custom serialization methods"); + } + } + + // Zero out any padding bytes + if (offset < size) { + @memset(buffer[offset..size], 0); } return buffer[0..size]; @@ -196,16 +235,36 @@ pub fn SafeList(comptime T: type) type { // Read count const count = @as(*const u32, @ptrCast(@alignCast(buffer.ptr))).*; - const expected_size = @sizeOf(u32) + (count * @sizeOf(T)); - if (buffer.len < expected_size) return error.BufferTooSmall; - // Create list with exact capacity var list = SafeList(T).initCapacity(allocator, count); + errdefer list.deinit(allocator); + + var offset: usize = @sizeOf(u32); + + // Check if T has custom deserialization + if (comptime switch (@typeInfo(T)) { + .@"struct", .@"union", .@"enum", .@"opaque" => @hasDecl(T, "deserializeFrom"), + else => false, + }) { + // Use custom deserialization for each item + for (0..count) |_| { + const item_buffer = buffer[offset..]; + const item = try T.deserializeFrom(item_buffer); + const item_idx = list.items.append(allocator, item) catch |err| exitOnOom(err); + _ = item_idx; + + // For custom deserialization, we need to advance offset by the actual serialized size + offset += item.serializedSize(); + } + } else { + // Use memcpy for POD types + const expected_size = @sizeOf(u32) + (count * @sizeOf(T)); + if (buffer.len < expected_size) return error.BufferTooSmall; - // Copy data - if (count > 0) { - const data_ptr = @as([*]const T, @ptrCast(@alignCast(buffer.ptr + @sizeOf(u32)))); - list.items.appendSliceAssumeCapacity(data_ptr[0..count]); + if (count > 0) { + const data_ptr = @as([*]const T, @ptrCast(@alignCast(buffer.ptr + @sizeOf(u32)))); + list.items.appendSliceAssumeCapacity(data_ptr[0..count]); + } } return list; @@ -399,12 +458,14 @@ pub fn SafeMultiList(comptime T: type) type { pub fn serializedSize(self: *const SafeMultiList(T)) usize { // Header: 4 bytes for count // Data: items.len * @sizeOf(T) - return @sizeOf(u32) + (self.items.len * @sizeOf(T)); + const raw_size = @sizeOf(u32) + (self.items.len * @sizeOf(T)); + // Align to SERIALIZATION_ALIGNMENT to maintain alignment for subsequent data + return std.mem.alignForward(usize, raw_size, SERIALIZATION_ALIGNMENT); } /// Serialize this list into the provided buffer /// Returns the slice of buffer that was written to - pub fn serializeInto(self: *const SafeMultiList(T), buffer: []align(@alignOf(T)) u8) ![]align(@alignOf(T)) const u8 { + pub fn serializeInto(self: *const SafeMultiList(T), buffer: []align(SERIALIZATION_ALIGNMENT) u8) ![]align(SERIALIZATION_ALIGNMENT) const u8 { const size = self.serializedSize(); if (buffer.len < size) return error.BufferTooSmall; @@ -423,6 +484,12 @@ pub fn SafeMultiList(comptime T: type) type { @compileError("Cannot serialize non-POD type " ++ @typeName(T)); } + // Zero out any 
padding bytes + const actual_size = @sizeOf(u32) + (self.items.len * @sizeOf(T)); + if (actual_size < size) { + @memset(buffer[actual_size..size], 0); + } + return buffer[0..size]; } @@ -592,11 +659,11 @@ test "SafeList(u32) serialization empty list" { var list = SafeList(u32){}; defer list.deinit(gpa); - // Empty list should serialize to just a count of 0 - const expected_size = @sizeOf(u32); + // Empty list should serialize to just a count of 0, aligned to SERIALIZATION_ALIGNMENT + const expected_size = std.mem.alignForward(usize, @sizeOf(u32), SERIALIZATION_ALIGNMENT); try testing.expectEqual(expected_size, list.serializedSize()); - var buffer: [@sizeOf(u32)]u8 align(@alignOf(u32)) = undefined; + var buffer: [16]u8 align(SERIALIZATION_ALIGNMENT) = undefined; const serialized = try list.serializeInto(&buffer); try testing.expectEqual(expected_size, serialized.len); @@ -618,7 +685,7 @@ test "SafeList(u32) serialization with data" { const expected_size = @sizeOf(u32) + (3 * @sizeOf(u32)); try testing.expectEqual(expected_size, list.serializedSize()); - var buffer: [256]u8 align(@alignOf(u32)) = undefined; + var buffer: [256]u8 align(SERIALIZATION_ALIGNMENT) = undefined; const serialized = try list.serializeInto(&buffer); try testing.expectEqual(expected_size, serialized.len); @@ -642,10 +709,10 @@ test "SafeList(u8) serialization with data" { _ = list.appendSlice(gpa, "hello"); - const expected_size = @sizeOf(u32) + 5; + const expected_size = std.mem.alignForward(usize, @sizeOf(u32) + 5, SERIALIZATION_ALIGNMENT); try testing.expectEqual(expected_size, list.serializedSize()); - var buffer: [256]u8 align(@alignOf(u8)) = undefined; + var buffer: [256]u8 align(SERIALIZATION_ALIGNMENT) = undefined; const serialized = try list.serializeInto(&buffer); try testing.expectEqual(expected_size, serialized.len); @@ -654,7 +721,7 @@ test "SafeList(u8) serialization with data" { try testing.expectEqual(@as(u32, 5), count); // Check the data - try testing.expectEqualSlices(u8, "hello", buffer[@sizeOf(u32)..expected_size]); + try testing.expectEqualSlices(u8, "hello", buffer[@sizeOf(u32) .. 
@sizeOf(u32) + 5]); } test "SafeList(u32) deserialization empty list" { @@ -676,13 +743,13 @@ test "SafeList(u32) deserialization with data" { // Prepare buffer with count = 3 and data [42, 100, 255] const expected_data = [_]u32{ 42, 100, 255 }; const buffer_size = @sizeOf(u32) + expected_data.len * @sizeOf(u32); - var buffer: [256]u8 align(@alignOf(u32)) = undefined; + var partial_buffer: [64]u8 align(SERIALIZATION_ALIGNMENT) = undefined; - @as(*u32, @ptrCast(@alignCast(&buffer))).* = expected_data.len; - const data_ptr = @as([*]u32, @ptrCast(@alignCast(buffer[@sizeOf(u32)..]))); + @as(*u32, @ptrCast(@alignCast(&partial_buffer))).* = expected_data.len; + const data_ptr = @as([*]u32, @ptrCast(@alignCast(partial_buffer[@sizeOf(u32)..]))); @memcpy(data_ptr[0..expected_data.len], &expected_data); - var list = try SafeList(u32).deserializeFrom(buffer[0..buffer_size], gpa); + var list = try SafeList(u32).deserializeFrom(partial_buffer[0..buffer_size], gpa); defer list.deinit(gpa); try testing.expectEqual(expected_data.len, list.len()); @@ -698,12 +765,12 @@ test "SafeList(u8) deserialization with data" { // Prepare buffer with "world" const expected_data = "world"; const buffer_size = @sizeOf(u32) + expected_data.len; - var buffer: [256]u8 align(@alignOf(u8)) = undefined; + var small_buffer: [64]u8 align(SERIALIZATION_ALIGNMENT) = undefined; - @as(*u32, @ptrCast(@alignCast(&buffer))).* = @intCast(expected_data.len); - @memcpy(buffer[@sizeOf(u32)..buffer_size], expected_data); + @as(*u32, @ptrCast(@alignCast(&small_buffer))).* = @intCast(expected_data.len); + @memcpy(small_buffer[@sizeOf(u32)..buffer_size], expected_data); - var list = try SafeList(u8).deserializeFrom(buffer[0..buffer_size], gpa); + var list = try SafeList(u8).deserializeFrom(small_buffer[0..buffer_size], gpa); defer list.deinit(gpa); try testing.expectEqual(expected_data.len, list.len()); @@ -722,7 +789,7 @@ test "SafeList(u32) round-trip serialization" { _ = original.appendSlice(gpa, &test_data); // Serialize - var buffer: [1024]u8 align(@alignOf(u32)) = undefined; + var buffer: [1024]u8 align(SERIALIZATION_ALIGNMENT) = undefined; const serialized = try original.serializeInto(&buffer); // Deserialize @@ -747,7 +814,7 @@ test "SafeList serialization buffer too small error" { _ = list.append(gpa, 100); // Buffer too small for the data - var small_buffer: [4]u8 align(@alignOf(u32)) = undefined; // Only room for count, not data + var small_buffer: [4]u8 align(SERIALIZATION_ALIGNMENT) = undefined; // Only room for count, not data try testing.expectError(error.BufferTooSmall, list.serializeInto(&small_buffer)); } @@ -755,13 +822,13 @@ test "SafeList deserialization buffer too small error" { const gpa = testing.allocator; // Buffer too small to even contain count - var tiny_buffer: [2]u8 align(@alignOf(u32)) = undefined; - try testing.expectError(error.BufferTooSmall, SafeList(u32).deserializeFrom(&tiny_buffer, gpa)); + var small_buffer: [2]u8 align(SERIALIZATION_ALIGNMENT) = undefined; + try testing.expectError(error.BufferTooSmall, SafeList(u32).deserializeFrom(&small_buffer, gpa)); // Buffer with count but insufficient data - var partial_buffer: [6]u8 align(@alignOf(u32)) = undefined; - @as(*u32, @ptrCast(@alignCast(&partial_buffer))).* = 2; // Claims 2 items but only has 2 extra bytes - try testing.expectError(error.BufferTooSmall, SafeList(u32).deserializeFrom(&partial_buffer, gpa)); + var partial_buffer2: [6]u8 align(SERIALIZATION_ALIGNMENT) = undefined; + @as(*u32, @ptrCast(@alignCast(&partial_buffer2))).* = 2; // Claims 2 
items but only has 2 extra bytes + try testing.expectError(error.BufferTooSmall, SafeList(u32).deserializeFrom(&partial_buffer2, gpa)); } test "SafeList(struct) serialization" { @@ -774,7 +841,7 @@ test "SafeList(struct) serialization" { _ = list.append(gpa, Point{ .x = 10, .y = 20 }); _ = list.append(gpa, Point{ .x = 30, .y = 40 }); - var buffer: [1024]u8 align(@alignOf(Point)) = undefined; + var buffer: [1024]u8 align(SERIALIZATION_ALIGNMENT) = undefined; const serialized = try list.serializeInto(&buffer); var deserialized = try SafeList(Point).deserializeFrom(serialized, gpa); @@ -798,11 +865,11 @@ test "SafeMultiList(struct) serialization empty list" { var list = SafeMultiList(Point){}; defer list.deinit(gpa); - // Empty list should serialize to just a count of 0 - const expected_size = @sizeOf(u32); + // Empty list should serialize to just a count of 0, aligned to SERIALIZATION_ALIGNMENT + const expected_size = std.mem.alignForward(usize, @sizeOf(u32), SERIALIZATION_ALIGNMENT); try testing.expectEqual(expected_size, list.serializedSize()); - var buffer: [@sizeOf(u32)]u8 align(@alignOf(Point)) = undefined; + var buffer: [16]u8 align(SERIALIZATION_ALIGNMENT) = undefined; const serialized = try list.serializeInto(&buffer); try testing.expectEqual(expected_size, serialized.len); @@ -822,10 +889,10 @@ test "SafeMultiList(struct) serialization with data" { _ = list.append(gpa, Point{ .x = 30, .y = 40 }); _ = list.append(gpa, Point{ .x = 50, .y = 60 }); - const expected_size = @sizeOf(u32) + (3 * @sizeOf(Point)); + const expected_size = std.mem.alignForward(usize, @sizeOf(u32) + (3 * @sizeOf(Point)), SERIALIZATION_ALIGNMENT); try testing.expectEqual(expected_size, list.serializedSize()); - var buffer: [256]u8 align(@alignOf(Point)) = undefined; + var buffer: [2048]u8 align(SERIALIZATION_ALIGNMENT) = undefined; const serialized = try list.serializeInto(&buffer); try testing.expectEqual(expected_size, serialized.len); @@ -854,10 +921,10 @@ test "SafeMultiList(struct) serialization with primitive data" { _ = list.append(gpa, Value{ .val = 42 }); _ = list.append(gpa, Value{ .val = 100 }); - const expected_size = @sizeOf(u32) + (2 * @sizeOf(Value)); + const expected_size = std.mem.alignForward(usize, @sizeOf(u32) + (2 * @sizeOf(u32)), SERIALIZATION_ALIGNMENT); try testing.expectEqual(expected_size, list.serializedSize()); - var buffer: [256]u8 align(@alignOf(Value)) = undefined; + var buffer: [2048]u8 align(SERIALIZATION_ALIGNMENT) = undefined; const serialized = try list.serializeInto(&buffer); try testing.expectEqual(expected_size, serialized.len); @@ -898,7 +965,7 @@ test "SafeMultiList(struct) deserialization with data" { // Prepare buffer with count = 2 and data const buffer_size = @sizeOf(u32) + expected_data.len * @sizeOf(Point); - var buffer: [256]u8 align(@alignOf(Point)) = undefined; + var buffer: [2048]u8 align(SERIALIZATION_ALIGNMENT) = undefined; @as(*u32, @ptrCast(@alignCast(&buffer))).* = expected_data.len; const data_ptr = @as([*]Point, @ptrCast(@alignCast(buffer[@sizeOf(u32)..]))); @@ -922,7 +989,7 @@ test "SafeMultiList(struct) deserialization with primitive data" { const Value = struct { val: u32 }; const expected_data = [_]Value{ Value{ .val = 42 }, Value{ .val = 100 }, Value{ .val = 255 } }; const buffer_size = @sizeOf(u32) + expected_data.len * @sizeOf(Value); - var buffer: [256]u8 align(@alignOf(Value)) = undefined; + var buffer: [2048]u8 align(SERIALIZATION_ALIGNMENT) = undefined; @as(*u32, @ptrCast(@alignCast(&buffer))).* = expected_data.len; const data_ptr = 
@as([*]Value, @ptrCast(@alignCast(buffer[@sizeOf(u32)..]))); @@ -953,7 +1020,7 @@ test "SafeMultiList(struct) round-trip serialization" { _ = original.appendSlice(gpa, &test_data); // Serialize - var buffer: [1024]u8 align(@alignOf(Point)) = undefined; + var buffer: [2048]u8 align(SERIALIZATION_ALIGNMENT) = undefined; const serialized = try original.serializeInto(&buffer); // Deserialize @@ -981,7 +1048,7 @@ test "SafeMultiList serialization buffer too small error" { _ = list.append(gpa, Point{ .x = 30, .y = 40 }); // Buffer too small for the data - var small_buffer: [4]u8 align(@alignOf(Point)) = undefined; // Only room for count, not data + var small_buffer: [4]u8 align(SERIALIZATION_ALIGNMENT) = undefined; // Only room for count, not data try testing.expectError(error.BufferTooSmall, list.serializeInto(&small_buffer)); } @@ -991,11 +1058,11 @@ test "SafeMultiList deserialization buffer too small error" { const Point = struct { x: i32, y: i32 }; // Buffer too small to even contain count - var tiny_buffer: [2]u8 align(@alignOf(Point)) = undefined; + var tiny_buffer: [2]u8 align(SERIALIZATION_ALIGNMENT) = undefined; try testing.expectError(error.BufferTooSmall, SafeMultiList(Point).deserializeFrom(&tiny_buffer, gpa)); // Buffer with count but insufficient data - var partial_buffer: [6]u8 align(@alignOf(Point)) = undefined; + var partial_buffer: [6]u8 align(SERIALIZATION_ALIGNMENT) = undefined; @as(*u32, @ptrCast(@alignCast(&partial_buffer))).* = 1; // Claims 1 item but insufficient space for Point try testing.expectError(error.BufferTooSmall, SafeMultiList(Point).deserializeFrom(&partial_buffer, gpa)); } @@ -1084,10 +1151,10 @@ test "SafeMultiList complex Node-like structure serialization" { _ = list.appendSlice(gpa, &test_nodes); // Test serialization - const expected_size = @sizeOf(u32) + (test_nodes.len * @sizeOf(ComplexNode)); + const expected_size = std.mem.alignForward(usize, @sizeOf(u32) + (test_nodes.len * @sizeOf(ComplexNode)), SERIALIZATION_ALIGNMENT); try testing.expectEqual(expected_size, list.serializedSize()); - var buffer: [1024]u8 align(@alignOf(ComplexNode)) = undefined; + var buffer: [1024]u8 align(SERIALIZATION_ALIGNMENT) = undefined; const serialized = try list.serializeInto(&buffer); try testing.expectEqual(expected_size, serialized.len); @@ -1116,3 +1183,62 @@ test "SafeMultiList complex Node-like structure serialization" { try testing.expectEqual(expected.extra, actual.extra); } } + +test "SafeList comprehensive serialization framework test" { + const gpa = testing.allocator; + + var list = SafeList(u32).initCapacity(gpa, 8); + defer list.deinit(gpa); + + // Add various test data including edge cases + _ = list.append(gpa, 0); // minimum value + _ = list.append(gpa, 42); + _ = list.append(gpa, 123); + _ = list.append(gpa, 0xFFFFFFFF); // maximum value + _ = list.append(gpa, 999); + + // Test serialization using the testing framework + try serialization.testing.testSerialization(SafeList(u32), &list, gpa); +} + +test "SafeList empty list serialization framework test" { + const gpa = testing.allocator; + + var empty_list = SafeList(u32){}; + defer empty_list.deinit(gpa); + + try serialization.testing.testSerialization(SafeList(u32), &empty_list, gpa); +} + +test "SafeMultiList comprehensive serialization framework test" { + const gpa = testing.allocator; + + const TestStruct = struct { + x: u32, + y: u32, + }; + + var list = SafeMultiList(TestStruct).initCapacity(gpa, 4); + defer list.deinit(gpa); + + // Add various test data including edge cases + _ = list.append(gpa, 
TestStruct{ .x = 0, .y = 0 }); // minimum values + _ = list.append(gpa, TestStruct{ .x = 42, .y = 123 }); + _ = list.append(gpa, TestStruct{ .x = 0xFFFFFFFF, .y = 0xFFFFFFFF }); // maximum values + + // Test serialization using the testing framework + try serialization.testing.testSerialization(SafeMultiList(TestStruct), &list, gpa); +} + +test "SafeMultiList empty list serialization framework test" { + const gpa = testing.allocator; + + const TestStruct = struct { + val: u32, + }; + + var empty_list = SafeMultiList(TestStruct){}; + defer empty_list.deinit(gpa); + + try serialization.testing.testSerialization(SafeMultiList(TestStruct), &empty_list, gpa); +} diff --git a/src/coordinate.zig b/src/coordinate.zig index df21bda62a..a6105823aa 100644 --- a/src/coordinate.zig +++ b/src/coordinate.zig @@ -2,7 +2,7 @@ const std = @import("std"); const base = @import("base.zig"); -const cache = @import("cache.zig"); +const cache = @import("cache/mod.zig"); const types = @import("types.zig"); const collections = @import("collections.zig"); diff --git a/src/coordinate/Filesystem.zig b/src/coordinate/Filesystem.zig index 2d506f0572..139d99531c 100644 --- a/src/coordinate/Filesystem.zig +++ b/src/coordinate/Filesystem.zig @@ -18,6 +18,8 @@ dirName: *const fn (absolute_path: []const u8) ?[]const u8, baseName: *const fn (absolute_path: []const u8) ?[]const u8, canonicalize: *const fn (relative_path: []const u8, allocator: Allocator) CanonicalizeError![]const u8, makePath: *const fn (path: []const u8) MakePathError!void, +rename: *const fn (old_path: []const u8, new_path: []const u8) RenameError!void, +getFileInfo: *const fn (path: []const u8) GetFileInfoError!FileInfo, // TODO: replace this with a method that gets the right // filesystem manager for the current context. @@ -34,6 +36,8 @@ pub fn default() Self { .baseName = &baseNameDefault, .canonicalize = &canonicalizeDefault, .makePath = &makePathDefault, + .rename = &renameDefault, + .getFileInfo = &getFileInfoDefault, }; } @@ -51,6 +55,8 @@ pub fn testing() Self { .baseName = &baseNameTesting, .canonicalize = &canonicalizeTesting, .makePath = &makePathTesting, + .rename = &renameTesting, + .getFileInfo = &getFileInfoTesting, }; } @@ -73,6 +79,20 @@ pub const OpenError = std.fs.File.OpenError || std.fs.Dir.AccessError; /// All errors that can occur when canonicalizing a filepath. pub const CanonicalizeError = error{ FileNotFound, Unknown, OutOfMemory } || std.posix.RealPathError; +/// All errors that can occur when renaming a file. +pub const RenameError = std.fs.Dir.RenameError || std.posix.RenameError; + +/// All errors that can occur when getting file information. +pub const GetFileInfoError = std.fs.File.OpenError || std.fs.File.StatError; + +/// File information structure containing metadata. +pub const FileInfo = struct { + /// File modification time in nanoseconds since Unix epoch + mtime_ns: i128, + /// File size in bytes + size: u64, +}; + /// An abstracted directory handle. pub const Dir = struct { dir: std.fs.Dir, @@ -219,6 +239,24 @@ fn makePathDefault(path: []const u8) MakePathError!void { try std.fs.cwd().makePath(path); } +/// Renames a file or directory from old_path to new_path. +fn renameDefault(old_path: []const u8, new_path: []const u8) RenameError!void { + try std.fs.cwd().rename(old_path, new_path); +} + +/// Gets file information including modification time and size. 
+fn getFileInfoDefault(path: []const u8) GetFileInfoError!FileInfo { + const file = try std.fs.cwd().openFile(path, .{}); + defer file.close(); + + const stat = try file.stat(); + + return FileInfo{ + .mtime_ns = stat.mtime, + .size = stat.size, + }; +} + /// Writes contents to a file at the given path. /// Creates the file if it doesn't exist or truncates it if it does. fn writeFileDefault(path: []const u8, contents: []const u8) WriteError!void { @@ -278,3 +316,25 @@ fn makePathTesting(path: []const u8) MakePathError!void { _ = path; @panic("makePath should not be called in this test"); } + +fn renameTesting(old_path: []const u8, new_path: []const u8) RenameError!void { + _ = old_path; + _ = new_path; + @panic("rename should not be called in this test"); +} + +fn getFileInfoTesting(path: []const u8) GetFileInfoError!FileInfo { + // Return deterministic file info for testing + // Hash the path to get consistent results + var hasher = std.crypto.hash.sha2.Sha256.init(.{}); + hasher.update(path); + const hash = hasher.finalResult(); + + const mtime_ns = @as(i128, @bitCast(@as(u128, @bitCast(hash[0..16].*)) & 0x7FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF)); + const size = @as(u64, @bitCast(hash[16..24].*)) & 0xFFFFFF; // Limit size for testing + + return FileInfo{ + .mtime_ns = mtime_ns, + .size = size, + }; +} diff --git a/src/coordinate/ModuleGraph.zig b/src/coordinate/ModuleGraph.zig index 2ce99d29ae..7bad7a3dfe 100644 --- a/src/coordinate/ModuleGraph.zig +++ b/src/coordinate/ModuleGraph.zig @@ -5,7 +5,7 @@ const std = @import("std"); const testing = std.testing; const base = @import("../base.zig"); const utils = @import("utils.zig"); -const cache = @import("../cache.zig"); +const cache = @import("../cache/mod.zig"); const collections = @import("../collections.zig"); const Can = @import("../check/canonicalize.zig"); const Scope = @import("../check/canonicalize/Scope.zig"); diff --git a/src/coordinate_simple.zig b/src/coordinate_simple.zig index 97ae64e21b..35d0811622 100644 --- a/src/coordinate_simple.zig +++ b/src/coordinate_simple.zig @@ -12,6 +12,11 @@ const Filesystem = @import("coordinate/Filesystem.zig"); const ModuleEnv = base.ModuleEnv; const CIR = canonicalize.CIR; +const cache_mod = @import("cache/mod.zig"); +const CacheManager = cache_mod.CacheManager; +const CacheConfig = cache_mod.CacheConfig; +const CacheKey = cache_mod.CacheKey; +const CacheResult = cache_mod.CacheResult; /// Result of processing source code, containing both CIR and Reports /// for proper diagnostic reporting. 
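Note on the new `getFileInfo` hook: `CacheKey.generate` itself does not appear in this diff, but the metadata the hook exposes (`mtime_ns` and `size`) is exactly what a content-plus-metadata cache key needs. A minimal sketch of that idea, using SHA-256 as the testing stub above does; `computeCacheKey` is a hypothetical name, not a function from this PR, and a real key would also canonicalize endianness and mix in the compiler version:

```zig
const std = @import("std");

// Hypothetical sketch: derive a cache key from the source bytes plus the
// file metadata returned by Filesystem.getFileInfo.
fn computeCacheKey(source: []const u8, mtime_ns: i128, size: u64) [32]u8 {
    var hasher = std.crypto.hash.sha2.Sha256.init(.{});
    hasher.update(source);
    // Mixing in metadata lets a touched-but-identical file be detected cheaply.
    hasher.update(std.mem.asBytes(&mtime_ns));
    hasher.update(std.mem.asBytes(&size));
    return hasher.finalResult();
}

test "cache key changes when metadata changes" {
    const k1 = computeCacheKey("x = 1", 100, 5);
    const k2 = computeCacheKey("x = 1", 200, 5);
    try std.testing.expect(!std.mem.eql(u8, &k1, &k2));
}
```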
@@ -27,13 +32,27 @@ pub const ProcessResult = struct { cir: *CIR, reports: []reporting.Report, source: []const u8, + cache_key: ?CacheKey = null, + was_cached: bool = false, pub fn deinit(self: *ProcessResult, gpa: std.mem.Allocator) void { + // Clean up cache key if present + if (self.cache_key) |*key| { + key.deinit(gpa); + } + for (self.reports) |*report| { report.deinit(); } gpa.free(self.reports); gpa.free(self.source); + + // Clean up the heap-allocated ModuleEnv (only when loaded from cache) + if (self.was_cached) { + self.cir.env.deinit(); + gpa.destroy(self.cir.env); + } + self.cir.deinit(); gpa.destroy(self.cir); } @@ -48,6 +67,7 @@ pub fn processFile( gpa: std.mem.Allocator, fs: Filesystem, filepath: []const u8, + cache_manager: ?*CacheManager, ) !ProcessResult { const trace = tracy.trace(@src()); defer trace.end(); @@ -59,7 +79,44 @@ pub fn processFile( else => return error.FileReadError, }; - // Note: We transfer ownership of source to ProcessResult, avoiding an unnecessary copy + // If caching is enabled, try cache first + if (cache_manager) |cache| { + const cache_key = CacheKey.generate(source, filepath, fs, gpa) catch |err| { + // Log cache key generation failure, continue without cache + std.log.debug("Failed to generate cache key for {s}: {}", .{ filepath, err }); + return try processSourceInternal(gpa, source, filepath, true); + }; + + // Check cache + switch (cache.lookup(cache_key) catch .miss) { + .hit => |cached_result| { + // Cache hit! Free the source we just read since cached result has its own + gpa.free(source); + + return cached_result; + }, + .miss => { + // Fall through to normal processing + }, + .invalid => { + // Fall through to normal processing + }, + } + + // Cache miss - process normally and store result + var result = try processSourceInternal(gpa, source, filepath, true); + result.cache_key = cache_key; + result.was_cached = false; + + // Store in cache (don't fail compilation if cache store fails) + cache.store(cache_key, &result) catch |err| { + std.log.debug("Failed to store cache for {s}: {}", .{ filepath, err }); + }; + + return result; + } + + // No caching - process normally return try processSourceInternal(gpa, source, filepath, true); } @@ -95,15 +152,15 @@ fn processSourceInternal( const trace = tracy.trace(@src()); defer trace.end(); - // Initialize the ModuleEnv - var module_env = ModuleEnv.init(gpa); - defer module_env.deinit(); + // Initialize the ModuleEnv (heap-allocated for ownership transfer) + var module_env = try gpa.create(ModuleEnv); + module_env.* = ModuleEnv.init(gpa); // Calculate line starts for region info - try module_env.calcLineStarts(source); + try module_env.*.calcLineStarts(source); // Parse the source code - var parse_ast = parse.parse(&module_env, source); + var parse_ast = parse.parse(module_env, source); defer parse_ast.deinit(gpa); // Create an arraylist for capturing diagnostic reports. 
@@ -124,7 +181,7 @@ fn processSourceInternal( // Initialize the Can IR (heap-allocated) var cir = try gpa.create(CIR); - cir.* = CIR.init(&module_env); + cir.* = CIR.init(module_env); // Create scope for semantic analysis // Canonicalize the AST @@ -160,7 +217,7 @@ fn processSourceInternal( // Get type checking diagnostic Reports var report_builder = types_problem_mod.ReportBuilder.init( gpa, - &module_env, + module_env, cir, &solver.snapshots, owned_source, @@ -179,5 +236,7 @@ fn processSourceInternal( .cir = cir, .reports = reports.toOwnedSlice() catch return error.OutOfMemory, .source = owned_source, + .cache_key = null, + .was_cached = false, }; } diff --git a/src/fmt.zig b/src/fmt.zig index e507743717..186940aa51 100644 --- a/src/fmt.zig +++ b/src/fmt.zig @@ -167,12 +167,12 @@ pub fn formatFilePath(gpa: std.mem.Allocator, base_dir: std.fs.Dir, path: []cons fn printParseErrors(gpa: std.mem.Allocator, source: []const u8, parse_ast: AST) !void { // compute offsets of each line, looping over bytes of the input - var line_offsets = std.ArrayList(u32).init(gpa); - defer line_offsets.deinit(); - try line_offsets.append(0); + var line_offsets = @import("collections.zig").SafeList(u32).initCapacity(gpa, 256); + defer line_offsets.deinit(gpa); + _ = line_offsets.append(gpa, 0); for (source, 0..) |c, i| { if (c == '\n') { - try line_offsets.append(@intCast(i)); + _ = line_offsets.append(gpa, @intCast(i)); } } @@ -180,8 +180,8 @@ fn printParseErrors(gpa: std.mem.Allocator, source: []const u8, parse_ast: AST) try stderr.print("Errors:\n", .{}); for (parse_ast.parse_diagnostics.items) |err| { const region = parse_ast.tokens.resolve(@intCast(err.region.start)); - const line = binarySearch(line_offsets.items, region.start.offset) orelse unreachable; - const column = region.start.offset - line_offsets.items[line]; + const line = binarySearch(line_offsets.items.items, region.start.offset) orelse unreachable; + const column = region.start.offset - line_offsets.items.items[line]; const token = parse_ast.tokens.tokens.items(.tag)[err.region.start]; // TODO: pretty print the parse failures. 
try stderr.print("\t{s}, at token {s} at {d}:{d}\n", .{ @tagName(err.tag), @tagName(token), line + 1, column }); diff --git a/src/layout/store.zig b/src/layout/store.zig index 0d37f104fc..8cd0f1ae97 100644 --- a/src/layout/store.zig +++ b/src/layout/store.zig @@ -101,7 +101,7 @@ pub const Store = struct { type_store: *const types_store.Store, ) std.mem.Allocator.Error!Self { // Get the number of variables from the type store's slots - const capacity = type_store.slots.backing.items.len; + const capacity = type_store.slots.backing.len(); const layouts_by_var = try collections.ArrayListMap(Var, Idx).init(env.gpa, capacity); var layouts = collections.SafeMultiList(Layout){}; diff --git a/src/layout/store_test.zig b/src/layout/store_test.zig index 07c54daba7..94c97c77bd 100644 --- a/src/layout/store_test.zig +++ b/src/layout/store_test.zig @@ -3423,7 +3423,7 @@ test "layouts_by_var uses ArrayListMap with pre-allocation" { // Verify the ArrayListMap was initialized with capacity // The length should be at least as large as the types store - try testing.expect(layout_store.layouts_by_var.entries.len >= type_store.slots.backing.items.len); + try testing.expect(layout_store.layouts_by_var.entries.len >= type_store.slots.backing.len()); } test "idxFromScalar - arithmetic mapping with no branches" { diff --git a/src/main.zig b/src/main.zig index ef4e62a913..cf616eb136 100644 --- a/src/main.zig +++ b/src/main.zig @@ -13,6 +13,9 @@ const coordinate_simple = @import("coordinate_simple.zig"); const tracy = @import("tracy.zig"); const Filesystem = @import("coordinate/Filesystem.zig"); const cli_args = @import("cli_args.zig"); +const cache_mod = @import("cache/mod.zig"); +const CacheManager = cache_mod.CacheManager; +const CacheConfig = cache_mod.CacheConfig; const Allocator = std.mem.Allocator; const exitOnOom = collections.utils.exitOnOom; @@ -142,8 +145,27 @@ fn rocCheck(gpa: Allocator, args: cli_args.CheckArgs) !void { var timer = try std.time.Timer.start(); + // Initialize cache if enabled + var cache_config = CacheConfig{ + .enabled = !args.no_cache, + .verbose = args.verbose, + }; + + var cache_manager = if (cache_config.enabled) blk: { + var manager = CacheManager.init(gpa, cache_config, Filesystem.default()); + manager.ensureCacheDir() catch |err| { + if (args.verbose) { + std.log.debug("Failed to create cache directory: {}", .{err}); + } + // Continue without cache if directory creation fails + cache_config.enabled = false; + break :blk null; + }; + break :blk manager; + } else null; + // Process the file and get Reports - var result = coordinate_simple.processFile(gpa, Filesystem.default(), args.path) catch |err| { + var result = coordinate_simple.processFile(gpa, Filesystem.default(), args.path, if (cache_manager) |*cm| cm else null) catch |err| { stderr.print("Failed to check {s}: ", .{args.path}) catch {}; switch (err) { error.FileNotFound => stderr.print("File not found\n", .{}) catch {}, @@ -157,6 +179,13 @@ fn rocCheck(gpa: Allocator, args: cli_args.CheckArgs) !void { const elapsed = timer.read(); + // Print cache statistics if verbose + if (cache_manager) |*cm| { + if (args.verbose) { + cm.printStats(); + } + } + // Process reports and render them using the reporting system if (result.reports.len > 0) { var fatal_errors: usize = 0; diff --git a/src/serialization/mod.zig b/src/serialization/mod.zig new file mode 100644 index 0000000000..e524ef85af --- /dev/null +++ b/src/serialization/mod.zig @@ -0,0 +1,174 @@ +//! Common serialization utilities and traits for the Roc compiler +//! +//! 
+//! This module provides:
+//! - Common traits for serializable types
+//! - Memory safety utilities
+//! - Testing framework for serialization
+//! - Error types and utilities
+
+const std = @import("std");
+
+pub const testing = @import("testing.zig");
+pub const safety = @import("safety.zig");
+
+const Allocator = std.mem.Allocator;
+
+/// Standard alignment for all serialization operations
+/// This ensures consistent alignment across all cached data and serialization
+pub const SERIALIZATION_ALIGNMENT = 16;
+
+/// Common errors that can occur during serialization
+pub const SerializationError = error{
+    BufferTooSmall,
+    InvalidFormat,
+    CorruptedData,
+    UnsupportedVersion,
+};
+
+/// Common errors that can occur during deserialization
+pub const DeserializationError = error{
+    BufferTooSmall,
+    InvalidFormat,
+    CorruptedData,
+    UnsupportedVersion,
+    OutOfMemory,
+};
+
+/// Trait interface for types that can be serialized
+/// This interface is a documented convention; it is not enforced by the compiler
+pub fn Serializable(comptime T: type) type {
+    return struct {
+        /// Calculate the size needed to serialize this value
+        pub fn serializedSize(self: *const T) usize {
+            _ = self;
+            @compileError("serializedSize must be implemented for " ++ @typeName(T));
+        }
+
+        /// Serialize this value into the provided buffer
+        /// Returns the slice of buffer that was written to
+        pub fn serializeInto(self: *const T, buffer: []u8) SerializationError![]const u8 {
+            _ = self;
+            _ = buffer;
+            @compileError("serializeInto must be implemented for " ++ @typeName(T));
+        }
+
+        /// Deserialize a value from the provided buffer
+        pub fn deserializeFrom(buffer: []const u8, allocator: Allocator) DeserializationError!T {
+            _ = buffer;
+            _ = allocator;
+            @compileError("deserializeFrom must be implemented for " ++ @typeName(T));
+        }
+    };
+}
+
+/// Trait interface for types that can be serialized with an allocator
+pub fn SerializableWithAllocator(comptime T: type) type {
+    return struct {
+        /// Calculate the size needed to serialize this value
+        pub fn serializedSize(self: *const T) usize {
+            _ = self;
+            @compileError("serializedSize must be implemented for " ++ @typeName(T));
+        }
+
+        /// Serialize this value into the provided buffer
+        /// Returns the slice of buffer that was written to
+        pub fn serializeInto(self: *const T, buffer: []u8, allocator: Allocator) SerializationError![]const u8 {
+            _ = self;
+            _ = buffer;
+            _ = allocator;
+            @compileError("serializeInto must be implemented for " ++ @typeName(T));
+        }
+
+        /// Deserialize a value from the provided buffer
+        pub fn deserializeFrom(buffer: []const u8, allocator: Allocator) DeserializationError!T {
+            _ = buffer;
+            _ = allocator;
+            @compileError("deserializeFrom must be implemented for " ++ @typeName(T));
+        }
+    };
+}
+
+/// Helper function to write integers in little-endian format
+pub fn writeInt(comptime T: type, buffer: []u8, value: T) void {
+    if (buffer.len < @sizeOf(T)) {
+        @panic("Buffer too small for integer");
+    }
+    std.mem.writeInt(T, buffer[0..@sizeOf(T)], value, .little);
+}
+
+/// Helper function to read integers in little-endian format
+pub fn readInt(comptime T: type, buffer: []const u8) T {
+    if (buffer.len < @sizeOf(T)) {
+        @panic("Buffer too small for integer");
+    }
+    return std.mem.readInt(T, buffer[0..@sizeOf(T)], .little);
+}
+
+/// Helper function to ensure proper alignment for a buffer
+pub fn alignBuffer(comptime T: type, buffer: []u8) []align(@alignOf(T)) u8 {
+    return @as([]align(@alignOf(T)) u8, @alignCast(buffer));
+}
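+
+// Example (a sketch) of using the little-endian helpers above:
+//     var buf: [4]u8 = undefined;
+//     writeInt(u32, &buf, 42);
+//     std.debug.assert(readInt(u32, &buf) == 42);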
+
+/// Helper function to ensure proper alignment for a const buffer
+pub fn alignBufferConst(comptime T: type, buffer: []const u8) []align(@alignOf(T)) const u8 {
+    return @as([]align(@alignOf(T)) const u8, @alignCast(buffer));
+}
+
+/// Common validation function for serialization buffers
+pub fn validateBuffer(required_size: usize, buffer: []const u8) SerializationError!void {
+    if (buffer.len < required_size) {
+        return SerializationError.BufferTooSmall;
+    }
+}
+
+/// Common validation function for deserialization buffers
+pub fn validateDeserializationBuffer(required_size: usize, buffer: []const u8) DeserializationError!void {
+    if (buffer.len < required_size) {
+        return DeserializationError.BufferTooSmall;
+    }
+}
+
+/// Helper to check if a type implements the basic serialization interface
+pub fn hasSerializationInterface(comptime T: type) bool {
+    return @hasDecl(T, "serializedSize") and
+        @hasDecl(T, "serializeInto") and
+        @hasDecl(T, "deserializeFrom");
+}
+
+/// Helper to check if a type implements serialization with allocator
+pub fn hasSerializationWithAllocatorInterface(comptime T: type) bool {
+    if (!hasSerializationInterface(T)) return false;
+
+    // Check if serializeInto takes an allocator parameter
+    const serialize_info = @typeInfo(@TypeOf(T.serializeInto));
+    if (serialize_info != .@"fn") return false;
+
+    return serialize_info.@"fn".params.len >= 3; // self, buffer, allocator
+}
+
+test "serialization interface detection" {
+    const TestType = struct {
+        value: u32,
+
+        pub fn serializedSize(self: *const @This()) usize {
+            _ = self;
+            return @sizeOf(u32);
+        }
+
+        pub fn serializeInto(self: *const @This(), buffer: []u8) SerializationError![]const u8 {
+            try validateBuffer(@sizeOf(u32), buffer);
+            writeInt(u32, buffer, self.value);
+            return buffer[0..@sizeOf(u32)];
+        }
+
+        pub fn deserializeFrom(buffer: []const u8, allocator: Allocator) DeserializationError!@This() {
+            _ = allocator;
+            try validateDeserializationBuffer(@sizeOf(u32), buffer);
+            return @This(){ .value = readInt(u32, buffer) };
+        }
+    };
+
+    const std_testing = std.testing;
+    try std_testing.expect(hasSerializationInterface(TestType));
+    try std_testing.expect(!hasSerializationWithAllocatorInterface(TestType));
+}
diff --git a/src/serialization/safety.zig b/src/serialization/safety.zig
new file mode 100644
index 0000000000..4990714dc0
--- /dev/null
+++ b/src/serialization/safety.zig
@@ -0,0 +1,111 @@
+//! Memory safety utilities for serialization
+//!
+//! This module provides utilities to detect memory safety issues
+//! during serialization and deserialization operations.
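+//!
+//! Typical flow (a sketch): create a scratch buffer pre-filled with
+//! CANARY_PATTERN (see createCanaryBuffer below), serialize into its prefix,
+//! then call checkBufferIntegrity to confirm nothing wrote past the reported
+//! size, and checkForUninitializedMemory to catch canary runs leaking into
+//! the serialized bytes.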
+
+const std = @import("std");
+const mod = @import("mod.zig");
+
+/// Pattern used to detect uninitialized memory
+pub const CANARY_PATTERN: u8 = 0xAA;
+
+/// Longest run of consecutive canary bytes tolerated before the data is treated as uninitialized memory
+pub const MAX_CANARY_RUN: usize = 8;
+
+/// Error types for memory safety violations
+pub const MemorySafetyError = error{
+    UninitializedMemory,
+    BufferOverrun,
+};
+
+/// Check for uninitialized memory patterns in data
+/// This function looks for long runs of canary bytes which may indicate
+/// uninitialized memory being included in serialized data
+pub fn checkForUninitializedMemory(data: []const u8) MemorySafetyError!void {
+    var canary_run: usize = 0;
+    for (data) |byte| {
+        if (byte == CANARY_PATTERN) {
+            canary_run += 1;
+            if (canary_run > MAX_CANARY_RUN) {
+                return MemorySafetyError.UninitializedMemory;
+            }
+        } else {
+            canary_run = 0;
+        }
+    }
+}
+
+/// Check buffer integrity by verifying canary patterns in unused portions
+/// This helps detect buffer overruns during serialization
+pub fn checkBufferIntegrity(buffer: []const u8, used_size: usize) MemorySafetyError!void {
+    if (used_size > buffer.len) {
+        return MemorySafetyError.BufferOverrun;
+    }
+
+    for (buffer[used_size..]) |byte| {
+        if (byte != CANARY_PATTERN) {
+            return MemorySafetyError.BufferOverrun;
+        }
+    }
+}
+
+/// Create a buffer filled with canary pattern for testing
+pub fn createCanaryBuffer(allocator: std.mem.Allocator, size: usize) ![]u8 {
+    const buffer = try allocator.alloc(u8, size);
+    @memset(buffer, CANARY_PATTERN);
+    return buffer;
+}
+
+/// Create an aligned buffer filled with canary pattern for testing
+pub fn createCanaryBufferAligned(allocator: std.mem.Allocator, size: usize, comptime alignment: u29) ![]align(alignment) u8 {
+    const buffer = try allocator.alignedAlloc(u8, alignment, size);
+    @memset(buffer, CANARY_PATTERN);
+    return buffer;
+}
+
+/// Create a buffer with standard serialization alignment filled with canary pattern
+pub fn createStandardAlignedCanaryBuffer(allocator: std.mem.Allocator, size: usize) ![]align(mod.SERIALIZATION_ALIGNMENT) u8 {
+    const buffer = try allocator.alignedAlloc(u8, mod.SERIALIZATION_ALIGNMENT, size);
+    @memset(buffer, CANARY_PATTERN);
+    return buffer;
+}
+
+test "canary pattern detection" {
+    const testing = std.testing;
+
+    // Test detection of long canary runs
+    var buffer = try createCanaryBuffer(testing.allocator, 100);
+    defer testing.allocator.free(buffer);
+
+    // Should detect uninitialized memory
+    try testing.expectError(MemorySafetyError.UninitializedMemory, checkForUninitializedMemory(buffer));
+
+    // Break up the pattern
+    for (0..buffer.len) |i| {
+        if (i % 8 == 0) {
+            buffer[i] = 0xFF;
+        }
+    }
+
+    // Should pass now
+    try checkForUninitializedMemory(buffer);
+}
+
+test "buffer integrity checking" {
+    const testing = std.testing;
+
+    const buffer_size = 100;
+    const used_size = 50;
+
+    var buffer = try createCanaryBuffer(testing.allocator, buffer_size);
+    defer testing.allocator.free(buffer);
+
+    // Should pass with correct usage
+    try checkBufferIntegrity(buffer, used_size);
+
+    // Corrupt the unused portion
+    buffer[used_size + 10] = 0xFF;
+
+    // Should detect buffer overrun
+    try testing.expectError(MemorySafetyError.BufferOverrun, checkBufferIntegrity(buffer, used_size));
+}
diff --git a/src/serialization/testing.zig b/src/serialization/testing.zig
new file mode 100644
index 0000000000..1d175c62ae
--- /dev/null
+++ b/src/serialization/testing.zig
@@ -0,0 +1,145 @@
+//! Simple testing framework for serialization
+
+const std = @import("std");
+const safety = @import("safety.zig");
+const mod = @import("mod.zig");
+
+const testing = std.testing;
+const Allocator = std.mem.Allocator;
+
+/// Round-trip test for serializable types, with canary-based memory safety checks
+pub fn testSerialization(comptime T: type, value: *const T, allocator: Allocator) !void {
+    const size = value.serializedSize();
+
+    // Test with buffer that has extra space to detect overruns
+    const buffer_size = size + 32;
+    var buffer = try safety.createStandardAlignedCanaryBuffer(allocator, buffer_size);
+    defer allocator.free(buffer);
+
+    // Serialize
+    const serialized = try serializeValue(T, value, buffer[0..size], allocator);
+    try testing.expectEqual(size, serialized.len);
+
+    // Check for buffer overruns
+    try safety.checkBufferIntegrity(buffer, size);
+
+    // Check for uninitialized memory
+    try safety.checkForUninitializedMemory(serialized);
+
+    // Deserialize
+    const deserialized = try deserializeValue(T, serialized, allocator);
+    defer deinitValue(T, &deserialized, allocator);
+
+    // Test deterministic output - create aligned buffer
+    const buffer2 = try allocator.alignedAlloc(u8, mod.SERIALIZATION_ALIGNMENT, size);
+    defer allocator.free(buffer2);
+    const serialized2 = try serializeValue(T, value, buffer2, allocator);
+    try testing.expectEqualSlices(u8, serialized, serialized2);
+
+    // Test boundary conditions
+    if (size > 0) {
+        const small_buffer = try allocator.alignedAlloc(u8, mod.SERIALIZATION_ALIGNMENT, size - 1);
+        defer allocator.free(small_buffer);
+        try testing.expectError(error.BufferTooSmall, serializeValue(T, value, small_buffer, allocator));
+    }
+}
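+
+// Example (a sketch): any type following the serialization convention can be
+// exercised as
+//     try testSerialization(Slot, &slot, std.testing.allocator);
+// (see the Slot tests added in src/types/store.zig).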
@compileError("Type " ++ @typeName(T) ++ " does not implement deserializeFrom"); + } + + // Check function signature at compile time + const deserialize_info = @typeInfo(@TypeOf(T.deserializeFrom)); + if (deserialize_info != .@"fn") { + @compileError("deserializeFrom is not a function"); + } + + // Check parameter count to determine which signature to use + const param_count = deserialize_info.@"fn".params.len; + if (param_count >= 2) { + // Signature: deserializeFrom(buffer, allocator) + return try T.deserializeFrom(new_buffer, allocator); + } else { + // Signature: deserializeFrom(buffer) + return try T.deserializeFrom(new_buffer); + } +} + +/// Helper function to deinitialize a value if it has a deinit method +fn deinitValue(comptime T: type, value: *const T, allocator: Allocator) void { + if (@hasDecl(T, "deinit")) { + var mutable_value = value.*; + const deinit_info = @typeInfo(@TypeOf(T.deinit)); + if (deinit_info == .@"fn" and deinit_info.@"fn".params.len >= 2) { + mutable_value.deinit(allocator); + } else { + mutable_value.deinit(); + } + } +} + +test "simple serialization framework" { + const TestType = struct { + value: u32, + + pub fn serializedSize(self: *const @This()) usize { + _ = self; + return @sizeOf(u32); + } + + pub fn serializeInto(self: *const @This(), buffer: []u8) ![]const u8 { + try mod.validateBuffer(@sizeOf(u32), buffer); + mod.writeInt(u32, buffer, self.value); + return buffer[0..@sizeOf(u32)]; + } + + pub fn deserializeFrom(buffer: []const u8, allocator: Allocator) !@This() { + _ = allocator; + try mod.validateDeserializationBuffer(@sizeOf(u32), buffer); + return @This(){ .value = mod.readInt(u32, buffer) }; + } + }; + + const test_value = TestType{ .value = 42 }; + try testSerialization(TestType, &test_value, testing.allocator); +} diff --git a/src/snapshot.zig b/src/snapshot.zig index baa63fc905..793a6e4a42 100644 --- a/src/snapshot.zig +++ b/src/snapshot.zig @@ -5,6 +5,8 @@ const base = @import("base.zig"); const canonicalize = @import("check/canonicalize.zig"); const types_mod = @import("types.zig"); const types_problem_mod = @import("check/check_types/problem.zig"); +const cache = @import("cache/mod.zig"); + const Solver = @import("check/check_types.zig"); const CIR = canonicalize.CIR; const parse = @import("check/parse.zig"); @@ -1595,6 +1597,54 @@ fn processSnapshotFileUnified(gpa: Allocator, snapshot_path: []const u8, maybe_f try solver.checkDefs(); } + // Cache round-trip validation - ensure ModuleCache serialization/deserialization works + { + // Generate original S-expression for comparison + var original_tree = SExprTree.init(gpa); + defer original_tree.deinit(); + CIR.pushToSExprTree(&can_ir, null, &original_tree, content.source); + + var original_sexpr = std.ArrayList(u8).init(gpa); + defer original_sexpr.deinit(); + original_tree.toStringPretty(original_sexpr.writer().any()); + + // Create and serialize MmapCache + const cache_data = try cache.CacheModule.create(gpa, &module_env, &can_ir); + defer gpa.free(cache_data); + + // Deserialize back + var loaded_cache = try cache.CacheModule.fromMappedMemory(cache_data); + + // Restore ModuleEnv and CIR + const restored = try loaded_cache.restore(gpa); + var restored_module_env = restored.module_env; + defer restored_module_env.deinit(); + var restored_cir = restored.cir; + defer restored_cir.deinit(); + + // Fix env pointer after struct move + restored_cir.env = &restored_module_env; + + // Generate S-expression from restored CIR + var restored_tree = SExprTree.init(gpa); + defer 
restored_tree.deinit(); + CIR.pushToSExprTree(&restored_cir, null, &restored_tree, content.source); + + var restored_sexpr = std.ArrayList(u8).init(gpa); + defer restored_sexpr.deinit(); + restored_tree.toStringPretty(restored_sexpr.writer().any()); + + // Compare S-expressions - crash if they don't match + if (!std.mem.eql(u8, original_sexpr.items, restored_sexpr.items)) { + std.log.err("Cache round-trip validation failed for snapshot: {s}", .{snapshot_path}); + std.log.err("Original and restored CIR S-expressions don't match!", .{}); + std.log.err("This indicates a bug in MmapCache serialization/deserialization.", .{}); + std.log.err("Original S-expression:\n{s}", .{original_sexpr.items}); + std.log.err("Restored S-expression:\n{s}", .{restored_sexpr.items}); + return false; + } + } + // Buffer all output in memory before writing files var md_buffer = std.ArrayList(u8).init(gpa); defer md_buffer.deinit(); diff --git a/src/snapshots/hello_world.roc b/src/snapshots/hello_world.roc new file mode 100644 index 0000000000..4c9438f34a --- /dev/null +++ b/src/snapshots/hello_world.roc @@ -0,0 +1,5 @@ +app [main!] { pf: platform "../basic-cli/platform.roc" } + +import pf.Stdout + +main! = |_| Stdout.line!("Hello, world!") diff --git a/src/test.zig b/src/test.zig index cba9519f57..c9fce43ba9 100644 --- a/src/test.zig +++ b/src/test.zig @@ -4,7 +4,9 @@ const testing = std.testing; test { testing.refAllDeclsRecursive(@import("main.zig")); testing.refAllDeclsRecursive(@import("builtins/main.zig")); - testing.refAllDeclsRecursive(@import("cache.zig")); + testing.refAllDeclsRecursive(@import("cache/mod.zig")); + testing.refAllDeclsRecursive(@import("cache/CacheModule.zig")); + testing.refAllDeclsRecursive(@import("serialization/mod.zig")); // TODO: Remove after hooking up testing.refAllDeclsRecursive(@import("reporting.zig")); diff --git a/src/types/store.zig b/src/types/store.zig index d1483cf1fd..13622a5655 100644 --- a/src/types/store.zig +++ b/src/types/store.zig @@ -2,10 +2,10 @@ //! 
 //! Contains both Slot & Descriptor stores
 
 const std = @import("std");
-
 const base = @import("../base.zig");
 const collections = @import("../collections.zig");
 const types = @import("./types.zig");
+const serialization = @import("../serialization/mod.zig");
 
 const exitOnOutOfMemory = collections.utils.exitOnOom;
 
@@ -18,11 +18,12 @@ const Mark = types.Mark;
 const RecordField = types.RecordField;
 const TagUnion = types.TagUnion;
 const Tag = types.Tag;
-
 const VarSafeList = Var.SafeList;
 const RecordFieldSafeMultiList = RecordField.SafeMultiList;
 const TagSafeMultiList = Tag.SafeMultiList;
 
+const SERIALIZATION_ALIGNMENT = serialization.SERIALIZATION_ALIGNMENT;
+
 /// A variable & its descriptor info
 pub const ResolvedVarDesc = struct { var_: Var, desc_idx: DescStore.Idx, desc: Desc };
 
@@ -34,7 +35,43 @@ pub const Slot = union(enum) {
     root: DescStore.Idx,
     redirect: Var,
 
-    const ArrayList = std.ArrayListUnmanaged(Slot);
+    /// Calculate the size needed to serialize this Slot
+    pub fn serializedSize(self: *const Slot) usize {
+        _ = self;
+        return @sizeOf(u8) + @sizeOf(u32); // tag + data
+    }
+
+    /// Serialize this Slot into the provided buffer
+    pub fn serializeInto(self: *const Slot, buffer: []u8) ![]u8 {
+        if (buffer.len < self.serializedSize()) return error.BufferTooSmall;
+
+        switch (self.*) {
+            .root => |idx| {
+                buffer[0] = 0; // tag for root
+                std.mem.writeInt(u32, buffer[1..5], @intFromEnum(idx), .little);
+            },
+            .redirect => |var_| {
+                buffer[0] = 1; // tag for redirect
+                std.mem.writeInt(u32, buffer[1..5], @intFromEnum(var_), .little);
+            },
+        }
+
+        return buffer[0..self.serializedSize()];
+    }
+
+    /// Deserialize a Slot from the provided buffer
+    pub fn deserializeFrom(buffer: []const u8) !Slot {
+        if (buffer.len < @sizeOf(u8) + @sizeOf(u32)) return error.BufferTooSmall;
+
+        const tag = buffer[0];
+        const data = std.mem.readInt(u32, buffer[1..5], .little);
+
+        switch (tag) {
+            0 => return Slot{ .root = @enumFromInt(data) },
+            1 => return Slot{ .redirect = @enumFromInt(data) },
+            else => return error.InvalidTag,
+        }
+    }
 };
 
 /// The store of all type variables and their descriptors
@@ -193,14 +230,11 @@ pub const Store = struct {
                 idx - self.descs.backing.len + 1,
             );
         }
-        if (idx > self.slots.backing.items.len) {
-            try self.slots.backing.ensureTotalCapacity(
-                self.gpa,
-                idx - self.slots.backing.items.len + 1,
-            );
+        if (idx > self.slots.backing.len()) {
+            // SafeList has no ensureTotalCapacity; the while loop below grows it as needed
         }
 
-        while (self.slots.backing.items.len <= idx) {
+        while (self.slots.backing.len() <= idx) {
             const desc_idx = self.descs.insert(
                 self.gpa,
                 .{ .content = .{ .flex_var = null }, .rank = Rank.top_level, .mark = Mark.none },
@@ -624,17 +658,199 @@ pub const Store = struct {
     fn slotIdxToVar(slot_idx: SlotStore.Idx) Var {
         return @enumFromInt(@intFromEnum(slot_idx));
    }
+
+    /// Calculate the size needed to serialize this Store
+    pub fn serializedSize(self: *const Self) usize {
+        const slots_size = self.slots.serializedSize();
+        const descs_size = self.descs.serializedSize();
+        const tuple_elems_size = self.tuple_elems.serializedSize();
+        const func_args_size = self.func_args.serializedSize();
+        const record_fields_size = self.record_fields.serializedSize();
+        const tags_size = self.tags.serializedSize();
+        const tag_args_size = self.tag_args.serializedSize();
+
+        // Add alignment padding for each component
+        var total_size: usize = @sizeOf(u32) * 7; // size headers
+        total_size = std.mem.alignForward(usize, total_size, SERIALIZATION_ALIGNMENT);
+        total_size += slots_size;
+        total_size += descs_size;
+        total_size = std.mem.alignForward(usize, total_size, SERIALIZATION_ALIGNMENT);
+        total_size += tuple_elems_size;
+        total_size = std.mem.alignForward(usize, total_size, SERIALIZATION_ALIGNMENT);
+        total_size += func_args_size;
+        total_size = std.mem.alignForward(usize, total_size, SERIALIZATION_ALIGNMENT);
+        total_size += record_fields_size;
+        total_size = std.mem.alignForward(usize, total_size, SERIALIZATION_ALIGNMENT);
+        total_size += tags_size;
+        total_size = std.mem.alignForward(usize, total_size, SERIALIZATION_ALIGNMENT);
+        total_size += tag_args_size;
+
+        // Align to SERIALIZATION_ALIGNMENT to maintain alignment for subsequent data
+        return std.mem.alignForward(usize, total_size, SERIALIZATION_ALIGNMENT);
+    }
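+
+    // Buffer layout note: seven u32 size headers, then each component aligned
+    // to SERIALIZATION_ALIGNMENT, except descs, which is byte-packed directly
+    // after slots. serializedSize and serializeInto must agree on this layout.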
+
+    /// Serialize this Store into the provided buffer
+    pub fn serializeInto(self: *const Self, buffer: []u8, allocator: Allocator) ![]u8 {
+        const size = self.serializedSize();
+        if (buffer.len < size) return error.BufferTooSmall;
+
+        var offset: usize = 0;
+        _ = allocator;
+
+        // Write sizes
+        const slots_size = self.slots.serializedSize();
+        @as(*u32, @ptrCast(@alignCast(buffer.ptr + offset))).* = @intCast(slots_size);
+        offset += @sizeOf(u32);
+
+        const descs_size = self.descs.serializedSize();
+        @as(*u32, @ptrCast(@alignCast(buffer.ptr + offset))).* = @intCast(descs_size);
+        offset += @sizeOf(u32);
+
+        const tuple_elems_size = self.tuple_elems.serializedSize();
+        @as(*u32, @ptrCast(@alignCast(buffer.ptr + offset))).* = @intCast(tuple_elems_size);
+        offset += @sizeOf(u32);
+
+        const func_args_size = self.func_args.serializedSize();
+        @as(*u32, @ptrCast(@alignCast(buffer.ptr + offset))).* = @intCast(func_args_size);
+        offset += @sizeOf(u32);
+
+        const record_fields_size = self.record_fields.serializedSize();
+        @as(*u32, @ptrCast(@alignCast(buffer.ptr + offset))).* = @intCast(record_fields_size);
+        offset += @sizeOf(u32);
+
+        const tags_size = self.tags.serializedSize();
+        @as(*u32, @ptrCast(@alignCast(buffer.ptr + offset))).* = @intCast(tags_size);
+        offset += @sizeOf(u32);
+
+        const tag_args_size = self.tag_args.serializedSize();
+        @as(*u32, @ptrCast(@alignCast(buffer.ptr + offset))).* = @intCast(tag_args_size);
+        offset += @sizeOf(u32);
+
+        // Serialize data
+        offset = std.mem.alignForward(usize, offset, SERIALIZATION_ALIGNMENT);
+        const slots_buffer = @as([]align(SERIALIZATION_ALIGNMENT) u8, @alignCast(buffer[offset .. offset + slots_size]));
+        const slots_slice = try self.slots.serializeInto(slots_buffer);
+        offset += slots_slice.len;
+
+        const descs_slice = try self.descs.serializeInto(buffer[offset..]);
+        offset += descs_slice.len;
+
+        offset = std.mem.alignForward(usize, offset, SERIALIZATION_ALIGNMENT);
+        const tuple_elems_buffer = @as([]align(SERIALIZATION_ALIGNMENT) u8, @alignCast(buffer[offset .. offset + tuple_elems_size]));
+        const tuple_elems_slice = try self.tuple_elems.serializeInto(tuple_elems_buffer);
+        offset += tuple_elems_slice.len;
+
+        offset = std.mem.alignForward(usize, offset, SERIALIZATION_ALIGNMENT);
+        const func_args_buffer = @as([]align(SERIALIZATION_ALIGNMENT) u8, @alignCast(buffer[offset .. offset + func_args_size]));
+        const func_args_slice = try self.func_args.serializeInto(func_args_buffer);
+        offset += func_args_slice.len;
+
+        offset = std.mem.alignForward(usize, offset, SERIALIZATION_ALIGNMENT);
+        const record_fields_buffer = @as([]align(SERIALIZATION_ALIGNMENT) u8, @alignCast(buffer[offset .. offset + record_fields_size]));
+        const record_fields_slice = try self.record_fields.serializeInto(record_fields_buffer);
+        offset += record_fields_slice.len;
+
+        offset = std.mem.alignForward(usize, offset, SERIALIZATION_ALIGNMENT);
+        const tags_buffer = @as([]align(SERIALIZATION_ALIGNMENT) u8, @alignCast(buffer[offset .. offset + tags_size]));
+        const tags_slice = try self.tags.serializeInto(tags_buffer);
+        offset += tags_slice.len;
+
+        offset = std.mem.alignForward(usize, offset, SERIALIZATION_ALIGNMENT);
+        const tag_args_buffer = @as([]align(SERIALIZATION_ALIGNMENT) u8, @alignCast(buffer[offset .. offset + tag_args_size]));
+        const tag_args_slice = try self.tag_args.serializeInto(tag_args_buffer);
+        offset += tag_args_slice.len;
+
+        // Zero out any padding bytes
+        if (offset < size) {
+            @memset(buffer[offset..size], 0);
+        }
+
+        return buffer[0..size];
+    }
+
+    /// Deserialize a Store from the provided buffer
+    pub fn deserializeFrom(buffer: []const u8, allocator: Allocator) !Self {
+        if (buffer.len < @sizeOf(u32) * 7) return error.BufferTooSmall;
+
+        var offset: usize = 0;
+
+        // Read sizes
+        const slots_size = @as(*const u32, @ptrCast(@alignCast(buffer.ptr + offset))).*;
+        offset += @sizeOf(u32);
+
+        const descs_size = @as(*const u32, @ptrCast(@alignCast(buffer.ptr + offset))).*;
+        offset += @sizeOf(u32);
+
+        const tuple_elems_size = @as(*const u32, @ptrCast(@alignCast(buffer.ptr + offset))).*;
+        offset += @sizeOf(u32);
+
+        const func_args_size = @as(*const u32, @ptrCast(@alignCast(buffer.ptr + offset))).*;
+        offset += @sizeOf(u32);
+
+        const record_fields_size = @as(*const u32, @ptrCast(@alignCast(buffer.ptr + offset))).*;
+        offset += @sizeOf(u32);
+
+        const tags_size = @as(*const u32, @ptrCast(@alignCast(buffer.ptr + offset))).*;
+        offset += @sizeOf(u32);
+
+        const tag_args_size = @as(*const u32, @ptrCast(@alignCast(buffer.ptr + offset))).*;
+        offset += @sizeOf(u32);
+
+        // Deserialize data
+        offset = std.mem.alignForward(usize, offset, SERIALIZATION_ALIGNMENT);
+        const slots_buffer = @as([]align(SERIALIZATION_ALIGNMENT) const u8, @alignCast(buffer[offset .. offset + slots_size]));
+        const slots = try SlotStore.deserializeFrom(slots_buffer, allocator);
+        offset += slots_size;
+
+        const descs_buffer = buffer[offset .. offset + descs_size];
+        const descs = try DescStore.deserializeFrom(descs_buffer, allocator);
+        offset += descs_size;
+
+        offset = std.mem.alignForward(usize, offset, SERIALIZATION_ALIGNMENT);
+        const tuple_elems_buffer = @as([]align(SERIALIZATION_ALIGNMENT) const u8, @alignCast(buffer[offset .. offset + tuple_elems_size]));
+        const tuple_elems = try VarSafeList.deserializeFrom(tuple_elems_buffer, allocator);
+        offset += tuple_elems_size;
+
+        offset = std.mem.alignForward(usize, offset, SERIALIZATION_ALIGNMENT);
+        const func_args_buffer = @as([]align(SERIALIZATION_ALIGNMENT) const u8, @alignCast(buffer[offset .. offset + func_args_size]));
+        const func_args = try VarSafeList.deserializeFrom(func_args_buffer, allocator);
+        offset += func_args_size;
+
+        offset = std.mem.alignForward(usize, offset, SERIALIZATION_ALIGNMENT);
+        const record_fields_buffer = @as([]align(SERIALIZATION_ALIGNMENT) const u8, @alignCast(buffer[offset .. offset + record_fields_size]));
+        const record_fields = try RecordFieldSafeMultiList.deserializeFrom(record_fields_buffer, allocator);
+        offset += record_fields_size;
+
+        offset = std.mem.alignForward(usize, offset, SERIALIZATION_ALIGNMENT);
+        const tags_buffer = @as([]align(SERIALIZATION_ALIGNMENT) const u8, @alignCast(buffer[offset .. offset + tags_size]));
+        const tags = try TagSafeMultiList.deserializeFrom(tags_buffer, allocator);
+        offset += tags_size;
+
+        offset = std.mem.alignForward(usize, offset, SERIALIZATION_ALIGNMENT);
+        const tag_args_buffer = @as([]align(SERIALIZATION_ALIGNMENT) const u8, @alignCast(buffer[offset .. offset + tag_args_size]));
+        const tag_args = try VarSafeList.deserializeFrom(tag_args_buffer, allocator);
+
+        return Self{
+            .gpa = allocator,
+            .slots = slots,
+            .descs = descs,
+            .tuple_elems = tuple_elems,
+            .func_args = func_args,
+            .record_fields = record_fields,
+            .tags = tags,
+            .tag_args = tag_args,
+        };
+    }
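+
+    // Note: deserializeFrom reads the u32 size headers through pointer casts
+    // with @alignCast, so the incoming buffer must itself start at
+    // SERIALIZATION_ALIGNMENT.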
 };
 
 /// Represents a store of slots
 const SlotStore = struct {
     const Self = @This();
 
-    backing: Slot.ArrayList,
+    backing: collections.SafeList(Slot),
 
     fn init(gpa: Allocator, capacity: usize) Self {
-        const arr_list = Slot.ArrayList.initCapacity(gpa, capacity) catch |err| exitOnOutOfMemory(err);
-        return .{ .backing = arr_list };
+        return .{ .backing = collections.SafeList(Slot).initCapacity(gpa, capacity) };
     }
 
     fn deinit(self: *Self, gpa: Allocator) void {
@@ -643,26 +859,40 @@ const SlotStore = struct {
 
     /// Insert a new slot into the store
     fn insert(self: *Self, gpa: Allocator, typ: Slot) Idx {
-        const idx: Idx = @enumFromInt(self.backing.items.len);
-        self.backing.append(gpa, typ) catch |err| exitOnOutOfMemory(err);
-        return idx;
+        const safe_idx = self.backing.append(gpa, typ);
+        return @enumFromInt(@intFromEnum(safe_idx));
     }
 
     /// Insert a value into the store
-    fn appendAssumeCapacity(self: *Self, gpa: Allocator, typ: Desc) Idx {
-        const idx: Idx = @enumFromInt(self.backing.len);
-        self.backing.appendAssumeCapacity(gpa, typ) catch |err| exitOnOutOfMemory(err);
-        return idx;
+    fn appendAssumeCapacity(self: *Self, gpa: Allocator, typ: Slot) Idx {
+        const safe_idx = self.backing.append(gpa, typ);
+        return @enumFromInt(@intFromEnum(safe_idx));
     }
 
     /// Set a value in the store
     pub fn set(self: *Self, idx: Idx, val: Slot) void {
-        self.backing.items[@intFromEnum(idx)] = val;
+        self.backing.set(@enumFromInt(@intFromEnum(idx)), val);
     }
 
     /// Get a value from the store
     fn get(self: *const Self, idx: Idx) Slot {
-        return self.backing.items[@intFromEnum(idx)];
+        return self.backing.get(@enumFromInt(@intFromEnum(idx))).*;
+    }
+
+    /// Calculate the size needed to serialize this SlotStore
+    pub fn serializedSize(self: *const Self) usize {
+        return self.backing.serializedSize();
+    }
+
+    /// Serialize this SlotStore into the provided buffer
+    pub fn serializeInto(self: *const Self, buffer: []align(SERIALIZATION_ALIGNMENT) u8) ![]align(SERIALIZATION_ALIGNMENT) const u8 {
+        return self.backing.serializeInto(buffer);
+    }
+
+    /// Deserialize a SlotStore from the provided buffer
+    pub fn deserializeFrom(buffer: []align(SERIALIZATION_ALIGNMENT) const u8, allocator: Allocator) !Self {
+        const backing = try collections.SafeList(Slot).deserializeFrom(buffer, allocator);
+        return Self{ .backing = backing };
     }
 
     /// A type-safe index into the store
@@ -685,7 +915,7 @@ const DescStore = struct {
     }
 
     /// Deinit & free allocated memory
-    fn deinit(self: *Self, gpa: Allocator) void {
+    pub fn deinit(self: *Self, gpa: Allocator) void {
         self.backing.deinit(gpa);
     }
 
@@ -706,6 +936,82 @@ const DescStore = struct {
         return self.backing.get(@intFromEnum(idx));
     }
 
+    /// Calculate the size needed to serialize this DescStore
+    pub fn serializedSize(self: *const Self) usize {
+        const raw_size = @sizeOf(u32) + (self.backing.len * (@sizeOf(Content) + 1 + 4)); // count header + per item: content + rank (u8) + mark (u32)
+        // Align to SERIALIZATION_ALIGNMENT to maintain alignment for subsequent data
+        return std.mem.alignForward(usize, raw_size, SERIALIZATION_ALIGNMENT);
+    }
+
+    /// Serialize this DescStore into the provided buffer
+    pub fn serializeInto(self: *const Self, buffer: []u8) ![]u8 {
+        const size = self.serializedSize();
+        if (buffer.len < size) return error.BufferTooSmall;
+
+        // Write count
+        std.mem.writeInt(u32, buffer[0..4], @intCast(self.backing.len), .little);
+
+        var offset: usize = @sizeOf(u32);
+
+        // Write data
+        if (self.backing.len > 0) {
+            const slice = self.backing.slice();
+            for (slice.items(.content), slice.items(.rank), slice.items(.mark)) |content, rank, mark| {
+                // Serialize each field individually to avoid padding
+                @memcpy(buffer[offset .. offset + @sizeOf(Content)], std.mem.asBytes(&content));
+                offset += @sizeOf(Content);
+
+                buffer[offset] = @intFromEnum(rank);
+                offset += 1;
+
+                std.mem.writeInt(u32, buffer[offset .. offset + 4][0..4], @intFromEnum(mark), .little);
+                offset += 4;
+            }
+        }
+
+        // Zero out any padding bytes
+        if (offset < size) {
+            @memset(buffer[offset..size], 0);
+        }
+
+        return buffer[0..size];
+    }
+
+    /// Deserialize a DescStore from the provided buffer
+    pub fn deserializeFrom(buffer: []const u8, allocator: Allocator) !Self {
+        if (buffer.len < @sizeOf(u32)) return error.BufferTooSmall;
+
+        const count = std.mem.readInt(u32, buffer[0..4], .little);
+        const expected_size = @sizeOf(u32) + (count * (@sizeOf(Content) + 1 + 4));
+
+        if (buffer.len < expected_size) return error.BufferTooSmall;
+
+        var result = Self.init(allocator, count);
+
+        if (count > 0) {
+            var offset: usize = @sizeOf(u32);
+            for (0..count) |_| {
+                const item_size = @sizeOf(Content) + 1 + 4;
+                if (offset + item_size > buffer.len) return error.BufferTooSmall;
+
+                // Deserialize each field individually
+                const content = std.mem.bytesAsValue(Content, buffer[offset .. offset + @sizeOf(Content)]).*;
+                offset += @sizeOf(Content);
+
+                const rank: Rank = @enumFromInt(buffer[offset]);
+                offset += 1;
+
+                const mark: Mark = @enumFromInt(std.mem.readInt(u32, buffer[offset .. offset + 4][0..4], .little));
+                offset += 4;
+
+                const desc = Desc{ .content = content, .rank = rank, .mark = mark };
+                _ = result.insert(allocator, desc);
+            }
+        }
+
+        return result;
+    }
+
     /// A type-safe index into the store
     /// This type is made public below
     const Idx = enum(u32) { _ };
@@ -776,3 +1082,61 @@ test "resolveVarAndCompressPath - flattens redirect chain to structure" {
     try std.testing.expectEqual(Slot{ .redirect = c }, store.getSlot(a));
     try std.testing.expectEqual(Slot{ .redirect = c }, store.getSlot(b));
 }
+
+test "Slot serialization comprehensive" {
+    const gpa = std.testing.allocator;
+
+    // Test various slot types including edge cases
+    const slot1 = Slot{ .root = @enumFromInt(0) }; // minimum value
+    const slot2 = Slot{ .root = @enumFromInt(0xFFFFFFFF) }; // maximum value
+    const slot3 = Slot{ .redirect = @enumFromInt(0) }; // minimum redirect
+    const slot4 = Slot{ .redirect = @enumFromInt(0xFFFFFFFF) }; // maximum redirect
+
+    // Test serialization using the testing framework
+    try serialization.testing.testSerialization(Slot, &slot1, gpa);
+    try serialization.testing.testSerialization(Slot, &slot2, gpa);
+    try serialization.testing.testSerialization(Slot, &slot3, gpa);
+    try serialization.testing.testSerialization(Slot, &slot4, gpa);
+}
+
+test "DescStore serialization comprehensive" {
+    const gpa = std.testing.allocator;
+
+    var store = DescStore.init(gpa, 8);
+    defer store.deinit(gpa);
+
+    // Add various descriptor types including edge cases
+    const desc1 = types.Descriptor{
+        .content = Content{ .flex_var = null },
+        .rank = types.Rank.generalized,
+        .mark = types.Mark.none,
+    };
+
+    const desc2 = types.Descriptor{
+        .content = Content{ .flex_var = @bitCast(@as(u32, 0)) },
+        .rank = types.Rank.top_level,
+        .mark = types.Mark.visited,
+    };
+
+    const desc3 = types.Descriptor{
+        .content = Content{ .flex_var = @bitCast(@as(u32, 0xFFFFFFFF)) },
+        .rank = types.Rank.top_level,
+        .mark = types.Mark.visited,
+    };
+
+    _ = store.insert(gpa, desc1);
+    _ = store.insert(gpa, desc2);
+    _ = store.insert(gpa, desc3);
+
+    // Test serialization
+    try serialization.testing.testSerialization(DescStore, &store, gpa);
+}
+
+test "DescStore empty store serialization" {
+    const gpa = std.testing.allocator;
+
+    var empty_store = DescStore.init(gpa, 0);
+    defer empty_store.deinit(gpa);
+
+    try serialization.testing.testSerialization(DescStore, &empty_store, gpa);
+}
diff --git a/src/types/writers.zig b/src/types/writers.zig
index 095be4162a..f9d1cd2471 100644
--- a/src/types/writers.zig
+++ b/src/types/writers.zig
@@ -27,7 +27,7 @@ pub const SExprWriter = struct {
     var root_node = SExpr.init(gpa, "types_store");
     defer root_node.deinit(gpa);
 
-    if (env.types.slots.backing.items.len == 0) {
+    if (env.types.slots.backing.len() == 0) {
         root_node.appendStringAttr(gpa, "vars", "empty");
     }
 
@@ -36,7 +36,7 @@ pub const SExprWriter = struct {
 
     var type_writer = TypeWriter.init(buffer.writer(), env);
 
-    for (0..env.types.slots.backing.items.len) |slot_idx| {
+    for (0..env.types.slots.backing.len()) |slot_idx| {
         const var_: Var = @enumFromInt(slot_idx);
 
         var var_node = SExpr.init(gpa, "var");
@@ -80,7 +80,7 @@ pub const TypeWriter = struct {
     /// Convert a var to a type string
     pub fn writeVar(self: *Self, var_: types.Var) Allocator.Error!void {
         // Debug assert that the variable is in bounds - if not, we have a bug in type checking
-        std.debug.assert(@intFromEnum(var_) < self.env.types.slots.backing.items.len);
+        std.debug.assert(@intFromEnum(var_) < self.env.types.slots.backing.len());
 
         const resolved = self.env.types.resolveVar(var_);
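
// Usage sketch for the Store serialization added above. `store` and `gpa` are
// illustrative names, and `deinit()` is assumed here to take no arguments:
//
//     const size = store.serializedSize();
//     const buffer = try gpa.alignedAlloc(u8, SERIALIZATION_ALIGNMENT, size);
//     defer gpa.free(buffer);
//     const bytes = try store.serializeInto(buffer, gpa);
//     var restored = try Store.deserializeFrom(bytes, gpa);
//     defer restored.deinit();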