Implement module caching #7979

Merged: 9 commits, Jul 9, 2025

src/base/Ident.zig (301 changes: 300 additions & 1 deletion)
@@ -9,6 +9,7 @@ const std = @import("std");
const collections = @import("../collections.zig");
const Region = @import("Region.zig");
const ModuleImport = @import("ModuleImport.zig");
const serialization = @import("../serialization/mod.zig");

const SmallStringInterner = collections.SmallStringInterner;
const exitOnOom = collections.utils.exitOnOom;
@@ -162,7 +163,7 @@ pub const Store = struct {
digit_index -= 1;
}

const name = str_buffer[digit_index..];
const name = str_buffer[digit_index + 1 ..];

const idx = self.interner.insert(gpa, name, Region.zero());
self.exposing_modules.append(gpa, @enumFromInt(0)) catch |err| exitOnOom(err);
@@ -201,6 +202,220 @@ pub const Store = struct {
return self.interner.getText(@enumFromInt(@as(u32, idx.idx)));
}

/// Calculate the size needed to serialize this Ident.Store
pub fn serializedSize(self: *const Store) usize {
var size: usize = 0;

// SmallStringInterner components
size += @sizeOf(u32); // bytes_len
size += self.interner.bytes.items.len; // bytes data
size = std.mem.alignForward(usize, size, @alignOf(u32)); // align for next u32

size += @sizeOf(u32); // outer_indices_len
size += self.interner.outer_indices.items.len * @sizeOf(@TypeOf(self.interner.outer_indices.items[0])); // outer_indices data
size = std.mem.alignForward(usize, size, @alignOf(u32)); // align for next u32

size += @sizeOf(u32); // regions_len
size += self.interner.regions.items.len * @sizeOf(@TypeOf(self.interner.regions.items[0])); // regions data
size = std.mem.alignForward(usize, size, @alignOf(u32)); // align for next u32

// Store components
size += @sizeOf(u32); // exposing_modules_len
size += self.exposing_modules.items.len * @sizeOf(@TypeOf(self.exposing_modules.items[0])); // exposing_modules data
size = std.mem.alignForward(usize, size, @alignOf(u32)); // align for next u32

size += @sizeOf(u32); // attributes_len
size += self.attributes.items.len * @sizeOf(u8); // attributes data (packed as bytes)
size = std.mem.alignForward(usize, size, @alignOf(u32)); // align for next u32

size += @sizeOf(u32); // next_unique_name

// Align to SERIALIZATION_ALIGNMENT to maintain alignment for subsequent data
return std.mem.alignForward(usize, size, serialization.SERIALIZATION_ALIGNMENT);
}

/// Serialize this Ident.Store into the provided buffer
pub fn serializeInto(self: *const Store, buffer: []u8, gpa: std.mem.Allocator) ![]u8 {
const size = self.serializedSize();
if (buffer.len < size) return error.BufferTooSmall;

var offset: usize = 0;

// Serialize interner bytes
const bytes_len = @as(u32, @intCast(self.interner.bytes.items.len));
@as(*u32, @ptrCast(@alignCast(buffer.ptr + offset))).* = bytes_len;
offset += @sizeOf(u32);
if (bytes_len > 0) {
@memcpy(buffer[offset .. offset + bytes_len], self.interner.bytes.items);
offset += bytes_len;
}
offset = std.mem.alignForward(usize, offset, @alignOf(u32));

// Serialize interner outer_indices
const outer_indices_len = @as(u32, @intCast(self.interner.outer_indices.items.len));
@as(*u32, @ptrCast(@alignCast(buffer.ptr + offset))).* = outer_indices_len;
offset += @sizeOf(u32);
if (outer_indices_len > 0) {
const outer_indices_bytes = outer_indices_len * @sizeOf(@TypeOf(self.interner.outer_indices.items[0]));
@memcpy(buffer[offset .. offset + outer_indices_bytes], std.mem.sliceAsBytes(self.interner.outer_indices.items));
offset += outer_indices_bytes;
}
offset = std.mem.alignForward(usize, offset, @alignOf(u32));

// Serialize interner regions
const regions_len = @as(u32, @intCast(self.interner.regions.items.len));
@as(*u32, @ptrCast(@alignCast(buffer.ptr + offset))).* = regions_len;
offset += @sizeOf(u32);
if (regions_len > 0) {
const regions_bytes = regions_len * @sizeOf(@TypeOf(self.interner.regions.items[0]));
@memcpy(buffer[offset .. offset + regions_bytes], std.mem.sliceAsBytes(self.interner.regions.items));
offset += regions_bytes;
}
offset = std.mem.alignForward(usize, offset, @alignOf(u32));

// Serialize exposing_modules
const exposing_modules_len = @as(u32, @intCast(self.exposing_modules.items.len));
@as(*u32, @ptrCast(@alignCast(buffer.ptr + offset))).* = exposing_modules_len;
offset += @sizeOf(u32);
if (exposing_modules_len > 0) {
const exposing_modules_bytes = exposing_modules_len * @sizeOf(@TypeOf(self.exposing_modules.items[0]));
@memcpy(buffer[offset .. offset + exposing_modules_bytes], std.mem.sliceAsBytes(self.exposing_modules.items));
offset += exposing_modules_bytes;
}
offset = std.mem.alignForward(usize, offset, @alignOf(u32));

// Serialize attributes
const attributes_len = @as(u32, @intCast(self.attributes.items.len));
@as(*u32, @ptrCast(@alignCast(buffer.ptr + offset))).* = attributes_len;
offset += @sizeOf(u32);
if (attributes_len > 0) {
// Serialize each Attributes as a single byte to avoid padding
for (self.attributes.items) |attr| {
const attr_bits: u3 = @bitCast(attr);
buffer[offset] = @as(u8, attr_bits);
offset += 1;
}
}
offset = std.mem.alignForward(usize, offset, @alignOf(u32));

// Serialize next_unique_name
@as(*u32, @ptrCast(@alignCast(buffer.ptr + offset))).* = self.next_unique_name;
offset += @sizeOf(u32);

_ = gpa; // suppress unused parameter warning

// Zero out any padding bytes
if (offset < size) {
@memset(buffer[offset..size], 0);
}

return buffer[0..size];
}

/// Deserialize an Ident.Store from the provided buffer
pub fn deserializeFrom(buffer: []const u8, gpa: std.mem.Allocator) !Store {
if (buffer.len < @sizeOf(u32)) return error.BufferTooSmall;

var offset: usize = 0;

// Deserialize interner bytes
const bytes_len = @as(*const u32, @ptrCast(@alignCast(buffer.ptr + offset))).*;
offset += @sizeOf(u32);
var bytes = std.ArrayListUnmanaged(u8){};
if (bytes_len > 0) {
if (offset + bytes_len > buffer.len) return error.BufferTooSmall;
try bytes.appendSlice(gpa, buffer[offset .. offset + bytes_len]);
offset += bytes_len;
}
offset = std.mem.alignForward(usize, offset, @alignOf(u32));

// Deserialize interner outer_indices
const outer_indices_len = @as(*const u32, @ptrCast(@alignCast(buffer.ptr + offset))).*;
offset += @sizeOf(u32);
var outer_indices = std.ArrayListUnmanaged(@import("../collections/SmallStringInterner.zig").StringIdx){};
if (outer_indices_len > 0) {
const outer_indices_bytes = outer_indices_len * @sizeOf(@import("../collections/SmallStringInterner.zig").StringIdx);
if (offset + outer_indices_bytes > buffer.len) return error.BufferTooSmall;
const outer_indices_data = @as([*]const @import("../collections/SmallStringInterner.zig").StringIdx, @ptrCast(@alignCast(buffer.ptr + offset)));
try outer_indices.appendSlice(gpa, outer_indices_data[0..outer_indices_len]);
offset += outer_indices_bytes;
}
offset = std.mem.alignForward(usize, offset, @alignOf(u32));

// Deserialize interner regions
const regions_len = @as(*const u32, @ptrCast(@alignCast(buffer.ptr + offset))).*;
offset += @sizeOf(u32);
var regions = std.ArrayListUnmanaged(Region){};
if (regions_len > 0) {
const regions_bytes = regions_len * @sizeOf(Region);
if (offset + regions_bytes > buffer.len) return error.BufferTooSmall;
const regions_data = @as([*]const Region, @ptrCast(@alignCast(buffer.ptr + offset)));
try regions.appendSlice(gpa, regions_data[0..regions_len]);
offset += regions_bytes;
}
offset = std.mem.alignForward(usize, offset, @alignOf(u32));

// Deserialize exposing_modules
const exposing_modules_len = @as(*const u32, @ptrCast(@alignCast(buffer.ptr + offset))).*;
offset += @sizeOf(u32);
var exposing_modules = std.ArrayListUnmanaged(ModuleImport.Idx){};
if (exposing_modules_len > 0) {
const exposing_modules_bytes = exposing_modules_len * @sizeOf(ModuleImport.Idx);
if (offset + exposing_modules_bytes > buffer.len) return error.BufferTooSmall;
const exposing_modules_data = @as([*]const ModuleImport.Idx, @ptrCast(@alignCast(buffer.ptr + offset)));
try exposing_modules.appendSlice(gpa, exposing_modules_data[0..exposing_modules_len]);
offset += exposing_modules_bytes;
}
offset = std.mem.alignForward(usize, offset, @alignOf(u32));

// Deserialize attributes
const attributes_len = @as(*const u32, @ptrCast(@alignCast(buffer.ptr + offset))).*;
offset += @sizeOf(u32);
var attributes = std.ArrayListUnmanaged(Attributes){};
if (attributes_len > 0) {
if (offset + attributes_len > buffer.len) return error.BufferTooSmall;
try attributes.ensureTotalCapacity(gpa, attributes_len);
// Deserialize each Attributes from a single byte to avoid padding
for (0..attributes_len) |_| {
const attr_bits: u3 = @truncate(buffer[offset]);
const attr: Attributes = @bitCast(attr_bits);
attributes.appendAssumeCapacity(attr);
offset += 1;
}
}
offset = std.mem.alignForward(usize, offset, @alignOf(u32));

// Deserialize next_unique_name
if (offset + @sizeOf(u32) > buffer.len) return error.BufferTooSmall;
const next_unique_name = @as(*const u32, @ptrCast(@alignCast(buffer.ptr + offset))).*;

// Rebuild the strings hash table
var strings = @import("../collections/SmallStringInterner.zig").StringIdx.Table{};
try strings.ensureTotalCapacityContext(gpa, @intCast(outer_indices.items.len), @import("../collections/SmallStringInterner.zig").StringIdx.TableContext{ .bytes = &bytes });

// Re-populate the hash table
for (outer_indices.items) |string_idx| {
const string_bytes = std.mem.sliceTo(bytes.items[@intFromEnum(string_idx)..], 0);
const entry = strings.getOrPutContextAdapted(gpa, string_bytes, @import("../collections/SmallStringInterner.zig").StringIdx.TableAdapter{ .bytes = &bytes }, @import("../collections/SmallStringInterner.zig").StringIdx.TableContext{ .bytes = &bytes }) catch |err| exitOnOom(err);
entry.key_ptr.* = string_idx;
}

// Construct the interner
const interner = @import("../collections/SmallStringInterner.zig"){
.bytes = bytes,
.strings = strings,
.outer_indices = outer_indices,
.regions = regions,
};

return Store{
.interner = interner,
.exposing_modules = exposing_modules,
.attributes = attributes,
.next_unique_name = next_unique_name,
};
}

/// Get the region for an identifier.
pub fn getRegion(self: *const Store, idx: Idx) Region {
return self.interner.getRegion(@enumFromInt(@as(u32, idx.idx)));
@@ -273,3 +488,87 @@ test "from_bytes creates ignored identifier" {
try std.testing.expect(result.attributes.ignored == true);
try std.testing.expect(result.attributes.reassignable == false);
}

test "Ident.Store serialization round-trip" {
const gpa = std.testing.allocator;

// Create original store and add some identifiers
var original_store = Store.initCapacity(gpa, 16);
defer original_store.deinit(gpa);

const ident1 = Ident.for_text("hello");
const ident2 = Ident.for_text("world!");
const ident3 = Ident.for_text("_ignored");

const idx1 = original_store.insert(gpa, ident1, Region.zero());
const idx2 = original_store.insert(gpa, ident2, Region.zero());
const idx3 = original_store.insert(gpa, ident3, Region.zero());

// Serialize
const serialized_size = original_store.serializedSize();
const buffer = try gpa.alignedAlloc(u8, @alignOf(u32), serialized_size);
defer gpa.free(buffer);

const serialized = try original_store.serializeInto(buffer, gpa);
try std.testing.expectEqual(serialized_size, serialized.len);

// Deserialize
var restored_store = try Store.deserializeFrom(serialized, gpa);
defer restored_store.deinit(gpa);

// Verify the identifiers are identical
try std.testing.expectEqualStrings("hello", restored_store.getText(idx1));
try std.testing.expectEqualStrings("world!", restored_store.getText(idx2));
try std.testing.expectEqualStrings("_ignored", restored_store.getText(idx3));

// Verify attributes are preserved
try std.testing.expect(restored_store.getText(idx1)[0] != '_'); // not ignored
try std.testing.expect(restored_store.getText(idx2)[restored_store.getText(idx2).len - 1] == '!'); // effectful
try std.testing.expect(restored_store.getText(idx3)[0] == '_'); // ignored

// Verify next_unique_name is preserved
try std.testing.expectEqual(original_store.next_unique_name, restored_store.next_unique_name);

// Verify structural integrity
try std.testing.expectEqual(original_store.exposing_modules.items.len, restored_store.exposing_modules.items.len);
try std.testing.expectEqual(original_store.attributes.items.len, restored_store.attributes.items.len);
try std.testing.expectEqual(original_store.interner.bytes.items.len, restored_store.interner.bytes.items.len);
try std.testing.expectEqual(original_store.interner.outer_indices.items.len, restored_store.interner.outer_indices.items.len);
}

test "Ident.Store serialization comprehensive" {
const gpa = std.testing.allocator;

var store = Store.initCapacity(gpa, 8);
defer store.deinit(gpa);

// Test various identifier types and edge cases
const ident1 = Ident.for_text("hello");
const ident2 = Ident.for_text("world!");
const ident3 = Ident.for_text("_ignored");
const ident4 = Ident.for_text("a"); // single character
const ident5 = Ident.for_text("very_long_identifier_name_that_might_cause_issues"); // long name
const region = Region.zero();

_ = store.insert(gpa, ident1, region);
_ = store.insert(gpa, ident2, region);
_ = store.insert(gpa, ident3, region);
_ = store.insert(gpa, ident4, region);
_ = store.insert(gpa, ident5, region);

// Add some unique names
_ = store.genUnique(gpa);
_ = store.genUnique(gpa);

// Test serialization
try serialization.testing.testSerialization(Store, &store, gpa);
}

test "Ident.Store empty store serialization" {
const gpa = std.testing.allocator;

var empty_store = Store.initCapacity(gpa, 0);
defer empty_store.deinit(gpa);

try serialization.testing.testSerialization(Store, &empty_store, gpa);
}
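
Each section written by serializeInto above follows the same layout: a u32 length prefix, the raw element bytes, then padding up to the next u32 boundary; deserializeFrom walks the same layout back. The following is a minimal standalone sketch of that pattern, not part of this diff; the writeSection name and the test values are illustrative only.

```zig
const std = @import("std");

/// Write one length-prefixed section: a u32 length, the raw bytes, then
/// advance the offset to the next u32 boundary, mirroring the pattern
/// used by serializeInto above. Returns the new offset.
fn writeSection(buffer: []u8, offset: usize, data: []const u8) usize {
    var pos = offset;
    @as(*u32, @ptrCast(@alignCast(buffer.ptr + pos))).* = @intCast(data.len);
    pos += @sizeOf(u32);
    @memcpy(buffer[pos .. pos + data.len], data);
    pos += data.len;
    return std.mem.alignForward(usize, pos, @alignOf(u32));
}

test "length-prefixed section layout" {
    var buf align(@alignOf(u32)) = [_]u8{0} ** 64;
    const end = writeSection(&buf, 0, "hello");
    // 4 bytes of length + 5 bytes of data, rounded up to a multiple of 4.
    try std.testing.expectEqual(@as(usize, 12), end);
}
```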
src/base/ModuleEnv.zig (22 changes: 13 additions & 9 deletions)
@@ -22,15 +22,15 @@ strings: StringLiteral.Store,
types: types_mod.Store,
/// Map of exposed items by their string representation (not interned)
/// This is built during canonicalization and preserved for later use
exposed_by_str: std.StringHashMapUnmanaged(void) = .{},
exposed_by_str: collections.SafeStringHashMap(void),
/// Map of exposed item names to their CIR node indices (stored as u16)
/// This is populated during canonicalization to allow cross-module lookups
exposed_nodes: std.StringHashMapUnmanaged(u16) = .{},
exposed_nodes: collections.SafeStringHashMap(u16),

/// Line starts for error reporting. We retain only start and offset positions in the IR
/// and then use these line starts to calculate the line number and column number as required.
/// this is a more compact representation at the expense of extra computation only when generating error diagnostics.
line_starts: std.ArrayList(u32),
line_starts: collections.SafeList(u32),

/// Initialize the module environment.
pub fn init(gpa: std.mem.Allocator) Self {
@@ -42,7 +42,9 @@ pub fn init(gpa: std.mem.Allocator) Self {
.ident_ids_for_slicing = collections.SafeList(Ident.Idx).initCapacity(gpa, 256),
.strings = StringLiteral.Store.initCapacityBytes(gpa, 4096),
.types = types_mod.Store.initCapacity(gpa, 2048, 512),
.line_starts = std.ArrayList(u32).init(gpa),
.exposed_by_str = collections.SafeStringHashMap(void).init(),
.exposed_nodes = collections.SafeStringHashMap(u16).init(),
.line_starts = collections.SafeList(u32).initCapacity(gpa, 256),
};
}

@@ -52,35 +54,37 @@ pub fn deinit(self: *Self) void {
self.ident_ids_for_slicing.deinit(self.gpa);
self.strings.deinit(self.gpa);
self.types.deinit();
self.line_starts.deinit();
self.line_starts.deinit(self.gpa);
self.exposed_by_str.deinit(self.gpa);
self.exposed_nodes.deinit(self.gpa);
}

/// Calculate and store line starts from the source text
pub fn calcLineStarts(self: *Self, source: []const u8) !void {
self.line_starts.clearRetainingCapacity();
// Reset line_starts by creating a new SafeList
self.line_starts.deinit(self.gpa);
self.line_starts = collections.SafeList(u32).initCapacity(self.gpa, 256);

// if the source is empty, we're done
if (source.len == 0) {
return;
}

// the first line starts at offset 0
try self.line_starts.append(0);
_ = self.line_starts.append(self.gpa, 0);

// find all newlines in the source, save their offset
var pos: u32 = 0;
for (source) |c| {
if (c == '\n') {
// next line starts after the newline in the current position
try self.line_starts.append(pos + 1);
_ = self.line_starts.append(self.gpa, pos + 1);
}
pos += 1;
}
}

/// Get diagnostic position information for a given range
pub fn calcRegionInfo(self: *const Self, source: []const u8, begin: u32, end: u32) !RegionInfo {
return RegionInfo.position(source, self.line_starts.items, begin, end);
return RegionInfo.position(source, self.line_starts.items.items, begin, end);
}
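
The line-start scan in calcLineStarts is independent of the SafeList change itself. Below is a minimal sketch of the same scan over a plain std.ArrayList, illustrative only and assuming the managed ArrayList API this file used before the change; the lineStarts name is not part of the PR.

```zig
const std = @import("std");

/// Collect the byte offset at which each line begins: offset 0 for the
/// first line, then one entry per '\n' pointing just past that newline.
fn lineStarts(gpa: std.mem.Allocator, source: []const u8) !std.ArrayList(u32) {
    var starts = std.ArrayList(u32).init(gpa);
    errdefer starts.deinit();
    if (source.len == 0) return starts;
    try starts.append(0);
    for (source, 0..) |c, pos| {
        if (c == '\n') try starts.append(@intCast(pos + 1));
    }
    return starts;
}

test "line starts" {
    const gpa = std.testing.allocator;
    var starts = try lineStarts(gpa, "a\nbc\n");
    defer starts.deinit();
    try std.testing.expectEqualSlices(u32, &.{ 0, 2, 5 }, starts.items);
}
```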