Skip to content

Commit dc46732

Browse files
committed
Fix serialization bug
1 parent c435ad1 commit dc46732

File tree

13 files changed

+219
-116
lines changed

13 files changed

+219
-116
lines changed

src/base/CommonEnv.zig

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -117,18 +117,26 @@ pub const Serialized = extern struct {
117117
offset: i64,
118118
source: []const u8,
119119
) *CommonEnv {
120-
// Note: Serialized may be smaller than the runtime struct because:
121-
// - Uses i64 offsets instead of usize pointers (same size on 64-bit, but conceptually different)
122-
// - May have different alignment/padding requirements
123-
// We deserialize by overwriting the Serialized memory with the runtime struct.
120+
// CRITICAL: We must deserialize ALL fields into local variables BEFORE writing to the
121+
// output struct. This is because CommonEnv is a regular struct (not extern), so Zig may
122+
// reorder its fields differently than Serialized (which is extern). If we read from self
123+
// while writing to env (which aliases self), we may read corrupted data in Release mode
124+
// when field orderings differ.
125+
126+
// Step 1: Deserialize all fields into local variables first
127+
const deserialized_idents = self.idents.deserialize(offset).*;
128+
const deserialized_strings = self.strings.deserialize(offset).*;
129+
const deserialized_exposed_items = self.exposed_items.deserialize(offset).*;
130+
const deserialized_line_starts = self.line_starts.deserialize(offset).*;
131+
132+
// Step 2: Overwrite ourself with the deserialized version
124133
const env = @as(*CommonEnv, @ptrFromInt(@intFromPtr(self)));
125134

126135
env.* = CommonEnv{
127-
.idents = self.idents.deserialize(offset).*,
128-
// .ident_ids_for_slicing = self.ident_ids_for_slicing.deserialize(offset).*,
129-
.strings = self.strings.deserialize(offset).*,
130-
.exposed_items = self.exposed_items.deserialize(offset).*,
131-
.line_starts = self.line_starts.deserialize(offset).*,
136+
.idents = deserialized_idents,
137+
.strings = deserialized_strings,
138+
.exposed_items = deserialized_exposed_items,
139+
.line_starts = deserialized_line_starts,
132140
.source = source,
133141
};
134142

src/base/Ident.zig

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -120,15 +120,25 @@ pub const Store = struct {
120120
}
121121

122122
/// Deserialize this Serialized struct into a Store
123-
pub fn deserialize(self: *Serialized, offset: i64) *Store {
124-
// Note: Serialized may be smaller than the runtime struct.
125-
// We deserialize by overwriting the Serialized memory with the runtime struct.
123+
pub noinline fn deserialize(self: *Serialized, offset: i64) *Store {
124+
// CRITICAL: We must deserialize ALL fields into local variables BEFORE writing to the
125+
// output struct. This is because Store is a regular struct (not extern), so Zig may
126+
// reorder its fields differently than Serialized (which is extern). If we read from self
127+
// while writing to store (which aliases self), we may read corrupted data in Release mode
128+
// when field orderings differ.
129+
130+
// Step 1: Deserialize all fields into local variables first
131+
const deserialized_interner = self.interner.deserialize(offset).*;
132+
const deserialized_attributes = self.attributes.deserialize(offset).*;
133+
const deserialized_next_unique_name = self.next_unique_name;
134+
135+
// Step 2: Overwrite ourself with the deserialized version
126136
const store = @as(*Store, @ptrFromInt(@intFromPtr(self)));
127137

128138
store.* = Store{
129-
.interner = self.interner.deserialize(offset).*,
130-
.attributes = self.attributes.deserialize(offset).*,
131-
.next_unique_name = self.next_unique_name,
139+
.interner = deserialized_interner,
140+
.attributes = deserialized_attributes,
141+
.next_unique_name = deserialized_next_unique_name,
132142
};
133143

134144
return store;

src/base/SmallStringInterner.zig

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -226,14 +226,25 @@ pub const Serialized = extern struct {
226226
}
227227

228228
/// Deserialize this Serialized struct into a SmallStringInterner
229-
pub fn deserialize(self: *Serialized, offset: i64) *SmallStringInterner {
230-
// Overwrite ourself with the deserialized version, and return our pointer after casting it to Self.
229+
pub noinline fn deserialize(self: *Serialized, offset: i64) *SmallStringInterner {
230+
// CRITICAL: We must deserialize ALL fields into local variables BEFORE writing to the
231+
// output struct. This is because SmallStringInterner is a regular struct (not extern), so Zig may
232+
// reorder its fields differently than Serialized (which is extern). If we read from self
233+
// while writing to interner (which aliases self), we may read corrupted data in Release mode
234+
// when field orderings differ.
235+
236+
// Step 1: Deserialize all fields into local variables first
237+
const deserialized_bytes = self.bytes.deserialize(offset).*;
238+
const deserialized_hash_table = self.hash_table.deserialize(offset).*;
239+
const deserialized_entry_count = self.entry_count;
240+
241+
// Step 2: Overwrite ourself with the deserialized version
231242
const interner = @as(*SmallStringInterner, @ptrCast(self));
232243

233244
interner.* = .{
234-
.bytes = self.bytes.deserialize(offset).*,
235-
.hash_table = self.hash_table.deserialize(offset).*,
236-
.entry_count = self.entry_count,
245+
.bytes = deserialized_bytes,
246+
.hash_table = deserialized_hash_table,
247+
.entry_count = deserialized_entry_count,
237248
};
238249

239250
return interner;

src/base/StringLiteral.zig

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -113,12 +113,18 @@ pub const Store = struct {
113113
}
114114

115115
/// Deserialize this Serialized struct into a Store
116-
pub fn deserialize(self: *Serialized, offset: i64) *Store {
117-
// Overwrite ourself with the deserialized version, and return our pointer after casting it to Self.
116+
pub noinline fn deserialize(self: *Serialized, offset: i64) *Store {
117+
// CRITICAL: We must deserialize ALL fields into local variables BEFORE writing to the
118+
// output struct to avoid aliasing issues in Release mode.
119+
120+
// Step 1: Deserialize all fields into local variables first
121+
const deserialized_buffer = self.buffer.deserialize(offset).*;
122+
123+
// Step 2: Overwrite ourself with the deserialized version
118124
const store = @as(*Store, @ptrFromInt(@intFromPtr(self)));
119125

120126
store.* = Store{
121-
.buffer = self.buffer.deserialize(offset).*,
127+
.buffer = deserialized_buffer,
122128
};
123129

124130
return store;

src/canonicalize/CIR.zig

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -704,13 +704,17 @@ pub const Import = struct {
704704
}
705705

706706
/// Deserialize this Serialized struct into a Store
707-
pub fn deserialize(self: *Serialized, offset: i64, allocator: std.mem.Allocator) std.mem.Allocator.Error!*Store {
708-
// Overwrite ourself with the deserialized version, and return our pointer after casting it to Store.
707+
pub noinline fn deserialize(self: *Serialized, offset: i64, allocator: std.mem.Allocator) std.mem.Allocator.Error!*Store {
708+
// CRITICAL: Deserialize nested struct BEFORE casting and writing to store
709+
// to avoid aliasing issues in Release mode.
710+
const deserialized_imports = self.imports.deserialize(offset).*;
711+
712+
// Now we can cast and write to the destination
709713
const store = @as(*Store, @ptrFromInt(@intFromPtr(self)));
710714

711715
store.* = .{
712716
.map = .{}, // Will be repopulated below
713-
.imports = self.imports.deserialize(offset).*,
717+
.imports = deserialized_imports,
714718
};
715719

716720
// Pre-allocate the exact capacity needed for the map

src/canonicalize/ModuleEnv.zig

Lines changed: 53 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1791,7 +1791,7 @@ pub const Serialized = extern struct {
17911791
}
17921792

17931793
/// Deserialize a ModuleEnv from the buffer, updating the ModuleEnv in place
1794-
pub fn deserialize(
1794+
pub noinline fn deserialize(
17951795
self: *Serialized,
17961796
offset: i64,
17971797
gpa: std.mem.Allocator,
@@ -1803,47 +1803,66 @@ pub const Serialized = extern struct {
18031803
// On 32-bit platforms, Serialized may be larger due to using fixed-size types for platform-independent serialization.
18041804
comptime std.debug.assert(@sizeOf(@This()) >= @sizeOf(Self));
18051805

1806-
// Overwrite ourself with the deserialized version, and return our pointer after casting it to Self.
1806+
// CRITICAL: We must deserialize ALL fields into local variables BEFORE writing to the
1807+
// output struct. This is because ModuleEnv is a regular struct (not extern), so Zig may
1808+
// reorder its fields differently than Serialized (which is extern). If we read from self
1809+
// while writing to env (which aliases self), we may read corrupted data in Release mode
1810+
// when field orderings differ.
1811+
//
1812+
// Following the same pattern as NodeStore.deserialize to avoid aliasing issues.
1813+
1814+
// Step 1: Deserialize all complex fields into local variables first
1815+
const deserialized_common = self.common.deserialize(offset, source).*;
1816+
const deserialized_types = self.types.deserialize(offset, gpa).*;
1817+
const deserialized_module_kind = self.module_kind.decode();
1818+
const deserialized_all_defs = self.all_defs;
1819+
const deserialized_all_statements = self.all_statements;
1820+
const deserialized_exports = self.exports;
1821+
const deserialized_builtin_statements = self.builtin_statements;
1822+
const deserialized_external_decls = self.external_decls.deserialize(offset).*;
1823+
const deserialized_imports = (try self.imports.deserialize(offset, gpa)).*;
1824+
const deserialized_diagnostics = self.diagnostics;
1825+
const deserialized_store = self.store.deserialize(offset, gpa).*;
1826+
const deserialized_deferred_numeric_literals = self.deferred_numeric_literals.deserialize(offset).*;
1827+
1828+
// Step 2: Overwrite ourself with the deserialized version
18071829
const env = @as(*Self, @ptrFromInt(@intFromPtr(self)));
18081830

1809-
// Deserialize common env first so we can look up identifiers
1810-
const common = self.common.deserialize(offset, source).*;
1811-
18121831
env.* = Self{
18131832
.gpa = gpa,
1814-
.common = common,
1815-
.types = self.types.deserialize(offset, gpa).*,
1816-
.module_kind = self.module_kind.decode(),
1817-
.all_defs = self.all_defs,
1818-
.all_statements = self.all_statements,
1819-
.exports = self.exports,
1820-
.builtin_statements = self.builtin_statements,
1821-
.external_decls = self.external_decls.deserialize(offset).*,
1822-
.imports = (try self.imports.deserialize(offset, gpa)).*,
1833+
.common = deserialized_common,
1834+
.types = deserialized_types,
1835+
.module_kind = deserialized_module_kind,
1836+
.all_defs = deserialized_all_defs,
1837+
.all_statements = deserialized_all_statements,
1838+
.exports = deserialized_exports,
1839+
.builtin_statements = deserialized_builtin_statements,
1840+
.external_decls = deserialized_external_decls,
1841+
.imports = deserialized_imports,
18231842
.module_name = module_name,
18241843
.module_name_idx = undefined, // Not used for deserialized modules (only needed during fresh canonicalization)
1825-
.diagnostics = self.diagnostics,
1826-
.store = self.store.deserialize(offset, gpa).*,
1844+
.diagnostics = deserialized_diagnostics,
1845+
.store = deserialized_store,
18271846
.evaluation_order = null, // Not serialized, will be recomputed if needed
18281847
// Well-known identifiers for type checking - look them up in the deserialized common env
1829-
.try_ident = common.findIdent("Try") orelse unreachable,
1830-
.out_of_range_ident = common.findIdent("OutOfRange") orelse unreachable,
1831-
.builtin_module_ident = common.findIdent("Builtin") orelse unreachable,
1832-
.plus_ident = common.findIdent(Ident.PLUS_METHOD_NAME) orelse unreachable,
1833-
.minus_ident = common.findIdent("minus") orelse unreachable,
1834-
.times_ident = common.findIdent("times") orelse unreachable,
1835-
.div_by_ident = common.findIdent("div_by") orelse unreachable,
1836-
.div_trunc_by_ident = common.findIdent("div_trunc_by") orelse unreachable,
1837-
.rem_by_ident = common.findIdent("rem_by") orelse unreachable,
1838-
.negate_ident = common.findIdent(Ident.NEGATE_METHOD_NAME) orelse unreachable,
1839-
.not_ident = common.findIdent("not") orelse unreachable,
1840-
.is_lt_ident = common.findIdent("is_lt") orelse unreachable,
1841-
.is_lte_ident = common.findIdent("is_lte") orelse unreachable,
1842-
.is_gt_ident = common.findIdent("is_gt") orelse unreachable,
1843-
.is_gte_ident = common.findIdent("is_gte") orelse unreachable,
1844-
.is_eq_ident = common.findIdent("is_eq") orelse unreachable,
1845-
.is_ne_ident = common.findIdent("is_ne") orelse unreachable,
1846-
.deferred_numeric_literals = self.deferred_numeric_literals.deserialize(offset).*,
1848+
.try_ident = deserialized_common.findIdent("Try") orelse unreachable,
1849+
.out_of_range_ident = deserialized_common.findIdent("OutOfRange") orelse unreachable,
1850+
.builtin_module_ident = deserialized_common.findIdent("Builtin") orelse unreachable,
1851+
.plus_ident = deserialized_common.findIdent(Ident.PLUS_METHOD_NAME) orelse unreachable,
1852+
.minus_ident = deserialized_common.findIdent("minus") orelse unreachable,
1853+
.times_ident = deserialized_common.findIdent("times") orelse unreachable,
1854+
.div_by_ident = deserialized_common.findIdent("div_by") orelse unreachable,
1855+
.div_trunc_by_ident = deserialized_common.findIdent("div_trunc_by") orelse unreachable,
1856+
.rem_by_ident = deserialized_common.findIdent("rem_by") orelse unreachable,
1857+
.negate_ident = deserialized_common.findIdent(Ident.NEGATE_METHOD_NAME) orelse unreachable,
1858+
.not_ident = deserialized_common.findIdent("not") orelse unreachable,
1859+
.is_lt_ident = deserialized_common.findIdent("is_lt") orelse unreachable,
1860+
.is_lte_ident = deserialized_common.findIdent("is_lte") orelse unreachable,
1861+
.is_gt_ident = deserialized_common.findIdent("is_gt") orelse unreachable,
1862+
.is_gte_ident = deserialized_common.findIdent("is_gte") orelse unreachable,
1863+
.is_eq_ident = deserialized_common.findIdent("is_eq") orelse unreachable,
1864+
.is_ne_ident = deserialized_common.findIdent("is_ne") orelse unreachable,
1865+
.deferred_numeric_literals = deserialized_deferred_numeric_literals,
18471866
};
18481867

18491868
return env;

src/canonicalize/NodeStore.zig

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3522,7 +3522,7 @@ pub const Serialized = extern struct {
35223522
}
35233523

35243524
/// Deserialize this Serialized struct into a NodeStore
3525-
pub fn deserialize(self: *Serialized, offset: i64, gpa: Allocator) *NodeStore {
3525+
pub noinline fn deserialize(self: *Serialized, offset: i64, gpa: Allocator) *NodeStore {
35263526
// Note: Serialized may be smaller than the runtime struct.
35273527
// CRITICAL: On 32-bit platforms, deserializing nodes in-place corrupts the adjacent
35283528
// regions and extra_data fields. We must deserialize in REVERSE order (last to first)

src/collections/ExposedItems.zig

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -121,13 +121,17 @@ pub const ExposedItems = struct {
121121
}
122122

123123
/// Deserialize this Serialized struct into an ExposedItems
124-
pub fn deserialize(self: *Serialized, offset: i64) *ExposedItems {
125-
// Note: Serialized may be smaller than the runtime struct.
126-
// We deserialize by overwriting the Serialized memory with the runtime struct.
124+
pub noinline fn deserialize(self: *Serialized, offset: i64) *ExposedItems {
125+
// CRITICAL: Deserialize nested struct BEFORE casting and writing to exposed_items.
126+
// Since exposed_items aliases self (they point to the same memory), we must complete
127+
// all reads before any writes to avoid corruption in Release mode.
128+
const deserialized_items = self.items.deserialize(offset).*;
129+
130+
// Now we can cast and write to the destination
127131
const exposed_items = @as(*ExposedItems, @ptrFromInt(@intFromPtr(self)));
128132

129133
exposed_items.* = ExposedItems{
130-
.items = self.items.deserialize(offset).*,
134+
.items = deserialized_items,
131135
};
132136

133137
return exposed_items;

src/collections/SortedArrayBuilder.zig

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -299,33 +299,35 @@ pub fn SortedArrayBuilder(comptime K: type, comptime V: type) type {
299299
}
300300

301301
/// Deserialize this Serialized struct into a SortedArrayBuilder
302-
pub fn deserialize(self: *Serialized, offset: i64) *SortedArrayBuilder(K, V) {
303-
// Note: Serialized may be smaller than the runtime struct because:
304-
// - Uses i64 offsets instead of usize pointers
305-
// - Omits runtime-only fields like allocators
306-
// - May have different alignment/padding requirements
307-
// We deserialize by overwriting the Serialized memory with the runtime struct.
308-
309-
// Overwrite ourself with the deserialized version, and return our pointer after casting it to Self.
302+
pub noinline fn deserialize(self: *Serialized, offset: i64) *SortedArrayBuilder(K, V) {
303+
// CRITICAL: Read ALL fields from self BEFORE casting and writing to builder.
304+
// Since builder aliases self (they point to the same memory), we must complete
305+
// all reads before any writes to avoid corruption in Release mode.
306+
const serialized_entries_offset = self.entries_offset;
307+
const serialized_entries_len = self.entries_len;
308+
const serialized_entries_capacity = self.entries_capacity;
309+
const serialized_sorted = self.sorted;
310+
311+
// Now we can cast and write to the destination
310312
const builder = @as(*SortedArrayBuilder(K, V), @ptrFromInt(@intFromPtr(self)));
311313

312314
// Handle empty array case
313-
if (self.entries_len == 0) {
315+
if (serialized_entries_len == 0) {
314316
builder.* = SortedArrayBuilder(K, V){
315317
.entries = .{},
316-
.sorted = self.sorted,
318+
.sorted = serialized_sorted,
317319
};
318320
} else {
319321
// Apply the offset to convert from serialized offset to actual pointer
320-
const entries_ptr_usize: usize = @intCast(self.entries_offset + offset);
322+
const entries_ptr_usize: usize = @intCast(serialized_entries_offset + offset);
321323
const entries_ptr: [*]Entry = @ptrFromInt(entries_ptr_usize);
322324

323325
builder.* = SortedArrayBuilder(K, V){
324326
.entries = .{
325-
.items = entries_ptr[0..@intCast(self.entries_len)],
326-
.capacity = @intCast(self.entries_capacity),
327+
.items = entries_ptr[0..@intCast(serialized_entries_len)],
328+
.capacity = @intCast(serialized_entries_capacity),
327329
},
328-
.sorted = self.sorted,
330+
.sorted = serialized_sorted,
329331
};
330332
}
331333

0 commit comments

Comments
 (0)