Skip to content

Commit 36a732a

Browse files
committed
fix cid
Signed-off-by: Chen Kai <[email protected]>
1 parent bf968f4 commit 36a732a

File tree

3 files changed

+223
-16
lines changed

3 files changed

+223
-16
lines changed

src/cid.zig

Lines changed: 132 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
const std = @import("std");
22
const Allocator = std.mem.Allocator;
33
const Multicodec = @import("multicodec.zig").Multicodec;
4-
const Multihash = @import("multihash.zig").Multihash;
4+
const multihash = @import("multihash.zig");
5+
const Multihash = multihash.Multihash;
56
const varint = @import("unsigned_varint.zig");
6-
const MultiBaseCodec = @import("multibase.zig").MultiBaseCodec;
7+
const multibase = @import("multibase.zig");
8+
const MultiBaseCodec = multibase.MultiBaseCodec;
79

810
pub const Error = error{
911
UnknownCodec,
@@ -183,18 +185,69 @@ pub fn Cid(comptime S: usize) type {
183185
}
184186

185187
/// Renders this CID as a CIDv0 string: the Base58BTC encoding of the CID
/// bytes without the leading multibase prefix character.
///
/// Caller owns the returned slice and must free it with `self.allocator`.
/// Returns any error produced by `toBytes` or by the allocator.
fn toStringV0(self: *const Self) ![]const u8 {
    const bytes = try self.toBytes();
    defer self.allocator.free(bytes);

    // calcSize accounts for the multibase prefix ('z'); CIDv0 strings omit it.
    const needed_size = MultiBaseCodec.Base58Btc.calcSize(bytes) - 1;
    const dest = try self.allocator.alloc(u8, needed_size);
    // A failed realloc leaves `dest` allocated, so release it on the error path.
    errdefer self.allocator.free(dest);

    // NOTE(review): assumes the encoder writes from the start of `dest`, so the
    // returned slice is a prefix of the buffer — confirm against base58.encodeBtc.
    const encoded = MultiBaseCodec.base58.encodeBtc(dest, bytes);
    if (encoded.len < dest.len) {
        // Shrink the allocation to the exact encoded length.
        return self.allocator.realloc(dest, encoded.len);
    }
    return dest;
}
191201

192-
fn to_string_v1(self: *const Self) ![]u8 {
193-
const bytes = try self.toBytes(self.allocator);
202+
/// Renders this CID as a multibase string using the Base32Lower codec.
///
/// Caller owns the returned slice and must free it with `self.allocator`.
/// Returns any error produced by `toBytes` or by the allocator.
fn toStringV1(self: *const Self) ![]const u8 {
    const bytes = try self.toBytes();
    defer self.allocator.free(bytes);

    const needed_size = MultiBaseCodec.Base32Lower.calcSize(bytes);
    const dest = try self.allocator.alloc(u8, needed_size);
    // A failed realloc leaves `dest` allocated, so release it on the error path.
    errdefer self.allocator.free(dest);

    const encoded = MultiBaseCodec.Base32Lower.encode(dest, bytes);
    if (encoded.len < dest.len) {
        // Shrink the allocation to the exact encoded length.
        return self.allocator.realloc(dest, encoded.len);
    }
    return dest;
}
215+
216+
/// Renders this CID in the default textual form for its version:
/// `toStringV0` for `.V0` and `toStringV1` for `.V1`.
///
/// Caller owns the returned slice and must free it with the CID's allocator.
pub fn toString(self: Self) ![]const u8 {
    return switch (self.version) {
        // V0 is always Base58BTC.
        .V0 => try self.toStringV0(),
        // V1 defaults to Base32Lower.
        .V1 => try self.toStringV1(),
    };
}
228+
229+
/// Renders this CID as a string using the requested multibase codec.
///
/// A `.V0` CID only accepts `.Base58Btc`; any other base yields
/// `Error.InvalidCidV0Base`. A `.V1` CID is encoded with `base` directly.
///
/// Caller owns the returned slice and must free it with `self.allocator`.
pub fn toStringOfBase(self: *const Self, base: MultiBaseCodec) ![]const u8 {
    switch (self.version) {
        .V0 => {
            if (base != .Base58Btc) {
                return Error.InvalidCidV0Base;
            }
            return self.toStringV0();
        },
        .V1 => {
            const bytes = try self.toBytes();
            defer self.allocator.free(bytes);

            const needed_size = base.calcSize(bytes);
            const dest = try self.allocator.alloc(u8, needed_size);
            // A failed realloc leaves `dest` allocated, so release it on the error path.
            errdefer self.allocator.free(dest);

            const encoded = base.encode(dest, bytes);
            if (encoded.len < dest.len) {
                // Shrink the allocation to the exact encoded length.
                return self.allocator.realloc(dest, encoded.len);
            }
            return dest;
        },
    }
}
199252
};
200253
}
@@ -259,3 +312,72 @@ test "Cid conversion and comparison" {
259312
try testing.expectEqual(cid.encodedLen(), bytes.len);
260313
}
261314
}
315+
316+
test "to_string_of_base32" {
    const allocator = std.testing.allocator;

    // Build a V1 CID over the SHA2-256 digest of "foo" with the RAW codec.
    const digest = try multihash.MultihashCodecs.SHA2_256.digest("foo");
    const cid_v1 = try Cid(32).newV1(allocator, Multicodec.RAW.getCode(), digest);

    const actual = try cid_v1.toStringOfBase(.Base32Lower);
    defer allocator.free(actual);

    try std.testing.expectEqualStrings(
        "bafkreibme22gw2h7y2h7tg2fhqotaqjucnbc24deqo72b6mkl2egezxhvy",
        actual,
    );
}
329+
330+
test "Cid string representations" {
    const testing = std.testing;
    const allocator = testing.allocator;

    // V0: the default string form must be accepted by CidVersion.isV0Str.
    {
        const hash = try Multihash(32).wrap(Multicodec.SHA2_256, &[_]u8{1} ** 32);
        const cid = try Cid(32).newV0(allocator, hash);
        const str = try cid.toString();
        defer allocator.free(str);
        try testing.expect(CidVersion.isV0Str(str));
    }

    // V1: the default form and an explicit Base58BTC form must differ.
    {
        const hash = try Multihash(32).wrap(Multicodec.SHA2_256, &[_]u8{1} ** 32);
        const cid = try Cid(32).newV1(allocator, Multicodec.RAW.getCode(), hash);

        const str_default = try cid.toString();
        defer allocator.free(str_default);

        const str_base58 = try cid.toStringOfBase(.Base58Btc);
        defer allocator.free(str_base58);

        try testing.expect(!std.mem.eql(u8, str_default, str_base58));
    }
}
358+
359+
test "Cid error cases" {
    const testing = std.testing;
    const allocator = testing.allocator;

    // Constructing a V0 CID with the RAW codec is rejected.
    {
        const hash = try Multihash(32).wrap(Multicodec.SHA2_256, &[_]u8{0} ** 32);
        try testing.expectError(Error.InvalidCidV0Codec, Cid(32).init(allocator, .V0, Multicodec.RAW.getCode(), hash));
    }

    // Constructing a V0 CID from a SHA2-512 multihash is rejected.
    {
        const hash = try Multihash(32).wrap(Multicodec.SHA2_512, &[_]u8{0} ** 32);
        try testing.expectError(Error.InvalidCidV0Multihash, Cid(32).newV0(allocator, hash));
    }

    // Requesting a non-Base58BTC string from a V0 CID is rejected.
    // If the call ever succeeded, testing.allocator would report the
    // unfreed result as a leak, so no extra cleanup call is needed.
    {
        const hash = try Multihash(32).wrap(Multicodec.SHA2_256, &[_]u8{0} ** 32);
        const cid = try Cid(32).newV0(allocator, hash);
        try testing.expectError(Error.InvalidCidV0Base, cid.toStringOfBase(.Base32Lower));
    }
}

src/multibase.zig

Lines changed: 64 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,69 @@ pub const MultiBaseCodec = enum {
148148
};
149149
}
150150

151+
/// Returns the number of bytes to reserve for encoding `source` with this
/// codec, including the length of the multibase prefix from `self.code()`.
///
/// For several bases the result is an upper bound rather than the exact
/// encoded length; use the slice returned by the encoder for the real size.
pub fn calcSize(self: MultiBaseCodec, source: []const u8) usize {
    const code_len = self.code().len;
    return code_len + switch (self) {
        .Identity => source.len,
        .Base2 => source.len * 8,
        .Base8 => (source.len * 8 + 2) / 3,
        // 256 < 10^3, so no byte can contribute more than three decimal
        // digits. The extra 1 keeps the result at least as large as the
        // previous per-byte estimate, which could come out too small
        // (e.g. it reserved zero digits for a byte equal to 1).
        .Base10 => 1 + source.len * 3,
        .Base16Lower, .Base16Upper => source.len * 2,
        .Base32Lower, .Base32Upper, .Base32HexLower, .Base32HexUpper, .Base32Z => (source.len * 8 + 4) / 5,
        .Base32PadLower, .Base32PadUpper, .Base32HexPadLower, .Base32HexPadUpper => ((source.len + 4) / 5) * 8,
        .Base36Lower, .Base36Upper => blk: {
            if (source.len == 0) break :blk 1;
            // Characters reserved per non-zero byte (evaluates to 42).
            // NOTE(review): this is far above the two characters a byte can
            // need (36^2 > 256); kept unchanged until the encoder's buffer
            // requirements are confirmed.
            const per_byte: usize = @intFromFloat(@ceil(@log(36.0) / @log(2.0) * 8.0));
            var size: usize = 1;
            for (source) |byte| {
                size += if (byte == 0) 1 else per_byte;
            }
            break :blk size;
        },
        // Base58 expands by at most log(256)/log(58) ≈ 1.37; this is the
        // integer form of ceil(len * 1.37).
        .Base58Flickr, .Base58Btc => if (source.len == 0) 1 else (source.len * 137 + 99) / 100,
        // The unpadded variants reserve the padded length, an upper bound.
        .Base64, .Base64Url => (source.len + 2) / 3 * 4,
        .Base64Pad, .Base64UrlPad => ((source.len + 2) / 3) * 4,
        .Base256Emoji => source.len * 4, // Each emoji is up to 4 bytes in UTF-8
    };
}
197+
198+
/// Calculates the maximum size needed for decoding the given encoded string
199+
pub fn calcSizeForDecode(self: MultiBaseCodec, source: []const u8) usize {
200+
return switch (self) {
201+
.Identity => source.len - 1,
202+
.Base2 => (source.len - 1) / 8,
203+
.Base8 => (source.len - 1) * 3 / 8,
204+
.Base10 => source.len - 1,
205+
.Base16Lower, .Base16Upper => (source.len - 1) / 2,
206+
.Base32Lower, .Base32Upper, .Base32HexLower, .Base32HexUpper, .Base32PadLower, .Base32PadUpper, .Base32HexPadLower, .Base32HexPadUpper, .Base32Z => (source.len - 1) * 5 / 8,
207+
.Base36Lower, .Base36Upper => source.len - 1,
208+
.Base58Flickr, .Base58Btc => source.len - 1,
209+
.Base64, .Base64Url, .Base64Pad, .Base64UrlPad => (source.len - 1) * 3 / 4,
210+
.Base256Emoji => (source.len - 4) / 4, // First emoji is the multibase prefix
211+
};
212+
}
213+
151214
const identity = struct {
152215
pub fn encode(dest: []u8, source: []const u8) []const u8 {
153216
@memcpy(dest[0..source.len], source);
@@ -834,7 +897,7 @@ pub const MultiBaseCodec = enum {
834897
}
835898
};
836899

837-
const base58 = struct {
900+
pub const base58 = struct {
838901
const ALPHABET_FLICKR = "123456789abcdefghijkmnopqrstuvwxyzABCDEFGHJKLMNPQRSTUVWXYZ";
839902
const ALPHABET_BTC = "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz";
840903
const Vec = @Vector(16, u8);

src/multihash.zig

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ pub fn Multihash(comptime S: usize) type {
105105
}
106106

107107
pub fn toBytes(self: Self, allocator: std.mem.Allocator) ![]u8 {
108-
const bytes = try allocator.alloc(u8, self.size);
108+
const bytes = try allocator.alloc(u8, self.encodedLen());
109109
var stream = std.io.fixedBufferStream(bytes);
110110
const written = try self.write(stream.writer());
111111
std.debug.assert(written == bytes.len);
@@ -115,15 +115,37 @@ pub fn Multihash(comptime S: usize) type {
115115
}
116116

117117
/// MultihashDigest returns a namespace whose `digest` function hashes an
/// input with the algorithm named by a comptime value of the enum `T` and
/// wraps the result in a `Multihash` sized for that algorithm.
pub fn MultihashDigest(comptime T: type) type {
    const SizeTable = struct {
        /// Digest length in bytes for each supported code.
        /// NOTE(review): BLAKE3 is listed as 64 bytes — confirm this matches
        /// the length the project's Hasher actually produces.
        fn lookup(comptime code: T) comptime_int {
            return switch (code) {
                .BLAKE2S_128 => 16,
                .SHA3_224, .KECCAK_224 => 28,
                .SHA2_256, .SHA3_256, .KECCAK_256, .BLAKE2B_256, .BLAKE2S_256 => 32,
                .SHA3_384, .KECCAK_384 => 48,
                .SHA2_512, .SHA3_512, .KECCAK_512, .BLAKE2B_512, .BLAKE3 => 64,
            };
        }
    };

    return struct {
        /// Hashes `input` with the algorithm selected by `code` and returns
        /// the digest wrapped as a Multihash of the matching size.
        pub fn digest(comptime code: T, input: []const u8) !Multihash(SizeTable.lookup(code)) {
            const Output = Multihash(SizeTable.lookup(code));

            var hasher = Hasher.init(code);
            try hasher.update(input);
            // NOTE(review): this slices the value returned by finalize();
            // confirm the backing storage outlives the wrap call below.
            const digest_bytes = switch (hasher) {
                inline else => |*h| h.finalize()[0..],
            };

            const codec = try Multicodec.fromCode(@intFromEnum(code));
            return try Output.wrap(codec, digest_bytes);
        }
    };
}
@@ -191,7 +213,7 @@ pub const MultihashCodecs = enum(u64) {
191213
BLAKE2S_256 = Multicodec.BLAKE2S_256.getCode(),
192214
BLAKE3 = Multicodec.BLAKE3.getCode(),
193215

194-
pub usingnamespace MultihashDigest(@This(), 64);
216+
pub usingnamespace MultihashDigest(@This());
195217
};
196218

197219
/// Sha2_256 is a struct that represents the SHA2-256 hash algorithm.

0 commit comments

Comments
 (0)