fix cid

GrapeBaBa · GrapeBaBa · commit 36a732aa6964 · 2024-12-16T21:09:14.000+08:00
Signed-off-by: Chen Kai <281165273grape@gmail.com>
diff --git a/src/cid.zig b/src/cid.zig
@@ -1,9 +1,11 @@
 const std = @import("std");
 const Allocator = std.mem.Allocator;
 const Multicodec = @import("multicodec.zig").Multicodec;
-const Multihash = @import("multihash.zig").Multihash;
+const multihash = @import("multihash.zig");
+const Multihash = multihash.Multihash;
 const varint = @import("unsigned_varint.zig");
-const MultiBaseCodec = @import("multibase.zig").MultiBaseCodec;
+const multibase = @import("multibase.zig");
+const MultiBaseCodec = multibase.MultiBaseCodec;
 
 pub const Error = error{
     UnknownCodec,
@@ -183,18 +185,69 @@ pub fn Cid(comptime S: usize) type {
         }
 
         fn toStringV0(self: *const Self) ![]const u8 {
-            const hash_bytes = try self.hash.toBytes();
-            var bytes = std.ArrayList(u8).init(self.allocator);
-            errdefer bytes.deinit();
-            return MultiBaseCodec.Base58Btc.encode(bytes.items, hash_bytes);
+            // Encode the bytes from toBytes() as base58btc, without a multibase prefix.
+            const bytes = try self.toBytes();
+            defer self.allocator.free(bytes);
+
+            // calcSize includes the multibase prefix 'z', which a CIDv0 string omits.
+            const needed_size = MultiBaseCodec.Base58Btc.calcSize(bytes) - 1;
+            const dest = try self.allocator.alloc(u8, needed_size);
+            errdefer self.allocator.free(dest); // a failed shrink below must not leak `dest`
+            const encoded = MultiBaseCodec.base58.encodeBtc(dest, bytes);
+            // Caller owns the result; trim the buffer when the size estimate was high.
+            if (encoded.len < dest.len) return self.allocator.realloc(dest, encoded.len);
+            return dest;
         }
 
-        fn to_string_v1(self: *const Self) ![]u8 {
-            const bytes = try self.toBytes(self.allocator);
+        fn toStringV1(self: *const Self) ![]const u8 {
+            const bytes = try self.toBytes();
             defer self.allocator.free(bytes);
 
-            const dest = std.ArrayList(u8).init(self.allocator);
-            return MultiBaseCodec.Base32Lower.encode(dest.items, bytes);
+            // Encode the bytes from toBytes() as a Base32Lower multibase string.
+            const needed_size = MultiBaseCodec.Base32Lower.calcSize(bytes);
+            const dest = try self.allocator.alloc(u8, needed_size);
+            errdefer self.allocator.free(dest); // a failed shrink below must not leak `dest`
+            const encoded = MultiBaseCodec.Base32Lower.encode(dest, bytes);
+            // Caller owns the result; trim the buffer when the size estimate was high.
+            if (encoded.len < dest.len) return self.allocator.realloc(dest, encoded.len);
+            return dest;
+        }
+
+        /// Renders the CID in the default text form for its version by
+        /// delegating to `toStringV0` (`.V0`) or `toStringV1` (`.V1`).
+        /// Errors from the chosen encoder are returned unchanged.
+        /// The caller frees the returned slice with the CID's allocator.
+        pub fn toString(self: Self) ![]const u8 {
+            return switch (self.version) {
+                // CIDv0 is always Base58BTC.
+                .V0 => self.toStringV0(),
+                // CIDv1 defaults to Base32Lower.
+                .V1 => self.toStringV1(),
+            };
+        }
+
+        /// Renders the CID as a string in the requested multibase `base`.
+        /// A `.V0` CID accepts only `.Base58Btc`; any other base yields
+        /// `Error.InvalidCidV0Base`. The caller frees the returned slice.
+        pub fn toStringOfBase(self: *const Self, base: MultiBaseCodec) ![]const u8 {
+            switch (self.version) {
+                .V0 => {
+                    if (base != .Base58Btc) return Error.InvalidCidV0Base;
+                    return self.toStringV0();
+                },
+                .V1 => {
+                    const bytes = try self.toBytes();
+                    defer self.allocator.free(bytes);
+
+                    const dest = try self.allocator.alloc(u8, base.calcSize(bytes));
+                    // A failed shrink below must not leak `dest`.
+                    errdefer self.allocator.free(dest);
+                    const encoded = base.encode(dest, bytes);
+                    // Trim the buffer when the size estimate was high.
+                    if (encoded.len < dest.len) return self.allocator.realloc(dest, encoded.len);
+                    return dest;
+                },
+            }
+        }
         }
     };
 }
@@ -259,3 +312,72 @@ test "Cid conversion and comparison" {
         try testing.expectEqual(cid.encodedLen(), bytes.len);
     }
 }
+
+test "to_string_of_base32" {
+    // CIDv1 with the RAW codec over the SHA2_256 digest of "foo", as Base32Lower.
+    const gpa = std.testing.allocator;
+
+    const digest = try multihash.MultihashCodecs.SHA2_256.digest("foo");
+    const cid = try Cid(32).newV1(gpa, Multicodec.RAW.getCode(), digest);
+
+    const text = try cid.toStringOfBase(.Base32Lower);
+    defer gpa.free(text);
+
+    const want = "bafkreibme22gw2h7y2h7tg2fhqotaqjucnbc24deqo72b6mkl2egezxhvy";
+    try std.testing.expectEqualStrings(want, text);
+}
+
+test "Cid string representations" {
+    const testing = std.testing;
+    const allocator = testing.allocator;
+
+    // V0: the default string form must be recognised as a V0 string.
+    {
+        const hash = try Multihash(32).wrap(Multicodec.SHA2_256, &[_]u8{1} ** 32);
+        const cid = try Cid(32).newV0(allocator, hash);
+        const str = try cid.toString();
+        defer allocator.free(str);
+        try testing.expect(CidVersion.isV0Str(str));
+    }
+
+    // V1: the default form is Base32Lower and differs from the Base58BTC form.
+    {
+        const hash = try Multihash(32).wrap(Multicodec.SHA2_256, &[_]u8{1} ** 32);
+        const cid = try Cid(32).newV1(allocator, Multicodec.RAW.getCode(), hash);
+
+        const str_default = try cid.toString();
+        defer allocator.free(str_default);
+        try testing.expectEqual(@as(u8, 'b'), str_default[0]); // Base32Lower multibase prefix
+
+        const str_base58 = try cid.toStringOfBase(.Base58Btc);
+        defer allocator.free(str_base58);
+
+        try testing.expect(!std.mem.eql(u8, str_default, str_base58));
+    }
+}
+
+test "Cid error cases" {
+    const testing = std.testing;
+    const allocator = testing.allocator;
+
+    // Building a V0 CID with the RAW codec is rejected.
+    {
+        const hash = try Multihash(32).wrap(Multicodec.SHA2_256, &[_]u8{0} ** 32);
+        try testing.expectError(Error.InvalidCidV0Codec, Cid(32).init(allocator, .V0, Multicodec.RAW.getCode(), hash));
+    }
+
+    // A V0 CID over a SHA2_512 multihash is rejected.
+    {
+        const hash = try Multihash(32).wrap(Multicodec.SHA2_512, &[_]u8{0} ** 32);
+        try testing.expectError(Error.InvalidCidV0Multihash, Cid(32).newV0(allocator, hash));
+    }
+
+    // A V0 CID can only be rendered as Base58BTC.
+    {
+        const hash = try Multihash(32).wrap(Multicodec.SHA2_256, &[_]u8{0} ** 32);
+        const cid = try Cid(32).newV0(allocator, hash);
+        // toStringOfBase rejects the base before it allocates anything, so
+        // there is nothing to free on this path.
+        try testing.expectError(Error.InvalidCidV0Base, cid.toStringOfBase(.Base32Lower));
+    }
+}
diff --git a/src/multibase.zig b/src/multibase.zig
@@ -148,6 +148,69 @@ pub const MultiBaseCodec = enum {
         };
     }
 
+    /// Calculates the buffer size for encoding `source`, multibase prefix included; the Base10 and Base58 figures are upper bounds.
+    pub fn calcSize(self: MultiBaseCodec, source: []const u8) usize {
+        const code_len = self.code().len;
+        // No debug output here: the old print of `code_len` wrote to stderr on every call.
+        return code_len + switch (self) {
+            .Identity => source.len,
+            .Base2 => source.len * 8,
+            .Base8 => (source.len * 8 + 2) / 3,
+            .Base10 => blk: {
+                if (source.len == 0) break :blk 1;
+                // Upper bound. The previous per-byte `ceil(log10(byte))` sum came out
+                // too small (a byte of 1 added nothing; {1,1,1,1} is 16843009, eight
+                // digits, yet was sized as 1). Each byte contributes at most
+                // log10(256) ~= 2.4083 decimal digits; one character per leading
+                // zero byte, if the encoder emits those, is within that bound too.
+                // NOTE(review): assumes the encoder tolerates spare room in `dest`,
+                // as the Base58 estimate in this function already requires.
+                const digits = (source.len * 2409 + 999) / 1000;
+                break :blk 1 + digits;
+            },
+            .Base16Lower, .Base16Upper => source.len * 2,
+            .Base32Lower, .Base32Upper, .Base32HexLower, .Base32HexUpper, .Base32Z => (source.len * 8 + 4) / 5,
+            .Base32PadLower, .Base32PadUpper, .Base32HexPadLower, .Base32HexPadUpper => ((source.len + 4) / 5) * 8,
+            .Base36Lower, .Base36Upper => blk: {
+                if (source.len == 0) break :blk 1;
+                var size: usize = 1;
+                for (source) |byte| {
+                    if (byte == 0) {
+                        size += 1;
+                        continue;
+                    }
+                    size += @as(usize, @intFromFloat(@ceil(@log(36.0) / @log(2.0) * 8.0))); // NOTE(review): adds 42 per byte; base36 needs about 1.55 chars per byte
+                }
+                break :blk size;
+            },
+            .Base58Flickr, .Base58Btc => blk: {
+                if (source.len == 0) break :blk 1;
+                // Base58 expands at worst case by log(256)/log(58) ≈ 1.37 times
+                const size = @as(usize, @intFromFloat(@ceil(@as(f64, @floatFromInt(source.len)) * 137 / 100)));
+                break :blk size;
+            },
+            .Base64, .Base64Url => (source.len + 2) / 3 * 4,
+            .Base64Pad, .Base64UrlPad => ((source.len + 2) / 3) * 4,
+            .Base256Emoji => source.len * 4, // Each emoji is up to 4 bytes in UTF-8
+        };
+    }
+
+    /// Calculates the maximum decoded size of the multibase string `source`, prefix excluded; input shorter than the prefix yields 0 instead of underflowing.
+    pub fn calcSizeForDecode(self: MultiBaseCodec, source: []const u8) usize {
+        return switch (self) {
+            .Identity => source.len -| 1,
+            .Base2 => (source.len -| 1) / 8,
+            .Base8 => (source.len -| 1) * 3 / 8,
+            .Base10 => source.len -| 1,
+            .Base16Lower, .Base16Upper => (source.len -| 1) / 2,
+            .Base32Lower, .Base32Upper, .Base32HexLower, .Base32HexUpper, .Base32PadLower, .Base32PadUpper, .Base32HexPadLower, .Base32HexPadUpper, .Base32Z => (source.len -| 1) * 5 / 8,
+            .Base36Lower, .Base36Upper => source.len -| 1,
+            .Base58Flickr, .Base58Btc => source.len -| 1,
+            .Base64, .Base64Url, .Base64Pad, .Base64UrlPad => (source.len -| 1) * 3 / 4,
+            .Base256Emoji => (source.len -| 4) / 4, // First emoji is the multibase prefix
+        };
+    }
+
     const identity = struct {
         pub fn encode(dest: []u8, source: []const u8) []const u8 {
             @memcpy(dest[0..source.len], source);
@@ -834,7 +897,7 @@ pub const MultiBaseCodec = enum {
         }
     };
 
-    const base58 = struct {
+    pub const base58 = struct {
         const ALPHABET_FLICKR = "123456789abcdefghijkmnopqrstuvwxyzABCDEFGHJKLMNPQRSTUVWXYZ";
         const ALPHABET_BTC = "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz";
         const Vec = @Vector(16, u8);
diff --git a/src/multihash.zig b/src/multihash.zig
@@ -105,7 +105,7 @@ pub fn Multihash(comptime S: usize) type {
         }
 
         pub fn toBytes(self: Self, allocator: std.mem.Allocator) ![]u8 {
-            const bytes = try allocator.alloc(u8, self.size);
+            const bytes = try allocator.alloc(u8, self.encodedLen());
             var stream = std.io.fixedBufferStream(bytes);
             const written = try self.write(stream.writer());
             std.debug.assert(written == bytes.len);
@@ -115,15 +115,37 @@ pub fn Multihash(comptime S: usize) type {
 }
 
 /// MultihashDigest is a generic type that can be used to create a Multihash from a given input.
-pub fn MultihashDigest(comptime T: type, comptime alloc_size: usize) type {
+pub fn MultihashDigest(comptime T: type) type {
+    const DigestSize = struct { // comptime table: codec tag of `T` -> the size argument passed to `Multihash`
+        fn getSize(comptime code: T) comptime_int {
+            return switch (code) { // no `else` prong: every tag listed here gets an explicit size
+                .SHA2_256 => 32,
+                .SHA2_512 => 64,
+                .SHA3_224 => 28,
+                .SHA3_256 => 32,
+                .SHA3_384 => 48,
+                .SHA3_512 => 64,
+                .KECCAK_224 => 28,
+                .KECCAK_256 => 32,
+                .KECCAK_384 => 48,
+                .KECCAK_512 => 64,
+                .BLAKE2B_256 => 32,
+                .BLAKE2B_512 => 64,
+                .BLAKE2S_128 => 16,
+                .BLAKE2S_256 => 32,
+                .BLAKE3 => 64, // NOTE(review): BLAKE3's default output is 32 bytes; confirm 64 matches what Hasher produces
+            };
+        }
+    };
+
     return struct {
-        pub fn digest(code: T, input: []const u8) !Multihash(alloc_size) {
+        pub fn digest(comptime code: T, input: []const u8) !Multihash(DigestSize.getSize(code)) { // `code` is comptime because it selects the return type
             var hasher = Hasher.init(code);
             try hasher.update(input);
             const digest_bytes = switch (hasher) {
                 inline else => |*h| h.finalize()[0..],
             };
-            return try Multihash(alloc_size).wrap(try Multicodec.fromCode(@intFromEnum(code)), digest_bytes);
+            return try Multihash(DigestSize.getSize(code)).wrap(try Multicodec.fromCode(@intFromEnum(code)), digest_bytes); // same size expression as the declared return type
         }
     };
 }
@@ -191,7 +213,7 @@ pub const MultihashCodecs = enum(u64) {
     BLAKE2S_256 = Multicodec.BLAKE2S_256.getCode(),
     BLAKE3 = Multicodec.BLAKE3.getCode(),
 
-    pub usingnamespace MultihashDigest(@This(), 64);
+    pub usingnamespace MultihashDigest(@This());
 };
 
 /// Sha2_256 is a struct that represents the SHA2-256 hash algorithm.