Skip to content

Commit 14d1fce

Browse files
Merge pull request #2130 from mtrmac/chunked-size
Correctly compute UncompressedSize on zstd:chunked pull, don’t set it on estargz
2 parents 935c58f + f979bad commit 14d1fce

File tree

6 files changed

+130
-51
lines changed

6 files changed

+130
-51
lines changed

drivers/driver.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -189,14 +189,14 @@ type Driver interface {
189189
type DriverWithDifferOutput struct {
190190
Differ Differ
191191
Target string
192-
Size int64
192+
Size int64 // Size of the uncompressed layer, -1 if unknown. Must be known if UncompressedDigest is set.
193193
UIDs []uint32
194194
GIDs []uint32
195195
UncompressedDigest digest.Digest
196196
CompressedDigest digest.Digest
197197
Metadata string
198198
BigData map[string][]byte
199-
TarSplit []byte
199+
TarSplit []byte // nil if not available
200200
TOCDigest digest.Digest
201201
// RootDirMode is the mode of the root directory of the layer, if specified.
202202
RootDirMode *os.FileMode

layers.go

+9-6
Original file line numberDiff line numberDiff line change
@@ -136,9 +136,12 @@ type Layer struct {
136136
TOCDigest digest.Digest `json:"toc-digest,omitempty"`
137137

138138
// UncompressedSize is the length of the blob that was last passed to
139-
// ApplyDiff() or create(), after we decompressed it. If
140-
// UncompressedDigest is not set, this should be treated as if it were
141-
// an uninitialized value.
139+
// ApplyDiff() or create(), after we decompressed it.
140+
//
141+
// - If UncompressedDigest is set, this must be set to a valid value.
142+
// - Otherwise, if TOCDigest is set, this is either valid or -1.
143+
// - If neither of these digests is set, this should be treated as if it were
144+
// an uninitialized value.
142145
UncompressedSize int64 `json:"diff-size,omitempty"`
143146

144147
// CompressionType is the type of compression which we detected on the blob
@@ -1214,8 +1217,8 @@ func (r *layerStore) Size(name string) (int64, error) {
12141217
// We use the presence of a non-empty digest as an indicator that the size value was intentionally set, and that
12151218
// a zero value is not just present because it was never set to anything else (which can happen if the layer was
12161219
// created by a version of this library that didn't keep track of digest and size information).
1217-
if layer.TOCDigest != "" || layer.UncompressedDigest != "" {
1218-
return layer.UncompressedSize, nil
1220+
if layer.UncompressedDigest != "" || layer.TOCDigest != "" {
1221+
return layer.UncompressedSize, nil // This may return -1 if only TOCDigest is set
12191222
}
12201223
return -1, nil
12211224
}
@@ -2510,7 +2513,7 @@ func (r *layerStore) applyDiffFromStagingDirectory(id string, diffOutput *driver
25102513
return err
25112514
}
25122515

2513-
if len(diffOutput.TarSplit) != 0 {
2516+
if diffOutput.TarSplit != nil {
25142517
tsdata := bytes.Buffer{}
25152518
compressor, err := pgzip.NewWriterLevel(&tsdata, pgzip.BestSpeed)
25162519
if err != nil {

pkg/chunked/compression_linux.go

+32-2
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ func readEstargzChunkedManifest(blobStream ImageSourceSeekable, blobSize int64,
139139
}
140140

141141
// readZstdChunkedManifest reads the zstd:chunked manifest from the seekable stream blobStream.
142-
// Returns (manifest blob, parsed manifest, tar-split blob, manifest offset).
142+
// Returns (manifest blob, parsed manifest, tar-split blob or nil, manifest offset).
143143
func readZstdChunkedManifest(blobStream ImageSourceSeekable, tocDigest digest.Digest, annotations map[string]string) ([]byte, *internal.TOC, []byte, int64, error) {
144144
offsetMetadata := annotations[internal.ManifestInfoKey]
145145
if offsetMetadata == "" {
@@ -214,7 +214,7 @@ func readZstdChunkedManifest(blobStream ImageSourceSeekable, tocDigest digest.Di
214214
return nil, nil, nil, 0, fmt.Errorf("unmarshaling TOC: %w", err)
215215
}
216216

217-
decodedTarSplit := []byte{}
217+
var decodedTarSplit []byte = nil
218218
if toc.TarSplitDigest != "" {
219219
if tarSplitChunk.Offset <= 0 {
220220
return nil, nil, nil, 0, fmt.Errorf("TOC requires a tar-split, but the %s annotation does not describe a position", internal.TarSplitInfoKey)
@@ -288,6 +288,36 @@ func ensureTOCMatchesTarSplit(toc *internal.TOC, tarSplit []byte) error {
288288
return nil
289289
}
290290

291+
// tarSizeFromTarSplit computes the total tarball size, using only the tarSplit metadata
292+
func tarSizeFromTarSplit(tarSplit []byte) (int64, error) {
293+
var res int64 = 0
294+
295+
unpacker := storage.NewJSONUnpacker(bytes.NewReader(tarSplit))
296+
for {
297+
entry, err := unpacker.Next()
298+
if err != nil {
299+
if err == io.EOF {
300+
break
301+
}
302+
return -1, fmt.Errorf("reading tar-split entries: %w", err)
303+
}
304+
switch entry.Type {
305+
case storage.SegmentType:
306+
res += int64(len(entry.Payload))
307+
case storage.FileType:
308+
// entry.Size is the “logical size”, which might not be the physical size for sparse entries;
309+
// but the way tar-split/tar/asm.WriteOutputTarStream combines FileType entries and returned files contents,
310+
// sparse files are not supported.
311+
// Also https://github.com/opencontainers/image-spec/blob/main/layer.md says
312+
// > Sparse files SHOULD NOT be used because they lack consistent support across tar implementations.
313+
res += entry.Size
314+
default:
315+
return -1, fmt.Errorf("unexpected tar-split entry type %q", entry.Type)
316+
}
317+
}
318+
return res, nil
319+
}
320+
291321
// ensureTimePointersMatch ensures that a and b are equal
292322
func ensureTimePointersMatch(a, b *time.Time) error {
293323
// We didn’t always use “timeIfNotZero” when creating the TOC, so treat time.IsZero the same as nil.

pkg/chunked/compression_linux_test.go

+45
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
package chunked
2+
3+
import (
4+
"bytes"
5+
"io"
6+
"testing"
7+
8+
"github.com/stretchr/testify/assert"
9+
"github.com/stretchr/testify/require"
10+
"github.com/vbatts/tar-split/archive/tar"
11+
"github.com/vbatts/tar-split/tar/asm"
12+
"github.com/vbatts/tar-split/tar/storage"
13+
)
14+
15+
func TestTarSizeFromTarSplit(t *testing.T) {
16+
var tarball bytes.Buffer
17+
tarWriter := tar.NewWriter(&tarball)
18+
for _, e := range someFiles {
19+
tf, err := typeToTarType(e.Type)
20+
require.NoError(t, err)
21+
err = tarWriter.WriteHeader(&tar.Header{
22+
Typeflag: tf,
23+
Name: e.Name,
24+
Size: e.Size,
25+
Mode: e.Mode,
26+
})
27+
require.NoError(t, err)
28+
data := make([]byte, e.Size)
29+
_, err = tarWriter.Write(data)
30+
require.NoError(t, err)
31+
}
32+
err := tarWriter.Close()
33+
require.NoError(t, err)
34+
expectedTarSize := int64(tarball.Len())
35+
36+
var tarSplit bytes.Buffer
37+
tsReader, err := asm.NewInputTarStream(&tarball, storage.NewJSONPacker(&tarSplit), storage.NewDiscardFilePutter())
38+
require.NoError(t, err)
39+
_, err = io.Copy(io.Discard, tsReader)
40+
require.NoError(t, err)
41+
42+
res, err := tarSizeFromTarSplit(tarSplit.Bytes())
43+
require.NoError(t, err)
44+
assert.Equal(t, expectedTarSize, res)
45+
}

pkg/chunked/storage_linux.go

+41-40
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,8 @@ type chunkedDiffer struct {
8989
// is no TOC referenced by the manifest.
9090
blobDigest digest.Digest
9191

92-
blobSize int64
92+
blobSize int64
93+
uncompressedTarSize int64 // -1 if unknown
9394

9495
pullOptions map[string]string
9596

@@ -216,6 +217,7 @@ func makeConvertFromRawDiffer(store storage.Store, blobDigest digest.Digest, blo
216217
fsVerityDigests: make(map[string]string),
217218
blobDigest: blobDigest,
218219
blobSize: blobSize,
220+
uncompressedTarSize: -1, // Will be computed later
219221
convertToZstdChunked: true,
220222
copyBuffer: makeCopyBuffer(),
221223
layersCache: layersCache,
@@ -229,24 +231,33 @@ func makeZstdChunkedDiffer(store storage.Store, blobSize int64, tocDigest digest
229231
if err != nil {
230232
return nil, fmt.Errorf("read zstd:chunked manifest: %w", err)
231233
}
234+
var uncompressedTarSize int64 = -1
235+
if tarSplit != nil {
236+
uncompressedTarSize, err = tarSizeFromTarSplit(tarSplit)
237+
if err != nil {
238+
return nil, fmt.Errorf("computing size from tar-split: %w", err)
239+
}
240+
}
241+
232242
layersCache, err := getLayersCache(store)
233243
if err != nil {
234244
return nil, err
235245
}
236246

237247
return &chunkedDiffer{
238-
fsVerityDigests: make(map[string]string),
239-
blobSize: blobSize,
240-
tocDigest: tocDigest,
241-
copyBuffer: makeCopyBuffer(),
242-
fileType: fileTypeZstdChunked,
243-
layersCache: layersCache,
244-
manifest: manifest,
245-
toc: toc,
246-
pullOptions: pullOptions,
247-
stream: iss,
248-
tarSplit: tarSplit,
249-
tocOffset: tocOffset,
248+
fsVerityDigests: make(map[string]string),
249+
blobSize: blobSize,
250+
uncompressedTarSize: uncompressedTarSize,
251+
tocDigest: tocDigest,
252+
copyBuffer: makeCopyBuffer(),
253+
fileType: fileTypeZstdChunked,
254+
layersCache: layersCache,
255+
manifest: manifest,
256+
toc: toc,
257+
pullOptions: pullOptions,
258+
stream: iss,
259+
tarSplit: tarSplit,
260+
tocOffset: tocOffset,
250261
}, nil
251262
}
252263

@@ -261,16 +272,17 @@ func makeEstargzChunkedDiffer(store storage.Store, blobSize int64, tocDigest dig
261272
}
262273

263274
return &chunkedDiffer{
264-
fsVerityDigests: make(map[string]string),
265-
blobSize: blobSize,
266-
tocDigest: tocDigest,
267-
copyBuffer: makeCopyBuffer(),
268-
fileType: fileTypeEstargz,
269-
layersCache: layersCache,
270-
manifest: manifest,
271-
pullOptions: pullOptions,
272-
stream: iss,
273-
tocOffset: tocOffset,
275+
fsVerityDigests: make(map[string]string),
276+
blobSize: blobSize,
277+
uncompressedTarSize: -1, // We would have to read and decompress the whole layer
278+
tocDigest: tocDigest,
279+
copyBuffer: makeCopyBuffer(),
280+
fileType: fileTypeEstargz,
281+
layersCache: layersCache,
282+
manifest: manifest,
283+
pullOptions: pullOptions,
284+
stream: iss,
285+
tocOffset: tocOffset,
274286
}, nil
275287
}
276288

@@ -1153,7 +1165,6 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff
11531165

11541166
var compressedDigest digest.Digest
11551167
var uncompressedDigest digest.Digest
1156-
var convertedBlobSize int64
11571168

11581169
if c.convertToZstdChunked {
11591170
fd, err := unix.Open(dest, unix.O_TMPFILE|unix.O_RDWR|unix.O_CLOEXEC, 0o600)
@@ -1185,7 +1196,7 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff
11851196
if err != nil {
11861197
return graphdriver.DriverWithDifferOutput{}, err
11871198
}
1188-
convertedBlobSize = tarSize
1199+
c.uncompressedTarSize = tarSize
11891200
// fileSource is a O_TMPFILE file descriptor, so we
11901201
// need to keep it open until the entire file is processed.
11911202
defer fileSource.Close()
@@ -1255,6 +1266,7 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff
12551266
TOCDigest: c.tocDigest,
12561267
UncompressedDigest: uncompressedDigest,
12571268
CompressedDigest: compressedDigest,
1269+
Size: c.uncompressedTarSize,
12581270
}
12591271

12601272
// When the hard links deduplication is used, file attributes are ignored because setting them
@@ -1268,19 +1280,12 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff
12681280

12691281
var missingParts []missingPart
12701282

1271-
mergedEntries, totalSizeFromTOC, err := c.mergeTocEntries(c.fileType, toc.Entries)
1283+
mergedEntries, err := c.mergeTocEntries(c.fileType, toc.Entries)
12721284
if err != nil {
12731285
return output, err
12741286
}
12751287

12761288
output.UIDs, output.GIDs = collectIDs(mergedEntries)
1277-
if convertedBlobSize > 0 {
1278-
// if the image was converted, store the original tar size, so that
1279-
// it can be recreated correctly.
1280-
output.Size = convertedBlobSize
1281-
} else {
1282-
output.Size = totalSizeFromTOC
1283-
}
12841289

12851290
if err := maybeDoIDRemap(mergedEntries, options); err != nil {
12861291
return output, err
@@ -1597,9 +1602,7 @@ func mustSkipFile(fileType compressedFileType, e internal.FileMetadata) bool {
15971602
return false
15981603
}
15991604

1600-
func (c *chunkedDiffer) mergeTocEntries(fileType compressedFileType, entries []internal.FileMetadata) ([]fileMetadata, int64, error) {
1601-
var totalFilesSize int64
1602-
1605+
func (c *chunkedDiffer) mergeTocEntries(fileType compressedFileType, entries []internal.FileMetadata) ([]fileMetadata, error) {
16031606
countNextChunks := func(start int) int {
16041607
count := 0
16051608
for _, e := range entries[start:] {
@@ -1629,10 +1632,8 @@ func (c *chunkedDiffer) mergeTocEntries(fileType compressedFileType, entries []i
16291632
continue
16301633
}
16311634

1632-
totalFilesSize += e.Size
1633-
16341635
if e.Type == TypeChunk {
1635-
return nil, -1, fmt.Errorf("chunk type without a regular file")
1636+
return nil, fmt.Errorf("chunk type without a regular file")
16361637
}
16371638

16381639
if e.Type == TypeReg {
@@ -1668,7 +1669,7 @@ func (c *chunkedDiffer) mergeTocEntries(fileType compressedFileType, entries []i
16681669
lastChunkOffset = mergedEntries[i].chunks[j].Offset
16691670
}
16701671
}
1671-
return mergedEntries, totalFilesSize, nil
1672+
return mergedEntries, nil
16721673
}
16731674

16741675
// validateChunkChecksum checks if the file at $root/$path[offset:chunk.ChunkSize] has the

store.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -2201,7 +2201,7 @@ func (s *store) ImageSize(id string) (int64, error) {
22012201
}
22022202
// The UncompressedSize is only valid if there's a digest to go with it.
22032203
n := layer.UncompressedSize
2204-
if layer.UncompressedDigest == "" {
2204+
if layer.UncompressedDigest == "" || n == -1 {
22052205
// Compute the size.
22062206
n, err = layerStore.DiffSize("", layer.ID)
22072207
if err != nil {

0 commit comments

Comments
 (0)