Skip to content

Commit fa8153d

Browse files
committed
Ensure that the metadata in the TOC matches the tar-split
Signed-off-by: Miloslav Trmač <[email protected]>
1 parent 2b9a077 commit fa8153d

File tree

10 files changed

+447
-0
lines changed

10 files changed

+447
-0
lines changed

go.mod

+1
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ require (
3030
github.com/tchap/go-patricia/v2 v2.3.1
3131
github.com/ulikunitz/xz v0.5.12
3232
github.com/vbatts/tar-split v0.11.5
33+
golang.org/x/exp v0.0.0-20231006140011-7918f672742d
3334
golang.org/x/sys v0.22.0
3435
gotest.tools v2.2.0+incompatible
3536
)

go.sum

+2
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,8 @@ go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo=
119119
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
120120
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
121121
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
122+
golang.org/x/exp v0.0.0-20231006140011-7918f672742d h1:jtJma62tbqLibJ5sFQz8bKtEM8rJBtfilJ2qTU199MI=
123+
golang.org/x/exp v0.0.0-20231006140011-7918f672742d/go.mod h1:ldy0pHrwJyGW56pPQzzkH36rKxoZW1tw7ZJpeKx+hdo=
122124
golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
123125
golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
124126
golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=

pkg/chunked/compression_linux.go

+124
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,16 @@ import (
55
"errors"
66
"fmt"
77
"io"
8+
"maps"
89
"strconv"
10+
"time"
911

1012
"github.com/containers/storage/pkg/chunked/internal"
1113
"github.com/klauspost/compress/zstd"
1214
"github.com/klauspost/pgzip"
1315
digest "github.com/opencontainers/go-digest"
1416
"github.com/vbatts/tar-split/archive/tar"
17+
expMaps "golang.org/x/exp/maps"
1518
)
1619

1720
var typesToTar = map[string]byte{
@@ -221,6 +224,12 @@ func readZstdChunkedManifest(blobStream ImageSourceSeekable, tocDigest digest.Di
221224
if err != nil {
222225
return nil, nil, nil, 0, fmt.Errorf("validating and decompressing tar-split: %w", err)
223226
}
227+
// We use the TOC for creating on-disk files, but the tar-split for creating metadata
228+
// when exporting the layer contents. Ensure the two match, otherwise local inspection of a container
229+
// might be misleading about the exported contents.
230+
if err := ensureTOCMatchesTarSplit(toc, decodedTarSplit); err != nil {
231+
return nil, nil, nil, 0, fmt.Errorf("tar-split and TOC data is inconsistent: %w", err)
232+
}
224233
} else if tarSplitChunk.Offset > 0 {
225234
// We must ignore the tar-split when the digest is not present in the TOC, because we can’t authenticate it.
226235
//
@@ -234,6 +243,121 @@ func readZstdChunkedManifest(blobStream ImageSourceSeekable, tocDigest digest.Di
234243
return decodedBlob, toc, decodedTarSplit, int64(manifestChunk.Offset), err
235244
}
236245

246+
// ensureTOCMatchesTarSplit validates that toc and tarSplit contain _exactly_ the same entries.
247+
func ensureTOCMatchesTarSplit(toc *internal.TOC, tarSplit []byte) error {
248+
pendingFiles := map[string]*internal.FileMetadata{} // Name -> an entry in toc.Entries
249+
for i := range toc.Entries {
250+
e := &toc.Entries[i]
251+
if e.Type != internal.TypeChunk {
252+
if _, ok := pendingFiles[e.Name]; ok {
253+
return fmt.Errorf("TOC contains duplicate entries for path %q", e.Name)
254+
}
255+
pendingFiles[e.Name] = e
256+
}
257+
}
258+
259+
if err := iterateTarSplit(tarSplit, func(hdr *tar.Header) error {
260+
e, ok := pendingFiles[hdr.Name]
261+
if !ok {
262+
return fmt.Errorf("tar-split contains an entry for %q missing in TOC", hdr.Name)
263+
}
264+
delete(pendingFiles, hdr.Name)
265+
expected, err := internal.NewFileMetadata(hdr)
266+
if err != nil {
267+
return fmt.Errorf("determining expected metadata for %q: %w", hdr.Name, err)
268+
}
269+
if err := ensureFileMetadataAttributesMatch(e, &expected); err != nil {
270+
return fmt.Errorf("TOC and tar-split metadata doesn’t match: %w", err)
271+
}
272+
273+
return nil
274+
}); err != nil {
275+
return err
276+
}
277+
if len(pendingFiles) != 0 {
278+
remaining := expMaps.Keys(pendingFiles)
279+
if len(remaining) > 5 {
280+
remaining = remaining[:5] // Just to limit the size of the output.
281+
}
282+
return fmt.Errorf("TOC contains entries not present in tar-split, incl. %q", remaining)
283+
}
284+
return nil
285+
}
286+
287+
// ensureTimePointersMatch ensures that a and b are equal: either both are nil, or both are set
// and refer to the same instant (compared using time.Time.Equal, not ==).
// On a mismatch, the returned error shows both values.
func ensureTimePointersMatch(a, b *time.Time) error {
	if a == nil || b == nil {
		if a == b { // Both are nil.
			return nil
		}
		if a == nil {
			return fmt.Errorf("nil != %v", *b)
		}
		return fmt.Errorf("%v != nil", *a)
	}
	if !a.Equal(*b) {
		return fmt.Errorf("%v != %v", *a, *b)
	}
	return nil
}
303+
304+
// ensureFileMetadataAttributesMatch ensures that a and b match in file attributes (it ignores entries relevant to locating data
305+
// in the tar stream or matching contents)
306+
func ensureFileMetadataAttributesMatch(a, b *internal.FileMetadata) error {
307+
// Keep this in sync with internal.FileMetadata!
308+
309+
if a.Type != b.Type {
310+
return fmt.Errorf("mismatch of Type: %q != %q", a.Type, b.Type)
311+
}
312+
if a.Name != b.Name {
313+
return fmt.Errorf("mismatch of Name: %q != %q", a.Name, b.Name)
314+
}
315+
if a.Linkname != b.Linkname {
316+
return fmt.Errorf("mismatch of Linkname: %q != %q", a.Linkname, b.Linkname)
317+
}
318+
if a.Mode != b.Mode {
319+
return fmt.Errorf("mismatch of Mode: %q != %q", a.Mode, b.Mode)
320+
}
321+
if a.Size != b.Size {
322+
return fmt.Errorf("mismatch of Size: %q != %q", a.Size, b.Size)
323+
}
324+
if a.UID != b.UID {
325+
return fmt.Errorf("mismatch of UID: %q != %q", a.UID, b.UID)
326+
}
327+
if a.GID != b.GID {
328+
return fmt.Errorf("mismatch of GID: %q != %q", a.GID, b.GID)
329+
}
330+
331+
if err := ensureTimePointersMatch(a.ModTime, b.ModTime); err != nil {
332+
return fmt.Errorf("mismatch of ModTime: %w", err)
333+
}
334+
if err := ensureTimePointersMatch(a.AccessTime, b.AccessTime); err != nil {
335+
return fmt.Errorf("mismatch of AccessTime: %w", err)
336+
}
337+
if err := ensureTimePointersMatch(a.ChangeTime, b.ChangeTime); err != nil {
338+
return fmt.Errorf("mismatch of ChangeTime: %w", err)
339+
}
340+
if a.Devmajor != b.Devmajor {
341+
return fmt.Errorf("mismatch of Devmajor: %q != %q", a.Devmajor, b.Devmajor)
342+
}
343+
if a.Devminor != b.Devminor {
344+
return fmt.Errorf("mismatch of Devminor: %q != %q", a.Devminor, b.Devminor)
345+
}
346+
if !maps.Equal(a.Xattrs, b.Xattrs) {
347+
return fmt.Errorf("mismatch of Xattrs: %q != %q", a.Xattrs, b.Xattrs)
348+
}
349+
350+
// Digest is not compared
351+
// Offset is not compared
352+
// EndOffset is not compared
353+
354+
// ChunkSize is not compared
355+
// ChunkOffset is not compared
356+
// ChunkDigest is not compared
357+
// ChunkType is not compared
358+
return nil
359+
}
360+
237361
func decodeAndValidateBlob(blob []byte, lengthUncompressed uint64, expectedCompressedChecksum string) ([]byte, error) {
238362
d, err := digest.Parse(expectedCompressedChecksum)
239363
if err != nil {

pkg/chunked/internal/compression.go

+2
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@ type TOC struct {
4343
// is used instead of that in the tar stream. The contents of the tar stream
4444
// are not used in this scenario.
4545
type FileMetadata struct {
46+
// If you add any fields, update ensureFileMetadataAttributesMatch as well!
47+
4648
// The metadata below largely duplicates that in the tar headers.
4749
Type string `json:"type"`
4850
Name string `json:"name"`

pkg/chunked/tar_split_linux.go

+68
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
package chunked
2+
3+
import (
4+
"bytes"
5+
"fmt"
6+
"io"
7+
8+
"github.com/vbatts/tar-split/archive/tar"
9+
"github.com/vbatts/tar-split/tar/storage"
10+
)
11+
12+
// iterateTarSplit calls handler for each tar header in tarSplit
13+
func iterateTarSplit(tarSplit []byte, handler func(hdr *tar.Header) error) error {
14+
// This, strictly speaking hard-codes undocumented assumptions about how github.com/vbatts/tar-split/tar/asm.NewInputTarStream
15+
// forms the tar-split contents. Pragmatically, NewInputTarStream should always produce storage.FileType entries at least
16+
// for every non-empty file, which constraints it basically to the output we expect.
17+
//
18+
// Specifically, we assume:
19+
// - There is a separate SegmentType entry for every tar header, but only one SegmentType entry for the full header incl. any extensions
20+
// - (There is a FileType entry for every tar header, we ignore it)
21+
// - Trailing padding of a file, if any, is included in the next SegmentType entry
22+
// - At the end, there may be SegmentType entries just for the terminating zero blocks.
23+
24+
unpacker := storage.NewJSONUnpacker(bytes.NewReader(tarSplit))
25+
for {
26+
tsEntry, err := unpacker.Next()
27+
if err != nil {
28+
if err == io.EOF {
29+
return nil
30+
}
31+
return fmt.Errorf("reading tar-split entries: %w", err)
32+
}
33+
switch tsEntry.Type {
34+
case storage.SegmentType:
35+
payload := tsEntry.Payload
36+
// This is horrible, but we don’t know how much padding to skip. (It can be computed from the previous hdr.Size for non-sparse
37+
// files, but for sparse files that is set to the logical size.)
38+
//
39+
// First, assume that all padding is zero bytes.
40+
// A tar header starts with a file name, which might in principle be empty, but
41+
// at least https://github.com/opencontainers/image-spec/blob/main/layer.md#populate-initial-filesystem suggests that
42+
// the tar name should never be empty (it should be ".", or maybe "./").
43+
//
44+
// This will cause us to skip all zero bytes in the trailing blocks, but that’s fine.
45+
i := 0
46+
for i < len(payload) && payload[i] == 0 {
47+
i++
48+
}
49+
payload = payload[i:]
50+
tr := tar.NewReader(bytes.NewReader(payload))
51+
hdr, err := tr.Next()
52+
if err != nil {
53+
if err == io.EOF { // Probably the last entry, but let’s let the unpacker drive that.
54+
break
55+
}
56+
return fmt.Errorf("decoding a tar header from a tar-split entry: %w", err)
57+
}
58+
if err := handler(hdr); err != nil {
59+
return err
60+
}
61+
62+
case storage.FileType:
63+
// Nothing
64+
default:
65+
return fmt.Errorf("unexpected tar-split entry type %q", tsEntry.Type)
66+
}
67+
}
68+
}

pkg/chunked/tar_split_linux_test.go

+104
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
package chunked
2+
3+
import (
4+
"bytes"
5+
"fmt"
6+
"io"
7+
"testing"
8+
"time"
9+
10+
"github.com/containers/storage/pkg/chunked/internal"
11+
"github.com/stretchr/testify/assert"
12+
"github.com/stretchr/testify/require"
13+
"github.com/vbatts/tar-split/archive/tar"
14+
"github.com/vbatts/tar-split/tar/asm"
15+
"github.com/vbatts/tar-split/tar/storage"
16+
)
17+
18+
func testTarheader(index int, typeFlag byte, size int64) tar.Header {
19+
n := (index + 1) * 100 // Use predictable, but distinct, values for all headers
20+
21+
res := tar.Header{
22+
Typeflag: typeFlag,
23+
Name: fmt.Sprintf("name%d", n),
24+
Size: size,
25+
Mode: int64(n + 1),
26+
Uid: n + 2,
27+
Gid: n + 3,
28+
Uname: fmt.Sprintf("user%d", n),
29+
Gname: fmt.Sprintf("group%d", n),
30+
ModTime: time.Unix(int64(n+4), 0),
31+
AccessTime: time.Unix(int64(n+5), 0),
32+
ChangeTime: time.Unix(int64(n+6), 0),
33+
PAXRecords: map[string]string{fmt.Sprintf("key%d", n): fmt.Sprintf("value%d", n)},
34+
Format: tar.FormatPAX, // We must set a format, in the default one AccessTime and ChangeTime are discarded.
35+
}
36+
switch res.Typeflag {
37+
case tar.TypeLink, tar.TypeSymlink:
38+
res.Linkname = fmt.Sprintf("link%d", n)
39+
case tar.TypeChar, tar.TypeBlock:
40+
res.Devmajor = int64(n + 7)
41+
res.Devminor = int64(n + 8)
42+
}
43+
return res
44+
}
45+
46+
func TestIterateTarSplit(t *testing.T) {
47+
entries := []struct {
48+
typeFlag byte
49+
size int64
50+
}{
51+
{tar.TypeReg, 0},
52+
{tar.TypeReg, 1},
53+
{tar.TypeReg, 511},
54+
{tar.TypeReg, 512},
55+
{tar.TypeReg, 513},
56+
{tar.TypeLink, 0},
57+
{tar.TypeSymlink, 0},
58+
{tar.TypeChar, 0},
59+
{tar.TypeBlock, 0},
60+
{tar.TypeDir, 0},
61+
{tar.TypeFifo, 0},
62+
}
63+
64+
var tarball bytes.Buffer
65+
var expected []tar.Header
66+
w := tar.NewWriter(&tarball)
67+
for i, e := range entries {
68+
hdr := testTarheader(i, e.typeFlag, e.size)
69+
err := w.WriteHeader(&hdr)
70+
require.NoError(t, err)
71+
data := make([]byte, e.size)
72+
_, err = w.Write(data)
73+
require.NoError(t, err)
74+
expected = append(expected, hdr)
75+
}
76+
err := w.Close()
77+
require.NoError(t, err)
78+
79+
var tarSplit bytes.Buffer
80+
tsReader, err := asm.NewInputTarStream(&tarball, storage.NewJSONPacker(&tarSplit), storage.NewDiscardFilePutter())
81+
require.NoError(t, err)
82+
_, err = io.Copy(io.Discard, tsReader)
83+
require.NoError(t, err)
84+
85+
var actual []tar.Header
86+
err = iterateTarSplit(tarSplit.Bytes(), func(hdr *tar.Header) error {
87+
actual = append(actual, *hdr)
88+
return nil
89+
})
90+
require.NoError(t, err)
91+
92+
assert.Equal(t, len(expected), len(actual))
93+
for i := range expected {
94+
// We would have to open-code an equality comparison of time.Time values; instead, convert to FileMetadata,
95+
// because we already have that implemented for that type — and because it provides a tiny bit of code coverage
96+
// testing for ensureFileMetadataAttributesMatch.
97+
expected1, err := internal.NewFileMetadata(&expected[i])
98+
require.NoError(t, err, i)
99+
actual1, err := internal.NewFileMetadata(&actual[i])
100+
require.NoError(t, err, i)
101+
err = ensureFileMetadataAttributesMatch(&expected1, &actual1)
102+
assert.NoError(t, err, i)
103+
}
104+
}

vendor/golang.org/x/exp/LICENSE

+27
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)