Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat/multiple equivalent images SBOM poc #2467

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 78 additions & 0 deletions internal/testing/testdata/models.go
Original file line number Diff line number Diff line change
Expand Up @@ -608,3 +608,81 @@ var ITE6EOLPython = []byte(`{
}
}
}`)

// ITE6ReferenceSingle is a test document for the Reference ingestor: an
// in-toto v1 statement with one subject and a Reference predicate (v0.1)
// that carries exactly one entry in "references".
//
// NOTE(review): the subject "uri" value looks like e-mail-obfuscation residue
// rather than a well-formed purl, and the "sha256" value is an elided
// placeholder, not a real digest — confirm both against what the parser
// under test is expected to accept.
var ITE6ReferenceSingle = []byte(`{
"type": "https://in-toto.io/Statement/v1",
"subject": [
{
"uri": "pkg:npm/[email protected]"
}
],
"predicateType": "https://in-toto.io/attestation/reference/v0.1",
"predicate": {
"attester": {
"id": "attester-123"
},
"references": [
{
"downloadLocation": "https://example.com/downloads/pkg.tar.gz",
"digest": {
"sha256": "abcd1234..."
},
"mediaType": "application/x-tar"
}
]
}
}`)

// ITE6ReferenceMultiple is a test document for the Reference ingestor: an
// in-toto v1 statement with one subject and a Reference predicate (v0.1)
// whose "references" array holds two entries, each with its own download
// location, sha256 digest and media type.
//
// NOTE(review): the subject "uri" value looks like e-mail-obfuscation residue
// rather than a well-formed purl — confirm against the intended fixture.
var ITE6ReferenceMultiple = []byte(`{
"type": "https://in-toto.io/Statement/v1",
"subject": [
{
"uri": "pkg:pypi/[email protected]"
}
],
"predicateType": "https://in-toto.io/attestation/reference/v0.1",
"predicate": {
"attester": {
"id": "attester-xyz"
},
"references": [
{
"downloadLocation": "https://example.com/artifacts/python-ref1.tgz",
"digest": {
"sha256": "aa1111111111111111111111111111111111111111111111111111111111111111"
},
"mediaType": "application/octet-stream"
},
{
"downloadLocation": "https://example.com/artifacts/python-ref2.whl",
"digest": {
"sha256": "bb2222222222222222222222222222222222222222222222222222222222222222"
},
"mediaType": "application/zip"
}
]
}
}`)

// ITE6ReferenceNoSubject is a test document for the Reference ingestor in
// which the statement's "subject" array is empty while the Reference
// predicate (v0.1) still carries one reference entry.
//
// The "sha256" value is a readable placeholder, not a hex digest.
var ITE6ReferenceNoSubject = []byte(`{
"type": "https://in-toto.io/Statement/v1",
"subject": [],
"predicateType": "https://in-toto.io/attestation/reference/v0.1",
"predicate": {
"attester": {
"id": "attester-nobody"
},
"references": [
{
"downloadLocation": "https://example.com/artifacts/no-subject.tgz",
"digest": {
"sha256": "no-subject-digest"
},
"mediaType": "application/octet-stream"
}
]
}
}`)
54 changes: 54 additions & 0 deletions pkg/certifier/attestation/reference/attestation_reference.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
//
// Copyright 2025 The GUAC Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package attestation

import (
attestationv1 "github.com/in-toto/attestation/go/v1"
)

// PredicateReference is the predicateType URI that identifies an in-toto
// Reference attestation, version 0.1.
const PredicateReference = "https://in-toto.io/attestation/reference/v0.1"

// ReferenceStatement is the in-toto statement envelope for a Reference
// attestation: it embeds the generic v1 statement header and adds a typed
// Predicate serialized under the JSON key "predicate".
//
// NOTE(review): the embedded Statement presumably declares its own predicate
// field; confirm that this outer field is the one used when (un)marshalling.
type ReferenceStatement struct {
attestationv1.Statement
// Predicate holds the Reference-specific payload (attester and references).
Predicate ReferencePredicate `json:"predicate"`
}

// ReferencePredicate is the predicate body of a Reference attestation.
type ReferencePredicate struct {
// Attester identifies who produced the attestation (JSON key "attester").
Attester ReferenceAttester `json:"attester"`
// References lists the referenced artifacts (JSON key "references").
References []ReferenceItem `json:"references"`
}

// ReferenceAttester carries the attester information of a Reference
// predicate.
type ReferenceAttester struct {
// ID is the attester identifier (JSON key "id").
ID string `json:"id"`
}

// ReferenceItem is a single entry of a Reference predicate's "references"
// list.
type ReferenceItem struct {
// DownloadLocation is where the referenced artifact can be fetched from
// (JSON key "downloadLocation").
DownloadLocation string `json:"downloadLocation"`
// Digest is the digest of the referenced artifact (JSON key "digest").
Digest ReferenceDigestItem `json:"digest"`
// MediaType is the media type of the referenced artifact (JSON key
// "mediaType").
MediaType string `json:"mediaType"`
}

// ReferenceDigestItem is the digest of a single reference. Only a SHA-256
// value is modelled; no other algorithm has a field here.
type ReferenceDigestItem struct {
// SHA256 is the sha256 digest value (JSON key "sha256").
SHA256 string `json:"sha256"`
}
73 changes: 71 additions & 2 deletions pkg/handler/collector/oci/oci.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,21 @@ package oci

import (
"context"
"encoding/json"
"fmt"
"slices"
"strings"
"sync"
"time"

attestation "github.com/guacsec/guac/pkg/certifier/attestation/reference"
"github.com/guacsec/guac/pkg/collectsub/datasource"
"github.com/guacsec/guac/pkg/events"
"github.com/guacsec/guac/pkg/handler/processor"
"github.com/guacsec/guac/pkg/logging"
"github.com/guacsec/guac/pkg/version"
attestationv1 "github.com/in-toto/attestation/go/v1"
"github.com/opencontainers/go-digest"
"github.com/pkg/errors"
"github.com/regclient/regclient"
"github.com/regclient/regclient/types/descriptor"
Expand Down Expand Up @@ -322,7 +326,7 @@ func (o *ociCollector) fetchFallbackArtifacts(ctx context.Context, repo string,
// check to see if the digest + suffix has already been collected
if !o.isDigestCollected(repo, digestTag) {
imageTag := fmt.Sprintf("%v:%v", repo, digestTag)
err := fetchOCIArtifactBlobs(ctx, rc, imageTag, "unknown", docChannel)
err := fetchOCIArtifactBlobs(ctx, rc, image, imageTag, "unknown", docChannel)
if err != nil {
return fmt.Errorf("failed retrieving artifact blobs from registry fallback artifacts: %w", err)
}
Expand Down Expand Up @@ -365,7 +369,7 @@ func (o *ociCollector) fetchReferrerArtifacts(ctx context.Context, repo string,
if !o.isDigestCollected(repo, referrerDescDigest) {
logger.Infof("Fetching referrer %s with artifact type %s", referrerDescDigest, referrerDesc.ArtifactType)
referrerDigest := fmt.Sprintf("%v@%v", repo, referrerDescDigest)
e := fetchOCIArtifactBlobs(ctx, rc, referrerDigest, referrerDesc.ArtifactType, docChannel)
e := fetchOCIArtifactBlobs(ctx, rc, image, referrerDigest, referrerDesc.ArtifactType, docChannel)
if e != nil {
errorChan <- fmt.Errorf("failed retrieving artifact blobs from registry: %w", err)
cancel()
Expand Down Expand Up @@ -403,6 +407,7 @@ func (o *ociCollector) fetchReferrerArtifacts(ctx context.Context, repo string,
func fetchOCIArtifactBlobs(
ctx context.Context,
rc *regclient.RegClient,
image ref.Ref,
artifact,
artifactType string,
docChannel chan<- *processor.Document,
Expand Down Expand Up @@ -458,6 +463,12 @@ func fetchOCIArtifactBlobs(
}
}

err = checkIfImageIsCopy(image, artifact, btr1, docChannel)
if err != nil {
// log error and continue
logger.Errorf("failed to check if blob is occurrence: %v", err)
}

doc := &processor.Document{
Blob: btr1,
Type: docType,
Expand All @@ -474,6 +485,64 @@ func fetchOCIArtifactBlobs(
return nil
}

// checkIfImageIsCopy builds an in-toto Reference statement that links image
// to the artifact blob that was just fetched, and sends it on docChannel as a
// processor.DocumentITE6Reference document.
//
// Parameters:
//   - image: the OCI image the artifact belongs to; it becomes the statement
//     subject. image.Digest must be a well-formed "<algorithm>:<hex>" digest.
//   - artifact: the registry reference the blob was fetched from; recorded as
//     the reference's download location and as the document source.
//   - blob: the raw artifact content; its SHA-256 is recorded in the reference.
//   - docChannel: receives the generated document. The send blocks until the
//     document is accepted.
//
// It returns an error, and sends nothing, when image.Digest cannot be parsed
// or the statement cannot be marshalled to JSON.
//
// NOTE(review): the attester ID is a mock value and the reference media type
// is always SpdxJson regardless of what blob actually contains — confirm
// before this leaves proof-of-concept state.
func checkIfImageIsCopy(
	image ref.Ref,
	artifact string,
	blob []byte,
	docChannel chan<- *processor.Document,
) error {
	// Validate the image digest up front: a statement whose subject has an
	// empty or malformed digest cannot be matched to anything downstream.
	imageDigest, err := digest.Parse(image.Digest)
	if err != nil {
		return fmt.Errorf("parsing image digest %q: %w", image.Digest, err)
	}

	blobDigest := digest.FromBytes(blob)
	imagePurl := fmt.Sprintf("pkg:oci/%s/%s@%s", image.Registry, image.Repository, image.Digest)

	artifactSyncMockAttesterID := "https://artifact-sync.azure.com/v1"
	referenceStatement := &attestation.ReferenceStatement{
		Statement: attestationv1.Statement{
			Type:          attestationv1.StatementTypeUri,
			PredicateType: attestation.PredicateReference,
			Subject: []*attestationv1.ResourceDescriptor{{
				Uri: imagePurl,
				// Digest maps are keyed by algorithm and hold the bare
				// encoded value, without the "<algorithm>:" prefix.
				Digest: map[string]string{
					imageDigest.Algorithm().String(): imageDigest.Encoded(),
				},
			}},
		},
		Predicate: attestation.ReferencePredicate{
			Attester: attestation.ReferenceAttester{
				ID: artifactSyncMockAttesterID,
			},
			References: []attestation.ReferenceItem{
				{
					DownloadLocation: artifact,
					Digest: attestation.ReferenceDigestItem{
						// FromBytes uses the canonical SHA-256 algorithm;
						// store only the hex part in the sha256 field.
						SHA256: blobDigest.Encoded(),
					},
					MediaType: SpdxJson,
				},
			},
		},
	}

	// Marshal the reference statement.
	referenceStatementBytes, err := json.Marshal(referenceStatement)
	if err != nil {
		return fmt.Errorf("failed to marshal reference statement: %w", err)
	}

	doc := &processor.Document{
		Blob:   referenceStatementBytes,
		Type:   processor.DocumentITE6Reference,
		Format: processor.FormatJSON,
		SourceInformation: processor.SourceInformation{
			Collector:   string(OCICollector),
			Source:      artifact,
			DocumentRef: events.GetDocRef(referenceStatementBytes),
		},
	}
	docChannel <- doc

	return nil
}

// isDigestCollected checks if a given digest has already been collected for a given repository.
// It returns true if the digest has been collected, false otherwise.
func (o *ociCollector) isDigestCollected(repo string, digest string) bool {
Expand Down
3 changes: 2 additions & 1 deletion pkg/handler/processor/ite6/ite6.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ func (e *ITE6Processor) ValidateSchema(i *processor.Document) error {
i.Type != processor.DocumentITE6SLSA &&
i.Type != processor.DocumentITE6Vul &&
i.Type != processor.DocumentITE6ClearlyDefined &&
i.Type != processor.DocumentITE6EOL {
i.Type != processor.DocumentITE6EOL &&
i.Type != processor.DocumentITE6Reference {
return fmt.Errorf("expected ITE6 document type, actual document type: %v", i.Type)
}

Expand Down
1 change: 1 addition & 0 deletions pkg/handler/processor/process/process.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ func init() {
_ = RegisterDocumentProcessor(&ite6.ITE6Processor{}, processor.DocumentITE6Vul)
_ = RegisterDocumentProcessor(&ite6.ITE6Processor{}, processor.DocumentITE6ClearlyDefined)
_ = RegisterDocumentProcessor(&ite6.ITE6Processor{}, processor.DocumentITE6EOL)
_ = RegisterDocumentProcessor(&ite6.ITE6Processor{}, processor.DocumentITE6Reference)
_ = RegisterDocumentProcessor(&dsse.DSSEProcessor{}, processor.DocumentDSSE)
_ = RegisterDocumentProcessor(&spdx.SPDXProcessor{}, processor.DocumentSPDX)
_ = RegisterDocumentProcessor(&csaf.CSAFProcessor{}, processor.DocumentCsaf)
Expand Down
9 changes: 5 additions & 4 deletions pkg/handler/processor/processor.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,11 @@ type DocumentType string

// Document* is the enumerables of DocumentType
const (
DocumentITE6SLSA DocumentType = "SLSA"
DocumentITE6Generic DocumentType = "ITE6"
DocumentITE6Vul DocumentType = "ITE6VUL"
DocumentITE6EOL DocumentType = "ITE6EOL"
DocumentITE6SLSA DocumentType = "SLSA"
DocumentITE6Generic DocumentType = "ITE6"
DocumentITE6Vul DocumentType = "ITE6VUL"
DocumentITE6EOL DocumentType = "ITE6EOL"
DocumentITE6Reference DocumentType = "ITE6REF"
// ClearlyDefined
DocumentITE6ClearlyDefined DocumentType = "ITE6CD"
DocumentDSSE DocumentType = "DSSE"
Expand Down
2 changes: 2 additions & 0 deletions pkg/ingestor/parser/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ import (
"github.com/guacsec/guac/pkg/ingestor/parser/eol"
"github.com/guacsec/guac/pkg/ingestor/parser/opaque"
"github.com/guacsec/guac/pkg/ingestor/parser/open_vex"
"github.com/guacsec/guac/pkg/ingestor/parser/reference"
"github.com/guacsec/guac/pkg/ingestor/parser/scorecard"
"github.com/guacsec/guac/pkg/ingestor/parser/slsa"
"github.com/guacsec/guac/pkg/ingestor/parser/spdx"
Expand All @@ -50,6 +51,7 @@ func init() {
_ = RegisterDocumentParser(csaf.NewCsafParser, processor.DocumentCsaf)
_ = RegisterDocumentParser(open_vex.NewOpenVEXParser, processor.DocumentOpenVEX)
_ = RegisterDocumentParser(eol.NewEOLCertificationParser, processor.DocumentITE6EOL)
_ = RegisterDocumentParser(reference.NewReferenceParser, processor.DocumentITE6Reference)
_ = RegisterDocumentParser(opaque.NewOpaqueParser, processor.DocumentOpaque)
}

Expand Down
Loading
Loading