diff --git a/.ruff.toml b/.ruff.toml new file mode 100644 index 00000000000..e7bd329d2d6 --- /dev/null +++ b/.ruff.toml @@ -0,0 +1 @@ +line-length = 80 \ No newline at end of file diff --git a/Pipfile.lock b/Pipfile.lock index ccd5a4c10eb..0d5708d964e 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -332,10 +332,10 @@ }, "grpcio-status": { "hashes": [ - "sha256:4f4bc140ed718a343d04a573c838c9fff5cb87f40d3cb2aa33b027c55031489d", - "sha256:5a46820dc7d94bac48aae5dd97c94cadea09d80a05553aca74a92a2954030420" + "sha256:96968314e0c8576b2b631be3917c665964c8018900cb980d58a736fbff828578", + "sha256:b50305d52c0df6169493cca5f2e39b9b4d773b3f30d4a7a6b6dd7c18cb89007c" ], - "version": "==1.53.0" + "version": "==1.54.0" }, "idna": { "hashes": [ @@ -371,22 +371,22 @@ }, "protobuf": { "hashes": [ - "sha256:3e19dcf4adbf608924d3486ece469dd4f4f2cf7d2649900f0efcd1a84e8fd3ba", - "sha256:5917412347e1da08ce2939eb5cd60650dfb1a9ab4606a415b9278a1041fb4d19", - "sha256:651113695bc2e5678b799ee5d906b5d3613f4ccfa61b12252cfceb6404558af0", - "sha256:67b7d19da0fda2733702c2299fd1ef6cb4b3d99f09263eacaf1aa151d9d05f02", - "sha256:6e100f7bc787cd0a0ae58dbf0ab8bbf1ee7953f862b89148b6cf5436d5e9eaa1", - "sha256:85aa9acc5a777adc0c21b449dafbc40d9a0b6413ff3a4f77ef9df194be7f975b", - "sha256:87a6393fa634f294bf24d1cfe9fdd6bb605cbc247af81b9b10c4c0f12dfce4b3", - "sha256:8bc971d76c03f1dd49f18115b002254f2ddb2d4b143c583bb860b796bb0d399e", - "sha256:953fc7904ef46900262a26374b28c2864610b60cdc8b272f864e22143f8373c4", - "sha256:9e12e2810e7d297dbce3c129ae5e912ffd94240b050d33f9ecf023f35563b14f", - "sha256:b8700792f88e59ccecfa246fa48f689d6eee6900eddd486cdae908ff706c482b", - "sha256:dce7a55d501c31ecf688adb2f6c3f763cf11bc0be815d1946a84d74772ab07a7", - "sha256:e3fb58076bdb550e75db06ace2a8b3879d4c4f7ec9dd86e4254656118f4a78d7" + "sha256:13233ee2b9d3bd9a5f216c1fa2c321cd564b93d8f2e4f521a85b585447747997", + "sha256:23452f2fdea754a8251d0fc88c0317735ae47217e0d27bf330a30eec2848811a", + "sha256:52f0a78141078077cfe15fe333ac3e3a077420b9a3f5d1bf9b5fe9d286b4d881", + "sha256:70659847ee57a5262a65954538088a1d72dfc3e9882695cab9f0c54ffe71663b", + "sha256:7760730063329d42a9d4c4573b804289b738d4931e363ffbe684716b796bde51", + "sha256:7cf56e31907c532e460bb62010a513408e6cdf5b03fb2611e4b67ed398ad046d", + "sha256:8b54f56d13ae4a3ec140076c9d937221f887c8f64954673d46f63751209e839a", + "sha256:d14fc1a41d1a1909998e8aff7e80d2a7ae14772c4a70e4bf7db8a36690b54425", + "sha256:d4b66266965598ff4c291416be429cef7989d8fae88b55b62095a2331511b3fa", + "sha256:e0e630d8e6a79f48c557cd1835865b593d0547dce221c66ed1b827de59c66c97", + "sha256:ecae944c6c2ce50dda6bf76ef5496196aeb1b85acb95df5843cd812615ec4b61", + "sha256:f08aa300b67f1c012100d8eb62d47129e53d1150f4469fd78a29fa3cb68c66f2", + "sha256:f2f4710543abec186aee332d6852ef5ae7ce2e9e807a3da570f36de5a732d88e" ], "markers": "python_version >= '3.7'", - "version": "==4.22.1" + "version": "==4.22.3" }, "pyasn1": { "hashes": [ @@ -829,22 +829,22 @@ }, "protobuf": { "hashes": [ - "sha256:3e19dcf4adbf608924d3486ece469dd4f4f2cf7d2649900f0efcd1a84e8fd3ba", - "sha256:5917412347e1da08ce2939eb5cd60650dfb1a9ab4606a415b9278a1041fb4d19", - "sha256:651113695bc2e5678b799ee5d906b5d3613f4ccfa61b12252cfceb6404558af0", - "sha256:67b7d19da0fda2733702c2299fd1ef6cb4b3d99f09263eacaf1aa151d9d05f02", - "sha256:6e100f7bc787cd0a0ae58dbf0ab8bbf1ee7953f862b89148b6cf5436d5e9eaa1", - "sha256:85aa9acc5a777adc0c21b449dafbc40d9a0b6413ff3a4f77ef9df194be7f975b", - "sha256:87a6393fa634f294bf24d1cfe9fdd6bb605cbc247af81b9b10c4c0f12dfce4b3", - 
"sha256:8bc971d76c03f1dd49f18115b002254f2ddb2d4b143c583bb860b796bb0d399e", - "sha256:953fc7904ef46900262a26374b28c2864610b60cdc8b272f864e22143f8373c4", - "sha256:9e12e2810e7d297dbce3c129ae5e912ffd94240b050d33f9ecf023f35563b14f", - "sha256:b8700792f88e59ccecfa246fa48f689d6eee6900eddd486cdae908ff706c482b", - "sha256:dce7a55d501c31ecf688adb2f6c3f763cf11bc0be815d1946a84d74772ab07a7", - "sha256:e3fb58076bdb550e75db06ace2a8b3879d4c4f7ec9dd86e4254656118f4a78d7" + "sha256:13233ee2b9d3bd9a5f216c1fa2c321cd564b93d8f2e4f521a85b585447747997", + "sha256:23452f2fdea754a8251d0fc88c0317735ae47217e0d27bf330a30eec2848811a", + "sha256:52f0a78141078077cfe15fe333ac3e3a077420b9a3f5d1bf9b5fe9d286b4d881", + "sha256:70659847ee57a5262a65954538088a1d72dfc3e9882695cab9f0c54ffe71663b", + "sha256:7760730063329d42a9d4c4573b804289b738d4931e363ffbe684716b796bde51", + "sha256:7cf56e31907c532e460bb62010a513408e6cdf5b03fb2611e4b67ed398ad046d", + "sha256:8b54f56d13ae4a3ec140076c9d937221f887c8f64954673d46f63751209e839a", + "sha256:d14fc1a41d1a1909998e8aff7e80d2a7ae14772c4a70e4bf7db8a36690b54425", + "sha256:d4b66266965598ff4c291416be429cef7989d8fae88b55b62095a2331511b3fa", + "sha256:e0e630d8e6a79f48c557cd1835865b593d0547dce221c66ed1b827de59c66c97", + "sha256:ecae944c6c2ce50dda6bf76ef5496196aeb1b85acb95df5843cd812615ec4b61", + "sha256:f08aa300b67f1c012100d8eb62d47129e53d1150f4469fd78a29fa3cb68c66f2", + "sha256:f2f4710543abec186aee332d6852ef5ae7ce2e9e807a3da570f36de5a732d88e" ], "markers": "python_version >= '3.7'", - "version": "==4.22.1" + "version": "==4.22.3" }, "pylint": { "hashes": [ @@ -888,11 +888,11 @@ }, "virtualenv": { "hashes": [ - "sha256:31712f8f2a17bd06234fa97fdf19609e789dd4e3e4bf108c3da71d710651adbc", - "sha256:f50e3e60f990a0757c9b68333c9fdaa72d7188caa417f96af9e52407831a3b68" + "sha256:278753c47aaef1a0f14e6db8a4c5e1e040e90aea654d0fc1dc7e0d8a42616cc3", + "sha256:48fd3b907b5149c5aab7c23d9790bea4cac6bc6b150af8635febc4cfeab1275a" ], "markers": "python_version >= '3.7'", - "version": "==20.21.0" + "version": "==20.22.0" }, "virtualenv-clone": { "hashes": [ diff --git a/docker/importer/importer.py b/docker/importer/importer.py index 0c332b4bc16..42a4c021dd9 100755 --- a/docker/importer/importer.py +++ b/docker/importer/importer.py @@ -196,7 +196,7 @@ def import_new_oss_fuzz_entries(self, repo, oss_fuzz_source): logging.info('No new entries, skipping committing.') return - logging.info('Commiting and pushing new entries') + logging.info('Committing and pushing new entries') if osv.push_source_changes(repo, 'Import from OSS-Fuzz', self._git_callbacks(oss_fuzz_source)): ndb.put_multi(exported) @@ -217,13 +217,15 @@ def schedule_regular_updates(self, repo, source_repo: osv.SourceRepository): osv.Bug.source == source_repo.name): self._request_analysis(bug, source_repo, repo) + # yapf: disable # Perform a re-analysis on existing oss-fuzz bugs for a period of time, # more vulnerable releases might be made even though fixes have # already been merged into master/main cutoff_time = aest_time_now - datetime.timedelta(days=_BUG_REDO_DAYS) query = osv.Bug.query(osv.Bug.status == osv.BugStatus.PROCESSED, - osv.Bug.source == source_repo.name, osv.Bug.timestamp - >= cutoff_time) + osv.Bug.source == source_repo.name, + osv.Bug.timestamp >= cutoff_time) + # yapf: enable for bug in query: logging.info('Re-requesting impact for %s.', bug.key.id()) diff --git a/docker/indexer/Dockerfile b/docker/indexer/Dockerfile index 6f856bf15fd..f9cff7c340f 100644 --- a/docker/indexer/Dockerfile +++ b/docker/indexer/Dockerfile @@ -14,8 +14,14 @@ FROM 
golang:1.20 as GO_BUILD WORKDIR /build -ADD ./ /build -RUN chmod +x build.sh + +# Cache dependencies in these steps +COPY ./go.mod /build/go.mod +COPY ./go.sum /build/go.sum +RUN go mod download + +# Do the build here +COPY ./ /build RUN ./build.sh FROM gcr.io/distroless/base diff --git a/docker/indexer/build.sh b/docker/indexer/build.sh old mode 100644 new mode 100755 index ac171b52f3b..d6962bb332d --- a/docker/indexer/build.sh +++ b/docker/indexer/build.sh @@ -1,3 +1,2 @@ #!/bin/sh -go mod tidy go build -o indexer \ No newline at end of file diff --git a/docker/indexer/config/config.go b/docker/indexer/config/config.go index 4cbd636a6c6..dbd8ce42546 100644 --- a/docker/indexer/config/config.go +++ b/docker/indexer/config/config.go @@ -1,17 +1,17 @@ /* Copyright 2022 Google LLC - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ // Package config provides functionality to load configurations package config diff --git a/docker/indexer/indexer.go b/docker/indexer/indexer.go index 5e5e2b7c636..9c41bd9f2f5 100644 --- a/docker/indexer/indexer.go +++ b/docker/indexer/indexer.go @@ -1,17 +1,17 @@ /* Copyright 2022 Google LLC - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 +http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ package main @@ -85,7 +85,7 @@ func runWorker(ctx context.Context, storer *idxStorage.Store, repoBucketHdl *sto PubSubOutstandingMessages: outstanding, } // The preparation results are picked up by the processing stage - // in workder mode. + // in worker mode. 
// They include checkout options which are used to load the desired // repository state and hash the source files in that particular tree. // Finally, the computed hashes and repo state information is stored. diff --git a/docker/indexer/shared/shared.go b/docker/indexer/shared/shared.go index 106fb4a8bad..d6e3b991e3c 100644 --- a/docker/indexer/shared/shared.go +++ b/docker/indexer/shared/shared.go @@ -1,17 +1,17 @@ /* Copyright 2022 Google LLC - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ // Package shared provides functionality that is used in multiple packages. package shared @@ -34,7 +34,7 @@ const ( // CopyFromBucket copies a directory from a bucket to a temporary location. func CopyFromBucket(ctx context.Context, bucketHdl *storage.BucketHandle, name string) (string, error) { - tmpDir, err := os.MkdirTemp("", "") + tmpDir, err := os.MkdirTemp("", name) if err != nil { return "", err } diff --git a/docker/indexer/stages/preparation/preparation.go b/docker/indexer/stages/preparation/preparation.go index 08311d03864..ea3a90af3d9 100644 --- a/docker/indexer/stages/preparation/preparation.go +++ b/docker/indexer/stages/preparation/preparation.go @@ -1,17 +1,17 @@ /* Copyright 2022 Google LLC - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ // Package preparation provides functionality to extract tags, branches and commits from repository configurations. 
package preparation @@ -53,6 +53,7 @@ type Result struct { Version string CheckoutOptions *git.CheckoutOptions Commit plumbing.Hash + Reference plumbing.Hash CommitTag string When time.Time Type string @@ -150,6 +151,13 @@ func (s *Stage) processGit(ctx context.Context, repoCfg *config.RepoConfig) erro commitTracker := make(map[plumbing.Hash]bool) // repoInfo is used as the iterator function to create RepositoryInformation structs. repoInfo := func(ref *plumbing.Reference) error { + // Resolve the ref to its underlying commit hash; for annotated tags, + // ref.Hash() is the hash of the tag object itself, not of the commit it points to. + commitHash, err := repo.ResolveRevision(plumbing.Revision(ref.Name().String())) + + if err != nil { + return err + } + found, err := s.Checker.Exists(ctx, repoCfg.Address, shared.MD5, ref.Hash()) if err != nil { return err } @@ -159,7 +167,7 @@ func (s *Stage) processGit(ctx context.Context, repoCfg *config.RepoConfig) erro } var when time.Time - if c, ok := allCommits[ref.Hash()]; ok { + if c, ok := allCommits[*commitHash]; ok { when = c.Author.When } @@ -191,17 +199,20 @@ func (s *Stage) processGit(ctx context.Context, repoCfg *config.RepoConfig) erro Branch: ref.Name(), }, When: when, - Commit: ref.Hash(), + Commit: *commitHash, + Reference: ref.Hash(), CommitTag: commitTag, Type: shared.Git, Addr: repoCfg.Address, FileExts: repoCfg.FileExts, } - commitTracker[ref.Hash()] = true + commitTracker[*commitHash] = true buf, err := json.Marshal(result) if err != nil { return err } + + log.Infof("publishing %s at version: %s", result.Name, result.Version) pubRes := s.Output.Publish(ctx, &pubsub.Message{Data: buf}) _, err = pubRes.Get(ctx) return err @@ -241,10 +252,11 @@ func (s *Stage) processGit(ctx context.Context, repoCfg *config.RepoConfig) erro Hash: h, Force: true, }, - When: c.Author.When, - Commit: h, - Type: shared.Git, - FileExts: repoCfg.FileExts, + Reference: h, + When: c.Author.When, + Commit: h, + Type: shared.Git, + FileExts: repoCfg.FileExts, } buf, err := json.Marshal(result) if err != nil {
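The ResolveRevision change above is what makes annotated tags behave: for an annotated tag, ref.Hash() is the hash of the tag object itself, which never appears as a key in allCommits; ResolveRevision peels the tag down to the commit it points at. A minimal go-git illustration (the repository path and tag name are hypothetical):

package main

import (
	"fmt"

	git "github.com/go-git/go-git/v5"
	"github.com/go-git/go-git/v5/plumbing"
)

func main() {
	repo, err := git.PlainOpen("/tmp/example-repo") // hypothetical local clone
	if err != nil {
		panic(err)
	}
	// For an annotated tag, this reference's Hash() is the tag object's hash.
	ref, err := repo.Reference(plumbing.NewTagReferenceName("v1.0.0"), true)
	if err != nil {
		panic(err)
	}
	fmt.Println("tag object:", ref.Hash())
	// ResolveRevision peels the tag to the commit it ultimately points to.
	commit, err := repo.ResolveRevision(plumbing.Revision(ref.Name().String()))
	if err != nil {
		panic(err)
	}
	fmt.Println("commit:    ", *commit)
}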
diff --git a/docker/indexer/stages/processing/processing.go b/docker/indexer/stages/processing/processing.go index 3ca3724b3cf..9303d0d4038 100644 --- a/docker/indexer/stages/processing/processing.go +++ b/docker/indexer/stages/processing/processing.go @@ -1,30 +1,33 @@ /* Copyright 2022 Google LLC - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 +http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ // Package processing implements the hashing step for each provided input. package processing import ( + "bytes" "context" "crypto/md5" + "encoding/binary" "encoding/json" "errors" "fmt" "io/fs" "os" "path/filepath" + "sort" "strings" "cloud.google.com/go/pubsub" @@ -36,15 +39,23 @@ import ( log "github.com/golang/glog" ) +type Hash = []byte + // Storer is used to permanently store the results. type Storer interface { - Store(ctx context.Context, repoInfo *preparation.Result, hashType string, fileResults []*FileResult) error + Store(ctx context.Context, repoInfo *preparation.Result, hashType string, fileInBucketResults [][]*FileResult, bucketNodes []*BucketNode) error } // FileResult holds the per file hash and path information. type FileResult struct { Path string `datastore:"path,noindex"` - Hash []byte `datastore:"hash"` + Hash Hash `datastore:"hash"` +} + +// BucketNode holds the hash of a bucket and the number of files it contains. +type BucketNode struct { + NodeHash Hash `datastore:"node_hash"` + FilesContained int `datastore:"files_contained,noindex"` } // Stage holds the data structures necessary to perform the processing. @@ -55,6 +66,11 @@ type Stage struct { PubSubOutstandingMessages int } +// bucketCount should be a divisor of 2^16 +// Changing this will require deleting all RepoIndex entries to +// completely rebuild all entries +const bucketCount = 512 + // Run runs the stages and hashes all files for each incoming request. func (s *Stage) Run(ctx context.Context) error { s.Input.ReceiveSettings.MaxOutstandingMessages = s.PubSubOutstandingMessages @@ -67,6 +83,7 @@ func (s *Stage) Run(ctx context.Context) error { log.Errorf("failed to unmarshal input: %v", err) return } + log.Infof("begin processing: '%v' @ '%v'", repoInfo.Name, repoInfo.Version) var err error switch repoInfo.Type { case shared.Git: @@ -76,6 +93,8 @@ func (s *Stage) Run(ctx context.Context) error { } if err != nil { log.Errorf("failed to process input: %v", err) + } else { + log.Infof("successfully processed: '%v' @ '%v'", repoInfo.Name, repoInfo.Version) } }) } @@ -90,17 +109,18 @@ func (s *Stage) processGit(ctx context.Context, repoInfo *preparation.Result) er log.Errorf("failed to remove repo folder: %v", err) } }() + repo, err := git.PlainOpen(repoDir) if err != nil { return fmt.Errorf("failed to open repo: %v", err) } tree, err := repo.Worktree() if err != nil { - return err + return fmt.Errorf("failed to get work tree: %v", err) } repoInfo.CheckoutOptions.Force = true if err := tree.Checkout(repoInfo.CheckoutOptions); err != nil { - return err + return fmt.Errorf("failed to checkout tree: %v", err) } var fileResults []*FileResult @@ -123,7 +143,44 @@ func (s *Stage) processGit(ctx context.Context, repoInfo *preparation.Result) er } return nil }); err != nil { - return err + return fmt.Errorf("failed during file walk: %v", err) + } + + log.Info("begin processing buckets") + bucketResults, filesInBucketResults := processBuckets(fileResults) + log.Info("begin storage") + return s.Storer.Store(ctx, repoInfo, shared.MD5, filesInBucketResults, bucketResults) +} + +// Returns bucket hashes and the individual file hashes of each bucket +func processBuckets(fileResults []*FileResult) ([]*BucketNode, [][]*FileResult) { + buckets := make([][]*FileResult, bucketCount) + + for _, fr := range fileResults { + // Evenly divide the files into bucketCount buckets. + idx := binary.BigEndian.Uint16(fr.Hash[0:2]) % bucketCount + buckets[idx] = append(buckets[idx], fr) } - return s.Storer.Store(ctx, repoInfo, shared.MD5, fileResults) + + results := make([]*BucketNode, bucketCount) + + for bucketIdx := range buckets { + // Sort hashes to produce deterministic bucket hashes + sort.Slice(buckets[bucketIdx], func(i, j int) bool { + return bytes.Compare(buckets[bucketIdx][i].Hash, buckets[bucketIdx][j].Hash) < 0 + }) + + hasher := md5.New() + for _, v := range buckets[bucketIdx] { + // md5.Write can never return a non-nil error + _, _ = hasher.Write(v.Hash) + } + + results[bucketIdx] = &BucketNode{ + NodeHash: hasher.Sum(nil), + FilesContained: len(buckets[bucketIdx]), + } + } + + return results, buckets }
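The bucket index above is just the big-endian integer value of the first two hash bytes reduced modulo bucketCount; because 512 divides 2^16, each bucket covers exactly 128 of the 65536 possible two-byte prefixes, so the split stays uniform. A standalone check, reusing the second sample hash from processing_test.go:

package main

import (
	"encoding/binary"
	"fmt"
)

const bucketCount = 512

func main() {
	// First two bytes 0x07 0x04 -> 0x0704 = 1796, and 1796 % 512 = 260,
	// matching the expected bucket key 260 in processing_test.go.
	hash := []byte{7, 4, 1, 3, 4, 5, 6}
	fmt.Println(binary.BigEndian.Uint16(hash[0:2]) % bucketCount)
}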
diff --git a/docker/indexer/stages/processing/processing_test.go b/docker/indexer/stages/processing/processing_test.go new file mode 100644 index 00000000000..54f20df3c83 --- /dev/null +++ b/docker/indexer/stages/processing/processing_test.go @@ -0,0 +1,61 @@ +package processing + +import ( + "reflect" + "testing" +) + +func Test_processBuckets(t *testing.T) { + type args struct { + fileResults []*FileResult + } + tests := []struct { + name string + args args + want map[int]*BucketNode + }{ + { + name: "Test bucket", + args: args{ + fileResults: []*FileResult{ + { + Path: "abc", + Hash: []byte{0, 1, 2, 3, 4, 5, 6}, + }, + { + Path: "efg", + Hash: []byte{7, 4, 1, 3, 4, 5, 6}, + }, + { + Path: "hji", + Hash: []byte{1, 9, 1, 3, 4, 5, 6}, + }, + }, + }, + want: map[int]*BucketNode{ + 1: { + NodeHash: []byte{154, 164, 97, 225, 236, 164, 8, 111, 146, 48, 170, 73, 201, 11, 12, 97}, + FilesContained: 1, + }, + 260: { + NodeHash: []byte{216, 219, 93, 48, 21, 44, 152, 195, 127, 147, 177, 201, 84, 210, 171, 150}, + FilesContained: 1, + }, + 265: { + NodeHash: []byte{8, 158, 190, 9, 14, 126, 134, 10, 210, 118, 69, 57, 158, 64, 170, 161}, + FilesContained: 1, + }, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, _ := processBuckets(tt.args.fileResults) + for key, value := range tt.want { + if !reflect.DeepEqual(got[key], value) { + t.Errorf("processBuckets() got = %v: %v, want %v", key, got, value) + } + } + }) + } +} diff --git a/docker/indexer/storage/storage.go b/docker/indexer/storage/storage.go index bda09422f7d..e07010ffae8 100644 --- a/docker/indexer/storage/storage.go +++ b/docker/indexer/storage/storage.go @@ -1,17 +1,17 @@ /* Copyright 2022 Google LLC - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ // Package storage provides functionality to interact with permanent storage.
package storage @@ -30,12 +30,12 @@ import ( const ( docKind = "RepoIndex" - resultKind = "RepoIndexResult" - // Address-HashType-CommitHash + bucketKind = "RepoIndexBucket" + // Address-HashType-ReferenceHash docKeyFmt = "%s-%s-%x" - // CommitHash-HashType-Page - resultKeyFmt = "%x-%s-%d" - pageSize = 1000 + // BucketHash-HashType-NumberOfFiles + bucketKeyFmt = "%x-%s-%d" + datastoreMultiEntrySize = 490 ) // document represents a single repository entry in datastore. @@ -50,16 +50,15 @@ type document struct { RepoAddr string `datastore:"repo_addr"` FileExts []string `datastore:"file_exts"` FileHashType string `datastore:"file_hash_type"` - Pages int `datastore:"pages"` } type result struct { - Page int `datastore:"page"` - Path []string `datastore:"file_results.path"` - Hash [][]byte `datastores:"file_results.hash"` + BucketHash []byte `datastore:"bucket_hash"` + Path []string `datastore:"bucket_results.path,noindex"` + Hash [][]byte `datastore:"bucket_results.hash,noindex"` } -func newDoc(repoInfo *preparation.Result, hashType string, fileResults []*processing.FileResult) (*document, []*result) { +func newDoc(repoInfo *preparation.Result, hashType string) *document { doc := &document{ Name: repoInfo.Name, BaseCPE: repoInfo.BaseCPE, @@ -71,27 +70,11 @@ func newDoc(repoInfo *preparation.Result, hashType string, fileResults []*proces RepoAddr: repoInfo.Addr, FileExts: repoInfo.FileExts, FileHashType: hashType, - Pages: 1, } - if len(fileResults) <= pageSize { - return doc, []*result{newResult(1, fileResults)} - } - var r []*result - resultLen := len(fileResults) - pages := resultLen / pageSize - remainder := resultLen % pageSize - for i := 0; i < pages; i++ { - r = append(r, newResult(i, fileResults[i*pageSize:(i+1)*pageSize])) - } - if remainder != 0 { - r = append(r, newResult(pages, fileResults[pages*pageSize:])) - } - doc.Pages = len(r) - return doc, r - + return doc } -func newResult(page int, results []*processing.FileResult) *result { +func newResult(results []*processing.FileResult, bucketHash []byte) *result { var ( paths []string hashes [][]byte @@ -101,7 +84,7 @@ func newResult(page int, results []*processing.FileResult) *result { paths = append(paths, r.Path) hashes = append(hashes, r.Hash) } - return &result{Page: page, Path: paths, Hash: hashes} + return &result{Path: paths, Hash: hashes, BucketHash: bucketHash} } // Store provides the functionality to check for existing documents @@ -122,10 +105,13 @@ func New(ctx context.Context, projectID string) (*Store, error) { // Exists checks whether a name/hash pair already exists in datastore. 
func (s *Store) Exists(ctx context.Context, addr string, hashType string, hash plumbing.Hash) (bool, error) { - if _, ok := s.cache.Load(fmt.Sprintf(docKeyFmt, addr, hashType, hash)); ok { + if _, ok := s.cache.Load(fmt.Sprintf(docKeyFmt, addr, hashType, hash[:])); ok { return true, nil } - key := datastore.NameKey(docKind, fmt.Sprintf(docKeyFmt, addr, hashType, hash), nil) + // The [:] in hash[:] is important: the key format uses %x, which returns a different result without it. + // This is because plumbing.Hash implements its own String() method, so %x on the bare value produces a hex + // encoding of the hex string already returned by plumbing.Hash's String() + key := datastore.NameKey(docKind, fmt.Sprintf(docKeyFmt, addr, hashType, hash[:]), nil) tmp := &document{} if err := s.dsCl.Get(ctx, key, tmp); err != nil { if err == datastore.ErrNoSuchEntity { @@ -133,29 +119,52 @@ } return false, err } - s.cache.Store(fmt.Sprintf(docKeyFmt, addr, hashType, hash), true) + s.cache.Store(fmt.Sprintf(docKeyFmt, addr, hashType, hash[:]), true) return true, nil } // Store stores a new entry in datastore. -func (s *Store) Store(ctx context.Context, repoInfo *preparation.Result, hashType string, fileResults []*processing.FileResult) error { - docKey := datastore.NameKey(docKind, fmt.Sprintf(docKeyFmt, repoInfo.Addr, hashType, repoInfo.Commit[:]), nil) - doc, results := newDoc(repoInfo, hashType, fileResults) - _, err := s.dsCl.RunInTransaction(ctx, func(tx *datastore.Transaction) error { - _, err := s.dsCl.Put(ctx, docKey, doc) +func (s *Store) Store(ctx context.Context, repoInfo *preparation.Result, hashType string, bucketResults [][]*processing.FileResult, treeNodes []*processing.BucketNode) error { + docKey := datastore.NameKey(docKind, fmt.Sprintf(docKeyFmt, repoInfo.Addr, hashType, repoInfo.Reference[:]), nil) + + // There are slightly too many items to put in a transaction (max 500 entries per transaction) + putMultiKeys := []*datastore.Key{} + putMultiNodes := []*processing.BucketNode{} + for _, node := range treeNodes { + if node.FilesContained == 0 { + continue + } + + bucketKey := datastore.NameKey(bucketKind, + fmt.Sprintf(bucketKeyFmt, node.NodeHash, hashType, node.FilesContained), + docKey) + + putMultiKeys = append(putMultiKeys, bucketKey) + putMultiNodes = append(putMultiNodes, node) + } + + // Batch Puts into datastoreMultiEntrySize chunks + for i := 0; i < len(putMultiKeys); i += datastoreMultiEntrySize { + end := i + datastoreMultiEntrySize + if end > len(putMultiKeys) { + end = len(putMultiKeys) + } + + _, err := s.dsCl.PutMulti(ctx, putMultiKeys[i:end], putMultiNodes[i:end]) if err != nil { return err } - for _, r := range results { - resultKey := datastore.NameKey(resultKind, fmt.Sprintf(resultKeyFmt, repoInfo.Commit[:], hashType, r.Page), docKey) - _, err := s.dsCl.Put(ctx, resultKey, r) - if err != nil { - return err - } - } - return nil - }) - return err + } + + // Leave the RepoIndex entry until last, so that if any of the previous puts fail + // the controller will try again + doc := newDoc(repoInfo, hashType) + _, err := s.dsCl.Put(ctx, docKey, doc) + if err != nil { + return err + } + + return nil } // Close closes the datastore client.
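The hash[:] comments above describe real fmt behavior: for the %x verb, fmt invokes a String() method if the operand has one and then hex-encodes its output, so a bare plumbing.Hash (whose String() already returns hex) gets hex-encoded twice. A toy reproduction with a hypothetical 4-byte hash type:

package main

import "fmt"

// Hash mimics plumbing.Hash: a byte array with a hex String() method.
type Hash [4]byte

func (h Hash) String() string { return fmt.Sprintf("%x", h[:]) }

func main() {
	h := Hash{0xde, 0xad, 0xbe, 0xef}
	fmt.Printf("%x\n", h)    // 6465616462656566 (hex of the string "deadbeef")
	fmt.Printf("%x\n", h[:]) // deadbeef (hex of the raw bytes)
}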
diff --git a/docker/indexer/storage/storage_test.go b/docker/indexer/storage/storage_test.go index 368a62435ed..e4954b65f5c 100644 --- a/docker/indexer/storage/storage_test.go +++ b/docker/indexer/storage/storage_test.go @@ -1,26 +1,28 @@ +/* +Copyright 2022 Google LLC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + package storage import ( - "strconv" "testing" "github.com/google/go-cmp/cmp" "github.com/google/osv.dev/docker/indexer/stages/preparation" - "github.com/google/osv.dev/docker/indexer/stages/processing" ) -func getFileResults(t *testing.T, count int) []*processing.FileResult { - t.Helper() - var r []*processing.FileResult - for i := 0; i < count; i++ { - r = append(r, &processing.FileResult{ - Path: strconv.Itoa(i), - Hash: []byte{0x42, 0x42}, - }) - } - return r -} - func getRepoInfo(t *testing.T) *preparation.Result { return &preparation.Result{ Name: "abc", @@ -33,55 +35,38 @@ func getDoc(t *testing.T, pages int) *document { Name: "abc", Commit: []byte{0x41, 0x41, 0x41, 0x41, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, FileHashType: "MD5", - Pages: pages, } } func TestNewDoc(t *testing.T) { for _, tc := range []struct { - repoInfo *preparation.Result - fileResults []*processing.FileResult - wantDoc *document - expectedResultLen int + repoInfo *preparation.Result + wantDoc *document }{ { - repoInfo: getRepoInfo(t), - fileResults: getFileResults(t, 1), - wantDoc: getDoc(t, 1), - expectedResultLen: 1, + repoInfo: getRepoInfo(t), + wantDoc: getDoc(t, 1), }, { - repoInfo: getRepoInfo(t), - fileResults: getFileResults(t, 2), - wantDoc: getDoc(t, 1), - expectedResultLen: 1, + repoInfo: getRepoInfo(t), + wantDoc: getDoc(t, 1), }, { - repoInfo: getRepoInfo(t), - fileResults: getFileResults(t, 1000), - wantDoc: getDoc(t, 1), - expectedResultLen: 1, + repoInfo: getRepoInfo(t), + wantDoc: getDoc(t, 1), }, { - repoInfo: getRepoInfo(t), - fileResults: getFileResults(t, 1001), - wantDoc: getDoc(t, 2), - expectedResultLen: 2, + repoInfo: getRepoInfo(t), + wantDoc: getDoc(t, 2), }, { - repoInfo: getRepoInfo(t), - fileResults: getFileResults(t, 2001), - wantDoc: getDoc(t, 3), - expectedResultLen: 3, + repoInfo: getRepoInfo(t), + wantDoc: getDoc(t, 3), }, } { - doc, r := newDoc(tc.repoInfo, "MD5", tc.fileResults) + doc := newDoc(tc.repoInfo, "MD5") if diff := cmp.Diff(tc.wantDoc, doc); diff != "" { t.Errorf("newDoc() returned an unexpected document diff (-want, +got):\n%s", diff) } - if len(r) != tc.expectedResultLen { - t.Errorf("expected result length %d doesn't match %d", tc.expectedResultLen, len(r)) - } - } } diff --git a/gcp/api/osv_service_v1_pb2.py b/gcp/api/osv_service_v1_pb2.py index 62e1d719a81..32f5f0ae9f3 100644 --- a/gcp/api/osv_service_v1_pb2.py +++ b/gcp/api/osv_service_v1_pb2.py @@ -10,54 +10,59 @@ _sym_db = _symbol_database.Default() - from osv import vulnerability_pb2 as osv_dot_vulnerability__pb2 from google.api import annotations_pb2 as google_dot_api_dot_annotations__pb2 - -DESCRIPTOR = 
_descriptor_pool.Default().AddSerializedFile(b'\n\x14osv_service_v1.proto\x12\x06osv.v1\x1a\x17osv/vulnerability.proto\x1a\x1cgoogle/api/annotations.proto\"O\n\x11VulnerabilityList\x12!\n\x05vulns\x18\x01 \x03(\x0b\x32\x12.osv.Vulnerability\x12\x17\n\x0fnext_page_token\x18\x02 \x01(\t\"D\n\x16\x42\x61tchVulnerabilityList\x12*\n\x07results\x18\x01 \x03(\x0b\x32\x19.osv.v1.VulnerabilityList\"h\n\x05Query\x12\x10\n\x06\x63ommit\x18\x01 \x01(\tH\x00\x12\x11\n\x07version\x18\x02 \x01(\tH\x00\x12\x1d\n\x07package\x18\x04 \x01(\x0b\x32\x0c.osv.Package\x12\x12\n\npage_token\x18\x05 \x01(\tB\x07\n\x05param\",\n\nBatchQuery\x12\x1e\n\x07queries\x18\x01 \x03(\x0b\x32\r.osv.v1.Query\"#\n\x15GetVulnByIdParameters\x12\n\n\x02id\x18\x01 \x01(\t\"7\n\x17QueryAffectedParameters\x12\x1c\n\x05query\x18\x01 \x01(\x0b\x32\r.osv.v1.Query\"A\n\x1cQueryAffectedBatchParameters\x12!\n\x05query\x18\x01 \x01(\x0b\x32\x12.osv.v1.BatchQuery\"A\n\x1a\x44\x65termineVersionParameters\x12#\n\x05query\x18\x01 \x01(\x0b\x32\x14.osv.v1.VersionQuery\"C\n\x0cVersionQuery\x12\x0c\n\x04name\x18\x01 \x01(\t\x12%\n\x0b\x66ile_hashes\x18\x02 \x03(\x0b\x32\x10.osv.v1.FileHash\">\n\x08\x46ileHash\x12\x11\n\tfile_path\x18\x01 \x01(\t\x12\x11\n\thash_type\x18\x02 \x01(\t\x12\x0c\n\x04hash\x18\x03 \x01(\x0c\"9\n\x10VersionMatchList\x12%\n\x07matches\x18\x01 \x03(\x0b\x32\x14.osv.v1.VersionMatch\"\x8b\x01\n\x0cVersionMatch\x12\r\n\x05score\x18\x01 \x01(\x01\x12\x37\n\trepo_info\x18\x02 \x01(\x0b\x32$.osv.v1.VersionRepositoryInformation\x12$\n\x0eosv_identifier\x18\x03 \x01(\x0b\x32\x0c.osv.Package\x12\r\n\x05\x63pe23\x18\x05 \x01(\t\"\xc0\x01\n\x1cVersionRepositoryInformation\x12;\n\x04type\x18\x01 \x01(\x0e\x32-.osv.v1.VersionRepositoryInformation.RepoType\x12\x0f\n\x07\x61\x64\x64ress\x18\x02 \x01(\t\x12\x0e\n\x06\x63ommit\x18\x03 \x01(\x0c\x12\x0b\n\x03tag\x18\x04 \x01(\t\x12\x0f\n\x07version\x18\x05 \x01(\t\"$\n\x08RepoType\x12\x0f\n\x0bUNSPECIFIED\x10\x00\x12\x07\n\x03GIT\x10\x01\x32\xc5\x03\n\x03OSV\x12X\n\x0bGetVulnById\x12\x1d.osv.v1.GetVulnByIdParameters\x1a\x12.osv.Vulnerability\"\x16\x82\xd3\xe4\x93\x02\x10\x12\x0e/v1/vulns/{id}\x12\x65\n\rQueryAffected\x12\x1f.osv.v1.QueryAffectedParameters\x1a\x19.osv.v1.VulnerabilityList\"\x18\x82\xd3\xe4\x93\x02\x12\"\t/v1/query:\x05query\x12y\n\x12QueryAffectedBatch\x12$.osv.v1.QueryAffectedBatchParameters\x1a\x1e.osv.v1.BatchVulnerabilityList\"\x1d\x82\xd3\xe4\x93\x02\x17\"\x0e/v1/querybatch:\x05query\x12\x81\x01\n\x10\x44\x65termineVersion\x12\".osv.v1.DetermineVersionParameters\x1a\x18.osv.v1.VersionMatchList\"/\x82\xd3\xe4\x93\x02)\" /v1experimental/determineversion:\x05queryb\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( + b'\n\x14osv_service_v1.proto\x12\x06osv.v1\x1a\x17osv/vulnerability.proto\x1a\x1cgoogle/api/annotations.proto\"O\n\x11VulnerabilityList\x12!\n\x05vulns\x18\x01 \x03(\x0b\x32\x12.osv.Vulnerability\x12\x17\n\x0fnext_page_token\x18\x02 \x01(\t\"D\n\x16\x42\x61tchVulnerabilityList\x12*\n\x07results\x18\x01 \x03(\x0b\x32\x19.osv.v1.VulnerabilityList\"h\n\x05Query\x12\x10\n\x06\x63ommit\x18\x01 \x01(\tH\x00\x12\x11\n\x07version\x18\x02 \x01(\tH\x00\x12\x1d\n\x07package\x18\x04 \x01(\x0b\x32\x0c.osv.Package\x12\x12\n\npage_token\x18\x05 \x01(\tB\x07\n\x05param\",\n\nBatchQuery\x12\x1e\n\x07queries\x18\x01 \x03(\x0b\x32\r.osv.v1.Query\"#\n\x15GetVulnByIdParameters\x12\n\n\x02id\x18\x01 \x01(\t\"7\n\x17QueryAffectedParameters\x12\x1c\n\x05query\x18\x01 \x01(\x0b\x32\r.osv.v1.Query\"A\n\x1cQueryAffectedBatchParameters\x12!\n\x05query\x18\x01 
\x01(\x0b\x32\x12.osv.v1.BatchQuery\"A\n\x1a\x44\x65termineVersionParameters\x12#\n\x05query\x18\x01 \x01(\x0b\x32\x14.osv.v1.VersionQuery\"C\n\x0cVersionQuery\x12\x0c\n\x04name\x18\x01 \x01(\t\x12%\n\x0b\x66ile_hashes\x18\x02 \x03(\x0b\x32\x10.osv.v1.FileHash\">\n\x08\x46ileHash\x12\x11\n\tfile_path\x18\x01 \x01(\t\x12\x11\n\thash_type\x18\x02 \x01(\t\x12\x0c\n\x04hash\x18\x03 \x01(\x0c\"9\n\x10VersionMatchList\x12%\n\x07matches\x18\x01 \x03(\x0b\x32\x14.osv.v1.VersionMatch\"\xc7\x01\n\x0cVersionMatch\x12\r\n\x05score\x18\x01 \x01(\x01\x12\x37\n\trepo_info\x18\x02 \x01(\x0b\x32$.osv.v1.VersionRepositoryInformation\x12$\n\x0eosv_identifier\x18\x03 \x01(\x0b\x32\x0c.osv.Package\x12\r\n\x05\x63pe23\x18\x05 \x01(\t\x12\x1c\n\x14minimum_file_matches\x18\x06 \x01(\x03\x12\x1c\n\x14\x65stimated_diff_files\x18\x07 \x01(\x03\"\xc0\x01\n\x1cVersionRepositoryInformation\x12;\n\x04type\x18\x01 \x01(\x0e\x32-.osv.v1.VersionRepositoryInformation.RepoType\x12\x0f\n\x07\x61\x64\x64ress\x18\x02 \x01(\t\x12\x0e\n\x06\x63ommit\x18\x03 \x01(\x0c\x12\x0b\n\x03tag\x18\x04 \x01(\t\x12\x0f\n\x07version\x18\x05 \x01(\t\"$\n\x08RepoType\x12\x0f\n\x0bUNSPECIFIED\x10\x00\x12\x07\n\x03GIT\x10\x01\x32\xc5\x03\n\x03OSV\x12X\n\x0bGetVulnById\x12\x1d.osv.v1.GetVulnByIdParameters\x1a\x12.osv.Vulnerability\"\x16\x82\xd3\xe4\x93\x02\x10\x12\x0e/v1/vulns/{id}\x12\x65\n\rQueryAffected\x12\x1f.osv.v1.QueryAffectedParameters\x1a\x19.osv.v1.VulnerabilityList\"\x18\x82\xd3\xe4\x93\x02\x12\"\t/v1/query:\x05query\x12y\n\x12QueryAffectedBatch\x12$.osv.v1.QueryAffectedBatchParameters\x1a\x1e.osv.v1.BatchVulnerabilityList\"\x1d\x82\xd3\xe4\x93\x02\x17\"\x0e/v1/querybatch:\x05query\x12\x81\x01\n\x10\x44\x65termineVersion\x12\".osv.v1.DetermineVersionParameters\x1a\x18.osv.v1.VersionMatchList\"/\x82\xd3\xe4\x93\x02)\" /v1experimental/determineversion:\x05queryb\x06proto3' +) _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals()) -_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'osv_service_v1_pb2', globals()) +_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'osv_service_v1_pb2', + globals()) if _descriptor._USE_C_DESCRIPTORS == False: DESCRIPTOR._options = None _OSV.methods_by_name['GetVulnById']._options = None - _OSV.methods_by_name['GetVulnById']._serialized_options = b'\202\323\344\223\002\020\022\016/v1/vulns/{id}' + _OSV.methods_by_name[ + 'GetVulnById']._serialized_options = b'\202\323\344\223\002\020\022\016/v1/vulns/{id}' _OSV.methods_by_name['QueryAffected']._options = None - _OSV.methods_by_name['QueryAffected']._serialized_options = b'\202\323\344\223\002\022\"\t/v1/query:\005query' + _OSV.methods_by_name[ + 'QueryAffected']._serialized_options = b'\202\323\344\223\002\022\"\t/v1/query:\005query' _OSV.methods_by_name['QueryAffectedBatch']._options = None - _OSV.methods_by_name['QueryAffectedBatch']._serialized_options = b'\202\323\344\223\002\027\"\016/v1/querybatch:\005query' + _OSV.methods_by_name[ + 'QueryAffectedBatch']._serialized_options = b'\202\323\344\223\002\027\"\016/v1/querybatch:\005query' _OSV.methods_by_name['DetermineVersion']._options = None - _OSV.methods_by_name['DetermineVersion']._serialized_options = b'\202\323\344\223\002)\" /v1experimental/determineversion:\005query' - _VULNERABILITYLIST._serialized_start=87 - _VULNERABILITYLIST._serialized_end=166 - _BATCHVULNERABILITYLIST._serialized_start=168 - _BATCHVULNERABILITYLIST._serialized_end=236 - _QUERY._serialized_start=238 - _QUERY._serialized_end=342 - _BATCHQUERY._serialized_start=344 - _BATCHQUERY._serialized_end=388 - 
_GETVULNBYIDPARAMETERS._serialized_start=390 - _GETVULNBYIDPARAMETERS._serialized_end=425 - _QUERYAFFECTEDPARAMETERS._serialized_start=427 - _QUERYAFFECTEDPARAMETERS._serialized_end=482 - _QUERYAFFECTEDBATCHPARAMETERS._serialized_start=484 - _QUERYAFFECTEDBATCHPARAMETERS._serialized_end=549 - _DETERMINEVERSIONPARAMETERS._serialized_start=551 - _DETERMINEVERSIONPARAMETERS._serialized_end=616 - _VERSIONQUERY._serialized_start=618 - _VERSIONQUERY._serialized_end=685 - _FILEHASH._serialized_start=687 - _FILEHASH._serialized_end=749 - _VERSIONMATCHLIST._serialized_start=751 - _VERSIONMATCHLIST._serialized_end=808 - _VERSIONMATCH._serialized_start=811 - _VERSIONMATCH._serialized_end=950 - _VERSIONREPOSITORYINFORMATION._serialized_start=953 - _VERSIONREPOSITORYINFORMATION._serialized_end=1145 - _VERSIONREPOSITORYINFORMATION_REPOTYPE._serialized_start=1109 - _VERSIONREPOSITORYINFORMATION_REPOTYPE._serialized_end=1145 - _OSV._serialized_start=1148 - _OSV._serialized_end=1601 + _OSV.methods_by_name[ + 'DetermineVersion']._serialized_options = b'\202\323\344\223\002)\" /v1experimental/determineversion:\005query' + _VULNERABILITYLIST._serialized_start = 87 + _VULNERABILITYLIST._serialized_end = 166 + _BATCHVULNERABILITYLIST._serialized_start = 168 + _BATCHVULNERABILITYLIST._serialized_end = 236 + _QUERY._serialized_start = 238 + _QUERY._serialized_end = 342 + _BATCHQUERY._serialized_start = 344 + _BATCHQUERY._serialized_end = 388 + _GETVULNBYIDPARAMETERS._serialized_start = 390 + _GETVULNBYIDPARAMETERS._serialized_end = 425 + _QUERYAFFECTEDPARAMETERS._serialized_start = 427 + _QUERYAFFECTEDPARAMETERS._serialized_end = 482 + _QUERYAFFECTEDBATCHPARAMETERS._serialized_start = 484 + _QUERYAFFECTEDBATCHPARAMETERS._serialized_end = 549 + _DETERMINEVERSIONPARAMETERS._serialized_start = 551 + _DETERMINEVERSIONPARAMETERS._serialized_end = 616 + _VERSIONQUERY._serialized_start = 618 + _VERSIONQUERY._serialized_end = 685 + _FILEHASH._serialized_start = 687 + _FILEHASH._serialized_end = 749 + _VERSIONMATCHLIST._serialized_start = 751 + _VERSIONMATCHLIST._serialized_end = 808 + _VERSIONMATCH._serialized_start = 811 + _VERSIONMATCH._serialized_end = 1010 + _VERSIONREPOSITORYINFORMATION._serialized_start = 1013 + _VERSIONREPOSITORYINFORMATION._serialized_end = 1205 + _VERSIONREPOSITORYINFORMATION_REPOTYPE._serialized_start = 1169 + _VERSIONREPOSITORYINFORMATION_REPOTYPE._serialized_end = 1205 + _OSV._serialized_start = 1208 + _OSV._serialized_end = 1661 # @@protoc_insertion_point(module_scope) diff --git a/gcp/api/server.py b/gcp/api/server.py index dccc1034ce2..f31395272d3 100644 --- a/gcp/api/server.py +++ b/gcp/api/server.py @@ -16,12 +16,15 @@ import argparse import codecs import concurrent +import math +import hashlib import functools import logging import os -import random import sys import time +from typing import List + from collections import defaultdict from google.cloud import ndb @@ -35,14 +38,20 @@ import osv_service_v1_pb2 import osv_service_v1_pb2_grpc -from typing import List - _SHUTDOWN_GRACE_DURATION = 5 _MAX_BATCH_QUERY = 1000 _MAX_VULNERABILITIES_LISTED = 16 -_MAX_HASHES_TO_TRY = 50 -_MAX_COMMITS_TO_TRY = 10 + +# Used in DetermineVersion +# If there are more results for a bucket than this number, +# ignore the bucket completely +_MAX_MATCHES_TO_CARE = 100 +# Max results to return for DetermineVersion +_MAX_DETERMINE_VER_RESULTS_TO_RETURN = 10 +# Number of buckets to divide hashes into in DetermineVersion +# This should match the number in the indexer +_BUCKET_SIZE = 512 _ndb_client = ndb.Client()
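For reference, a DetermineVersion caller sends the MD5 of every source file it wants matched (the grpcurl example in gcp/api/v1/grpc_cmd.sh sends them base64-encoded). A rough sketch of collecting those hashes, not the actual tools/indexer-api-caller code; the root directory argument and the .c/.h filter are illustrative assumptions, not anything the API mandates:

package main

import (
	"crypto/md5"
	"fmt"
	"io/fs"
	"os"
	"path/filepath"
	"strings"
)

func main() {
	root := os.Args[1]
	err := filepath.WalkDir(root, func(path string, d fs.DirEntry, err error) error {
		if err != nil || d.IsDir() ||
			!(strings.HasSuffix(path, ".c") || strings.HasSuffix(path, ".h")) {
			return err
		}
		data, err := os.ReadFile(path)
		if err != nil {
			return err
		}
		// Each (file_path, hash) pair becomes one FileHash entry in the
		// VersionQuery sent to /v1experimental/determineversion.
		fmt.Printf("%s %x\n", path, md5.Sum(data))
		return nil
	})
	if err != nil {
		panic(err)
	}
}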
@@ -110,104 +119,139 @@ def QueryAffectedBatch(self, request, context): @ndb_context def DetermineVersion(self, request, context): """Determine the version of the provided hashes.""" - return determine_version(request.query, context).result() + res = determine_version(request.query, context).result() + return res + + +def process_buckets( + file_results: List[osv.FileResult]) -> List[osv.RepoIndexBucket]: + """ + Create buckets using the same procedure as the + indexer, so the generated bucket hashes match + """ + buckets: list[list[bytes]] = [[] for _ in range(_BUCKET_SIZE)] + + for fr in file_results: + buckets[int.from_bytes(fr.hash[:2], byteorder='big') % _BUCKET_SIZE].append( + fr.hash) + + results: list[osv.RepoIndexBucket] = [None] * _BUCKET_SIZE + for bucket_idx, bucket in enumerate(buckets): + bucket.sort() + + hasher = hashlib.md5() + for v in bucket: + hasher.update(v) + + results[bucket_idx] = osv.RepoIndexBucket( + node_hash=hasher.digest(), + files_contained=len(bucket), + ) + + return results + + +def build_determine_version_result( + file_matches_by_proj: dict[ndb.Key, int], + bucket_matches_by_proj: dict[ndb.Key, int], zero_match_offset: int, + max_files: int) -> osv_service_v1_pb2.VersionMatchList: + """Build sorted determine version result from the input""" + bucket_match_items = list(bucket_matches_by_proj.items()) + # Sort by number of buckets matched + bucket_match_items.sort(key=lambda x: x[1], reverse=True) + # Only interested in our maximum number of results + bucket_match_items = bucket_match_items[:min( + _MAX_DETERMINE_VER_RESULTS_TO_RETURN, len(bucket_match_items))] + idx_futures = ndb.get_multi_async([b[0] for b in bucket_match_items]) + output = [] + + for f in idx_futures: + idx: osv.RepoIndex = f.result() + estimated_num_of_diff = estimate_diff( + _BUCKET_SIZE - bucket_matches_by_proj[idx.key], zero_match_offset) + + version_match = osv_service_v1_pb2.VersionMatch( + score=(max_files - estimated_num_of_diff) / max_files, + minimum_file_matches=file_matches_by_proj[idx.key], + estimated_diff_files=estimated_num_of_diff, + repo_info=osv_service_v1_pb2.VersionRepositoryInformation( + type=osv_service_v1_pb2.VersionRepositoryInformation.GIT, + address=idx.repo_addr, + commit=idx.commit, + version=idx.version, + ), + ) + output.append(version_match) + + # output.sort(reverse=True, key=lambda x: x.score) + return osv_service_v1_pb2.VersionMatchList(matches=output) + + +def estimate_diff(num_of_bucket_change: int, zero_match_offset: int) -> int: + """Estimate the number of changed files from the number of changed buckets""" + estimate = _BUCKET_SIZE * math.log( + (_BUCKET_SIZE + 1) / (_BUCKET_SIZE - + (num_of_bucket_change - zero_match_offset) + 1)) + # Scale the "file change" denominator linearly by how many buckets are not + # considered, since if a lot of buckets are skipped, a file that's been + # changed might end up in one of them decreasing the chance it will + # be counted twice. + return round(estimate / (2 - zero_match_offset / _BUCKET_SIZE))
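estimate_diff is essentially the linear-counting estimator for distinct items hashed into _BUCKET_SIZE buckets, with the final division accounting for a changed file normally perturbing a bucket on both the query side and the index side. A rough Go port of the same arithmetic, with one made-up data point:

package main

import (
	"fmt"
	"math"
)

const bucketSize = 512.0 // mirrors _BUCKET_SIZE

// estimateDiff mirrors the Python estimate_diff above.
func estimateDiff(numOfBucketChange, zeroMatchOffset float64) int {
	estimate := bucketSize * math.Log(
		(bucketSize+1)/(bucketSize-(numOfBucketChange-zeroMatchOffset)+1))
	return int(math.Round(estimate / (2 - zeroMatchOffset/bucketSize)))
}

func main() {
	// 64 differing buckets and no skipped buckets:
	// 512*ln(513/449) is roughly 68 differing hashes, i.e. about 34 changed files.
	fmt.Println(estimateDiff(64, 0)) // 34
}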
@ndb.tasklet def determine_version(version_query: osv_service_v1_pb2.VersionQuery, - context: grpc.ServicerContext) -> ndb.Future: + _: grpc.ServicerContext) -> ndb.Future: """Identify fitting commits based on a subset of hashes""" - if len(version_query.file_hashes) <= _MAX_HASHES_TO_TRY: - hashes = [ - f.hash for f in version_query - .file_hashes[:min(_MAX_HASHES_TO_TRY, len(version_query.file_hashes))] - ] - else: - hashes = [ - f.hash - for f in random.sample(version_query.file_hashes, _MAX_HASHES_TO_TRY) - ] - tracker = defaultdict(int) - - hash_futures = [] - for h in hashes: - query = osv.RepoIndexResult.query( - osv.RepoIndexResult.file_results.hash == h) - query.keys_only = True - hash_futures.append(query.fetch_async()) - - for f in hash_futures: - for r in f.result(): - tracker[r.key.parent()] += 1 - - idx_keys = [] - for k, v in tracker.items(): - if v == _MAX_HASHES_TO_TRY: - idx_keys.append(k) - if not idx_keys: - idx_keys = [ - k for k, _ in sorted( - tracker.items(), key=lambda item: item[1], reverse=True) - ] - idx_keys = idx_keys[:min(_MAX_COMMITS_TO_TRY, len(idx_keys))] - if len(idx_keys) == 0: - context.abort(grpc.StatusCode.NOT_FOUND, 'no matches found') - return None + req_list = [osv.FileResult(hash=x.hash) for x in version_query.file_hashes] - idx_futures = ndb.get_multi_async(idx_keys) - match_futures = [] - for f in idx_futures: - idx = f.result() - if version_query.name not in ('', idx.name): + # Build all the buckets and query the bucket hash + buckets = process_buckets(req_list) + + file_match_count: dict[ndb.Key, int] = defaultdict(int) + bucket_match_count: dict[ndb.Key, int] = defaultdict(int) + num_skipped_buckets = 0 + skipped_files = 0 + + # Tuple is (Future, index, number_of_files) + query_futures: list[tuple[ndb.Future, int, int]] = [] + + for idx, bucket in enumerate(buckets): + if bucket.files_contained == 0: + num_skipped_buckets += 1 continue - match = compare_hashes_from_commit(idx, version_query.file_hashes) - match_futures.append(match) - results = [] - for f in match_futures: - version_match = f.result() - if version_match.score != 0.0: - results.append(version_match) - if len(results) == 0: - context.abort(grpc.StatusCode.NOT_FOUND, 'no matches found') - return None - return osv_service_v1_pb2.VersionMatchList(matches=results) + query = osv.RepoIndexBucket.query( + osv.RepoIndexBucket.node_hash == bucket.node_hash) + # Limit the number of requests to prevent super long queries + query_futures.append((query.fetch_async(limit=_MAX_MATCHES_TO_CARE), idx, + bucket.files_contained)) + + # Take the results and group the library versions, + # aggregating on the number of files matched + + for future, idx, num_of_files in query_futures: + result: list[osv.RepoIndexBucket] = list(future.result()) + if result: # If there is a match, add it to the list of potential versions + # If it equals the limit, there are probably more versions beyond the limit, + # so ignore this bucket completely since it's not a useful indicator + if len(result) == _MAX_MATCHES_TO_CARE: + num_skipped_buckets += 1 + skipped_files += num_of_files + continue + + for index_bucket in result: + parent_key = index_bucket.key.parent() + file_match_count[parent_key] += index_bucket.files_contained + bucket_match_count[parent_key] += 1 + # Credit every candidate with the files from buckets that matched too + # commonly to be useful, so that an exact match can still score 100% + for key in file_match_count.keys(): + file_match_count[key] += skipped_files -@ndb.tasklet -def
compare_hashes_from_commit( - idx: osv.RepoIndex, - hashes: List[osv_service_v1_pb2.FileHash]) -> ndb.Future: - """"Retrieves the hashes from the provided index and compares - them to the input hashes.""" - total_files = 0 - matching_hashes = 0 - for i in range(idx.pages): - key = version_hashes_key(idx.key, idx.commit, idx.file_hash_type, i) - result = key.get() - for f_result in result.file_results: - for in_hash in hashes: - if in_hash.hash == f_result.hash: - matching_hashes += 1 - break - total_files += 1 - score = matching_hashes / total_files if total_files != 0 else 0.0 - version_match = osv_service_v1_pb2.VersionMatch( - score=score, - repo_info=osv_service_v1_pb2.VersionRepositoryInformation( - type=osv_service_v1_pb2.VersionRepositoryInformation.GIT, - address=idx.repo_addr, - commit=idx.commit, - version=idx.version, - ), - ) - return version_match - - -def version_hashes_key(parent_key: ndb.Key, commit: bytes, hash_type: str, - page: int) -> ndb.Key: - return ndb.Key(parent_key.kind(), parent_key.id(), osv.RepoIndexResult, - f"{commit.hex()}-{hash_type}-{page}") + return build_determine_version_result(file_match_count, bucket_match_count, + num_skipped_buckets, + len(version_query.file_hashes)) @ndb.tasklet @@ -481,12 +525,17 @@ def query_by_version(project: str, ecosystem_info = ecosystems.get(ecosystem) is_semver = ecosystem_info and ecosystem_info.is_semver if project: - query = osv.Bug.query(osv.Bug.status == osv.BugStatus.PROCESSED, - osv.Bug.project == project, osv.Bug.public == True) # pylint: disable=singleton-comparison + query = osv.Bug.query( + osv.Bug.status == osv.BugStatus.PROCESSED, + osv.Bug.project == project, + # pylint: disable=singleton-comparison + osv.Bug.public == True) # noqa: E712 elif purl: - query = osv.Bug.query(osv.Bug.status == osv.BugStatus.PROCESSED, - osv.Bug.purl == purl.to_string(), - osv.Bug.public == True) # pylint: disable=singleton-comparison + query = osv.Bug.query( + osv.Bug.status == osv.BugStatus.PROCESSED, + osv.Bug.purl == purl.to_string(), + # pylint: disable=singleton-comparison + osv.Bug.public == True) # noqa: E712 else: return [] @@ -518,14 +567,18 @@ def query_by_package(project, ecosystem, purl: PackageURL, page_token, """Query by package.""" bugs = [] if project and ecosystem: - query = osv.Bug.query(osv.Bug.status == osv.BugStatus.PROCESSED, - osv.Bug.project == project, - osv.Bug.ecosystem == ecosystem, - osv.Bug.public == True) # pylint: disable=singleton-comparison + query = osv.Bug.query( + osv.Bug.status == osv.BugStatus.PROCESSED, + osv.Bug.project == project, + osv.Bug.ecosystem == ecosystem, + # pylint: disable=singleton-comparison + osv.Bug.public == True) # noqa: E712 elif purl: - query = osv.Bug.query(osv.Bug.status == osv.BugStatus.PROCESSED, - osv.Bug.purl == purl.to_string(), - osv.Bug.public == True) # pylint: disable=singleton-comparison + query = osv.Bug.query( + osv.Bug.status == osv.BugStatus.PROCESSED, + osv.Bug.purl == purl.to_string(), + # pylint: disable=singleton-comparison + osv.Bug.public == True) # noqa: E712 else: return [] diff --git a/gcp/api/v1/README.md b/gcp/api/v1/README.md index 36c7ef3962f..e5833e94b19 100644 --- a/gcp/api/v1/README.md +++ b/gcp/api/v1/README.md @@ -17,6 +17,9 @@ python3 -m grpc_tools.protoc \ --descriptor_set_out=api_descriptor.pb \ --python_out=../. 
\ --grpc_python_out=../ \ + --go_out=../../../tools/indexer-api-caller/ \ + --go_opt=Mosv/vulnerability.proto=github.com/google/osv/proto \ + --go_opt=Mosv_service_v1.proto=github.com/google/osv/api/proto \ osv_service_v1.proto ``` diff --git a/gcp/api/v1/api_descriptor.pb b/gcp/api/v1/api_descriptor.pb index b6aa1ce7f2a..e26e2dd9275 100644 Binary files a/gcp/api/v1/api_descriptor.pb and b/gcp/api/v1/api_descriptor.pb differ diff --git a/gcp/api/v1/grpc_cmd.sh b/gcp/api/v1/grpc_cmd.sh new file mode 100755 index 00000000000..4846fe3bcc5 --- /dev/null +++ b/gcp/api/v1/grpc_cmd.sh @@ -0,0 +1 @@ +grpcurl -plaintext -d '{"query": {"name":"protobuf", "file_hashes": [{"hash": "HYrPv5WwrtoW1m/i3Fu8gQ=="}, {"hash": "SdayPppMycbiZhx+G8WDdA=="}, {"hash": "Rcp1gASbawvNvoEDxI0XZA=="}, {"hash": "hTT8dw5YkKcrBgRJmAAEMg=="}, {"hash": "zTXPCm3PQLvdhZ1KzpK55g=="}, {"hash": "VkVniZjASuB421IBOdEkqg=="}, {"hash": "SGQKsY9Epu7KFoBpFJdeDg=="}, {"hash": "PgfmF7ymHCy8UutBR/weaw=="}, {"hash": "A1aTtdORE34wT9+yCONb6Q=="}, {"hash": "vKdSzuWE5aT6XBuGpGmJMA=="}, {"hash": "+smfDkmVxUGdR4MFod2JAg=="}, {"hash": "+AsjgtZVqNIPwqFizDDU3g=="}, {"hash": "jW/f9U7Mb0gfEDbyhDaVRQ=="}, {"hash": "d2NawrxMkFi+bytJQR8hWw=="}, {"hash": "Hbtni+f6Sr+4b6q8/kDIxg=="}, {"hash": "ssBoCkvm62K8UFFPRHtYWA=="}, {"hash": "RDrIcKLwoUMMwtiaL5phHQ=="}, {"hash": "wIViE9qPzeHgREIEh+9dTQ=="}, {"hash": "Hr94gjnNRY1gWn5Bu4+5cg=="}, {"hash": "WpOemM2uoNN9d6XtFnp/mg=="}, {"hash": "9ZXSXpuRj/CT9+ZfmVuBcg=="}, {"hash": "hnGEOJZ4kYplOwtzeYgCzA=="}, {"hash": "/kt3tiQwUVIUd4RTQ1FC9Q=="}, {"hash": "7jZa9gnc4nFr8gJLQGQB0A=="}, {"hash": "SfxCbMqTbiTO0smF+W/1Ug=="}, {"hash": "AsGA1U2zeMyeK6OotYIyfQ=="}, {"hash": "HsJo1GuY7vGhjNd8UESTdw=="}, {"hash": "nRLoNKh1JbgWYg5RdiDV9A=="}, {"hash": "jkpGJHAQXMiWOLNYnbBtAw=="}, {"hash": "F8QLKCxCztdWBqxi2GXAYQ=="}, {"hash": "gFYQVAvE+R6nlIODyp2JvQ=="}, {"hash": "4RGnHrRMOfj5vFPPf57mhA=="}, {"hash": "U77TuPOtgOfKRR779hx8Jw=="}, {"hash": "5PLvMbKboB5YMCdG9AxMLw=="}, {"hash": "gC+mg0XEl93AiAm/a0Z3KA=="}, {"hash": "WoCeTyaeWFNAnmhA7/opaQ=="}, {"hash": "HgTTFFTPiQcmWMnQtYVIZw=="}, {"hash": "a1lKoN9JX4GprJj5CQp+7w=="}, {"hash": "C9moWJsKFx2tbIap24h9xA=="}, {"hash": "d94CR+SryQMvMjOjwAJz2A=="}, {"hash": "E/R7tFWHp+EJOFaB+7+6qQ=="}, {"hash": "IHHBh42HzPxW8y+8lKbbSw=="}, {"hash": "ZEwPwp26geppuwmdCJTrsA=="}, {"hash": "4keOwn0V2jdBWvCpLV5w2g=="}, {"hash": "Q/zZWcpwZMONqMxWxuXLnQ=="}, {"hash": "+5uCn0oKDENOhhkCQ+fMEA=="}, {"hash": "+FtVUFP33PUZxD4VisYj4g=="}, {"hash": "dS8kkj8PQ+Ikw5VmKF8Qxw=="}, {"hash": "UYv8NgXGrg8uPLhcXRVjnw=="}, {"hash": "030tCeXY+NPLNAaQMxP41A=="}, {"hash": "Td0T0EUKSFNMMwcthVWWQg=="}, {"hash": "O3kDW4xAAw2pOfnSOKT0rA=="}, {"hash": "4IfzntTSAp9K/8ELnZafqw=="}, {"hash": "W7sMjOEHoCCce4jK9wfGJA=="}, {"hash": "J3DbaO3UlQaHGEE1Rn811A=="}, {"hash": "pKjM31QNAzxJNAn/kFG9tw=="}, {"hash": "lwg4vPuZT4BLly38BxZdZQ=="}, {"hash": "ERLgEmwe/WIEWiZ/H5jovA=="}, {"hash": "OYkn/a8Zc/ICVobbqVGtlg=="}, {"hash": "knJGXgbghSr+7//6aFxEyA=="}, {"hash": "UZC9DT77F2MxpIuRGjKvjQ=="}, {"hash": "aItDlo3GfiTqewv1axsjUg=="}, {"hash": "XxeRirYqIy1NvLckKxs4Dw=="}, {"hash": "V1rg5flNBhbyGDuScV1uEg=="}, {"hash": "uEi+lsj3yU2lMd/+yNIlbA=="}, {"hash": "oDrCVrXRYbmorSlMITOHww=="}, {"hash": "8P0BH2TlGPLnIa+5RUGB8w=="}, {"hash": "0xADd1leAIL26RKK9bla1w=="}, {"hash": "nsB3mo8McTTrhiYgzxeJCw=="}, {"hash": "nc49F7B4S0LsAoXmmri33Q=="}, {"hash": "lVAlqsQGTBELrKIyle78og=="}, {"hash": "qNAu17VLTxjyK/vcQx8ZwA=="}, {"hash": "qK23MtoDFxiEN0WhPujGfw=="}, {"hash": "zYavVejgr8qOsRfgryZITw=="}, {"hash": "ooTf4Ft1JkYuF4XTI9OKkQ=="}, {"hash": 
"QPQrWQua5s74ROyVLu1Ogg=="}, {"hash": "zZaJDCnGpeL4Lxafnfnzrw=="}, {"hash": "bKuYM1E2T2yRoVpgMzRhsQ=="}, {"hash": "guLpz4FKfAszEd2/4EtJtg=="}, {"hash": "9jTG88lAkClLGhL158wSwg=="}, {"hash": "/+pxtU+IGOL53mzDfxEezg=="}, {"hash": "iPjksnXnhDS4SYkpoaN37w=="}, {"hash": "2cbVuD13ovRrmqUIrL4Arw=="}, {"hash": "Cn7BKxYnxXDgfaLrn0ygtg=="}, {"hash": "CFMtSozW6sa067UuDuioUA=="}, {"hash": "2tzIK9ak4CmOsUc4tLF4Aw=="}, {"hash": "hkGsgqGJe9slKxtNbsfnIQ=="}, {"hash": "vmtchBkLu6dIqJyLj3eGKg=="}, {"hash": "7Sd3xYDoLN9euaFdzy8DvQ=="}, {"hash": "v413ogby6ZmPZpaNH6Butg=="}, {"hash": "dYRbU7YeSB/UxTJ2sn4Yuw=="}, {"hash": "CaKYr2/z3AN2MpTm+vrG8g=="}, {"hash": "LH0KfP64cMkH09376tBFaQ=="}, {"hash": "XfS2HxMGbR4zacy9TH7S5A=="}, {"hash": "rOW+QgT+dzUeRAuB9+iETw=="}, {"hash": "blbzMMQQ3HDEcKVLE7Tf9w=="}, {"hash": "7n9JGL59EzQU8Zade8ubdw=="}, {"hash": "f2HsqKWlcjooLGG3GnKiSA=="}, {"hash": "hC7FYyw4Y6Kx3W4hKFN7Rw=="}, {"hash": "M0IM9scGS+HEDEhg8ignjQ=="}, {"hash": "Z+2396lJjxMxU4MQpaOqgg=="}, {"hash": "juCHL9FIbDK5MUffgA9Fjw=="}, {"hash": "kAhGmY4uk94KTbGxxqkj+g=="}, {"hash": "0Lkcj/Fvu6kuUn/o8Zq+uw=="}, {"hash": "JaB+8uBc+gmIVwwSjZ1a/w=="}, {"hash": "DDzfqoUxoRPinwIq4EX+Pw=="}, {"hash": "cTg3NSKIICCB/5kNNTV6Fw=="}, {"hash": "YBCXLPzShvSp1Vq2MhQRzw=="}, {"hash": "H54tNEx1jlJrCMcldn+OtA=="}, {"hash": "mWS2RdgEQi1dUWGq2Q9pyA=="}, {"hash": "6cxliykPCwob45bGMjwlww=="}, {"hash": "AUak9FPiESDNdf+DzxRbIw=="}, {"hash": "BgcaSQaiNnATretu8Su1pA=="}, {"hash": "Lgkhsx7MS7VBpDzCdEVsNg=="}, {"hash": "7QyY30NM/pOPTyyLGkVzvw=="}, {"hash": "USM9KRX7zdmYYwXZOQAykw=="}, {"hash": "o1JB+CJmu8HAlpOQu20V3A=="}, {"hash": "XDjsgNK1Ot2xm3Rgp6/Gog=="}, {"hash": "jAwaRQnwAnnN8rp2I8Sj8w=="}, {"hash": "8fXGbCMhaXhc2IzLNZveXQ=="}, {"hash": "/evaKbxq8VBASZxCpYwJdw=="}, {"hash": "riMTC4iF5u1EeVdeTvPC4w=="}, {"hash": "7b/YlSWslrsk+23t3c9SqA=="}, {"hash": "x/x//sUjRORl6+sof5vwWg=="}, {"hash": "b9RstYVneUFeiZPzrqcF5Q=="}, {"hash": "p1B5oC9HSO4r9WWrPxk2kQ=="}, {"hash": "o5iOXyhNirB2r0kyfA8NDw=="}, {"hash": "lOqjBGu+gRdFwKmlEidV2Q=="}, {"hash": "rwX+Y9i9hx/V7oyV1SymuQ=="}, {"hash": "7kKWBZJpf86do03aiPQkQA=="}, {"hash": "Y0d9K5pEsmR8806Zy9CGsg=="}, {"hash": "K9RhlSQmXNhGS6EbGDNVXA=="}, {"hash": "Ma/iqqrk62UQqIg3O5A29A=="}, {"hash": "SODlflAu6PS1xaElYKCjDQ=="}, {"hash": "dQsiYMHac/kQOegZlAWN9w=="}, {"hash": "AkTyeR0JWaK+MLd8SZ/KKg=="}, {"hash": "7GMqVh9LmMifKPT9GB3KsQ=="}, {"hash": "1RGMHNzY5QgGmXmCSUQlUg=="}, {"hash": "VmIXmVpAs8IRol/J8t54vQ=="}, {"hash": "IOczHI/mkTpExC4ab/O1xw=="}, {"hash": "3ghAo0apYDbFPGK5MAF27A=="}, {"hash": "9a2W1qjDNdgYlgauOzuemA=="}, {"hash": "wsE8Mjw2Jz93HTkhk0xxvg=="}, {"hash": "oiML+g00uIVU3iqjGBkMxg=="}, {"hash": "Jw22FcBul7LJd26vvitBcg=="}, {"hash": "3rEbA5/VRilATTWF52L1Ww=="}, {"hash": "TyY2CsOMpysctQjHGXqObA=="}, {"hash": "ZoqxQcbbR0hT9AVGue2AfQ=="}, {"hash": "xBBIPvaePKJ1G/l1/SQTJg=="}, {"hash": "GTSj33V1ygUuNY5IMHUo0w=="}, {"hash": "82kw4uXTVb9UpR7VV7l23w=="}, {"hash": "bphRvVRqwkvSzfVxDucYew=="}, {"hash": "zW84q7QTnHWnAFhLc61Hbw=="}, {"hash": "VtUpg5T0Vx3SLxVQkin0Og=="}, {"hash": "NBEYP7XeH1x5FIbzYOVgDQ=="}, {"hash": "GrDTa14qI+7olPxRETQ6Gw=="}, {"hash": "x8JdvsMQho3OgtUj5jusvA=="}, {"hash": "u3gJxaRbPxl2MSsk0Nrc6A=="}, {"hash": "IiUjltpbxB8+9gEaW1mHHQ=="}, {"hash": "jLb1jsrMDSaPSqTzD966Xg=="}, {"hash": "N3g7I1YNR6Z4ZsUnMq3juQ=="}, {"hash": "YtbmqguGLmH9VMMtO4Hy5g=="}, {"hash": "KCnkj8Gh2pniPFUWQdWW1Q=="}, {"hash": "F6wUSkoUehQCshLIitpnbA=="}, {"hash": "ibDTn8ALr5PQda7z8MnDFw=="}, {"hash": "AwLvxgouvpS4pojJ6T89Ww=="}, {"hash": "vi/uHQSpFM1hSwwyMOWSrQ=="}, {"hash": "1ck8LWaY5hkcqYucSiwcjg=="}, {"hash": 
"re9uS+H1elsE69Rx6uJI+Q=="}, {"hash": "RqMTNEUNLVuOP/qxhiS1Cw=="}, {"hash": "Oy82R5JIQtz90yyZUR80NA=="}, {"hash": "qz9T/3BVS39EX/nUUnDD4g=="}, {"hash": "oNsiGfNDpAb+J4I3f6/lng=="}, {"hash": "QgkWL4hgvBxyEc9WUxp0ww=="}, {"hash": "KQnNA9iFhxBEijMQeGG+yQ=="}, {"hash": "Ml6D4yTe+tvMhr/ONUVQDA=="}, {"hash": "k1dcU9Io7wDIWKscrUW/dQ=="}, {"hash": "WD/Y8TCcgTXS0XtnSioZwg=="}, {"hash": "JQwlW4ppyOo/MgtqV2qg3A=="}, {"hash": "a7NqulFYo5H32lV70yd2nQ=="}, {"hash": "/9lOWUxqRJJthKYuGN36VQ=="}, {"hash": "Tv2POCWr4Cl4MQznfck4yg=="}, {"hash": "dh2ZVRYVPker2FTwrYohig=="}, {"hash": "V+uay1lSARkAtk9hEYrzYg=="}, {"hash": "f5HMaYaChsUfyyVkPpshmg=="}, {"hash": "tx5A3dXx3o5ASfUgDamUPw=="}, {"hash": "k17jZuceIKs8kd4N9/SANA=="}, {"hash": "un9y9KCbrG4Wv4Kbem3vHA=="}, {"hash": "0IAkYm7flqggCcthSt5SZQ=="}, {"hash": "z4MdKaN6k9mB6vcPgiGCDQ=="}, {"hash": "nVxMW23S/lri4tNPvppwGw=="}, {"hash": "JWeKN0ZczQwainyl+EDd2A=="}, {"hash": "RMkZbF9Z/O0EHWy1hMD3Aw=="}, {"hash": "h1k5vT9VbLZqsVQZ3XFE5w=="}, {"hash": "DlOJ+nwuD85qpZwwMNhT5Q=="}, {"hash": "Y3s9rCh2jDdSAF/rmUdomw=="}, {"hash": "DDJ+zR+boJmpy1Af9BHNIA=="}, {"hash": "5HHA+Pzjrl/3c7QR7OX6Xg=="}, {"hash": "1zWQ8kRMDX8qq1TLTsu7yQ=="}, {"hash": "ZmfN2mpdyeDDk7UzQ2nvkA=="}, {"hash": "xfnl2tDoY/pvwhfGeR67Pw=="}, {"hash": "SoFV7/oYmtISja+NMuY0QQ=="}, {"hash": "9j89QzerhYjZglhInuwLog=="}, {"hash": "31rM4T2zvNcr4pN4EM68RA=="}, {"hash": "uFE1sCrk1vM4ZFSilHZHiQ=="}, {"hash": "T4eEMwt3jxERdhY/NS5Apw=="}, {"hash": "AbBtf0rPf3w7ST2/qjY37A=="}, {"hash": "EDtXGcmyxouhDUBD/tXb/g=="}, {"hash": "v44m3r8YLqD4OOqNrTIhsw=="}, {"hash": "R9dtgnz3fXj4Pw3jzNNJfQ=="}, {"hash": "SWwYrvUFIcAoxJUhssvQlg=="}, {"hash": "jVJlUfo4wDJx7tkHfknx3Q=="}, {"hash": "xNFUTxlAr338RYazJ66iaQ=="}, {"hash": "WrldlyDf4jU6kuVlAdz/qQ=="}, {"hash": "WGBe2SF+vkKXSGEmE3jJDA=="}, {"hash": "avybie68RBE6VsFm+sp0pg=="}, {"hash": "L9UIZ/AQQwmaVK9Z6V+zkg=="}, {"hash": "solRnKCfmfOxLXlgMNz0eA=="}, {"hash": "89W37CHaDh8eH+3yjuh/pA=="}, {"hash": "QW+fc6fBJAJy7LXJFzZn7w=="}, {"hash": "RGoF0+gstNq8qMPoUJyPZQ=="}, {"hash": "BR82JJmcjxxxvx5GB3AHow=="}, {"hash": "PMxhOizrfsgmLqzfp8ub7A=="}, {"hash": "v+X1C7Pz+DTf3SeSADG+Fw=="}, {"hash": "sUK5T+HYmsfnCpPI8WlFhQ=="}, {"hash": "XMyHT4GLKOqq0tvKQ5WbVQ=="}, {"hash": "AQRUJhrv63LeZcyvtR1Msg=="}, {"hash": "nzowZLbxUAWZyKj8hmAkuA=="}, {"hash": "+hfAs69rqum2f0HW+0oRiA=="}, {"hash": "ntzRgtp6OYpn8y3dETgOAQ=="}, {"hash": "NeJvzHyUT5qn+TczNgd5/Q=="}, {"hash": "BkVXNSEDfITXtkPbDJZdaQ=="}, {"hash": "GHG21+yZ0jMOPVddvvdQWA=="}, {"hash": "jv+YomKi+iso1c2ktGJ2Nw=="}, {"hash": "5kqPajzwEeQxvhbIO9JYfg=="}, {"hash": "8z2O/VQ57ctmy/hmcdPnFg=="}, {"hash": "l4mqcpKtY4ahp1N7uapbiw=="}, {"hash": "tryenhs5EMKK7jBoQHopjA=="}, {"hash": "g6mHeURudcQb+EFVFSiHqQ=="}, {"hash": "6a0CfOp/85vniRY0Otfrzg=="}, {"hash": "RfpEpjuHnHQr5INEQPNhXw=="}, {"hash": "7AlxhMGXoVWLICVYMsR9ag=="}, {"hash": "qJqAJj3gpmCNC24NW746Gw=="}, {"hash": "/DskKvt6uEp6MZVyiAfAeA=="}, {"hash": "EKoKabUd6qtcjUzCEndk+Q=="}, {"hash": "Xsyw0Ia6pSJ6faHMriugEA=="}, {"hash": "CN35qDIUKKeH0ZCb5k5rTA=="}, {"hash": "JeIdGP9jYW1Bn20BALIGCw=="}, {"hash": "u5e9nKgOLnObejKsJy/dNQ=="}, {"hash": "GQTTxS95WSYBhA2L/xTBxA=="}, {"hash": "thaKpyzuNtwfYHofzlbicg=="}, {"hash": "4IxARHGMyJ8gDNMYGCn0Vw=="}, {"hash": "OSp/HtT3sWGLmq5no7FYZQ=="}, {"hash": "7bdIawM461Gi3xAYu9fohw=="}, {"hash": "b6156uJkHHmx+H2YyY/s1g=="}, {"hash": "0QrNuFHt1gu81+oXsKLXQw=="}, {"hash": "syDZ57mw1ElQkP65lE85HQ=="}, {"hash": "JVgvtDMuUw+TVHpCc4t/vA=="}, {"hash": "kyzft2W06RSXn3SblUMK8Q=="}, {"hash": "BdJp+vNJCrIHurTJyRdwKQ=="}, {"hash": "BMgbHKfIw0YTDtH8LuqYaw=="}, {"hash": 
"ge7pIOFnl7/2X7mJJfqx5Q=="}, {"hash": "xb7curu8w8akIx+4jJDOiw=="}, {"hash": "rph6xCoYS8kHzBmWrvrX3w=="}, {"hash": "TxsnFpbolWmZPXN7TEMZYA=="}, {"hash": "JtERgY7P1bFOZ5Zwro/tyg=="}, {"hash": "djCnC72g5ntI5t00mSSokQ=="}, {"hash": "GYTfJ+KxpZGnd61mmLjbMw=="}, {"hash": "FWC0AMUvvFi2rd6vYM5OTA=="}, {"hash": "1j7qiQLveMf3GZe/3XNbsg=="}, {"hash": "IRcV32gzpm5KYFfe7raqiA=="}, {"hash": "biW7e/EgN540EA59naQU0A=="}, {"hash": "OU6MdjPPQGTppl8qXdAf+A=="}, {"hash": "bJN+geHKNztPIIfbiY8Uew=="}, {"hash": "OxWQZHh4znCJII2PNfBcUg=="}, {"hash": "YoFSA9z/J6bxqD58vczJvw=="}, {"hash": "0hmVCfVwEPHysDCKUFDeZw=="}, {"hash": "wKppKk4HPLO4wYUygWTUyw=="}, {"hash": "kHTC6TxsEO+HW7N//QviRA=="}, {"hash": "4ZEo1h4BP/8PSmFhfkMYAQ=="}, {"hash": "tUZ1lM5KmkWSTJiw3KF1Qw=="}, {"hash": "XkipucqX4iyap1TRQIN8Iw=="}, {"hash": "/ntPhFMFmQs8FJInt9gCKA=="}, {"hash": "W8MdggpFMAoqw221fMTT8Q=="}, {"hash": "VeIkAZQCxItSmzLw3zD8JA=="}, {"hash": "XDTKrYjuNPw6I30/gQpdbg=="}, {"hash": "VhlGmk4fN6JDn5/m8BbYhg=="}, {"hash": "PST+1PElXUV5UhHduXhPVg=="}, {"hash": "c3mcNx7Y/HV3ivcXeQu9bA=="}, {"hash": "Bc3cWtimT+3bDY0zaP2Crg=="}, {"hash": "asdlfO0NqGlyKIhl3T9yrg=="}, {"hash": "8e/4kAWLdCnwmk/P/hQzlQ=="}, {"hash": "F2sfW0uSNX/mK+ILFKgXyA=="}, {"hash": "kvzFLtR1CluN0yZbZOWRfA=="}, {"hash": "EU7ldzXiTmlvDK/RFYKj2w=="}, {"hash": "Xa+hiJR1k8fWzdwdGeJJKg=="}, {"hash": "vJWg4kfEBdxDNf5LgWfXmQ=="}, {"hash": "0/yNcmDSuBpK78Yv90QK/Q=="}, {"hash": "Q0RqR1f3fEzHufk0ghc3PA=="}, {"hash": "K2sznrNBLmoME1RB1/Od7w=="}, {"hash": "YXUwdp+J33tROi07rMxcQA=="}, {"hash": "1cWwCXiLoWjVmxYUAGeQwg=="}, {"hash": "TdIlo/VmXwB5bzfYopVMhg=="}, {"hash": "iPYg6mQVZKdAo54+reP+bw=="}, {"hash": "FEce8RptZOZpokLrisjldQ=="}, {"hash": "1mZ0oBu827pffcwjBolaPQ=="}, {"hash": "cyoVpy6wVxFaKBvvGp4FgQ=="}, {"hash": "C3adov1Z8yI/WgIQBHTYhg=="}, {"hash": "J9+yVx6G0emuujDqy8LBcw=="}, {"hash": "0V8E8uTZEzmLxpmS5MPitg=="}, {"hash": "2mRBM5wu0tNC2DVmNG+SRg=="}, {"hash": "OqvBfDc+G2YbHYHWk5cRBg=="}, {"hash": "6e9csq1bZFTfuj5ML+kJPw=="}, {"hash": "ozKiulLBFKqBwnMMJklG3A=="}, {"hash": "CDiEkZNFsfm+2h5BLEECLw=="}, {"hash": "NWuqBGulRMMAbw8lduxEMg=="}, {"hash": "AINeupGsbdn7Qx8X8PHsCg=="}, {"hash": "Vic/Z2jK7tXjazhUSq5cwg=="}, {"hash": "dzOHUIiNtucdtujpIrnV0g=="}, {"hash": "bKQJaIc46YlPV9DyaKorJw=="}, {"hash": "Nn+Iyr3tYGlqyLikhORU+w=="}, {"hash": "Z70Yhq/xpSboLvlOwsicvw=="}, {"hash": "STHpbp0d0SvVu4XWstaeyA=="}, {"hash": "OXU4cW4L0OA+jJ8WryDBPw=="}, {"hash": "HqFRPIXLSYa3dZQhF0u+lw=="}, {"hash": "uz68rZRbh+EXMs55hwFEBQ=="}, {"hash": "vTnqOHGqmOSAVbH1qgHALQ=="}, {"hash": "fu9tdZp3UGhA0a38QfnWcw=="}, {"hash": "HyhHEVwlHEW68NOR1YKkMg=="}, {"hash": "Sfh2dtNEOSimCbCtBYFppQ=="}, {"hash": "i4DM+tOivzxqMBsCpxrzAw=="}, {"hash": "PGwNB4U9FUTDt3YZn60gvQ=="}, {"hash": "IiNytOqz9omSwLwip44soA=="}, {"hash": "YUVHHG1mF3CO0gJiYshkUg=="}, {"hash": "zRjR7bW4cbixO2AxtSIe+w=="}, {"hash": "f/aARhkck4XQ1OIpHm8d/A=="}, {"hash": "BA2m9UzXyOcN+76h+wdk9A=="}, {"hash": "XFzxhOKh8scO9tZXvRWtxA=="}, {"hash": "W78u4RvjIlYATFhb16nfeg=="}, {"hash": "gkQbxhNobGwlvAtGlu9nvQ=="}, {"hash": "yTTrayZKz/qbIk+Zs4UvMA=="}, {"hash": "DZFLsxhGy8EYfB3B3HEk4w=="}, {"hash": "77NvK815kZB6pPRjIxWzvA=="}, {"hash": "oUKXsP0lCLoLhNwmAmxGNg=="}, {"hash": "Lk81X492137O/OZr1+8qgg=="}, {"hash": "8IlQYf5c7v0gpXsf9JRzkg=="}, {"hash": "aa/P+aC1d+dNlFvHRIA8Lg=="}, {"hash": "9popGD1KsSYhLXbJB6ak5w=="}, {"hash": "V/e7xjpbAfEVvk7GsGImLg=="}, {"hash": "mh6rbBTzVYQOBnjc/qumFQ=="}, {"hash": "89TRGEXGvARZy3O5BUee4Q=="}, {"hash": "rpTvD7UT3h0vso9bTH94Ug=="}, {"hash": "aalDo5qPsWu8c8weKKkTUA=="}, {"hash": "7OWb/kFMDDRFamwlOVf5Jw=="}, {"hash": 
"sgt0arjvhztM/P4SaUHn7g=="}, {"hash": "grTsrQn/u1oln6rmWm05AQ=="}, {"hash": "qj4AtmYdb3wPR6N/Uq32rw=="}, {"hash": "WfuEosJurBo5YjQwikNvsQ=="}, {"hash": "dzb29d41ugujhjMKwfR4uA=="}, {"hash": "8UloimvaEMbCIJ2egVr/FQ=="}, {"hash": "D/qeZRc6KplFPg4KoaGOnw=="}, {"hash": "947aWgfKktDgu9o2E0GJwg=="}, {"hash": "sXUfEu8fGpsLUB+4ETtygg=="}, {"hash": "8SSxV5PKzfEF79M6+tnE8A=="}, {"hash": "84zKspZ1bQWG0i96xo/xgA=="}, {"hash": "TOQIMppHKcNHBPDTu7zIlw=="}, {"hash": "j7h8n6iPSTfKma/Jp9BZOw=="}, {"hash": "gPdFbMtfgRDLAC1jYNrC1Q=="}, {"hash": "9vb16BH5isY8993wq/7QTw=="}, {"hash": "1dgnA519GJa1lmWLbtUvZg=="}, {"hash": "c0C2czo0ycFh3LakdZOcHA=="}, {"hash": "QY5HHVwqZkFwmr8Y+midew=="}, {"hash": "IcczONqiHETEgcGSjbk9Zg=="}, {"hash": "c9fCtPblhsPmB9hplaFdpw=="}, {"hash": "UphRs/A6WII+q+1zjmbarg=="}, {"hash": "U3D3GyfyvduTNhHVXbyH/Q=="}, {"hash": "wSgDXOhoOj+z20i/yMu59g=="}, {"hash": "cOgO+C67HtYHU0dHJgfRQA=="}, {"hash": "wPttK/Pv7pyQZhjuMIWJeg=="}, {"hash": "kFe2B7aZnTRdRX6E92xBng=="}, {"hash": "eoFsU4gYGp3EuboHtpxaag=="}, {"hash": "bUH5cLTEoaPNGEI/ajDtKA=="}, {"hash": "YzaCP8uaYnzE5jSnNWjVkw=="}, {"hash": "5UVox4zw5vmalN+ZF8lKvw=="}, {"hash": "LsV038+ak/wYfvyBxOWQyQ=="}, {"hash": "AKNp318irnzecUvHg/e4eg=="}, {"hash": "BOVSB/L52fCQKTZwFinRPg=="}, {"hash": "579dBfzvvVCiq3aWtiWBvA=="}, {"hash": "xgRthmPIL3ooEZ7gG5VDrQ=="}, {"hash": "YiQKb0fwVYikuM/O6/+JJA=="}, {"hash": "ue3V+fLqpwcFK/8IysLuXw=="}, {"hash": "+BqCD84Bg/XRGGFGboBHKQ=="}, {"hash": "HAOJb8IBJ4nUUDBvPAK0Xg=="}, {"hash": "FeBezYWst7XtN9r14ag6eg=="}, {"hash": "2XKr0C2Vxn+kxKKeCsQEhw=="}, {"hash": "NEl1HKne1A7v58ZFbqLJgg=="}, {"hash": "cgVnqyYPvcJ0mNu/OQY5cg=="}, {"hash": "UahBzyMuL+WjHO0IAPN37g=="}, {"hash": "PMq1qqtc9Ly2jpJG2Inl2A=="}, {"hash": "jkvgfY9vdyKQ5rc0yb4XWg=="}, {"hash": "+r576Oe/tf9+IS4UXtajJA=="}, {"hash": "j1p1O37CmJ5PyFfjoCFI6g=="}, {"hash": "q3lTc24mmBJQBsICaeIX4w=="}, {"hash": "nfAW1+wGjdSTjfxM5kTkSQ=="}, {"hash": "Z3J6iwlik9RgqbxDqLRoDA=="}, {"hash": "7xHvlsY5Hlu36H8yqy9ugA=="}, {"hash": "OMVzSvyBgJspcTe9+9ZJGw=="}, {"hash": "BrbAnU54io74QNvtCo7EkA=="}, {"hash": "70Yo6ia4kgUa9exk6nBcdQ=="}, {"hash": "NPsZGEdWj0djjaqRc0jk6A=="}, {"hash": "o8rgNK/YFPnsotPHW/JctQ=="}, {"hash": "ryQIqgeLwT8xa+ZYFJ0kBw=="}, {"hash": "R+zqsCqIkxkKuscMvB5rQQ=="}, {"hash": "Gxn0FxjCtOvNdcKa/jFCFQ=="}, {"hash": "BDlkMsadD9xdJjj4hmjPhg=="}, {"hash": "A98MAlM2TkVolD8rn9J8cw=="}, {"hash": "5q0nwdGcCVt4r5uJtaS5lw=="}, {"hash": "7w+SSkg15DH/xN25+FvBUA=="}, {"hash": "4inn9njGwyG7hstCsdse+A=="}, {"hash": "LZjxwBYl6TQB/fgpUY95KA=="}, {"hash": "wAHxpGzKNa9PNTCJyi3eBg=="}, {"hash": "8cfQPCOb3xPQaFs1mwK4vw=="}, {"hash": "wEmiFzOQU0ndkGhyjyyuoA=="}, {"hash": "Ix8+MUFkioOsuHW/KImyXg=="}, {"hash": "YiMg7govV5eDnxhX+Z4y6w=="}, {"hash": "u6pfQ6qlnqwN517AvyfNZQ=="}, {"hash": "6fcBX4AmonByq+1FdgyGag=="}, {"hash": "Fk0Nj04DRZGKSM2+ZrGINQ=="}, {"hash": "vo3OVN+5k6ZFsyy2QRoykg=="}, {"hash": "oWiZhVYB3BGuRyBlZRQ82A=="}, {"hash": "tiPm2g6AuIDhej5yKk8xSQ=="}, {"hash": "dEc/SqSkUZDddNKQGo6DNg=="}, {"hash": "mfIi9KoLyUVTF94DXKnYbg=="}, {"hash": "eUnmS1jBZA5D0jocFZX8CA=="}, {"hash": "X1uiJzgn+XN+joF+baeQww=="}, {"hash": "Sg70SUMFet8F+kNbBI+SHQ=="}, {"hash": "0tf/H5Hintd9E+zxIGVsDQ=="}, {"hash": "CfnlyuCKzJQ1PPP6L2X60A=="}, {"hash": "JghX1OKuKIcPX3M3CdM/pg=="}, {"hash": "VtYIbZe/AqS0Lx61XZ/E3g=="}, {"hash": "gO+TVz+5EnOO+sAiCSSsbg=="}, {"hash": "6MezlhxEwNTZ2736107big=="}, {"hash": "KzigPhdcNJmwwo2qqA5YEA=="}, {"hash": "V3hj+0mSmbnX+kCwsQI75w=="}, {"hash": "dYnWPKqt2iW7Y5Zy7H397w=="}, {"hash": "LD5MqYm94GjQbgS6BNT/Ng=="}, {"hash": "uwDL8FkBaXk9O6zuo5Rr9w=="}, {"hash": 
"6ixuDgBDAFidWUZKwwLVXA=="}, {"hash": "Z1KWDbhX0U0EzB3RtEZjmA=="}, {"hash": "LrDh+/3gGrYkp1COxVitHw=="}, {"hash": "u+mAR0OWIWTzzb+PTM/9rA=="}, {"hash": "YHm882JhR5S35A0fYc6tLw=="}, {"hash": "pxjJ9/TCzvBTXplNQJ+XJg=="}, {"hash": "cyuLlJOFwZG8OjTwx3bZqA=="}, {"hash": "/s5HGuYQS/nLu/51syOL9g=="}, {"hash": "MrEkN0fgpjTpA/IbZXKIpg=="}, {"hash": "X+zLb3Xvx4dEuFVLDzMF3Q=="}, {"hash": "xTLLh7VFpe3z0r6UP6SRgA=="}, {"hash": "/8joiwd3wwQi51NAfkA76g=="}, {"hash": "A96KcDyaHPhHc5TUKF7oEg=="}, {"hash": "awJTHStRQ6R2xICVslOyyg=="}, {"hash": "RfpUVdlD2AsqX3xFK0wz6g=="}, {"hash": "txuBOba1wFgTNZC8CGdmeA=="}, {"hash": "hB+Fwc0/UjrtPJV4lrmjoA=="}, {"hash": "c2BOexg/G1/atvc7o8cSJg=="}, {"hash": "4DpSvZrKNwqqc4BbSW5VhA=="}, {"hash": "z05XB6E37nLizPOZov5TRA=="}, {"hash": "1Xlw9t2me2cWd/sTak7YPA=="}, {"hash": "ESHCqfRG7K+dHpFNeeVEGg=="}, {"hash": "0LrZapLJIduNikh2jS87WA=="}, {"hash": "P0Zhz8bTxa+KogWK/sV1nA=="}, {"hash": "iPwRH7l4b21EI7AuYWtNTQ=="}, {"hash": "iyy/mRvOArO1Xrb3PuGMQQ=="}, {"hash": "32ZlTdqJ3DzNPLNVxOFVgw=="}, {"hash": "mYW1/LAE2KkZOz2oQq+92Q=="}, {"hash": "kputGA9B25OTx/eLvC0vRQ=="}, {"hash": "6gI7kDdeo3DZVesbflLHJQ=="}, {"hash": "cZNkx+zZJB1pZuC+UZQL1A=="}, {"hash": "Dzi0vMlpcRtjtT5pCVfA3A=="}, {"hash": "8sgQMDdEPNVJp4vECPcXFQ=="}, {"hash": "1yKShYNmIioRZtlYeDQUQg=="}, {"hash": "wZn/IFuT13iB6XoLTPDpDw=="}, {"hash": "9Jx0P14q7J6A7Ard0wIrsg=="}, {"hash": "x8dpja2zZlwE269UfI67JA=="}, {"hash": "q3+3KQCHVKZZZFbzhmJ9rQ=="}, {"hash": "BJkY38aptz7nqlOT86Z2BQ=="}, {"hash": "gDRoUkCbRCg/RHPyBnrIlA=="}, {"hash": "to3DZHtZJ5M2g24KIUlHpw=="}, {"hash": "xTyTC65Z4MvkI460ONceqg=="}, {"hash": "MnKuFVB7jZDG2Vq7JwriYA=="}, {"hash": "XkGA4bqTL5VHE17NwRxr3Q=="}, {"hash": "7ypa55D0qzsQI1XzrWh0uw=="}, {"hash": "ivWrqJclcfpywxxs2twhxQ=="}, {"hash": "NUM1Qhw5nqxnU8Ut6WS9zg=="}, {"hash": "b92N/ukwHyXs7IStnchhog=="}, {"hash": "7ZWE86MN0oJZ9lSl2LqCyA=="}, {"hash": "Acv6KrxicLa4Wa17kNg1RQ=="}, {"hash": "CvuRA4Obg7/zmbE6TKUIKQ=="}, {"hash": "R74qEya2JB3FrSXwbdZocQ=="}, {"hash": "QJDz2HPtWfKE/9Hqq8dEHA=="}, {"hash": "y9oB1eZZiA5kKqsXr3oweQ=="}, {"hash": "XErgXYDGV8gf9MMggbeKDA=="}, {"hash": "/PRMxHtV0GRQ3f0I8ShOMg=="}, {"hash": "0T3cdmxRiTpU7GUIzM+zVQ=="}, {"hash": "kjHonJMTOW/Q5A4GWA7oZw=="}, {"hash": "vdurMt3MFuKAF78j/9P5Ow=="}, {"hash": "w4rbCmXVk39y8afvE+R85w=="}, {"hash": "wxRi/83HaKvj5Dn92CXRDA=="}, {"hash": "ZekGZ6Cs+itF4mos+Ouxgg=="}, {"hash": "ofUK5CUlqSJ83tThrQnHMQ=="}, {"hash": "4q1kg8doFyLB0RKhb1MuAA=="}, {"hash": "5IxLQ5vftWVEi+SkDOowWQ=="}, {"hash": "KhlKAodRh2H8ZC+jJGXZnQ=="}, {"hash": "NVFRrjq6AZUDRkJXCoIDFA=="}, {"hash": "dZbh2/j2YuWQ6qVzZowWiQ=="}, {"hash": "pjl7qOavQGeVEKbpX2IMvA=="}, {"hash": "fYbYB9boexW4QYbEm/clLg=="}, {"hash": "WChhmWZX4KHfS21I6GRzDQ=="}, {"hash": "HgB5DPeONsex0ZxHqKjcaA=="}, {"hash": "pCHCOmCimUa+FQSX5j/oqg=="}, {"hash": "hQ4bFVnIN5YPotP7O6os+A=="}, {"hash": "mpjGuk9oT772YXIeu5i54g=="}, {"hash": "6ZOZBSTGBDtb48eRNf++Lg=="}, {"hash": "noYa/p/YDmK6+DR9gavQAQ=="}, {"hash": "yIFW7dHkDUx70r6rPIQOhQ=="}, {"hash": "ei0M3H3/2iQ6pJR0lVfBtQ=="}, {"hash": "nyaeEWSE2IvXocdL7IgKpQ=="}, {"hash": "amJrFBVZE0HagowAqAutZg=="}, {"hash": "Vu+epFLtCnnycKq3yuUV5Q=="}, {"hash": "/YzHL+GZN+v9YTARHEAzDg=="}, {"hash": "+626RjL03/l9200fFb2vqQ=="}, {"hash": "kWiP4H94FANGgRlKUwOy8w=="}, {"hash": "LP701b+eScwy5cYPE/2vJg=="}, {"hash": "sHWyodmbAfEU9hZV+M5Rvw=="}, {"hash": "N7TkOmJqKOAhoM33I00MnQ=="}, {"hash": "yoJPs9609274V4CQ1z2lNw=="}, {"hash": "ZTPI4qHu1aCvu6EpVkW6Mw=="}, {"hash": "gtn0oxWwB5b1zcQg2ulOdA=="}, {"hash": "WRxfWwv8LpJRt77vvRQs5g=="}, {"hash": "M7OyGmJUiW2XrCV+EB0ltg=="}, {"hash": 
"zmVRwflkZ+eLALRBzRDI9Q=="}, {"hash": "xBAMOsssKEWb6SWIxpIw2g=="}, {"hash": "1N5ol3bRO2Ny1YZlsfYTNg=="}, {"hash": "4KlIq3pq3U9E4FVM6Q4snQ=="}, {"hash": "tIjC1HK7GZWE6CXXvKTZ1A=="}, {"hash": "vn6pQUO6+zn9Es2AnYTV1Q=="}, {"hash": "5KuKFvzKnjwp7SCxaKGBGQ=="}, {"hash": "55Fx5we44VBdUdNsafmyEw=="}, {"hash": "/IDZokqc+OSCEY4au4R2Cw=="}, {"hash": "KP9LqYrMOEiHm2gueAC+xw=="}, {"hash": "0rIkDjkbIf28wV9ulSIOWQ=="}, {"hash": "Ij50Cv8pCx2jogLMWOmd0A=="}, {"hash": "MliXKzxT5vF2qSVNjea5Ew=="}, {"hash": "kttlKH7qm1OMKGS+8ZpT0A=="}, {"hash": "iRpxNW3L7NcXPfkZeAQm7w=="}, {"hash": "U31SBQgfWyRN5A8ubq6T0Q=="}, {"hash": "9SR4IqDrRhcXf1k5Jo84/A=="}, {"hash": "LS2o4OJl6Yjvpy1VdHa9jQ=="}, {"hash": "P4aT175AGXcoOOJ2k22hHA=="}, {"hash": "9U71ZWSlETFR34dZ5m8Fkw=="}, {"hash": "o5V4Fz32KBA1DynymnKHrQ=="}, {"hash": "fE3yNmvUeTfoClGm8v6yUw=="}, {"hash": "nu/j+JIeTalVnjMmNjkHPA=="}, {"hash": "qH9G0BNooEt9DtLO1xV8kg=="}, {"hash": "pIJn8JbSDgRoI717EU19Gw=="}, {"hash": "Dq/7PfhB+UBLCwpKKwKS+Q=="}, {"hash": "WucAJQT0aDnxfX69GXww2Q=="}, {"hash": "b5PHBVioOYU7SjyOHQuCbQ=="}, {"hash": "tKCh4vggVZ3fxsWdRmgZtQ=="}, {"hash": "ENnBp5iuYnysItUs4N8Jyw=="}, {"hash": "75L6bfyNj0/bcbAIcC+ucA=="}, {"hash": "Jiw352ftDwFMFBIRuz5C2Q=="}, {"hash": "uIbLe0iVJzphTvaEsZZs3A=="}, {"hash": "TXYMZ6tcALJHd0x59Qr1Zg=="}, {"hash": "kowR809EpiAiyOllJcDP7A=="}, {"hash": "/gIv9jW2S06b6xHY5B8nSw=="}, {"hash": "nCnRImTVlV7i4O7xxK6aRw=="}, {"hash": "xRMrpTxn+6iKVBbgp44IJw=="}, {"hash": "5+m48E0/a1JOqOpmbvQolg=="}, {"hash": "aLbhrX+i4bYw9l8TXz0Jlw=="}, {"hash": "y9qt7aCThtp+VN75a9w5Ow=="}, {"hash": "FZOAoBL2mw9Va48WGKYQVg=="}, {"hash": "24t5mRcH6kzB+1inFmIF6g=="}, {"hash": "PQCUf6k/bquCE1tzbeOVHQ=="}, {"hash": "rGyxHFrQZSoYYrO9usDv2Q=="}, {"hash": "MnIPTEIpqMG/2so0VobH7Q=="}, {"hash": "KaodM1cAFaz/Wh+jY2xbQA=="}, {"hash": "ghfupeXj+wZKpFFVuYu8TQ=="}, {"hash": "LiXwx3pc/YtNOC/iCFsdTw=="}, {"hash": "8/lw71jYB9Oq4pj4yFkJIw=="}, {"hash": "PXysONRyFGSfW2Z8mqrSEQ=="}, {"hash": "EkWNOln3HitvXpPoqVTVNQ=="}, {"hash": "vCJaTNbOBOooHx4U2IS8Zg=="}, {"hash": "nvOWVn/Y4PBD7vjBVfsKXQ=="}, {"hash": "wPxE3zi+y1dNYP2faJpAkg=="}, {"hash": "wCo+laUJhm/KwxFxocDM/w=="}, {"hash": "5woNOzXys8mpLnKYppG2HQ=="}, {"hash": "RIZcSDv+dPiiuvhxQsx8mQ=="}, {"hash": "BRG6c0S8yf3BX2jbJOfr/A=="}, {"hash": "zLHpUsUNf5HW915m1SBgVA=="}, {"hash": "xuuC5/DAYg8K3tdcZvl9GQ=="}, {"hash": "vAmvTyKR58Ndz1ID4ympmg=="}, {"hash": "9y4ikV2zNzC/YHjrTj/0kQ=="}, {"hash": "o4UTrO787lyyTQ/UzWU2AA=="}, {"hash": "w7GaEW9PIKaTaveuCEpLEQ=="}, {"hash": "9hMh/sH1CxOGdVreuzNJeQ=="}, {"hash": "gTL2EQvsFLQOMZcanGZ1ng=="}, {"hash": "H4Mrq2N89ktpwY8oCXJi/Q=="}, {"hash": "apE7xIEF/ho3toV3FkJ8MA=="}, {"hash": "61DT7RIQQyP8eKG6G40uVQ=="}, {"hash": "/NKe/N12mEmWwk5MvW+smQ=="}, {"hash": "abPOmif7fa2y6Mk4kKYRcg=="}, {"hash": "+u1B/E7v1I2KsvOYOQUGOQ=="}, {"hash": "N5mfd9hGDUqTWi0QupzHTg=="}, {"hash": "Awd6oF7+Bt1+WL4nSwqS3Q=="}, {"hash": "BjbXDGeNgx44vJEZjL7l3A=="}, {"hash": "7TbLByqaQDyLLOOUEA5cEw=="}, {"hash": "lM5bnzi7ga8dbO+EEV0wfw=="}, {"hash": "ABO5zXv6xvT5Yh5hThgZHA=="}, {"hash": "1Gs4+7/YgCLAyPSNaxOqiA=="}, {"hash": "pTe4eGeYK5KGXrXwZPtLjg=="}, {"hash": "M51Wb0s9WOu6CSvR/bbLBA=="}, {"hash": "gI2WyynujhqgbAnrrgm4zg=="}, {"hash": "uYWA19IvF8t+OTjuavuKYA=="}, {"hash": "i/VneFM7BnbaePi8r8q0CQ=="}, {"hash": "Jk7eTs21nx+Qhg7hnwHTcQ=="}, {"hash": "1npobuXaYsB3i8DfJahMdQ=="}, {"hash": "5852DUudaK1HmZdSOC97Hw=="}, {"hash": "pc9URiov5IjU3u8Jc+ju9w=="}, {"hash": "poKWZymnFuPSkvHFiBoKvg=="}]}}' -protoset api_descriptor.pb 127.0.0.1:8000 osv.v1.OSV/DetermineVersion diff --git a/gcp/api/v1/osv_service_v1.proto 
b/gcp/api/v1/osv_service_v1.proto index 939eefd1150..1298eda5a1d 100644 --- a/gcp/api/v1/osv_service_v1.proto +++ b/gcp/api/v1/osv_service_v1.proto @@ -97,12 +97,17 @@ message VersionMatchList { message VersionMatch { // Score in the interval (0.0, 1.0] with 1.0 being a perfect match. double score = 1; + // Information about the upstream repository. VersionRepositoryInformation repo_info = 2; // The OSV identifier. osv.Package osv_identifier = 3; // CPE 2.3. string cpe23 = 5; + // The minimum number of files that are definite matches. + int64 minimum_file_matches = 6; + // The estimated number of files that differ. + int64 estimated_diff_files = 7; } message VersionRepositoryInformation { diff --git a/osv/models.py b/osv/models.py index 37687f9f0dc..5d1f643e53b 100644 --- a/osv/models.py +++ b/osv/models.py @@ -679,12 +679,12 @@ class FileResult(ndb.Model): path = ndb.TextProperty() -class RepoIndexResult(ndb.Model): +class RepoIndexBucket(ndb.Model): - """RepoIndexResult entries containing the actual hash values""" + """RepoIndexBucket entries containing the node hash values""" - # The file results per file - file_results = ndb.StructuredProperty(FileResult, repeated=True) - # The actual page - page = ndb.IntegerProperty() + # The hash value of this bucket node + node_hash = ndb.BlobProperty(indexed=True) + # Number of files this hash represents + files_contained = ndb.IntegerProperty() class SourceRepositoryType(enum.IntEnum): diff --git a/tools/indexer-api-caller/api_descriptor.pb b/tools/indexer-api-caller/api_descriptor.pb new file mode 100644 index 00000000000..e26e2dd9275 Binary files /dev/null and b/tools/indexer-api-caller/api_descriptor.pb differ diff --git a/tools/indexer-api-caller/caller.go b/tools/indexer-api-caller/caller.go new file mode 100644 index 00000000000..96b238757c2 --- /dev/null +++ b/tools/indexer-api-caller/caller.go @@ -0,0 +1,97 @@ +package main + +import ( + "crypto/md5" + "encoding/base64" + "flag" + "fmt" + "io/fs" + "log" + "os" + "os/exec" + "path/filepath" + "strings" +) + +var ( + repoDir = flag.String("repo", "", "repo directory") +) + +type Hash = []byte + +// FileResult holds the per file hash and path information.
+type FileResult struct { + Path string `datastore:"path,noindex"` + Hash Hash `datastore:"hash"` +} + +func main() { + flag.Parse() + if err := buildGit(*repoDir); err != nil { + log.Fatal(err) + } +} + +// buildGit hashes the C/C++ source files under repoDir and sends them to the DetermineVersion endpoint via grpcurl. +func buildGit(repoDir string) error { + fileExts := []string{ + ".hpp", + ".h", + ".hh", + ".cc", + ".c", + ".cpp", + } + + var fileResults []*FileResult + if err := filepath.Walk(repoDir, func(p string, info fs.FileInfo, err error) error { + if err != nil { + return err + } + if info.IsDir() { + return nil + } + for _, ext := range fileExts { + if filepath.Ext(p) == ext { + buf, err := os.ReadFile(p) + if err != nil { + return err + } + hash := md5.Sum(buf) + fileResults = append(fileResults, &FileResult{ + Path: strings.ReplaceAll(p, repoDir, ""), + Hash: hash[:], + }) + } + } + return nil + }); err != nil { + return fmt.Errorf("failed during file walk: %w", err) + } + + log.Printf("hashed %d files", len(fileResults)) + + b := strings.Builder{} + b.WriteString(`{"query": {"name":"protobuf", "file_hashes": [`) + + for i, fr := range fileResults { + if i == len(fileResults)-1 { + fmt.Fprintf(&b, "{\"hash\": \"%s\"}", base64.StdEncoding.EncodeToString(fr.Hash)) + } else { + fmt.Fprintf(&b, "{\"hash\": \"%s\"},", base64.StdEncoding.EncodeToString(fr.Hash)) + } + } + b.WriteString("]}}") + + // TODO: Use proper gRPC library calls here + cmd := exec.Command("bash", "-c", `grpcurl -plaintext -d @ -protoset api_descriptor.pb 127.0.0.1:8000 osv.v1.OSV/DetermineVersion`) + cmd.Stdin = strings.NewReader(b.String()) + + output, err := cmd.CombinedOutput() + if err != nil { + log.Panicf("%s: %s", err.Error(), string(output)) + } + + log.Println(string(output)) + return nil +} diff --git a/tools/indexer-api-caller/go.mod b/tools/indexer-api-caller/go.mod new file mode 100644 index 00000000000..e247c425019 --- /dev/null +++ b/tools/indexer-api-caller/go.mod @@ -0,0 +1,3 @@ +module github.com/google/indexer-api-caller + +go 1.19 diff --git a/tools/indexer-api-caller/go.sum b/tools/indexer-api-caller/go.sum new file mode 100644 index 00000000000..e69de29bb2d
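The caller above shells out to grpcurl (hence the TODO); the natural follow-up is to call osv.v1.OSV/DetermineVersion through generated Go stubs. A minimal sketch of that call follows, assuming the stubs produced by the protoc invocation in gcp/api/v1/README.md are importable as github.com/google/osv/api/proto and expose NewOSVClient, DetermineVersionParameters, VersionQuery, FileHash and VersionMatchList; those names are inferred from the proto and the JSON query shape, not verified here.

package main

import (
	"context"
	"time"

	pb "github.com/google/osv/api/proto" // assumed import path, per the --go_opt mapping in the README
	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials/insecure"
)

// determineVersion sends pre-computed MD5 file hashes to the local API
// server over gRPC and returns the candidate version matches.
func determineVersion(name string, hashes [][]byte) (*pb.VersionMatchList, error) {
	conn, err := grpc.Dial("127.0.0.1:8000",
		grpc.WithTransportCredentials(insecure.NewCredentials()))
	if err != nil {
		return nil, err
	}
	defer conn.Close()

	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()

	// Mirror the JSON shape used by grpc_cmd.sh: a query holding a package
	// name and the list of file hashes. The raw MD5 bytes go in directly;
	// base64 is only a detail of the JSON encoding used by grpcurl.
	query := &pb.VersionQuery{Name: name}
	for _, h := range hashes {
		query.FileHashes = append(query.FileHashes, &pb.FileHash{Hash: h})
	}
	return pb.NewOSVClient(conn).DetermineVersion(ctx,
		&pb.DetermineVersionParameters{Query: query})
}

With such a helper, buildGit could pass its fileResults hashes straight to determineVersion instead of serializing JSON for grpcurl, and the returned VersionMatch entries would carry the new minimum_file_matches and estimated_diff_files fields alongside the score.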