@@ -430,6 +430,103 @@ service ContentAddressableStorage {
   rpc GetTree(GetTreeRequest) returns (stream GetTreeResponse) {
     option (google.api.http) = { get: "/v2/{instance_name=**}/blobs/{root_digest.hash}/{root_digest.size_bytes}:getTree" };
   }
+
+  // Split a blob into chunks.
+  //
+  // This splitting API aims to reduce download traffic between client and
+  // server, e.g., if a client needs to fetch a large blob that has just been
+  // modified slightly since the last build. In this case, there is no need
+  // to fetch the entire blob data, but only the binary differences between
+  // the two blob versions, which are typically determined by deduplication
+  // techniques such as content-defined chunking.
+  //
+  // Clients can use this API before downloading a blob to determine which
+  // parts of the blob are already present locally and do not need to be
+  // downloaded again. The server splits the blob into chunks according to
+  // the specified content-defined chunking algorithm and returns a list of
+  // the chunk digests in the order in which the chunks have to be
+  // concatenated to assemble the requested blob.
+  //
+  // A client can expect the following guarantees from the server if a split
+  // request is answered successfully:
+  //  1. The blob chunks are stored in CAS.
+  //  2. Concatenating the blob chunks in the order of the digest list
+  //     returned by the server results in the original blob.
+  //
+  // The usage of this API is optional for clients, but it allows them to
+  // download only the missing parts of a large blob instead of the entire
+  // blob data, which in turn can considerably reduce download network
+  // traffic.
+  //
+  // Since the generated chunks are stored as blobs, they are subject to the
+  // same lifetimes as other blobs. However, their lifetime is extended if
+  // they are part of the result of a split blob request.
+  //
+  // It is recommended that clients verify that the digest of the blob
+  // assembled from the fetched chunks matches the requested blob digest.
+  //
+  // If several clients use blob splitting, it is recommended that they
+  // request the same splitting algorithm so that they can benefit from each
+  // other's chunking data. In combination with blob splicing, an agreement
+  // about the chunking algorithm is recommended, since then both the client
+  // and the server side can benefit from each other's chunking data.
+  //
+  // Servers are free to implement this functionality, but they need to
+  // declare whether they support it or not by setting the
+  // [CacheCapabilities.blob_split_support][build.bazel.remote.execution.v2.CacheCapabilities.blob_split_support]
+  // field accordingly.
+  //
+  // Errors:
+  //
+  // * `NOT_FOUND`: The requested blob is not present in the CAS.
+  // * `RESOURCE_EXHAUSTED`: There is insufficient disk quota to store the
+  //   blob chunks.
+  rpc SplitBlob(SplitBlobRequest) returns (SplitBlobResponse) {
+    option (google.api.http) = { get: "/v2/{instance_name=**}/blobs/{blob_digest.hash}/{blob_digest.size_bytes}:splitBlob" };
+  }
+
+  // Splice a blob from chunks.
+  //
+  // This is the complementary operation to the
+  // [ContentAddressableStorage.SplitBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SplitBlob]
+  // function and handles the split upload of large blobs to save upload
+  // traffic.
+  //
+  // If a client needs to upload a large blob and is able to split it into
+  // chunks locally according to some content-defined chunking algorithm, it
+  // can first determine which parts of the blob are already available in the
+  // remote CAS, upload the missing chunks, and then use this API to instruct
+  // the server to splice the original blob from the remotely available blob
+  // chunks.
+  //
+  // In order to ensure data consistency of the CAS, the server will verify
+  // that the digest of the spliced result matches the digest provided in the
+  // request and will reject the splice request if this check fails.
+  //
+  // The usage of this API is optional for clients, but it allows them to
+  // upload only the missing parts of a large blob instead of the entire blob
+  // data, which in turn can considerably reduce upload network traffic.
+  //
+  // In order to split a blob into chunks, it is recommended that the client
+  // use one of the chunking algorithms advertised by the server in the
+  // [CacheCapabilities.supported_chunking_algorithms][build.bazel.remote.execution.v2.CacheCapabilities.supported_chunking_algorithms]
+  // field, so that client and server can benefit from each other's chunking
+  // data. If several clients use blob splicing, it is recommended that they
+  // use the same splitting algorithm to split their blobs into chunks.
+  //
+  // Servers are free to implement this functionality, but they need to
+  // declare whether they support it or not by setting the
+  // [CacheCapabilities.blob_splice_support][build.bazel.remote.execution.v2.CacheCapabilities.blob_splice_support]
+  // field accordingly.
+  //
+  // Errors:
+  //
+  // * `NOT_FOUND`: At least one of the blob chunks is not present in the
+  //   CAS.
+  // * `RESOURCE_EXHAUSTED`: There is insufficient disk quota to store the
+  //   spliced blob.
+  // * `INVALID_ARGUMENT`: The digest of the spliced blob is different from
+  //   the provided expected digest.
+  rpc SpliceBlob(SpliceBlobRequest) returns (SpliceBlobResponse) {
+    option (google.api.http) = { post: "/v2/{instance_name=**}/blobs:spliceBlob" body: "*" };
+  }
 }
 
 // The Capabilities service may be used by remote execution clients to query
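As a rough illustration of the intended download flow, a client could wire SplitBlob up as in the following Python sketch. It assumes gRPC stubs generated from this patched proto (the module name re_pb2 is illustrative, not part of the official bindings), SHA-256 as the digest function, and a hypothetical download_blob helper standing in for a ByteStream read:

# Minimal client-side download sketch for SplitBlob; module and helper
# names are assumptions, not part of the official bindings.
import hashlib

import remote_execution_pb2 as re_pb2  # assumed codegen of this proto

def fetch_blob_via_split(cas_stub, instance_name, blob_digest, local_chunks):
    """Fetches `blob_digest`, downloading only chunks missing from
    `local_chunks` (a dict mapping chunk hash -> chunk bytes)."""
    response = cas_stub.SplitBlob(re_pb2.SplitBlobRequest(
        instance_name=instance_name,
        blob_digest=blob_digest,
        chunking_algorithm=re_pb2.ChunkingAlgorithm.FASTCDC,
    ))
    # Guarantee 1: every returned chunk is now stored in the CAS, so any
    # locally missing chunk can be downloaded individually.
    for chunk_digest in response.chunk_digests:
        if chunk_digest.hash not in local_chunks:
            # download_blob is a hypothetical ByteStream-read helper.
            local_chunks[chunk_digest.hash] = download_blob(cas_stub, chunk_digest)
    # Guarantee 2: concatenation in list order yields the original blob.
    blob = b"".join(local_chunks[d.hash] for d in response.chunk_digests)
    # Recommended client-side verification of the assembled blob.
    if hashlib.sha256(blob).hexdigest() != blob_digest.hash:
        raise ValueError("assembled blob does not match requested digest")
    return blob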
@@ -1814,6 +1911,60 @@ message GetTreeResponse {
   string next_page_token = 2;
 }
 
+// A request message for
+// [ContentAddressableStorage.SplitBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SplitBlob].
+message SplitBlobRequest {
+  // The instance of the execution system to operate against. A server may
+  // support multiple instances of the execution system (with their own
+  // workers, storage, caches, etc.). The server MAY require use of this
+  // field to select between them in an implementation-defined fashion,
+  // otherwise it can be omitted.
+  string instance_name = 1;
+
+  // The digest of the blob to be split.
+  Digest blob_digest = 2;
+
+  // The chunking algorithm to be used. Must be IDENTITY (no chunking) or
+  // one of the algorithms advertised by the
+  // [CacheCapabilities.supported_chunking_algorithms][build.bazel.remote.execution.v2.CacheCapabilities.supported_chunking_algorithms]
+  // field.
+  ChunkingAlgorithm.Value chunking_algorithm = 3;
+}
+
+// A response message for
+// [ContentAddressableStorage.SplitBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SplitBlob].
+message SplitBlobResponse {
+  // The ordered list of digests of the chunks into which the blob was
+  // split. The original blob is assembled by concatenating the chunk data
+  // according to the order of the digests given by this list.
+  repeated Digest chunk_digests = 1;
+}
+
+// A request message for
+// [ContentAddressableStorage.SpliceBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SpliceBlob].
+message SpliceBlobRequest {
+  // The instance of the execution system to operate against. A server may
+  // support multiple instances of the execution system (with their own
+  // workers, storage, caches, etc.). The server MAY require use of this
+  // field to select between them in an implementation-defined fashion,
+  // otherwise it can be omitted.
+  string instance_name = 1;
+
+  // The expected digest of the spliced blob.
+  Digest blob_digest = 2;
+
+  // The ordered list of digests of the chunks which need to be concatenated
+  // to assemble the original blob.
+  repeated Digest chunk_digests = 3;
+}
+
+// A response message for
+// [ContentAddressableStorage.SpliceBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SpliceBlob].
+message SpliceBlobResponse {
+  // The computed digest of the spliced blob.
+  Digest blob_digest = 1;
+}
+
 // A request message for
 // [Capabilities.GetCapabilities][build.bazel.remote.execution.v2.Capabilities.GetCapabilities].
 message GetCapabilitiesRequest {
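The upload direction composes SpliceBlob with the existing FindMissingBlobs RPC. The sketch below continues the assumptions of the download sketch above; split_into_chunks is the chunker sketched further down, and upload_blob is a hypothetical stand-in for a ByteStream write (in the real API, CAS and ByteStream are separate services):

# Client-side upload sketch for SpliceBlob; helper names are assumptions.
import hashlib

import remote_execution_pb2 as re_pb2  # assumed codegen of this proto

def upload_blob_via_splice(cas_stub, instance_name, blob):
    chunks = split_into_chunks(blob)  # content-defined chunking, see below
    chunk_digests = [
        re_pb2.Digest(hash=hashlib.sha256(c).hexdigest(), size_bytes=len(c))
        for c in chunks
    ]
    # Upload only the chunks the server does not already store.
    missing = cas_stub.FindMissingBlobs(re_pb2.FindMissingBlobsRequest(
        instance_name=instance_name,
        blob_digests=chunk_digests,
    )).missing_blob_digests
    missing_hashes = {d.hash for d in missing}
    for chunk, digest in zip(chunks, chunk_digests):
        if digest.hash in missing_hashes:
            upload_blob(cas_stub, digest, chunk)  # hypothetical ByteStream write
    # Ask the server to splice; it verifies this digest before storing.
    blob_digest = re_pb2.Digest(
        hash=hashlib.sha256(blob).hexdigest(), size_bytes=len(blob))
    cas_stub.SpliceBlob(re_pb2.SpliceBlobRequest(
        instance_name=instance_name,
        blob_digest=blob_digest,
        chunk_digests=chunk_digests,
    ))
    return blob_digest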
@@ -2000,6 +2151,40 @@ message Compressor {
   }
 }
 
+// Content-defined chunking algorithms used for splitting blobs into chunks.
+message ChunkingAlgorithm {
+  enum Value {
+    // No chunking. Servers MUST always support this, and do not need to
+    // advertise it.
+    IDENTITY = 0;
+
+    // Content-defined chunking using Rabin fingerprints. An implementation
+    // of this scheme is presented in this paper:
+    // https://link.springer.com/chapter/10.1007/978-1-4613-9323-8_11
+    // The final implementation of this algorithm should be configured to
+    // have the following properties on the resulting chunk sizes:
+    //  - Minimum chunk size: 2 KB
+    //  - Average chunk size: 8 KB (0x00000000007FFFFF bit mask)
+    //  - Maximum chunk size: 64 KB
+    // The irreducible polynomial to be used for the modulo divisions is the
+    // following 64-bit polynomial of degree 53: 0x003DA3358B4DC173. The
+    // window size to be used is 64 bits.
+    RABINCDC = 1;
+
+    // Content-defined chunking using the FastCDC algorithm. The algorithm
+    // is described in this paper: https://ieeexplore.ieee.org/document/9055082
+    // (Algorithm 2, FastCDC8KB). The algorithm is configured to have the
+    // following properties on the resulting chunk sizes:
+    //  - Minimum chunk size: 2 KB
+    //  - Average chunk size: 8 KB
+    //  - Maximum chunk size: 64 KB
+    // The 256 64-bit random numbers in the Gear table are to be created
+    // with the Mersenne Twister pseudo-random number generator for 64-bit
+    // numbers with a state size of 19937 bits and a seed of 0.
+    FASTCDC = 2;
+  }
+}
+
 // Capabilities of the remote cache system.
 message CacheCapabilities {
   // All the digest functions supported by the remote cache.
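To make the FastCDC comment above concrete, here is a minimal, deliberately non-conforming Python sketch of the cut-point logic. The masks use contiguous low bits (15 bits below the 8 KB normal size, 11 bits above it) rather than the spread-out mask values of the paper, and Python's random module is the 32-bit MT19937, not the 64-bit Mersenne Twister variant the comment prescribes, so the Gear table here is a placeholder:

# Illustrative FastCDC-style chunker; masks and Gear table are simplified
# stand-ins, not the spec-conforming values.
import random

MIN_SIZE, NORMAL_SIZE, MAX_SIZE = 2 * 1024, 8 * 1024, 64 * 1024
MASK_S = (1 << 15) - 1  # "harder" mask before NORMAL_SIZE (15 bits)
MASK_L = (1 << 11) - 1  # "easier" mask after NORMAL_SIZE (11 bits)

_rng = random.Random(0)  # placeholder for an MT19937-64 seeded with 0
GEAR = [_rng.getrandbits(64) for _ in range(256)]

def next_cut_point(data: bytes) -> int:
    """Returns the length of the first chunk of `data`."""
    n = min(len(data), MAX_SIZE)
    if n <= MIN_SIZE:
        return n
    fp = 0
    # Skip the minimum size, then demand more mask bits up to NORMAL_SIZE
    # (discouraging small chunks) and fewer afterwards (discouraging large
    # ones), which normalizes the chunk-size distribution around 8 KB.
    for i in range(MIN_SIZE, n):
        fp = ((fp << 1) + GEAR[data[i]]) & 0xFFFFFFFFFFFFFFFF
        mask = MASK_S if i < NORMAL_SIZE else MASK_L
        if fp & mask == 0:
            return i + 1
    return n

def split_into_chunks(blob: bytes) -> list:
    """Splits `blob` into content-defined chunks."""
    chunks, pos = [], 0
    while pos < len(blob):
        cut = next_cut_point(blob[pos:])
        chunks.append(blob[pos:pos + cut])
        pos += cut
    return chunks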
@@ -2033,6 +2218,25 @@ message CacheCapabilities {
   // [BatchUpdateBlobs][build.bazel.remote.execution.v2.ContentAddressableStorage.BatchUpdateBlobs]
   // requests.
   repeated Compressor.Value supported_batch_update_compressors = 7;
+
+  // All the chunking algorithms supported by the remote cache. A remote
+  // cache may support multiple chunking algorithms simultaneously. Servers
+  // MUST support IDENTITY (no chunking), even if it is not listed here.
+  repeated ChunkingAlgorithm.Value supported_chunking_algorithms = 8;
+
+  // Whether blob splitting is supported for the particular server/instance.
+  // If yes, the server/instance implements the specified behavior for blob
+  // splitting, and a meaningful result can be expected from the
+  // [ContentAddressableStorage.SplitBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SplitBlob]
+  // operation.
+  bool blob_split_support = 9;
+
+  // Whether blob splicing is supported for the particular server/instance.
+  // If yes, the server/instance implements the specified behavior for blob
+  // splicing, and a meaningful result can be expected from the
+  // [ContentAddressableStorage.SpliceBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SpliceBlob]
+  // operation.
+  bool blob_splice_support = 10;
 }
 
 // Capabilities of the remote execution system.
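Finally, a client would gate all of the above on the advertised capabilities, along the following lines (same illustrative stub and module assumptions as the earlier sketches; GetCapabilities is the existing Capabilities RPC):

# Probe server capabilities before relying on split/splice.
import remote_execution_pb2 as re_pb2  # assumed codegen of this proto

def negotiate_chunking(capabilities_stub, instance_name):
    """Returns (use_split, use_splice, algorithm) from server capabilities."""
    caps = capabilities_stub.GetCapabilities(
        re_pb2.GetCapabilitiesRequest(instance_name=instance_name))
    cache = caps.cache_capabilities
    # IDENTITY is always supported but returns the blob as a single chunk,
    # saving no traffic; prefer an advertised content-defined algorithm.
    if re_pb2.ChunkingAlgorithm.FASTCDC in cache.supported_chunking_algorithms:
        algorithm = re_pb2.ChunkingAlgorithm.FASTCDC
    else:
        algorithm = re_pb2.ChunkingAlgorithm.IDENTITY
    return cache.blob_split_support, cache.blob_splice_support, algorithm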