diff --git a/README.md b/README.md index b1d7cfe..91e215c 100644 --- a/README.md +++ b/README.md @@ -193,3 +193,7 @@ $ docker run -d -p 9000:9000 -p 9001:9001 \ ``` The full test suite contains 63 tests + +## Architecture Decisions + +- [ADR 0001: Streaming File Uploads and Configurable S3 Endpoints](docs/adr/0001-streaming-file-upload-and-custom-endpoints.md) diff --git a/docs/adr/0001-streaming-file-upload-and-custom-endpoints.md b/docs/adr/0001-streaming-file-upload-and-custom-endpoints.md new file mode 100644 index 0000000..51a3885 --- /dev/null +++ b/docs/adr/0001-streaming-file-upload-and-custom-endpoints.md @@ -0,0 +1,96 @@ +# ADR 0001: Streaming File Uploads and Configurable S3 Endpoints + +- Status: Accepted +- Date: 2026-06-12 + +## Context + +`s3cpp` supports uploading an in-memory string, but it does not provide a +file-backed upload operation. Supporting file uploads without loading the +complete file into memory requires: + +- upload progress reporting and cancellation; +- HTTP and HTTPS custom endpoints; +- a configurable AWS signing region; +- `Content-Type` and `Content-Length` metadata on uploaded objects. + +The existing `PutObject` implementation accepts an in-memory string. This is +appropriate for small objects, but it causes memory usage to grow with the file +size and cannot report streaming upload progress. + +AWS Signature Version 4 normally includes the SHA-256 hash of the complete +payload. A streamed file therefore needs its hash before the signed HTTP +request starts. + +## Decision + +Add a file-backed single-request `PutObjectFile` operation. + +`PutObjectFile` performs these steps: + +1. Read the file and calculate its SHA-256 hash with OpenSSL EVP. +2. Sign the request using the precomputed payload hash. +3. Stream the file through libcurl using `CURLOPT_UPLOAD`. +4. Report progress through libcurl's transfer progress callback. + +The callback returns `true` to continue or `false` to cancel the transfer. 
+ +Add `HttpFileRequest` as a separate request type instead of adding file state +to `HttpBodyRequest`. This keeps in-memory and file-backed uploads explicit and +prevents accidental buffering of file contents. + +Extend the custom-endpoint `S3Client` constructor with: + +- `useHttps`, controlling the URL scheme; +- `region`, used by AWS Signature Version 4. + +Keep the existing constructor as a compatibility overload. It defaults to HTTP +and the `us-east-1` region, matching the previous custom-endpoint behavior. + +Apply optional `Content-Type` and `Content-Length` values in the existing +in-memory `PutObject` implementation as well. + +## Consequences + +### Positive + +- File memory usage remains bounded regardless of object size. +- Applications can display upload progress and cancel an upload. +- The same client can connect to HTTP MinIO instances and HTTPS S3-compatible + services. +- File uploads are signed with the actual payload hash. +- Existing users of the custom-endpoint constructor remain source compatible. + +### Negative + +- Each file is read twice: once for SHA-256 calculation and once for upload. +- Upload begins only after the initial hashing pass completes. +- The implementation uses a single S3 `PutObject` request and does not support + multipart upload, retrying individual parts, or resuming an interrupted + upload. +- A single `PutObject` is limited to 5 GiB by the S3 API. +- The new implementation adds direct OpenSSL EVP and filesystem usage. + +## Alternatives Considered + +### Load the complete file into a string + +Rejected because memory consumption would scale with file size and progress +would only describe sending an already-buffered payload. + +### Use unsigned payload mode + +Rejected as the default because support differs between S3-compatible servers +and it weakens payload integrity guarantees. + +### Implement multipart upload + +Deferred. 
Multipart upload is the appropriate solution for large files,
+per-part retries, and resumable transfers, but requires additional S3
+operations and lifecycle handling. It should be introduced in a separate ADR.
+
+## Scope
+
+This decision covers single-request file uploads and endpoint configuration.
+It does not define multipart upload, download streaming, retry policy, or
+persistent transfer state.
diff --git a/src/s3cpp/auth.cpp b/src/s3cpp/auth.cpp
index 2704cee..632f9a8 100644
--- a/src/s3cpp/auth.cpp
+++ b/src/s3cpp/auth.cpp
@@ -29,8 +29,16 @@ template void AWSSigV4Signer::sign(HttpRequestBase &request) {
         "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855";
   }
   request.header("x-amz-content-sha256", payload_hash);
+  sign(request, payload_hash); // delegate to the precomputed-hash overload. NOTE(review): that overload sets x-amz-content-sha256 again, so the line above looks redundant — confirm
+}
+
+template
+void AWSSigV4Signer::sign(HttpRequestBase &request, // signs `request` using a payload hash supplied by the caller instead of one computed here
+                          const std::string &payload_hash) {
+  request.header("x-amz-content-sha256", payload_hash); // attach the supplied hash as the x-amz-content-sha256 header
 
   // Skip signing for anonymous requests
+  const bool is_anonymous = access_key.empty() || secret_key.empty(); // a missing access key or secret key means the request is left unsigned
   if (is_anonymous) {
     return;
   }
@@ -87,17 +95,18 @@ AWSSigV4Signer::createCannonicalRequest(HttpRequestBase &request,
   std::string url = request.getURL();
 
   // URI
-  std::string uri{};
-  if (size_t bpos = url.find("amazonaws.com"); bpos != std::string::npos) {
-    uri = url.erase(0, bpos + 13);
+  const size_t scheme_end = url.find("://"); // npos when the URL carries no scheme
+  const size_t authority_start =
+      scheme_end == std::string::npos ? 0 : scheme_end + 3; // skip past "://", or start at 0 without a scheme
+  const size_t path_start = url.find_first_of("/?", authority_start); // first '/' or '?' after the host[:port] part
+
+  std::string uri;
+  if (path_start == std::string::npos) { // host only: the path defaults to "/"
+    uri = "/";
+  } else if (url[path_start] == '?') { // query directly after the host: prepend the missing "/"
+    uri = "/" + url.substr(path_start);
   } else {
-    // Assume localhost:XXXX (dirty, sorry :( i know)
-    size_t path_start = url.find('/', 7);
-    if (path_start != std::string::npos) {
-      uri = url.substr(path_start);
-    } else {
-      uri = "/";
-    }
+    uri = url.substr(path_start); // path plus any query string, taken verbatim
   }
   size_t begin_q = uri.find("?");
   const std::string cannonical_uri =
@@ -227,9 +236,14 @@ AWSSigV4Signer::deriveSigningKey(const std::string request_date) {
 
 template void AWSSigV4Signer::sign(HttpRequestBase &);
 template void AWSSigV4Signer::sign(HttpRequestBase &);
+template void
+AWSSigV4Signer::sign(HttpRequestBase &,
+                     const std::string &);
 template std::string AWSSigV4Signer::createCannonicalRequest(
     HttpRequestBase &, const std::string &);
 template std::string AWSSigV4Signer::createCannonicalRequest(
     HttpRequestBase &, const std::string &);
+template std::string AWSSigV4Signer::createCannonicalRequest(
+    HttpRequestBase &, const std::string &);
 
 } // namespace s3cpp
diff --git a/src/s3cpp/auth.h b/src/s3cpp/auth.h
index effb0e9..67c6d5d 100644
--- a/src/s3cpp/auth.h
+++ b/src/s3cpp/auth.h
@@ -17,6 +17,8 @@ class AWSSigV4Signer {
         secret_key(std::move(secret)) {}
   template
   void sign(HttpRequestBase &request);
+  template
+  void sign(HttpRequestBase &request, const std::string &payload_hash); // overload taking a caller-supplied payload hash
 
   template
   std::string createCannonicalRequest(
diff --git a/src/s3cpp/httpclient.cpp b/src/s3cpp/httpclient.cpp
index 0d10a8a..45c63ff 100644
--- a/src/s3cpp/httpclient.cpp
+++ b/src/s3cpp/httpclient.cpp
@@ -2,6 +2,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -32,6 +33,10 @@ std::expected HttpBodyRequest::execute() {
   }
 }
 
+std::expected HttpFileRequest::execute() {
+  return client_.execute_upload(*this); // file requests always go through the client's upload path
+}
+
 std::expected
 HttpClient::execute_get(HttpRequest &request) {
   if
(!curl_handle) {
@@ -193,6 +198,68 @@ HttpClient::execute_post(HttpBodyRequest &request) {
                       std::move(headers_buf));
 }
 
+std::expected
+HttpClient::execute_upload(HttpFileRequest &request) { // uploads the file named by the request via libcurl upload mode; returns the response or an error string
+  if (!curl_handle) {
+    return std::unexpected("cURL handle is invalid");
+  }
+
+  FILE *file = std::fopen(request.getFilename().c_str(), "rb"); // closed by std::fclose after curl_easy_perform below
+  if (!file) {
+    return std::unexpected(
+        std::format("unable to open file: {}", request.getFilename()));
+  }
+
+  std::string body_buf;
+  std::map headers_buf;
+  auto headers = request.getHeaders(); // local copy, so the merge below does not touch the request
+  headers.insert(this->getHeaders().begin(), this->getHeaders().end()); // NOTE(review): if this is a map, insert() keeps existing keys, so request headers would win over client defaults — confirm container type
+  struct curl_slist *list = nullptr;
+  for (const auto &[key, value] : headers) {
+    list = curl_slist_append(list, std::format("{}: {}", key, value).c_str());
+  }
+
+  curl_easy_reset(curl_handle); // clear options set on the handle by earlier requests
+  curl_easy_setopt(curl_handle, CURLOPT_URL, request.getURL().c_str());
+  curl_easy_setopt(curl_handle, CURLOPT_UPLOAD, 1L); // switch the handle to upload mode
+  curl_easy_setopt(curl_handle, CURLOPT_READDATA, file); // no CURLOPT_READFUNCTION is set, so libcurl's default reader consumes this FILE*. NOTE(review): libcurl docs require an explicit read callback when libcurl is a Windows DLL
+  curl_easy_setopt(curl_handle, CURLOPT_INFILESIZE_LARGE, request.getFileSize()); // upload size announced to libcurl
+  curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, write_callback);
+  curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, &body_buf);
+  curl_easy_setopt(curl_handle, CURLOPT_HEADERFUNCTION, header_callback);
+  curl_easy_setopt(curl_handle, CURLOPT_HEADERDATA, &headers_buf);
+  curl_easy_setopt(curl_handle, CURLOPT_HTTPHEADER, list);
+  curl_easy_setopt(curl_handle, CURLOPT_TIMEOUT, request.getTimeout()); // NOTE(review): CURLOPT_TIMEOUT bounds the whole transfer, so large uploads need a proportionally large value
+
+  const auto &progress = request.getProgressCallback();
+  if (progress) { // install the progress hook only when a callback was supplied
+    curl_easy_setopt(curl_handle, CURLOPT_NOPROGRESS, 0L); // libcurl disables progress callbacks by default
+    curl_easy_setopt(
+        curl_handle,
+        CURLOPT_XFERINFOFUNCTION,
+        +[](void *userdata, curl_off_t, curl_off_t, curl_off_t upload_total, // unary + turns the captureless lambda into a function pointer; the two unnamed arguments are the download counters
+            curl_off_t uploaded) -> int {
+          const auto *callback =
+              static_cast(userdata);
+          return (*callback)(uploaded, upload_total) ? 0 : 1; // non-zero tells libcurl to abort. NOTE(review): an exception thrown by the user callback would unwind through libcurl's C frames
+        });
+    curl_easy_setopt(curl_handle, CURLOPT_XFERINFODATA, &progress); // points at the callback stored in the request
+  }
+
+  const CURLcode code = curl_easy_perform(curl_handle);
+  long response_code = 0;
+  curl_easy_getinfo(curl_handle, CURLINFO_HTTP_CODE, &response_code);
+  curl_slist_free_all(list); // header list and file are released whether or not the transfer succeeded
+  std::fclose(file);
+
+  if (code != CURLE_OK) {
+    return std::unexpected(
+        std::format("libcurl upload error: {}", curl_easy_strerror(code)));
+  }
+  return HttpResponse(static_cast(response_code), std::move(body_buf),
+                      std::move(headers_buf));
+}
+
 std::expected
 HttpClient::execute_delete(HttpBodyRequest &request) {
   if (!curl_handle) {
diff --git a/src/s3cpp/httpclient.h b/src/s3cpp/httpclient.h
index 0f0e5b7..b806ee4 100644
--- a/src/s3cpp/httpclient.h
+++ b/src/s3cpp/httpclient.h
@@ -6,6 +6,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -17,6 +18,9 @@ class HttpClient;
 
 enum class HttpMethod { Get, Post, Put, Head, Delete };
 
+using UploadProgressCallback = // called as callback(uploaded, upload_total); a true result continues the transfer, false aborts it
+    std::function;
+
 struct LowerCaseCompare { // A custom lambda to sort keys alphabetically
   bool operator()(const std::string &a, const std::string &b) const {
     std::string sa = a;
@@ -150,12 +154,38 @@ class HttpBodyRequest : public HttpRequestBase {
   std::string body_ = "";
 };
 
+class HttpFileRequest : public HttpRequestBase { // request whose body is a file on disk, identified by name and size
+public:
+  HttpFileRequest(HttpClient &client, std::string URL, std::string filename,
+                  curl_off_t file_size)
+      : HttpRequestBase(client, std::move(URL), HttpMethod::Put), // the method is always Put
+        filename_(std::move(filename)), file_size_(file_size) {}
+
+  HttpFileRequest &progress(UploadProgressCallback callback) { // stores the callback and returns *this for chaining
+    progress_callback_ = std::move(callback);
+    return *this;
+  }
+
+  const std::string &getFilename() const { return filename_; }
+  curl_off_t getFileSize() const { return file_size_; }
+  const UploadProgressCallback &getProgressCallback() const {
+    return progress_callback_;
+  }
+  std::expected execute();
+
+private:
+  std::string filename_;
+  curl_off_t file_size_; // size passed in by the caller; not re-checked against the file in this class
+  UploadProgressCallback progress_callback_;
+};
+
 // HttpClient should only focus on handling the cURL handle
 // and making the request (HttpRequest) and returning HttpResponse
 class HttpClient {
   // `execute()` is invoked from the request only
   friend class HttpRequest;
   friend class HttpBodyRequest;
+  friend class HttpFileRequest;
 
 public:
   HttpClient() {
@@ -207,6 +237,11 @@ class HttpClient {
   [[nodiscard]] HttpBodyRequest put(const std::string &URL) {
     return HttpBodyRequest{*this, URL, HttpMethod::Put};
   };
+  [[nodiscard]] HttpFileRequest putFile(const std::string &URL, // builds a file-backed request bound to this client
+                                        const std::string &filename,
+                                        curl_off_t file_size) {
+    return HttpFileRequest{*this, URL, filename, file_size};
+  };
   [[nodiscard]] HttpBodyRequest del(const std::string &URL) {
     return HttpBodyRequest{*this, URL, HttpMethod::Delete};
   };
@@ -228,6 +263,8 @@ class HttpClient {
   std::expected
   execute_post(HttpBodyRequest &request);
   std::expected
+  execute_upload(HttpFileRequest &request);
+  std::expected
   execute_delete(HttpBodyRequest &request);
 
   const std::unordered_map &getHeaders() const {
diff --git a/src/s3cpp/s3.cpp b/src/s3cpp/s3.cpp
index 8d9aa32..05abed2 100644
--- a/src/s3cpp/s3.cpp
+++ b/src/s3cpp/s3.cpp
@@ -2,9 +2,65 @@
 #include "s3cpp/httpclient.h"
 #include
 #include
+#include
+#include
+#include
+#include
 
 namespace s3cpp {
 
+namespace {
+
+std::expected HashFile(const std::string &filename) { // returns the file's SHA-256 as lowercase hex, or an Error with Code "FileError"/"HashError"
+  std::ifstream input(filename, std::ios::binary);
+  if (!input) {
+    return std::unexpected(
+        Error{.Code = "FileError", .Message = "unable to open " + filename});
+  }
+
+  EVP_MD_CTX *context = EVP_MD_CTX_new(); // freed manually with EVP_MD_CTX_free on every exit path below
+  if (!context || EVP_DigestInit_ex(context, EVP_sha256(), nullptr) != 1) {
+    EVP_MD_CTX_free(context);
+    return std::unexpected(
+        Error{.Code = "HashError", .Message = "unable to initialize SHA-256"});
+  }
+  std::vector buffer(1024 * 1024); // read buffer of 1024 * 1024 elements, reused for every chunk
+  while (input) { // a short final read sets failbit, which ends the loop after its bytes were hashed
+    input.read(buffer.data(), static_cast(buffer.size()));
+    const auto count = input.gcount(); // bytes actually read by the call above
+    if (count > 0) {
+      if (EVP_DigestUpdate(context, buffer.data(),
+                           static_cast(count)) != 1) {
+        EVP_MD_CTX_free(context);
+        return std::unexpected(
+            Error{.Code = "HashError", .Message = "unable to hash file"});
+      }
+    }
+  }
+  if (!input.eof()) { // the loop stopped without reaching end of file: treat as a read error
+    EVP_MD_CTX_free(context);
+    return std::unexpected(
+        Error{.Code = "FileError", .Message = "unable to read " + filename});
+  }
+
+  unsigned char digest[EVP_MAX_MD_SIZE];
+  unsigned int digest_size = 0;
+  if (EVP_DigestFinal_ex(context, digest, &digest_size) != 1) {
+    EVP_MD_CTX_free(context);
+    return std::unexpected(
+        Error{.Code = "HashError", .Message = "unable to finish SHA-256"});
+  }
+  EVP_MD_CTX_free(context);
+  std::ostringstream output;
+  for (unsigned int index = 0; index < digest_size; ++index) {
+    output << std::hex << std::setw(2) << std::setfill('0') // setw applies to the next insertion only, so it is repeated for each byte
+           << static_cast(digest[index]);
+  }
+  return output.str();
+}
+
+} // namespace
+
 bool Ping(const std::string & endpoint_) {
   // AWS S3 docs do not provide a check health or ping method.
   // We send a GET request and check for S3-specific response
@@ -334,8 +390,10 @@ std::expected S3Client::PutObject(const std::string &buc
 
   HttpBodyRequest req =
       Client.put(url).header("Host", getHostHeader(bucket)).body(body);
-  // opt headers
-  // ...
+  if (options.ContentType) // optional headers are added before Signer.sign(req) runs
+    req.header("Content-Type", *options.ContentType);
+  if (options.ContentLength)
+    req.header("Content-Length", std::to_string(*options.ContentLength)); // NOTE(review): the caller-supplied length is not compared with body.size() here
   Signer.sign(req);
 
   auto result = req.execute();
@@ -351,6 +409,43 @@ std::expected S3Client::PutObject(const std::string &buc
   return std::unexpected(deserializeError(XMLBody));
 }
 
+std::expected
+S3Client::PutObjectFile(const std::string &bucket, const std::string &key, // uploads `filename` to bucket/key in one request: size lookup, hash pass, signed upload
+                        const std::string &filename,
+                        const std::string &contentType,
+                        UploadProgressCallback progress) {
+  std::error_code error;
+  const auto size = std::filesystem::file_size(filename, error); // error_code overload: failure is reported through `error`, not thrown
+  if (error) {
+    return std::unexpected(
+        Error{.Code = "FileError", .Message = error.message()});
+  }
+
+  auto hash = HashFile(filename); // full read of the file before any request is sent
+  if (!hash) {
+    return std::unexpected(hash.error());
+  }
+
+  const std::string url = buildURL(bucket) + std::format("/{}", key);
+  auto request =
+      Client.putFile(url, filename, static_cast(size)) // NOTE(review): the file is reopened by name for the upload; a change after hashing would make size or hash stale
+          .header("Host", getHostHeader(bucket))
+          .header("Content-Type", contentType)
+          .header("Content-Length", std::to_string(size))
+          .progress(std::move(progress));
+  Signer.sign(request, *hash); // sign with the hash computed above rather than hashing a request body
+
+  auto result = request.execute();
+  if (!result) { // transport-level failure: wrap the error string as an "HttpError"
+    return std::unexpected(
+        Error{.Code = "HttpError", .Message = result.error()});
+  }
+  if (result->is_ok()) {
+    return deserializePutObjectResult(result->headers()); // success result is built from the response headers
+  }
+  return std::unexpected(deserializeError(Parser.parse(result->body()))); // otherwise the response body is parsed into an Error
+}
+
 std::expected S3Client::DeleteObject(const std::string &bucket, const std::string &key, const DeleteObjectInput &options) {
   std::string url = buildURL(bucket) + std::format("/{}", key);
 
diff --git a/src/s3cpp/s3.h b/src/s3cpp/s3.h
index 261dd76..e624a3d 100644
--- a/src/s3cpp/s3.h
+++ b/src/s3cpp/s3.h
@@ -2,6 +2,7 @@
 #define S3CPP_S3
 
 #include
+#include
 #include
 #include
 #include
@@ -31,9 +32,13 @@ class S3Client {
   }
   S3Client(const std::string &access, const std::string &secret,
            const std::string &customEndpoint,
S3AddressingStyle style)
-      : Client(HttpClient()), Signer(AWSSigV4Signer(access, secret)),
+      : S3Client(access, secret, customEndpoint, style, false, "us-east-1") {} // compatibility overload: delegates with useHttps = false and region "us-east-1"
+  S3Client(const std::string &access, const std::string &secret, // custom-endpoint constructor with explicit scheme and signing region
+           const std::string &customEndpoint, S3AddressingStyle style,
+           bool useHttps, const std::string &region)
+      : Client(HttpClient()), Signer(AWSSigV4Signer(access, secret, region)), // region is forwarded to the signer
         Parser(XMLParser()), endpoint_(customEndpoint),
-        addressing_style_(style) {}
+        addressing_style_(style), use_https_(useHttps) {}
 
   // S3 operations: Goal is to support CRUD and stay minimal
 
@@ -41,6 +46,11 @@ class S3Client {
   std::expected ListBuckets(const ListBucketsInput &options = {});
   std::expected GetObject(const std::string &bucket, const std::string &key, const GetObjectInput &options = {});
   std::expected PutObject(const std::string &bucket, const std::string &key, const std::string &body, const PutObjectInput &options = {});
+  std::expected
+  PutObjectFile(const std::string &bucket, const std::string &key, // file-backed upload; `progress` may be left empty
+                const std::string &filename,
+                const std::string &contentType = "application/octet-stream",
+                UploadProgressCallback progress = {});
   std::expected DeleteObject(const std::string &bucket, const std::string &key, const DeleteObjectInput &options = {});
   std::expected CreateBucket(const std::string &bucket, const CreateBucketConfiguration &configuration = {}, const CreateBucketInput &options = {});
   std::expected DeleteBucket(const std::string &bucket, const DeleteBucketInput &options = {});
   std::expected HeadBucket(const std::string &bucket, const HeadBucketInput &options = {});
@@ -65,6 +75,7 @@ class S3Client {
   XMLParser Parser;
   std::string endpoint_;
   S3AddressingStyle addressing_style_;
+  bool use_https_ = true; // selects the scheme for path-style URLs in buildURL. NOTE(review): the virtual-hosted branch still hard-codes "https" — confirm that is intended
 
   std::string buildURL(const std::string &bucket) const {
     if (addressing_style_ == S3AddressingStyle::VirtualHosted) {
@@ -72,7 +83,8 @@ class S3Client {
       return std::format("https://{}.{}", bucket, endpoint_);
     } else {
       // endpoint/bucket/key
-      return std::format("http://{}/{}", endpoint_, bucket);
+      return std::format("{}://{}/{}", use_https_ ? "https" : "http", // scheme now follows use_https_ instead of a fixed "http"
+                         endpoint_, bucket);
     }
   }