Remote IO: S3 support #479

Merged: 27 commits, Oct 22, 2024
Changes from 6 commits
2 changes: 2 additions & 0 deletions conda/environments/all_cuda-118_arch-aarch64.yaml
@@ -6,6 +6,7 @@ channels:
- conda-forge
- nvidia
dependencies:
- boto3>=1.21.21
- c-compiler
- cmake>=3.26.4,!=3.30.0
- cuda-python>=11.7.1,<12.0a0
@@ -18,6 +19,7 @@ dependencies:
- doxygen=1.9.1
- gcc_linux-aarch64=11.*
- libcurl>=7.87.0
- moto>=4.0.8
- ninja
- numcodecs !=0.12.0
- numpy>=1.23,<3.0a0
2 changes: 2 additions & 0 deletions conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -6,6 +6,7 @@ channels:
- conda-forge
- nvidia
dependencies:
- boto3>=1.21.21
- c-compiler
- cmake>=3.26.4,!=3.30.0
- cuda-python>=11.7.1,<12.0a0
@@ -20,6 +21,7 @@ dependencies:
- libcufile-dev=1.4.0.31
- libcufile=1.4.0.31
- libcurl>=7.87.0
- moto>=4.0.8
- ninja
- numcodecs !=0.12.0
- numpy>=1.23,<3.0a0
2 changes: 2 additions & 0 deletions conda/environments/all_cuda-125_arch-aarch64.yaml
@@ -6,6 +6,7 @@ channels:
- conda-forge
- nvidia
dependencies:
- boto3>=1.21.21
- c-compiler
- cmake>=3.26.4,!=3.30.0
- cuda-nvcc
@@ -19,6 +20,7 @@ dependencies:
- gcc_linux-aarch64=11.*
- libcufile-dev
- libcurl>=7.87.0
- moto>=4.0.8
- ninja
- numcodecs !=0.12.0
- numpy>=1.23,<3.0a0
2 changes: 2 additions & 0 deletions conda/environments/all_cuda-125_arch-x86_64.yaml
@@ -6,6 +6,7 @@ channels:
- conda-forge
- nvidia
dependencies:
- boto3>=1.21.21
- c-compiler
- cmake>=3.26.4,!=3.30.0
- cuda-nvcc
@@ -19,6 +20,7 @@ dependencies:
- gcc_linux-64=11.*
- libcufile-dev
- libcurl>=7.87.0
- moto>=4.0.8
- ninja
- numcodecs !=0.12.0
- numpy>=1.23,<3.0a0
171 changes: 169 additions & 2 deletions cpp/include/kvikio/remote_handle.hpp
@@ -18,6 +18,7 @@
#include <cstddef>
#include <cstring>
#include <memory>
#include <optional>
#include <sstream>
#include <stdexcept>
#include <string>
@@ -89,7 +90,7 @@ inline std::size_t callback_device_memory(char* data,
void* context)
{
auto ctx = reinterpret_cast<CallbackContext*>(context);
-  const std::size_t nbytes = size * nmemb;
+  std::size_t const nbytes = size * nmemb;
if (ctx->size < ctx->offset + nbytes) {
ctx->overflow_error = true;
return CURL_WRITEFUNC_ERROR;
@@ -145,12 +146,178 @@ class HttpEndpoint : public RemoteEndpoint {
std::string _url;

public:
/**
* @brief Create a http endpoint from a url.
*
* @param url The full http url to the remote file.
*/
HttpEndpoint(std::string url) : _url{std::move(url)} {}
void setopt(CurlHandle& curl) override { curl.setopt(CURLOPT_URL, _url.c_str()); }
std::string str() override { return _url; }
~HttpEndpoint() override = default;
};

/**
* @brief A remote endpoint using AWS's S3 protocol.
*/
class S3Endpoint : public RemoteEndpoint {
private:
std::string _url;
std::string _aws_sigv4;
std::string _aws_userpwd;

static std::string parse_aws_argument(std::optional<std::string> aws_arg,
std::string const& env_var,
std::string const& err_msg,
bool allow_empty = false)
{
if (aws_arg.has_value()) { return std::move(*aws_arg); }

char const* env = std::getenv(env_var.c_str());
if (env == nullptr) {
if (allow_empty) { return std::string(); }
throw std::invalid_argument(err_msg);
}
return std::string(env);
}

static std::string url_from_bucket_and_object(std::string const& bucket_name,
std::string const& object_name,
std::optional<std::string> const& aws_region,
std::optional<std::string> aws_endpoint_url)
{
std::string endpoint_url =
parse_aws_argument(std::move(aws_endpoint_url), "AWS_ENDPOINT_URL", "", true);
std::stringstream ss;
if (endpoint_url.empty()) {
std::string region =
parse_aws_argument(std::move(aws_region),
"AWS_DEFAULT_REGION",
"S3: must provide `aws_region` if AWS_DEFAULT_REGION isn't set.");
// We default to the official AWS url scheme.
ss << "https://" << bucket_name << ".s3." << region << ".amazonaws.com/" << object_name;
} else {
ss << endpoint_url << "/" << bucket_name << "/" << object_name;
}
return ss.str();
}

public:
/**
* @brief Given an url like "s3://<bucket>/<object>", return the name of the bucket and object.
*
* @throws std::invalid_argument if url is ill-formed or is missing the bucket or object name.
*
* @param s3_url S3 url.
* @return Pair of strings: [bucket-name, object-name].
*/
[[nodiscard]] static std::pair<std::string, std::string> parse_s3_url(std::string const& s3_url)
{
if (s3_url.empty()) { throw std::invalid_argument("The S3 url cannot be an empty string."); }
if (s3_url.size() < 5 || s3_url.substr(0, 5) != "s3://") {
throw std::invalid_argument("The S3 url must start with the S3 scheme (\"s3://\").");
}
std::string p = s3_url.substr(5);
if (p.empty()) { throw std::invalid_argument("The S3 url cannot be an empty string."); }
size_t pos = p.find_first_of('/');
std::string bucket_name = p.substr(0, pos);
if (bucket_name.empty()) {
throw std::invalid_argument("The S3 url does not contain a bucket name.");
}
std::string object_name = (pos == std::string::npos) ? "" : p.substr(pos + 1);
if (object_name.empty()) {
throw std::invalid_argument("The S3 url does not contain an object name.");
}
return std::make_pair(std::move(bucket_name), std::move(object_name));
}

/**
* @brief Create a S3 endpoint from a url.
*
* @param url The full http url to the S3 file. NB: this should be an url starting with
* "http://" or "https://". If you have an S3 url of the form "s3://<bucket>/<object>",
* please use `S3Endpoint::parse_s3_url()` to convert it.
Contributor:

Can we only use https please and reject http? Or do you want that for testing?

It looks like yes. I would like this interface to be "safe" by default, and so I would like the user to have to explicitly opt in to using an unencrypted link, given that we send secrets over the wire.

Also, how does parse_s3_url help directly? That returns a std::pair not a std::string. Should one use url_from_bucket_and_object on the result?

Should we error-check and raise if the URL doesn't start with https:// or http://?

Member Author:

> Can we only use https please and reject http? Or do you want that for testing?

We also want http for high-performance access to public data.

> It looks like yes. I would like this interface to be “safe” by default, and so I would like the user to have to explicitly opt in to using an unencrypted link, given that we send secrets over the wire.

NB: only a time-specific signature is sent over the wire; curl uses aws_secret_access_key to generate the AWS authentication Signature V4. Of course, the payload itself is sent unencrypted.

I think it is reasonable to use https by default and accept http if the user overwrites the endpoint url explicitly?
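
For illustration, a minimal sketch of the intended flow (hypothetical bucket/object names; it assumes the bucket/object constructor added further down in this diff, and that the region and credentials are provided via the AWS_* environment variables):

#include <kvikio/remote_handle.hpp>

void example()
{
  // parse_s3_url() splits the "s3://" url into a [bucket, object] pair, which the
  // (bucket_name, object_name) constructor accepts directly. That constructor then
  // builds the "https://<bucket>.s3.<region>.amazonaws.com/<object>" url internally,
  // or honors AWS_ENDPOINT_URL if it is set.
  auto const [bucket, object] = kvikio::S3Endpoint::parse_s3_url("s3://my-bucket/my-object");
  kvikio::S3Endpoint endpoint(bucket, object);
}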

* @param aws_region The AWS region, such as "us-east-1", to use. If nullopt, the value of the
* `AWS_DEFAULT_REGION` environment variable is used.
* @param aws_access_key The AWS access key to use. If nullopt, the value of the
* `AWS_ACCESS_KEY_ID` environment variable is used.
* @param aws_secret_access_key The AWS secret access key to use. If nullopt, the value of the
* `AWS_SECRET_ACCESS_KEY` environment variable is used.
*/
S3Endpoint(std::string url,
std::optional<std::string> aws_region = std::nullopt,
std::optional<std::string> aws_access_key = std::nullopt,
std::optional<std::string> aws_secret_access_key = std::nullopt)
: _url{std::move(url)}
{
std::string region =
parse_aws_argument(std::move(aws_region),
"AWS_DEFAULT_REGION",
"S3: must provide `aws_region` if AWS_DEFAULT_REGION isn't set.");

std::string access_key =
parse_aws_argument(std::move(aws_access_key),
"AWS_ACCESS_KEY_ID",
"S3: must provide `aws_access_key` if AWS_ACCESS_KEY_ID isn't set.");

std::string secret_access_key = parse_aws_argument(
std::move(aws_secret_access_key),
"AWS_SECRET_ACCESS_KEY",
"S3: must provide `aws_secret_access_key` if AWS_SECRET_ACCESS_KEY isn't set.");

// Create the CURLOPT_AWS_SIGV4 option
{
std::stringstream ss;
ss << "aws:amz:" << region << ":s3";
_aws_sigv4 = ss.str();
}
// Create the CURLOPT_USERPWD option
{
std::stringstream ss;
ss << access_key << ":" << secret_access_key;
_aws_userpwd = ss.str();
Contributor:

It would be nice to have std::format in C++20...
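
A rough sketch of what the two stringstream blocks above could collapse to if std::format were available (C++20, requires <format>; shown for comparison only):

// Hypothetical C++20 equivalent of the stringstream code above:
_aws_sigv4   = std::format("aws:amz:{}:s3", region);
_aws_userpwd = std::format("{}:{}", access_key, secret_access_key);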

}
}

/**
* @brief Create a S3 endpoint from a bucket and object name.
*
* @param bucket_name The name of the S3 bucket.
* @param object_name The name of the S3 object.
* @param aws_region The AWS region, such as "us-east-1", to use. If nullopt, the value of the
* `AWS_DEFAULT_REGION` environment variable is used.
* @param aws_access_key The AWS access key to use. If nullopt, the value of the
* `AWS_ACCESS_KEY_ID` environment variable is used.
* @param aws_secret_access_key The AWS secret access key to use. If nullopt, the value of the
* `AWS_SECRET_ACCESS_KEY` environment variable is used.
* @param aws_endpoint_url Overwrite the endpoint url to use. If nullopt, the value of
* the `AWS_ENDPOINT_URL` environment variable is used. If this is also not set, the regular
* AWS url scheme is used: "https://<bucket>.s3.<region>.amazonaws.com/<object>"
*/
S3Endpoint(std::string const& bucket_name,
std::string const& object_name,
std::optional<std::string> aws_region = std::nullopt,
std::optional<std::string> aws_access_key = std::nullopt,
std::optional<std::string> aws_secret_access_key = std::nullopt,
std::optional<std::string> aws_endpoint_url = std::nullopt)
: S3Endpoint(url_from_bucket_and_object(
bucket_name, object_name, aws_region, std::move(aws_endpoint_url)),
std::move(aws_region),
std::move(aws_access_key),
std::move(aws_secret_access_key))
{
}

void setopt(CurlHandle& curl) override
{
curl.setopt(CURLOPT_URL, _url.c_str());
curl.setopt(CURLOPT_AWS_SIGV4, _aws_sigv4.c_str());
curl.setopt(CURLOPT_USERPWD, _aws_userpwd.c_str());
}
std::string str() override { return _url; }
~S3Endpoint() override = default;
};

/**
* @brief Handle of remote file.
*/
@@ -229,7 +396,7 @@ class RemoteHandle {
<< " bytes file (" << _endpoint->str() << ")";
throw std::invalid_argument(ss.str());
}
-  const bool is_host_mem = is_host_memory(buf);
+  bool const is_host_mem = is_host_memory(buf);
auto curl = create_curl_handle();
_endpoint->setopt(curl);

7 changes: 7 additions & 0 deletions dependencies.yaml
@@ -345,6 +345,13 @@ dependencies:
- pytest
- pytest-cov
- rangehttpserver
- boto3>=1.21.21
- output_types: [requirements, pyproject]
packages:
- moto[server]>=4.0.8
- output_types: conda
packages:
- moto>=4.0.8
specific:
- output_types: [conda, requirements, pyproject]
matrices:
62 changes: 62 additions & 0 deletions python/kvikio/kvikio/_lib/remote_handle.pyx
@@ -23,6 +23,15 @@ cdef extern from "<kvikio/remote_handle.hpp>" nogil:
cdef cppclass cpp_HttpEndpoint "kvikio::HttpEndpoint":
cpp_HttpEndpoint(string url) except +

cdef cppclass cpp_S3Endpoint "kvikio::S3Endpoint":
cpp_S3Endpoint(string url) except +

cdef cppclass cpp_S3Endpoint "kvikio::S3Endpoint":
cpp_S3Endpoint(string bucket_name, string object_name) except +

pair[string, string] cpp_parse_s3_url \
"kvikio::S3Endpoint::parse_s3_url"(string url) except +

cdef cppclass cpp_RemoteHandle "kvikio::RemoteHandle":
cpp_RemoteHandle(
unique_ptr[cpp_RemoteEndpoint] endpoint, size_t nbytes
@@ -67,6 +76,59 @@ cdef class RemoteFile:
ret._handle = make_unique[cpp_RemoteHandle](move(ep), n)
return ret

@classmethod
Contributor:

There is a lot of logic repeated in these three functions; it would benefit from a single helper containing most of the logic, to which you just pass the endpoint you created, since the creation of ep is really the only difference.

Member Author:

I tried, but I cannot find a way to implement a from_unique_ptr factory function without it getting very complicated.
Basically, I want something like this:

    @staticmethod
    cdef RemoteFile from_unique_ptr(
        unique_ptr[cpp_RemoteHandle] handle,
        nbytes: Optional[int]
    ):
        cdef RemoteFile ret = RemoteFile.__new__(RemoteFile)
        if nbytes is None:
            ret._handle = make_unique[cpp_RemoteHandle](move(handle))
            return ret
        ret._handle = make_unique[cpp_RemoteHandle](move(handle), <size_t> nbytes)
        return ret

But I cannot find a nice way to call from_unique_ptr() with a derived class instance like unique_ptr[cpp_HttpEndpoint]. Any suggestions?

Contributor:

Perhaps like this. I am not sure it is much cleaner:

def _set_handle(self, unique_ptr[cpp_RemoteEndpoint] ep, nbytes):
    if nbytes is None:
        self._handle = make_unique[cpp_RemoteHandle](move(ep))
    else:
        self._handle = make_unique[cpp_RemoteHandle](move(ep), <size_t>nbytes)

def open_http(...):
    cdef RemoteFile ret = RemoteFile()
    cdef unique_ptr[cpp_HttpEndpoint] ep = make_unique[cpp_HttpEndpoint](...)
    ret._set_handle(<unique_ptr[cpp_RemoteEndpoint]>move(ep));
    return ret;

Contributor:

The fundamental problem is that Cython does not natively understand smart pointer polymorphism in the same way that it understands raw pointer polymorphism, so you cannot pass a unique_ptr to a child class where it expects you to pass a unique_ptr to a base class or vice versa. It's a clear sign of the C rather than C++ roots of the project. Here's a patch that builds for me locally and is a bit cleaner IMHO than what Lawrence proposed, with the caveat that you temporarily have a raw pointer that you construct a unique_ptr from rather than using make_unique. Up to you two if you like it or not.

diff --git a/python/kvikio/kvikio/_lib/remote_handle.pyx b/python/kvikio/kvikio/_lib/remote_handle.pyx
index 1156300..de48bb9 100644
--- a/python/kvikio/kvikio/_lib/remote_handle.pyx
+++ b/python/kvikio/kvikio/_lib/remote_handle.pyx
@@ -20,13 +20,11 @@ cdef extern from "<kvikio/remote_handle.hpp>" nogil:
     cdef cppclass cpp_RemoteEndpoint "kvikio::RemoteEndpoint":
         pass
 
-    cdef cppclass cpp_HttpEndpoint "kvikio::HttpEndpoint":
+    cdef cppclass cpp_HttpEndpoint "kvikio::HttpEndpoint"(cpp_RemoteEndpoint):
         cpp_HttpEndpoint(string url) except +
 
-    cdef cppclass cpp_S3Endpoint "kvikio::S3Endpoint":
+    cdef cppclass cpp_S3Endpoint "kvikio::S3Endpoint"(cpp_RemoteEndpoint):
         cpp_S3Endpoint(string url) except +
-
-    cdef cppclass cpp_S3Endpoint "kvikio::S3Endpoint":
         cpp_S3Endpoint(string bucket_name, string object_name) except +
 
     pair[string, string] cpp_parse_s3_url \
@@ -56,6 +54,19 @@ cdef string _to_string(str_or_none):
     return str.encode(str(str_or_none))
 
 
+cdef RemoteFile make_remotefile(
+    cpp_RemoteEndpoint* ep,
+    nbytes: Optional[int],
+):
+    cdef RemoteFile ret = RemoteFile()
+    if nbytes is None:
+        ret._handle = make_unique[cpp_RemoteHandle](unique_ptr[cpp_RemoteEndpoint](ep))
+        return ret
+    cdef size_t n = nbytes
+    ret._handle = make_unique[cpp_RemoteHandle](unique_ptr[cpp_RemoteEndpoint](ep), n)
+    return ret
+
+
 cdef class RemoteFile:
     cdef unique_ptr[cpp_RemoteHandle] _handle
 
@@ -65,16 +76,10 @@ cdef class RemoteFile:
         url: str,
         nbytes: Optional[int],
     ):
-        cdef RemoteFile ret = RemoteFile()
-        cdef unique_ptr[cpp_HttpEndpoint] ep = make_unique[cpp_HttpEndpoint](
-            _to_string(url)
+        return make_remotefile(
+            new cpp_HttpEndpoint(_to_string(url)),
+            nbytes,
         )
-        if nbytes is None:
-            ret._handle = make_unique[cpp_RemoteHandle](move(ep))
-            return ret
-        cdef size_t n = nbytes
-        ret._handle = make_unique[cpp_RemoteHandle](move(ep), n)
-        return ret
 
     @classmethod
     def open_s3(
@@ -83,16 +88,10 @@ cdef class RemoteFile:
         object_name: str,
         nbytes: Optional[int],
     ):
-        cdef RemoteFile ret = RemoteFile()
-        cdef unique_ptr[cpp_S3Endpoint] ep = make_unique[cpp_S3Endpoint](
-            _to_string(bucket_name), _to_string(object_name)
+        return make_remotefile(
+            new cpp_S3Endpoint(_to_string(bucket_name), _to_string(object_name)),
+            nbytes,
         )
-        if nbytes is None:
-            ret._handle = make_unique[cpp_RemoteHandle](move(ep))
-            return ret
-        cdef size_t n = nbytes
-        ret._handle = make_unique[cpp_RemoteHandle](move(ep), n)
-        return ret
 
     @classmethod
     def open_s3_from_http_url(
@@ -100,16 +99,10 @@ cdef class RemoteFile:
         url: str,
         nbytes: Optional[int],
     ):
-        cdef RemoteFile ret = RemoteFile()
-        cdef unique_ptr[cpp_S3Endpoint] ep = make_unique[cpp_S3Endpoint](
-            _to_string(url)
+        return make_remotefile(
+            new cpp_S3Endpoint(_to_string(url)),
+            nbytes,
         )
-        if nbytes is None:
-            ret._handle = make_unique[cpp_RemoteHandle](move(ep))
-            return ret
-        cdef size_t n = nbytes
-        ret._handle = make_unique[cpp_RemoteHandle](move(ep), n)
-        return ret
 
     @classmethod
     def open_s3_from_s3_url(
@@ -118,16 +111,10 @@ cdef class RemoteFile:
         nbytes: Optional[int],
     ):
         cdef pair[string, string] bucket_and_object = cpp_parse_s3_url(_to_string(url))
-        cdef RemoteFile ret = RemoteFile()
-        cdef unique_ptr[cpp_S3Endpoint] ep = make_unique[cpp_S3Endpoint](
-            bucket_and_object.first, bucket_and_object.second
+        return make_remotefile(
+            new cpp_S3Endpoint(bucket_and_object.first, bucket_and_object.second),
+            nbytes,
         )
-        if nbytes is None:
-            ret._handle = make_unique[cpp_RemoteHandle](move(ep))
-            return ret
-        cdef size_t n = nbytes
-        ret._handle = make_unique[cpp_RemoteHandle](move(ep), n)
-        return ret
 
     def nbytes(self) -> int:
         return deref(self._handle).nbytes()

Member:

Could this just be a templated C++ factory function that we extern to Cython and call here?

Contributor:

You could do part of it in C++ but RemoteFile is a Python class so you'd still need a wrapper around the C++ function to handle instantiation and assignment to attributes of that object.

Member:

Think that is fine. Basically, what we would be saying is: here is a factory function that takes some arguments and returns a std::unique_ptr<cpp_RemoteHandle>, which we just move to ret._handle.

Contributor:

Yup that would be fine. Probably just use inline C++ to define that in this file itself.

Member Author:

Using a C++ cast function as @jakirkham suggests: 6104106
It is a bit more verbose than @vyasr's raw pointer approach, but it enforces the pointer uniqueness.
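
A rough sketch of the kind of cast helper being discussed (an illustration, not necessarily the exact code in 6104106): a small inline C++ function, declared to Cython, that upcasts a unique_ptr to a concrete endpoint into a unique_ptr to the RemoteEndpoint base class while keeping ownership unique:

#include <memory>
#include <utility>
#include <kvikio/remote_handle.hpp>

// Hypothetical helper: take ownership of any concrete endpoint and hand it back as a
// unique_ptr to the base class. The converting move constructor of std::unique_ptr
// performs the upcast, so no raw pointer is ever exposed.
template <typename EndpointT>
std::unique_ptr<kvikio::RemoteEndpoint> cast_to_remote_endpoint(std::unique_ptr<EndpointT> endpoint)
{
  return std::move(endpoint);
}

Each open_* factory can then build its concrete endpoint with make_unique and pass it through this helper before constructing the cpp_RemoteHandle.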

def open_s3(
cls,
bucket_name: str,
object_name: str,
nbytes: Optional[int],
):
cdef RemoteFile ret = RemoteFile()
cdef unique_ptr[cpp_S3Endpoint] ep = make_unique[cpp_S3Endpoint](
_to_string(bucket_name), _to_string(object_name)
)
if nbytes is None:
ret._handle = make_unique[cpp_RemoteHandle](move(ep))
return ret
cdef size_t n = nbytes
ret._handle = make_unique[cpp_RemoteHandle](move(ep), n)
return ret

@classmethod
def open_s3_from_http_url(
cls,
url: str,
nbytes: Optional[int],
):
cdef RemoteFile ret = RemoteFile()
cdef unique_ptr[cpp_S3Endpoint] ep = make_unique[cpp_S3Endpoint](
_to_string(url)
)
if nbytes is None:
ret._handle = make_unique[cpp_RemoteHandle](move(ep))
return ret
cdef size_t n = nbytes
ret._handle = make_unique[cpp_RemoteHandle](move(ep), n)
return ret

@classmethod
def open_s3_from_s3_url(
cls,
url: str,
nbytes: Optional[int],
):
cdef pair[string, string] bucket_and_object = cpp_parse_s3_url(_to_string(url))
cdef RemoteFile ret = RemoteFile()
cdef unique_ptr[cpp_S3Endpoint] ep = make_unique[cpp_S3Endpoint](
bucket_and_object.first, bucket_and_object.second
)
if nbytes is None:
ret._handle = make_unique[cpp_RemoteHandle](move(ep))
return ret
cdef size_t n = nbytes
ret._handle = make_unique[cpp_RemoteHandle](move(ep), n)
return ret

def nbytes(self) -> int:
return deref(self._handle).nbytes()
