Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 13 additions & 4 deletions cdmtaskservice/s3/paths.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@
# https://code.jgi.doe.gov/advanced-analysis/jaws-docs/-/issues/86
# These characters are obnoxious in filenames anyway
# May need to add more characters as problems are discovered
# Double forward slash is disallowed by S3
# Double forward slash causes issues on POSIX filesystems when files are staged locally;
# S3-compatible backends also handle // inconsistently (e.g. Minio collapses, Ceph preserves)
_DISALLOWED_CHARACTERS = re.compile(r"([';]|[/]{2})")


Expand Down Expand Up @@ -69,16 +70,24 @@ def validate_path(path: str, index: int = None) -> str:

Returns a normalized path.
"""
# keys can have spaces and / but not //, except for the first characters
# keys can have spaces and single / but not //, and keys cannot start with /
i = f" at index {index}" if index is not None else ""
if not path or not path.strip():
raise S3PathSyntaxError(f"The s3 path{i} cannot be null or a whitespace string")
parts = path.lstrip().lstrip("/").split("/", 1) # ignore leading /s in the bucket, be nice
stripped = path.lstrip()
if stripped.startswith("/"):
raise S3PathSyntaxError(f"Path '{path}'{i} cannot start with '/'")
parts = stripped.split("/", 1)
if len(parts) != 2:
raise S3PathSyntaxError(
f"Path '{path}'{i} must start with the s3 bucket and include a key")
bucket = validate_bucket_name(parts[0], index=index)
key = parts[1].lstrip("/") # Leading /s are ignored by s3, but spaces and trailing /s count
key = parts[1]
if not key:
raise S3PathSyntaxError(
f"Path '{path}'{i} must start with the s3 bucket and include a key")
if key.startswith("/"):
raise S3PathSyntaxError(f"Path '{path}'{i}'s key cannot start with '/'")
if len(key.encode("UTF-8")) > 1024:
raise S3PathSyntaxError(f"Path '{path}'{i}'s key is longer than 1024 bytes in UTF-8")
match_ = _DISALLOWED_CHARACTERS.search(key)
Expand Down
1 change: 1 addition & 0 deletions test/controllers/minio.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ async def upload_file(
"ContentLength": len(main_part),
}
if crc64nvme:
args["ChecksumAlgorithm"] = "CRC64NVME"
args["ChecksumCRC64NVME"] = crc64nvme
return await client.put_object(**args)
else:
Expand Down
26 changes: 15 additions & 11 deletions test/s3/paths_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,13 @@ def test_init_fail_bad_path():
testset = {
None: "The s3 path at index x cannot be null or a whitespace string",
" \t ": "The s3 path at index x cannot be null or a whitespace string",
" / ": "Path ' / ' at index x must start with the s3 bucket and include a key",
" / / ": "Bucket name at index x cannot be whitespace only",
" ////bar ":
"Path ' ////bar ' at index x must start with the s3 bucket and include a key",
" / bar ": "Path ' / bar ' at index x must start with the s3 bucket and include a key",
" / ": "Path ' / ' at index x cannot start with '/'",
" bar ": "Path ' bar ' at index x must start with the s3 bucket and include a key",
" bar /": "Path ' bar /' at index x must start with the s3 bucket and include a key",
" / / ": "Path ' / / ' at index x cannot start with '/'",
" ////bar ": "Path ' ////bar ' at index x cannot start with '/'",
" / bar ": "Path ' / bar ' at index x cannot start with '/'",
"buckit//foo": "Path 'buckit//foo' at index x's key cannot start with '/'",
"il/foo": "Bucket name at index x must be > 2 and < 64 characters: il",
("illegal-bu" * 6) + "cket/foo":
f"Bucket name at index x must be > 2 and < 64 characters: {'illegal-bu' * 6}cket",
Expand All @@ -44,8 +46,10 @@ def test_init_fail_bad_path():
"Path 'buckit/foo//bar' at index x contains illegal character(s) '//' "
+ "at key index 3",
"buckit/// //bar":
"Path 'buckit/// //bar' at index x contains illegal character(s) '//' "
+ "at key index 2",
"Path 'buckit/// //bar' at index x's key cannot start with '/'",
"buckit/bar//foo":
"Path 'buckit/bar//foo' at index x contains illegal character(s) '//' "
+ "at key index 3",
"buckit/barbaz;foo'x":
"Path 'buckit/barbaz;foo'x' at index x contains illegal character(s) ';' "
+ "at key index 6",
Expand Down Expand Up @@ -83,8 +87,8 @@ def test_init():

s3p = S3Paths([
"foo/bar",
" /// baz//// bat /",
" thingy-stuff9/// /𝛙hatever/ ",
" baz / bat /",
" thingy-stuff9/ /𝛙hatever/ ",
" bukkit / "])
assert s3p.paths == (
"foo/bar",
Expand All @@ -95,8 +99,8 @@ def test_init():


def test_split_paths():
s3p = S3Paths(["foo/bar", "baz/bat ", " //thingy-stuff9///𝛙hatever/baz"])
s3p = S3Paths(["foo/bar", "baz/bat ", " thingy-stuff9/𝛙hatever/baz"])

assert list(s3p.split_paths()) == [
["foo", "bar"], ["baz", "bat "], ["thingy-stuff9", "𝛙hatever/baz"]
]
Expand Down
7 changes: 3 additions & 4 deletions test/s3/s3_client_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,8 +167,7 @@ async def _is_bucket_writeable_fail(s3c, bucket, expected, print_stacktrace=Fals
async def test_get_object_meta_single_part_w_crc64nvme(minio):
await minio.clean() # couldn't get this to work as a fixture
await minio.create_bucket("test-bucket")
# test that leading /s in key are ignored
await minio.upload_file("test-bucket///test_file", b"abcdefghij", crc64nvme="e/Vz6rUQ/+o=")
await minio.upload_file("test-bucket/test_file", b"abcdefghij", crc64nvme="e/Vz6rUQ/+o=")

async with _client(minio) as s3c:
objm = await s3c.get_object_meta(S3Paths(["test-bucket/test_file"]))
Expand Down Expand Up @@ -321,7 +320,7 @@ async def _get_object_meta_fail(s3c, paths, expected, concurrency=1, print_stack
async def test_download_objects_to_file(minio, tmp_path):
await minio.clean() # couldn't get this to work as a fixture
await minio.create_bucket("test-bucket")
await minio.upload_file("test-bucket//test_file", b"imsounique")
await minio.upload_file("test-bucket/test_file", b"imsounique")
await minio.upload_file(
"test-bucket/big_test_file",
b"abcdefghij" * 600000,
Expand Down Expand Up @@ -442,7 +441,7 @@ async def _stream_and_assert(s3c, file_path, expected, seek=None, length=None):
async def test_stream_object(minio):
await minio.clean() # couldn't get this to work as a fixture
await minio.create_bucket("test-bucket")
await minio.upload_file("test-bucket//test_file", b"imsounique")
await minio.upload_file("test-bucket/test_file", b"imsounique")
await minio.upload_file(
"test-bucket/big_test_file",
b"abcdefghij" * 600000,
Expand Down
Loading