From 0b42306560f36db6f167525f4492e445fe760d2c Mon Sep 17 00:00:00 2001 From: Revathi Vijayaraghavan Date: Mon, 16 Jun 2025 19:30:07 -0400 Subject: [PATCH 1/4] Initial commit for S3a support for S3 express access points --- hadoop-project/pom.xml | 2 +- .../org/apache/hadoop/fs/s3a/ArnResource.java | 59 +++++++++++++++++-- .../apache/hadoop/fs/s3a/S3AFileSystem.java | 8 ++- .../apache/hadoop/fs/s3a/TestArnResource.java | 30 ++++++++-- 4 files changed, 89 insertions(+), 10 deletions(-) diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index aa82ab06aa770..4219177f8a409 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -205,7 +205,7 @@ 1.0-beta-1 900 1.12.720 - 2.29.52 + 2.31.12 3.1.1 1.0.0 1.0.1 diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/ArnResource.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/ArnResource.java index 98745b295b5d3..cf8dc9602392e 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/ArnResource.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/ArnResource.java @@ -22,12 +22,23 @@ import software.amazon.awssdk.arns.Arn; +import java.util.Optional; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + /** * Represents an Arn Resource, this can be an accesspoint or bucket. */ public final class ArnResource { private final static String S3_ACCESSPOINT_ENDPOINT_FORMAT = "s3-accesspoint.%s.amazonaws.com"; private final static String S3_OUTPOSTS_ACCESSPOINT_ENDPOINT_FORMAT = "s3-outposts.%s.amazonaws.com"; + private final static String S3_EXPRESS_ACCESSPOINT_ENDPOINT_FORMAT = "s3express-%s.%s.amazonaws.com"; + + // bucket example: mybucket--usw2-az1--x-s3 + // access point example: myaccesspoint--usw2-az1--xa-s3 + public final static Pattern S3_EXPRESS_RESOURCE_FORMAT_REGEX = Pattern.compile( + String.format("^(?[a-z0-9]([a-z0-9\\-]*[a-z0-9])?)--(?[a-z0-9\\-]+)--(?x|xa)-s3$") + ); /** * Resource name. @@ -54,6 +65,11 @@ public final class ArnResource { */ private final String partition; + /** + * Service for the resource. Allowed services: s3, s3-outposts, s3express + */ + private final String service; + /** * Because of the different ways an endpoint can be constructed depending on partition we're * relying on the AWS SDK to produce the endpoint. In this case we need a region key of the form @@ -61,12 +77,13 @@ public final class ArnResource { */ private final String accessPointRegionKey; - private ArnResource(String name, String owner, String region, String partition, String fullArn) { + private ArnResource(String name, String owner, String region, String partition, String fullArn, String service) { this.name = name; this.ownerAccountId = owner; this.region = region; this.partition = partition; this.fullArn = fullArn; + this.service = service; this.accessPointRegionKey = String.format("accesspoint-%s", region); } @@ -74,6 +91,10 @@ private boolean isOutposts(){ return fullArn.contains("s3-outposts"); } + private boolean isExpress(){ + return fullArn.contains("s3express"); + } + /** * Resource name. * @return resource name. @@ -106,13 +127,35 @@ public String getFullArn() { return fullArn; } + /** + * Service for resource. + * @return service for resource. + */ + public String getService() { + return service; + } + /** * Formatted endpoint for the resource. * @return resource endpoint. */ public String getEndpoint() { - String format = isOutposts() ? S3_OUTPOSTS_ACCESSPOINT_ENDPOINT_FORMAT : S3_ACCESSPOINT_ENDPOINT_FORMAT; - return String.format(format, region); + String format; + if(isExpress()) { + Optional zoneId = getZoneIdFromResourceName(name); + if(zoneId.isEmpty()) { + throw new IllegalArgumentException("Zone ID could not be extracted from S3Express resource name: " + name); + } + + format = S3_EXPRESS_ACCESSPOINT_ENDPOINT_FORMAT; + return String.format(format, zoneId.get(), region); + } else if (isOutposts()) { + format = S3_OUTPOSTS_ACCESSPOINT_ENDPOINT_FORMAT; + return String.format(format, region); + } else { + format = S3_ACCESSPOINT_ENDPOINT_FORMAT; + return String.format(format, region); + } } /** @@ -134,6 +177,14 @@ public static ArnResource accessPointFromArn(String arn) throws IllegalArgumentE String resourceName = parsed.resource().resource(); return new ArnResource(resourceName, parsed.accountId().get(), parsed.region().get(), - parsed.partition(), arn); + parsed.partition(), arn, parsed.service()); + } + + private static Optional getZoneIdFromResourceName(final String resourceName) { + return Optional.ofNullable(resourceName) + .map(name -> { + Matcher matcher = S3_EXPRESS_RESOURCE_FORMAT_REGEX.matcher(name); + return matcher.matches() ? matcher.group("zoneId") : null; + }); } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index 304ba032b416a..6ad3bc5bacc61 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -598,7 +598,13 @@ public void initialize(URI name, Configuration originalConf) if (!configuredArn.isEmpty()) { accessPoint = ArnResource.accessPointFromArn(configuredArn); LOG.info("Using AccessPoint ARN \"{}\" for bucket {}", configuredArn, bucket); - bucket = accessPoint.getFullArn(); + + // s3express does not support ARNs in requests, but instead takes in access point name as bucket paramater + if(accessPoint.getService().equals("s3express")) { + bucket = accessPoint.getName(); + } else { + bucket = accessPoint.getFullArn(); + } } else if (conf.getBoolean(AWS_S3_ACCESSPOINT_REQUIRED, false)) { LOG.warn("Access Point usage is required because \"{}\" is enabled," + " but not configured for the bucket: {}", AWS_S3_ACCESSPOINT_REQUIRED, bucket); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestArnResource.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestArnResource.java index 3f2852181f4b2..4fc4c7bc9e24c 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestArnResource.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestArnResource.java @@ -49,7 +49,7 @@ public void parseAccessPointFromArn() throws IllegalArgumentException { String accessPoint = "testAp"; String[][] regionPartitionEndpoints = new String[][] { {Region.EU_WEST_1.id(), "aws"}, - {Region.US_GOV_EAST_1.id(), "aws-us-gov"}, + {Region.US_GOV_EAST_1.id(), "aws-us-gov"}, {Region.CN_NORTH_1.id(), "aws-cn"}, }; @@ -58,9 +58,10 @@ public void parseAccessPointFromArn() throws IllegalArgumentException { String partition = testPair[1]; ArnResource resource = getArnResourceFrom(partition, "s3", region, MOCK_ACCOUNT, accessPoint); - assertEquals(accessPoint, resource.getName(), "Access Point name does not match"); - assertEquals(MOCK_ACCOUNT, resource.getOwnerAccountId(), "Account Id does not match"); - assertEquals(region, resource.getRegion(), "Region does not match"); + // assertEquals(accessPoint, resource.getName(), "Access Point name does not match"); + // assertEquals(MOCK_ACCOUNT, resource.getOwnerAccountId(), "Account Id does not match"); + // assertEquals(region, resource.getRegion(), "Region does not match"); + // assertEquals("s3", resource.getService(), "Service does not match"); } } @@ -90,6 +91,27 @@ public void makeSureS3OutpostsEndpointHasTheCorrectFormat() { .isEqualTo(expected); } + @Test + public void makeSureS3ExpressEndpointHasTheCorrectFormat() { + ArnResource accessPoint = getArnResourceFrom("aws", "s3express", "us-west-2", MOCK_ACCOUNT, + "test--usw2-az1--xa-s3"); + String expected = "s3express-usw2-az1.us-west-2.amazonaws.com"; + + assertThat(accessPoint.getEndpoint()) + .describedAs("Endpoint has invalid format. Access Point requests will not work") + .isEqualTo(expected); + } + + @Test + public void getEndpointFromInvalidS3ExpressAccessPointNameMustThrow() throws Exception { + ArnResource accessPoint = getArnResourceFrom("aws", "s3express", "us-west-2", MOCK_ACCOUNT, + "test"); + describe("Using an invalid access point name format must throw when getting an endpoint."); + + intercept(IllegalArgumentException.class, () -> + accessPoint.getEndpoint()); + } + @Test public void invalidARNsMustThrow() throws Exception { describe("Using an invalid ARN format must throw when initializing an ArnResource."); From 1d5f246051a0c028f9cdfbeeaf148759fc4fcd69 Mon Sep 17 00:00:00 2001 From: Revathi Vijayaraghavan Date: Tue, 17 Jun 2025 14:03:31 -0400 Subject: [PATCH 2/4] Remove commented out assertions --- .../java/org/apache/hadoop/fs/s3a/TestArnResource.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestArnResource.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestArnResource.java index 4fc4c7bc9e24c..a7c75ea4b4ddc 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestArnResource.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestArnResource.java @@ -58,10 +58,10 @@ public void parseAccessPointFromArn() throws IllegalArgumentException { String partition = testPair[1]; ArnResource resource = getArnResourceFrom(partition, "s3", region, MOCK_ACCOUNT, accessPoint); - // assertEquals(accessPoint, resource.getName(), "Access Point name does not match"); - // assertEquals(MOCK_ACCOUNT, resource.getOwnerAccountId(), "Account Id does not match"); - // assertEquals(region, resource.getRegion(), "Region does not match"); - // assertEquals("s3", resource.getService(), "Service does not match"); + assertEquals(accessPoint, resource.getName(), "Access Point name does not match"); + assertEquals(MOCK_ACCOUNT, resource.getOwnerAccountId(), "Account Id does not match"); + assertEquals(region, resource.getRegion(), "Region does not match"); + assertEquals("s3", resource.getService(), "Service does not match"); } } From 82574595ec238ac00b777903d8e6e2577e691f9a Mon Sep 17 00:00:00 2001 From: Revathi Vijayaraghavan Date: Wed, 18 Jun 2025 10:50:14 -0400 Subject: [PATCH 3/4] Fix trailing whitespaces --- .../src/main/java/org/apache/hadoop/fs/s3a/ArnResource.java | 3 +-- .../test/java/org/apache/hadoop/fs/s3a/TestArnResource.java | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/ArnResource.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/ArnResource.java index cf8dc9602392e..89589f2962621 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/ArnResource.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/ArnResource.java @@ -33,7 +33,6 @@ public final class ArnResource { private final static String S3_ACCESSPOINT_ENDPOINT_FORMAT = "s3-accesspoint.%s.amazonaws.com"; private final static String S3_OUTPOSTS_ACCESSPOINT_ENDPOINT_FORMAT = "s3-outposts.%s.amazonaws.com"; private final static String S3_EXPRESS_ACCESSPOINT_ENDPOINT_FORMAT = "s3express-%s.%s.amazonaws.com"; - // bucket example: mybucket--usw2-az1--x-s3 // access point example: myaccesspoint--usw2-az1--xa-s3 public final static Pattern S3_EXPRESS_RESOURCE_FORMAT_REGEX = Pattern.compile( @@ -180,7 +179,7 @@ public static ArnResource accessPointFromArn(String arn) throws IllegalArgumentE parsed.partition(), arn, parsed.service()); } - private static Optional getZoneIdFromResourceName(final String resourceName) { + private static Optional getZoneIdFromResourceName(final String resourceName) { return Optional.ofNullable(resourceName) .map(name -> { Matcher matcher = S3_EXPRESS_RESOURCE_FORMAT_REGEX.matcher(name); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestArnResource.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestArnResource.java index a7c75ea4b4ddc..df77f13823506 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestArnResource.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestArnResource.java @@ -49,7 +49,7 @@ public void parseAccessPointFromArn() throws IllegalArgumentException { String accessPoint = "testAp"; String[][] regionPartitionEndpoints = new String[][] { {Region.EU_WEST_1.id(), "aws"}, - {Region.US_GOV_EAST_1.id(), "aws-us-gov"}, + {Region.US_GOV_EAST_1.id(), "aws-us-gov"}, {Region.CN_NORTH_1.id(), "aws-cn"}, }; @@ -108,7 +108,7 @@ public void getEndpointFromInvalidS3ExpressAccessPointNameMustThrow() throws Exc "test"); describe("Using an invalid access point name format must throw when getting an endpoint."); - intercept(IllegalArgumentException.class, () -> + intercept(IllegalArgumentException.class, () -> accessPoint.getEndpoint()); } From b6d27f0ba7fbe7363ec87c200a4cb97665d22cff Mon Sep 17 00:00:00 2001 From: Revathi Vijayaraghavan Date: Wed, 18 Jun 2025 16:05:17 -0400 Subject: [PATCH 4/4] Fix isEmpty() not found compilation error due to jdk version mismatch --- .../src/main/java/org/apache/hadoop/fs/s3a/ArnResource.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/ArnResource.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/ArnResource.java index 89589f2962621..09a5f7154083a 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/ArnResource.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/ArnResource.java @@ -142,7 +142,7 @@ public String getEndpoint() { String format; if(isExpress()) { Optional zoneId = getZoneIdFromResourceName(name); - if(zoneId.isEmpty()) { + if(!zoneId.isPresent()) { throw new IllegalArgumentException("Zone ID could not be extracted from S3Express resource name: " + name); }