diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000000..90df607a3a
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,5 @@
+*/target
+*/dependency-reduced-pom.xml
+.idea/
+/target/
+*/*.iml
diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md
new file mode 100644
index 0000000000..5b627cfa60
--- /dev/null
+++ b/CODE_OF_CONDUCT.md
@@ -0,0 +1,4 @@
+## Code of Conduct
+This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct).
+For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact
+opensource-codeofconduct@amazon.com with any additional questions or comments.
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 0000000000..914e0741d7
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,61 @@
+# Contributing Guidelines
+
+Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional
+documentation, we greatly value feedback and contributions from our community.
+
+Please read through this document before submitting any issues or pull requests to ensure we have all the necessary
+information to effectively respond to your bug report or contribution.
+
+
+## Reporting Bugs/Feature Requests
+
+We welcome you to use the GitHub issue tracker to report bugs or suggest features.
+
+When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already
+reported the issue. Please try to include as much information as you can. Details like these are incredibly useful:
+
+* A reproducible test case or series of steps
+* The version of our code being used
+* Any modifications you've made relevant to the bug
+* Anything unusual about your environment or deployment
+
+
+## Contributing via Pull Requests
+Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that:
+
+1. You are working against the latest source on the *master* branch.
+2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already.
+3. You open an issue to discuss any significant work - we would hate for your time to be wasted.
+
+To send us a pull request, please:
+
+1. Fork the repository.
+2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change.
+3. Ensure local tests pass.
+4. Commit to your fork using clear commit messages.
+5. Send us a pull request, answering any default questions in the pull request interface.
+6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation.
+
+GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and
+[creating a pull request](https://help.github.com/articles/creating-a-pull-request/).
+
+
+## Finding contributions to work on
+Looking at the existing issues is a great way to find something to contribute to. Since our projects use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), any 'help wanted' issue is a great place to start.
+
+
+## Code of Conduct
+This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct).
+For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact
+opensource-codeofconduct@amazon.com with any additional questions or comments.
+
+
+## Security issue notifications
+If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public GitHub issue.
+
+
+## Licensing
+
+See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution.
+
+We may ask you to sign a [Contributor License Agreement (CLA)](http://en.wikipedia.org/wiki/Contributor_License_Agreement) for larger changes.
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000000..67db858821
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,175 @@
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
diff --git a/NOTICE b/NOTICE
new file mode 100644
index 0000000000..b8d0d46a06
--- /dev/null
+++ b/NOTICE
@@ -0,0 +1 @@
+Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000000..188b4b4f54
--- /dev/null
+++ b/README.md
@@ -0,0 +1,138 @@
+# Amazon Athena Query Federation
+
+The Amazon Athena Query Federation SDK allows you to customize Amazon Athena with your own code. This enables you to integrate with new data sources, work with proprietary data formats, or build new user-defined functions. Initially these customizations are limited to the parts of a query that occur during a TableScan operation, but they will eventually be expanded to include other parts of the query lifecycle using the same easy-to-understand interface.
+
+This functionality is currently in **Public Preview** while customers provide us feedback on usability, ease of using the service, and the experience of building new connectors. We do not recommend that you use these connectors in production or use this preview to make assumptions about the performance of Athena’s Federation features. As we receive more feedback, we will make improvements to the preview and raise limits associated with query/connector performance, APIs, SDKs, and user experience. The best way to understand the performance of Athena Data Source Connectors is to run a benchmark when they become generally available (GA) or review our performance guidance.
+
+![Architecture Image](https://github.com/awslabs/aws-athena-query-federation/blob/master/docs/img/athena_federation_summary.png?raw=true)
+
+We've written integrations with more than 20 databases, storage formats, and live APIs in order to refine this interface and balance flexibility with ease of use. We hope that making this SDK and initial set of connectors Open Source will allow us to continue to improve the experience and performance of Athena Query Federation.
+
+## Serverless Big Data Using AWS Lambda
+
+![Architecture Image](https://github.com/awslabs/aws-athena-query-federation/blob/master/docs/img/athena_federation_flow.png?raw=true)
+
+## Example Usages
+
+- SecretsManager integration
+- Serverless Application Repository
+
+### Queries That Span Data Stores
+
+Imagine a hypothetical e-commerce company whose architecture uses:
+
+1. Payment processing in a secure VPC with transaction records stored in HBase on EMR
+2. Redis is used to store active orders so that the processing engine can get fast access to them.
+3. DocumentDB (i.e. a MongoDB-compatible store) for customer account data like email addresses, shipping addresses, etc.
+4. Their e-commerce site using auto-scaling on Fargate with their product catalog in Amazon Aurora.
+5. Cloudwatch Logs to house the Order Processor's log events.
+6. A write-once-read-many data warehouse on Redshift.
+7. Shipment tracking data in DynamoDB.
+8. A fleet of drivers performing last-mile delivery while utilizing IoT-enabled tablets.
+9. Advertising conversion data from a 3rd-party cloud provider.
+
+![Architecture Image](https://github.com/awslabs/aws-athena-query-federation/blob/master/docs/img/athena_federation_demo.png?raw=true)
+
+Customer service agents begin receiving calls about orders 'stuck' in a weird state. Some show as pending even though they have been delivered, while others show as delivered but haven't actually shipped. It would be great if we could quickly run a query across this diverse architecture to understand which orders might be affected and what they have in common.
+
+Using Amazon Athena Query Federation and many of the connectors found in this repository, our hypothetical e-commerce company would be able to run a query that:
+
+1. Grabs all active orders from Redis. (see athena-redis)
+2. Joins against any orders with 'WARN' or 'ERROR' events in CloudWatch Logs by using regex matching and extraction. (see athena-cloudwatch)
+3. Joins against our EC2 inventory to get the hostname(s) and status of the Order Processor(s) that logged the 'WARN' or 'ERROR'. (see athena-cmdb)
+4. Joins against DocumentDB to obtain customer contact details for the affected orders. (see athena-docdb)
+5. Joins against a scatter-gather query sent to the Driver Fleet via Android Push notification. (see athena-android)
+6. Joins against DynamoDB to get shipping status and tracking details. (see athena-dynamodb)
+7. Joins against HBase to get payment status for the affected orders. (see athena-hbase)
+8. Joins against the advertising conversion data in BigQuery to see which promotions need to be applied if a re-order is needed. (see athena-bigquery)
+
+```sql
+WITH logs
+ AS (SELECT log_stream,
+ message AS
+ order_processor_log,
+ Regexp_extract(message, '.*orderId=(\d+) .*', 1) AS orderId,
+ Regexp_extract(message, '(.*):.*', 1) AS log_level
+ FROM
+ "lambda:cloudwatch"."/var/ecommerce-engine/order-processor".all_log_streams
+ WHERE Regexp_extract(message, '(.*):.*', 1) != 'WARN'),
+ active_orders
+ AS (SELECT *
+ FROM redis.redis_db.redis_customer_orders),
+ order_processors
+ AS (SELECT instanceid,
+ publicipaddress,
+ state.NAME
+ FROM awscmdb.ec2.ec2_instances),
+ customer
+ AS (SELECT id,
+ email
+ FROM docdb.customers.customer_info),
+ addresses
+ AS (SELECT id,
+ is_residential,
+ address.street AS street
+ FROM docdb.customers.customer_addresses),
+ drivers
+ AS ( SELECT name as driver_name,
+ result_field as driver_order,
+ device_id as truck_id,
+ last_updated
+ FROM android.android.live_query where query_timeout = 5000 and query_min_results=5),
+ impressions
+ AS ( SELECT path as advertisement,
+ conversion
+ FROM bigquery.click_impressions.click_conversions),
+ shipments
+ AS ( SELECT order_id,
+ shipment_id,
+ from_unixtime(cast(shipped_date as double)) as shipment_time,
+ carrier
+ FROM lambda_ddb.default.order_shipments),
+ payments
+ AS ( SELECT "summary:order_id",
+ "summary:status",
+ "summary:cc_id",
+ "details:network"
+ FROM "hbase".hbase_payments.transactions)
+
+SELECT _key_ AS redis_order_id,
+ customer_id,
+ customer.email AS cust_email,
+ "summary:cc_id" AS credit_card,
+ "details:network" AS CC_type,
+ "summary:status" AS payment_status,
+ impressions.advertisement as advertisement,
+ status AS redis_status,
+ addresses.street AS street_address,
+ shipments.shipment_time as shipment_time,
+ shipments.carrier as shipment_carrier,
+ driver_name AS driver_name,
+ truck_id AS truck_id,
+ last_updated AS driver_updated,
+ publicipaddress AS ec2_order_processor,
+ NAME AS ec2_state,
+ log_level,
+ order_processor_log
+FROM active_orders
+ LEFT JOIN logs
+ ON logs.orderid = active_orders._key_
+ LEFT JOIN order_processors
+ ON logs.log_stream = order_processors.instanceid
+ LEFT JOIN customer
+ ON customer.id = customer_id
+ LEFT JOIN addresses
+ ON addresses.id = address_id
+ LEFT JOIN drivers
+ ON drivers.driver_order = active_orders._key_
+ LEFT JOIN impressions
+ ON impressions.conversion = active_orders._key_
+ LEFT JOIN shipments
+ ON shipments.order_id = active_orders._key_
+ LEFT JOIN payments
+ ON payments."summary:order_id" = active_orders._key_
+```
+
+## License
+
+This project is licensed under the Apache-2.0 License.
diff --git a/athena-android/pom.xml b/athena-android/pom.xml
new file mode 100644
index 0000000000..801e7933c9
--- /dev/null
+++ b/athena-android/pom.xml
@@ -0,0 +1,67 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <parent>
+        <artifactId>aws-athena-query-federation</artifactId>
+        <groupId>com.amazonaws</groupId>
+        <version>1.0</version>
+    </parent>
+    <modelVersion>4.0.0</modelVersion>
+
+    <artifactId>athena-android</artifactId>
+
+    <dependencies>
+        <dependency>
+            <groupId>com.amazonaws</groupId>
+            <artifactId>aws-athena-federation-sdk</artifactId>
+            <version>${aws-athena-federation-sdk.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>com.google.firebase</groupId>
+            <artifactId>firebase-admin</artifactId>
+            <version>6.10.0</version>
+        </dependency>
+        <dependency>
+            <groupId>com.fasterxml.jackson.core</groupId>
+            <artifactId>jackson-databind</artifactId>
+            <version>2.9.8</version>
+        </dependency>
+        <dependency>
+            <groupId>com.amazonaws</groupId>
+            <artifactId>aws-java-sdk-sqs</artifactId>
+            <version>1.11.636</version>
+        </dependency>
+    </dependencies>
+
+    <build>
+        <plugins>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-shade-plugin</artifactId>
+                <version>3.2.1</version>
+                <configuration>
+                    <createDependencyReducedPom>false</createDependencyReducedPom>
+                    <filters>
+                        <filter>
+                            <artifact>*:*</artifact>
+                            <excludes>
+                                <exclude>META-INF/*.SF</exclude>
+                                <exclude>META-INF/*.DSA</exclude>
+                                <exclude>META-INF/*.RSA</exclude>
+                            </excludes>
+                        </filter>
+                    </filters>
+                </configuration>
+                <executions>
+                    <execution>
+                        <phase>package</phase>
+                        <goals>
+                            <goal>shade</goal>
+                        </goals>
+                    </execution>
+                </executions>
+            </plugin>
+        </plugins>
+    </build>
+</project>
\ No newline at end of file
diff --git a/athena-android/src/main/java/com/amazonaws/athena/connectors/android/AndroidDeviceTable.java b/athena-android/src/main/java/com/amazonaws/athena/connectors/android/AndroidDeviceTable.java
new file mode 100644
index 0000000000..a64fd4fa52
--- /dev/null
+++ b/athena-android/src/main/java/com/amazonaws/athena/connectors/android/AndroidDeviceTable.java
@@ -0,0 +1,147 @@
+/*-
+ * #%L
+ * athena-android
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+package com.amazonaws.athena.connectors.android;
+
+import com.amazonaws.athena.connector.lambda.data.Block;
+import com.amazonaws.athena.connector.lambda.data.SchemaBuilder;
+import com.amazonaws.athena.connector.lambda.domain.TableName;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.pojo.Schema;
+
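+/**
+ * Defines the schema and field accessors for the single "android"."live_query" table exposed
+ * by this connector. The table name doubles as the Firebase push subscription topic used by
+ * the devices.
+ */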
+public class AndroidDeviceTable
+{
+ private final TableName tableName;
+ private final Schema schema;
+
+ public AndroidDeviceTable()
+ {
+ //Table name must match the firebase push subscription topic used on the devices
+ this.tableName = new TableName("android", "live_query");
+ schema = new SchemaBuilder().newBuilder()
+ .addStringField("device_id")
+ .addStringField("name")
+ .addStringField("echo_value")
+ .addStringField("result_field")
+ .addField("last_updated", Types.MinorType.DATEMILLI.getType())
+ .addIntField("score")
+ .addBigIntField("query_timeout")
+ .addBigIntField("query_min_results")
+ .addMetadata("device_id", "Android device id of the responding device.")
+ .addMetadata("name", "Name of the simulated device owner.")
+ .addMetadata("last_updated", "Last time this data was fetched")
+ .addMetadata("echo_value", "The value requested by the search.")
+ .addMetadata("result_field", "Flattened copy of the first value from the values field.")
+ .addMetadata("score", "Randomly generated score")
+ .addMetadata("query_timeout", "used to configure the number of milli-seconds the query waits for the min_results")
+ .addMetadata("query_min_results", "The min number of results to wait for.")
+ .build();
+ }
+
+ public TableName getTableName()
+ {
+ return tableName;
+ }
+
+ public Schema getSchema()
+ {
+ return schema;
+ }
+
+ public String getQueryMinResultsField()
+ {
+ return "query_min_results";
+ }
+
+ public String getQueryTimeout()
+ {
+ return "query_timeout";
+ }
+
+ public String getDeviceIdField()
+ {
+ return "device_id";
+ }
+
+ public String getLastUpdatedField()
+ {
+ return "last_updated";
+ }
+
+ public String getNameField()
+ {
+ return "name";
+ }
+
+ public String getEchoValueField()
+ {
+ return "echo_value";
+ }
+
+ public String getResultField()
+ {
+ return "result_field";
+ }
+
+ public String getScoreField()
+ {
+ return "score";
+ }
+
+ public FieldVector getQueryMinResultsField(Block block)
+ {
+ return block.getFieldVector("query_min_results");
+ }
+
+ public FieldVector getQueryTimeout(Block block)
+ {
+ return block.getFieldVector("query_timeout");
+ }
+
+ public FieldVector getDeviceIdField(Block block)
+ {
+ return block.getFieldVector("device_id");
+ }
+
+ public FieldVector getNameField(Block block)
+ {
+ return block.getFieldVector("name");
+ }
+
+ public FieldVector getLastUpdatedField(Block block)
+ {
+ return block.getFieldVector("last_updated");
+ }
+
+ public FieldVector getEchoValueField(Block block)
+ {
+ return block.getFieldVector("echo_value");
+ }
+
+ public FieldVector getResultField(Block block)
+ {
+ return block.getFieldVector("result_field");
+ }
+
+ public FieldVector getScoreField(Block block)
+ {
+ return block.getFieldVector("score");
+ }
+}
diff --git a/athena-android/src/main/java/com/amazonaws/athena/connectors/android/AndroidMetadataHandler.java b/athena-android/src/main/java/com/amazonaws/athena/connectors/android/AndroidMetadataHandler.java
new file mode 100644
index 0000000000..8f5d7a4da6
--- /dev/null
+++ b/athena-android/src/main/java/com/amazonaws/athena/connectors/android/AndroidMetadataHandler.java
@@ -0,0 +1,109 @@
+/*-
+ * #%L
+ * athena-android
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+package com.amazonaws.athena.connectors.android;
+
+import com.amazonaws.athena.connector.lambda.QueryStatusChecker;
+import com.amazonaws.athena.connector.lambda.data.BlockAllocator;
+import com.amazonaws.athena.connector.lambda.data.BlockWriter;
+import com.amazonaws.athena.connector.lambda.domain.Split;
+import com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation;
+import com.amazonaws.athena.connector.lambda.handlers.MetadataHandler;
+import com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest;
+import com.amazonaws.athena.connector.lambda.metadata.GetSplitsResponse;
+import com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutRequest;
+import com.amazonaws.athena.connector.lambda.metadata.GetTableRequest;
+import com.amazonaws.athena.connector.lambda.metadata.GetTableResponse;
+import com.amazonaws.athena.connector.lambda.metadata.ListSchemasRequest;
+import com.amazonaws.athena.connector.lambda.metadata.ListSchemasResponse;
+import com.amazonaws.athena.connector.lambda.metadata.ListTablesRequest;
+import com.amazonaws.athena.connector.lambda.metadata.ListTablesResponse;
+import com.amazonaws.athena.connector.lambda.security.EncryptionKey;
+import com.amazonaws.athena.connector.lambda.security.EncryptionKeyFactory;
+import com.amazonaws.services.athena.AmazonAthena;
+import com.amazonaws.services.secretsmanager.AWSSecretsManager;
+import org.apache.arrow.util.VisibleForTesting;
+
+import java.util.Collections;
+
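+/**
+ * Handles metadata requests for the Android connector: a single schema and table, no
+ * partitioning, and a single split per query.
+ */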
+public class AndroidMetadataHandler
+ extends MetadataHandler
+{
+ private static final String sourceType = "android";
+ private static final AndroidDeviceTable androidDeviceTable = new AndroidDeviceTable();
+
+ public AndroidMetadataHandler()
+ {
+ super(sourceType);
+ }
+
+ @VisibleForTesting
+ protected AndroidMetadataHandler(
+ EncryptionKeyFactory keyFactory,
+ AWSSecretsManager secretsManager,
+ AmazonAthena athena,
+ String spillBucket,
+ String spillPrefix)
+ {
+ super(keyFactory, secretsManager, athena, sourceType, spillBucket, spillPrefix);
+ }
+
+ @Override
+ public ListSchemasResponse doListSchemaNames(BlockAllocator blockAllocator, ListSchemasRequest listSchemasRequest)
+ {
+ String schemaName = androidDeviceTable.getTableName().getSchemaName();
+ return new ListSchemasResponse(listSchemasRequest.getCatalogName(), Collections.singletonList(schemaName));
+ }
+
+ @Override
+ public ListTablesResponse doListTables(BlockAllocator blockAllocator, ListTablesRequest listTablesRequest)
+ {
+ return new ListTablesResponse(listTablesRequest.getCatalogName(),
+ Collections.singletonList(androidDeviceTable.getTableName()));
+ }
+
+ @Override
+ public GetTableResponse doGetTable(BlockAllocator blockAllocator, GetTableRequest getTableRequest)
+ {
+ if (!androidDeviceTable.getTableName().equals(getTableRequest.getTableName())) {
+ throw new RuntimeException("Unknown table " + getTableRequest.getTableName());
+ }
+
+ return new GetTableResponse(getTableRequest.getCatalogName(),
+ androidDeviceTable.getTableName(),
+ androidDeviceTable.getSchema());
+ }
+
+ @Override
+ public void getPartitions(BlockWriter blockWriter, GetTableLayoutRequest request, QueryStatusChecker queryStatusChecker)
+ throws Exception
+ {
+ //NoOp since we don't support partitioning
+ }
+
+ @Override
+ public GetSplitsResponse doGetSplits(BlockAllocator blockAllocator, GetSplitsRequest getSplitsRequest)
+ {
+ //Every split needs a unique spill location.
+ SpillLocation spillLocation = makeSpillLocation(getSplitsRequest);
+ EncryptionKey encryptionKey = makeEncryptionKey();
+ Split split = Split.newBuilder(spillLocation, encryptionKey).build();
+ return new GetSplitsResponse(getSplitsRequest.getCatalogName(), split);
+ }
+}
diff --git a/athena-android/src/main/java/com/amazonaws/athena/connectors/android/AndroidRecordHandler.java b/athena-android/src/main/java/com/amazonaws/athena/connectors/android/AndroidRecordHandler.java
new file mode 100644
index 0000000000..1e27ec4dbd
--- /dev/null
+++ b/athena-android/src/main/java/com/amazonaws/athena/connectors/android/AndroidRecordHandler.java
@@ -0,0 +1,174 @@
+/*-
+ * #%L
+ * athena-android
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+package com.amazonaws.athena.connectors.android;
+
+import com.amazonaws.athena.connector.lambda.QueryStatusChecker;
+import com.amazonaws.athena.connector.lambda.data.Block;
+import com.amazonaws.athena.connector.lambda.data.BlockSpiller;
+import com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet;
+import com.amazonaws.athena.connector.lambda.handlers.RecordHandler;
+import com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest;
+import com.amazonaws.services.athena.AmazonAthena;
+import com.amazonaws.services.athena.AmazonAthenaClientBuilder;
+import com.amazonaws.services.s3.AmazonS3;
+import com.amazonaws.services.s3.AmazonS3ClientBuilder;
+import com.amazonaws.services.secretsmanager.AWSSecretsManager;
+import com.amazonaws.services.secretsmanager.AWSSecretsManagerClientBuilder;
+import com.amazonaws.services.sqs.AmazonSQS;
+import com.amazonaws.services.sqs.AmazonSQSClientBuilder;
+import com.amazonaws.services.sqs.model.DeleteMessageBatchRequestEntry;
+import com.amazonaws.services.sqs.model.GetQueueUrlResult;
+import com.amazonaws.services.sqs.model.ReceiveMessageRequest;
+import com.amazonaws.services.sqs.model.ReceiveMessageResult;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import org.apache.arrow.util.VisibleForTesting;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
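+/**
+ * Handles record requests by broadcasting the query to subscribed devices via Firebase
+ * Cloud Messaging and then collecting their responses from an SQS queue.
+ */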
+public class AndroidRecordHandler
+ extends RecordHandler
+{
+ private static final String sourceType = "android";
+ private static final Logger logger = LoggerFactory.getLogger(AndroidRecordHandler.class);
+
+ private static final String FIREBASE_DB_URL = "FIREBASE_DB_URL";
+ private static final String FIREBASE_CONFIG = "FIREBASE_CONFIG";
+ private static final String RESPONSE_QUEUE_NAME = "RESPONSE_QUEUE_NAME";
+ private static final String MAX_WAIT_TIME = "MAX_WAIT_TIME";
+ private static final String MIN_RESULTS = "MIN_RESULTS";
+
+ private final AndroidDeviceTable androidTable = new AndroidDeviceTable();
+ private final ObjectMapper mapper = new ObjectMapper();
+ private final AmazonSQS amazonSQS;
+ private final LiveQueryService liveQueryService;
+ private final String queueUrl;
+
+ public AndroidRecordHandler()
+ {
+ this(AmazonS3ClientBuilder.defaultClient(),
+ AWSSecretsManagerClientBuilder.defaultClient(),
+ AmazonAthenaClientBuilder.defaultClient(),
+ AmazonSQSClientBuilder.defaultClient(),
+ new LiveQueryService(System.getenv(FIREBASE_CONFIG), System.getenv(FIREBASE_DB_URL)));
+ }
+
+ @VisibleForTesting
+ protected AndroidRecordHandler(AmazonS3 amazonS3,
+ AWSSecretsManager secretsManager,
+ AmazonAthena athena,
+ AmazonSQS amazonSQS,
+ LiveQueryService liveQueryService)
+ {
+ super(amazonS3, secretsManager, athena, sourceType);
+ this.amazonSQS = amazonSQS;
+ this.liveQueryService = liveQueryService;
+ GetQueueUrlResult queueUrlResult = amazonSQS.getQueueUrl(System.getenv(RESPONSE_QUEUE_NAME));
+ queueUrl = queueUrlResult.getQueueUrl();
+ }
+
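+    /**
+     * Broadcasts a query request to the Firebase topic named after the requested table, then
+     * gathers matching device responses from the SQS response queue.
+     */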
+ @Override
+ protected void readWithConstraint(BlockSpiller blockSpiller, ReadRecordsRequest readRecordsRequest, QueryStatusChecker queryStatusChecker)
+ {
+ QueryRequest request = QueryRequest.newBuilder()
+ .withQueryId(readRecordsRequest.getQueryId())
+ .withQuery("query details")
+ .withResponseQueue(queueUrl)
+ .build();
+
+ String response = liveQueryService.broadcastQuery(readRecordsRequest.getTableName().getTableName(), request);
+ logger.info("readWithConstraint: Android broadcast result: " + response);
+
+ readResultsFromSqs(blockSpiller, readRecordsRequest);
+ }
+
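+    /**
+     * Polls the SQS response queue for responses matching this query's id, writing each device's
+     * values as rows until the query timeout elapses, or the minimum result count has been reached
+     * and the queue has drained.
+     */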
+ private void readResultsFromSqs(BlockSpiller blockSpiller, ReadRecordsRequest readRecordsRequest)
+ {
+ final Map<String, Field> fields = new HashMap<>();
+ readRecordsRequest.getSchema().getFields().forEach(next -> fields.put(next.getName(), next));
+
+ ReceiveMessageRequest receiveRequest = new ReceiveMessageRequest()
+ .withQueueUrl(queueUrl)
+ .withWaitTimeSeconds(1);
+
+ ValueSet queryTimeoutValueSet = readRecordsRequest.getConstraints().getSummary().get(androidTable.getQueryTimeout());
+ ValueSet minResultsValueSet = readRecordsRequest.getConstraints().getSummary().get(androidTable.getQueryMinResultsField());
+
+ long maxWaitTime = queryTimeoutValueSet != null && queryTimeoutValueSet.isSingleValue() ?
+ (long) queryTimeoutValueSet.getSingleValue() : Long.parseLong(System.getenv(MAX_WAIT_TIME));
+ long minResults = minResultsValueSet != null && minResultsValueSet.isSingleValue() ?
+ (long) minResultsValueSet.getSingleValue() : Long.parseLong(System.getenv(MIN_RESULTS));
+
+ logger.info("readResultsFromSqs: using timeout of " + maxWaitTime + " ms and min_results of " + minResults);
+
+ long startTime = System.currentTimeMillis();
+ long numResults = 0;
+ ReceiveMessageResult receiveMessageResult;
+ List<DeleteMessageBatchRequestEntry> msgsToAck = new ArrayList<>();
+ do {
+ receiveMessageResult = amazonSQS.receiveMessage(receiveRequest);
+ for (com.amazonaws.services.sqs.model.Message next : receiveMessageResult.getMessages()) {
+ try {
+ QueryResponse queryResponse = mapper.readValue(next.getBody(), QueryResponse.class);
+ if (queryResponse.getQueryId().equals(readRecordsRequest.getQueryId())) {
+ numResults++;
+ msgsToAck.add(new DeleteMessageBatchRequestEntry().withReceiptHandle(next.getReceiptHandle()).withId(next.getMessageId()));
+ blockSpiller.writeRows((Block block, int rowNum) -> {
+ int newRows = 0;
+
+ for (String nextVal : queryResponse.getValues()) {
+ boolean matches = true;
+ int effectiveRow = newRows + rowNum;
+
+ matches &= block.offerValue(androidTable.getDeviceIdField(), effectiveRow, queryResponse.getDeviceId());
+ matches &= block.offerValue(androidTable.getNameField(), effectiveRow, queryResponse.getName());
+ matches &= block.offerValue(androidTable.getEchoValueField(), effectiveRow, queryResponse.getEchoValue());
+ matches &= block.offerValue(androidTable.getLastUpdatedField(), effectiveRow, System.currentTimeMillis());
+ matches &= block.offerValue(androidTable.getResultField(), effectiveRow, nextVal);
+ matches &= block.offerValue(androidTable.getScoreField(), effectiveRow, queryResponse.getRandom());
+ matches &= block.offerValue(androidTable.getQueryMinResultsField(), effectiveRow, minResults);
+ matches &= block.offerValue(androidTable.getQueryTimeout(), effectiveRow, maxWaitTime);
+
+ newRows += matches ? 1 : 0;
+ }
+
+ return newRows;
+ });
+ logger.info("Received matching response " + queryResponse.toString());
+ }
+ }
+ catch (RuntimeException | IOException ex) {
+ logger.error("Error processing msg", ex);
+ }
+ }
+ if (!msgsToAck.isEmpty()) {
+ amazonSQS.deleteMessageBatch(queueUrl, msgsToAck);
+ msgsToAck.clear();
+ }
+ }
+ while (System.currentTimeMillis() - startTime < maxWaitTime && (numResults < minResults || receiveMessageResult.getMessages().size() > 0));
+ }
+}
diff --git a/athena-android/src/main/java/com/amazonaws/athena/connectors/android/LiveQueryService.java b/athena-android/src/main/java/com/amazonaws/athena/connectors/android/LiveQueryService.java
new file mode 100644
index 0000000000..2e287c2092
--- /dev/null
+++ b/athena-android/src/main/java/com/amazonaws/athena/connectors/android/LiveQueryService.java
@@ -0,0 +1,68 @@
+/*-
+ * #%L
+ * athena-android
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+package com.amazonaws.athena.connectors.android;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.google.auth.oauth2.GoogleCredentials;
+import com.google.firebase.FirebaseApp;
+import com.google.firebase.FirebaseOptions;
+import com.google.firebase.messaging.FirebaseMessaging;
+import com.google.firebase.messaging.FirebaseMessagingException;
+import com.google.firebase.messaging.Message;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+
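+/**
+ * Thin wrapper around Firebase Cloud Messaging that initializes the Firebase app from the
+ * supplied credentials and broadcasts query requests to subscribed devices.
+ */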
+public class LiveQueryService
+{
+ private static final String PUSH_MSG_FIELD = "query_request";
+ private final ObjectMapper mapper = new ObjectMapper();
+
+ public LiveQueryService(String authConfig, String databaseUrl)
+ {
+ try {
+ InputStream inputStream = new ByteArrayInputStream(authConfig.getBytes());
+ FirebaseOptions options = new FirebaseOptions.Builder()
+ .setCredentials(GoogleCredentials.fromStream(inputStream))
+ .setDatabaseUrl(databaseUrl)
+ .build();
+
+ FirebaseApp.initializeApp(options);
+ }
+ catch (IOException ex) {
+ throw new RuntimeException(ex);
+ }
+ }
+
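+    /**
+     * Serializes the query request to JSON and publishes it to the given Firebase topic,
+     * returning the Firebase message id.
+     */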
+ public String broadcastQuery(String topic, QueryRequest query)
+ {
+ try {
+ Message.Builder messageBuilder = Message.builder();
+ messageBuilder.putData(PUSH_MSG_FIELD, mapper.writeValueAsString(query));
+ messageBuilder.setTopic(topic);
+ return FirebaseMessaging.getInstance().send(messageBuilder.build());
+ }
+ catch (JsonProcessingException | FirebaseMessagingException ex) {
+ throw new RuntimeException(ex);
+ }
+ }
+}
diff --git a/athena-android/src/main/java/com/amazonaws/athena/connectors/android/QueryRequest.java b/athena-android/src/main/java/com/amazonaws/athena/connectors/android/QueryRequest.java
new file mode 100644
index 0000000000..01b8e8ae3f
--- /dev/null
+++ b/athena-android/src/main/java/com/amazonaws/athena/connectors/android/QueryRequest.java
@@ -0,0 +1,132 @@
+/*-
+ * #%L
+ * athena-android
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+package com.amazonaws.athena.connectors.android;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+public class QueryRequest
+{
+ private final String queryId;
+ private final String query;
+ private final String echoValue;
+ private final String responseQueue;
+
+ @JsonCreator
+ public QueryRequest(@JsonProperty("queryId") String queryId,
+ @JsonProperty("query") String query,
+ @JsonProperty("echoValue") String echoValue,
+ @JsonProperty("responseQueue") String responseQueue)
+ {
+ this.queryId = queryId;
+ this.query = query;
+ this.echoValue = echoValue;
+ this.responseQueue = responseQueue;
+ }
+
+ private QueryRequest(Builder builder)
+ {
+ queryId = builder.queryId;
+ query = builder.query;
+ echoValue = builder.echoValue;
+ responseQueue = builder.responseQueue;
+ }
+
+ public static Builder newBuilder()
+ {
+ return new Builder();
+ }
+
+ @JsonProperty("query")
+ public String getQuery()
+ {
+ return query;
+ }
+
+ @JsonProperty("echoValue")
+ public String getEchoValue()
+ {
+ return echoValue;
+ }
+
+ @JsonProperty("queryId")
+ public String getQueryId()
+ {
+ return queryId;
+ }
+
+ @JsonProperty("responseQueue")
+ public String getResponseQueue()
+ {
+ return responseQueue;
+ }
+
+ @Override
+ public String toString()
+ {
+ return "QueryRequest{" +
+ "queryId='" + queryId + '\'' +
+ ", query='" + query + '\'' +
+ ", echoValue='" + echoValue + '\'' +
+ ", responseQueue='" + responseQueue + '\'' +
+ '}';
+ }
+
+ public static final class Builder
+ {
+ private String queryId;
+ private String query;
+ private String echoValue;
+ private String responseQueue;
+
+ private Builder()
+ {
+ }
+
+ public Builder withQuery(String val)
+ {
+ query = val;
+ return this;
+ }
+
+ public Builder withEchoValue(String val)
+ {
+ echoValue = val;
+ return this;
+ }
+
+ public Builder withResponseQueue(String val)
+ {
+ responseQueue = val;
+ return this;
+ }
+
+ public Builder withQueryId(String val)
+ {
+ queryId = val;
+ return this;
+ }
+
+ public QueryRequest build()
+ {
+ return new QueryRequest(this);
+ }
+ }
+}
diff --git a/athena-android/src/main/java/com/amazonaws/athena/connectors/android/QueryResponse.java b/athena-android/src/main/java/com/amazonaws/athena/connectors/android/QueryResponse.java
new file mode 100644
index 0000000000..b556d024e2
--- /dev/null
+++ b/athena-android/src/main/java/com/amazonaws/athena/connectors/android/QueryResponse.java
@@ -0,0 +1,170 @@
+/*-
+ * #%L
+ * athena-android
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+package com.amazonaws.athena.connectors.android;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+import java.util.List;
+
+public class QueryResponse
+{
+ private final String deviceId;
+ private final String queryId;
+ private final String name;
+ private final String echoValue;
+ private final List<String> values;
+ private final int random;
+
+ @JsonCreator
+ public QueryResponse(@JsonProperty("deviceId") String deviceId,
+ @JsonProperty("queryId") String queryId,
+ @JsonProperty("name") String name,
+ @JsonProperty("echoValue") String echoValue,
+ @JsonProperty("values") List values,
+ @JsonProperty("random") int random)
+ {
+ this.deviceId = deviceId;
+ this.queryId = queryId;
+ this.name = name;
+ this.echoValue = echoValue;
+ this.values = values;
+ this.random = random;
+ }
+
+ private QueryResponse(Builder builder)
+ {
+ queryId = builder.queryId;
+ deviceId = builder.deviceId;
+ name = builder.name;
+ echoValue = builder.echoValue;
+ values = builder.values;
+ random = builder.random;
+ }
+
+ public static Builder newBuilder()
+ {
+ return new Builder();
+ }
+
+ @JsonProperty("deviceId")
+ public String getDeviceId()
+ {
+ return deviceId;
+ }
+
+ @JsonProperty("queryId")
+ public String getQueryId()
+ {
+ return queryId;
+ }
+
+ @JsonProperty("name")
+ public String getName()
+ {
+ return name;
+ }
+
+ @JsonProperty("echoValue")
+ public String getEchoValue()
+ {
+ return echoValue;
+ }
+
+ @JsonProperty("values")
+ public List<String> getValues()
+ {
+ return values;
+ }
+
+ @JsonProperty("random")
+ public int getRandom()
+ {
+ return random;
+ }
+
+ @Override
+ public String toString()
+ {
+ return "QueryResponse{" +
+ "deviceId='" + deviceId + '\'' +
+ ", queryId='" + queryId + '\'' +
+ ", name='" + name + '\'' +
+ ", echoValue='" + echoValue + '\'' +
+ ", values=" + values +
+ ", random=" + random +
+ '}';
+ }
+
+ public static final class Builder
+ {
+ private String deviceId;
+ private String queryId;
+ private String name;
+ private String echoValue;
+ private List<String> values;
+ private int random;
+
+ private Builder()
+ {
+ }
+
+ public Builder withDeviceId(String val)
+ {
+ deviceId = val;
+ return this;
+ }
+
+ public Builder withQueryId(String val)
+ {
+ queryId = val;
+ return this;
+ }
+
+ public Builder withEchoValue(String val)
+ {
+ echoValue = val;
+ return this;
+ }
+
+ public Builder withName(String val)
+ {
+ name = val;
+ return this;
+ }
+
+ public Builder withValues(List<String> val)
+ {
+ values = val;
+ return this;
+ }
+
+ public Builder withRandom(int val)
+ {
+ random = val;
+ return this;
+ }
+
+ public QueryResponse build()
+ {
+ return new QueryResponse(this);
+ }
+ }
+}
diff --git a/athena-aws-cmdb/LICENSE.txt b/athena-aws-cmdb/LICENSE.txt
new file mode 100644
index 0000000000..418de4c108
--- /dev/null
+++ b/athena-aws-cmdb/LICENSE.txt
@@ -0,0 +1,174 @@
+Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
\ No newline at end of file
diff --git a/athena-aws-cmdb/README.md b/athena-aws-cmdb/README.md
new file mode 100644
index 0000000000..1cfdfa2925
--- /dev/null
+++ b/athena-aws-cmdb/README.md
@@ -0,0 +1,66 @@
+# Amazon Athena AWS CMDB Connector
+
+This connector enables Amazon Athena to communicate with various AWS Services, making your AWS Resource inventory accessible via SQL.
+
+## Usage
+
+### Parameters
+
+The Athena AWS CMDB Connector provides several configuration options via Lambda environment variables. More detail on the available parameters can be found below.
+
+1. **spill_bucket** - When the data returned by your Lambda function exceeds Lambda’s limits, this is the bucket that the data will be written to for Athena to read the excess from. (e.g. my_bucket)
+2. **spill_prefix** - (Optional) Defaults to a sub-folder in your spill bucket called 'athena-federation-spill'. Used in conjunction with spill_bucket, this is the path within the above bucket where large responses are spilled. You should configure an S3 lifecycle rule on this location to delete old spills after X days/hours.
+3. **kms_key_id** - (Optional) By default any data that is spilled to S3 is encrypted using AES-GCM and a randomly generated key. Setting a KMS Key ID allows your Lambda function to use KMS for key generation for a stronger source of encryption keys. (e.g. a7e63k4b-8loc-40db-a2a1-4d0en2cd8331)
+4. **disable_spill_encryption** - (Optional) Defaults to False so that any data that is spilled to S3 is encrypted using AES-GCM, either with a randomly generated key or using KMS to generate keys. Setting this to True disables spill encryption. You may wish to disable encryption for improved performance, especially if your spill location in S3 uses S3 Server Side Encryption. (e.g. True or False)
+5. **default_ec2_image_owner** - (Optional) When set, this controls the default EC2 image (aka AMI) owner used to filter AMIs. When this isn't set and your query against the ec2_images table does not include a filter on owner, you will get a large number of results, since the response will include all public images.
+
+### Databases & Tables
+
+The Athena AWS CMDB Connector makes the following databases and tables available for querying your AWS Resource Inventory. For more information on the columns available in each table, try running a 'describe database.table' from the Athena Console or API; example queries are shown after the table list below.
+
+1. **ec2** - This database contains EC2 related resources, including:
+ * **ebs_volumes** - Contains details of your EBS volumes.
+ * **ec2_instances** - Contains details of your EC2 Instances.
+ * **ec2_images** - Contains details of your EC2 Instance images.
+ * **routing_tables** - Contains details of your VPC Routing Tables.
+ * **security_groups** - Contains details of your Security Groups.
+ * **subnets** - Contains details of your VPC Subnets.
+ * **vpcs** - Contains details of your VPCs.
+2. **emr** - This database contains EMR related resources, including:
+ * **emr_clusters** - Contains details of your EMR Clusters.
+3. **rds** - This database contains RDS related resources, including:
+ * **rds_instances** - Contains details of your RDS Instances.
+4. **s3** - This database contains S3 related resources, including:
+ * **buckets** - Contains details of your S3 buckets.
+ * **objects** - Contains details of your S3 Objects (excludes their contents).
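+
+For example, once the connector is deployed (see "Deploying The Connector" below), queries like the following sketch how these tables can be explored. The `<function_name>` catalog segment is a placeholder for whatever Lambda function name you chose at deployment time; substitute your own.
+
+```sql
+-- Inspect the columns exposed by the ec2_instances table.
+describe "lambda:<function_name>".ec2.ec2_instances;
+
+-- Inventory EBS volumes across your account.
+select * from "lambda:<function_name>".ec2.ebs_volumes limit 100;
+```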
+
+### Required Permissions
+
+Review the "Policies" section of the athena-aws-cmdb.yaml file for full details on the IAM Policies required by this connector. A brief summary is below.
+
+1. S3 Write Access - In order to successfully handle large queries, the connector requires write access to a location in S3.
+1. EC2 Describe - The connector uses this access to describe your EC2 Instances, Security Groups, VPCs, EBS Volumes, etc...
+1. EMR Describe / List - The connector uses this access to describe your EMR Clusters.
+1. RDS Describe - The connector uses this access to describe your RDS Instances.
+1. S3 List - The connector uses this access to list your buckets and objects.
+1. Athena GetQueryExecution - The connector uses this access to fast-fail when the upstream Athena query has terminated.
+
+### Deploying The Connector
+
+To use this connector in your queries, navigate to AWS Serverless Application Repository and deploy a pre-built version of this connector. Alternatively, you can build and deploy this connector from source by following the steps below, or use the more detailed tutorial in the athena-example module:
+
+1. From the athena-federation-sdk dir, run `mvn clean install` if you haven't already.
+2. From the athena-aws-cmdb dir, run `mvn clean install`.
+3. From the athena-aws-cmdb dir, run `../tools/publish.sh S3_BUCKET_NAME athena-aws-cmdb` to publish the connector to your private AWS Serverless Application Repository. The S3_BUCKET_NAME in the command is where a copy of the connector's code will be stored for the Serverless Application Repository to retrieve it. This allows users with permission to deploy instances of the connector via a 1-Click form. Then navigate to [Serverless Application Repository](https://aws.amazon.com/serverless/serverlessrepo).
+4. Try running a query like the one below in Athena:
+```sql
+select * from "lambda:<function_name>".ec2.ec2_instances limit 100
+```
+
+## Performance
+
+The Athena AWS CMDB Connector does not currently support parallel scans. Predicate pushdown is performed within the Lambda function, and where possible partial predicates are pushed down to the services being queried. For example, a query for the details of a specific EC2 Instance will turn into a targeted describe of that specific instance id against the EC2 API.
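+
+As a hedged illustration of that pushdown behavior (the function name and instance id below are placeholders, and the filter column is assumed to be `instance_id` as exposed by the ec2_instances table), a query constrained to a single instance lets the connector issue a targeted describe call rather than enumerating every instance:
+
+```sql
+-- The single-value predicate below can be pushed down to the EC2 API.
+select *
+from "lambda:<function_name>".ec2.ec2_instances
+where instance_id = 'i-0123456789abcdef0';
+```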
+
+## License
+
+This project is licensed under the Apache-2.0 License.
\ No newline at end of file
diff --git a/athena-aws-cmdb/athena-aws-cmdb.yaml b/athena-aws-cmdb/athena-aws-cmdb.yaml
new file mode 100644
index 0000000000..76225a36b3
--- /dev/null
+++ b/athena-aws-cmdb/athena-aws-cmdb.yaml
@@ -0,0 +1,73 @@
+Transform: 'AWS::Serverless-2016-10-31'
+Metadata:
+ 'AWS::ServerlessRepo::Application':
+ Name: AthenaAwsCmdbConnector
+ Description: 'This connector enables Amazon Athena to communicate with various AWS Services, making your resource inventories accessible via SQL.'
+ Author: 'Amazon Athena'
+ SpdxLicenseId: Apache-2.0
+ LicenseUrl: LICENSE.txt
+ ReadmeUrl: README.md
+ Labels:
+ - athena-federation
+ HomePageUrl: 'https://github.com/awslabs/aws-athena-query-federation'
+ SemanticVersion: 1.0.0
+ SourceCodeUrl: 'https://github.com/awslabs/aws-athena-query-federation'
+Parameters:
+ AthenaCatalogName:
+ Description: 'The name you will give to this catalog in Athena. It will also be used as the function name.'
+ Type: String
+ SpillBucket:
+ Description: 'The bucket where this function can spill data.'
+ Type: String
+ SpillPrefix:
+ Description: 'The bucket prefix where this function can spill large responses.'
+ Type: String
+ Default: athena-spill
+ LambdaTimeout:
+ Description: 'Maximum Lambda invocation runtime in seconds. (min 1 - 900 max)'
+ Default: 900
+ Type: Number
+ LambdaMemory:
+ Description: 'Lambda memory in MB (min 128 - 3008 max).'
+ Default: 3008
+ Type: Number
+ DisableSpillEncryption:
+ Description: "WARNING: If set to 'true' encryption for spilled data is disabled."
+ Default: 'false'
+ Type: String
+Resources:
+ ConnectorConfig:
+ Type: 'AWS::Serverless::Function'
+ Properties:
+ Environment:
+ Variables:
+ disable_spill_encryption: !Ref DisableSpillEncryption
+ spill_bucket: !Ref SpillBucket
+ spill_prefix: !Ref SpillPrefix
+ FunctionName: !Ref AthenaCatalogName
+ Handler: "com.amazonaws.athena.connectors.aws.cmdb.AwsCmdbCompositeHandler"
+ CodeUri: "./target/athena-aws-cmdb-1.0.jar"
+ Description: "Enables Amazon Athena to communicate with various AWS Services, making your resource inventories accessible via SQL."
+ Runtime: java8
+ Timeout: !Ref LambdaTimeout
+ MemorySize: !Ref LambdaMemory
+ Policies:
+ - Statement:
+ - Action:
+ - autoscaling:Describe*
+ - elasticloadbalancing:Describe*
+ - ec2:Describe*
+ - elasticmapreduce:Describe*
+ - elasticmapreduce:List*
+ - rds:Describe*
+ - rds:ListTagsForResource
+ - athena:GetQueryExecution
+ - s3:ListAllMyBuckets
+ - s3:ListBucket
+ Effect: Allow
+ Resource: '*'
+ Version: '2012-10-17'
+ #S3CrudPolicy allows our connector to spill large responses to S3. You can optionally replace this pre-made policy
+ #with one that is more restrictive and can only 'put' but not read,delete, or overwrite files.
+ - S3CrudPolicy:
+ BucketName: !Ref SpillBucket
\ No newline at end of file
diff --git a/athena-aws-cmdb/pom.xml b/athena-aws-cmdb/pom.xml
new file mode 100644
index 0000000000..981a2aac8b
--- /dev/null
+++ b/athena-aws-cmdb/pom.xml
@@ -0,0 +1,66 @@
+<?xml version="1.0"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <parent>
+        <artifactId>aws-athena-query-federation</artifactId>
+        <groupId>com.amazonaws</groupId>
+        <version>1.0</version>
+    </parent>
+    <modelVersion>4.0.0</modelVersion>
+
+    <artifactId>athena-aws-cmdb</artifactId>
+
+    <dependencies>
+        <dependency>
+            <groupId>com.amazonaws</groupId>
+            <artifactId>aws-athena-federation-sdk</artifactId>
+            <version>${aws-athena-federation-sdk.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>com.amazonaws</groupId>
+            <artifactId>aws-java-sdk-ec2</artifactId>
+            <version>${aws-sdk.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>com.amazonaws</groupId>
+            <artifactId>aws-java-sdk-emr</artifactId>
+            <version>${aws-sdk.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>com.amazonaws</groupId>
+            <artifactId>aws-java-sdk-rds</artifactId>
+            <version>${aws-sdk.version}</version>
+        </dependency>
+    </dependencies>
+
+    <build>
+        <plugins>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-shade-plugin</artifactId>
+                <version>3.2.1</version>
+                <configuration>
+                    <createDependencyReducedPom>false</createDependencyReducedPom>
+                    <filters>
+                        <filter>
+                            <artifact>*:*</artifact>
+                            <excludes>
+                                <exclude>META-INF/*.SF</exclude>
+                                <exclude>META-INF/*.DSA</exclude>
+                                <exclude>META-INF/*.RSA</exclude>
+                            </excludes>
+                        </filter>
+                    </filters>
+                </configuration>
+                <executions>
+                    <execution>
+                        <phase>package</phase>
+                        <goals>
+                            <goal>shade</goal>
+                        </goals>
+                    </execution>
+                </executions>
+            </plugin>
+        </plugins>
+    </build>
+</project>
\ No newline at end of file
diff --git a/athena-aws-cmdb/src/main/java/com/amazonaws/athena/connectors/aws/cmdb/AwsCmdbCompositeHandler.java b/athena-aws-cmdb/src/main/java/com/amazonaws/athena/connectors/aws/cmdb/AwsCmdbCompositeHandler.java
new file mode 100644
index 0000000000..8036fd1e31
--- /dev/null
+++ b/athena-aws-cmdb/src/main/java/com/amazonaws/athena/connectors/aws/cmdb/AwsCmdbCompositeHandler.java
@@ -0,0 +1,35 @@
+/*-
+ * #%L
+ * athena-aws-cmdb
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+package com.amazonaws.athena.connectors.aws.cmdb;
+
+import com.amazonaws.athena.connector.lambda.handlers.CompositeHandler;
+
+/**
+ * Boilerplate composite handler that allows us to use a single Lambda function for both
+ * Metadata and Data. In this case we just compose AwsCmdbMetadataHandler and AwsCmdbRecordHandler.
+ */
+public class AwsCmdbCompositeHandler
+ extends CompositeHandler
+{
+ public AwsCmdbCompositeHandler()
+ {
+ super(new AwsCmdbMetadataHandler(), new AwsCmdbRecordHandler());
+ }
+}
diff --git a/athena-aws-cmdb/src/main/java/com/amazonaws/athena/connectors/aws/cmdb/AwsCmdbMetadataHandler.java b/athena-aws-cmdb/src/main/java/com/amazonaws/athena/connectors/aws/cmdb/AwsCmdbMetadataHandler.java
new file mode 100644
index 0000000000..78cd23ddc2
--- /dev/null
+++ b/athena-aws-cmdb/src/main/java/com/amazonaws/athena/connectors/aws/cmdb/AwsCmdbMetadataHandler.java
@@ -0,0 +1,176 @@
+/*-
+ * #%L
+ * athena-aws-cmdb
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+package com.amazonaws.athena.connectors.aws.cmdb;
+
+import com.amazonaws.athena.connector.lambda.QueryStatusChecker;
+import com.amazonaws.athena.connector.lambda.data.BlockAllocator;
+import com.amazonaws.athena.connector.lambda.data.BlockWriter;
+import com.amazonaws.athena.connector.lambda.data.SchemaBuilder;
+import com.amazonaws.athena.connector.lambda.domain.Split;
+import com.amazonaws.athena.connector.lambda.domain.TableName;
+import com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation;
+import com.amazonaws.athena.connector.lambda.handlers.MetadataHandler;
+import com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest;
+import com.amazonaws.athena.connector.lambda.metadata.GetSplitsResponse;
+import com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutRequest;
+import com.amazonaws.athena.connector.lambda.metadata.GetTableRequest;
+import com.amazonaws.athena.connector.lambda.metadata.GetTableResponse;
+import com.amazonaws.athena.connector.lambda.metadata.ListSchemasRequest;
+import com.amazonaws.athena.connector.lambda.metadata.ListSchemasResponse;
+import com.amazonaws.athena.connector.lambda.metadata.ListTablesRequest;
+import com.amazonaws.athena.connector.lambda.metadata.ListTablesResponse;
+import com.amazonaws.athena.connector.lambda.security.EncryptionKey;
+import com.amazonaws.athena.connector.lambda.security.EncryptionKeyFactory;
+import com.amazonaws.athena.connectors.aws.cmdb.tables.TableProvider;
+import com.amazonaws.services.athena.AmazonAthena;
+import com.amazonaws.services.secretsmanager.AWSSecretsManager;
+import org.apache.arrow.util.VisibleForTesting;
+
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Handles metadata requests for the Athena AWS CMDB Connector.
+ *
+ * For more detail, please see the module's README.md, some notable characteristics of this class include:
+ *
+ * 1. Maps AWS Resources to SQL tables using a set of TableProviders constructed from a TableProviderFactory.
+ * 2. This class is largely a mux that delegates requests to the appropriate TableProvider based on the
+ * requested TableName.
+ * 3. Provides a schema and table list by scanning all loaded TableProviders.
+ */
+public class AwsCmdbMetadataHandler
+ extends MetadataHandler
+{
+ private static final String SOURCE_TYPE = "cmdb";
+ //Map of schema name to list of TableNames generated by scanning all loaded TableProviders.
+ private Map<String, List<TableName>> schemas;
+ //Map of available fully qualified TableNames to their respective TableProviders.
+ private Map<TableName, TableProvider> tableProviders;
+
+ public AwsCmdbMetadataHandler()
+ {
+ super(SOURCE_TYPE);
+ TableProviderFactory tableProviderFactory = new TableProviderFactory();
+ schemas = tableProviderFactory.getSchemas();
+ tableProviders = tableProviderFactory.getTableProviders();
+ }
+
+ @VisibleForTesting
+ protected AwsCmdbMetadataHandler(TableProviderFactory tableProviderFactory,
+ EncryptionKeyFactory keyFactory,
+ AWSSecretsManager secretsManager,
+ AmazonAthena athena,
+ String spillBucket,
+ String spillPrefix)
+ {
+ super(keyFactory, secretsManager, athena, SOURCE_TYPE, spillBucket, spillPrefix);
+ schemas = tableProviderFactory.getSchemas();
+ tableProviders = tableProviderFactory.getTableProviders();
+ }
+
+ /**
+ * Returns the list of supported schemas discovered from the loaded TableProvider scan.
+ *
+ * @see MetadataHandler
+ */
+ @Override
+ public ListSchemasResponse doListSchemaNames(BlockAllocator blockAllocator, ListSchemasRequest listSchemasRequest)
+ {
+ return new ListSchemasResponse(listSchemasRequest.getCatalogName(), schemas.keySet());
+ }
+
+ /**
+ * Returns the list of supported tables on the requested schema discovered from the loaded TableProvider scan.
+ *
+ * @see MetadataHandler
+ */
+ @Override
+ public ListTablesResponse doListTables(BlockAllocator blockAllocator, ListTablesRequest listTablesRequest)
+ {
+ return new ListTablesResponse(listTablesRequest.getCatalogName(), schemas.get(listTablesRequest.getSchemaName()));
+ }
+
+ /**
+ * Delegates to the TableProvider that is registered for the requested table.
+ *
+ * @see MetadataHandler
+ */
+ @Override
+ public GetTableResponse doGetTable(BlockAllocator blockAllocator, GetTableRequest getTableRequest)
+ {
+ TableProvider tableProvider = tableProviders.get(getTableRequest.getTableName());
+ if (tableProvider == null) {
+ throw new RuntimeException("Unknown table " + getTableRequest.getTableName());
+ }
+ return tableProvider.getTable(blockAllocator, getTableRequest);
+ }
+
+ /**
+ * Delegates to the TableProvider that is registered for the requested table.
+ *
+ * @see MetadataHandler
+ */
+ @Override
+ public void enhancePartitionSchema(SchemaBuilder partitionSchemaBuilder, GetTableLayoutRequest request)
+ {
+ TableProvider tableProvider = tableProviders.get(request.getTableName());
+ if (tableProvider == null) {
+ throw new RuntimeException("Unknown table " + request.getTableName());
+ }
+ tableProvider.enhancePartitionSchema(partitionSchemaBuilder, request);
+ }
+
+ /**
+ * Delegates to the TableProvider that is registered for the requested table.
+ *
+ * @see MetadataHandler
+ */
+ @Override
+ public void getPartitions(BlockWriter blockWriter, GetTableLayoutRequest request, QueryStatusChecker queryStatusChecker)
+ throws Exception
+ {
+ TableProvider tableProvider = tableProviders.get(request.getTableName());
+ if (tableProvider == null) {
+ throw new RuntimeException("Unknown table " + request.getTableName());
+ }
+ tableProvider.getPartitions(blockWriter, request);
+ }
+
+ /**
+ * Delegates to the TableProvider that is registered for the requested table.
+ *
+ * @see MetadataHandler
+ */
+ @Override
+ public GetSplitsResponse doGetSplits(BlockAllocator blockAllocator, GetSplitsRequest getSplitsRequest)
+ {
+ TableProvider tableProvider = tableProviders.get(getSplitsRequest.getTableName());
+ if (tableProvider == null) {
+ throw new RuntimeException("Unknown table " + getSplitsRequest.getTableName());
+ }
+
+ //Every split needs a unique spill location.
+ SpillLocation spillLocation = makeSpillLocation(getSplitsRequest);
+ EncryptionKey encryptionKey = makeEncryptionKey();
+ Split split = Split.newBuilder(spillLocation, encryptionKey).build();
+ return new GetSplitsResponse(getSplitsRequest.getCatalogName(), split);
+ }
+}
diff --git a/athena-aws-cmdb/src/main/java/com/amazonaws/athena/connectors/aws/cmdb/AwsCmdbRecordHandler.java b/athena-aws-cmdb/src/main/java/com/amazonaws/athena/connectors/aws/cmdb/AwsCmdbRecordHandler.java
new file mode 100644
index 0000000000..ea78f6d996
--- /dev/null
+++ b/athena-aws-cmdb/src/main/java/com/amazonaws/athena/connectors/aws/cmdb/AwsCmdbRecordHandler.java
@@ -0,0 +1,76 @@
+/*-
+ * #%L
+ * athena-aws-cmdb
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+package com.amazonaws.athena.connectors.aws.cmdb;
+
+import com.amazonaws.athena.connector.lambda.QueryStatusChecker;
+import com.amazonaws.athena.connector.lambda.data.BlockSpiller;
+import com.amazonaws.athena.connector.lambda.domain.TableName;
+import com.amazonaws.athena.connector.lambda.handlers.RecordHandler;
+import com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest;
+import com.amazonaws.athena.connectors.aws.cmdb.tables.TableProvider;
+import com.amazonaws.services.athena.AmazonAthena;
+import com.amazonaws.services.s3.AmazonS3;
+import com.amazonaws.services.secretsmanager.AWSSecretsManager;
+import org.apache.arrow.util.VisibleForTesting;
+
+import java.util.Map;
+
+/**
+ * Handles record requests for the Athena AWS CMDB Connector.
+ *
+ * For more detail, please see the module's README.md, some notable characteristics of this class include:
+ *
+ * 1. Maps AWS Resources to SQL tables using a set of TableProviders constructed from a TableProviderFactory.
+ * 2. This class is largely a mux that delegates requests to the appropriate TableProvider based on the
+ * requested TableName.
+ */
+public class AwsCmdbRecordHandler
+ extends RecordHandler
+{
+ private static final String SOURCE_TYPE = "cmdb";
+
+ //Map of available fully qualified TableNames to their respective TableProviders.
+ private Map<TableName, TableProvider> tableProviders;
+
+ public AwsCmdbRecordHandler()
+ {
+ super(SOURCE_TYPE);
+ tableProviders = new TableProviderFactory().getTableProviders();
+ }
+
+ @VisibleForTesting
+ protected AwsCmdbRecordHandler(AmazonS3 amazonS3, AWSSecretsManager secretsManager, AmazonAthena athena, TableProviderFactory tableProviderFactory)
+ {
+ super(amazonS3, secretsManager, athena, SOURCE_TYPE);
+ tableProviders = tableProviderFactory.getTableProviders();
+ }
+
+ /**
+ * Delegates to the TableProvider that is registered for the requested table.
+ *
+ * @see RecordHandler
+ */
+ @Override
+ protected void readWithConstraint(BlockSpiller blockSpiller, ReadRecordsRequest readRecordsRequest, QueryStatusChecker queryStatusChecker)
+ {
+ TableProvider tableProvider = tableProviders.get(readRecordsRequest.getTableName());
+ tableProvider.readWithConstraint(blockSpiller, readRecordsRequest, queryStatusChecker);
+ }
+}
diff --git a/athena-aws-cmdb/src/main/java/com/amazonaws/athena/connectors/aws/cmdb/TableProviderFactory.java b/athena-aws-cmdb/src/main/java/com/amazonaws/athena/connectors/aws/cmdb/TableProviderFactory.java
new file mode 100644
index 0000000000..11259c0c2a
--- /dev/null
+++ b/athena-aws-cmdb/src/main/java/com/amazonaws/athena/connectors/aws/cmdb/TableProviderFactory.java
@@ -0,0 +1,123 @@
+/*-
+ * #%L
+ * athena-aws-cmdb
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+package com.amazonaws.athena.connectors.aws.cmdb;
+
+import com.amazonaws.athena.connector.lambda.domain.TableName;
+import com.amazonaws.athena.connectors.aws.cmdb.tables.EmrClusterTableProvider;
+import com.amazonaws.athena.connectors.aws.cmdb.tables.RdsTableProvider;
+import com.amazonaws.athena.connectors.aws.cmdb.tables.TableProvider;
+import com.amazonaws.athena.connectors.aws.cmdb.tables.ec2.EbsTableProvider;
+import com.amazonaws.athena.connectors.aws.cmdb.tables.ec2.Ec2TableProvider;
+import com.amazonaws.athena.connectors.aws.cmdb.tables.ec2.ImagesTableProvider;
+import com.amazonaws.athena.connectors.aws.cmdb.tables.ec2.RouteTableProvider;
+import com.amazonaws.athena.connectors.aws.cmdb.tables.ec2.SecurityGroupsTableProvider;
+import com.amazonaws.athena.connectors.aws.cmdb.tables.ec2.SubnetTableProvider;
+import com.amazonaws.athena.connectors.aws.cmdb.tables.ec2.VpcTableProvider;
+import com.amazonaws.athena.connectors.aws.cmdb.tables.s3.S3BucketsTableProvider;
+import com.amazonaws.athena.connectors.aws.cmdb.tables.s3.S3ObjectsTableProvider;
+import com.amazonaws.services.ec2.AmazonEC2;
+import com.amazonaws.services.ec2.AmazonEC2ClientBuilder;
+import com.amazonaws.services.elasticmapreduce.AmazonElasticMapReduce;
+import com.amazonaws.services.elasticmapreduce.AmazonElasticMapReduceClientBuilder;
+import com.amazonaws.services.rds.AmazonRDS;
+import com.amazonaws.services.rds.AmazonRDSClientBuilder;
+import com.amazonaws.services.s3.AmazonS3;
+import com.amazonaws.services.s3.AmazonS3ClientBuilder;
+import org.apache.arrow.util.VisibleForTesting;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Acts as a factory for all supported TableProviders and also a source of meta-data about the
+ * schemas and tables that the loaded TableProviders support.
+ */
+public class TableProviderFactory
+{
+ private Map<String, List<TableName>> schemas = new HashMap<>();
+ private Map<TableName, TableProvider> tableProviders = new HashMap<>();
+
+ public TableProviderFactory()
+ {
+ this(AmazonEC2ClientBuilder.standard().build(),
+ AmazonElasticMapReduceClientBuilder.standard().build(),
+ AmazonRDSClientBuilder.standard().build(),
+ AmazonS3ClientBuilder.standard().build());
+ }
+
+ @VisibleForTesting
+ protected TableProviderFactory(AmazonEC2 ec2, AmazonElasticMapReduce emr, AmazonRDS rds, AmazonS3 amazonS3)
+ {
+ addProvider(new Ec2TableProvider(ec2));
+ addProvider(new EbsTableProvider(ec2));
+ addProvider(new VpcTableProvider(ec2));
+ addProvider(new SecurityGroupsTableProvider(ec2));
+ addProvider(new RouteTableProvider(ec2));
+ addProvider(new SubnetTableProvider(ec2));
+ addProvider(new ImagesTableProvider(ec2));
+ addProvider(new EmrClusterTableProvider(emr));
+ addProvider(new RdsTableProvider(rds));
+ addProvider(new S3ObjectsTableProvider(amazonS3));
+ addProvider(new S3BucketsTableProvider(amazonS3));
+ }
+
+ /**
+ * Adds a new TableProvider to the loaded set, if and only if, no existing TableProvider is known
+ * for the fully qualified table represented by the new TableProvider we are attempting to add.
+ *
+ * @param provider The TableProvider to add.
+ */
+ private void addProvider(TableProvider provider)
+ {
+ if (tableProviders.putIfAbsent(provider.getTableName(), provider) != null) {
+ throw new RuntimeException("Duplicate provider for " + provider.getTableName());
+ }
+
+ List<TableName> tables = schemas.get(provider.getSchema());
+ if (tables == null) {
+ tables = new ArrayList<>();
+ schemas.put(provider.getSchema(), tables);
+ }
+ tables.add(provider.getTableName());
+ }
+
+ /**
+ * Provides access to the mapping of loaded TableProviders by their fully qualified table names.
+ *
+ * @return Map of TableNames to their corresponding TableProvider.
+ */
+ public Map<TableName, TableProvider> getTableProviders()
+ {
+ return tableProviders;
+ }
+
+ /**
+ * Provides access to the mapping of TableNames for each schema name discovered during the TableProvider
+ * scan.
+ *
+ * @return Map of schema names to their corresponding list of fully qualified TableNames.
+ */
+ public Map<String, List<TableName>> getSchemas()
+ {
+ return schemas;
+ }
+}
diff --git a/athena-aws-cmdb/src/main/java/com/amazonaws/athena/connectors/aws/cmdb/tables/EmrClusterTableProvider.java b/athena-aws-cmdb/src/main/java/com/amazonaws/athena/connectors/aws/cmdb/tables/EmrClusterTableProvider.java
new file mode 100644
index 0000000000..ee3b15da91
--- /dev/null
+++ b/athena-aws-cmdb/src/main/java/com/amazonaws/athena/connectors/aws/cmdb/tables/EmrClusterTableProvider.java
@@ -0,0 +1,205 @@
+/*-
+ * #%L
+ * athena-aws-cmdb
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+package com.amazonaws.athena.connectors.aws.cmdb.tables;
+
+import com.amazonaws.athena.connector.lambda.QueryStatusChecker;
+import com.amazonaws.athena.connector.lambda.data.Block;
+import com.amazonaws.athena.connector.lambda.data.BlockAllocator;
+import com.amazonaws.athena.connector.lambda.data.BlockSpiller;
+import com.amazonaws.athena.connector.lambda.data.FieldResolver;
+import com.amazonaws.athena.connector.lambda.data.SchemaBuilder;
+import com.amazonaws.athena.connector.lambda.domain.TableName;
+import com.amazonaws.athena.connector.lambda.metadata.GetTableRequest;
+import com.amazonaws.athena.connector.lambda.metadata.GetTableResponse;
+import com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest;
+import com.amazonaws.services.elasticmapreduce.AmazonElasticMapReduce;
+import com.amazonaws.services.elasticmapreduce.model.Cluster;
+import com.amazonaws.services.elasticmapreduce.model.ClusterSummary;
+import com.amazonaws.services.elasticmapreduce.model.DescribeClusterRequest;
+import com.amazonaws.services.elasticmapreduce.model.DescribeClusterResult;
+import com.amazonaws.services.elasticmapreduce.model.ListClustersRequest;
+import com.amazonaws.services.elasticmapreduce.model.ListClustersResult;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.pojo.Schema;
+
+import java.util.List;
+import java.util.stream.Collectors;
+
+/**
+ * Maps your EMR Clusters to a table.
+ */
+public class EmrClusterTableProvider
+ implements TableProvider
+{
+ private static final Schema SCHEMA;
+ private AmazonElasticMapReduce emr;
+
+ public EmrClusterTableProvider(AmazonElasticMapReduce emr)
+ {
+ this.emr = emr;
+ }
+
+ /**
+ * @See TableProvider
+ */
+ @Override
+ public String getSchema()
+ {
+ return "emr";
+ }
+
+ /**
+ * @See TableProvider
+ */
+ @Override
+ public TableName getTableName()
+ {
+ return new TableName(getSchema(), "emr_clusters");
+ }
+
+ /**
+ * @See TableProvider
+ */
+ @Override
+ public GetTableResponse getTable(BlockAllocator blockAllocator, GetTableRequest getTableRequest)
+ {
+ return new GetTableResponse(getTableRequest.getCatalogName(), getTableName(), SCHEMA);
+ }
+
+ /**
+ * Calls ListClusters and DescribeCluster on the AWS EMR Client returning all clusters that match the supplied
+ * predicate and attempting to push down certain predicates (namely queries for a specific cluster) to EMR.
+ *
+ * @See TableProvider
+ */
+ @Override
+ public void readWithConstraint(BlockSpiller spiller, ReadRecordsRequest recordsRequest, QueryStatusChecker queryStatusChecker)
+ {
+ boolean done = false;
+ ListClustersRequest request = new ListClustersRequest();
+
+ while (!done) {
+ ListClustersResult response = emr.listClusters(request);
+
+ for (ClusterSummary next : response.getClusters()) {
+ Cluster cluster = null;
+ if (!next.getStatus().getState().toLowerCase().contains("terminated")) {
+ DescribeClusterResult clusterResponse = emr.describeCluster(new DescribeClusterRequest().withClusterId(next.getId()));
+ cluster = clusterResponse.getCluster();
+ }
+ clusterToRow(next, cluster, spiller);
+ }
+
+ request.setMarker(response.getMarker());
+
+ if (response.getMarker() == null || !queryStatusChecker.isQueryRunning()) {
+ done = true;
+ }
+ }
+ }
+
+ /**
+ * Maps an EMR Cluster into a row in our Apache Arrow response block(s).
+ *
+ * @param clusterSummary The ClusterSummary for the provided Cluster.
+ * @param cluster The EMR Cluster to map.
+ * @param spiller The BlockSpiller to use when we want to write a matching row to the response.
+ * @note The current implementation is rather naive in how it maps fields. It leverages a static
+ * list of fields that we'd like to provide and then explicitly filters and converts each field.
+ */
+ private void clusterToRow(ClusterSummary clusterSummary,
+ Cluster cluster,
+ BlockSpiller spiller)
+ {
+ spiller.writeRows((Block block, int row) -> {
+ boolean matched = true;
+
+ matched &= block.offerValue("id", row, clusterSummary.getId());
+ matched &= block.offerValue("name", row, clusterSummary.getName());
+ matched &= block.offerValue("instance_hours", row, clusterSummary.getNormalizedInstanceHours());
+ matched &= block.offerValue("state", row, clusterSummary.getStatus().getState());
+ matched &= block.offerValue("state_code", row, clusterSummary.getStatus().getStateChangeReason().getCode());
+ matched &= block.offerValue("state_msg", row, clusterSummary.getStatus().getStateChangeReason().getMessage());
+
+ if (cluster != null) {
+ matched &= block.offerValue("autoscaling_role", row, cluster.getAutoScalingRole());
+ matched &= block.offerValue("custom_ami", row, cluster.getCustomAmiId());
+ matched &= block.offerValue("instance_collection_type", row, cluster.getInstanceCollectionType());
+ matched &= block.offerValue("log_uri", row, cluster.getLogUri());
+ matched &= block.offerValue("master_public_dns", row, cluster.getMasterPublicDnsName());
+ matched &= block.offerValue("release_label", row, cluster.getReleaseLabel());
+ matched &= block.offerValue("running_ami", row, cluster.getRunningAmiVersion());
+ matched &= block.offerValue("scale_down_behavior", row, cluster.getScaleDownBehavior());
+ matched &= block.offerValue("service_role", row, cluster.getServiceRole());
+ matched &= block.offerValue("service_role", row, cluster.getServiceRole());
+
+ List<String> applications = cluster.getApplications().stream()
+ .map(next -> next.getName() + ":" + next.getVersion()).collect(Collectors.toList());
+ matched &= block.offerComplexValue("applications", row, FieldResolver.DEFAULT, applications);
+
+ List<String> tags = cluster.getTags().stream()
+ .map(next -> next.getKey() + ":" + next.getValue()).collect(Collectors.toList());
+ matched &= block.offerComplexValue("tags", row, FieldResolver.DEFAULT, tags);
+ }
+
+ return matched ? 1 : 0;
+ });
+ }
+
+ /**
+ * Defines the schema of this table.
+ */
+ static {
+ SCHEMA = SchemaBuilder.newBuilder()
+ .addStringField("id")
+ .addStringField("name")
+ .addIntField("instance_hours")
+ .addStringField("state")
+ .addStringField("state_code")
+ .addStringField("state_msg")
+ .addStringField("autoscaling_role")
+ .addStringField("custom_ami")
+ .addStringField("instance_collection_type")
+ .addStringField("log_uri")
+ .addStringField("master_public_dns")
+ .addStringField("release_label")
+ .addStringField("running_ami")
+ .addStringField("scale_down_behavior")
+ .addStringField("service_role")
+ .addListField("applications", Types.MinorType.VARCHAR.getType())
+ .addListField("tags", Types.MinorType.VARCHAR.getType())
+ .addMetadata("id", "Cluster Id")
+ .addMetadata("name", "Cluster Name")
+ .addMetadata("state", "State of the cluster.")
+ .addMetadata("state_code", "Code associated with the state of the cluster.")
+ .addMetadata("state_msg", "Message associated with the state of the cluster.")
+ .addMetadata("autoscaling_role", "AutoScaling role used by the cluster.")
+ .addMetadata("custom_ami", "Custom AMI used by the cluster (if any)")
+ .addMetadata("instance_collection_type", "Instance collection type used by the cluster.")
+ .addMetadata("log_uri", "URI where debug logs can be found for the cluster.")
+ .addMetadata("master_public_dns", "Public DNS name of the master node.")
+ .addMetadata("release_label", "EMR release label the cluster is running.")
+ .addMetadata("running_ami", "AMI the cluster are running.")
+ .addMetadata("scale_down_behavior", "Scale down behavoir of the cluster.")
+ .addMetadata("applications", "The EMR applications installed on the cluster.")
+ .addMetadata("tags", "Tags associated with the volume.")
+ .build();
+ }
+}
diff --git a/athena-aws-cmdb/src/main/java/com/amazonaws/athena/connectors/aws/cmdb/tables/RdsTableProvider.java b/athena-aws-cmdb/src/main/java/com/amazonaws/athena/connectors/aws/cmdb/tables/RdsTableProvider.java
new file mode 100644
index 0000000000..c8338bbeb8
--- /dev/null
+++ b/athena-aws-cmdb/src/main/java/com/amazonaws/athena/connectors/aws/cmdb/tables/RdsTableProvider.java
@@ -0,0 +1,378 @@
+/*-
+ * #%L
+ * athena-aws-cmdb
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+package com.amazonaws.athena.connectors.aws.cmdb.tables;
+
+import com.amazonaws.athena.connector.lambda.QueryStatusChecker;
+import com.amazonaws.athena.connector.lambda.data.Block;
+import com.amazonaws.athena.connector.lambda.data.BlockAllocator;
+import com.amazonaws.athena.connector.lambda.data.BlockSpiller;
+import com.amazonaws.athena.connector.lambda.data.FieldBuilder;
+import com.amazonaws.athena.connector.lambda.data.SchemaBuilder;
+import com.amazonaws.athena.connector.lambda.domain.TableName;
+import com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet;
+import com.amazonaws.athena.connector.lambda.metadata.GetTableRequest;
+import com.amazonaws.athena.connector.lambda.metadata.GetTableResponse;
+import com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest;
+import com.amazonaws.services.rds.AmazonRDS;
+import com.amazonaws.services.rds.model.DBInstance;
+import com.amazonaws.services.rds.model.DBInstanceStatusInfo;
+import com.amazonaws.services.rds.model.DBParameterGroupStatus;
+import com.amazonaws.services.rds.model.DBSecurityGroupMembership;
+import com.amazonaws.services.rds.model.DBSubnetGroup;
+import com.amazonaws.services.rds.model.DescribeDBInstancesRequest;
+import com.amazonaws.services.rds.model.DescribeDBInstancesResult;
+import com.amazonaws.services.rds.model.DomainMembership;
+import com.amazonaws.services.rds.model.Endpoint;
+import com.amazonaws.services.rds.model.Subnet;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.Schema;
+
+import java.util.stream.Collectors;
+
+/**
+ * Maps your RDS instances to a table.
+ */
+public class RdsTableProvider
+ implements TableProvider
+{
+ private static final Schema SCHEMA;
+ private AmazonRDS rds;
+
+ public RdsTableProvider(AmazonRDS rds)
+ {
+ this.rds = rds;
+ }
+
+ /**
+ * @See TableProvider
+ */
+ @Override
+ public String getSchema()
+ {
+ return "rds";
+ }
+
+ /**
+ * @See TableProvider
+ */
+ @Override
+ public TableName getTableName()
+ {
+ return new TableName(getSchema(), "rds_instances");
+ }
+
+ /**
+ * @See TableProvider
+ */
+ @Override
+ public GetTableResponse getTable(BlockAllocator blockAllocator, GetTableRequest getTableRequest)
+ {
+ return new GetTableResponse(getTableRequest.getCatalogName(), getTableName(), SCHEMA);
+ }
+
+ /**
+ * Calls DescribeDBInstances on the AWS RDS Client returning all DB Instances that match the supplied predicate and attempting
+ * to push down certain predicates (namely queries for a specific DB Instance) to RDS.
+ *
+ * @See TableProvider
+ */
+ @Override
+ public void readWithConstraint(BlockSpiller spiller, ReadRecordsRequest recordsRequest, QueryStatusChecker queryStatusChecker)
+ {
+ boolean done = false;
+ DescribeDBInstancesRequest request = new DescribeDBInstancesRequest();
+
+ ValueSet idConstraint = recordsRequest.getConstraints().getSummary().get("instance_id");
+ if (idConstraint != null && idConstraint.isSingleValue()) {
+ request.setDBInstanceIdentifier(idConstraint.getSingleValue().toString());
+ }
+
+ while (!done) {
+ DescribeDBInstancesResult response = rds.describeDBInstances(request);
+
+ for (DBInstance instance : response.getDBInstances()) {
+ instanceToRow(instance, spiller);
+ }
+
+ request.setMarker(response.getMarker());
+
+ if (response.getMarker() == null || !queryStatusChecker.isQueryRunning()) {
+ done = true;
+ }
+ }
+ }
+
+ /**
+ * Maps a DBInstance into a row in our Apache Arrow response block(s).
+ *
+ * @param instance The DBInstance to map.
+ * @param spiller The BlockSpiller to use when we want to write a matching row to the response.
+ * @note The current implementation is rather naive in how it maps fields. It leverages a static
+ * list of fields that we'd like to provide and then explicitly filters and converts each field.
+ */
+ private void instanceToRow(DBInstance instance,
+ BlockSpiller spiller)
+ {
+ spiller.writeRows((Block block, int row) -> {
+ boolean matched = true;
+
+ matched &= block.offerValue("instance_id", row, instance.getDBInstanceIdentifier());
+ matched &= block.offerValue("primary_az", row, instance.getAvailabilityZone());
+ matched &= block.offerValue("storage_gb", row, instance.getAllocatedStorage());
+ matched &= block.offerValue("is_encrypted", row, instance.getStorageEncrypted());
+ matched &= block.offerValue("storage_type", row, instance.getStorageType());
+ matched &= block.offerValue("backup_retention_days", row, instance.getBackupRetentionPeriod());
+ matched &= block.offerValue("auto_upgrade", row, instance.getAutoMinorVersionUpgrade());
+ matched &= block.offerValue("instance_class", row, instance.getDBInstanceClass());
+ matched &= block.offerValue("port", row, instance.getDbInstancePort());
+ matched &= block.offerValue("status", row, instance.getDBInstanceStatus());
+ matched &= block.offerValue("dbi_resource_id", row, instance.getDbiResourceId());
+ matched &= block.offerValue("name", row, instance.getDBName());
+ matched &= block.offerValue("engine", row, instance.getEngine());
+ matched &= block.offerValue("engine_version", row, instance.getEngineVersion());
+ matched &= block.offerValue("license_model", row, instance.getLicenseModel());
+ matched &= block.offerValue("secondary_az", row, instance.getSecondaryAvailabilityZone());
+ matched &= block.offerValue("backup_window", row, instance.getPreferredBackupWindow());
+ matched &= block.offerValue("maint_window", row, instance.getPreferredMaintenanceWindow());
+ matched &= block.offerValue("read_replica_source_id", row, instance.getReadReplicaSourceDBInstanceIdentifier());
+ matched &= block.offerValue("create_time", row, instance.getInstanceCreateTime());
+ matched &= block.offerValue("public_access", row, instance.getPubliclyAccessible());
+ matched &= block.offerValue("iops", row, instance.getIops());
+ matched &= block.offerValue("is_multi_az", row, instance.getMultiAZ());
+
+ matched &= block.offerComplexValue("domains", row, (Field field, Object val) -> {
+ if (field.getName().equals("domain")) {
+ return ((DomainMembership) val).getDomain();
+ }
+ else if (field.getName().equals("fqdn")) {
+ return ((DomainMembership) val).getFQDN();
+ }
+ else if (field.getName().equals("iam_role")) {
+ return ((DomainMembership) val).getIAMRoleName();
+ }
+ else if (field.getName().equals("status")) {
+ return ((DomainMembership) val).getStatus();
+ }
+
+ throw new RuntimeException("Unexpected field " + field.getName());
+ },
+ instance.getDomainMemberships());
+
+ matched &= block.offerComplexValue("param_groups", row, (Field field, Object val) -> {
+ if (field.getName().equals("name")) {
+ return ((DBParameterGroupStatus) val).getDBParameterGroupName();
+ }
+ else if (field.getName().equals("status")) {
+ return ((DBParameterGroupStatus) val).getParameterApplyStatus();
+ }
+ throw new RuntimeException("Unexpected field " + field.getName());
+ },
+ instance.getDBParameterGroups());
+
+ matched &= block.offerComplexValue("db_security_groups",
+ row,
+ (Field field, Object val) -> {
+ if (field.getName().equals("name")) {
+ return ((DBSecurityGroupMembership) val).getDBSecurityGroupName();
+ }
+ else if (field.getName().equals("status")) {
+ return ((DBSecurityGroupMembership) val).getStatus();
+ }
+ throw new RuntimeException("Unexpected field " + field.getName());
+ },
+ instance.getDBSecurityGroups());
+
+ matched &= block.offerComplexValue("subnet_group",
+ row,
+ (Field field, Object val) -> {
+ if (field.getName().equals("description")) {
+ return ((DBSubnetGroup) val).getDBSubnetGroupDescription();
+ }
+ else if (field.getName().equals("name")) {
+ return ((DBSubnetGroup) val).getDBSubnetGroupName();
+ }
+ else if (field.getName().equals("status")) {
+ return ((DBSubnetGroup) val).getSubnetGroupStatus();
+ }
+ else if (field.getName().equals("vpc")) {
+ return ((DBSubnetGroup) val).getVpcId();
+ }
+ else if (field.getName().equals("subnets")) {
+ return ((DBSubnetGroup) val).getSubnets().stream()
+ .map(next -> next.getSubnetIdentifier()).collect(Collectors.toList());
+ }
+ else if (val instanceof Subnet) {
+ return ((Subnet) val).getSubnetIdentifier();
+ }
+ throw new RuntimeException("Unexpected field " + field.getName());
+ },
+ instance.getDBSubnetGroup());
+
+ matched &= block.offerComplexValue("endpoint",
+ row,
+ (Field field, Object val) -> {
+ if (field.getName().equals("address")) {
+ return ((Endpoint) val).getAddress();
+ }
+ else if (field.getName().equals("port")) {
+ return ((Endpoint) val).getPort();
+ }
+ else if (field.getName().equals("zone")) {
+ return ((Endpoint) val).getHostedZoneId();
+ }
+ throw new RuntimeException("Unexpected field " + field.getName());
+ },
+ instance.getEndpoint());
+
+ matched &= block.offerComplexValue("status_infos",
+ row,
+ (Field field, Object val) -> {
+ if (field.getName().equals("message")) {
+ return ((DBInstanceStatusInfo) val).getMessage();
+ }
+ else if (field.getName().equals("is_normal")) {
+ return ((DBInstanceStatusInfo) val).getNormal();
+ }
+ else if (field.getName().equals("status")) {
+ return ((DBInstanceStatusInfo) val).getStatus();
+ }
+ else if (field.getName().equals("type")) {
+ return ((DBInstanceStatusInfo) val).getStatusType();
+ }
+ throw new RuntimeException("Unexpected field " + field.getName());
+ },
+ instance.getStatusInfos());
+
+ return matched ? 1 : 0;
+ });
+ }
+
+ /**
+ * Defines the schema of this table.
+ */
+ static {
+ SCHEMA = SchemaBuilder.newBuilder()
+ .addStringField("instance_id")
+ .addStringField("primary_az")
+ .addIntField("storage_gb")
+ .addBitField("is_encrypted")
+ .addStringField("storage_type")
+ .addIntField("backup_retention_days")
+ .addBitField("auto_upgrade")
+ .addStringField("instance_class")
+ .addIntField("port")
+ .addStringField("status")
+ .addStringField("dbi_resource_id")
+ .addStringField("name")
+ .addField(
+ FieldBuilder.newBuilder("domains", new ArrowType.List())
+ .addField(
+ FieldBuilder.newBuilder("domain", Types.MinorType.STRUCT.getType())
+ .addStringField("domain")
+ .addStringField("fqdn")
+ .addStringField("iam_role")
+ .addStringField("status")
+ .build())
+ .build())
+ .addStringField("engine")
+ .addStringField("engine_version")
+ .addStringField("license_model")
+ .addStringField("secondary_az")
+ .addStringField("backup_window")
+ .addStringField("maint_window")
+ .addStringField("read_replica_source_id")
+ .addField(
+ FieldBuilder.newBuilder("param_groups", new ArrowType.List())
+ .addField(
+ FieldBuilder.newBuilder("param_group", Types.MinorType.STRUCT.getType())
+ .addStringField("name")
+ .addStringField("status")
+ .build())
+ .build())
+ .addField(
+ FieldBuilder.newBuilder("db_security_groups", new ArrowType.List())
+ .addField(
+ FieldBuilder.newBuilder("db_security_group", Types.MinorType.STRUCT.getType())
+ .addStringField("name")
+ .addStringField("status")
+ .build())
+ .build())
+ .addStructField("subnet_group")
+ .addChildField("subnet_group", "name", Types.MinorType.VARCHAR.getType())
+ .addChildField("subnet_group", "status", Types.MinorType.VARCHAR.getType())
+ .addChildField("subnet_group", "vpc", Types.MinorType.VARCHAR.getType())
+ .addChildField("subnet_group", FieldBuilder.newBuilder("subnets", Types.MinorType.LIST.getType())
+ .addStringField("subnets").build())
+ .addField(FieldBuilder.newBuilder("endpoint", Types.MinorType.STRUCT.getType())
+ .addStringField("address")
+ .addIntField("port")
+ .addStringField("zone")
+ .build())
+ .addField("create_time", Types.MinorType.DATEMILLI.getType())
+ .addBitField("public_access")
+
+ .addField(
+ FieldBuilder.newBuilder("status_infos", new ArrowType.List())
+ .addField(
+ FieldBuilder.newBuilder("status_info", Types.MinorType.STRUCT.getType())
+ .addStringField("message")
+ .addBitField("is_normal")
+ .addStringField("status")
+ .addStringField("type")
+ .build())
+ .build())
+
+ .addIntField("iops")
+ .addBitField("is_multi_az")
+ .addMetadata("instance_id", "Database Instance Id")
+ .addMetadata("primary_az", "The primary az for the database instance")
+ .addMetadata("storage_gb", "Total allocated storage for the Database Instances in GB.")
+ .addMetadata("is_encrypted", "True if the database is encrypted.")
+ .addMetadata("storage_type", "The type of storage used by this Database Instance.")
+ .addMetadata("backup_retention_days", "The number of days of backups to keep.")
+ .addMetadata("auto_upgrade", "True if the cluster auto-upgrades minor versions.")
+ .addMetadata("instance_class", "The instance type used by this database.")
+ .addMetadata("port", "Listen port for the database.")
+ .addMetadata("status", "Status of the DB Instance.")
+ .addMetadata("dbi_resource_id", "Unique id for the instance of the database.")
+ .addMetadata("name", "Name of the DB Instance.")
+ .addMetadata("domains", "Active Directory domains to which the DB Instance is associated.")
+ .addMetadata("applications", "The EMR applications installed on the cluster.")
+ .addMetadata("engine", "The engine type of the DB Instance.")
+ .addMetadata("engine_version", "The engine version of the DB Instance")
+ .addMetadata("license_model", "The license model of the DB Instance")
+ .addMetadata("secondary_az", "The secondary AZ of the DB Instance")
+ .addMetadata("backup_window", "The backup window of the DB Instance")
+ .addMetadata("maint_window", "The maintenance window of the DB Instance")
+ .addMetadata("read_replica_source_id", "The read replica source id, if present, of the DB Instance")
+ .addMetadata("param_groups", "The param groups applied to the DB Instance")
+ .addMetadata("db_security_groups", "The security groups applies the DB Instance")
+ .addMetadata("subnet_groups", "The subnets available to the DB Instance")
+ .addMetadata("endpoint", "The endpoint of the DB Instance")
+ .addMetadata("create_time", "The create time of the DB Instance")
+ .addMetadata("public_access", "True if publically accessible.")
+ .addMetadata("status_infos", "The status info details associated with the DB Instance")
+ .addMetadata("iops", "The total provisioned IOPs for the DB Instance.")
+ .addMetadata("is_multi_az", "True if the DB Instance is avialable in multiple AZs.")
+ .build();
+ }
+}
diff --git a/athena-aws-cmdb/src/main/java/com/amazonaws/athena/connectors/aws/cmdb/tables/TableProvider.java b/athena-aws-cmdb/src/main/java/com/amazonaws/athena/connectors/aws/cmdb/tables/TableProvider.java
new file mode 100644
index 0000000000..dd2a3d7a25
--- /dev/null
+++ b/athena-aws-cmdb/src/main/java/com/amazonaws/athena/connectors/aws/cmdb/tables/TableProvider.java
@@ -0,0 +1,88 @@
+/*-
+ * #%L
+ * athena-aws-cmdb
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+package com.amazonaws.athena.connectors.aws.cmdb.tables;
+
+import com.amazonaws.athena.connector.lambda.QueryStatusChecker;
+import com.amazonaws.athena.connector.lambda.data.BlockAllocator;
+import com.amazonaws.athena.connector.lambda.data.BlockSpiller;
+import com.amazonaws.athena.connector.lambda.data.BlockWriter;
+import com.amazonaws.athena.connector.lambda.data.SchemaBuilder;
+import com.amazonaws.athena.connector.lambda.domain.TableName;
+import com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutRequest;
+import com.amazonaws.athena.connector.lambda.metadata.GetTableRequest;
+import com.amazonaws.athena.connector.lambda.metadata.GetTableResponse;
+import com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest;
+
+/**
+ * Defines the functionality required to supply the metadata and data that the Athena AWS CMDB connector needs
+ * to allow SQL queries to run over the virtual table.
+ */
+public interface TableProvider
+{
+ /**
+ * The schema name (aka database) that this table provider's table belongs to.
+ *
+ * @return String containing the schema name.
+ */
+ String getSchema();
+
+ /**
+ * The fully qualified name of the table represented by this TableProvider.
+ *
+ * @return The TableName containing the fully qualified name of the Table.
+ */
+ TableName getTableName();
+
+ /**
+ * Provides access to the Schema details of the requested table.
+ *
+ * @See MetadataHandler
+ */
+ GetTableResponse getTable(BlockAllocator blockAllocator, GetTableRequest getTableRequest);
+
+ /**
+ * Default implementation returns a single partition since many of the TableProviders may not support
+ * parallel scans.
+ *
+ * @See MetadataHandler
+ */
+ default void getPartitions(BlockWriter blockWriter, GetTableLayoutRequest request)
+ throws Exception
+ {
+ //NoOp as we do not support partitioning.
+ }
+
+ /**
+ * Default implementation does not enhance the partition results schema
+ *
+ * @See MetadataHandler
+ */
+ default void enhancePartitionSchema(SchemaBuilder partitionSchemaBuilder, GetTableLayoutRequest request)
+ {
+ //NoOp as we do not support partitioning or added partition data
+ }
+
+ /**
+ * Performs the requested read against the table, writing result row data using the supplied BlockSpiller.
+ *
+ * @See RecordHandler
+ */
+ void readWithConstraint(BlockSpiller spiller, ReadRecordsRequest recordsRequest, QueryStatusChecker queryStatusChecker);
+}
diff --git a/athena-aws-cmdb/src/main/java/com/amazonaws/athena/connectors/aws/cmdb/tables/ec2/EbsTableProvider.java b/athena-aws-cmdb/src/main/java/com/amazonaws/athena/connectors/aws/cmdb/tables/ec2/EbsTableProvider.java
new file mode 100644
index 0000000000..48b6503757
--- /dev/null
+++ b/athena-aws-cmdb/src/main/java/com/amazonaws/athena/connectors/aws/cmdb/tables/ec2/EbsTableProvider.java
@@ -0,0 +1,199 @@
+/*-
+ * #%L
+ * athena-aws-cmdb
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+package com.amazonaws.athena.connectors.aws.cmdb.tables.ec2;
+
+import com.amazonaws.athena.connector.lambda.QueryStatusChecker;
+import com.amazonaws.athena.connector.lambda.data.Block;
+import com.amazonaws.athena.connector.lambda.data.BlockAllocator;
+import com.amazonaws.athena.connector.lambda.data.BlockSpiller;
+import com.amazonaws.athena.connector.lambda.data.FieldResolver;
+import com.amazonaws.athena.connector.lambda.data.SchemaBuilder;
+import com.amazonaws.athena.connector.lambda.domain.TableName;
+import com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet;
+import com.amazonaws.athena.connector.lambda.metadata.GetTableRequest;
+import com.amazonaws.athena.connector.lambda.metadata.GetTableResponse;
+import com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest;
+import com.amazonaws.athena.connectors.aws.cmdb.tables.TableProvider;
+import com.amazonaws.services.ec2.AmazonEC2;
+import com.amazonaws.services.ec2.model.DescribeVolumesRequest;
+import com.amazonaws.services.ec2.model.DescribeVolumesResult;
+import com.amazonaws.services.ec2.model.Volume;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.Collections;
+import java.util.List;
+import java.util.stream.Collectors;
+
+/**
+ * Maps your EBS volumes to a table.
+ */
+public class EbsTableProvider
+ implements TableProvider
+{
+ private static final Logger logger = LoggerFactory.getLogger(EbsTableProvider.class);
+ private static final Schema SCHEMA;
+ private AmazonEC2 ec2;
+
+ public EbsTableProvider(AmazonEC2 ec2)
+ {
+ this.ec2 = ec2;
+ }
+
+ /**
+ * @See TableProvider
+ */
+ @Override
+ public String getSchema()
+ {
+ return "ec2";
+ }
+
+ /**
+ * @See TableProvider
+ */
+ @Override
+ public TableName getTableName()
+ {
+ return new TableName(getSchema(), "ebs_volumes");
+ }
+
+ /**
+ * @See TableProvider
+ */
+ @Override
+ public GetTableResponse getTable(BlockAllocator blockAllocator, GetTableRequest getTableRequest)
+ {
+ return new GetTableResponse(getTableRequest.getCatalogName(), getTableName(), SCHEMA);
+ }
+
+ /**
+ * Calls DescribeVolumes on the AWS EC2 Client returning all volumes that match the supplied predicate and attempting
+ * to push down certain predicates (namely queries for specific volumes) to EC2.
+ *
+ * @See TableProvider
+ */
+ @Override
+ public void readWithConstraint(BlockSpiller spiller, ReadRecordsRequest recordsRequest, QueryStatusChecker queryStatusChecker)
+ {
+ boolean done = false;
+ DescribeVolumesRequest request = new DescribeVolumesRequest();
+
+ ValueSet idConstraint = recordsRequest.getConstraints().getSummary().get("id");
+ if (idConstraint != null && idConstraint.isSingleValue()) {
+ request.setVolumeIds(Collections.singletonList(idConstraint.getSingleValue().toString()));
+ }
+
+ while (!done) {
+ DescribeVolumesResult response = ec2.describeVolumes(request);
+
+ for (Volume volume : response.getVolumes()) {
+ logger.info("readWithConstraint: {}", volume);
+ instanceToRow(volume, spiller);
+ }
+
+ request.setNextToken(response.getNextToken());
+
+ if (response.getNextToken() == null || !queryStatusChecker.isQueryRunning()) {
+ done = true;
+ }
+ }
+ }
+
+ /**
+ * Maps an EBS Volume into a row in our Apache Arrow response block(s).
+ *
+ * @param volume The EBS Volume to map.
+ * @param spiller The BlockSpiller to use when we want to write a matching row to the response.
+ * @note The current implementation is rather naive in how it maps fields. It leverages a static
+ * list of fields that we'd like to provide and then explicitly filters and converts each field.
+ */
+ private void instanceToRow(Volume volume,
+ BlockSpiller spiller)
+ {
+ spiller.writeRows((Block block, int row) -> {
+ boolean matched = true;
+
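+ //offerValue returns false when a value does not satisfy the query's constraints for that field,
+ //so 'matched' accumulates whether this row should be emitted at all (see the return below).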
+ matched &= block.offerValue("id", row, volume.getVolumeId());
+ matched &= block.offerValue("type", row, volume.getVolumeType());
+ matched &= block.offerValue("availability_zone", row, volume.getAvailabilityZone());
+ matched &= block.offerValue("created_time", row, volume.getCreateTime());
+ matched &= block.offerValue("is_encrypted", row, volume.getEncrypted());
+ matched &= block.offerValue("kms_key_id", row, volume.getKmsKeyId());
+ matched &= block.offerValue("size", row, volume.getSize());
+ matched &= block.offerValue("iops", row, volume.getIops());
+ matched &= block.offerValue("snapshot_id", row, volume.getSnapshotId());
+ matched &= block.offerValue("state", row, volume.getState());
+
+ if (volume.getAttachments().size() == 1) {
+ matched &= block.offerValue("target", row, volume.getAttachments().get(0).getInstanceId());
+ matched &= block.offerValue("attached_device", row, volume.getAttachments().get(0).getDevice());
+ matched &= block.offerValue("attachment_state", row, volume.getAttachments().get(0).getState());
+ matched &= block.offerValue("attachment_time", row, volume.getAttachments().get(0).getAttachTime());
+ }
+
+ List<String> tags = volume.getTags().stream()
+ .map(next -> next.getKey() + ":" + next.getValue()).collect(Collectors.toList());
+ matched &= block.offerComplexValue("tags", row, FieldResolver.DEFAULT, tags);
+
+ return matched ? 1 : 0;
+ });
+ }
+
+ /**
+ * Defines the schema of this table.
+ */
+ static {
+ SCHEMA = SchemaBuilder.newBuilder()
+ .addStringField("id")
+ .addStringField("type")
+ .addStringField("target")
+ .addStringField("attached_device")
+ .addStringField("attachment_state")
+ .addField("attachment_time", Types.MinorType.DATEMILLI.getType())
+ .addStringField("availability_zone")
+ .addField("created_time", Types.MinorType.DATEMILLI.getType())
+ .addBitField("is_encrypted")
+ .addStringField("kms_key_id")
+ .addIntField("size")
+ .addIntField("iops")
+ .addStringField("snapshot_id")
+ .addStringField("state")
+ .addListField("tags", Types.MinorType.VARCHAR.getType())
+ .addMetadata("id", "EBS Volume Id")
+ .addMetadata("type", "EBS Volume Type")
+ .addMetadata("target", "EC2 Instance Id that this volume is attached to.")
+ .addMetadata("attached_device", "Device name where this EBS volume is attached.")
+ .addMetadata("attachment_state", "The state of the volume attachement.")
+ .addMetadata("attachment_time", "The time this volume was attached to its target.")
+ .addMetadata("availability_zone", "The AZ that this EBS Volume is in.")
+ .addMetadata("created_time", "The date time that the volume was created.")
+ .addMetadata("is_encrypted", "True if the volume is encrypted with KMS managed key.")
+ .addMetadata("kms_key_id", "The KMS key id used to encrypt this volume.")
+ .addMetadata("size", "The size in GBs of this volume.")
+ .addMetadata("iops", "Provisioned IOPs supported by this volume.")
+ .addMetadata("snapshot_id", "ID of the last snapshot for this volume.")
+ .addMetadata("state", "State of the EBS Volume.")
+ .addMetadata("tags", "Tags associated with the volume.")
+ .build();
+ }
+}
diff --git a/athena-aws-cmdb/src/main/java/com/amazonaws/athena/connectors/aws/cmdb/tables/ec2/Ec2TableProvider.java b/athena-aws-cmdb/src/main/java/com/amazonaws/athena/connectors/aws/cmdb/tables/ec2/Ec2TableProvider.java
new file mode 100644
index 0000000000..1bf2f7af49
--- /dev/null
+++ b/athena-aws-cmdb/src/main/java/com/amazonaws/athena/connectors/aws/cmdb/tables/ec2/Ec2TableProvider.java
@@ -0,0 +1,313 @@
+/*-
+ * #%L
+ * athena-aws-cmdb
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+package com.amazonaws.athena.connectors.aws.cmdb.tables.ec2;
+
+import com.amazonaws.athena.connector.lambda.QueryStatusChecker;
+import com.amazonaws.athena.connector.lambda.data.Block;
+import com.amazonaws.athena.connector.lambda.data.BlockAllocator;
+import com.amazonaws.athena.connector.lambda.data.BlockSpiller;
+import com.amazonaws.athena.connector.lambda.data.FieldBuilder;
+import com.amazonaws.athena.connector.lambda.data.FieldResolver;
+import com.amazonaws.athena.connector.lambda.data.SchemaBuilder;
+import com.amazonaws.athena.connector.lambda.domain.TableName;
+import com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet;
+import com.amazonaws.athena.connector.lambda.metadata.GetTableRequest;
+import com.amazonaws.athena.connector.lambda.metadata.GetTableResponse;
+import com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest;
+import com.amazonaws.athena.connectors.aws.cmdb.tables.TableProvider;
+import com.amazonaws.services.ec2.AmazonEC2;
+import com.amazonaws.services.ec2.model.DescribeInstancesRequest;
+import com.amazonaws.services.ec2.model.DescribeInstancesResult;
+import com.amazonaws.services.ec2.model.Instance;
+import com.amazonaws.services.ec2.model.InstanceNetworkInterface;
+import com.amazonaws.services.ec2.model.InstanceState;
+import com.amazonaws.services.ec2.model.Reservation;
+import com.amazonaws.services.ec2.model.StateReason;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.Schema;
+
+import java.util.Collections;
+import java.util.List;
+import java.util.stream.Collectors;
+
+/**
+ * Maps your EC2 instances to a table.
+ */
+public class Ec2TableProvider
+ implements TableProvider
+{
+ private static final Schema SCHEMA;
+ private AmazonEC2 ec2;
+
+ public Ec2TableProvider(AmazonEC2 ec2)
+ {
+ this.ec2 = ec2;
+ }
+
+ /**
+ * @See TableProvider
+ */
+ @Override
+ public String getSchema()
+ {
+ return "ec2";
+ }
+
+ /**
+ * @See TableProvider
+ */
+ @Override
+ public TableName getTableName()
+ {
+ return new TableName(getSchema(), "ec2_instances");
+ }
+
+ /**
+ * @See TableProvider
+ */
+ @Override
+ public GetTableResponse getTable(BlockAllocator blockAllocator, GetTableRequest getTableRequest)
+ {
+ return new GetTableResponse(getTableRequest.getCatalogName(), getTableName(), SCHEMA);
+ }
+
+ /**
+ * Calls DescribeInstances on the AWS EC2 Client returning all instances that match the supplied predicate and attempting
+ * to push down certain predicates (namely queries for a specific EC2 instance) to EC2.
+ *
+ * @See TableProvider
+ */
+ @Override
+ public void readWithConstraint(BlockSpiller spiller, ReadRecordsRequest recordsRequest, QueryStatusChecker queryStatusChecker)
+ {
+ boolean done = false;
+ DescribeInstancesRequest request = new DescribeInstancesRequest();
+
+ ValueSet idConstraint = recordsRequest.getConstraints().getSummary().get("instance_id");
+ if (idConstraint != null && idConstraint.isSingleValue()) {
+ request.setInstanceIds(Collections.singletonList(idConstraint.getSingleValue().toString()));
+ }
+
+ while (!done) {
+ DescribeInstancesResult response = ec2.describeInstances(request);
+
+ for (Reservation reservation : response.getReservations()) {
+ for (Instance instance : reservation.getInstances()) {
+ instanceToRow(instance, spiller);
+ }
+ }
+
+ request.setNextToken(response.getNextToken());
+
+ if (response.getNextToken() == null || !queryStatusChecker.isQueryRunning()) {
+ done = true;
+ }
+ }
+ }
+
+ /**
+ * Maps an EC2 Instance into a row in our Apache Arrow response block(s).
+ *
+ * @param instance The EC2 Instance to map.
+ * @param spiller The BlockSpiller to use when we want to write a matching row to the response.
+ * @note The current implementation is rather naive in how it maps fields. It leverages a static
+ * list of fields that we'd like to provide and then explicitly filters and converts each field.
+ */
+ private void instanceToRow(Instance instance,
+ BlockSpiller spiller)
+ {
+ spiller.writeRows((Block block, int row) -> {
+ boolean matched = true;
+
+ matched &= block.offerValue("instance_id", row, instance.getInstanceId());
+ matched &= block.offerValue("image_id", row, instance.getImageId());
+ matched &= block.offerValue("instance_type", row, instance.getInstanceType());
+ matched &= block.offerValue("platform", row, instance.getPlatform());
+ matched &= block.offerValue("private_dns_name", row, instance.getPrivateDnsName());
+ matched &= block.offerValue("private_ip_address", row, instance.getPrivateIpAddress());
+ matched &= block.offerValue("public_dns_name", row, instance.getPublicDnsName());
+ matched &= block.offerValue("public_ip_address", row, instance.getPublicIpAddress());
+ matched &= block.offerValue("subnet_id", row, instance.getSubnetId());
+ matched &= block.offerValue("vpc_id", row, instance.getVpcId());
+ matched &= block.offerValue("architecture", row, instance.getArchitecture());
+ matched &= block.offerValue("instance_lifecycle", row, instance.getInstanceLifecycle());
+ matched &= block.offerValue("root_device_name", row, instance.getRootDeviceName());
+ matched &= block.offerValue("root_device_type", row, instance.getRootDeviceType());
+ matched &= block.offerValue("spot_instance_request_id", row, instance.getSpotInstanceRequestId());
+ matched &= block.offerValue("virtualization_type", row, instance.getVirtualizationType());
+ matched &= block.offerValue("key_name", row, instance.getKeyName());
+ matched &= block.offerValue("kernel_id", row, instance.getKernelId());
+ matched &= block.offerValue("capacity_reservation_id", row, instance.getCapacityReservationId());
+ matched &= block.offerValue("launch_time", row, instance.getLaunchTime());
+
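+ //For STRUCT and LIST fields we supply a FieldResolver lambda that maps each Arrow child field
+ //to the corresponding value on the source EC2 object (here InstanceState; the same pattern is
+ //used below for the network interfaces and state reason).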
+ matched &= block.offerComplexValue("state",
+ row,
+ (Field field, Object val) -> {
+ if (field.getName().equals("name")) {
+ return ((InstanceState) val).getName();
+ }
+ else if (field.getName().equals("code")) {
+ return ((InstanceState) val).getCode();
+ }
+ throw new RuntimeException("Unknown field " + field.getName());
+ }, instance.getState());
+
+ matched &= block.offerComplexValue("network_interfaces",
+ row,
+ (Field field, Object val) -> {
+ if (field.getName().equals("status")) {
+ return ((InstanceNetworkInterface) val).getStatus();
+ }
+ else if (field.getName().equals("subnet")) {
+ return ((InstanceNetworkInterface) val).getSubnetId();
+ }
+ else if (field.getName().equals("vpc")) {
+ return ((InstanceNetworkInterface) val).getVpcId();
+ }
+ else if (field.getName().equals("mac")) {
+ return ((InstanceNetworkInterface) val).getMacAddress();
+ }
+ else if (field.getName().equals("private_dns")) {
+ return ((InstanceNetworkInterface) val).getPrivateDnsName();
+ }
+ else if (field.getName().equals("private_ip")) {
+ return ((InstanceNetworkInterface) val).getPrivateIpAddress();
+ }
+ else if (field.getName().equals("security_groups")) {
+ return ((InstanceNetworkInterface) val).getGroups().stream().map(next -> next.getGroupName() + ":" + next.getGroupId()).collect(Collectors.toList());
+ }
+ else if (field.getName().equals("interface_id")) {
+ return ((InstanceNetworkInterface) val).getNetworkInterfaceId();
+ }
+
+ throw new RuntimeException("Unknown field " + field.getName());
+ }, instance.getNetworkInterfaces());
+
+ matched &= block.offerComplexValue("state_reason", row, (Field field, Object val) -> {
+ if (field.getName().equals("message")) {
+ return ((StateReason) val).getMessage();
+ }
+ else if (field.getName().equals("code")) {
+ return ((StateReason) val).getCode();
+ }
+ throw new RuntimeException("Unknown field " + field.getName());
+ }, instance.getStateReason());
+
+ matched &= block.offerValue("ebs_optimized", row, instance.getEbsOptimized());
+
+ List<String> securityGroups = instance.getSecurityGroups().stream()
+ .map(next -> next.getGroupId()).collect(Collectors.toList());
+ matched &= block.offerComplexValue("security_groups", row, FieldResolver.DEFAULT, securityGroups);
+
+ List<String> securityGroupNames = instance.getSecurityGroups().stream()
+ .map(next -> next.getGroupName()).collect(Collectors.toList());
+ matched &= block.offerComplexValue("security_group_names", row, FieldResolver.DEFAULT, securityGroupNames);
+
+ List<String> ebsVolumes = instance.getBlockDeviceMappings().stream()
+ .map(next -> next.getEbs().getVolumeId()).collect(Collectors.toList());
+ matched &= block.offerComplexValue("ebs_volumes", row, FieldResolver.DEFAULT, ebsVolumes);
+
+ return matched ? 1 : 0;
+ });
+ }
+
+ /**
+ * Defines the schema of this table.
+ */
+ static {
+ SCHEMA = SchemaBuilder.newBuilder()
+ .addStringField("instance_id")
+ .addStringField("image_id")
+ .addStringField("instance_type")
+ .addStringField("platform")
+ .addStringField("private_dns_name")
+ .addStringField("private_ip_address")
+ .addStringField("public_dns_name")
+ .addStringField("public_ip_address")
+ .addStringField("subnet_id")
+ .addStringField("vpc_id")
+ .addStringField("architecture")
+ .addStringField("instance_lifecycle")
+ .addStringField("root_device_name")
+ .addStringField("root_device_type")
+ .addStringField("spot_instance_request_id")
+ .addStringField("virtualization_type")
+ .addStringField("key_name")
+ .addStringField("kernel_id")
+ .addStringField("capacity_reservation_id")
+ .addField("launch_time", Types.MinorType.DATEMILLI.getType())
+ .addStructField("state")
+ .addChildField("state", "name", Types.MinorType.VARCHAR.getType())
+ .addChildField("state", "code", Types.MinorType.INT.getType())
+ .addStructField("state_reason")
+ .addChildField("state_reason", "message", Types.MinorType.VARCHAR.getType())
+ .addChildField("state_reason", "code", Types.MinorType.VARCHAR.getType())
+
+ //Example of a List of Structs
+ .addField(
+ FieldBuilder.newBuilder("network_interfaces", new ArrowType.List())
+ .addField(
+ FieldBuilder.newBuilder("interface", Types.MinorType.STRUCT.getType())
+ .addStringField("status")
+ .addStringField("subnet")
+ .addStringField("vpc")
+ .addStringField("mac")
+ .addStringField("private_dns")
+ .addStringField("private_ip")
+ .addListField("security_groups", Types.MinorType.VARCHAR.getType())
+ .addStringField("interface_id")
+ .build())
+ .build())
+ .addBitField("ebs_optimized")
+ .addListField("security_groups", Types.MinorType.VARCHAR.getType())
+ .addListField("security_group_names", Types.MinorType.VARCHAR.getType())
+ .addListField("ebs_volumes", Types.MinorType.VARCHAR.getType())
+ .addMetadata("instance_id", "EC2 Instance id.")
+ .addMetadata("image_id", "The id of the AMI used to boot the instance.")
+ .addMetadata("instance_type", "The EC2 instance type,")
+ .addMetadata("platform", "The platform of the instance (e.g. Linux)")
+ .addMetadata("private_dns_name", "The private dns name of the instance.")
+ .addMetadata("private_ip_address", "The private ip address of the instance.")
+ .addMetadata("public_dns_name", "The public dns name of the instance.")
+ .addMetadata("public_ip_address", "The public ip address of the instance.")
+ .addMetadata("subnet_id", "The subnet id that the instance was launched in.")
+ .addMetadata("vpc_id", "The id of the VPC that the instance was launched in.")
+ .addMetadata("architecture", "The architecture of the instance (e.g. x86).")
+ .addMetadata("instance_lifecycle", "The lifecycle state of the instance.")
+ .addMetadata("root_device_name", "The name of the root device that the instance booted from.")
+ .addMetadata("root_device_type", "The type of the root device that the instance booted from.")
+ .addMetadata("spot_instance_requestId", "Spot Request ID if the instance was launched via spot. ")
+ .addMetadata("virtualization_type", "The type of virtualization used by the instance (e.g. HVM)")
+ .addMetadata("key_name", "The name of the ec2 instance from the name tag.")
+ .addMetadata("kernel_id", "The id of the kernel used in the AMI that booted the instance.")
+ .addMetadata("capacity_reservation_id", "Capacity reservation id that this instance was launched against.")
+ .addMetadata("launch_time", "The time that the instance was launched at.")
+ .addMetadata("state", "The state of the ec2 instance.")
+ .addMetadata("state_reason", "The reason for the 'state' associated with the instance.")
+ .addMetadata("ebs_optimized", "True if the instance is EBS optimized.")
+ .addMetadata("network_interfaces", "The list of the network interfaces on the instance.")
+ .addMetadata("security_groups", "The list of security group (ids) attached to this instance.")
+ .addMetadata("security_group_names", "The list of security group (names) attached to this instance.")
+ .addMetadata("ebs_volumes", "The list of ebs volume (ids) attached to this instance.")
+ .build();
+ }
+}
diff --git a/athena-aws-cmdb/src/main/java/com/amazonaws/athena/connectors/aws/cmdb/tables/ec2/ImagesTableProvider.java b/athena-aws-cmdb/src/main/java/com/amazonaws/athena/connectors/aws/cmdb/tables/ec2/ImagesTableProvider.java
new file mode 100644
index 0000000000..8c7bc7a4e0
--- /dev/null
+++ b/athena-aws-cmdb/src/main/java/com/amazonaws/athena/connectors/aws/cmdb/tables/ec2/ImagesTableProvider.java
@@ -0,0 +1,288 @@
+/*-
+ * #%L
+ * athena-aws-cmdb
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+package com.amazonaws.athena.connectors.aws.cmdb.tables.ec2;
+
+import com.amazonaws.athena.connector.lambda.QueryStatusChecker;
+import com.amazonaws.athena.connector.lambda.data.Block;
+import com.amazonaws.athena.connector.lambda.data.BlockAllocator;
+import com.amazonaws.athena.connector.lambda.data.BlockSpiller;
+import com.amazonaws.athena.connector.lambda.data.FieldBuilder;
+import com.amazonaws.athena.connector.lambda.data.SchemaBuilder;
+import com.amazonaws.athena.connector.lambda.domain.TableName;
+import com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet;
+import com.amazonaws.athena.connector.lambda.metadata.GetTableRequest;
+import com.amazonaws.athena.connector.lambda.metadata.GetTableResponse;
+import com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest;
+import com.amazonaws.athena.connectors.aws.cmdb.tables.TableProvider;
+import com.amazonaws.services.ec2.AmazonEC2;
+import com.amazonaws.services.ec2.model.BlockDeviceMapping;
+import com.amazonaws.services.ec2.model.DescribeImagesRequest;
+import com.amazonaws.services.ec2.model.DescribeImagesResult;
+import com.amazonaws.services.ec2.model.EbsBlockDevice;
+import com.amazonaws.services.ec2.model.Image;
+import com.amazonaws.services.ec2.model.Tag;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.Schema;
+
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * Maps your EC2 images (aka AMIs) to a table.
+ */
+public class ImagesTableProvider
+ implements TableProvider
+{
+ private static final String DEFAULT_OWNER_ENV = "default_ec2_image_owner";
+ private static final int MAX_IMAGES = 1000;
+ //Sets a default owner filter (when not null) to reduce the number of irrelevant AMIs returned when you do not
+ //query for a specific owner.
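+ //For example (illustrative): setting default_ec2_image_owner=amazon, or your 12-digit account id, on the
+ //Lambda function limits un-filtered scans to images owned by that account.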
+ private static final String DEFAULT_OWNER = System.getenv(DEFAULT_OWNER_ENV);
+ private static final Schema SCHEMA;
+ private AmazonEC2 ec2;
+
+ public ImagesTableProvider(AmazonEC2 ec2)
+ {
+ this.ec2 = ec2;
+ }
+
+ /**
+ * @See TableProvider
+ */
+ @Override
+ public String getSchema()
+ {
+ return "ec2";
+ }
+
+ /**
+ * @See TableProvider
+ */
+ @Override
+ public TableName getTableName()
+ {
+ return new TableName(getSchema(), "ec2_images");
+ }
+
+ /**
+ * @See TableProvider
+ */
+ @Override
+ public GetTableResponse getTable(BlockAllocator blockAllocator, GetTableRequest getTableRequest)
+ {
+ return new GetTableResponse(getTableRequest.getCatalogName(), getTableName(), SCHEMA);
+ }
+
+ /**
+ * Calls DescribeImages on the AWS EC2 Client returning all images that match the supplied predicate and attempting
+ * to push down certain predicates (namely queries for a specific image or owner) to EC2.
+ *
+ * @note Because of the large number of public AMIs we also support using a default 'owner' filter if your query doesn't
+ * filter on owner itself. You can set this using an env variable on your Lambda function defined by DEFAULT_OWNER_ENV.
+ * @See TableProvider
+ */
+ @Override
+ public void readWithConstraint(BlockSpiller spiller, ReadRecordsRequest recordsRequest, QueryStatusChecker queryStatusChecker)
+ {
+ DescribeImagesRequest request = new DescribeImagesRequest();
+
+ ValueSet idConstraint = recordsRequest.getConstraints().getSummary().get("id");
+ ValueSet ownerConstraint = recordsRequest.getConstraints().getSummary().get("owner");
+ if (idConstraint != null && idConstraint.isSingleValue()) {
+ request.setImageIds(Collections.singletonList(idConstraint.getSingleValue().toString()));
+ }
+ else if (ownerConstraint != null && ownerConstraint.isSingleValue()) {
+ request.setOwners(Collections.singletonList(ownerConstraint.getSingleValue().toString()));
+ }
+ else if (DEFAULT_OWNER != null) {
+ request.setOwners(Collections.singletonList(DEFAULT_OWNER));
+ }
+ else {
+ throw new RuntimeException("A default owner account must be set or the query must have owner" +
+ "in the where clause with exactly 1 value otherwise results may be too big.");
+ }
+
+ DescribeImagesResult response = ec2.describeImages(request);
+
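+ //This request is not paginated here, so fail fast if the result exceeds MAX_IMAGES rather than
+ //mapping an unbounded number of rows.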
+ int count = 0;
+ for (Image next : response.getImages()) {
+ if (count++ > MAX_IMAGES) {
+ throw new RuntimeException("Too many images returned, add an owner or id filter.");
+ }
+ instanceToRow(next, spiller);
+ }
+ }
+
+ /**
+ * Maps an EC2 Image (AMI) into a row in our Apache Arrow response block(s).
+ *
+ * @param image The EC2 Image (AMI) to map.
+ * @param spiller The BlockSpiller to use when we want to write a matching row to the response.
+ * @note The current implementation is rather naive in how it maps fields. It leverages a static
+ * list of fields that we'd like to provide and then explicitly filters and converts each field.
+ */
+ private void instanceToRow(Image image,
+ BlockSpiller spiller)
+ {
+ spiller.writeRows((Block block, int row) -> {
+ boolean matched = true;
+
+ matched &= block.offerValue("id", row, image.getImageId());
+ matched &= block.offerValue("architecture", row, image.getArchitecture());
+ matched &= block.offerValue("created", row, image.getCreationDate());
+ matched &= block.offerValue("description", row, image.getDescription());
+ matched &= block.offerValue("hypervisor", row, image.getHypervisor());
+ matched &= block.offerValue("location", row, image.getImageLocation());
+ matched &= block.offerValue("type", row, image.getImageType());
+ matched &= block.offerValue("kernel", row, image.getKernelId());
+ matched &= block.offerValue("name", row, image.getName());
+ matched &= block.offerValue("owner", row, image.getOwnerId());
+ matched &= block.offerValue("platform", row, image.getPlatform());
+ matched &= block.offerValue("ramdisk", row, image.getRamdiskId());
+ matched &= block.offerValue("root_device", row, image.getRootDeviceName());
+ matched &= block.offerValue("root_type", row, image.getRootDeviceType());
+ matched &= block.offerValue("srvio_net", row, image.getSriovNetSupport());
+ matched &= block.offerValue("state", row, image.getState());
+ matched &= block.offerValue("virt_type", row, image.getVirtualizationType());
+ matched &= block.offerValue("is_public", row, image.getPublic());
+
+ List<Tag> tags = image.getTags();
+ matched &= block.offerComplexValue("tags",
+ row,
+ (Field field, Object val) -> {
+ if (field.getName().equals("key")) {
+ return ((Tag) val).getKey();
+ }
+ else if (field.getName().equals("value")) {
+ return ((Tag) val).getValue();
+ }
+
+ throw new RuntimeException("Unexpected field " + field.getName());
+ },
+ tags);
+
+ matched &= block.offerComplexValue("block_devices",
+ row,
+ (Field field, Object val) -> {
+ if (field.getName().equals("dev_name")) {
+ return ((BlockDeviceMapping) val).getDeviceName();
+ }
+ else if (field.getName().equals("no_device")) {
+ return ((BlockDeviceMapping) val).getNoDevice();
+ }
+ else if (field.getName().equals("virt_name")) {
+ return ((BlockDeviceMapping) val).getVirtualName();
+ }
+ else if (field.getName().equals("ebs")) {
+ return ((BlockDeviceMapping) val).getEbs();
+ }
+ else if (field.getName().equals("ebs_size")) {
+ return ((EbsBlockDevice) val).getVolumeSize();
+ }
+ else if (field.getName().equals("ebs_iops")) {
+ return ((EbsBlockDevice) val).getIops();
+ }
+ else if (field.getName().equals("ebs_type")) {
+ return ((EbsBlockDevice) val).getVolumeType();
+ }
+ else if (field.getName().equals("ebs_kms_key")) {
+ return ((EbsBlockDevice) val).getKmsKeyId();
+ }
+
+ throw new RuntimeException("Unexpected field " + field.getName());
+ },
+ image.getBlockDeviceMappings());
+
+ return matched ? 1 : 0;
+ });
+ }
+
+ /**
+ * Defines the schema of this table.
+ */
+ static {
+ SCHEMA = SchemaBuilder.newBuilder()
+ .addStringField("id")
+ .addStringField("architecture")
+ .addStringField("created")
+ .addStringField("description")
+ .addStringField("hypervisor")
+ .addStringField("location")
+ .addStringField("type")
+ .addStringField("kernel")
+ .addStringField("name")
+ .addStringField("owner")
+ .addStringField("platform")
+ .addStringField("ramdisk")
+ .addStringField("root_device")
+ .addStringField("root_type")
+ .addStringField("srvio_net")
+ .addStringField("state")
+ .addStringField("virt_type")
+ .addBitField("is_public")
+ .addField(
+ FieldBuilder.newBuilder("tags", new ArrowType.List())
+ .addField(
+ FieldBuilder.newBuilder("tag", Types.MinorType.STRUCT.getType())
+ .addStringField("key")
+ .addStringField("value")
+ .build())
+ .build())
+ .addField(
+ FieldBuilder.newBuilder("block_devices", new ArrowType.List())
+ .addField(
+ FieldBuilder.newBuilder("device", Types.MinorType.STRUCT.getType())
+ .addStringField("dev_name")
+ .addStringField("no_device")
+ .addStringField("virt_name")
+ .addField(
+ FieldBuilder.newBuilder("ebs", Types.MinorType.STRUCT.getType())
+ .addIntField("ebs_size")
+ .addIntField("ebs_iops")
+ .addStringField("ebs_type")
+ .addStringField("ebs_kms_key")
+ .build())
+ .build())
+ .build())
+ .addMetadata("id", "The id of the image.")
+ .addMetadata("architecture", "The architecture required to run the image.")
+ .addMetadata("created", "The date and time the image was created.")
+ .addMetadata("description", "The description associated with the image.")
+ .addMetadata("hypervisor", "The type of hypervisor required by the image.")
+ .addMetadata("location", "The location of the image.")
+ .addMetadata("type", "The type of image.")
+ .addMetadata("kernel", "The kernel used by the image.")
+ .addMetadata("name", "The name of the image.")
+ .addMetadata("owner", "The owner of the image.")
+ .addMetadata("platform", "The platform required by the image.")
+ .addMetadata("ramdisk", "Detailed of the ram disk used by the image.")
+ .addMetadata("root_device", "The root device used by the image.")
+ .addMetadata("root_type", "The type of root device required by the image.")
+ .addMetadata("srvio_net", "Details of srvio network support in the image.")
+ .addMetadata("state", "The state of the image.")
+ .addMetadata("virt_type", "The type of virtualization supported by the image.")
+ .addMetadata("is_public", "True if the image is publically available.")
+ .addMetadata("tags", "Tags associated with the image.")
+ .addMetadata("block_devices", "Block devices required by the image.")
+ .build();
+ }
+}
diff --git a/athena-aws-cmdb/src/main/java/com/amazonaws/athena/connectors/aws/cmdb/tables/ec2/RouteTableProvider.java b/athena-aws-cmdb/src/main/java/com/amazonaws/athena/connectors/aws/cmdb/tables/ec2/RouteTableProvider.java
new file mode 100644
index 0000000000..24583be45e
--- /dev/null
+++ b/athena-aws-cmdb/src/main/java/com/amazonaws/athena/connectors/aws/cmdb/tables/ec2/RouteTableProvider.java
@@ -0,0 +1,215 @@
+/*-
+ * #%L
+ * athena-aws-cmdb
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+package com.amazonaws.athena.connectors.aws.cmdb.tables.ec2;
+
+import com.amazonaws.athena.connector.lambda.QueryStatusChecker;
+import com.amazonaws.athena.connector.lambda.data.Block;
+import com.amazonaws.athena.connector.lambda.data.BlockAllocator;
+import com.amazonaws.athena.connector.lambda.data.BlockSpiller;
+import com.amazonaws.athena.connector.lambda.data.FieldResolver;
+import com.amazonaws.athena.connector.lambda.data.SchemaBuilder;
+import com.amazonaws.athena.connector.lambda.domain.TableName;
+import com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet;
+import com.amazonaws.athena.connector.lambda.metadata.GetTableRequest;
+import com.amazonaws.athena.connector.lambda.metadata.GetTableResponse;
+import com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest;
+import com.amazonaws.athena.connectors.aws.cmdb.tables.TableProvider;
+import com.amazonaws.services.ec2.AmazonEC2;
+import com.amazonaws.services.ec2.model.DescribeRouteTablesRequest;
+import com.amazonaws.services.ec2.model.DescribeRouteTablesResult;
+import com.amazonaws.services.ec2.model.Route;
+import com.amazonaws.services.ec2.model.RouteTable;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.pojo.Schema;
+
+import java.util.Collections;
+import java.util.List;
+import java.util.stream.Collectors;
+
+/**
+ * Maps your EC2 RouteTable entries (routes) to a table.
+ */
+public class RouteTableProvider
+ implements TableProvider
+{
+ private static final Schema SCHEMA;
+ private AmazonEC2 ec2;
+
+ public RouteTableProvider(AmazonEC2 ec2)
+ {
+ this.ec2 = ec2;
+ }
+
+ /**
+ * @See TableProvider
+ */
+ @Override
+ public String getSchema()
+ {
+ return "ec2";
+ }
+
+ /**
+ * @See TableProvider
+ */
+ @Override
+ public TableName getTableName()
+ {
+ return new TableName(getSchema(), "routing_tables");
+ }
+
+ /**
+ * @See TableProvider
+ */
+ @Override
+ public GetTableResponse getTable(BlockAllocator blockAllocator, GetTableRequest getTableRequest)
+ {
+ return new GetTableResponse(getTableRequest.getCatalogName(), getTableName(), SCHEMA);
+ }
+
+ /**
+ * Calls DescribeRouteTables on the AWS EC2 Client returning all Routes that match the supplied predicate and attempting
+ * to push down certain predicates (namely queries for specific RoutingTables) to EC2.
+ *
+ * @See TableProvider
+ */
+ @Override
+ public void readWithConstraint(BlockSpiller spiller, ReadRecordsRequest recordsRequest, QueryStatusChecker queryStatusChecker)
+ {
+ boolean done = false;
+ DescribeRouteTablesRequest request = new DescribeRouteTablesRequest();
+
+ ValueSet idConstraint = recordsRequest.getConstraints().getSummary().get("route_table_id");
+ if (idConstraint != null && idConstraint.isSingleValue()) {
+ request.setRouteTableIds(Collections.singletonList(idConstraint.getSingleValue().toString()));
+ }
+
+ while (!done) {
+ DescribeRouteTablesResult response = ec2.describeRouteTables(request);
+
+ for (RouteTable nextRouteTable : response.getRouteTables()) {
+ for (Route route : nextRouteTable.getRoutes()) {
+ instanceToRow(nextRouteTable, route, spiller);
+ }
+ }
+
+ request.setNextToken(response.getNextToken());
+
+ if (response.getNextToken() == null || !queryStatusChecker.isQueryRunning()) {
+ done = true;
+ }
+ }
+ }
+
+ /**
+ * Maps an EC2 Route into a row in our Apache Arrow response block(s).
+ *
+ * @param routeTable The RouteTable that owns the given Route.
+ * @param route The Route to map.
+ * @param spiller The BlockSpiller to use when we want to write a matching row to the response.
+ * @note The current implementation is rather naive in how it maps fields. It leverages a static
+ * list of fields that we'd like to provide and then explicitly filters and converts each field.
+ */
+ private void instanceToRow(RouteTable routeTable,
+ Route route,
+ BlockSpiller spiller)
+ {
+ spiller.writeRows((Block block, int row) -> {
+ boolean matched = true;
+
+ matched &= block.offerValue("route_table_id", row, routeTable.getRouteTableId());
+ matched &= block.offerValue("owner", row, routeTable.getOwnerId());
+ matched &= block.offerValue("vpc", row, routeTable.getVpcId());
+ matched &= block.offerValue("dst_cidr", row, route.getDestinationCidrBlock());
+ matched &= block.offerValue("dst_cidr_v6", row, route.getDestinationIpv6CidrBlock());
+ matched &= block.offerValue("dst_prefix_list", row, route.getDestinationPrefixListId());
+ matched &= block.offerValue("egress_igw", row, route.getEgressOnlyInternetGatewayId());
+ matched &= block.offerValue("gateway", row, route.getGatewayId());
+ matched &= block.offerValue("instance_id", row, route.getInstanceId());
+ matched &= block.offerValue("instance_owner", row, route.getInstanceOwnerId());
+ matched &= block.offerValue("nat_gateway", row, route.getNatGatewayId());
+ matched &= block.offerValue("interface", row, route.getNetworkInterfaceId());
+ matched &= block.offerValue("origin", row, route.getOrigin());
+ matched &= block.offerValue("state", row, route.getState());
+ matched &= block.offerValue("transit_gateway", row, route.getTransitGatewayId());
+ matched &= block.offerValue("vpc_peering_con", row, route.getVpcPeeringConnectionId());
+
+ List<String> associations = routeTable.getAssociations().stream()
+ .map(next -> next.getSubnetId() + ":" + next.getRouteTableId()).collect(Collectors.toList());
+ matched &= block.offerComplexValue("associations", row, FieldResolver.DEFAULT, associations);
+
+ List<String> tags = routeTable.getTags().stream()
+ .map(next -> next.getKey() + ":" + next.getValue()).collect(Collectors.toList());
+ matched &= block.offerComplexValue("tags", row, FieldResolver.DEFAULT, tags);
+
+ List<String> propagatingVgws = routeTable.getPropagatingVgws().stream()
+ .map(next -> next.getGatewayId()).collect(Collectors.toList());
+ matched &= block.offerComplexValue("propagating_vgws", row, FieldResolver.DEFAULT, propagatingVgws);
+
+ return matched ? 1 : 0;
+ });
+ }
+
+ /**
+ * Defines the schema of this table.
+ */
+ static {
+ SCHEMA = SchemaBuilder.newBuilder()
+ .addStringField("route_table_id")
+ .addStringField("owner")
+ .addStringField("vpc")
+ .addListField("associations", Types.MinorType.VARCHAR.getType())
+ .addListField("tags", Types.MinorType.VARCHAR.getType())
+ .addListField("propagating_vgws", Types.MinorType.VARCHAR.getType())
+ .addStringField("dst_cidr")
+ .addStringField("dst_cidr_v6")
+ .addStringField("dst_prefix_list")
+ .addStringField("egress_igw")
+ .addStringField("gateway")
+ .addStringField("instance_id")
+ .addStringField("instance_owner")
+ .addStringField("nat_gateway")
+ .addStringField("interface")
+ .addStringField("origin")
+ .addStringField("state")
+ .addStringField("transit_gateway")
+ .addStringField("vpc_peering_con")
+ .addMetadata("route_table_id", "Id of the route table the route belongs to.")
+ .addMetadata("owner", "Owner of the route table.")
+ .addMetadata("vpc", "VPC the route table is associated with.")
+ .addMetadata("associations", "List of associations for this route table.")
+ .addMetadata("tags", "Tags on the route table.")
+ .addMetadata("propagating_vgws", "Vgws the route table propogates through.")
+ .addMetadata("dst_cidr", "Destination IPv4 CIDR block for the route.")
+ .addMetadata("dst_cidr_v6", "Destination IPv6 CIDR block for the route.")
+ .addMetadata("dst_prefix_list", "Destination prefix list for the route.")
+ .addMetadata("egress_igw", "Egress gateway for the route.")
+ .addMetadata("gateway", "Gateway for the route.")
+ .addMetadata("instance_id", "Instance id of the route.")
+ .addMetadata("instance_owner", "Owner of the route.")
+ .addMetadata("nat_gateway", "NAT gateway used by the route.")
+ .addMetadata("interface", "Interface associated with the route.")
+ .addMetadata("origin", "Origin of the route.")
+ .addMetadata("state", "State of the route.")
+ .addMetadata("transit_gateway", "Transit Gateway associated with the route.")
+ .addMetadata("vpc_peering_con", "VPC Peering connection associated with the route.")
+ .build();
+ }
+}
diff --git a/athena-aws-cmdb/src/main/java/com/amazonaws/athena/connectors/aws/cmdb/tables/ec2/SecurityGroupsTableProvider.java b/athena-aws-cmdb/src/main/java/com/amazonaws/athena/connectors/aws/cmdb/tables/ec2/SecurityGroupsTableProvider.java
new file mode 100644
index 0000000000..8f4f6dd3c3
--- /dev/null
+++ b/athena-aws-cmdb/src/main/java/com/amazonaws/athena/connectors/aws/cmdb/tables/ec2/SecurityGroupsTableProvider.java
@@ -0,0 +1,209 @@
+/*-
+ * #%L
+ * athena-aws-cmdb
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+package com.amazonaws.athena.connectors.aws.cmdb.tables.ec2;
+
+import com.amazonaws.athena.connector.lambda.QueryStatusChecker;
+import com.amazonaws.athena.connector.lambda.data.Block;
+import com.amazonaws.athena.connector.lambda.data.BlockAllocator;
+import com.amazonaws.athena.connector.lambda.data.BlockSpiller;
+import com.amazonaws.athena.connector.lambda.data.FieldResolver;
+import com.amazonaws.athena.connector.lambda.data.SchemaBuilder;
+import com.amazonaws.athena.connector.lambda.domain.TableName;
+import com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet;
+import com.amazonaws.athena.connector.lambda.metadata.GetTableRequest;
+import com.amazonaws.athena.connector.lambda.metadata.GetTableResponse;
+import com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest;
+import com.amazonaws.athena.connectors.aws.cmdb.tables.TableProvider;
+import com.amazonaws.services.ec2.AmazonEC2;
+import com.amazonaws.services.ec2.model.DescribeSecurityGroupsRequest;
+import com.amazonaws.services.ec2.model.DescribeSecurityGroupsResult;
+import com.amazonaws.services.ec2.model.IpPermission;
+import com.amazonaws.services.ec2.model.SecurityGroup;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.pojo.Schema;
+
+import java.util.Collections;
+import java.util.List;
+import java.util.stream.Collectors;
+
+/**
+ * Maps your EC2 SecurityGroups to a table.
+ */
+public class SecurityGroupsTableProvider
+ implements TableProvider
+{
+ private static final String INGRESS = "ingress";
+ private static final String EGRESS = "egress";
+
+ private static final Schema SCHEMA;
+ private AmazonEC2 ec2;
+
+ public SecurityGroupsTableProvider(AmazonEC2 ec2)
+ {
+ this.ec2 = ec2;
+ }
+
+ /**
+ * @See TableProvider
+ */
+ @Override
+ public String getSchema()
+ {
+ return "ec2";
+ }
+
+ /**
+ * @See TableProvider
+ */
+ @Override
+ public TableName getTableName()
+ {
+ return new TableName(getSchema(), "security_groups");
+ }
+
+ /**
+ * @See TableProvider
+ */
+ @Override
+ public GetTableResponse getTable(BlockAllocator blockAllocator, GetTableRequest getTableRequest)
+ {
+ return new GetTableResponse(getTableRequest.getCatalogName(), getTableName(), SCHEMA);
+ }
+
+ /**
+ * Calls DescribeSecurityGroups on the AWS EC2 Client returning all SecurityGroup rules that match the supplied
+ * predicate and attempting to push down certain predicates (namely queries for specific SecurityGroups) to EC2.
+ *
+ * @See TableProvider
+ */
+ @Override
+ public void readWithConstraint(BlockSpiller spiller, ReadRecordsRequest recordsRequest, QueryStatusChecker queryStatusChecker)
+ {
+ boolean done = false;
+ DescribeSecurityGroupsRequest request = new DescribeSecurityGroupsRequest();
+
+ ValueSet idConstraint = recordsRequest.getConstraints().getSummary().get("id");
+ if (idConstraint != null && idConstraint.isSingleValue()) {
+ request.setGroupIds(Collections.singletonList(idConstraint.getSingleValue().toString()));
+ }
+
+ ValueSet nameConstraint = recordsRequest.getConstraints().getSummary().get("name");
+ if (nameConstraint != null && nameConstraint.isSingleValue()) {
+ request.setGroupNames(Collections.singletonList(nameConstraint.getSingleValue().toString()));
+ }
+
+ while (!done) {
+ DescribeSecurityGroupsResult response = ec2.describeSecurityGroups(request);
+
+ //Each rule is mapped to a row in the response. SGs have INGRESS and EGRESS rules.
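+ //For example, a security group with two ingress rules and one egress rule produces three rows,
+ //each tagged with the 'direction' column.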
+ for (SecurityGroup next : response.getSecurityGroups()) {
+ for (IpPermission nextPerm : next.getIpPermissions()) {
+ instanceToRow(next, nextPerm, INGRESS, spiller);
+ }
+
+ for (IpPermission nextPerm : next.getIpPermissionsEgress()) {
+ instanceToRow(next, nextPerm, EGRESS, spiller);
+ }
+ }
+
+ request.setNextToken(response.getNextToken());
+ if (response.getNextToken() == null || !queryStatusChecker.isQueryRunning()) {
+ done = true;
+ }
+ }
+ }
+
+ /**
+ * Maps each SecurityGroup rule (aka IpPermission) to a row in the response.
+ *
+ * @param securityGroup The SecurityGroup that owns the permission entry.
+ * @param permission The permission entry (aka rule) to map.
+ * @param direction The direction (EGRESS or INGRESS) of the rule.
+ * @param spiller The BlockSpiller to use when we want to write a matching row to the response.
+ * @note The current implementation is rather naive in how it maps fields. It leverages a static
+ * list of fields that we'd like to provide and then explicitly filters and converts each field.
+ */
+ private void instanceToRow(SecurityGroup securityGroup,
+ IpPermission permission,
+ String direction,
+ BlockSpiller spiller)
+ {
+ spiller.writeRows((Block block, int row) -> {
+ boolean matched = true;
+
+ matched &= block.offerValue("id", row, securityGroup.getGroupId());
+ matched &= block.offerValue("name", row, securityGroup.getGroupName());
+ matched &= block.offerValue("description", row, securityGroup.getDescription());
+ matched &= block.offerValue("from_port", row, permission.getFromPort());
+ matched &= block.offerValue("to_port", row, permission.getFromPort());
+ matched &= block.offerValue("protocol", row, permission.getIpProtocol());
+ matched &= block.offerValue("direction", row, permission.getIpProtocol());
+
+ List<String> ipv4Ranges = permission.getIpv4Ranges().stream()
+ .map(next -> next.getCidrIp() + ":" + next.getDescription()).collect(Collectors.toList());
+ matched &= block.offerComplexValue("ipv4_ranges", row, FieldResolver.DEFAULT, ipv4Ranges);
+
+ List<String> ipv6Ranges = permission.getIpv6Ranges().stream()
+ .map(next -> next.getCidrIpv6() + ":" + next.getDescription()).collect(Collectors.toList());
+ matched &= block.offerComplexValue("ipv6_ranges", row, FieldResolver.DEFAULT, ipv6Ranges);
+
+ List<String> prefixLists = permission.getPrefixListIds().stream()
+ .map(next -> next.getPrefixListId() + ":" + next.getDescription()).collect(Collectors.toList());
+ matched &= block.offerComplexValue("prefix_lists", row, FieldResolver.DEFAULT, prefixLists);
+
+ List<String> userIdGroups = permission.getUserIdGroupPairs().stream()
+ .map(next -> next.getUserId() + ":" + next.getGroupId())
+ .collect(Collectors.toList());
+ matched &= block.offerComplexValue("user_id_groups", row, FieldResolver.DEFAULT, userIdGroups);
+
+ return matched ? 1 : 0;
+ });
+ }
+
+ /**
+ * Defines the schema of this table.
+ */
+ static {
+ SCHEMA = SchemaBuilder.newBuilder()
+ .addStringField("id")
+ .addStringField("name")
+ .addStringField("description")
+ .addIntField("from_port")
+ .addIntField("to_port")
+ .addStringField("protocol")
+ .addStringField("direction")
+ .addListField("ipv4_ranges", Types.MinorType.VARCHAR.getType())
+ .addListField("ipv6_ranges", Types.MinorType.VARCHAR.getType())
+ .addListField("prefix_lists", Types.MinorType.VARCHAR.getType())
+ .addListField("user_id_groups", Types.MinorType.VARCHAR.getType())
+ .addMetadata("id", "Security Group ID.")
+ .addMetadata("name", "Name of the security group.")
+ .addMetadata("description", "Description of the security group.")
+ .addMetadata("from_port", "Beginging of the port range covered by this security group.")
+ .addMetadata("to_port", "Ending of the port range covered by this security group.")
+ .addMetadata("protocol", "The network protocol covered by this security group.")
+ .addMetadata("direction", "Notes if the rule applies inbound (ingress) or outbound (egress).")
+ .addMetadata("ipv4_ranges", "The ip v4 ranges covered by this security group.")
+ .addMetadata("ipv6_ranges", "The ip v6 ranges covered by this security group.")
+ .addMetadata("prefix_lists", "The prefix lists covered by this security group.")
+ .addMetadata("user_id_groups", "The user id groups covered by this security group.")
+ .build();
+ }
+}
diff --git a/athena-aws-cmdb/src/main/java/com/amazonaws/athena/connectors/aws/cmdb/tables/ec2/SubnetTableProvider.java b/athena-aws-cmdb/src/main/java/com/amazonaws/athena/connectors/aws/cmdb/tables/ec2/SubnetTableProvider.java
new file mode 100644
index 0000000000..f64bb9bd26
--- /dev/null
+++ b/athena-aws-cmdb/src/main/java/com/amazonaws/athena/connectors/aws/cmdb/tables/ec2/SubnetTableProvider.java
@@ -0,0 +1,168 @@
+/*-
+ * #%L
+ * athena-aws-cmdb
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+package com.amazonaws.athena.connectors.aws.cmdb.tables.ec2;
+
+import com.amazonaws.athena.connector.lambda.QueryStatusChecker;
+import com.amazonaws.athena.connector.lambda.data.Block;
+import com.amazonaws.athena.connector.lambda.data.BlockAllocator;
+import com.amazonaws.athena.connector.lambda.data.BlockSpiller;
+import com.amazonaws.athena.connector.lambda.data.FieldResolver;
+import com.amazonaws.athena.connector.lambda.data.SchemaBuilder;
+import com.amazonaws.athena.connector.lambda.domain.TableName;
+import com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet;
+import com.amazonaws.athena.connector.lambda.metadata.GetTableRequest;
+import com.amazonaws.athena.connector.lambda.metadata.GetTableResponse;
+import com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest;
+import com.amazonaws.athena.connectors.aws.cmdb.tables.TableProvider;
+import com.amazonaws.services.ec2.AmazonEC2;
+import com.amazonaws.services.ec2.model.DescribeSubnetsRequest;
+import com.amazonaws.services.ec2.model.DescribeSubnetsResult;
+import com.amazonaws.services.ec2.model.Subnet;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.pojo.Schema;
+
+import java.util.Collections;
+import java.util.List;
+import java.util.stream.Collectors;
+
+/**
+ * Maps your EC2 Subnets to a table.
+ */
+public class SubnetTableProvider
+ implements TableProvider
+{
+ private static final Schema SCHEMA;
+ private AmazonEC2 ec2;
+
+ public SubnetTableProvider(AmazonEC2 ec2)
+ {
+ this.ec2 = ec2;
+ }
+
+ /**
+ * @See TableProvider
+ */
+ @Override
+ public String getSchema()
+ {
+ return "ec2";
+ }
+
+ /**
+ * @See TableProvider
+ */
+ @Override
+ public TableName getTableName()
+ {
+ return new TableName(getSchema(), "subnets");
+ }
+
+ /**
+ * @See TableProvider
+ */
+ @Override
+ public GetTableResponse getTable(BlockAllocator blockAllocator, GetTableRequest getTableRequest)
+ {
+ return new GetTableResponse(getTableRequest.getCatalogName(), getTableName(), SCHEMA);
+ }
+
+ /**
+ * Calls DescribeSubnets on the AWS EC2 Client returning all subnets that match the supplied predicate and attempting
+ * to push down certain predicates (namely queries for specific subnet) to EC2.
+ *
+ * @See TableProvider
+ */
+ @Override
+ public void readWithConstraint(BlockSpiller spiller, ReadRecordsRequest recordsRequest, QueryStatusChecker queryStatusChecker)
+ {
+ DescribeSubnetsRequest request = new DescribeSubnetsRequest();
+
+ ValueSet idConstraint = recordsRequest.getConstraints().getSummary().get("id");
+ if (idConstraint != null && idConstraint.isSingleValue()) {
+ request.setSubnetIds(Collections.singletonList(idConstraint.getSingleValue().toString()));
+ }
+
+ DescribeSubnetsResult response = ec2.describeSubnets(request);
+ for (Subnet subnet : response.getSubnets()) {
+ instanceToRow(subnet, spiller);
+ }
+ }
+
+ /**
+ * Maps an EC2 Subnet into a row in our Apache Arrow response block(s).
+ *
+ * @param subnet The EC2 Subnet to map.
+ * @param spiller The BlockSpiller to use when we want to write a matching row to the response.
+ * @note The current implementation is rather naive in how it maps fields. It leverages a static
+ * list of fields that we'd like to provide and then explicitly filters and converts each field.
+ */
+ private void instanceToRow(Subnet subnet,
+ BlockSpiller spiller)
+ {
+ spiller.writeRows((Block block, int row) -> {
+ boolean matched = true;
+
+ matched &= block.offerValue("id", row, subnet.getSubnetId());
+ matched &= block.offerValue("availability_zone", row, subnet.getAvailabilityZone());
+ matched &= block.offerValue("available_ip_count", row, subnet.getAvailableIpAddressCount());
+ matched &= block.offerValue("cidr_block", row, subnet.getCidrBlock());
+ matched &= block.offerValue("default_for_az", row, subnet.getDefaultForAz());
+ matched &= block.offerValue("map_public_ip", row, subnet.getMapPublicIpOnLaunch());
+ matched &= block.offerValue("owner", row, subnet.getOwnerId());
+ matched &= block.offerValue("state", row, subnet.getState());
+ matched &= block.offerValue("vpc", row, subnet.getVpcId());
+ matched &= block.offerValue("vpc", row, subnet.getVpcId());
+
+ List<String> tags = subnet.getTags().stream()
+ .map(next -> next.getKey() + ":" + next.getValue()).collect(Collectors.toList());
+ matched &= block.offerComplexValue("tags", row, FieldResolver.DEFAULT, tags);
+
+ return matched ? 1 : 0;
+ });
+ }
+
+ /**
+ * Defines the schema of this table.
+ */
+ static {
+ SCHEMA = SchemaBuilder.newBuilder()
+ .addStringField("id")
+ .addStringField("availability_zone")
+ .addIntField("available_ip_count")
+ .addStringField("cidr_block")
+ .addBitField("default_for_az")
+ .addBitField("map_public_ip")
+ .addStringField("owner")
+ .addStringField("state")
+ .addListField("tags", Types.MinorType.VARCHAR.getType())
+ .addStringField("vpc")
+ .addMetadata("id", "Subnet Id")
+ .addMetadata("availability_zone", "Availability zone the subnet is in.")
+ .addMetadata("available_ip_count", "Number of available IPs in the subnet.")
+ .addMetadata("cidr_block", "The CIDR block that the subnet uses to allocate addresses.")
+ .addMetadata("default_for_az", "True if this is the default subnet for the AZ.")
+ .addMetadata("map_public_ip", "True if public addresses are signed by default in this subnet.")
+ .addMetadata("owner", "Owner of the subnet.")
+ .addMetadata("state", "The state of the subnet.")
+ .addMetadata("vpc", "The VPC the subnet is part of.")
+ .addMetadata("tags", "Tags associated with the volume.")
+ .build();
+ }
+}
diff --git a/athena-aws-cmdb/src/main/java/com/amazonaws/athena/connectors/aws/cmdb/tables/ec2/VpcTableProvider.java b/athena-aws-cmdb/src/main/java/com/amazonaws/athena/connectors/aws/cmdb/tables/ec2/VpcTableProvider.java
new file mode 100644
index 0000000000..18087ba5e5
--- /dev/null
+++ b/athena-aws-cmdb/src/main/java/com/amazonaws/athena/connectors/aws/cmdb/tables/ec2/VpcTableProvider.java
@@ -0,0 +1,161 @@
+/*-
+ * #%L
+ * athena-aws-cmdb
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+package com.amazonaws.athena.connectors.aws.cmdb.tables.ec2;
+
+import com.amazonaws.athena.connector.lambda.QueryStatusChecker;
+import com.amazonaws.athena.connector.lambda.data.Block;
+import com.amazonaws.athena.connector.lambda.data.BlockAllocator;
+import com.amazonaws.athena.connector.lambda.data.BlockSpiller;
+import com.amazonaws.athena.connector.lambda.data.FieldResolver;
+import com.amazonaws.athena.connector.lambda.data.SchemaBuilder;
+import com.amazonaws.athena.connector.lambda.domain.TableName;
+import com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet;
+import com.amazonaws.athena.connector.lambda.metadata.GetTableRequest;
+import com.amazonaws.athena.connector.lambda.metadata.GetTableResponse;
+import com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest;
+import com.amazonaws.athena.connectors.aws.cmdb.tables.TableProvider;
+import com.amazonaws.services.ec2.AmazonEC2;
+import com.amazonaws.services.ec2.model.DescribeVpcsRequest;
+import com.amazonaws.services.ec2.model.DescribeVpcsResult;
+import com.amazonaws.services.ec2.model.Vpc;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.pojo.Schema;
+
+import java.util.Collections;
+import java.util.List;
+import java.util.stream.Collectors;
+
+/**
+ * Maps your VPCs to a table.
+ */
+public class VpcTableProvider
+ implements TableProvider
+{
+ private static final Schema SCHEMA;
+ private AmazonEC2 ec2;
+
+ public VpcTableProvider(AmazonEC2 ec2)
+ {
+ this.ec2 = ec2;
+ }
+
+ /**
+ * @See TableProvider
+ */
+ @Override
+ public String getSchema()
+ {
+ return "ec2";
+ }
+
+ /**
+ * @See TableProvider
+ */
+ @Override
+ public TableName getTableName()
+ {
+ return new TableName(getSchema(), "vpcs");
+ }
+
+ /**
+ * @See TableProvider
+ */
+ @Override
+ public GetTableResponse getTable(BlockAllocator blockAllocator, GetTableRequest getTableRequest)
+ {
+ return new GetTableResponse(getTableRequest.getCatalogName(), getTableName(), SCHEMA);
+ }
+
+ /**
+ * Calls DescribeVPCs on the AWS EC2 Client returning all VPCs that match the supplied predicate and attempting
+ * to push down certain predicates (namely queries for specific VPCs) to EC2.
+ *
+ * @See TableProvider
+ */
+ @Override
+ public void readWithConstraint(BlockSpiller spiller, ReadRecordsRequest recordsRequest, QueryStatusChecker queryStatusChecker)
+ {
+ DescribeVpcsRequest request = new DescribeVpcsRequest();
+
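+ // If the query filters on a single VPC id, push that predicate down to EC2 so we only
+ // describe the requested VPC instead of every VPC in the account.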
+ ValueSet idConstraint = recordsRequest.getConstraints().getSummary().get("id");
+ if (idConstraint != null && idConstraint.isSingleValue()) {
+ request.setVpcIds(Collections.singletonList(idConstraint.getSingleValue().toString()));
+ }
+
+ DescribeVpcsResult response = ec2.describeVpcs(request);
+ for (Vpc vpc : response.getVpcs()) {
+ instanceToRow(vpc, spiller);
+ }
+ }
+
+ /**
+ * Maps a VPC into a row in our Apache Arrow response block(s).
+ *
+ * @param vpc The VPC to map.
+ * @param spiller The BlockSpiller to use when we want to write a matching row to the response.
+ * @note The current implementation is rather naive in how it maps fields. It leverages a static
+ * list of fields that we'd like to provide and then explicitly filters and converts each field.
+ */
+ private void instanceToRow(Vpc vpc,
+ BlockSpiller spiller)
+ {
+ spiller.writeRows((Block block, int row) -> {
+ boolean matched = true;
+
+ matched &= block.offerValue("id", row, vpc.getVpcId());
+ matched &= block.offerValue("cidr_block", row, vpc.getCidrBlock());
+ matched &= block.offerValue("dhcp_opts", row, vpc.getDhcpOptionsId());
+ matched &= block.offerValue("tenancy", row, vpc.getInstanceTenancy());
+ matched &= block.offerValue("owner", row, vpc.getOwnerId());
+ matched &= block.offerValue("state", row, vpc.getState());
+ matched &= block.offerValue("is_default", row, vpc.getIsDefault());
+
+ List<String> tags = vpc.getTags().stream()
+ .map(next -> next.getKey() + ":" + next.getValue()).collect(Collectors.toList());
+ matched &= block.offerComplexValue("tags", row, FieldResolver.DEFAULT, tags);
+
+ return matched ? 1 : 0;
+ });
+ }
+
+ /**
+ * Defines the schema of this table.
+ */
+ static {
+ SCHEMA = SchemaBuilder.newBuilder()
+ .addStringField("id")
+ .addStringField("cidr_block")
+ .addStringField("dhcp_opts")
+ .addStringField("tenancy")
+ .addStringField("owner")
+ .addStringField("state")
+ .addBitField("is_default")
+ .addListField("tags", Types.MinorType.VARCHAR.getType())
+ .addMetadata("id", "VPC Id")
+ .addMetadata("cidr_block", "CIDR block used to vend IPs for the VPC.")
+ .addMetadata("dhcp_opts", "DHCP options used for DNS resolution in the VPC.")
+ .addMetadata("tenancy", "EC2 Instance tenancy of this VPC (e.g. dedicated)")
+ .addMetadata("owner", "The owner of the VPC.")
+ .addMetadata("state", "The state of the VPC.")
+ .addMetadata("is_default", "True if the VPC is the default VPC.")
+ .addMetadata("tags", "Tags associated with the volume.")
+ .build();
+ }
+}
diff --git a/athena-aws-cmdb/src/main/java/com/amazonaws/athena/connectors/aws/cmdb/tables/s3/S3BucketsTableProvider.java b/athena-aws-cmdb/src/main/java/com/amazonaws/athena/connectors/aws/cmdb/tables/s3/S3BucketsTableProvider.java
new file mode 100644
index 0000000000..0387ac6bf7
--- /dev/null
+++ b/athena-aws-cmdb/src/main/java/com/amazonaws/athena/connectors/aws/cmdb/tables/s3/S3BucketsTableProvider.java
@@ -0,0 +1,133 @@
+/*-
+ * #%L
+ * athena-aws-cmdb
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+package com.amazonaws.athena.connectors.aws.cmdb.tables.s3;
+
+import com.amazonaws.athena.connector.lambda.QueryStatusChecker;
+import com.amazonaws.athena.connector.lambda.data.Block;
+import com.amazonaws.athena.connector.lambda.data.BlockAllocator;
+import com.amazonaws.athena.connector.lambda.data.BlockSpiller;
+import com.amazonaws.athena.connector.lambda.data.SchemaBuilder;
+import com.amazonaws.athena.connector.lambda.domain.TableName;
+import com.amazonaws.athena.connector.lambda.metadata.GetTableRequest;
+import com.amazonaws.athena.connector.lambda.metadata.GetTableResponse;
+import com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest;
+import com.amazonaws.athena.connectors.aws.cmdb.tables.TableProvider;
+import com.amazonaws.services.s3.AmazonS3;
+import com.amazonaws.services.s3.model.Bucket;
+import com.amazonaws.services.s3.model.Owner;
+import org.apache.arrow.vector.types.pojo.Schema;
+
+/**
+ * Maps your S3 Buckets to a table.
+ */
+public class S3BucketsTableProvider
+ implements TableProvider
+{
+ private static final Schema SCHEMA;
+ private AmazonS3 amazonS3;
+
+ public S3BucketsTableProvider(AmazonS3 amazonS3)
+ {
+ this.amazonS3 = amazonS3;
+ }
+
+ /**
+ * @See TableProvider
+ */
+ @Override
+ public String getSchema()
+ {
+ return "s3";
+ }
+
+ /**
+ * @See TableProvider
+ */
+ @Override
+ public TableName getTableName()
+ {
+ return new TableName(getSchema(), "buckets");
+ }
+
+ /**
+ * @See TableProvider
+ */
+ @Override
+ public GetTableResponse getTable(BlockAllocator blockAllocator, GetTableRequest getTableRequest)
+ {
+ return new GetTableResponse(getTableRequest.getCatalogName(), getTableName(), SCHEMA);
+ }
+
+ /**
+ * Calls ListBuckets on the AWS S3 Client, returning one row for every bucket owned by the account.
+ * S3 does not support filtering this listing server-side, so no predicates are pushed down.
+ *
+ * @See TableProvider
+ */
+ @Override
+ public void readWithConstraint(BlockSpiller spiller, ReadRecordsRequest recordsRequest, QueryStatusChecker queryStatusChecker)
+ {
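+ // listBuckets returns every bucket owned by the calling account; any predicates are applied
+ // later, when the BlockSpiller's constraint evaluator processes each offered field.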
+ for (Bucket next : amazonS3.listBuckets()) {
+ toRow(next, spiller);
+ }
+ }
+
+ /**
+ * Maps an S3 Bucket into a row in our Apache Arrow response block(s).
+ *
+ * @param bucket The S3 Bucket to map.
+ * @param spiller The BlockSpiller to use when we want to write a matching row to the response.
+ * @note The current implementation is rather naive in how it maps fields. It leverages a static
+ * list of fields that we'd like to provide and then explicitly filters and converts each field.
+ */
+ private void toRow(Bucket bucket,
+ BlockSpiller spiller)
+ {
+ spiller.writeRows((Block block, int row) -> {
+ boolean matched = true;
+ matched &= block.offerValue("bucket_name", row, bucket.getName());
+ matched &= block.offerValue("create_date", row, bucket.getCreationDate());
+
+ Owner owner = bucket.getOwner();
+ if (owner != null) {
+ matched &= block.offerValue("owner_name", row, bucket.getOwner().getDisplayName());
+ matched &= block.offerValue("owner_id", row, bucket.getOwner().getId());
+ }
+
+ return matched ? 1 : 0;
+ });
+ }
+
+ /**
+ * Defines the schema of this table.
+ */
+ static {
+ SCHEMA = SchemaBuilder.newBuilder()
+ .addStringField("bucket_name")
+ .addDateMilliField("create_date")
+ .addStringField("owner_name")
+ .addStringField("owner_id")
+ .addMetadata("bucket_name", "The name of the bucket that this object is in.")
+ .addMetadata("create_date", "The time the bucket was created.")
+ .addMetadata("owner_name", "The owner name of the object.")
+ .addMetadata("owner_id", "The owner_id of the object.")
+ .build();
+ }
+}
diff --git a/athena-aws-cmdb/src/main/java/com/amazonaws/athena/connectors/aws/cmdb/tables/s3/S3ObjectsTableProvider.java b/athena-aws-cmdb/src/main/java/com/amazonaws/athena/connectors/aws/cmdb/tables/s3/S3ObjectsTableProvider.java
new file mode 100644
index 0000000000..c58315f49e
--- /dev/null
+++ b/athena-aws-cmdb/src/main/java/com/amazonaws/athena/connectors/aws/cmdb/tables/s3/S3ObjectsTableProvider.java
@@ -0,0 +1,166 @@
+/*-
+ * #%L
+ * athena-aws-cmdb
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+package com.amazonaws.athena.connectors.aws.cmdb.tables.s3;
+
+import com.amazonaws.athena.connector.lambda.QueryStatusChecker;
+import com.amazonaws.athena.connector.lambda.data.Block;
+import com.amazonaws.athena.connector.lambda.data.BlockAllocator;
+import com.amazonaws.athena.connector.lambda.data.BlockSpiller;
+import com.amazonaws.athena.connector.lambda.data.SchemaBuilder;
+import com.amazonaws.athena.connector.lambda.domain.TableName;
+import com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet;
+import com.amazonaws.athena.connector.lambda.metadata.GetTableRequest;
+import com.amazonaws.athena.connector.lambda.metadata.GetTableResponse;
+import com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest;
+import com.amazonaws.athena.connectors.aws.cmdb.tables.TableProvider;
+import com.amazonaws.services.s3.AmazonS3;
+import com.amazonaws.services.s3.model.ListObjectsV2Request;
+import com.amazonaws.services.s3.model.ListObjectsV2Result;
+import com.amazonaws.services.s3.model.Owner;
+import com.amazonaws.services.s3.model.S3ObjectSummary;
+import org.apache.arrow.vector.types.pojo.Schema;
+
+/**
+ * Maps your S3 Objects to a table.
+ */
+public class S3ObjectsTableProvider
+ implements TableProvider
+{
+ private static final int MAX_KEYS = 1000;
+ private static final Schema SCHEMA;
+ private AmazonS3 amazonS3;
+
+ public S3ObjectsTableProvider(AmazonS3 amazonS3)
+ {
+ this.amazonS3 = amazonS3;
+ }
+
+ /**
+ * @See TableProvider
+ */
+ @Override
+ public String getSchema()
+ {
+ return "s3";
+ }
+
+ /**
+ * @See TableProvider
+ */
+ @Override
+ public TableName getTableName()
+ {
+ return new TableName(getSchema(), "objects");
+ }
+
+ /**
+ * @See TableProvider
+ */
+ @Override
+ public GetTableResponse getTable(BlockAllocator blockAllocator, GetTableRequest getTableRequest)
+ {
+ return new GetTableResponse(getTableRequest.getCatalogName(), getTableName(), SCHEMA);
+ }
+
+ /**
+ * Calls ListObjectsV2 on the AWS S3 Client, returning all objects in the requested bucket and attempting
+ * to push down certain predicates (namely the required bucket_name filter) to S3.
+ *
+ * @See TableProvider
+ */
+ @Override
+ public void readWithConstraint(BlockSpiller spiller, ReadRecordsRequest recordsRequest, QueryStatusChecker queryStatusChecker)
+ {
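+ // Listing objects requires a bucket, so the query must supply an equality predicate on
+ // bucket_name; that predicate is pushed down as the bucket used for the ListObjectsV2 call.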
+ ValueSet bucketConstraint = recordsRequest.getConstraints().getSummary().get("bucket_name");
+ String bucket;
+ if (bucketConstraint != null && bucketConstraint.isSingleValue()) {
+ bucket = bucketConstraint.getSingleValue().toString();
+ }
+ else {
+ throw new IllegalArgumentException("Queries against the objects table must filter on a single bucket " +
+ "(e.g. where bucket_name='my_bucket'.");
+ }
+
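+ // Page through the bucket MAX_KEYS objects at a time, following the continuation token until
+ // the listing is exhausted or the query has been cancelled upstream.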
+ ListObjectsV2Request req = new ListObjectsV2Request().withBucketName(bucket).withMaxKeys(MAX_KEYS);
+ ListObjectsV2Result result;
+ do {
+ result = amazonS3.listObjectsV2(req);
+ for (S3ObjectSummary objectSummary : result.getObjectSummaries()) {
+ toRow(objectSummary, spiller);
+ }
+ req.setContinuationToken(result.getNextContinuationToken());
+ }
+ while (result.isTruncated() && queryStatusChecker.isQueryRunning());
+ }
+
+ /**
+ * Maps an S3 ObjectSummary into a row in our Apache Arrow response block(s).
+ *
+ * @param objectSummary The S3 ObjectSummary to map.
+ * @param spiller The BlockSpiller to use when we want to write a matching row to the response.
+ * @note The current implementation is rather naive in how it maps fields. It leverages a static
+ * list of fields that we'd like to provide and then explicitly filters and converts each field.
+ */
+ private void toRow(S3ObjectSummary objectSummary,
+ BlockSpiller spiller)
+ {
+ spiller.writeRows((Block block, int row) -> {
+ boolean matched = true;
+ matched &= block.offerValue("bucket_name", row, objectSummary.getBucketName());
+ matched &= block.offerValue("e_tag", row, objectSummary.getETag());
+ matched &= block.offerValue("key", row, objectSummary.getKey());
+ matched &= block.offerValue("bytes", row, objectSummary.getSize());
+ matched &= block.offerValue("storage_class", row, objectSummary.getStorageClass());
+ matched &= block.offerValue("last_modified", row, objectSummary.getLastModified());
+
+ Owner owner = objectSummary.getOwner();
+ if (owner != null) {
+ matched &= block.offerValue("owner_name", row, owner.getDisplayName());
+ matched &= block.offerValue("owner_id", row, owner.getId());
+ }
+
+ return matched ? 1 : 0;
+ });
+ }
+
+ /**
+ * Defines the schema of this table.
+ */
+ static {
+ SCHEMA = SchemaBuilder.newBuilder()
+ .addStringField("bucket_name")
+ .addStringField("key")
+ .addStringField("e_tag")
+ .addBigIntField("bytes")
+ .addStringField("storage_class")
+ .addDateMilliField("last_modified")
+ .addStringField("owner_name")
+ .addStringField("owner_id")
+ .addMetadata("bucket_name", "The name of the bucket that this object is in.")
+ .addMetadata("key", "The key of the object.")
+ .addMetadata("e_tag", "eTag of the Object.")
+ .addMetadata("bytes", "The size of the object in bytes.")
+ .addMetadata("storage_class", "The storage class of the object.")
+ .addMetadata("last_modified", "The last time the object was modified.")
+ .addMetadata("owner_name", "The owner name of the object.")
+ .addMetadata("owner_id", "The owner_id of the object.")
+ .build();
+ }
+}
diff --git a/athena-aws-cmdb/src/test/java/com/amazonaws/athena/connectors/aws/cmdb/AwsCmdbMetadataHandlerTest.java b/athena-aws-cmdb/src/test/java/com/amazonaws/athena/connectors/aws/cmdb/AwsCmdbMetadataHandlerTest.java
new file mode 100644
index 0000000000..909ceda41d
--- /dev/null
+++ b/athena-aws-cmdb/src/test/java/com/amazonaws/athena/connectors/aws/cmdb/AwsCmdbMetadataHandlerTest.java
@@ -0,0 +1,207 @@
+/*-
+ * #%L
+ * athena-aws-cmdb
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+package com.amazonaws.athena.connectors.aws.cmdb;
+
+import com.amazonaws.athena.connector.lambda.data.Block;
+import com.amazonaws.athena.connector.lambda.data.BlockAllocator;
+import com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl;
+import com.amazonaws.athena.connector.lambda.data.BlockWriter;
+import com.amazonaws.athena.connector.lambda.data.SchemaBuilder;
+import com.amazonaws.athena.connector.lambda.domain.TableName;
+import com.amazonaws.athena.connector.lambda.domain.predicate.ConstraintEvaluator;
+import com.amazonaws.athena.connector.lambda.domain.predicate.Constraints;
+import com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest;
+import com.amazonaws.athena.connector.lambda.metadata.GetSplitsResponse;
+import com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutRequest;
+import com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutResponse;
+import com.amazonaws.athena.connector.lambda.metadata.GetTableRequest;
+import com.amazonaws.athena.connector.lambda.metadata.GetTableResponse;
+import com.amazonaws.athena.connector.lambda.metadata.ListSchemasRequest;
+import com.amazonaws.athena.connector.lambda.metadata.ListSchemasResponse;
+import com.amazonaws.athena.connector.lambda.metadata.ListTablesRequest;
+import com.amazonaws.athena.connector.lambda.metadata.ListTablesResponse;
+import com.amazonaws.athena.connector.lambda.security.FederatedIdentity;
+import com.amazonaws.athena.connector.lambda.security.LocalKeyFactory;
+import com.amazonaws.athena.connectors.aws.cmdb.tables.TableProvider;
+import com.amazonaws.services.athena.AmazonAthena;
+import com.amazonaws.services.s3.AmazonS3;
+import com.amazonaws.services.secretsmanager.AWSSecretsManager;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.mockito.Mock;
+import org.mockito.runners.MockitoJUnitRunner;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import static org.junit.Assert.*;
+import static org.mockito.Matchers.any;
+import static org.mockito.Matchers.eq;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.verifyNoMoreInteractions;
+import static org.mockito.Mockito.when;
+
+@RunWith(MockitoJUnitRunner.class)
+public class AwsCmdbMetadataHandlerTest
+{
+ private String catalog = "catalog";
+ private String bucket = "bucket";
+ private String prefix = "prefix";
+ private String queryId = "queryId";
+ private FederatedIdentity identity = new FederatedIdentity("id", "principal", "account");
+
+ @Mock
+ private AmazonS3 mockS3;
+
+ @Mock
+ private TableProviderFactory mockTableProviderFactory;
+
+ @Mock
+ private Constraints mockConstraints;
+
+ @Mock
+ private TableProvider mockTableProvider1;
+
+ @Mock
+ private TableProvider mockTableProvider2;
+
+ @Mock
+ private TableProvider mockTableProvider3;
+
+ private BlockAllocator blockAllocator;
+
+ @Mock
+ private Block mockBlock;
+
+ @Mock
+ private AWSSecretsManager mockSecretsManager;
+
+ @Mock
+ private AmazonAthena mockAthena;
+
+ private AwsCmdbMetadataHandler handler;
+
+ @Before
+ public void setUp()
+ throws Exception
+ {
+ blockAllocator = new BlockAllocatorImpl();
+ Map<TableName, TableProvider> tableProviderMap = new HashMap<>();
+ tableProviderMap.putIfAbsent(new TableName("schema1", "table1"), mockTableProvider1);
+ tableProviderMap.putIfAbsent(new TableName("schema1", "table2"), mockTableProvider2);
+ tableProviderMap.putIfAbsent(new TableName("schema2", "table1"), mockTableProvider3);
+
+ when(mockTableProviderFactory.getTableProviders()).thenReturn(tableProviderMap);
+
+ Map<String, List<TableName>> schemas = new HashMap<>();
+ schemas.put("schema1", new ArrayList<>());
+ schemas.put("schema2", new ArrayList<>());
+ schemas.get("schema1").add(new TableName("schema1", "table1"));
+ schemas.get("schema1").add(new TableName("schema1", "table2"));
+ schemas.get("schema2").add(new TableName("schema2", "table1"));
+
+ when(mockTableProviderFactory.getSchemas()).thenReturn(schemas);
+
+ handler = new AwsCmdbMetadataHandler(mockTableProviderFactory, new LocalKeyFactory(), mockSecretsManager, mockAthena, bucket, prefix);
+
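+ // The handler should resolve its table providers and schema listing exactly once, at construction time.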
+ verify(mockTableProviderFactory, times(1)).getTableProviders();
+ verify(mockTableProviderFactory, times(1)).getSchemas();
+ verifyNoMoreInteractions(mockTableProviderFactory);
+ }
+
+ @After
+ public void tearDown()
+ throws Exception
+ {
+ blockAllocator.close();
+ }
+
+ @Test
+ public void doListSchemaNames()
+ {
+ ListSchemasRequest request = new ListSchemasRequest(identity, queryId, catalog);
+ ListSchemasResponse response = handler.doListSchemaNames(blockAllocator, request);
+
+ assertEquals(2, response.getSchemas().size());
+ assertTrue(response.getSchemas().contains("schema1"));
+ assertTrue(response.getSchemas().contains("schema2"));
+ }
+
+ @Test
+ public void doListTables()
+ {
+ ListTablesRequest request = new ListTablesRequest(identity, queryId, catalog, "schema1");
+ ListTablesResponse response = handler.doListTables(blockAllocator, request);
+
+ assertEquals(2, response.getTables().size());
+ assertTrue(response.getTables().contains(new TableName("schema1", "table1")));
+ assertTrue(response.getTables().contains(new TableName("schema1", "table2")));
+ }
+
+ @Test
+ public void doGetTable()
+ {
+ GetTableRequest request = new GetTableRequest(identity, queryId, catalog, new TableName("schema1", "table1"));
+
+ when(mockTableProvider1.getTable(eq(blockAllocator), eq(request))).thenReturn(mock(GetTableResponse.class));
+ GetTableResponse response = handler.doGetTable(blockAllocator, request);
+
+ assertNotNull(response);
+ verify(mockTableProvider1, times(1)).getTable(eq(blockAllocator), eq(request));
+ }
+
+ @Test
+ public void doGetTableLayout()
+ throws Exception
+ {
+ GetTableLayoutRequest request = new GetTableLayoutRequest(identity, queryId, catalog,
+ new TableName("schema1", "table1"),
+ mockConstraints,
+ SchemaBuilder.newBuilder().build(),
+ Collections.EMPTY_SET);
+
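+ // CMDB tables are not partitioned, so the layout is expected to contain a single partition row.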
+ GetTableLayoutResponse response = handler.doGetTableLayout(blockAllocator, request);
+
+ assertNotNull(response);
+ assertEquals(1, response.getPartitions().getRowCount());
+ }
+
+ @Test
+ public void doGetSplits()
+ {
+ GetSplitsRequest request = new GetSplitsRequest(identity, queryId, catalog,
+ new TableName("schema1", "table1"),
+ mockBlock,
+ Collections.emptyList(),
+ new Constraints(new HashMap<>()),
+ null);
+
+ GetSplitsResponse response = handler.doGetSplits(blockAllocator, request);
+
+ assertNotNull(response);
+ }
+}
diff --git a/athena-aws-cmdb/src/test/java/com/amazonaws/athena/connectors/aws/cmdb/AwsCmdbRecordHandlerTest.java b/athena-aws-cmdb/src/test/java/com/amazonaws/athena/connectors/aws/cmdb/AwsCmdbRecordHandlerTest.java
new file mode 100644
index 0000000000..5015515721
--- /dev/null
+++ b/athena-aws-cmdb/src/test/java/com/amazonaws/athena/connectors/aws/cmdb/AwsCmdbRecordHandlerTest.java
@@ -0,0 +1,124 @@
+/*-
+ * #%L
+ * athena-aws-cmdb
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+package com.amazonaws.athena.connectors.aws.cmdb;
+
+import com.amazonaws.athena.connector.lambda.QueryStatusChecker;
+import com.amazonaws.athena.connector.lambda.data.BlockSpiller;
+import com.amazonaws.athena.connector.lambda.data.SchemaBuilder;
+import com.amazonaws.athena.connector.lambda.domain.Split;
+import com.amazonaws.athena.connector.lambda.domain.TableName;
+import com.amazonaws.athena.connector.lambda.domain.predicate.ConstraintEvaluator;
+import com.amazonaws.athena.connector.lambda.domain.predicate.Constraints;
+import com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation;
+import com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest;
+import com.amazonaws.athena.connector.lambda.security.EncryptionKeyFactory;
+import com.amazonaws.athena.connector.lambda.security.FederatedIdentity;
+import com.amazonaws.athena.connector.lambda.security.LocalKeyFactory;
+import com.amazonaws.athena.connectors.aws.cmdb.tables.TableProvider;
+import com.amazonaws.services.athena.AmazonAthena;
+import com.amazonaws.services.s3.AmazonS3;
+import com.amazonaws.services.secretsmanager.AWSSecretsManager;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.mockito.Mock;
+import org.mockito.runners.MockitoJUnitRunner;
+
+import java.util.Collections;
+import java.util.UUID;
+
+import static org.mockito.Matchers.any;
+import static org.mockito.Matchers.eq;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.verifyNoMoreInteractions;
+import static org.mockito.Mockito.when;
+
+@RunWith(MockitoJUnitRunner.class)
+public class AwsCmdbRecordHandlerTest
+{
+ private String bucket = "bucket";
+ private String prefix = "prefix";
+ private EncryptionKeyFactory keyFactory = new LocalKeyFactory();
+ private FederatedIdentity identity = new FederatedIdentity("id", "principal", "account");
+
+ @Mock
+ private AmazonS3 mockS3;
+
+ @Mock
+ private TableProviderFactory mockTableProviderFactory;
+
+ @Mock
+ private ConstraintEvaluator mockEvaluator;
+
+ @Mock
+ private BlockSpiller mockBlockSpiller;
+
+ @Mock
+ private TableProvider mockTableProvider;
+
+ @Mock
+ private AWSSecretsManager mockSecretsManager;
+
+ @Mock
+ private AmazonAthena mockAthena;
+
+ @Mock
+ private QueryStatusChecker queryStatusChecker;
+
+ private AwsCmdbRecordHandler handler;
+
+ @Before
+ public void setUp()
+ throws Exception
+ {
+ when(mockTableProviderFactory.getTableProviders())
+ .thenReturn(Collections.singletonMap(new TableName("schema", "table"), mockTableProvider));
+
+ handler = new AwsCmdbRecordHandler(mockS3, mockSecretsManager, mockAthena, mockTableProviderFactory);
+
+ verify(mockTableProviderFactory, times(1)).getTableProviders();
+ verifyNoMoreInteractions(mockTableProviderFactory);
+
+ when(queryStatusChecker.isQueryRunning()).thenReturn(true);
+ }
+
+ @Test
+ public void readWithConstraint()
+ {
+ ReadRecordsRequest request = new ReadRecordsRequest(identity, "catalog",
+ "queryId",
+ new TableName("schema", "table"),
+ SchemaBuilder.newBuilder().build(),
+ Split.newBuilder(S3SpillLocation.newBuilder()
+ .withBucket(bucket)
+ .withSplitId(UUID.randomUUID().toString())
+ .withQueryId(UUID.randomUUID().toString())
+ .withIsDirectory(true)
+ .build(), keyFactory.create()).build(),
+ new Constraints(Collections.EMPTY_MAP),
+ 100_000,
+ 100_000);
+
+ handler.readWithConstraint(mockBlockSpiller, request, queryStatusChecker);
+
+ verify(mockTableProvider, times(1)).readWithConstraint(any(BlockSpiller.class), eq(request), eq(queryStatusChecker));
+ }
+}
diff --git a/athena-aws-cmdb/src/test/java/com/amazonaws/athena/connectors/aws/cmdb/TableProviderFactoryTest.java b/athena-aws-cmdb/src/test/java/com/amazonaws/athena/connectors/aws/cmdb/TableProviderFactoryTest.java
new file mode 100644
index 0000000000..cea1c54fa4
--- /dev/null
+++ b/athena-aws-cmdb/src/test/java/com/amazonaws/athena/connectors/aws/cmdb/TableProviderFactoryTest.java
@@ -0,0 +1,85 @@
+/*-
+ * #%L
+ * athena-aws-cmdb
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+package com.amazonaws.athena.connectors.aws.cmdb;
+
+import com.amazonaws.athena.connector.lambda.domain.TableName;
+import com.amazonaws.athena.connectors.aws.cmdb.tables.TableProvider;
+import com.amazonaws.services.ec2.AmazonEC2;
+import com.amazonaws.services.elasticmapreduce.AmazonElasticMapReduce;
+import com.amazonaws.services.rds.AmazonRDS;
+import com.amazonaws.services.s3.AmazonS3;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.mockito.Mock;
+import org.mockito.runners.MockitoJUnitRunner;
+
+import java.util.List;
+import java.util.Map;
+
+import static org.junit.Assert.*;
+
+@RunWith(MockitoJUnitRunner.class)
+public class TableProviderFactoryTest
+{
+ private int expectedSchemas = 4;
+ private int expectedTables = 11;
+
+ @Mock
+ private AmazonEC2 mockEc2;
+
+ @Mock
+ private AmazonElasticMapReduce mockEmr;
+
+ @Mock
+ private AmazonRDS mockRds;
+
+ @Mock
+ private AmazonS3 amazonS3;
+
+ private TableProviderFactory factory = new TableProviderFactory(mockEc2, mockEmr, mockRds, amazonS3);
+
+ @Test
+ public void getTableProviders()
+ {
+ int count = 0;
+ for (Map.Entry<TableName, TableProvider> next : factory.getTableProviders().entrySet()) {
+ assertEquals(next.getKey(), next.getValue().getTableName());
+ assertEquals(next.getKey().getSchemaName(), next.getValue().getSchema());
+ count++;
+ }
+ assertEquals(expectedTables, count);
+ }
+
+ @Test
+ public void getSchemas()
+ {
+ int schemas = 0;
+ int tables = 0;
+ for (Map.Entry<String, List<TableName>> next : factory.getSchemas().entrySet()) {
+ for (TableName nextTableName : next.getValue()) {
+ assertEquals(next.getKey(), nextTableName.getSchemaName());
+ tables++;
+ }
+ schemas++;
+ }
+ assertEquals(expectedSchemas, schemas);
+ assertEquals(expectedTables, tables);
+ }
+}
diff --git a/athena-aws-cmdb/src/test/java/com/amazonaws/athena/connectors/aws/cmdb/tables/AbstractTableProviderTest.java b/athena-aws-cmdb/src/test/java/com/amazonaws/athena/connectors/aws/cmdb/tables/AbstractTableProviderTest.java
new file mode 100644
index 0000000000..9ed15516fd
--- /dev/null
+++ b/athena-aws-cmdb/src/test/java/com/amazonaws/athena/connectors/aws/cmdb/tables/AbstractTableProviderTest.java
@@ -0,0 +1,262 @@
+/*-
+ * #%L
+ * athena-aws-cmdb
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+package com.amazonaws.athena.connectors.aws.cmdb.tables;
+
+import com.amazonaws.athena.connector.lambda.QueryStatusChecker;
+import com.amazonaws.athena.connector.lambda.data.Block;
+import com.amazonaws.athena.connector.lambda.data.BlockAllocator;
+import com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl;
+import com.amazonaws.athena.connector.lambda.data.BlockSpiller;
+import com.amazonaws.athena.connector.lambda.data.BlockUtils;
+import com.amazonaws.athena.connector.lambda.data.S3BlockSpillReader;
+import com.amazonaws.athena.connector.lambda.data.S3BlockSpiller;
+import com.amazonaws.athena.connector.lambda.data.SpillConfig;
+import com.amazonaws.athena.connector.lambda.domain.Split;
+import com.amazonaws.athena.connector.lambda.domain.TableName;
+import com.amazonaws.athena.connector.lambda.domain.predicate.ConstraintEvaluator;
+import com.amazonaws.athena.connector.lambda.domain.predicate.Constraints;
+import com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet;
+import com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet;
+import com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation;
+import com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation;
+import com.amazonaws.athena.connector.lambda.metadata.GetTableRequest;
+import com.amazonaws.athena.connector.lambda.metadata.GetTableResponse;
+import com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest;
+import com.amazonaws.athena.connector.lambda.security.EncryptionKey;
+import com.amazonaws.athena.connector.lambda.security.EncryptionKeyFactory;
+import com.amazonaws.athena.connector.lambda.security.FederatedIdentity;
+import com.amazonaws.athena.connector.lambda.security.LocalKeyFactory;
+import com.amazonaws.services.s3.AmazonS3;
+import com.amazonaws.services.s3.model.PutObjectResult;
+import com.amazonaws.services.s3.model.S3Object;
+import com.amazonaws.services.s3.model.S3ObjectInputStream;
+import com.google.common.io.ByteStreams;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.mockito.Mock;
+import org.mockito.invocation.InvocationOnMock;
+import org.mockito.runners.MockitoJUnitRunner;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.ByteArrayInputStream;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.UUID;
+
+import static org.junit.Assert.*;
+import static org.mockito.Matchers.anyObject;
+import static org.mockito.Matchers.anyString;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+@RunWith(MockitoJUnitRunner.class)
+public abstract class AbstractTableProviderTest
+{
+ private static final Logger logger = LoggerFactory.getLogger(AbstractTableProviderTest.class);
+
+ private BlockAllocator allocator;
+
+ private FederatedIdentity identity = new FederatedIdentity("id", "principal", "account");
+ private String idField = getIdField();
+ private String idValue = getIdValue();
+ private String expectedQuery = "queryId";
+ private String expectedCatalog = "catalog";
+ private String expectedSchema = getExpectedSchema();
+ private String expectedTable = getExpectedTable();
+ private TableName expectedTableName = new TableName(expectedSchema, expectedTable);
+
+ private TableProvider provider;
+
+ private final List<ByteHolder> mockS3Store = new ArrayList<>();
+
+ @Mock
+ private AmazonS3 amazonS3;
+
+ @Mock
+ private QueryStatusChecker queryStatusChecker;
+
+ private S3BlockSpillReader blockSpillReader;
+
+ private EncryptionKeyFactory keyFactory = new LocalKeyFactory();
+
+ protected abstract String getIdField();
+
+ protected abstract String getIdValue();
+
+ protected abstract String getExpectedSchema();
+
+ protected abstract String getExpectedTable();
+
+ protected abstract TableProvider setUpSource();
+
+ protected abstract void setUpRead();
+
+ protected abstract int getExpectedRows();
+
+ protected abstract void validateRow(Block block, int pos);
+
+ @Before
+ public void setUp()
+ {
+ allocator = new BlockAllocatorImpl();
+
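+ // Simulate S3 spill: putObject captures spilled bytes into an in-memory FIFO store and getObject
+ // replays them, letting S3BlockSpiller and S3BlockSpillReader round-trip blocks without real S3.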
+ when(amazonS3.putObject(anyObject(), anyObject(), anyObject(), anyObject()))
+ .thenAnswer((InvocationOnMock invocationOnMock) -> {
+ InputStream inputStream = (InputStream) invocationOnMock.getArguments()[2];
+ ByteHolder byteHolder = new ByteHolder();
+ byteHolder.setBytes(ByteStreams.toByteArray(inputStream));
+ mockS3Store.add(byteHolder);
+ return mock(PutObjectResult.class);
+ });
+
+ when(amazonS3.getObject(anyString(), anyString()))
+ .thenAnswer((InvocationOnMock invocationOnMock) -> {
+ S3Object mockObject = mock(S3Object.class);
+ ByteHolder byteHolder = mockS3Store.get(0);
+ mockS3Store.remove(0);
+ when(mockObject.getObjectContent()).thenReturn(
+ new S3ObjectInputStream(
+ new ByteArrayInputStream(byteHolder.getBytes()), null));
+ return mockObject;
+ });
+
+ blockSpillReader = new S3BlockSpillReader(amazonS3, allocator);
+
+ provider = setUpSource();
+
+ when(queryStatusChecker.isQueryRunning()).thenReturn(true);
+ }
+
+ @After
+ public void after()
+ {
+ mockS3Store.clear();
+ allocator.close();
+ }
+
+ @Test
+ public void getSchema()
+ {
+ assertEquals(expectedSchema, provider.getSchema());
+ }
+
+ @Test
+ public void getTableName()
+ {
+ assertEquals(expectedTableName, provider.getTableName());
+ }
+
+ @Test
+ public void readTableTest()
+ {
+ GetTableRequest request = new GetTableRequest(identity, expectedQuery, expectedCatalog, expectedTableName);
+ GetTableResponse response = provider.getTable(allocator, request);
+ assertTrue(response.getSchema().getFields().size() > 1);
+
+ Map<String, ValueSet> constraintsMap = new HashMap<>();
+
+ constraintsMap.put(idField,
+ EquatableValueSet.newBuilder(allocator, Types.MinorType.VARCHAR.getType(), true, false)
+ .add(idValue).build());
+
+ Constraints constraints = new Constraints(constraintsMap);
+
+ ConstraintEvaluator evaluator = new ConstraintEvaluator(allocator, response.getSchema(), constraints);
+
+ S3SpillLocation spillLocation = S3SpillLocation.newBuilder()
+ .withBucket("bucket")
+ .withPrefix("prefix")
+ .withSplitId(UUID.randomUUID().toString())
+ .withQueryId(UUID.randomUUID().toString())
+ .withIsDirectory(true)
+ .build();
+
+ ReadRecordsRequest readRequest = new ReadRecordsRequest(identity,
+ expectedCatalog,
+ "queryId",
+ expectedTableName,
+ response.getSchema(),
+ Split.newBuilder(spillLocation, keyFactory.create()).build(),
+ constraints,
+ 100_000_000,
+ 100_000_000);
+
+ SpillConfig spillConfig = SpillConfig.newBuilder()
+ .withSpillLocation(spillLocation)
+ .withMaxBlockBytes(3_000_000)
+ .withMaxInlineBlockBytes(0)
+ .withRequestId("queryid")
+ .withEncryptionKey(keyFactory.create())
+ .build();
+
+ setUpRead();
+
+ BlockSpiller spiller = new S3BlockSpiller(amazonS3, spillConfig, allocator, response.getSchema(), evaluator);
+ provider.readWithConstraint(spiller, readRequest, queryStatusChecker);
+
+ validateRead(response.getSchema(), blockSpillReader, spiller.getSpillLocations(), spillConfig.getEncryptionKey());
+ }
+
+ protected void validateRead(Schema schema, S3BlockSpillReader reader, List<SpillLocation> locations, EncryptionKey encryptionKey)
+ {
+ int blockNum = 0;
+ int rowNum = 0;
+ for (SpillLocation next : locations) {
+ S3SpillLocation spillLocation = (S3SpillLocation) next;
+ try (Block block = reader.read(spillLocation, encryptionKey, schema)) {
+ logger.info("validateRead: blockNum[{}] and recordCount[{}]", blockNum++, block.getRowCount());
+
+ for (int i = 0; i < block.getRowCount(); i++) {
+ logger.info("validateRead: {}", BlockUtils.rowToString(block, i));
+ rowNum++;
+ validateRow(block, i);
+ }
+ }
+ catch (Exception ex) {
+ throw new RuntimeException(ex);
+ }
+ }
+
+ assertEquals(getExpectedRows(), rowNum);
+ }
+
+ private class ByteHolder
+ {
+ private byte[] bytes;
+
+ public void setBytes(byte[] bytes)
+ {
+ this.bytes = bytes;
+ }
+
+ public byte[] getBytes()
+ {
+ return bytes;
+ }
+ }
+}
diff --git a/athena-aws-cmdb/src/test/java/com/amazonaws/athena/connectors/aws/cmdb/tables/EmrClusterTableProviderTest.java b/athena-aws-cmdb/src/test/java/com/amazonaws/athena/connectors/aws/cmdb/tables/EmrClusterTableProviderTest.java
new file mode 100644
index 0000000000..b7d3d75b98
--- /dev/null
+++ b/athena-aws-cmdb/src/test/java/com/amazonaws/athena/connectors/aws/cmdb/tables/EmrClusterTableProviderTest.java
@@ -0,0 +1,201 @@
+/*-
+ * #%L
+ * athena-aws-cmdb
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+package com.amazonaws.athena.connectors.aws.cmdb.tables;
+
+import com.amazonaws.athena.connector.lambda.data.Block;
+import com.amazonaws.services.elasticmapreduce.AmazonElasticMapReduce;
+import com.amazonaws.services.elasticmapreduce.model.Application;
+import com.amazonaws.services.elasticmapreduce.model.Cluster;
+import com.amazonaws.services.elasticmapreduce.model.ClusterStateChangeReason;
+import com.amazonaws.services.elasticmapreduce.model.ClusterStatus;
+import com.amazonaws.services.elasticmapreduce.model.ClusterSummary;
+import com.amazonaws.services.elasticmapreduce.model.DescribeClusterRequest;
+import com.amazonaws.services.elasticmapreduce.model.DescribeClusterResult;
+import com.amazonaws.services.elasticmapreduce.model.ListClustersRequest;
+import com.amazonaws.services.elasticmapreduce.model.ListClustersResult;
+import com.amazonaws.services.elasticmapreduce.model.Tag;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.joda.time.DateTimeZone;
+import org.junit.runner.RunWith;
+import org.mockito.Mock;
+import org.mockito.invocation.InvocationOnMock;
+import org.mockito.runners.MockitoJUnitRunner;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+import static org.mockito.Matchers.any;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+@RunWith(MockitoJUnitRunner.class)
+public class EmrClusterTableProviderTest
+ extends AbstractTableProviderTest
+{
+ private static final Logger logger = LoggerFactory.getLogger(EmrClusterTableProviderTest.class);
+
+ @Mock
+ private AmazonElasticMapReduce mockEmr;
+
+ protected String getIdField()
+ {
+ return "id";
+ }
+
+ protected String getIdValue()
+ {
+ return "123";
+ }
+
+ protected String getExpectedSchema()
+ {
+ return "emr";
+ }
+
+ protected String getExpectedTable()
+ {
+ return "emr_clusters";
+ }
+
+ protected int getExpectedRows()
+ {
+ return 2;
+ }
+
+ protected TableProvider setUpSource()
+ {
+ return new EmrClusterTableProvider(mockEmr);
+ }
+
+ @Override
+ protected void setUpRead()
+ {
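+ // listClusters returns two clusters whose id matches the test value and one that does not;
+ // the constraint on the id field filters out the non-matching cluster, leaving the two expected rows.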
+ when(mockEmr.listClusters(any(ListClustersRequest.class)))
+ .thenAnswer((InvocationOnMock invocation) -> {
+ ListClustersResult mockResult = mock(ListClustersResult.class);
+ List<ClusterSummary> values = new ArrayList<>();
+ values.add(makeClusterSummary(getIdValue()));
+ values.add(makeClusterSummary(getIdValue()));
+ values.add(makeClusterSummary("fake-id"));
+ when(mockResult.getClusters()).thenReturn(values);
+ return mockResult;
+ });
+
+ when(mockEmr.describeCluster(any(DescribeClusterRequest.class)))
+ .thenAnswer((InvocationOnMock invocation) -> {
+ DescribeClusterRequest request = (DescribeClusterRequest) invocation.getArguments()[0];
+ DescribeClusterResult mockResult = mock(DescribeClusterResult.class);
+ when(mockResult.getCluster()).thenReturn(makeCluster(request.getClusterId()));
+ return mockResult;
+ });
+ }
+
+ protected void validateRow(Block block, int pos)
+ {
+ for (FieldReader fieldReader : block.getFieldReaders()) {
+ fieldReader.setPosition(pos);
+ Field field = fieldReader.getField();
+
+ if (field.getName().equals(getIdField())) {
+ assertEquals(getIdValue(), fieldReader.readText().toString());
+ }
+ else {
+ validate(fieldReader);
+ }
+ }
+ }
+
+ private void validate(FieldReader fieldReader)
+ {
+ Field field = fieldReader.getField();
+ Types.MinorType type = Types.getMinorTypeForArrowType(field.getType());
+ switch (type) {
+ case VARCHAR:
+ if (field.getName().equals("$data$") || field.getName().equals("direction")) {
+ assertNotNull(fieldReader.readText().toString());
+ }
+ else {
+ assertEquals(field.getName(), fieldReader.readText().toString());
+ }
+ break;
+ case DATEMILLI:
+ assertEquals(100_000, fieldReader.readLocalDateTime().toDateTime(DateTimeZone.UTC).getMillis());
+ break;
+ case BIT:
+ assertTrue(fieldReader.readBoolean());
+ break;
+ case INT:
+ assertTrue(fieldReader.readInteger() > 0);
+ break;
+ case STRUCT:
+ for (Field child : field.getChildren()) {
+ validate(fieldReader.reader(child.getName()));
+ }
+ break;
+ case LIST:
+ validate(fieldReader.reader());
+ break;
+ default:
+ throw new RuntimeException("No validation configured for field " + field.getName() + ":" + type + " " + field.getChildren());
+ }
+ }
+
+ private ClusterSummary makeClusterSummary(String id)
+ {
+ return new ClusterSummary()
+ .withName("name")
+ .withId(id)
+ .withStatus(new ClusterStatus()
+ .withState("state")
+ .withStateChangeReason(new ClusterStateChangeReason()
+ .withCode("state_code")
+ .withMessage("state_msg")))
+ .withNormalizedInstanceHours(100);
+ }
+
+ private Cluster makeCluster(String id)
+ {
+ return new Cluster()
+ .withId(id)
+ .withName("name")
+ .withAutoScalingRole("autoscaling_role")
+ .withCustomAmiId("custom_ami")
+ .withInstanceCollectionType("instance_collection_type")
+ .withLogUri("log_uri")
+ .withMasterPublicDnsName("master_public_dns")
+ .withReleaseLabel("release_label")
+ .withRunningAmiVersion("running_ami")
+ .withScaleDownBehavior("scale_down_behavior")
+ .withServiceRole("service_role")
+ .withApplications(new Application().withName("name").withVersion("version"))
+ .withTags(new Tag("key", "value"));
+ }
+}
diff --git a/athena-aws-cmdb/src/test/java/com/amazonaws/athena/connectors/aws/cmdb/tables/RdsTableProviderTest.java b/athena-aws-cmdb/src/test/java/com/amazonaws/athena/connectors/aws/cmdb/tables/RdsTableProviderTest.java
new file mode 100644
index 0000000000..f27dec682f
--- /dev/null
+++ b/athena-aws-cmdb/src/test/java/com/amazonaws/athena/connectors/aws/cmdb/tables/RdsTableProviderTest.java
@@ -0,0 +1,237 @@
+/*-
+ * #%L
+ * athena-aws-cmdb
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+package com.amazonaws.athena.connectors.aws.cmdb.tables;
+
+import com.amazonaws.athena.connector.lambda.data.Block;
+import com.amazonaws.services.elasticmapreduce.AmazonElasticMapReduce;
+import com.amazonaws.services.elasticmapreduce.model.Application;
+import com.amazonaws.services.elasticmapreduce.model.Cluster;
+import com.amazonaws.services.elasticmapreduce.model.ClusterStateChangeReason;
+import com.amazonaws.services.elasticmapreduce.model.ClusterStatus;
+import com.amazonaws.services.elasticmapreduce.model.ClusterSummary;
+import com.amazonaws.services.elasticmapreduce.model.DescribeClusterRequest;
+import com.amazonaws.services.elasticmapreduce.model.DescribeClusterResult;
+import com.amazonaws.services.elasticmapreduce.model.ListClustersRequest;
+import com.amazonaws.services.elasticmapreduce.model.ListClustersResult;
+import com.amazonaws.services.elasticmapreduce.model.Tag;
+import com.amazonaws.services.rds.AmazonRDS;
+import com.amazonaws.services.rds.model.DBInstance;
+import com.amazonaws.services.rds.model.DBInstanceStatusInfo;
+import com.amazonaws.services.rds.model.DBParameterGroup;
+import com.amazonaws.services.rds.model.DBParameterGroupStatus;
+import com.amazonaws.services.rds.model.DBSecurityGroupMembership;
+import com.amazonaws.services.rds.model.DBSubnetGroup;
+import com.amazonaws.services.rds.model.DescribeDBInstancesRequest;
+import com.amazonaws.services.rds.model.DescribeDBInstancesResult;
+import com.amazonaws.services.rds.model.DomainMembership;
+import com.amazonaws.services.rds.model.Endpoint;
+import com.amazonaws.services.rds.model.Subnet;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.joda.time.DateTimeZone;
+import org.junit.runner.RunWith;
+import org.mockito.Mock;
+import org.mockito.invocation.InvocationOnMock;
+import org.mockito.runners.MockitoJUnitRunner;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.Date;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicLong;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+import static org.mockito.Matchers.any;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+@RunWith(MockitoJUnitRunner.class)
+public class RdsTableProviderTest
+ extends AbstractTableProviderTest
+{
+ private static final Logger logger = LoggerFactory.getLogger(RdsTableProviderTest.class);
+
+ @Mock
+ private AmazonRDS mockRds;
+
+ protected String getIdField()
+ {
+ return "instance_id";
+ }
+
+ protected String getIdValue()
+ {
+ return "123";
+ }
+
+ protected String getExpectedSchema()
+ {
+ return "rds";
+ }
+
+ protected String getExpectedTable()
+ {
+ return "rds_instances";
+ }
+
+ protected int getExpectedRows()
+ {
+ return 6;
+ }
+
+ protected TableProvider setUpSource()
+ {
+ return new RdsTableProvider(mockRds);
+ }
+
+ @Override
+ protected void setUpRead()
+ {
+ final AtomicLong requestCount = new AtomicLong(0);
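+ // Return a pagination marker on the first two calls so the provider pages through three
+ // result sets; with two matching instances per page this produces the six expected rows.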
+ when(mockRds.describeDBInstances(any(DescribeDBInstancesRequest.class)))
+ .thenAnswer((InvocationOnMock invocation) -> {
+ DescribeDBInstancesResult mockResult = mock(DescribeDBInstancesResult.class);
+ List<DBInstance> values = new ArrayList<>();
+ values.add(makeValue(getIdValue()));
+ values.add(makeValue(getIdValue()));
+ values.add(makeValue("fake-id"));
+ when(mockResult.getDBInstances()).thenReturn(values);
+
+ if (requestCount.incrementAndGet() < 3) {
+ when(mockResult.getMarker()).thenReturn(String.valueOf(requestCount.get()));
+ }
+ return mockResult;
+ });
+ }
+
+ protected void validateRow(Block block, int pos)
+ {
+ for (FieldReader fieldReader : block.getFieldReaders()) {
+ fieldReader.setPosition(pos);
+ Field field = fieldReader.getField();
+
+ if (field.getName().equals(getIdField())) {
+ assertEquals(getIdValue(), fieldReader.readText().toString());
+ }
+ else {
+ validate(fieldReader);
+ }
+ }
+ }
+
+ private void validate(FieldReader fieldReader)
+ {
+ try {
+ logger.info("validate: {} {}", fieldReader.getField().getName(), fieldReader.getMinorType());
+ Field field = fieldReader.getField();
+ Types.MinorType type = Types.getMinorTypeForArrowType(field.getType());
+ switch (type) {
+ case VARCHAR:
+ if (field.getName().equals("$data$")) {
+ assertNotNull(fieldReader.readText().toString());
+ }
+ else {
+ assertEquals(field.getName(), fieldReader.readText().toString());
+ }
+ break;
+ case DATEMILLI:
+ assertEquals(100_000, fieldReader.readLocalDateTime().toDateTime(DateTimeZone.UTC).getMillis());
+ break;
+ case BIT:
+ assertTrue(fieldReader.readBoolean());
+ break;
+ case INT:
+ assertTrue(fieldReader.readInteger() > 0);
+ break;
+ case STRUCT:
+ for (Field child : field.getChildren()) {
+ validate(fieldReader.reader(child.getName()));
+ }
+ break;
+ case LIST:
+ validate(fieldReader.reader());
+ break;
+ default:
+ throw new RuntimeException("No validation configured for field " + field.getName() + ":" + type + " " + field.getChildren());
+ }
+ }
+ catch (RuntimeException ex) {
+ throw new RuntimeException("Error validating field " + fieldReader.getField().getName(), ex);
+ }
+ }
+
+ private DBInstance makeValue(String id)
+ {
+ return new DBInstance()
+ .withDBInstanceIdentifier(id)
+ .withAvailabilityZone("primary_az")
+ .withAllocatedStorage(100)
+ .withStorageEncrypted(true)
+ .withBackupRetentionPeriod(100)
+ .withAutoMinorVersionUpgrade(true)
+ .withDBInstanceClass("instance_class")
+ .withDbInstancePort(100)
+ .withDBInstanceStatus("status")
+ .withStorageType("storage_type")
+ .withDbiResourceId("dbi_resource_id")
+ .withDBName("name")
+ .withDomainMemberships(new DomainMembership()
+ .withDomain("domain")
+ .withFQDN("fqdn")
+ .withIAMRoleName("iam_role")
+ .withStatus("status"))
+ .withEngine("engine")
+ .withEngineVersion("engine_version")
+ .withLicenseModel("license_model")
+ .withSecondaryAvailabilityZone("secondary_az")
+ .withPreferredBackupWindow("backup_window")
+ .withPreferredMaintenanceWindow("maint_window")
+ .withReadReplicaSourceDBInstanceIdentifier("read_replica_source_id")
+ .withDBParameterGroups(new DBParameterGroupStatus()
+ .withDBParameterGroupName("name")
+ .withParameterApplyStatus("status"))
+ .withDBSecurityGroups(new DBSecurityGroupMembership()
+ .withDBSecurityGroupName("name")
+ .withStatus("status"))
+ .withDBSubnetGroup(new DBSubnetGroup()
+ .withDBSubnetGroupName("name")
+ .withSubnetGroupStatus("status")
+ .withVpcId("vpc")
+ .withSubnets(new Subnet()
+ .withSubnetIdentifier("subnet")))
+ .withStatusInfos(new DBInstanceStatusInfo()
+ .withStatus("status")
+ .withMessage("message")
+ .withNormal(true)
+ .withStatusType("type"))
+ .withEndpoint(new Endpoint()
+ .withAddress("address")
+ .withPort(100)
+ .withHostedZoneId("zone"))
+ .withInstanceCreateTime(new Date(100000))
+ .withIops(100)
+ .withMultiAZ(true)
+ .withPubliclyAccessible(true);
+ }
+}
diff --git a/athena-aws-cmdb/src/test/java/com/amazonaws/athena/connectors/aws/cmdb/tables/ec2/EbsTableProviderTest.java b/athena-aws-cmdb/src/test/java/com/amazonaws/athena/connectors/aws/cmdb/tables/ec2/EbsTableProviderTest.java
new file mode 100644
index 0000000000..cf30fd58eb
--- /dev/null
+++ b/athena-aws-cmdb/src/test/java/com/amazonaws/athena/connectors/aws/cmdb/tables/ec2/EbsTableProviderTest.java
@@ -0,0 +1,181 @@
+/*-
+ * #%L
+ * athena-aws-cmdb
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+package com.amazonaws.athena.connectors.aws.cmdb.tables.ec2;
+
+import com.amazonaws.athena.connector.lambda.data.Block;
+import com.amazonaws.athena.connectors.aws.cmdb.tables.AbstractTableProviderTest;
+import com.amazonaws.athena.connectors.aws.cmdb.tables.TableProvider;
+import com.amazonaws.services.ec2.AmazonEC2;
+import com.amazonaws.services.ec2.model.DescribeVolumesRequest;
+import com.amazonaws.services.ec2.model.DescribeVolumesResult;
+import com.amazonaws.services.ec2.model.Tag;
+import com.amazonaws.services.ec2.model.Volume;
+import com.amazonaws.services.ec2.model.VolumeAttachment;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.joda.time.DateTimeZone;
+import org.junit.runner.RunWith;
+import org.mockito.Mock;
+import org.mockito.invocation.InvocationOnMock;
+import org.mockito.runners.MockitoJUnitRunner;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.Date;
+import java.util.List;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+import static org.mockito.Matchers.any;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+@RunWith(MockitoJUnitRunner.class)
+public class EbsTableProviderTest
+ extends AbstractTableProviderTest
+{
+ private static final Logger logger = LoggerFactory.getLogger(EbsTableProviderTest.class);
+
+ @Mock
+ private AmazonEC2 mockEc2;
+
+ protected String getIdField()
+ {
+ return "id";
+ }
+
+ protected String getIdValue()
+ {
+ return "123";
+ }
+
+ protected String getExpectedSchema()
+ {
+ return "ec2";
+ }
+
+ protected String getExpectedTable()
+ {
+ return "ebs_volumes";
+ }
+
+ protected int getExpectedRows()
+ {
+ return 2;
+ }
+
+ protected TableProvider setUpSource()
+ {
+ return new EbsTableProvider(mockEc2);
+ }
+
+ @Override
+ protected void setUpRead()
+ {
+ when(mockEc2.describeVolumes(any(DescribeVolumesRequest.class))).thenAnswer((InvocationOnMock invocation) -> {
+ DescribeVolumesRequest request = (DescribeVolumesRequest) invocation.getArguments()[0];
+
+ assertEquals(getIdValue(), request.getVolumeIds().get(0));
+ DescribeVolumesResult mockResult = mock(DescribeVolumesResult.class);
+ List<Volume> values = new ArrayList<>();
+ values.add(makeVolume(getIdValue()));
+ values.add(makeVolume(getIdValue()));
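+ // An extra volume with a non-matching id; only the two entries matching getIdValue() are
+ // expected to end up in the result block (see getExpectedRows()).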
+ values.add(makeVolume("fake-id"));
+ when(mockResult.getVolumes()).thenReturn(values);
+ return mockResult;
+ });
+ }
+
+ protected void validateRow(Block block, int pos)
+ {
+ for (FieldReader fieldReader : block.getFieldReaders()) {
+ fieldReader.setPosition(pos);
+ Field field = fieldReader.getField();
+
+ if (field.getName().equals(getIdField())) {
+ assertEquals(getIdValue(), fieldReader.readText().toString());
+ }
+ else {
+ validate(fieldReader);
+ }
+ }
+ }
+
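+ /**
+  * Recursively checks a projected field against the canned values produced by makeVolume(...):
+  * VARCHAR fields echo their own field name, DATEMILLI fields sit at epoch 100,000 ms,
+  * BIT fields are true, INT fields are positive, and STRUCT/LIST fields are validated recursively.
+  */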
+ private void validate(FieldReader fieldReader)
+ {
+ Field field = fieldReader.getField();
+ Types.MinorType type = Types.getMinorTypeForArrowType(field.getType());
+ switch (type) {
+ case VARCHAR:
+ if (field.getName().equals("$data$")) {
+ assertNotNull(fieldReader.readText().toString());
+ }
+ else {
+ assertEquals(field.getName(), fieldReader.readText().toString());
+ }
+ break;
+ case DATEMILLI:
+ assertEquals(100_000, fieldReader.readLocalDateTime().toDateTime(DateTimeZone.UTC).getMillis());
+ break;
+ case BIT:
+ assertTrue(fieldReader.readBoolean());
+ break;
+ case INT:
+ assertTrue(fieldReader.readInteger() > 0);
+ break;
+ case STRUCT:
+ for (Field child : field.getChildren()) {
+ validate(fieldReader.reader(child.getName()));
+ }
+ break;
+ case LIST:
+ validate(fieldReader.reader());
+ break;
+ default:
+ throw new RuntimeException("No validation configured for field " + field.getName() + ":" + type + " " + field.getChildren());
+ }
+ }
+
+ private Volume makeVolume(String id)
+ {
+ Volume volume = new Volume();
+ volume.withVolumeId(id)
+ .withVolumeType("type")
+ .withAttachments(new VolumeAttachment()
+ .withInstanceId("target")
+ .withDevice("attached_device")
+ .withState("attachment_state")
+ .withAttachTime(new Date(100_000)))
+ .withAvailabilityZone("availability_zone")
+ .withCreateTime(new Date(100_000))
+ .withEncrypted(true)
+ .withKmsKeyId("kms_key_id")
+ .withSize(100)
+ .withIops(100)
+ .withSnapshotId("snapshot_id")
+ .withState("state")
+ .withTags(new Tag("key", "value"));
+
+ return volume;
+ }
+}
diff --git a/athena-aws-cmdb/src/test/java/com/amazonaws/athena/connectors/aws/cmdb/tables/ec2/Ec2TableProviderTest.java b/athena-aws-cmdb/src/test/java/com/amazonaws/athena/connectors/aws/cmdb/tables/ec2/Ec2TableProviderTest.java
new file mode 100644
index 0000000000..478d81100c
--- /dev/null
+++ b/athena-aws-cmdb/src/test/java/com/amazonaws/athena/connectors/aws/cmdb/tables/ec2/Ec2TableProviderTest.java
@@ -0,0 +1,226 @@
+/*-
+ * #%L
+ * athena-aws-cmdb
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+package com.amazonaws.athena.connectors.aws.cmdb.tables.ec2;
+
+import com.amazonaws.athena.connector.lambda.data.Block;
+import com.amazonaws.athena.connectors.aws.cmdb.tables.AbstractTableProviderTest;
+import com.amazonaws.athena.connectors.aws.cmdb.tables.TableProvider;
+import com.amazonaws.services.ec2.AmazonEC2;
+import com.amazonaws.services.ec2.model.DescribeInstancesRequest;
+import com.amazonaws.services.ec2.model.DescribeInstancesResult;
+import com.amazonaws.services.ec2.model.EbsInstanceBlockDevice;
+import com.amazonaws.services.ec2.model.GroupIdentifier;
+import com.amazonaws.services.ec2.model.Instance;
+import com.amazonaws.services.ec2.model.InstanceBlockDeviceMapping;
+import com.amazonaws.services.ec2.model.InstanceNetworkInterface;
+import com.amazonaws.services.ec2.model.InstanceState;
+import com.amazonaws.services.ec2.model.Reservation;
+import com.amazonaws.services.ec2.model.StateReason;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.joda.time.DateTimeZone;
+import org.junit.runner.RunWith;
+import org.mockito.Mock;
+import org.mockito.invocation.InvocationOnMock;
+import org.mockito.runners.MockitoJUnitRunner;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.Date;
+import java.util.List;
+
+import static org.junit.Assert.*;
+import static org.mockito.Matchers.any;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+@RunWith(MockitoJUnitRunner.class)
+public class Ec2TableProviderTest
+ extends AbstractTableProviderTest
+{
+ private static final Logger logger = LoggerFactory.getLogger(Ec2TableProviderTest.class);
+
+ @Mock
+ private AmazonEC2 mockEc2;
+
+ protected String getIdField()
+ {
+ return "instance_id";
+ }
+
+ protected String getIdValue()
+ {
+ return "123";
+ }
+
+ protected String getExpectedSchema()
+ {
+ return "ec2";
+ }
+
+ protected String getExpectedTable()
+ {
+ return "ec2_instances";
+ }
+
+ protected int getExpectedRows()
+ {
+ return 4;
+ }
+
+ protected TableProvider setUpSource()
+ {
+ return new Ec2TableProvider(mockEc2);
+ }
+
+ @Override
+ protected void setUpRead()
+ {
+ when(mockEc2.describeInstances(any(DescribeInstancesRequest.class))).thenAnswer((InvocationOnMock invocation) -> {
+ DescribeInstancesRequest request = (DescribeInstancesRequest) invocation.getArguments()[0];
+
+ assertEquals(getIdValue(), request.getInstanceIds().get(0));
+ DescribeInstancesResult mockResult = mock(DescribeInstancesResult.class);
+ List<Reservation> reservations = new ArrayList<>();
+ reservations.add(makeReservation());
+ reservations.add(makeReservation());
+ when(mockResult.getReservations()).thenReturn(reservations);
+ return mockResult;
+ });
+ }
+
+ protected void validateRow(Block block, int pos)
+ {
+ for (FieldReader fieldReader : block.getFieldReaders()) {
+ fieldReader.setPosition(pos);
+ Field field = fieldReader.getField();
+
+ if (field.getName().equals(getIdField())) {
+ assertEquals(getIdValue(), fieldReader.readText().toString());
+ }
+ else {
+ validate(fieldReader);
+ }
+ }
+ }
+
+ private void validate(FieldReader fieldReader)
+ {
+ Field field = fieldReader.getField();
+ Types.MinorType type = Types.getMinorTypeForArrowType(field.getType());
+ switch (type) {
+ case VARCHAR:
+ if (field.getName().equals("$data$")) {
+ assertNotNull(fieldReader.readText().toString());
+ }
+ else {
+ assertEquals(field.getName(), fieldReader.readText().toString());
+ }
+ break;
+ case DATEMILLI:
+ assertEquals(100_000, fieldReader.readLocalDateTime().toDateTime(DateTimeZone.UTC).getMillis());
+ break;
+ case BIT:
+ assertTrue(fieldReader.readBoolean());
+ break;
+ case INT:
+ assertTrue(fieldReader.readInteger() > 0);
+ break;
+ case STRUCT:
+ for (Field child : field.getChildren()) {
+ validate(fieldReader.reader(child.getName()));
+ }
+ break;
+ case LIST:
+ validate(fieldReader.reader());
+ break;
+ default:
+ throw new RuntimeException("No validation configured for field " + field.getName() + ":" + type + " " + field.getChildren());
+ }
+ }
+
+ private Reservation makeReservation()
+ {
+ Reservation reservation = mock(Reservation.class);
+ List<Instance> instances = new ArrayList<>();
+ instances.add(makeInstance(getIdValue()));
+ instances.add(makeInstance(getIdValue()));
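+ // One extra instance with a non-matching id per reservation; with two reservations, only the
+ // four matching instances are expected to surface as rows (see getExpectedRows()).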
+ instances.add(makeInstance("non-matching-id"));
+ when(reservation.getInstances()).thenReturn(instances);
+ return reservation;
+ }
+
+ private Instance makeInstance(String id)
+ {
+ Instance instance = new Instance();
+ instance.withInstanceId(id)
+ .withImageId("image_id")
+ .withInstanceType("instance_type")
+ .withPlatform("platform")
+ .withPrivateDnsName("private_dns_name")
+ .withPrivateIpAddress("private_ip_address")
+ .withPublicDnsName("public_dns_name")
+ .withPublicIpAddress("public_ip_address")
+ .withSubnetId("subnet_id")
+ .withVpcId("vpc_id")
+ .withArchitecture("architecture")
+ .withInstanceLifecycle("instance_lifecycle")
+ .withRootDeviceName("root_device_name")
+ .withRootDeviceType("root_device_type")
+ .withSpotInstanceRequestId("spot_instance_request_id")
+ .withVirtualizationType("virtualization_type")
+ .withKeyName("key_name")
+ .withKernelId("kernel_id")
+ .withCapacityReservationId("capacity_reservation_id")
+ .withLaunchTime(new Date(100_000))
+ .withState(new InstanceState().withCode(100).withName("name"))
+ .withStateReason(new StateReason().withCode("code").withMessage("message"))
+ .withEbsOptimized(true);
+
+ List<InstanceNetworkInterface> interfaces = new ArrayList<>();
+ interfaces.add(new InstanceNetworkInterface()
+ .withStatus("status")
+ .withSubnetId("subnet")
+ .withVpcId("vpc")
+ .withMacAddress("mac_address")
+ .withPrivateDnsName("private_dns")
+ .withPrivateIpAddress("private_ip")
+ .withNetworkInterfaceId("interface_id")
+ .withGroups(new GroupIdentifier().withGroupId("group_id").withGroupName("group_name")));
+
+ interfaces.add(new InstanceNetworkInterface()
+ .withStatus("status")
+ .withSubnetId("subnet")
+ .withVpcId("vpc")
+ .withMacAddress("mac")
+ .withPrivateDnsName("private_dns")
+ .withPrivateIpAddress("private_ip")
+ .withNetworkInterfaceId("interface_id")
+ .withGroups(new GroupIdentifier().withGroupId("group_id").withGroupName("group_name")));
+
+ instance.withNetworkInterfaces(interfaces)
+ .withSecurityGroups(new GroupIdentifier().withGroupId("group_id").withGroupName("group_name"))
+ .withBlockDeviceMappings(new InstanceBlockDeviceMapping().withDeviceName("device_name").withEbs(new EbsInstanceBlockDevice().withVolumeId("volume_id")));
+
+ return instance;
+ }
+}
diff --git a/athena-aws-cmdb/src/test/java/com/amazonaws/athena/connectors/aws/cmdb/tables/ec2/ImagesTableProviderTest.java b/athena-aws-cmdb/src/test/java/com/amazonaws/athena/connectors/aws/cmdb/tables/ec2/ImagesTableProviderTest.java
new file mode 100644
index 0000000000..e58c6ee452
--- /dev/null
+++ b/athena-aws-cmdb/src/test/java/com/amazonaws/athena/connectors/aws/cmdb/tables/ec2/ImagesTableProviderTest.java
@@ -0,0 +1,193 @@
+/*-
+ * #%L
+ * athena-aws-cmdb
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+package com.amazonaws.athena.connectors.aws.cmdb.tables.ec2;
+
+import com.amazonaws.athena.connector.lambda.data.Block;
+import com.amazonaws.athena.connectors.aws.cmdb.tables.AbstractTableProviderTest;
+import com.amazonaws.athena.connectors.aws.cmdb.tables.TableProvider;
+import com.amazonaws.services.ec2.AmazonEC2;
+import com.amazonaws.services.ec2.model.BlockDeviceMapping;
+import com.amazonaws.services.ec2.model.DescribeImagesRequest;
+import com.amazonaws.services.ec2.model.DescribeImagesResult;
+import com.amazonaws.services.ec2.model.EbsBlockDevice;
+import com.amazonaws.services.ec2.model.Image;
+import com.amazonaws.services.ec2.model.Tag;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.joda.time.DateTimeZone;
+import org.junit.runner.RunWith;
+import org.mockito.Mock;
+import org.mockito.invocation.InvocationOnMock;
+import org.mockito.runners.MockitoJUnitRunner;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+import static org.mockito.Matchers.any;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+@RunWith(MockitoJUnitRunner.class)
+public class ImagesTableProviderTest
+ extends AbstractTableProviderTest
+{
+ private static final Logger logger = LoggerFactory.getLogger(ImagesTableProviderTest.class);
+
+ @Mock
+ private AmazonEC2 mockEc2;
+
+ protected String getIdField()
+ {
+ return "id";
+ }
+
+ protected String getIdValue()
+ {
+ return "123";
+ }
+
+ protected String getExpectedSchema()
+ {
+ return "ec2";
+ }
+
+ protected String getExpectedTable()
+ {
+ return "ec2_images";
+ }
+
+ protected int getExpectedRows()
+ {
+ return 2;
+ }
+
+ protected TableProvider setUpSource()
+ {
+ return new ImagesTableProvider(mockEc2);
+ }
+
+ @Override
+ protected void setUpRead()
+ {
+ when(mockEc2.describeImages(any(DescribeImagesRequest.class))).thenAnswer((InvocationOnMock invocation) -> {
+ DescribeImagesRequest request = (DescribeImagesRequest) invocation.getArguments()[0];
+
+ assertEquals(getIdValue(), request.getImageIds().get(0));
+ DescribeImagesResult mockResult = mock(DescribeImagesResult.class);
+ List<Image> values = new ArrayList<>();
+ values.add(makeImage(getIdValue()));
+ values.add(makeImage(getIdValue()));
+ values.add(makeImage("fake-id"));
+ when(mockResult.getImages()).thenReturn(values);
+ return mockResult;
+ });
+ }
+
+ protected void validateRow(Block block, int pos)
+ {
+ for (FieldReader fieldReader : block.getFieldReaders()) {
+ fieldReader.setPosition(pos);
+ Field field = fieldReader.getField();
+
+ if (field.getName().equals(getIdField())) {
+ assertEquals(getIdValue(), fieldReader.readText().toString());
+ }
+ else {
+ validate(fieldReader);
+ }
+ }
+ }
+
+ private void validate(FieldReader fieldReader)
+ {
+ Field field = fieldReader.getField();
+ Types.MinorType type = Types.getMinorTypeForArrowType(field.getType());
+ switch (type) {
+ case VARCHAR:
+ if (field.getName().equals("$data$")) {
+ assertNotNull(fieldReader.readText().toString());
+ }
+ else {
+ assertEquals(field.getName(), fieldReader.readText().toString());
+ }
+ break;
+ case DATEMILLI:
+ assertEquals(100_000, fieldReader.readLocalDateTime().toDateTime(DateTimeZone.UTC).getMillis());
+ break;
+ case BIT:
+ assertTrue(fieldReader.readBoolean());
+ break;
+ case INT:
+ assertTrue(fieldReader.readInteger() > 0);
+ break;
+ case STRUCT:
+ for (Field child : field.getChildren()) {
+ validate(fieldReader.reader(child.getName()));
+ }
+ break;
+ case LIST:
+ validate(fieldReader.reader());
+ break;
+ default:
+ throw new RuntimeException("No validation configured for field " + field.getName() + ":" + type + " " + field.getChildren());
+ }
+ }
+
+ private Image makeImage(String id)
+ {
+ Image image = new Image();
+ image.withImageId(id)
+ .withArchitecture("architecture")
+ .withCreationDate("created")
+ .withDescription("description")
+ .withHypervisor("hypervisor")
+ .withImageLocation("location")
+ .withImageType("type")
+ .withKernelId("kernel")
+ .withName("name")
+ .withOwnerId("owner")
+ .withPlatform("platform")
+ .withRamdiskId("ramdisk")
+ .withRootDeviceName("root_device")
+ .withRootDeviceType("root_type")
+ .withSriovNetSupport("srvio_net")
+ .withState("state")
+ .withVirtualizationType("virt_type")
+ .withPublic(true)
+ .withTags(new Tag("key", "value"))
+ .withBlockDeviceMappings(new BlockDeviceMapping()
+ .withDeviceName("dev_name")
+ .withNoDevice("no_device")
+ .withVirtualName("virt_name")
+ .withEbs(new EbsBlockDevice()
+ .withIops(100)
+ .withKmsKeyId("ebs_kms_key")
+ .withVolumeType("ebs_type")
+ .withVolumeSize(100)));
+
+ return image;
+ }
+}
diff --git a/athena-aws-cmdb/src/test/java/com/amazonaws/athena/connectors/aws/cmdb/tables/ec2/RouteTableProviderTest.java b/athena-aws-cmdb/src/test/java/com/amazonaws/athena/connectors/aws/cmdb/tables/ec2/RouteTableProviderTest.java
new file mode 100644
index 0000000000..f7afdbec72
--- /dev/null
+++ b/athena-aws-cmdb/src/test/java/com/amazonaws/athena/connectors/aws/cmdb/tables/ec2/RouteTableProviderTest.java
@@ -0,0 +1,187 @@
+/*-
+ * #%L
+ * athena-aws-cmdb
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+package com.amazonaws.athena.connectors.aws.cmdb.tables.ec2;
+
+import com.amazonaws.athena.connector.lambda.data.Block;
+import com.amazonaws.athena.connectors.aws.cmdb.tables.AbstractTableProviderTest;
+import com.amazonaws.athena.connectors.aws.cmdb.tables.TableProvider;
+import com.amazonaws.services.ec2.AmazonEC2;
+import com.amazonaws.services.ec2.model.DescribeRouteTablesRequest;
+import com.amazonaws.services.ec2.model.DescribeRouteTablesResult;
+import com.amazonaws.services.ec2.model.PropagatingVgw;
+import com.amazonaws.services.ec2.model.Route;
+import com.amazonaws.services.ec2.model.RouteTable;
+import com.amazonaws.services.ec2.model.RouteTableAssociation;
+import com.amazonaws.services.ec2.model.Tag;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.joda.time.DateTimeZone;
+import org.junit.runner.RunWith;
+import org.mockito.Mock;
+import org.mockito.invocation.InvocationOnMock;
+import org.mockito.runners.MockitoJUnitRunner;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+import static org.mockito.Matchers.any;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+@RunWith(MockitoJUnitRunner.class)
+public class RouteTableProviderTest
+ extends AbstractTableProviderTest
+{
+ private static final Logger logger = LoggerFactory.getLogger(RouteTableProviderTest.class);
+
+ @Mock
+ private AmazonEC2 mockEc2;
+
+ protected String getIdField()
+ {
+ return "route_table_id";
+ }
+
+ protected String getIdValue()
+ {
+ return "123";
+ }
+
+ protected String getExpectedSchema()
+ {
+ return "ec2";
+ }
+
+ protected String getExpectedTable()
+ {
+ return "routing_tables";
+ }
+
+ protected int getExpectedRows()
+ {
+ return 2;
+ }
+
+ protected TableProvider setUpSource()
+ {
+ return new RouteTableProvider(mockEc2);
+ }
+
+ @Override
+ protected void setUpRead()
+ {
+ when(mockEc2.describeRouteTables(any(DescribeRouteTablesRequest.class))).thenAnswer((InvocationOnMock invocation) -> {
+ DescribeRouteTablesRequest request = (DescribeRouteTablesRequest) invocation.getArguments()[0];
+
+ assertEquals(getIdValue(), request.getRouteTableIds().get(0));
+ DescribeRouteTablesResult mockResult = mock(DescribeRouteTablesResult.class);
+ List<RouteTable> values = new ArrayList<>();
+ values.add(makeRouteTable(getIdValue()));
+ values.add(makeRouteTable(getIdValue()));
+ values.add(makeRouteTable("fake-id"));
+ when(mockResult.getRouteTables()).thenReturn(values);
+ return mockResult;
+ });
+ }
+
+ protected void validateRow(Block block, int pos)
+ {
+ for (FieldReader fieldReader : block.getFieldReaders()) {
+ fieldReader.setPosition(pos);
+ Field field = fieldReader.getField();
+
+ if (field.getName().equals(getIdField())) {
+ assertEquals(getIdValue(), fieldReader.readText().toString());
+ }
+ else {
+ validate(fieldReader);
+ }
+ }
+ }
+
+ private void validate(FieldReader fieldReader)
+ {
+ Field field = fieldReader.getField();
+ Types.MinorType type = Types.getMinorTypeForArrowType(field.getType());
+ switch (type) {
+ case VARCHAR:
+ if (field.getName().equals("$data$")) {
+ assertNotNull(fieldReader.readText().toString());
+ }
+ else {
+ assertEquals(field.getName(), fieldReader.readText().toString());
+ }
+ break;
+ case DATEMILLI:
+ assertEquals(100_000, fieldReader.readLocalDateTime().toDateTime(DateTimeZone.UTC).getMillis());
+ break;
+ case BIT:
+ assertTrue(fieldReader.readBoolean());
+ break;
+ case INT:
+ assertTrue(fieldReader.readInteger() > 0);
+ break;
+ case STRUCT:
+ for (Field child : field.getChildren()) {
+ validate(fieldReader.reader(child.getName()));
+ }
+ break;
+ case LIST:
+ validate(fieldReader.reader());
+ break;
+ default:
+ throw new RuntimeException("No validation configured for field " + field.getName() + ":" + type + " " + field.getChildren());
+ }
+ }
+
+ private RouteTable makeRouteTable(String id)
+ {
+ RouteTable routeTable = new RouteTable();
+ routeTable.withRouteTableId(id)
+ .withOwnerId("owner")
+ .withVpcId("vpc")
+ .withAssociations(new RouteTableAssociation().withSubnetId("subnet").withRouteTableId("route_table_id"))
+ .withTags(new Tag("key", "value"))
+ .withPropagatingVgws(new PropagatingVgw().withGatewayId("gateway_id"))
+ .withRoutes(new Route()
+ .withDestinationCidrBlock("dst_cidr")
+ .withDestinationIpv6CidrBlock("dst_cidr_v6")
+ .withDestinationPrefixListId("dst_prefix_list")
+ .withEgressOnlyInternetGatewayId("egress_igw")
+ .withGatewayId("gateway")
+ .withInstanceId("instance_id")
+ .withInstanceOwnerId("instance_owner")
+ .withNatGatewayId("nat_gateway")
+ .withNetworkInterfaceId("interface")
+ .withOrigin("origin")
+ .withState("state")
+ .withTransitGatewayId("transit_gateway")
+ .withVpcPeeringConnectionId("vpc_peering_con")
+ );
+
+ return routeTable;
+ }
+}
diff --git a/athena-aws-cmdb/src/test/java/com/amazonaws/athena/connectors/aws/cmdb/tables/ec2/SecurityGroupsTableProviderTest.java b/athena-aws-cmdb/src/test/java/com/amazonaws/athena/connectors/aws/cmdb/tables/ec2/SecurityGroupsTableProviderTest.java
new file mode 100644
index 0000000000..d49562e47d
--- /dev/null
+++ b/athena-aws-cmdb/src/test/java/com/amazonaws/athena/connectors/aws/cmdb/tables/ec2/SecurityGroupsTableProviderTest.java
@@ -0,0 +1,179 @@
+/*-
+ * #%L
+ * athena-aws-cmdb
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+package com.amazonaws.athena.connectors.aws.cmdb.tables.ec2;
+
+import com.amazonaws.athena.connector.lambda.data.Block;
+import com.amazonaws.athena.connectors.aws.cmdb.tables.AbstractTableProviderTest;
+import com.amazonaws.athena.connectors.aws.cmdb.tables.TableProvider;
+import com.amazonaws.services.ec2.AmazonEC2;
+import com.amazonaws.services.ec2.model.DescribeSecurityGroupsRequest;
+import com.amazonaws.services.ec2.model.DescribeSecurityGroupsResult;
+import com.amazonaws.services.ec2.model.IpPermission;
+import com.amazonaws.services.ec2.model.IpRange;
+import com.amazonaws.services.ec2.model.Ipv6Range;
+import com.amazonaws.services.ec2.model.PrefixListId;
+import com.amazonaws.services.ec2.model.SecurityGroup;
+import com.amazonaws.services.ec2.model.UserIdGroupPair;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.joda.time.DateTimeZone;
+import org.junit.runner.RunWith;
+import org.mockito.Mock;
+import org.mockito.invocation.InvocationOnMock;
+import org.mockito.runners.MockitoJUnitRunner;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+import static org.mockito.Matchers.any;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+@RunWith(MockitoJUnitRunner.class)
+public class SecurityGroupsTableProviderTest
+ extends AbstractTableProviderTest
+{
+ private static final Logger logger = LoggerFactory.getLogger(SecurityGroupsTableProviderTest.class);
+
+ @Mock
+ private AmazonEC2 mockEc2;
+
+ protected String getIdField()
+ {
+ return "id";
+ }
+
+ protected String getIdValue()
+ {
+ return "123";
+ }
+
+ protected String getExpectedSchema()
+ {
+ return "ec2";
+ }
+
+ protected String getExpectedTable()
+ {
+ return "security_groups";
+ }
+
+ protected int getExpectedRows()
+ {
+ return 2;
+ }
+
+ protected TableProvider setUpSource()
+ {
+ return new SecurityGroupsTableProvider(mockEc2);
+ }
+
+ @Override
+ protected void setUpRead()
+ {
+ when(mockEc2.describeSecurityGroups(any(DescribeSecurityGroupsRequest.class)))
+ .thenAnswer((InvocationOnMock invocation) -> {
+ DescribeSecurityGroupsRequest request = (DescribeSecurityGroupsRequest) invocation.getArguments()[0];
+
+ assertEquals(getIdValue(), request.getGroupIds().get(0));
+ DescribeSecurityGroupsResult mockResult = mock(DescribeSecurityGroupsResult.class);
+ List<SecurityGroup> values = new ArrayList<>();
+ values.add(makeSecurityGroup(getIdValue()));
+ values.add(makeSecurityGroup(getIdValue()));
+ values.add(makeSecurityGroup("fake-id"));
+ when(mockResult.getSecurityGroups()).thenReturn(values);
+ return mockResult;
+ });
+ }
+
+ protected void validateRow(Block block, int pos)
+ {
+ for (FieldReader fieldReader : block.getFieldReaders()) {
+ fieldReader.setPosition(pos);
+ Field field = fieldReader.getField();
+
+ if (field.getName().equals(getIdField())) {
+ assertEquals(getIdValue(), fieldReader.readText().toString());
+ }
+ else {
+ validate(fieldReader);
+ }
+ }
+ }
+
+ private void validate(FieldReader fieldReader)
+ {
+ Field field = fieldReader.getField();
+ Types.MinorType type = Types.getMinorTypeForArrowType(field.getType());
+ switch (type) {
+ case VARCHAR:
+ if (field.getName().equals("$data$") || field.getName().equals("direction")) {
+ assertNotNull(fieldReader.readText().toString());
+ }
+ else {
+ assertEquals(field.getName(), fieldReader.readText().toString());
+ }
+ break;
+ case DATEMILLI:
+ assertEquals(100_000, fieldReader.readLocalDateTime().toDateTime(DateTimeZone.UTC).getMillis());
+ break;
+ case BIT:
+ assertTrue(fieldReader.readBoolean());
+ break;
+ case INT:
+ assertTrue(fieldReader.readInteger() > 0);
+ break;
+ case STRUCT:
+ for (Field child : field.getChildren()) {
+ validate(fieldReader.reader(child.getName()));
+ }
+ break;
+ case LIST:
+ validate(fieldReader.reader());
+ break;
+ default:
+ throw new RuntimeException("No validation configured for field " + field.getName() + ":" + type + " " + field.getChildren());
+ }
+ }
+
+ private SecurityGroup makeSecurityGroup(String id)
+ {
+ return new SecurityGroup()
+ .withGroupId(id)
+ .withGroupName("name")
+ .withDescription("description")
+ .withIpPermissions(new IpPermission()
+ .withIpProtocol("protocol")
+ .withFromPort(100)
+ .withToPort(100)
+ .withIpv4Ranges(new IpRange().withCidrIp("cidr").withDescription("description"))
+
+ .withIpv6Ranges(new Ipv6Range().withCidrIpv6("cidr").withDescription("description"))
+ .withPrefixListIds(new PrefixListId().withPrefixListId("prefix").withDescription("description"))
+ .withUserIdGroupPairs(new UserIdGroupPair().withGroupId("group_id").withUserId("user_id"))
+ );
+ }
+}
diff --git a/athena-aws-cmdb/src/test/java/com/amazonaws/athena/connectors/aws/cmdb/tables/ec2/SubnetTableProviderTest.java b/athena-aws-cmdb/src/test/java/com/amazonaws/athena/connectors/aws/cmdb/tables/ec2/SubnetTableProviderTest.java
new file mode 100644
index 0000000000..a17e1d3faf
--- /dev/null
+++ b/athena-aws-cmdb/src/test/java/com/amazonaws/athena/connectors/aws/cmdb/tables/ec2/SubnetTableProviderTest.java
@@ -0,0 +1,172 @@
+/*-
+ * #%L
+ * athena-aws-cmdb
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+package com.amazonaws.athena.connectors.aws.cmdb.tables.ec2;
+
+import com.amazonaws.athena.connector.lambda.data.Block;
+import com.amazonaws.athena.connectors.aws.cmdb.tables.AbstractTableProviderTest;
+import com.amazonaws.athena.connectors.aws.cmdb.tables.TableProvider;
+import com.amazonaws.services.ec2.AmazonEC2;
+import com.amazonaws.services.ec2.model.DescribeSubnetsRequest;
+import com.amazonaws.services.ec2.model.DescribeSubnetsResult;
+import com.amazonaws.services.ec2.model.Subnet;
+import com.amazonaws.services.ec2.model.Tag;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.joda.time.DateTimeZone;
+import org.junit.runner.RunWith;
+import org.mockito.Mock;
+import org.mockito.invocation.InvocationOnMock;
+import org.mockito.runners.MockitoJUnitRunner;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+import static org.mockito.Matchers.any;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+@RunWith(MockitoJUnitRunner.class)
+public class SubnetTableProviderTest
+ extends AbstractTableProviderTest
+{
+ private static final Logger logger = LoggerFactory.getLogger(SubnetTableProviderTest.class);
+
+ @Mock
+ private AmazonEC2 mockEc2;
+
+ protected String getIdField()
+ {
+ return "id";
+ }
+
+ protected String getIdValue()
+ {
+ return "123";
+ }
+
+ protected String getExpectedSchema()
+ {
+ return "ec2";
+ }
+
+ protected String getExpectedTable()
+ {
+ return "subnets";
+ }
+
+ protected int getExpectedRows()
+ {
+ return 2;
+ }
+
+ protected TableProvider setUpSource()
+ {
+ return new SubnetTableProvider(mockEc2);
+ }
+
+ @Override
+ protected void setUpRead()
+ {
+ when(mockEc2.describeSubnets(any(DescribeSubnetsRequest.class))).thenAnswer((InvocationOnMock invocation) -> {
+ DescribeSubnetsRequest request = (DescribeSubnetsRequest) invocation.getArguments()[0];
+
+ assertEquals(getIdValue(), request.getSubnetIds().get(0));
+ DescribeSubnetsResult mockResult = mock(DescribeSubnetsResult.class);
+ List<Subnet> values = new ArrayList<>();
+ values.add(makeSubnet(getIdValue()));
+ values.add(makeSubnet(getIdValue()));
+ values.add(makeSubnet("fake-id"));
+ when(mockResult.getSubnets()).thenReturn(values);
+
+ return mockResult;
+ });
+ }
+
+ protected void validateRow(Block block, int pos)
+ {
+ for (FieldReader fieldReader : block.getFieldReaders()) {
+ fieldReader.setPosition(pos);
+ Field field = fieldReader.getField();
+
+ if (field.getName().equals(getIdField())) {
+ assertEquals(getIdValue(), fieldReader.readText().toString());
+ }
+ else {
+ validate(fieldReader);
+ }
+ }
+ }
+
+ private void validate(FieldReader fieldReader)
+ {
+ Field field = fieldReader.getField();
+ Types.MinorType type = Types.getMinorTypeForArrowType(field.getType());
+ switch (type) {
+ case VARCHAR:
+ if (field.getName().equals("$data$")) {
+ assertNotNull(fieldReader.readText().toString());
+ }
+ else {
+ assertEquals(field.getName(), fieldReader.readText().toString());
+ }
+ break;
+ case DATEMILLI:
+ assertEquals(100_000, fieldReader.readLocalDateTime().toDateTime(DateTimeZone.UTC).getMillis());
+ break;
+ case BIT:
+ assertTrue(fieldReader.readBoolean());
+ break;
+ case INT:
+ assertTrue(fieldReader.readInteger() > 0);
+ break;
+ case STRUCT:
+ for (Field child : field.getChildren()) {
+ validate(fieldReader.reader(child.getName()));
+ }
+ break;
+ case LIST:
+ validate(fieldReader.reader());
+ break;
+ default:
+ throw new RuntimeException("No validation configured for field " + field.getName() + ":" + type + " " + field.getChildren());
+ }
+ }
+
+ private Subnet makeSubnet(String id)
+ {
+ return new Subnet()
+ .withSubnetId(id)
+ .withAvailabilityZone("availability_zone")
+ .withCidrBlock("cidr_block")
+ .withAvailableIpAddressCount(100)
+ .withDefaultForAz(true)
+ .withMapPublicIpOnLaunch(true)
+ .withOwnerId("owner")
+ .withState("state")
+ .withTags(new Tag().withKey("key").withValue("value"))
+ .withVpcId("vpc");
+ }
+}
diff --git a/athena-aws-cmdb/src/test/java/com/amazonaws/athena/connectors/aws/cmdb/tables/ec2/VpcTableProviderTest.java b/athena-aws-cmdb/src/test/java/com/amazonaws/athena/connectors/aws/cmdb/tables/ec2/VpcTableProviderTest.java
new file mode 100644
index 0000000000..f22b9b4d8d
--- /dev/null
+++ b/athena-aws-cmdb/src/test/java/com/amazonaws/athena/connectors/aws/cmdb/tables/ec2/VpcTableProviderTest.java
@@ -0,0 +1,171 @@
+/*-
+ * #%L
+ * athena-aws-cmdb
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+package com.amazonaws.athena.connectors.aws.cmdb.tables.ec2;
+
+import com.amazonaws.athena.connector.lambda.data.Block;
+import com.amazonaws.athena.connectors.aws.cmdb.tables.AbstractTableProviderTest;
+import com.amazonaws.athena.connectors.aws.cmdb.tables.TableProvider;
+import com.amazonaws.services.ec2.AmazonEC2;
+import com.amazonaws.services.ec2.model.DescribeVpcsRequest;
+import com.amazonaws.services.ec2.model.DescribeVpcsResult;
+import com.amazonaws.services.ec2.model.Tag;
+import com.amazonaws.services.ec2.model.Vpc;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.joda.time.DateTimeZone;
+import org.junit.runner.RunWith;
+import org.mockito.Mock;
+import org.mockito.invocation.InvocationOnMock;
+import org.mockito.runners.MockitoJUnitRunner;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+import static org.mockito.Matchers.any;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+@RunWith(MockitoJUnitRunner.class)
+public class VpcTableProviderTest
+ extends AbstractTableProviderTest
+{
+ private static final Logger logger = LoggerFactory.getLogger(VpcTableProviderTest.class);
+
+ @Mock
+ private AmazonEC2 mockEc2;
+
+ protected String getIdField()
+ {
+ return "id";
+ }
+
+ protected String getIdValue()
+ {
+ return "123";
+ }
+
+ protected String getExpectedSchema()
+ {
+ return "ec2";
+ }
+
+ protected String getExpectedTable()
+ {
+ return "vpcs";
+ }
+
+ protected int getExpectedRows()
+ {
+ return 2;
+ }
+
+ protected TableProvider setUpSource()
+ {
+ return new VpcTableProvider(mockEc2);
+ }
+
+ @Override
+ protected void setUpRead()
+ {
+ when(mockEc2.describeVpcs(any(DescribeVpcsRequest.class))).thenAnswer((InvocationOnMock invocation) -> {
+ DescribeVpcsRequest request = (DescribeVpcsRequest) invocation.getArguments()[0];
+
+ assertEquals(getIdValue(), request.getVpcIds().get(0));
+ DescribeVpcsResult mockResult = mock(DescribeVpcsResult.class);
+ List<Vpc> values = new ArrayList<>();
+ values.add(makeVpc(getIdValue()));
+ values.add(makeVpc(getIdValue()));
+ values.add(makeVpc("fake-id"));
+ when(mockResult.getVpcs()).thenReturn(values);
+ return mockResult;
+ });
+ }
+
+ protected void validateRow(Block block, int pos)
+ {
+ for (FieldReader fieldReader : block.getFieldReaders()) {
+ fieldReader.setPosition(pos);
+ Field field = fieldReader.getField();
+
+ if (field.getName().equals(getIdField())) {
+ assertEquals(getIdValue(), fieldReader.readText().toString());
+ }
+ else {
+ validate(fieldReader);
+ }
+ }
+ }
+
+ private void validate(FieldReader fieldReader)
+ {
+ Field field = fieldReader.getField();
+ Types.MinorType type = Types.getMinorTypeForArrowType(field.getType());
+ switch (type) {
+ case VARCHAR:
+ if (field.getName().equals("$data$")) {
+ assertNotNull(fieldReader.readText().toString());
+ }
+ else {
+ assertEquals(field.getName(), fieldReader.readText().toString());
+ }
+ break;
+ case DATEMILLI:
+ assertEquals(100_000, fieldReader.readLocalDateTime().toDateTime(DateTimeZone.UTC).getMillis());
+ break;
+ case BIT:
+ assertTrue(fieldReader.readBoolean());
+ break;
+ case INT:
+ assertTrue(fieldReader.readInteger() > 0);
+ break;
+ case STRUCT:
+ for (Field child : field.getChildren()) {
+ validate(fieldReader.reader(child.getName()));
+ }
+ break;
+ case LIST:
+ validate(fieldReader.reader());
+ break;
+ default:
+ throw new RuntimeException("No validation configured for field " + field.getName() + ":" + type + " " + field.getChildren());
+ }
+ }
+
+ private Vpc makeVpc(String id)
+ {
+ Vpc vpc = new Vpc();
+ vpc.withVpcId(id)
+ .withCidrBlock("cidr_block")
+ .withDhcpOptionsId("dhcp_opts")
+ .withInstanceTenancy("tenancy")
+ .withOwnerId("owner")
+ .withState("state")
+ .withIsDefault(true)
+ .withTags(new Tag("key", "valye"));
+
+ return vpc;
+ }
+}
diff --git a/athena-aws-cmdb/src/test/java/com/amazonaws/athena/connectors/aws/cmdb/tables/s3/S3BucketsTableProviderTest.java b/athena-aws-cmdb/src/test/java/com/amazonaws/athena/connectors/aws/cmdb/tables/s3/S3BucketsTableProviderTest.java
new file mode 100644
index 0000000000..0e57ef3027
--- /dev/null
+++ b/athena-aws-cmdb/src/test/java/com/amazonaws/athena/connectors/aws/cmdb/tables/s3/S3BucketsTableProviderTest.java
@@ -0,0 +1,155 @@
+/*-
+ * #%L
+ * athena-aws-cmdb
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+package com.amazonaws.athena.connectors.aws.cmdb.tables.s3;
+
+import com.amazonaws.athena.connector.lambda.data.Block;
+import com.amazonaws.athena.connectors.aws.cmdb.tables.AbstractTableProviderTest;
+import com.amazonaws.athena.connectors.aws.cmdb.tables.TableProvider;
+import com.amazonaws.services.s3.AmazonS3;
+import com.amazonaws.services.s3.model.Bucket;
+import com.amazonaws.services.s3.model.Owner;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.joda.time.DateTimeZone;
+import org.mockito.Mock;
+import org.mockito.invocation.InvocationOnMock;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.Date;
+import java.util.List;
+
+import static org.junit.Assert.*;
+import static org.mockito.Mockito.when;
+
+public class S3BucketsTableProviderTest
+ extends AbstractTableProviderTest
+{
+ private static final Logger logger = LoggerFactory.getLogger(S3BucketsTableProviderTest.class);
+
+ @Mock
+ private AmazonS3 mockS3;
+
+ protected String getIdField()
+ {
+ return "bucket_name";
+ }
+
+ protected String getIdValue()
+ {
+ return "my_bucket";
+ }
+
+ protected String getExpectedSchema()
+ {
+ return "s3";
+ }
+
+ protected String getExpectedTable()
+ {
+ return "buckets";
+ }
+
+ protected int getExpectedRows()
+ {
+ return 2;
+ }
+
+ protected TableProvider setUpSource()
+ {
+ return new S3BucketsTableProvider(mockS3);
+ }
+
+ @Override
+ protected void setUpRead()
+ {
+ when(mockS3.listBuckets()).thenAnswer((InvocationOnMock invocation) -> {
+ List<Bucket> values = new ArrayList<>();
+ values.add(makeBucket(getIdValue()));
+ values.add(makeBucket(getIdValue()));
+ values.add(makeBucket("fake-id"));
+ return values;
+ });
+ }
+
+ protected void validateRow(Block block, int pos)
+ {
+ for (FieldReader fieldReader : block.getFieldReaders()) {
+ fieldReader.setPosition(pos);
+ Field field = fieldReader.getField();
+
+ if (field.getName().equals(getIdField())) {
+ assertEquals(getIdValue(), fieldReader.readText().toString());
+ }
+ else {
+ validate(fieldReader);
+ }
+ }
+ }
+
+ private void validate(FieldReader fieldReader)
+ {
+ Field field = fieldReader.getField();
+ Types.MinorType type = Types.getMinorTypeForArrowType(field.getType());
+ switch (type) {
+ case VARCHAR:
+ if (field.getName().equals("$data$")) {
+ assertNotNull(fieldReader.readText().toString());
+ }
+ else {
+ assertEquals(field.getName(), fieldReader.readText().toString());
+ }
+ break;
+ case DATEMILLI:
+ assertEquals(100_000, fieldReader.readLocalDateTime().toDateTime(DateTimeZone.UTC).getMillis());
+ break;
+ case BIT:
+ assertTrue(fieldReader.readBoolean());
+ break;
+ case INT:
+ assertTrue(fieldReader.readInteger() > 0);
+ break;
+ case STRUCT:
+ for (Field child : field.getChildren()) {
+ validate(fieldReader.reader(child.getName()));
+ }
+ break;
+ case LIST:
+ validate(fieldReader.reader());
+ break;
+ default:
+ throw new RuntimeException("No validation configured for field " + field.getName() + ":" + type + " " + field.getChildren());
+ }
+ }
+
+ private Bucket makeBucket(String id)
+ {
+ Bucket bucket = new Bucket();
+ bucket.setName(id);
+ Owner owner = new Owner();
+ owner.setDisplayName("owner_name");
+ owner.setId("owner_id");
+ bucket.setOwner(owner);
+ bucket.setCreationDate(new Date(100_000));
+ return bucket;
+ }
+}
diff --git a/athena-aws-cmdb/src/test/java/com/amazonaws/athena/connectors/aws/cmdb/tables/s3/S3ObjectsTableProviderTest.java b/athena-aws-cmdb/src/test/java/com/amazonaws/athena/connectors/aws/cmdb/tables/s3/S3ObjectsTableProviderTest.java
new file mode 100644
index 0000000000..3499e800e9
--- /dev/null
+++ b/athena-aws-cmdb/src/test/java/com/amazonaws/athena/connectors/aws/cmdb/tables/s3/S3ObjectsTableProviderTest.java
@@ -0,0 +1,185 @@
+/*-
+ * #%L
+ * athena-aws-cmdb
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+package com.amazonaws.athena.connectors.aws.cmdb.tables.s3;
+
+import com.amazonaws.athena.connector.lambda.data.Block;
+import com.amazonaws.athena.connectors.aws.cmdb.tables.AbstractTableProviderTest;
+import com.amazonaws.athena.connectors.aws.cmdb.tables.TableProvider;
+import com.amazonaws.services.s3.AmazonS3;
+import com.amazonaws.services.s3.model.ListObjectsV2Request;
+import com.amazonaws.services.s3.model.ListObjectsV2Result;
+import com.amazonaws.services.s3.model.Owner;
+import com.amazonaws.services.s3.model.S3ObjectSummary;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.joda.time.DateTimeZone;
+import org.mockito.Mock;
+import org.mockito.invocation.InvocationOnMock;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.Date;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicLong;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+import static org.mockito.Matchers.any;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+public class S3ObjectsTableProviderTest
+ extends AbstractTableProviderTest
+{
+ private static final Logger logger = LoggerFactory.getLogger(S3ObjectsTableProviderTest.class);
+
+ @Mock
+ private AmazonS3 mockS3;
+
+ protected String getIdField()
+ {
+ return "bucket_name";
+ }
+
+ protected String getIdValue()
+ {
+ return "my_bucket";
+ }
+
+ protected String getExpectedSchema()
+ {
+ return "s3";
+ }
+
+ protected String getExpectedTable()
+ {
+ return "objects";
+ }
+
+ protected int getExpectedRows()
+ {
+ return 4;
+ }
+
+ protected TableProvider setUpSource()
+ {
+ return new S3ObjectsTableProvider(mockS3);
+ }
+
+ @Override
+ protected void setUpRead()
+ {
+ AtomicLong count = new AtomicLong(0);
+ when(mockS3.listObjectsV2(any(ListObjectsV2Request.class))).thenAnswer((InvocationOnMock invocation) -> {
+ ListObjectsV2Request request = (ListObjectsV2Request) invocation.getArguments()[0];
+ assertEquals(getIdValue(), request.getBucketName());
+
+ ListObjectsV2Result mockResult = mock(ListObjectsV2Result.class);
+ List<S3ObjectSummary> values = new ArrayList<>();
+ values.add(makeObjectSummary(getIdValue()));
+ values.add(makeObjectSummary(getIdValue()));
+ values.add(makeObjectSummary("fake-id"));
+ when(mockResult.getObjectSummaries()).thenReturn(values);
+
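+ // Only the first response is marked truncated (with a continuation token) so the provider
+ // pages exactly twice; every call after the first is expected to echo that token back.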
+ if (count.get() > 0) {
+ assertNotNull(request.getContinuationToken());
+ }
+
+ if (count.incrementAndGet() < 2) {
+ when(mockResult.isTruncated()).thenReturn(true);
+ when(mockResult.getNextContinuationToken()).thenReturn("token");
+ }
+
+ return mockResult;
+ });
+ }
+
+ protected void validateRow(Block block, int pos)
+ {
+ for (FieldReader fieldReader : block.getFieldReaders()) {
+ fieldReader.setPosition(pos);
+ Field field = fieldReader.getField();
+
+ if (field.getName().equals(getIdField())) {
+ assertEquals(getIdValue(), fieldReader.readText().toString());
+ }
+ else {
+ validate(fieldReader);
+ }
+ }
+ }
+
+ private void validate(FieldReader fieldReader)
+ {
+ Field field = fieldReader.getField();
+ Types.MinorType type = Types.getMinorTypeForArrowType(field.getType());
+ switch (type) {
+ case VARCHAR:
+ if (field.getName().equals("$data$")) {
+ assertNotNull(fieldReader.readText().toString());
+ }
+ else {
+ assertEquals(field.getName(), fieldReader.readText().toString());
+ }
+ break;
+ case DATEMILLI:
+ assertEquals(100_000, fieldReader.readLocalDateTime().toDateTime(DateTimeZone.UTC).getMillis());
+ break;
+ case BIT:
+ assertTrue(fieldReader.readBoolean());
+ break;
+ case INT:
+ assertTrue(fieldReader.readInteger() > 0);
+ break;
+ case BIGINT:
+ assertTrue(fieldReader.readLong() > 0);
+ break;
+ case STRUCT:
+ for (Field child : field.getChildren()) {
+ validate(fieldReader.reader(child.getName()));
+ }
+ break;
+ case LIST:
+ validate(fieldReader.reader());
+ break;
+ default:
+ throw new RuntimeException("No validation configured for field " + field.getName() + ":" + type + " " + field.getChildren());
+ }
+ }
+
+ private S3ObjectSummary makeObjectSummary(String id)
+ {
+ S3ObjectSummary summary = new S3ObjectSummary();
+ Owner owner = new Owner();
+ owner.setId("owner_id");
+ owner.setDisplayName("owner_name");
+ summary.setOwner(owner);
+ summary.setBucketName(id);
+ summary.setETag("e_tag");
+ summary.setKey("key");
+ summary.setSize(100);
+ summary.setLastModified(new Date(100_000));
+ summary.setStorageClass("storage_class");
+ return summary;
+ }
+}
diff --git a/athena-bigquery/LICENSE.txt b/athena-bigquery/LICENSE.txt
new file mode 100644
index 0000000000..418de4c108
--- /dev/null
+++ b/athena-bigquery/LICENSE.txt
@@ -0,0 +1,174 @@
+Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
\ No newline at end of file
diff --git a/athena-bigquery/README.md b/athena-bigquery/README.md
new file mode 100644
index 0000000000..90f224dce7
--- /dev/null
+++ b/athena-bigquery/README.md
@@ -0,0 +1,40 @@
+# Amazon Athena Google BigQuery Connector
+
+This connector enables Amazon Athena to communicate with BigQuery, making your BigQuery data accessible.
+
+### Parameters
+
+The Athena Google BigQuery Connector exposes several configuration options via Lambda environment variables. More detail on the available parameters can be found below.
+
+|Parameter Name|Example Value|Description|
+|--------------|--------------------|------------------|
+|spill_bucket|my_bucket|When the data returned by your Lambda function exceeds Lambda’s limits, this is the bucket that the data will be written to for Athena to read the excess from.|
+|spill_prefix|temporary/split| (Optional) Defaults to a sub-folder in your bucket called 'athena-federation-spill'. Used in conjunction with spill_bucket, this is the path within the above bucket where large responses are spilled. You should configure an S3 lifecycle rule on this location to delete old spills after X days/hours.|
+|kms_key_id|a7e63k4b-8loc-40db-a2a1-4d0en2cd8331|(Optional) By default any data that is spilled to S3 is encrypted using AES-GCM and a randomly generated key. Setting a KMS Key ID allows your Lambda function to use KMS for key generation for a stronger source of encryption keys.|
+|disable_spill_encryption|True or False|(Optional) Defaults to False so that any data that is spilled to S3 is encrypted using AES-GCM either with a randomly generated key or using KMS to generate keys. Setting this to True will disable spill encryption. You may wish to disable this for improved performance, especially if your spill location in S3 uses S3 Server Side Encryption.|
+|gcp_project_id|semiotic-primer-1234567|The project id (not project name) that contains the datasets that this connector should read from.|
+|secret_manager_gcp_creds_name|GoogleCloudPlatformCredentials|The name of the secret within AWS Secrets Manager that contains your BigQuery credentials JSON. The credentials are used by this Lambda to authenticate with Google BigQuery.|
+
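+The connector authenticates to BigQuery using a Google Cloud service account key JSON stored in AWS Secrets Manager. As a minimal sketch (the key file name below is illustrative; the secret name must match the value of `secret_manager_gcp_creds_name`), you could store the key with the AWS CLI:
+
+```bash
+# Store the GCP service account key JSON in Secrets Manager so the Lambda can read it at runtime.
+aws secretsmanager create-secret \
+    --name GoogleCloudPlatformCredentials \
+    --secret-string file://my-service-account-key.json
+```
+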
+### Deploying The Connector
+
+To use this connector in your queries, navigate to AWS Serverless Application Repository and deploy a pre-built version of this connector. Alternatively, you can build and deploy this connector from source by following the steps below, or use the more detailed tutorial in the athena-example module:
+
+1. From the athena-federation-sdk dir, run `mvn clean install` if you haven't already.
+2. From the athena-bigquery dir, run `mvn clean install`.
+3. From the athena-bigquery dir, run `../tools/publish.sh S3_BUCKET_NAME athena-bigquery` to publish the connector to your private AWS Serverless Application Repository. The S3_BUCKET_NAME in the command is where a copy of the connector's code will be stored for the Serverless Application Repository to retrieve it. This allows users with the appropriate permissions to deploy instances of the connector via a 1-click form. Then navigate to the [Serverless Application Repository](https://aws.amazon.com/serverless/serverlessrepo) to deploy it; an example query is shown below.
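+
+Once deployed and registered as a catalog, you can query your BigQuery data from Athena. A hypothetical example, assuming the Lambda function is named `bigquery` and BigQuery contains a dataset `my_dataset` with a table `my_table`:
+
+```sql
+SELECT *
+FROM "lambda:bigquery".my_dataset.my_table
+LIMIT 10;
+```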
+
+
+## Limitations and Other Notes
+
+The following is a list of limitations or other notes.
+- Lambda has a maximum timeout value of 15 mins. Each split executes a query on BigQuery and must finish with enough time to store the results for Athena to read. If the Lambda times out, the query will fail.
+- Google BigQuery is case sensitive. We attempt to correct the case of dataset and table names, but we do not do any case correction for project IDs. This is necessary because Presto lower-cases all metadata. These corrections make many extra calls to Google BigQuery.
+- Many data types are currently not supported, such as timestamps, dates, binary, and complex types such as maps, lists, and structs.
+
+## Performance
+
+This connector attempts to push down as many constraints as possible to Google BigQuery to decrease the number of results returned. The connector currently does not support querying partitioned tables; support will be added in a future release.
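+
+As an illustration (a sketch with hypothetical column names, not actual connector output), a filter such as `year >= 2015 AND name = 'Alice'` on `my_dataset.my_table` would be pushed down to BigQuery roughly as:
+
+```sql
+SELECT year, name
+from my_dataset.my_table
+WHERE (year >= 2015) AND (name = 'Alice')
+```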
+
+## License
+
+This project is licensed under the Apache-2.0 License.
\ No newline at end of file
diff --git a/athena-bigquery/athena-bigquery.yaml b/athena-bigquery/athena-bigquery.yaml
new file mode 100644
index 0000000000..5a03b10707
--- /dev/null
+++ b/athena-bigquery/athena-bigquery.yaml
@@ -0,0 +1,79 @@
+Transform: 'AWS::Serverless-2016-10-31'
+
+Metadata:
+ AWS::ServerlessRepo::Application:
+ Name: AthenaBigQueryConnector
+ Description: An Athena connector to interact with BigQuery
+ Author: Amazon Athena
+ SpdxLicenseId: Apache-2.0
+ LicenseUrl: LICENSE.txt
+ ReadmeUrl: README.md
+ Labels: ['athena-federation', 'BigQuery']
+ HomePageUrl: https://github.com/awslabs/aws-athena-query-federation
+ SemanticVersion: 1.0.0
+ SourceCodeUrl: https://github.com/awslabs/aws-athena-query-federation
+
+# Parameters are CloudFormation features to pass input
+# to your template when you create a stack
+Parameters:
+ AthenaCatalogName:
+ Description: "The name you will give to this catalog in Athena will be used as the function name prefix."
+ Type: String
+ SpillBucket:
+ Description: "The bucket where this function can spill data."
+ Type: String
+ Default: "athena-spill-test"
+ SpillPrefix:
+ Description: "The bucket where this function can spill data."
+ Type: String
+ Default: "athena-spill"
+ LambdaTimeout:
+ Description: "Maximum Lambda invocation runtime in seconds. (min 1 - 900 max)"
+ Default: 900
+ Type: Number
+ LambdaMemory:
+ Description: "Lambda memory in MB (min 128 - 3008 max)."
+ Default: 3008
+ Type: Number
+ DisableSpillEncryption:
+ Description: "WARNING: If set to 'true' encryption for spilled data is disabled."
+ Default: "false"
+ Type: String
+ GCPProjectID:
+ Description: "The project ID within Google Cloud Platform ."
+ Default: BigQueryCred
+ Type: String
+ SecretManagerGCPCredsName:
+ Description: "The secret name within AWS Secrets Manager that contains your Google Cloud Platform Credentials."
+ Default: GoogleCloudPlatformCredentials
+ Type: String
+
+Resources:
+ ConnectorConfig:
+ Type: 'AWS::Serverless::Function'
+ Properties:
+ Environment:
+ Variables:
+ disable_spill_encryption: !Ref DisableSpillEncryption
+ spill_bucket: !Ref SpillBucket
+ spill_prefix: !Ref SpillPrefix
+ secret_manager_gcp_creds_name: !Ref SecretManagerGCPCredsName
+ gcp_project_id: !Ref GCPProjectID
+ FunctionName: !Sub "${AthenaCatalogName}"
+ Handler: "com.amazonaws.athena.connectors.bigquery.BigQueryCompositeHandler"
+ CodeUri: "./target/athena-bigquery-1.0-SNAPSHOT.jar"
+ Description: "Allows Athena to call and execute BigQuery queries and process the results."
+ Runtime: java8
+ Timeout: !Ref LambdaTimeout
+ MemorySize: !Ref LambdaMemory
+ Policies:
+ - Statement:
+ - Action:
+ - athena:GetQueryExecution
+ Effect: Allow
+ Resource: '*'
+ Version: '2012-10-17'
+ #S3CrudPolicy allows our connector to spill large responses to S3. You can optionally replace this pre-made policy
+ #with one that is more restrictive and can only 'put' but not read,delete, or overwrite files.
+ - S3CrudPolicy:
+ BucketName: !Ref SpillBucket
\ No newline at end of file
diff --git a/athena-bigquery/pom.xml b/athena-bigquery/pom.xml
new file mode 100644
index 0000000000..5a985025e3
--- /dev/null
+++ b/athena-bigquery/pom.xml
@@ -0,0 +1,77 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <parent>
+        <artifactId>aws-athena-query-federation</artifactId>
+        <groupId>com.amazonaws</groupId>
+        <version>1.0</version>
+    </parent>
+    <modelVersion>4.0.0</modelVersion>
+
+    <artifactId>athena-bigquery</artifactId>
+
+    <dependencies>
+        <dependency>
+            <groupId>com.amazonaws</groupId>
+            <artifactId>aws-athena-federation-sdk</artifactId>
+            <version>${aws-athena-federation-sdk.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>com.google.cloud</groupId>
+            <artifactId>google-cloud-bigquery</artifactId>
+            <version>1.87.0</version>
+        </dependency>
+        <dependency>
+            <groupId>com.google.cloud</groupId>
+            <artifactId>google-cloud-resourcemanager</artifactId>
+            <version>0.108.0-alpha</version>
+        </dependency>
+        <dependency>
+            <groupId>com.amazonaws</groupId>
+            <artifactId>aws-java-sdk-secretsmanager</artifactId>
+            <version>${aws-sdk.version}</version>
+        </dependency>
+    </dependencies>
+
+    <build>
+        <plugins>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-shade-plugin</artifactId>
+                <version>3.2.1</version>
+                <executions>
+                    <execution>
+                        <phase>package</phase>
+                        <goals>
+                            <goal>shade</goal>
+                        </goals>
+                        <configuration>
+                            <artifactSet>
+                                <excludes>
+                                    <exclude>classworlds:classworlds</exclude>
+                                    <exclude>junit:junit</exclude>
+                                    <exclude>jmock:*</exclude>
+                                    <exclude>*:xml-apis</exclude>
+                                    <exclude>org.apache.maven:lib:tests</exclude>
+                                </excludes>
+                            </artifactSet>
+                            <filters>
+                                <filter>
+                                    <artifact>*:*</artifact>
+                                    <excludes>
+                                        <exclude>META-INF/*.SF</exclude>
+                                        <exclude>META-INF/*.DSA</exclude>
+                                        <exclude>META-INF/*.RSA</exclude>
+                                    </excludes>
+                                </filter>
+                            </filters>
+                        </configuration>
+                    </execution>
+                </executions>
+            </plugin>
+        </plugins>
+    </build>
+</project>
\ No newline at end of file
diff --git a/athena-bigquery/src/main/java/com/amazonaws/athena/connectors/bigquery/BigQueryCompositeHandler.java b/athena-bigquery/src/main/java/com/amazonaws/athena/connectors/bigquery/BigQueryCompositeHandler.java
new file mode 100644
index 0000000000..b19b713f8e
--- /dev/null
+++ b/athena-bigquery/src/main/java/com/amazonaws/athena/connectors/bigquery/BigQueryCompositeHandler.java
@@ -0,0 +1,35 @@
+/*-
+ * #%L
+ * athena-bigquery
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+
+package com.amazonaws.athena.connectors.bigquery;
+
+import com.amazonaws.athena.connector.lambda.handlers.CompositeHandler;
+
+import java.io.IOException;
+
+public class BigQueryCompositeHandler
+ extends CompositeHandler
+{
+ public BigQueryCompositeHandler()
+ throws IOException
+ {
+ super(new BigQueryMetadataHandler(), new BigQueryRecordHandler());
+ }
+}
diff --git a/athena-bigquery/src/main/java/com/amazonaws/athena/connectors/bigquery/BigQueryConstants.java b/athena-bigquery/src/main/java/com/amazonaws/athena/connectors/bigquery/BigQueryConstants.java
new file mode 100644
index 0000000000..ac7688f7fd
--- /dev/null
+++ b/athena-bigquery/src/main/java/com/amazonaws/athena/connectors/bigquery/BigQueryConstants.java
@@ -0,0 +1,46 @@
+/*-
+ * #%L
+ * athena-bigquery
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+package com.amazonaws.athena.connectors.bigquery;
+
+public class BigQueryConstants
+{
+ /**
+ * The source type that is used to aid in logging diagnostic info when raising a support case.
+ */
+ public static final String SOURCE_TYPE = "bigquery";
+
+ /**
+ * The maximum number of datasets and tables that can be returned from Google BigQuery API calls for metadata.
+ */
+ public static final long MAX_RESULTS = 10_000;
+
+ /**
+ * The project ID within Google Cloud Platform that contains the datasets and tables to query.
+ */
+ public static final String GCP_PROJECT_ID = "gcp_project_id";
+
+ /**
+ * The name of the secret within Secrets Manager that contains credentials JSON that provides this Lambda access
+ * to call Google BigQuery.
+ */
+ public static final String ENV_BIG_QUERY_CREDS_SM_ID = "secret_manager_gcp_creds_name";
+
+ private BigQueryConstants() {}
+}
diff --git a/athena-bigquery/src/main/java/com/amazonaws/athena/connectors/bigquery/BigQueryExceptions.java b/athena-bigquery/src/main/java/com/amazonaws/athena/connectors/bigquery/BigQueryExceptions.java
new file mode 100644
index 0000000000..99a62806d4
--- /dev/null
+++ b/athena-bigquery/src/main/java/com/amazonaws/athena/connectors/bigquery/BigQueryExceptions.java
@@ -0,0 +1,33 @@
+/*-
+ * #%L
+ * athena-bigquery
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+
+package com.amazonaws.athena.connectors.bigquery;
+
+public class BigQueryExceptions
+{
+ static class TooManyTablesException
+ extends RuntimeException
+ {
+ TooManyTablesException()
+ {
+ super("Too many tables, exceeded max metadata results for schema count.");
+ }
+ }
+}
diff --git a/athena-bigquery/src/main/java/com/amazonaws/athena/connectors/bigquery/BigQueryMetadataHandler.java b/athena-bigquery/src/main/java/com/amazonaws/athena/connectors/bigquery/BigQueryMetadataHandler.java
new file mode 100644
index 0000000000..6c44fd5770
--- /dev/null
+++ b/athena-bigquery/src/main/java/com/amazonaws/athena/connectors/bigquery/BigQueryMetadataHandler.java
@@ -0,0 +1,179 @@
+/*-
+ * #%L
+ * athena-bigquery
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+package com.amazonaws.athena.connectors.bigquery;
+
+import com.amazonaws.athena.connector.lambda.QueryStatusChecker;
+import com.amazonaws.athena.connector.lambda.data.BlockAllocator;
+import com.amazonaws.athena.connector.lambda.data.BlockWriter;
+import com.amazonaws.athena.connector.lambda.data.SchemaBuilder;
+import com.amazonaws.athena.connector.lambda.domain.Split;
+import com.amazonaws.athena.connector.lambda.domain.TableName;
+import com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation;
+import com.amazonaws.athena.connector.lambda.handlers.MetadataHandler;
+import com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest;
+import com.amazonaws.athena.connector.lambda.metadata.GetSplitsResponse;
+import com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutRequest;
+import com.amazonaws.athena.connector.lambda.metadata.GetTableRequest;
+import com.amazonaws.athena.connector.lambda.metadata.GetTableResponse;
+import com.amazonaws.athena.connector.lambda.metadata.ListSchemasRequest;
+import com.amazonaws.athena.connector.lambda.metadata.ListSchemasResponse;
+import com.amazonaws.athena.connector.lambda.metadata.ListTablesRequest;
+import com.amazonaws.athena.connector.lambda.metadata.ListTablesResponse;
+import com.amazonaws.athena.connectors.bigquery.BigQueryExceptions.TooManyTablesException;
+import com.google.api.gax.paging.Page;
+import com.google.cloud.bigquery.BigQuery;
+import com.google.cloud.bigquery.Dataset;
+import com.google.cloud.bigquery.DatasetId;
+import com.google.cloud.bigquery.Field;
+import com.google.cloud.bigquery.Table;
+import com.google.cloud.bigquery.TableDefinition;
+import com.google.cloud.bigquery.TableId;
+import org.apache.arrow.util.VisibleForTesting;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import static com.amazonaws.athena.connectors.bigquery.BigQueryUtils.fixCaseForDatasetName;
+import static com.amazonaws.athena.connectors.bigquery.BigQueryUtils.fixCaseForTableName;
+import static com.amazonaws.athena.connectors.bigquery.BigQueryUtils.translateToArrowType;
+
+public class BigQueryMetadataHandler
+ extends MetadataHandler
+{
+ private static final Logger logger = LoggerFactory.getLogger(BigQueryMetadataHandler.class);
+
+ /**
+ * The {@link BigQuery} client to interact with the BigQuery Service.
+ */
+ private final BigQuery bigQuery;
+
+ BigQueryMetadataHandler()
+ throws IOException
+ {
+ this(BigQueryUtils.getBigQueryClient());
+ }
+
+ @VisibleForTesting
+ BigQueryMetadataHandler(BigQuery bigQuery)
+ {
+ super(BigQueryConstants.SOURCE_TYPE);
+ this.bigQuery = bigQuery;
+ }
+
+ @Override
+ public ListSchemasResponse doListSchemaNames(BlockAllocator blockAllocator, ListSchemasRequest listSchemasRequest)
+ {
+ logger.info("doListSchemaNames called with Catalog: {}", listSchemasRequest.getCatalogName());
+
+ final List<String> schemas = new ArrayList<>();
+ final String projectName = BigQueryUtils.getProjectName(listSchemasRequest);
+ Page<Dataset> response = bigQuery.listDatasets(projectName);
+
+ for (Dataset dataset : response.iterateAll()) {
+ if (schemas.size() > BigQueryConstants.MAX_RESULTS) {
+ throw new TooManyTablesException();
+ }
+ schemas.add(dataset.getDatasetId().getDataset().toLowerCase());
+ logger.debug("Found Dataset: {}", dataset.getDatasetId().getDataset());
+ }
+
+ logger.info("Found {} schemas!", schemas.size());
+
+ return new ListSchemasResponse(listSchemasRequest.getCatalogName(), schemas);
+ }
+
+ @Override
+ public ListTablesResponse doListTables(BlockAllocator blockAllocator, ListTablesRequest listTablesRequest)
+ {
+ logger.info("doListTables called with request {}:{}", listTablesRequest.getCatalogName(),
+ listTablesRequest.getSchemaName());
+
+ //Get the project name, dataset name, and dataset id. Google BigQuery is case sensitive.
+ final String projectName = BigQueryUtils.getProjectName(listTablesRequest);
+ final String datasetName = fixCaseForDatasetName(projectName, listTablesRequest.getSchemaName(), bigQuery);
+ final DatasetId datasetId = DatasetId.of(projectName, datasetName);
+
+ Page<Table> response = bigQuery.listTables(datasetId);
+ List<TableName> tables = new ArrayList<>();
+
+ for (Table table : response.iterateAll()) {
+ if (tables.size() > BigQueryConstants.MAX_RESULTS) {
+ throw new TooManyTablesException();
+ }
+ tables.add(new TableName(listTablesRequest.getSchemaName(), table.getTableId().getTable().toLowerCase()));
+ }
+
+ logger.info("Found {} table(s)!", tables.size());
+
+ return new ListTablesResponse(listTablesRequest.getCatalogName(), tables);
+ }
+
+ @Override
+ public GetTableResponse doGetTable(BlockAllocator blockAllocator, GetTableRequest getTableRequest)
+ {
+ logger.info("doGetTable called with request {}:{}", BigQueryUtils.getProjectName(getTableRequest),
+ getTableRequest.getTableName());
+
+ final Schema tableSchema = getSchema(BigQueryUtils.getProjectName(getTableRequest), getTableRequest.getTableName().getSchemaName(),
+ getTableRequest.getTableName().getTableName());
+ return new GetTableResponse(BigQueryUtils.getProjectName(getTableRequest).toLowerCase(),
+ getTableRequest.getTableName(), tableSchema);
+ }
+
+ @Override
+ public void getPartitions(BlockWriter blockWriter, GetTableLayoutRequest request, QueryStatusChecker queryStatusChecker)
+ throws Exception
+ {
+ //NoOp since we don't support partitioning at this time.
+ }
+
+ @Override
+ public GetSplitsResponse doGetSplits(BlockAllocator allocator, GetSplitsRequest request)
+ {
+ if (logger.isInfoEnabled()) {
+ logger.info("DoGetSplits: {}.{} Part Cols: {}", request.getSchema(), request.getTableName(),
+ String.join(",", request.getPartitionCols()));
+ }
+
+ //Every split must have a unique location if we wish to spill to avoid failures
+ SpillLocation spillLocation = makeSpillLocation(request);
+
+ return new GetSplitsResponse(request.getCatalogName(), Split.newBuilder(spillLocation,
+ makeEncryptionKey()).build());
+ }
+
+ private Schema getSchema(String projectName, String datasetName, String tableName)
+ {
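+ //Resolve the case-correct dataset and table names before fetching the table definition from BigQuery.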
+ datasetName = fixCaseForDatasetName(projectName, datasetName, bigQuery);
+ tableName = fixCaseForTableName(projectName, datasetName, tableName, bigQuery);
+ TableId tableId = TableId.of(projectName, datasetName, tableName);
+ Table response = bigQuery.getTable(tableId);
+ TableDefinition tableDefinition = response.getDefinition();
+ SchemaBuilder schemaBuilder = SchemaBuilder.newBuilder();
+ for (Field field : tableDefinition.getSchema().getFields()) {
+ schemaBuilder.addField(field.getName(), translateToArrowType(field.getType()));
+ }
+ return schemaBuilder.build();
+ }
+}
diff --git a/athena-bigquery/src/main/java/com/amazonaws/athena/connectors/bigquery/BigQueryRecordHandler.java b/athena-bigquery/src/main/java/com/amazonaws/athena/connectors/bigquery/BigQueryRecordHandler.java
new file mode 100644
index 0000000000..c517f99343
--- /dev/null
+++ b/athena-bigquery/src/main/java/com/amazonaws/athena/connectors/bigquery/BigQueryRecordHandler.java
@@ -0,0 +1,181 @@
+/*-
+ * #%L
+ * athena-bigquery
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+
+package com.amazonaws.athena.connectors.bigquery;
+
+import com.amazonaws.athena.connector.lambda.QueryStatusChecker;
+import com.amazonaws.athena.connector.lambda.data.Block;
+import com.amazonaws.athena.connector.lambda.data.BlockSpiller;
+import com.amazonaws.athena.connector.lambda.domain.TableName;
+import com.amazonaws.athena.connector.lambda.handlers.RecordHandler;
+import com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest;
+import com.amazonaws.services.athena.AmazonAthena;
+import com.amazonaws.services.athena.AmazonAthenaClientBuilder;
+import com.amazonaws.services.s3.AmazonS3;
+import com.amazonaws.services.s3.AmazonS3ClientBuilder;
+import com.amazonaws.services.secretsmanager.AWSSecretsManager;
+import com.amazonaws.services.secretsmanager.AWSSecretsManagerClientBuilder;
+import com.google.cloud.bigquery.BigQuery;
+import com.google.cloud.bigquery.BigQueryException;
+import com.google.cloud.bigquery.FieldValue;
+import com.google.cloud.bigquery.FieldValueList;
+import com.google.cloud.bigquery.Job;
+import com.google.cloud.bigquery.JobId;
+import com.google.cloud.bigquery.JobInfo;
+import com.google.cloud.bigquery.QueryJobConfiguration;
+import com.google.cloud.bigquery.TableResult;
+import com.google.common.annotations.VisibleForTesting;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+
+import static com.amazonaws.athena.connectors.bigquery.BigQueryUtils.fixCaseForDatasetName;
+import static com.amazonaws.athena.connectors.bigquery.BigQueryUtils.fixCaseForTableName;
+import static com.amazonaws.athena.connectors.bigquery.BigQueryUtils.getObjectFromFieldValue;
+
+/**
+ * This record handler is an example of how you can implement a Lambda that calls BigQuery and pulls data.
+ * This Lambda requires that your BigQuery table is small enough that a table scan can be completed
+ * within 5-10 minutes, or the Lambda will time out and the query will fail.
+ */
+public class BigQueryRecordHandler
+ extends RecordHandler
+{
+ private static final Logger logger = LoggerFactory.getLogger(BigQueryRecordHandler.class);
+
+ /**
+ * The {@link BigQuery} client to interact with the BigQuery Service.
+ */
+ private final BigQuery bigQueryClient;
+
+ BigQueryRecordHandler()
+ throws IOException
+ {
+ this(AmazonS3ClientBuilder.defaultClient(),
+ AWSSecretsManagerClientBuilder.defaultClient(),
+ AmazonAthenaClientBuilder.defaultClient(),
+ BigQueryUtils.getBigQueryClient()
+ );
+ }
+
+ @VisibleForTesting
+ BigQueryRecordHandler(AmazonS3 amazonS3, AWSSecretsManager secretsManager, AmazonAthena athena, BigQuery bigQueryClient)
+ {
+ super(amazonS3, secretsManager, athena, BigQueryConstants.SOURCE_TYPE);
+ this.bigQueryClient = bigQueryClient;
+ }
+
+ @Override
+ protected void readWithConstraint(BlockSpiller spiller, ReadRecordsRequest recordsRequest, QueryStatusChecker queryStatusChecker)
+ throws Exception
+ {
+ final String projectName = BigQueryUtils.getProjectName(recordsRequest.getCatalogName());
+ final String datasetName = fixCaseForDatasetName(projectName, recordsRequest.getTableName().getSchemaName(), bigQueryClient);
+ final String tableName = fixCaseForTableName(projectName, datasetName, recordsRequest.getTableName().getTableName(),
+ bigQueryClient);
+
+ logger.info("Got Request with constraints: {}", recordsRequest.getConstraints());
+
+ final String sqlToExecute = BigQuerySqlUtils.buildSqlFromSplit(new TableName(datasetName, tableName),
+ recordsRequest.getSchema(), recordsRequest.getConstraints(), recordsRequest.getSplit());
+
+ QueryJobConfiguration queryConfig =
+ QueryJobConfiguration.newBuilder(sqlToExecute)
+ // Use standard SQL syntax for queries.
+ // See: https://cloud.google.com/bigquery/sql-reference/
+ .setUseLegacySql(false)
+ .build();
+
+ logger.info("Executing SQL Query: {} for Split: {}", sqlToExecute, recordsRequest.getSplit());
+
+ Job queryJob;
+ try {
+ JobId jobId = JobId.of(fixRequestId(recordsRequest.getQueryId()));
+ queryJob = bigQueryClient.create(JobInfo.newBuilder(queryConfig).setJobId(jobId).build());
+ }
+ catch (BigQueryException bqe) {
+ if (bqe.getMessage().contains("Already Exists: Job")) {
+ logger.info("Caught exception that this job is already running. ");
+ //Return silently because another Lambda invocation is already processing this query.
+ //Ideally, when this happens, we would look up the existing queryJob and rejoin it.
+ //That would allow this Lambda to time out while waiting for the query and then resume,
+ //giving BigQuery up to 15 mins * the number of retries to finish its query.
+ //However, Presto creates multiple splits, even if we return a single split.
+ return;
+ }
+ throw bqe;
+ }
+
+ TableResult result;
+ try {
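+ //Poll the BigQuery job until it completes; if Athena reports the query is no longer running, cancel the job.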
+ while (true) {
+ if (queryJob.isDone()) {
+ result = queryJob.getQueryResults();
+ break;
+ }
+ else if (!queryStatusChecker.isQueryRunning()) {
+ queryJob.cancel();
+ }
+ else {
+ Thread.sleep(10);
+ }
+ }
+ }
+ catch (InterruptedException ie) {
+ throw new IllegalStateException("Got interrupted waiting for Big Query to finish the query.");
+ }
+
+ outputResults(spiller, recordsRequest, result);
+ }
+
+ private String fixRequestId(String queryId)
+ {
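+ //BigQuery job ids may only contain letters, numbers, underscores, and dashes, so strip any other characters from the Athena query id.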
+ return queryId.replaceAll("[^a-zA-Z0-9-_]", "");
+ }
+
+ /**
+ * Iterates through all the results that come back from BigQuery and saves the results to be read by the Athena Connector.
+ *
+ * @param spiller The {@link BlockSpiller} provided when readWithConstraint() is called.
+ * @param recordsRequest The {@link ReadRecordsRequest} provided when readWithConstraint() is called.
+ * @param result The {@link TableResult} provided by the {@link BigQuery} client after a query has completed executing.
+ */
+ private void outputResults(BlockSpiller spiller, ReadRecordsRequest recordsRequest, TableResult result)
+ {
+ for (FieldValueList row : result.iterateAll()) {
+ spiller.writeRows((Block block, int rowNum) -> {
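+ //Write each projected field for this row; if a value is rejected by the constraint check, report zero rows written so the row is skipped.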
+ boolean isMatched = true;
+ for (Field field : recordsRequest.getSchema().getFields()) {
+ FieldValue fieldValue = row.get(field.getName());
+ Object val = getObjectFromFieldValue(field.getName(), fieldValue,
+ field.getFieldType().getType());
+ isMatched &= block.offerValue(field.getName(), rowNum, val);
+ if (!isMatched) {
+ return 0;
+ }
+ }
+ return 1;
+ });
+ }
+ }
+}
diff --git a/athena-bigquery/src/main/java/com/amazonaws/athena/connectors/bigquery/BigQuerySqlUtils.java b/athena-bigquery/src/main/java/com/amazonaws/athena/connectors/bigquery/BigQuerySqlUtils.java
new file mode 100644
index 0000000000..d277bdae2f
--- /dev/null
+++ b/athena-bigquery/src/main/java/com/amazonaws/athena/connectors/bigquery/BigQuerySqlUtils.java
@@ -0,0 +1,182 @@
+/*-
+ * #%L
+ * athena-bigquery
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+
+package com.amazonaws.athena.connectors.bigquery;
+
+import com.amazonaws.athena.connector.lambda.domain.Split;
+import com.amazonaws.athena.connector.lambda.domain.TableName;
+import com.amazonaws.athena.connector.lambda.domain.predicate.Constraints;
+import com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet;
+import com.amazonaws.athena.connector.lambda.domain.predicate.Range;
+import com.amazonaws.athena.connector.lambda.domain.predicate.SortedRangeSet;
+import com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.Schema;
+
+import java.util.Map;
+import java.util.StringJoiner;
+
+/**
+ * Utilities that help with Sql operations.
+ */
+class BigQuerySqlUtils
+{
+ private BigQuerySqlUtils()
+ {
+ }
+
+ /**
+ * Builds an SQL statement from the schema, table name, split, and constraints that can be executed by
+ * BigQuery.
+ *
+ * @param tableName The table name of the table we are querying.
+ * @param schema The schema of the table that we are querying.
+ * @param constraints The constraints that we want to apply to the query.
+ * @param split The split information to add as a constraint.
+ * @return SQL Statement that represents the table, columns, split, and constraints.
+ */
+ static String buildSqlFromSplit(TableName tableName, Schema schema, Constraints constraints, Split split)
+ {
+ StringBuilder sqlBuilder = new StringBuilder("SELECT ");
+
+ StringJoiner sj = new StringJoiner(",");
+ if (schema.getFields().isEmpty()) {
+ sj.add("*");
+ }
+ else {
+ for (Field field : schema.getFields()) {
+ sj.add(field.getName());
+ }
+ }
+ sqlBuilder.append(sj.toString())
+ .append(" from ")
+ .append(tableName.getSchemaName())
+ .append(".")
+ .append(tableName.getTableName());
+
+ //Builds the WHERE clause
+ sj = new StringJoiner(") AND (");
+ for (Map.Entry<String, ValueSet> summary : constraints.getSummary().entrySet()) {
+ final ValueSet value = summary.getValue();
+ final String columnName = summary.getKey();
+ if (value instanceof EquatableValueSet) {
+ if (value.isSingleValue()) {
+ if (value.isNullAllowed()) {
+ sj.add(columnName + " is null");
+ }
+ else {
+ //Quote the value based on its Arrow type and add an equality predicate.
+ sj.add(columnName + " = " + getValueForWhereClause(columnName, value.getSingleValue(), value.getType()));
+ }
+ }
+ //TODO:: process multiple values in "IN" clause.
+ }
+ else if (value instanceof SortedRangeSet) {
+ SortedRangeSet sortedRangeSet = (SortedRangeSet) value;
+ if (sortedRangeSet.isNone()) {
+ if (sortedRangeSet.isNullAllowed()) {
+ sj.add(columnName + " is null");
+ }
+ //If there are no values and null is not allowed, then ignore this value set.
+ continue;
+ }
+ Range range = sortedRangeSet.getSpan();
+ if (!sortedRangeSet.isNullAllowed() && range.getLow().isLowerUnbounded() && range.getHigh().isUpperUnbounded()) {
+ sj.add(columnName + " is not null");
+ continue;
+ }
+ if (!range.getLow().isLowerUnbounded() && !range.getLow().isNullValue()) {
+ final String sqlValue = getValueForWhereClause(columnName, range.getLow().getValue(), value.getType());
+ switch (range.getLow().getBound()) {
+ case ABOVE:
+ sj.add(columnName + " > " + sqlValue);
+ break;
+ case EXACTLY:
+ sj.add(columnName + " >= " + sqlValue);
+ break;
+ case BELOW:
+ throw new IllegalArgumentException("Low Marker should never use BELOW bound: " + range);
+ default:
+ throw new AssertionError("Unhandled bound: " + range.getLow().getBound());
+ }
+ }
+ if (!range.getHigh().isUpperUnbounded() && !range.getHigh().isNullValue()) {
+ final String sqlValue = getValueForWhereClause(columnName, range.getHigh().getValue(), value.getType());
+ switch (range.getHigh().getBound()) {
+ case ABOVE:
+ throw new IllegalArgumentException("High Marker should never use ABOVE bound: " + range);
+ case EXACTLY:
+ sj.add(columnName + " <= " + sqlValue);
+ break;
+ case BELOW:
+ sj.add(columnName + " < " + sqlValue);
+ break;
+ default:
+ throw new AssertionError("Unhandled bound: " + range.getHigh().getBound());
+ }
+ }
+ }
+ }
+ if (sj.length() > 0) {
+ sqlBuilder.append(" WHERE (")
+ .append(sj.toString())
+ .append(")");
+ }
+
+ return sqlBuilder.toString();
+ }
+
+ //Gets the representation of a value that can be used in a WHERE clause, i.e. String values need to be quoted, numeric values do not.
+ private static String getValueForWhereClause(String columnName, Object value, ArrowType arrowType)
+ {
+ switch (arrowType.getTypeID()) {
+ case Int:
+ case Decimal:
+ case FloatingPoint:
+ return value.toString();
+ case Bool:
+ if ((Boolean) value) {
+ return "true";
+ }
+ else {
+ return "false";
+ }
+ case Utf8:
+ return "'" + value.toString() + "'";
+ case Date:
+ case Time:
+ case Timestamp:
+ case Interval:
+ case Binary:
+ case FixedSizeBinary:
+ case Null:
+ case Struct:
+ case List:
+ case FixedSizeList:
+ case Union:
+ case NONE:
+ throw new UnsupportedOperationException("The Arrow type: " + arrowType.getTypeID().name() + " is currently not supported");
+ default:
+ throw new IllegalArgumentException("Unknown type has been encountered during range processing: " + columnName +
+ " Field Type: " + arrowType.getTypeID().name());
+ }
+ }
+}
diff --git a/athena-bigquery/src/main/java/com/amazonaws/athena/connectors/bigquery/BigQueryUtils.java b/athena-bigquery/src/main/java/com/amazonaws/athena/connectors/bigquery/BigQueryUtils.java
new file mode 100644
index 0000000000..4bc3983a62
--- /dev/null
+++ b/athena-bigquery/src/main/java/com/amazonaws/athena/connectors/bigquery/BigQueryUtils.java
@@ -0,0 +1,231 @@
+/*-
+ * #%L
+ * athena-bigquery
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+
+package com.amazonaws.athena.connectors.bigquery;
+
+import com.amazonaws.athena.connector.lambda.metadata.MetadataRequest;
+import com.amazonaws.services.secretsmanager.AWSSecretsManager;
+import com.amazonaws.services.secretsmanager.AWSSecretsManagerClientBuilder;
+import com.amazonaws.services.secretsmanager.model.GetSecretValueRequest;
+import com.amazonaws.services.secretsmanager.model.GetSecretValueResult;
+import com.google.api.gax.paging.Page;
+import com.google.auth.Credentials;
+import com.google.auth.oauth2.ServiceAccountCredentials;
+import com.google.cloud.bigquery.BigQuery;
+import com.google.cloud.bigquery.BigQueryOptions;
+import com.google.cloud.bigquery.Dataset;
+import com.google.cloud.bigquery.DatasetId;
+import com.google.cloud.bigquery.FieldValue;
+import com.google.cloud.bigquery.LegacySQLTypeName;
+import com.google.cloud.bigquery.Table;
+import com.google.cloud.resourcemanager.ResourceManager;
+import com.google.cloud.resourcemanager.ResourceManagerOptions;
+import org.apache.arrow.vector.types.DateUnit;
+import org.apache.arrow.vector.types.FloatingPointPrecision;
+import org.apache.arrow.vector.types.TimeUnit;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+
+class BigQueryUtils
+{
+ private BigQueryUtils() {}
+
+ static Credentials getCredentialsFromSecretsManager()
+ throws IOException
+ {
+ AWSSecretsManager secretsManager = AWSSecretsManagerClientBuilder.defaultClient();
+ GetSecretValueResult response = secretsManager.getSecretValue(new GetSecretValueRequest()
+ .withSecretId(getEnvBigQueryCredsSmId()));
+ return ServiceAccountCredentials.fromStream(new ByteArrayInputStream(response.getSecretString().getBytes()));
+ }
+
+ static BigQuery getBigQueryClient()
+ throws IOException
+ {
+ BigQueryOptions.Builder bigqueryBuilder = BigQueryOptions.newBuilder();
+ bigqueryBuilder.setCredentials(getCredentialsFromSecretsManager());
+ return bigqueryBuilder.build().getService();
+ }
+
+ static ResourceManager getResourceManagerClient()
+ throws IOException
+ {
+ ResourceManagerOptions.Builder resourceManagerBuilder = ResourceManagerOptions.newBuilder();
+ resourceManagerBuilder.setCredentials(getCredentialsFromSecretsManager());
+ return resourceManagerBuilder.build().getService();
+ }
+
+ static String getEnvBigQueryCredsSmId()
+ {
+ return getEnvVar(BigQueryConstants.ENV_BIG_QUERY_CREDS_SM_ID);
+ }
+
+ static String getEnvVar(String envVar)
+ {
+ String var = System.getenv(envVar);
+ if (var == null || var.length() == 0) {
+ throw new IllegalArgumentException("Lambda Environment Variable " + envVar + " has not been populated! ");
+ }
+ return var;
+ }
+
+ /**
+ * Gets the project name that exists within Google Cloud Platform that contains the datasets that we wish to query.
+ * The Lambda environment variable is inspected first, and if it is not set, then we take the project name
+ * from the catalog name in the request.
+ *
+ * @param catalogNameFromRequest The Catalog Name from the request that is passed in from the Athena Connector framework.
+ * @return The project name.
+ */
+ static String getProjectName(String catalogNameFromRequest)
+ {
+ if (System.getenv(BigQueryConstants.GCP_PROJECT_ID) != null) {
+ return System.getenv(BigQueryConstants.GCP_PROJECT_ID);
+ }
+ return catalogNameFromRequest;
+ }
+
+ /**
+ * Gets the project name that exists within Google Cloud Platform that contains the datasets that we wish to query.
+ * The Lambda environment variable is inspected first, and if it is not set, then we take the project name
+ * from the catalog name in the request.
+ *
+ * @param request The {@link MetadataRequest} from the request that is passed in from the Athena Connector framework.
+ * @return The project name.
+ */
+ static String getProjectName(MetadataRequest request)
+ {
+ return getProjectName(request.getCatalogName());
+ }
+
+ /**
+ * BigQuery is case sensitive for its project and dataset names. This function returns the first
+ * case-insensitive match.
+ *
+ * @param projectName The case-correct project name that contains the dataset.
+ * @param datasetName The dataset name we want to look up, in any case.
+ * @param bigQuery The BigQuery client used to list datasets.
+ * @return A case-correct dataset name.
+ */
+ static String fixCaseForDatasetName(String projectName, String datasetName, BigQuery bigQuery)
+ {
+ Page<Dataset> response = bigQuery.listDatasets(projectName);
+ for (Dataset dataset : response.iterateAll()) {
+ if (dataset.getDatasetId().getDataset().equalsIgnoreCase(datasetName)) {
+ return dataset.getDatasetId().getDataset();
+ }
+ }
+
+ throw new IllegalArgumentException("Google Dataset with name " + datasetName +
+ " could not be found in Project " + projectName + " in GCP. ");
+ }
+
+ static String fixCaseForTableName(String projectName, String datasetName, String tableName, BigQuery bigQuery)
+ {
+ Page<Table> response = bigQuery.listTables(DatasetId.of(projectName, datasetName));
+ for (Table table : response.iterateAll()) {
+ if (table.getTableId().getTable().equalsIgnoreCase(tableName)) {
+ return table.getTableId().getTable();
+ }
+ }
+ throw new IllegalArgumentException("Google Table with name " + datasetName +
+ " could not be found in Project " + projectName + " in GCP. ");
+ }
+
+ static Object getObjectFromFieldValue(String fieldName, FieldValue fieldValue, ArrowType arrowType)
+ {
+ if (fieldValue == null || fieldValue.isNull() || fieldValue.getValue().equals("null")) {
+ return null;
+ }
+ switch (Types.getMinorTypeForArrowType(arrowType)) {
+ case TIMESTAMPMILLI:
+ //getTimestampValue() returns a long in microseconds. Return it in milliseconds, which is how it's stored.
+ return fieldValue.getTimestampValue() / 1000;
+ case SMALLINT:
+ case TINYINT:
+ case INT:
+ case BIGINT:
+ return fieldValue.getLongValue();
+ case DECIMAL:
+ return fieldValue.getNumericValue();
+ case BIT:
+ return fieldValue.getBooleanValue();
+ case FLOAT4:
+ case FLOAT8:
+ return fieldValue.getDoubleValue();
+ case VARCHAR:
+ return fieldValue.getStringValue();
+ //TODO: Support complex types.
+ default:
+ throw new IllegalArgumentException("Unknown type has been encountered: Field Name: " + fieldName +
+ " Field Type: " + arrowType.toString() + " MinorType: " + Types.getMinorTypeForArrowType(arrowType));
+ }
+ }
+
+ static ArrowType translateToArrowType(LegacySQLTypeName type)
+ {
+ switch (type.getStandardType()) {
+ case BOOL:
+ return new ArrowType.Bool();
+ /** A 64-bit signed integer value. */
+ case INT64:
+ return new ArrowType.Int(64, true);
+ /** A 64-bit IEEE binary floating-point value. */
+ case FLOAT64:
+ return new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE);
+ /** A decimal value with 38 digits of precision and 9 digits of scale. */
+ case NUMERIC:
+ return new ArrowType.Decimal(38, 9);
+ /** Variable-length character (Unicode) data. */
+ case STRING:
+ return new ArrowType.Utf8();
+ /** Variable-length binary data. */
+ case BYTES:
+ return new ArrowType.Binary();
+ /** Container of ordered fields each with a type (required) and field name (optional). */
+ case STRUCT:
+ return new ArrowType.Struct();
+ /** Ordered list of zero or more elements of any non-array type. */
+ case ARRAY:
+ return new ArrowType.List();
+ /**
+ * Represents an absolute point in time, with microsecond precision. Values range between the
+ * years 1 and 9999, inclusive.
+ */
+ case TIMESTAMP:
+ return new ArrowType.Timestamp(TimeUnit.MILLISECOND, null);
+ /** Represents a logical calendar date. Values range between the years 1 and 9999, inclusive. */
+ case DATE:
+ return new ArrowType.Date(DateUnit.DAY);
+ /** Represents a time, independent of a specific date, to microsecond precision. */
+ case TIME:
+ return new ArrowType.Time(TimeUnit.MILLISECOND, 32);
+ /** Represents a year, month, day, hour, minute, second, and subsecond (microsecond precision). */
+ case DATETIME:
+ return new ArrowType.Date(DateUnit.MILLISECOND);
+ /** Represents a set of geographic points, represented as a Well Known Text (WKT) string. */
+ case GEOGRAPHY:
+ return new ArrowType.Utf8();
+ }
+ throw new IllegalArgumentException("Unable to map Google Type of StandardType: " + type.getStandardType().toString()
+ + " NonStandardType: " + type.name());
+ }
+}
diff --git a/athena-bigquery/src/test/java/com/amazonaws/athena/connectors/bigquery/BigQueryMetadataHandlerTest.java b/athena-bigquery/src/test/java/com/amazonaws/athena/connectors/bigquery/BigQueryMetadataHandlerTest.java
new file mode 100644
index 0000000000..64311daec8
--- /dev/null
+++ b/athena-bigquery/src/test/java/com/amazonaws/athena/connectors/bigquery/BigQueryMetadataHandlerTest.java
@@ -0,0 +1,184 @@
+/*-
+ * #%L
+ * athena-bigquery
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+
+package com.amazonaws.athena.connectors.bigquery;
+
+import com.amazonaws.athena.connector.lambda.data.Block;
+import com.amazonaws.athena.connector.lambda.data.BlockAllocator;
+import com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl;
+import com.amazonaws.athena.connector.lambda.domain.TableName;
+import com.amazonaws.athena.connector.lambda.domain.predicate.Constraints;
+import com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest;
+import com.amazonaws.athena.connector.lambda.metadata.GetSplitsResponse;
+import com.amazonaws.athena.connector.lambda.metadata.GetTableRequest;
+import com.amazonaws.athena.connector.lambda.metadata.GetTableResponse;
+import com.amazonaws.athena.connector.lambda.metadata.ListSchemasRequest;
+import com.amazonaws.athena.connector.lambda.metadata.ListSchemasResponse;
+import com.amazonaws.athena.connector.lambda.metadata.ListTablesRequest;
+import com.amazonaws.athena.connector.lambda.metadata.ListTablesResponse;
+import com.google.cloud.bigquery.BigQuery;
+import com.google.cloud.bigquery.Dataset;
+import com.google.cloud.bigquery.DatasetId;
+import com.google.cloud.bigquery.Schema;
+import com.google.cloud.bigquery.StandardTableDefinition;
+import com.google.cloud.bigquery.Table;
+import com.google.cloud.bigquery.TableId;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.mockito.Mock;
+import org.mockito.MockitoAnnotations;
+
+import java.util.Collections;
+import java.util.HashMap;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.mockito.Matchers.any;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+public class BigQueryMetadataHandlerTest
+{
+ private static final String QUERY_ID = "queryId";
+ private static final String CATALOG = "catalog";
+ private static final TableName TABLE_NAME = new TableName("dataset1", "table1");
+
+ @Mock
+ BigQuery bigQuery;
+
+ private BigQueryMetadataHandler bigQueryMetadataHandler;
+
+ private BlockAllocator blockAllocator;
+
+ @Before
+ public void setUp()
+ {
+ MockitoAnnotations.initMocks(this);
+ bigQueryMetadataHandler = new BigQueryMetadataHandler(bigQuery);
+ blockAllocator = new BlockAllocatorImpl();
+ }
+
+ @After
+ public void tearDown()
+ {
+ blockAllocator.close();
+ }
+
+ @Test
+ public void testDoListSchemaNames()
+ {
+ final int numDatasets = 5;
+ BigQueryPage<Dataset> datasetPage =
+ new BigQueryPage<>(BigQueryTestUtils.getDatasetList(BigQueryTestUtils.PROJECT_1_NAME, numDatasets));
+ when(bigQuery.listDatasets(any(String.class))).thenReturn(datasetPage);
+
+ //This will test case insensitivity
+ ListSchemasRequest request = new ListSchemasRequest(BigQueryTestUtils.FEDERATED_IDENTITY,
+ QUERY_ID, BigQueryTestUtils.PROJECT_1_NAME.toLowerCase());
+ ListSchemasResponse schemaNames = bigQueryMetadataHandler.doListSchemaNames(blockAllocator, request);
+
+ assertNotNull(schemaNames);
+ assertEquals("Schema count does not match!", numDatasets, schemaNames.getSchemas().size());
+ }
+
+ @Test
+ public void testDoListTables()
+ {
+ //Build mocks for Datasets
+ final int numDatasets = 5;
+ BigQueryPage<Dataset> datasetPage =
+ new BigQueryPage<>(BigQueryTestUtils.getDatasetList(BigQueryTestUtils.PROJECT_1_NAME, numDatasets));
+ when(bigQuery.listDatasets(any(String.class))).thenReturn(datasetPage);
+
+ //Get the first dataset name.
+ String datasetName = datasetPage.iterateAll().iterator().next().getDatasetId().getDataset();
+
+ final int numTables = 5;
+ BigQueryPage<Table> tablesPage =
+ new BigQueryPage<>(BigQueryTestUtils.getTableList(BigQueryTestUtils.PROJECT_1_NAME,
+ datasetName, numTables));
+
+ when(bigQuery.listTables(any(DatasetId.class))).thenReturn(tablesPage);
+
+ //This will test case insensitivity
+ ListTablesRequest request = new ListTablesRequest(BigQueryTestUtils.FEDERATED_IDENTITY,
+ QUERY_ID, BigQueryTestUtils.PROJECT_1_NAME.toLowerCase(),
+ datasetName);
+ ListTablesResponse tableNames = bigQueryMetadataHandler.doListTables(blockAllocator, request);
+
+ assertNotNull(tableNames);
+ assertEquals("Schema count does not match!", numTables, tableNames.getTables().size());
+ }
+
+ @Test
+ public void testDoGetTable()
+ {
+ //Build mocks for Datasets
+ final int numDatasets = 5;
+ BigQueryPage<Dataset> datasetPage =
+ new BigQueryPage<>(BigQueryTestUtils.getDatasetList(BigQueryTestUtils.PROJECT_1_NAME, numDatasets));
+ when(bigQuery.listDatasets(any(String.class))).thenReturn(datasetPage);
+
+ //Get the first dataset name.
+ String datasetName = datasetPage.iterateAll().iterator().next().getDatasetId().getDataset();
+
+ //Build mocks for Tables
+ final int numTables = 5;
+ BigQueryPage<Table> tablesPage =
+ new BigQueryPage<>(BigQueryTestUtils.getTableList(BigQueryTestUtils.PROJECT_1_NAME,
+ datasetName, numTables));
+
+ String tableName = tablesPage.iterateAll().iterator().next().getTableId().getTable();
+
+ when(bigQuery.listTables(any(DatasetId.class))).thenReturn(tablesPage);
+
+ Schema tableSchema = BigQueryTestUtils.getTestSchema();
+ StandardTableDefinition tableDefinition = StandardTableDefinition.newBuilder()
+ .setSchema(tableSchema).build();
+
+ Table table = mock(Table.class);
+ when(table.getTableId()).thenReturn(TableId.of(BigQueryTestUtils.PROJECT_1_NAME, datasetName, tableName));
+ when(table.getDefinition()).thenReturn(tableDefinition);
+ when(bigQuery.getTable(any(TableId.class))).thenReturn(table);
+
+ //Make the call
+ GetTableRequest getTableRequest = new GetTableRequest(BigQueryTestUtils.FEDERATED_IDENTITY,
+ QUERY_ID, BigQueryTestUtils.PROJECT_1_NAME,
+ new TableName(datasetName, tableName));
+ GetTableResponse response = bigQueryMetadataHandler.doGetTable(blockAllocator, getTableRequest);
+
+ assertNotNull(response);
+
+ //Number of Fields
+ assertEquals(tableSchema.getFields().size(), response.getSchema().getFields().size());
+ }
+
+ @Test
+ public void testDoGetSplits()
+ {
+ GetSplitsRequest request = new GetSplitsRequest(BigQueryTestUtils.FEDERATED_IDENTITY,
+ QUERY_ID, CATALOG, TABLE_NAME,
+ mock(Block.class), Collections.emptyList(), new Constraints(new HashMap<>()), null);
+ GetSplitsResponse response = bigQueryMetadataHandler.doGetSplits(blockAllocator, request);
+
+ assertNotNull(response);
+ }
+}
diff --git a/athena-bigquery/src/test/java/com/amazonaws/athena/connectors/bigquery/BigQueryPage.java b/athena-bigquery/src/test/java/com/amazonaws/athena/connectors/bigquery/BigQueryPage.java
new file mode 100644
index 0000000000..64fdad325d
--- /dev/null
+++ b/athena-bigquery/src/test/java/com/amazonaws/athena/connectors/bigquery/BigQueryPage.java
@@ -0,0 +1,86 @@
+/*-
+ * #%L
+ * athena-bigquery
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+
+package com.amazonaws.athena.connectors.bigquery;
+
+import com.google.api.gax.paging.Page;
+
+import java.util.Collection;
+import java.util.Iterator;
+
+/**
+ * This class is a wrapper around the {@link Page} class as a convenient way to create Pages in unit tests.
+ *
+ * @param <T> The type of object that is being returned from a Google BigQuery API call. For example, getDatasets().
+ */
+class BigQueryPage<T>
+ implements Page<T>
+{
+ final Collection<T> collection;
+
+ BigQueryPage(Collection<T> collection)
+ {
+ this.collection = collection;
+ }
+
+ @Override
+ public boolean hasNextPage()
+ {
+ return false;
+ }
+
+ @Override
+ public String getNextPageToken()
+ {
+ return null;
+ }
+
+ @Override
+ public Page<T> getNextPage()
+ {
+ return null;
+ }
+
+ @Override
+ public Iterable<T> iterateAll()
+ {
+ return new Iterable<T>()
+ {
+ @Override
+ public Iterator<T> iterator()
+ {
+ return collection.iterator();
+ }
+ };
+ }
+
+ @Override
+ public Iterable<T> getValues()
+ {
+ return new Iterable<T>()
+ {
+ @Override
+ public Iterator<T> iterator()
+ {
+ return collection.iterator();
+ }
+ };
+ }
+}
diff --git a/athena-bigquery/src/test/java/com/amazonaws/athena/connectors/bigquery/BigQueryRecordHandlerTest.java b/athena-bigquery/src/test/java/com/amazonaws/athena/connectors/bigquery/BigQueryRecordHandlerTest.java
new file mode 100644
index 0000000000..f5555e5fa1
--- /dev/null
+++ b/athena-bigquery/src/test/java/com/amazonaws/athena/connectors/bigquery/BigQueryRecordHandlerTest.java
@@ -0,0 +1,314 @@
+/*-
+ * #%L
+ * athena-bigquery
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+
+package com.amazonaws.athena.connectors.bigquery;
+
+import com.amazonaws.athena.connector.lambda.QueryStatusChecker;
+import com.amazonaws.athena.connector.lambda.data.Block;
+import com.amazonaws.athena.connector.lambda.data.BlockAllocator;
+import com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl;
+import com.amazonaws.athena.connector.lambda.data.S3BlockSpillReader;
+import com.amazonaws.athena.connector.lambda.data.S3BlockSpiller;
+import com.amazonaws.athena.connector.lambda.data.SpillConfig;
+import com.amazonaws.athena.connector.lambda.domain.Split;
+import com.amazonaws.athena.connector.lambda.domain.TableName;
+import com.amazonaws.athena.connector.lambda.domain.predicate.ConstraintEvaluator;
+import com.amazonaws.athena.connector.lambda.domain.predicate.Constraints;
+import com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation;
+import com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest;
+import com.amazonaws.athena.connector.lambda.security.EncryptionKey;
+import com.amazonaws.athena.connector.lambda.security.EncryptionKeyFactory;
+import com.amazonaws.athena.connector.lambda.security.LocalKeyFactory;
+import com.amazonaws.services.athena.AmazonAthena;
+import com.amazonaws.services.s3.AmazonS3;
+import com.amazonaws.services.s3.model.PutObjectResult;
+import com.amazonaws.services.s3.model.S3Object;
+import com.amazonaws.services.s3.model.S3ObjectInputStream;
+import com.amazonaws.services.secretsmanager.AWSSecretsManager;
+import com.google.api.gax.paging.Page;
+import com.google.cloud.bigquery.BigQuery;
+import com.google.cloud.bigquery.Dataset;
+import com.google.cloud.bigquery.DatasetId;
+import com.google.cloud.bigquery.FieldValue;
+import com.google.cloud.bigquery.FieldValueList;
+import com.google.cloud.bigquery.Job;
+import com.google.cloud.bigquery.JobInfo;
+import com.google.cloud.bigquery.Table;
+import com.google.cloud.bigquery.TableResult;
+import com.google.common.io.ByteStreams;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.junit.Before;
+import org.junit.Test;
+import org.mockito.Mock;
+import org.mockito.MockitoAnnotations;
+import org.mockito.invocation.InvocationOnMock;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.ByteArrayInputStream;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.UUID;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+import static org.mockito.Matchers.any;
+import static org.mockito.Matchers.anyObject;
+import static org.mockito.Matchers.anyString;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+public class BigQueryRecordHandlerTest
+{
+ private static final Logger logger = LoggerFactory.getLogger(BigQueryRecordHandlerTest.class);
+
+ private String bucket = "bucket";
+ private String prefix = "prefix";
+
+ @Mock
+ BigQuery bigQuery;
+
+ @Mock
+ AWSSecretsManager awsSecretsManager;
+
+ @Mock
+ private AmazonAthena athena;
+
+ private BigQueryRecordHandler bigQueryRecordHandler;
+
+ private BlockAllocator allocator;
+ private List<ByteHolder> mockS3Storage = new ArrayList<>();
+ private AmazonS3 amazonS3;
+ private S3BlockSpiller spillWriter;
+ private S3BlockSpillReader spillReader;
+ private Schema schemaForRead;
+ private EncryptionKeyFactory keyFactory = new LocalKeyFactory();
+ private EncryptionKey encryptionKey = keyFactory.create();
+ private SpillConfig spillConfig;
+ private String queryId = UUID.randomUUID().toString();
+ private S3SpillLocation s3SpillLocation = S3SpillLocation.newBuilder()
+ .withBucket(UUID.randomUUID().toString())
+ .withSplitId(UUID.randomUUID().toString())
+ .withQueryId(queryId)
+ .withIsDirectory(true)
+ .build();
+
+ @Before
+ public void init()
+ {
+ logger.info("Starting init.");
+ MockitoAnnotations.initMocks(this);
+
+ allocator = new BlockAllocatorImpl();
+ amazonS3 = mock(AmazonS3.class);
+
+ mockS3Client();
+
+ //Create Spill config
+ spillConfig = SpillConfig.newBuilder()
+ .withEncryptionKey(encryptionKey)
+ //This will be enough for a single block
+ .withMaxBlockBytes(100000)
+ //This will force the writer to spill.
+ .withMaxInlineBlockBytes(100)
+ //Async Writing.
+ .withNumSpillThreads(0)
+ .withRequestId(UUID.randomUUID().toString())
+ .withSpillLocation(s3SpillLocation)
+ .build();
+
+ schemaForRead = new Schema(BigQueryTestUtils.getTestSchemaFieldsArrow());
+ spillWriter = new S3BlockSpiller(amazonS3, spillConfig, allocator, schemaForRead, ConstraintEvaluator.emptyEvaluator());
+ spillReader = new S3BlockSpillReader(amazonS3, allocator);
+
+ //Mock the BigQuery Client to return Datasets, and Table Schema information.
+ BigQueryPage<Dataset> datasets = new BigQueryPage<>(BigQueryTestUtils.getDatasetList(BigQueryTestUtils.PROJECT_1_NAME, 2));
+ when(bigQuery.listDatasets(any(String.class))).thenReturn(datasets);
+ BigQueryPage<Table> tables = new BigQueryPage<>(BigQueryTestUtils.getTableList(BigQueryTestUtils.PROJECT_1_NAME, "dataset1", 2));
+ when(bigQuery.listTables(any(DatasetId.class))).thenReturn(tables);
+
+ //The class we want to test.
+ bigQueryRecordHandler = new BigQueryRecordHandler(amazonS3, awsSecretsManager, athena, bigQuery);
+
+ logger.info("Completed init.");
+ }
+
+ @Test
+ public void testReadWithConstraint()
+ throws Exception
+ {
+ try (ReadRecordsRequest request = new ReadRecordsRequest(
+ BigQueryTestUtils.FEDERATED_IDENTITY,
+ BigQueryTestUtils.PROJECT_1_NAME,
+ "queryId",
+ new TableName("dataset1", "table1"),
+ BigQueryTestUtils.getBlockTestSchema(),
+ Split.newBuilder(S3SpillLocation.newBuilder()
+ .withBucket(bucket)
+ .withPrefix(prefix)
+ .withSplitId(UUID.randomUUID().toString())
+ .withQueryId(UUID.randomUUID().toString())
+ .withIsDirectory(true)
+ .build(),
+ keyFactory.create()).build(),
+ new Constraints(Collections.EMPTY_MAP),
+ 0, //This is ignored when directly calling readWithConstraints.
+ 0)) { //This is ignored when directly calling readWithConstraints.
+ //Always return true for the evaluator to keep all rows.
+ ConstraintEvaluator evaluator = mock(ConstraintEvaluator.class);
+ when(evaluator.apply(any(String.class), any(Object.class))).thenAnswer(
+ (InvocationOnMock invocationOnMock) -> {
+ return true;
+ }
+ );
+
+ //Populate the schema and data that the mocked Google BigQuery client will return.
+ com.google.cloud.bigquery.Schema tableSchema = BigQueryTestUtils.getTestSchema();
+ List<FieldValueList> tableRows = Arrays.asList(
+ BigQueryTestUtils.getBigQueryFieldValueList(false, 1000, "test1", 123123.12312),
+ BigQueryTestUtils.getBigQueryFieldValueList(true, 500, "test2", 5345234.22111),
+ BigQueryTestUtils.getBigQueryFieldValueList(false, 700, "test3", 324324.23423),
+ BigQueryTestUtils.getBigQueryFieldValueList(true, 900, null, null),
+ BigQueryTestUtils.getBigQueryFieldValueList(null, null, "test5", 2342.234234),
+ BigQueryTestUtils.getBigQueryFieldValueList(true, 1200, "test6", 1123.12312),
+ BigQueryTestUtils.getBigQueryFieldValueList(false, 100, "test7", 1313.12312),
+ BigQueryTestUtils.getBigQueryFieldValueList(true, 120, "test8", 12313.1312),
+ BigQueryTestUtils.getBigQueryFieldValueList(false, 300, "test9", 12323.1312)
+ );
+ Page<FieldValueList> fieldValueList = new BigQueryPage<>(tableRows);
+ TableResult result = new TableResult(tableSchema, tableRows.size(), fieldValueList);
+
+ //Mock out the Google BigQuery Job.
+ Job mockBigQueryJob = mock(Job.class);
+ when(mockBigQueryJob.isDone()).thenReturn(false).thenReturn(true);
+ when(mockBigQueryJob.getQueryResults()).thenReturn(result);
+ when(bigQuery.create(any(JobInfo.class))).thenReturn(mockBigQueryJob);
+
+ QueryStatusChecker queryStatusChecker = mock(QueryStatusChecker.class);
+ when(queryStatusChecker.isQueryRunning()).thenReturn(true);
+
+ //Execute the test
+ bigQueryRecordHandler.readWithConstraint(spillWriter, request, queryStatusChecker);
+
+ //Ensure that there was a spill so that we can read the spilled block.
+ assertTrue(spillWriter.spilled());
+ //Calling getSpillLocations() forces a flush.
+ assertEquals(1, spillWriter.getSpillLocations().size());
+
+ //Read the spilled block
+ Block block = spillReader.read(s3SpillLocation, encryptionKey, schemaForRead);
+
+ assertEquals("The number of rows expected do not match!", tableRows.size(), block.getRowCount());
+ validateBlock(block, tableRows);
+ }
+ }
+
+ private void validateBlock(Block block, List<FieldValueList> tableRows)
+ {
+ //Iterate through the fields
+ for (Field field : block.getFields()) {
+ FieldReader fieldReader = block.getFieldReader(field.getName());
+ int currentCount = 0;
+ //Iterate through the rows and match up with the block
+ for (FieldValueList tableRow : tableRows) {
+ FieldValue orgValue = tableRow.get(field.getName());
+ fieldReader.setPosition(currentCount);
+ currentCount++;
+
+ logger.debug("comparing: {} with {}", orgValue.getValue(), fieldReader.readObject());
+
+ //Check for null values.
+ if ((orgValue.getValue() == null || fieldReader.readObject() == null)) {
+ assertTrue(orgValue.isNull());
+ assertFalse(fieldReader.isSet());
+ continue;
+ }
+
+ //Check regular values.
+ Types.MinorType type = Types.getMinorTypeForArrowType(field.getType());
+ switch (type) {
+ case INT:
+ assertEquals(orgValue.getLongValue(), (long) fieldReader.readInteger());
+ break;
+ case BIT:
+ assertEquals(orgValue.getBooleanValue(), fieldReader.readBoolean());
+ break;
+ case FLOAT4:
+ assertEquals(orgValue.getDoubleValue(), fieldReader.readFloat(), 0.001);
+ break;
+ case FLOAT8:
+ assertEquals(orgValue.getDoubleValue(), fieldReader.readDouble(), 0.001);
+ break;
+ case VARCHAR:
+ assertEquals(orgValue.getStringValue(), fieldReader.readText().toString());
+ break;
+ default:
+ throw new RuntimeException("No validation configured for field " + field.getName() + ":" + type + " " + field.getChildren());
+ }
+ }
+ }
+ }
+
+ //Mocks the S3 client by storing any putObjects() and returning the object when getObject() is called.
+ private void mockS3Client()
+ {
+ when(amazonS3.putObject(anyObject(), anyObject(), anyObject(), anyObject()))
+ .thenAnswer((InvocationOnMock invocationOnMock) -> {
+ InputStream inputStream = (InputStream) invocationOnMock.getArguments()[2];
+ ByteHolder byteHolder = new ByteHolder();
+ byteHolder.setBytes(ByteStreams.toByteArray(inputStream));
+ mockS3Storage.add(byteHolder);
+ return mock(PutObjectResult.class);
+ });
+
+ when(amazonS3.getObject(anyString(), anyString()))
+ .thenAnswer((InvocationOnMock invocationOnMock) -> {
+ S3Object mockObject = mock(S3Object.class);
+ ByteHolder byteHolder = mockS3Storage.get(0);
+ mockS3Storage.remove(0);
+ when(mockObject.getObjectContent()).thenReturn(
+ new S3ObjectInputStream(
+ new ByteArrayInputStream(byteHolder.getBytes()), null));
+ return mockObject;
+ });
+ }
+
+ private class ByteHolder
+ {
+ private byte[] bytes;
+
+ void setBytes(byte[] bytes)
+ {
+ this.bytes = bytes;
+ }
+
+ byte[] getBytes()
+ {
+ return bytes;
+ }
+ }
+}
diff --git a/athena-bigquery/src/test/java/com/amazonaws/athena/connectors/bigquery/BigQuerySqlUtilsTest.java b/athena-bigquery/src/test/java/com/amazonaws/athena/connectors/bigquery/BigQuerySqlUtilsTest.java
new file mode 100644
index 0000000000..ee0564cb80
--- /dev/null
+++ b/athena-bigquery/src/test/java/com/amazonaws/athena/connectors/bigquery/BigQuerySqlUtilsTest.java
@@ -0,0 +1,115 @@
+/*-
+ * #%L
+ * athena-bigquery
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+
+package com.amazonaws.athena.connectors.bigquery;
+
+import com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl;
+import com.amazonaws.athena.connector.lambda.data.SchemaBuilder;
+import com.amazonaws.athena.connector.lambda.domain.Split;
+import com.amazonaws.athena.connector.lambda.domain.TableName;
+import com.amazonaws.athena.connector.lambda.domain.predicate.AllOrNoneValueSet;
+import com.amazonaws.athena.connector.lambda.domain.predicate.Constraints;
+import com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet;
+import com.amazonaws.athena.connector.lambda.domain.predicate.Marker;
+import com.amazonaws.athena.connector.lambda.domain.predicate.Range;
+import com.amazonaws.athena.connector.lambda.domain.predicate.SortedRangeSet;
+import com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.junit.Test;
+
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.Map;
+
+import static org.junit.Assert.assertEquals;
+
+public class BigQuerySqlUtilsTest
+{
+ static final TableName tableName = new TableName("schema", "table");
+ static final Split split = null;
+
+ static final ArrowType BOOLEAN_TYPE = ArrowType.Bool.INSTANCE;
+ static final ArrowType INT_TYPE = new ArrowType.Int(32, true);
+
+ @Test
+ public void testSqlWithConstraintsEquality()
+ throws Exception
+ {
+ Map<String, ValueSet> constraintMap = new LinkedHashMap<>();
+ constraintMap.put("bool1", EquatableValueSet.newBuilder(new BlockAllocatorImpl(), BOOLEAN_TYPE,
+ true, false).add(false).build());
+ constraintMap.put("int1", EquatableValueSet.newBuilder(new BlockAllocatorImpl(), INT_TYPE,
+ true, false).add(14).build());
+ constraintMap.put("nullableField", EquatableValueSet.newBuilder(new BlockAllocatorImpl(), INT_TYPE,
+ true, true).build());
+
+ try (Constraints constraints = new Constraints(constraintMap)) {
+ String sql = BigQuerySqlUtils.buildSqlFromSplit(tableName, makeSchema(constraintMap), constraints, split);
+ assertEquals("SELECT bool1,int1,nullableField from schema.table WHERE (bool1 = false) AND (int1 = 14) AND (nullableField is null)", sql);
+ }
+ }
+
+ @Test
+ public void testSqlWithConstraintsRanges()
+ throws Exception
+ {
+ Map<String, ValueSet> constraintMap = new LinkedHashMap<>();
+ ValueSet rangeSet = SortedRangeSet.newBuilder(INT_TYPE, true).add(new Range(Marker.above(new BlockAllocatorImpl(), INT_TYPE, 10),
+ Marker.exactly(new BlockAllocatorImpl(), INT_TYPE, 20))).build();
+
+ ValueSet isNullRangeSet = SortedRangeSet.newBuilder(INT_TYPE, true).build();
+
+ ValueSet isNonNullRangeSet = SortedRangeSet.newBuilder(INT_TYPE, false).add(
+ new Range(Marker.lowerUnbounded(new BlockAllocatorImpl(), INT_TYPE),
+ Marker.upperUnbounded(new BlockAllocatorImpl(), INT_TYPE)))
+ .build();
+
+ constraintMap.put("integerRange", rangeSet);
+ constraintMap.put("isNullRange", isNullRangeSet);
+ constraintMap.put("isNotNullRange", isNonNullRangeSet);
+
+ try (Constraints constraints = new Constraints(constraintMap)) {
+ String sql = BigQuerySqlUtils.buildSqlFromSplit(tableName, makeSchema(constraintMap), constraints, split);
+ assertEquals("SELECT integerRange,isNullRange,isNotNullRange from schema.table WHERE (integerRange > 10) AND (integerRange <= 20) AND (isNullRange is null) AND (isNotNullRange is not null)", sql);
+ }
+ }
+
+ private Schema makeSchema(Map<String, ValueSet> constraintMap)
+ {
+ SchemaBuilder builder = new SchemaBuilder();
+ for (Map.Entry<String, ValueSet> field : constraintMap.entrySet()) {
+ ArrowType.ArrowTypeID typeId = field.getValue().getType().getTypeID();
+ switch (typeId) {
+ case Int:
+ builder.addIntField(field.getKey());
+ break;
+ case Bool:
+ builder.addBitField(field.getKey());
+ break;
+ case Utf8:
+ builder.addStringField(field.getKey());
+ break;
+ default:
+ throw new UnsupportedOperationException("Type Not Implemented: " + typeId.name());
+ }
+ }
+ return builder.build();
+ }
+}
diff --git a/athena-bigquery/src/test/java/com/amazonaws/athena/connectors/bigquery/BigQueryTestUtils.java b/athena-bigquery/src/test/java/com/amazonaws/athena/connectors/bigquery/BigQueryTestUtils.java
new file mode 100644
index 0000000000..168b86ac20
--- /dev/null
+++ b/athena-bigquery/src/test/java/com/amazonaws/athena/connectors/bigquery/BigQueryTestUtils.java
@@ -0,0 +1,143 @@
+/*-
+ * #%L
+ * athena-bigquery
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+
+package com.amazonaws.athena.connectors.bigquery;
+
+import com.amazonaws.athena.connector.lambda.data.SchemaBuilder;
+import com.amazonaws.athena.connector.lambda.security.FederatedIdentity;
+import com.google.cloud.bigquery.Dataset;
+import com.google.cloud.bigquery.DatasetId;
+import com.google.cloud.bigquery.Field;
+import com.google.cloud.bigquery.FieldList;
+import com.google.cloud.bigquery.FieldValue;
+import com.google.cloud.bigquery.FieldValueList;
+import com.google.cloud.bigquery.LegacySQLTypeName;
+import com.google.cloud.bigquery.Schema;
+import com.google.cloud.bigquery.Table;
+import com.google.cloud.bigquery.TableId;
+import org.apache.arrow.vector.types.FloatingPointPrecision;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.FieldType;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.List;
+
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+public class BigQueryTestUtils
+{
+ public static final FederatedIdentity FEDERATED_IDENTITY = new FederatedIdentity("id", "principal", "account");
+ public static final String BOOL_FIELD_NAME_1 = "bool1";
+ public static final String INTEGER_FIELD_NAME_1 = "int1";
+ public static final String STRING_FIELD_NAME_1 = "string1";
+ public static final String FLOAT_FIELD_NAME_1 = "float1";
+
+ private BigQueryTestUtils() {
+ }
+
+ public static final String PROJECT_1_NAME = "testProject";
+
+ //Returns a list of mocked Datasets.
+ static List<Dataset> getDatasetList(String projectName, int numDatasets)
+ {
+ List<Dataset> datasetList = new ArrayList<>();
+ for (int i = 0; i < numDatasets; i++) {
+ Dataset dataset1 = mock(Dataset.class);
+ when(dataset1.getDatasetId()).thenReturn(DatasetId.of(projectName, "dataset" + i));
+ when(dataset1.getFriendlyName()).thenReturn("dataset" + i);
+ datasetList.add(dataset1);
+ }
+ return datasetList;
+ }
+
+ //Returns a list of mocked Tables
+ static List<Table> getTableList(String projectName, String dataset, int numTables)
+ {
+ List<Table> tableList = new ArrayList<>();
+ for (int i = 0; i < numTables; i++) {
+ Table table = mock(Table.class);
+ when(table.getTableId()).thenReturn(TableId.of(projectName, dataset, "table" + i));
+ tableList.add(table);
+ }
+ return tableList;
+ }
+
+ //Returns the schema by returning a list of fields in Google BigQuery Format.
+ static List<Field> getTestSchemaFields()
+ {
+ return Arrays.asList(Field.of(BOOL_FIELD_NAME_1, LegacySQLTypeName.BOOLEAN),
+ Field.of(INTEGER_FIELD_NAME_1, LegacySQLTypeName.INTEGER),
+ Field.of(STRING_FIELD_NAME_1, LegacySQLTypeName.STRING),
+ Field.of(FLOAT_FIELD_NAME_1, LegacySQLTypeName.FLOAT)
+ );
+ }
+
+ static Schema getTestSchema()
+ {
+ return Schema.of(getTestSchemaFields());
+ }
+
+ //Gets the schema in Arrow Format.
+ static org.apache.arrow.vector.types.pojo.Schema getBlockTestSchema()
+ {
+ return SchemaBuilder.newBuilder()
+ .addBitField(BOOL_FIELD_NAME_1)
+ .addIntField(INTEGER_FIELD_NAME_1)
+ .addStringField(STRING_FIELD_NAME_1)
+ .addFloat8Field(FLOAT_FIELD_NAME_1)
+ .build();
+ }
+
+ static Collection<org.apache.arrow.vector.types.pojo.Field> getTestSchemaFieldsArrow()
+ {
+ return Arrays.asList(
+ new org.apache.arrow.vector.types.pojo.Field(BOOL_FIELD_NAME_1,
+ FieldType.nullable(ArrowType.Bool.INSTANCE), null),
+ new org.apache.arrow.vector.types.pojo.Field(INTEGER_FIELD_NAME_1,
+ FieldType.nullable(new ArrowType.Int(32, true)), null),
+ new org.apache.arrow.vector.types.pojo.Field(STRING_FIELD_NAME_1,
+ FieldType.nullable(new ArrowType.Utf8()), null),
+ new org.apache.arrow.vector.types.pojo.Field(FLOAT_FIELD_NAME_1,
+ FieldType.nullable(new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)), null)
+ );
+ }
+
+ static List<FieldValue> generateBigQueryRowValue(Boolean bool, Integer integer, String string, Double floatVal)
+ {
+ return Arrays.asList(
+ //Primitives are stored as Strings.
+ FieldValue.of(FieldValue.Attribute.PRIMITIVE, bool == null ? null : String.valueOf(bool)),
+ FieldValue.of(FieldValue.Attribute.PRIMITIVE, integer == null ? null : String.valueOf(integer)),
+ //Timestamps are stored as a number, where the integer component of the number is seconds since epoch
+ //and the microsecond part is the decimal part.
+ FieldValue.of(FieldValue.Attribute.PRIMITIVE, string),
+ FieldValue.of(FieldValue.Attribute.PRIMITIVE, floatVal == null ? null : String.valueOf(floatVal))
+ );
+ }
+
+ static FieldValueList getBigQueryFieldValueList(Boolean bool, Integer integer, String string, Double floatVal)
+ {
+ return FieldValueList.of(generateBigQueryRowValue(bool, integer, string, floatVal),
+ FieldList.of(getTestSchemaFields()));
+ }
+}
diff --git a/athena-cloudwatch-metrics/LICENSE.txt b/athena-cloudwatch-metrics/LICENSE.txt
new file mode 100644
index 0000000000..418de4c108
--- /dev/null
+++ b/athena-cloudwatch-metrics/LICENSE.txt
@@ -0,0 +1,174 @@
+Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
\ No newline at end of file
diff --git a/athena-cloudwatch-metrics/README.md b/athena-cloudwatch-metrics/README.md
new file mode 100644
index 0000000000..caed435c27
--- /dev/null
+++ b/athena-cloudwatch-metrics/README.md
@@ -0,0 +1,85 @@
+# Amazon Athena Cloudwatch Metrics Connector
+
+This connector enables Amazon Athena to communicate with Cloudwatch Metrics, making your metrics data accessible via SQL.
+
+## Usage
+
+### Parameters
+
+The Athena Cloudwatch Metrics Connector exposes several configuration options via Lambda environment variables. More detail on the available parameters can be found below.
+
+1. **spill_bucket** - When the data returned by your Lambda function exceeds Lambda’s limits, this is the bucket that the data will be written to for Athena to read the excess from. (e.g. my_bucket)
+2. **spill_prefix** - (Optional) Defaults to a sub-folder in your bucket called 'athena-federation-spill'. Used in conjunction with spill_bucket, this is the path within the above bucket that large responses are spilled to. You should configure an S3 lifecycle on this location to delete old spills after X days/Hours.
+3. **kms_key_id** - (Optional) By default any data that is spilled to S3 is encrypted using AES-GCM and a randomly generated key. Setting a KMS Key ID allows your Lambda function to use KMS for key generation for a stronger source of encryption keys. (e.g. a7e63k4b-8loc-40db-a2a1-4d0en2cd8331)
+4. **disable_spill_encryption** - (Optional) Defaults to False so that any data that is spilled to S3 is encrypted using AES-GCM, either with a randomly generated key or with keys generated via KMS. Setting this to True disables spill encryption. You may wish to disable encryption for improved performance, especially if your spill location in S3 uses S3 Server Side Encryption. (e.g. True or False)
+
+The connector also supports AIMD Congestion Control for handling throttling events from Cloudwatch via the Athena Query Federation SDK's ThrottlingInvoker construct. You can tweak the default throttling behavior by setting any of the below (optional) environment variables:
+
+1. **throttle_initial_delay_ms** - (Default: 10ms) This is the initial call delay applied after the first congestion event.
+1. **throttle_max_delay_ms** - (Default: 1000ms) This is the max delay between calls. You can derive TPS by dividing it into 1000ms.
+1. **throttle_decrease_factor** - (Default: 0.5) This is the factor by which we reduce our call rate.
+1. **throttle_increase_ms** - (Default: 10ms) This is the rate at which we decrease the call delay.
+
+
+### Databases & Tables
+
+The Athena Cloudwatch Metrics Connector maps your Namespaces, Dimensions, Metrics, and Metric Values into two tables in a single schema called "default".
+
+1. **metrics** - This table contains the available metrics as uniquely defined by a triple of namespace, dimension set, and metric name. More specifically, this table contains the following columns.
+
+ * **namespace** - A VARCHAR containing the namespace.
+ * **metric_name** - A VARCHAR containing the metric name.
+ * **dimensions** - A LIST of STRUCTS comprised of dim_name (VARCHAR) and dim_value (VARCHAR).
+ * **statistic** - A LIST of VARCHAR statistics (e.g. p90, AVERAGE, etc..) available for the metric.
+
+1. **metric_samples** - This table contains the available metric samples for each metric named in the **metrics** table. More specifically, the table contains the following columns:
+ * **namespace** - A VARCHAR containing the namespace.
+ * **metric_name** - A VARCHAR containing the metric name.
+ * **dimensions** - A LIST of STRUCTS comprised of dim_name (VARCHAR) and dim_value (VARCHAR).
+ * **dim_name** - A VARCHAR convenience field used to easily filter on a single dimension name.
+ * **dim_value** - A VARCHAR convenience field used to easily filter on a single dimension value.
+ * **period** - An INT field representing the 'period' of the metric in seconds. (e.g. 60 second metric)
+ * **timestamp** - A BIGINT field representing the epoch time (in seconds) the metric sample is for.
+ * **value** - A FLOAT8 field containing the value of the sample.
+ * **statistic** - A VARCHAR containing the statistic type of the sample. (e.g. AVERAGE, p90, etc..)
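+
+For example, the convenience **dim_name** and **dim_value** columns above make it easy to pull the samples for a single dimension without unnesting the **dimensions** column. The query below is an illustrative sketch: it assumes the AWS/Lambda `Invocations` metric with its `FunctionName` dimension, and `my-function` is a placeholder you would replace with one of your own function names.
+
+```sql
+SELECT *
+FROM "lambda:<function_name>"."default".metric_samples
+WHERE namespace = 'AWS/Lambda'
+  AND metric_name = 'Invocations'
+  AND dim_name = 'FunctionName'
+  AND dim_value = 'my-function'
+  AND statistic = 'Average'
+  AND period = 60
+LIMIT 100;
+```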
+
+### Required Permissions
+
+Review the "Policies" section of the athena-cloudwatch-metrics.yaml file for full details on the IAM Policies required by this connector. A brief summary is below.
+
+1. S3 Write Access - In order to successfully handle large queries, the connector requires write access to a location in S3.
+2. Cloudwatch Metrics ReadOnly - The connector uses this access to query your metrics data.
+3. Cloudwatch Logs Write - The connector uses this access to write its own diagnostic logs.
+4. Athena GetQueryExecution - The connector uses this access to fast-fail when the upstream Athena query has terminated.
+
+### Deploying The Connector
+
+To use this connector in your queries, navigate to AWS Serverless Application Repository and deploy a pre-built version of this connector. Alternatively, you can build and deploy this connector from source by following the steps below, or use the more detailed tutorial in the athena-example module:
+
+1. From the athena-federation-sdk dir, run `mvn clean install` if you haven't already.
+2. From the athena-cloudwatch-metrics dir, run `mvn clean install`.
+3. From the athena-cloudwatch-metrics dir, run `../tools/publish.sh S3_BUCKET_NAME athena-cloudwatch-metrics` to publish the connector to your private AWS Serverless Application Repository. The S3_BUCKET in the command is where a copy of the connector's code will be stored for Serverless Application Repository to retrieve it. This allows users with the requisite permissions to deploy instances of the connector via a 1-Click form in the [Serverless Application Repository](https://aws.amazon.com/serverless/serverlessrepo).
+4. Try running a query like the one below in Athena:
+```sql
+-- Get the list of available metrics
+select * from "lambda:<function_name>"."default".metrics limit 100
+
+-- Query the last 3 days of AWS/Lambda Invocations metrics
+SELECT *
+FROM "lambda:<function_name>"."default".metric_samples
+WHERE metric_name = 'Invocations'
+ AND namespace = 'AWS/Lambda'
+ AND statistic IN ( 'p90', 'Average' )
+ AND period = 60
+ AND timestamp BETWEEN To_unixtime(Now() - INTERVAL '3' day) AND
+ To_unixtime(Now())
+LIMIT 100;
+```
+
+## Performance
+
+The Athena Cloudwatch Metrics Connector will attempt to parallelize queries against Cloudwatch Metrics by parallelizing scans of the various metrics needed for your query. Predicate Pushdown is performed within the Lambda function and also within Cloudwatch Metrics for certain time period, metric, namespace, and dimension filters.
+
+## License
+
+This project is licensed under the Apache-2.0 License.
\ No newline at end of file
diff --git a/athena-cloudwatch-metrics/athena-cloudwatch-metrics.yaml b/athena-cloudwatch-metrics/athena-cloudwatch-metrics.yaml
new file mode 100644
index 0000000000..1d9d44e84f
--- /dev/null
+++ b/athena-cloudwatch-metrics/athena-cloudwatch-metrics.yaml
@@ -0,0 +1,66 @@
+Transform: 'AWS::Serverless-2016-10-31'
+Metadata:
+ 'AWS::ServerlessRepo::Application':
+ Name: AthenaCloudwatchMetricsConnector
+ Description: 'This connector enables Amazon Athena to communicate with Cloudwatch Metrics, making your metrics data accessible via SQL.'
+ Author: 'Amazon Athena'
+ SpdxLicenseId: Apache-2.0
+ LicenseUrl: LICENSE.txt
+ ReadmeUrl: README.md
+ Labels:
+ - athena-federation
+ HomePageUrl: 'https://github.com/awslabs/aws-athena-query-federation'
+ SemanticVersion: 1.0.0
+ SourceCodeUrl: 'https://github.com/awslabs/aws-athena-query-federation'
+Parameters:
+ AthenaCatalogName:
+ Description: 'The name you will give to this catalog in Athena. It will also be used as the function name.'
+ Type: String
+ SpillBucket:
+ Description: 'The bucket where this function can spill data.'
+ Type: String
+ SpillPrefix:
+ Description: 'The bucket prefix where this function can spill large responses.'
+ Type: String
+ Default: athena-spill
+ LambdaTimeout:
+ Description: 'Maximum Lambda invocation runtime in seconds. (min 1 - 900 max)'
+ Default: 900
+ Type: Number
+ LambdaMemory:
+ Description: 'Lambda memory in MB (min 128 - 3008 max).'
+ Default: 3008
+ Type: Number
+ DisableSpillEncryption:
+ Description: "WARNING: If set to 'true' encryption for spilled data is disabled."
+ Default: 'false'
+ Type: String
+Resources:
+ ConnectorConfig:
+ Type: 'AWS::Serverless::Function'
+ Properties:
+ Environment:
+ Variables:
+ disable_spill_encryption: !Ref DisableSpillEncryption
+ spill_bucket: !Ref SpillBucket
+ spill_prefix: !Ref SpillPrefix
+ FunctionName: !Ref AthenaCatalogName
+ Handler: "com.amazonaws.athena.connectors.cloudwatch.metrics.MetricsCompositeHandler"
+ CodeUri: "./target/athena-cloudwatch-metrics-1.0.jar"
+ Description: "Enables Amazon Athena to communicate with Cloudwatch Metrics, making your metrics data accessible via SQL"
+ Runtime: java8
+ Timeout: !Ref LambdaTimeout
+ MemorySize: !Ref LambdaMemory
+ Policies:
+ - Statement:
+ - Action:
+ - cloudwatch:Describe*
+ - cloudwatch:Get*
+ - cloudwatch:List*
+ Effect: Allow
+ Resource: '*'
+ Version: '2012-10-17'
+ #S3CrudPolicy allows our connector to spill large responses to S3. You can optionally replace this pre-made policy
+ #with one that is more restrictive and can only 'put' but not read,delete, or overwrite files.
+ - S3CrudPolicy:
+ BucketName: !Ref SpillBucket
\ No newline at end of file
diff --git a/athena-cloudwatch-metrics/pom.xml b/athena-cloudwatch-metrics/pom.xml
new file mode 100644
index 0000000000..b2bd8796f8
--- /dev/null
+++ b/athena-cloudwatch-metrics/pom.xml
@@ -0,0 +1,57 @@
+<?xml version="1.0"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <parent>
+        <artifactId>aws-athena-query-federation</artifactId>
+        <groupId>com.amazonaws</groupId>
+        <version>1.0</version>
+    </parent>
+    <modelVersion>4.0.0</modelVersion>
+
+    <artifactId>athena-cloudwatch-metrics</artifactId>
+
+    <dependencies>
+        <dependency>
+            <groupId>com.amazonaws</groupId>
+            <artifactId>aws-athena-federation-sdk</artifactId>
+            <version>${aws-athena-federation-sdk.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>com.amazonaws</groupId>
+            <artifactId>aws-java-sdk-cloudwatch</artifactId>
+            <version>1.11.490</version>
+        </dependency>
+    </dependencies>
+
+    <build>
+        <plugins>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-shade-plugin</artifactId>
+                <version>3.2.1</version>
+                <configuration>
+                    <createDependencyReducedPom>false</createDependencyReducedPom>
+                    <filters>
+                        <filter>
+                            <artifact>*:*</artifact>
+                            <excludes>
+                                <exclude>META-INF/*.SF</exclude>
+                                <exclude>META-INF/*.DSA</exclude>
+                                <exclude>META-INF/*.RSA</exclude>
+                            </excludes>
+                        </filter>
+                    </filters>
+                </configuration>
+                <executions>
+                    <execution>
+                        <phase>package</phase>
+                        <goals>
+                            <goal>shade</goal>
+                        </goals>
+                    </execution>
+                </executions>
+            </plugin>
+        </plugins>
+    </build>
+</project>
\ No newline at end of file
diff --git a/athena-cloudwatch-metrics/src/main/java/com/amazonaws/athena/connectors/cloudwatch/metrics/DimensionSerDe.java b/athena-cloudwatch-metrics/src/main/java/com/amazonaws/athena/connectors/cloudwatch/metrics/DimensionSerDe.java
new file mode 100644
index 0000000000..7752896ebe
--- /dev/null
+++ b/athena-cloudwatch-metrics/src/main/java/com/amazonaws/athena/connectors/cloudwatch/metrics/DimensionSerDe.java
@@ -0,0 +1,93 @@
+/*-
+ * #%L
+ * athena-cloudwatch-metrics
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+package com.amazonaws.athena.connectors.cloudwatch.metrics;
+
+import com.amazonaws.services.cloudwatch.model.Dimension;
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import java.io.IOException;
+import java.util.List;
+
+/**
+ * Used to serialize and deserialize Cloudwatch Metrics Dimension objects. This is used
+ * when creating and processing Splits.
+ */
+public class DimensionSerDe
+{
+ protected static final String SERIALZIE_DIM_FIELD_NAME = "d";
+ private static final ObjectMapper mapper = new ObjectMapper();
+
+ private DimensionSerDe() {}
+
+ /**
+ * Serializes the provided List of Dimensions.
+ *
+ * @param dim The list of dimensions to serialize.
+ * @return A String containing the serialized list of Dimensions.
+ */
+ public static String serialize(List<Dimension> dim)
+ {
+ try {
+ return mapper.writeValueAsString(new DimensionHolder(dim));
+ }
+ catch (JsonProcessingException ex) {
+ throw new RuntimeException(ex);
+ }
+ }
+
+ /**
+ * Deserializes the provided String into a List of Dimensions.
+ *
+ * @param serializeDim A serialized list of Dimensions.
+ * @return The List of Dimensions represented by the serialized string.
+ */
+ public static List<Dimension> deserialize(String serializeDim)
+ {
+ try {
+ return mapper.readValue(serializeDim, DimensionHolder.class).getDimensions();
+ }
+ catch (IOException ex) {
+ throw new RuntimeException(ex);
+ }
+ }
+
+ /**
+ * Helper which allows us to use Jackson's Object Mapper to serialize a List of Dimensions.
+ */
+ private static class DimensionHolder
+ {
+ private final List<Dimension> dimensions;
+
+ @JsonCreator
+ public DimensionHolder(@JsonProperty("dimensions") List<Dimension> dimensions)
+ {
+ this.dimensions = dimensions;
+ }
+
+ @JsonProperty
+ public List<Dimension> getDimensions()
+ {
+ return dimensions;
+ }
+ }
+}
diff --git a/athena-cloudwatch-metrics/src/main/java/com/amazonaws/athena/connectors/cloudwatch/metrics/MetricUtils.java b/athena-cloudwatch-metrics/src/main/java/com/amazonaws/athena/connectors/cloudwatch/metrics/MetricUtils.java
new file mode 100644
index 0000000000..862396bd58
--- /dev/null
+++ b/athena-cloudwatch-metrics/src/main/java/com/amazonaws/athena/connectors/cloudwatch/metrics/MetricUtils.java
@@ -0,0 +1,198 @@
+/*-
+ * #%L
+ * athena-cloudwatch-metrics
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+package com.amazonaws.athena.connectors.cloudwatch.metrics;
+
+import com.amazonaws.athena.connector.lambda.domain.Split;
+import com.amazonaws.athena.connector.lambda.domain.predicate.ConstraintEvaluator;
+import com.amazonaws.athena.connector.lambda.domain.predicate.Constraints;
+import com.amazonaws.athena.connector.lambda.domain.predicate.Range;
+import com.amazonaws.athena.connector.lambda.domain.predicate.SortedRangeSet;
+import com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet;
+import com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest;
+import com.amazonaws.services.cloudwatch.model.Dimension;
+import com.amazonaws.services.cloudwatch.model.DimensionFilter;
+import com.amazonaws.services.cloudwatch.model.GetMetricDataRequest;
+import com.amazonaws.services.cloudwatch.model.ListMetricsRequest;
+import com.amazonaws.services.cloudwatch.model.Metric;
+import com.amazonaws.services.cloudwatch.model.MetricDataQuery;
+import com.amazonaws.services.cloudwatch.model.MetricStat;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Date;
+import java.util.List;
+import java.util.Map;
+
+import static com.amazonaws.athena.connectors.cloudwatch.metrics.tables.Table.DIMENSION_NAME_FIELD;
+import static com.amazonaws.athena.connectors.cloudwatch.metrics.tables.Table.DIMENSION_VALUE_FIELD;
+import static com.amazonaws.athena.connectors.cloudwatch.metrics.tables.Table.METRIC_NAME_FIELD;
+import static com.amazonaws.athena.connectors.cloudwatch.metrics.tables.Table.NAMESPACE_FIELD;
+import static com.amazonaws.athena.connectors.cloudwatch.metrics.tables.Table.PERIOD_FIELD;
+import static com.amazonaws.athena.connectors.cloudwatch.metrics.tables.Table.STATISTIC_FIELD;
+import static com.amazonaws.athena.connectors.cloudwatch.metrics.tables.Table.TIMESTAMP_FIELD;
+
+/**
+ * Helper which prepares and filters Cloudwatch Metrics requests.
+ */
+public class MetricUtils
+{
+ private static final Logger logger = LoggerFactory.getLogger(MetricUtils.class);
+
+ //this is a format required by Cloudwatch Metrics
+ private static final String METRIC_ID = "m1";
+
+ private MetricUtils() {}
+
+ /**
+ * Filters metrics that have at least 1 dimension matching the DIMENSION_NAME_FIELD and DIMENSION_VALUE_FIELD filters.
+ * This is just an optimization and isn't fully correct. We depend on the calling engine to apply full constraints. Also
+ * filters metric name and namespace.
+ *
+ * @return True if the supplied metric contains at least 1 Dimension matching the evaluator.
+ */
+ protected static boolean applyMetricConstraints(ConstraintEvaluator evaluator, Metric metric, String statistic)
+ {
+ if (!evaluator.apply(NAMESPACE_FIELD, metric.getNamespace())) {
+ return false;
+ }
+
+ if (!evaluator.apply(METRIC_NAME_FIELD, metric.getMetricName())) {
+ return false;
+ }
+
+ if (statistic != null && !evaluator.apply(STATISTIC_FIELD, statistic)) {
+ return false;
+ }
+
+ for (Dimension next : metric.getDimensions()) {
+ if (evaluator.apply(DIMENSION_NAME_FIELD, next.getName()) && evaluator.apply(DIMENSION_VALUE_FIELD, next.getValue())) {
+ return true;
+ }
+ }
+
+ if (metric.getDimensions().isEmpty() &&
+ evaluator.apply(DIMENSION_NAME_FIELD, null) &&
+ evaluator.apply(DIMENSION_VALUE_FIELD, null)) {
+ return true;
+ }
+
+ return false;
+ }
+
+ /**
+ * Attempts to push the supplied predicate constraints onto the Cloudwatch Metrics request.
+ */
+ protected static void pushDownPredicate(Constraints constraints, ListMetricsRequest listMetricsRequest)
+ {
+ Map<String, ValueSet> summary = constraints.getSummary();
+
+ ValueSet namespaceConstraint = summary.get(NAMESPACE_FIELD);
+ if (namespaceConstraint != null && namespaceConstraint.isSingleValue()) {
+ listMetricsRequest.setNamespace(namespaceConstraint.getSingleValue().toString());
+ }
+
+ ValueSet metricConstraint = summary.get(METRIC_NAME_FIELD);
+ if (metricConstraint != null && metricConstraint.isSingleValue()) {
+ listMetricsRequest.setMetricName(metricConstraint.getSingleValue().toString());
+ }
+
+ ValueSet dimensionNameConstraint = summary.get(DIMENSION_NAME_FIELD);
+ ValueSet dimensionValueConstraint = summary.get(DIMENSION_VALUE_FIELD);
+ if (dimensionNameConstraint != null && dimensionNameConstraint.isSingleValue() &&
+ dimensionValueConstraint != null && dimensionValueConstraint.isSingleValue()) {
+ DimensionFilter filter = new DimensionFilter()
+ .withName(dimensionNameConstraint.getSingleValue().toString())
+ .withValue(dimensionValueConstraint.getSingleValue().toString());
+ listMetricsRequest.setDimensions(Collections.singletonList(filter));
+ }
+ }
+
+ /**
+ * Creates a Cloudwatch Metrics sample data request from the provided inputs
+ *
+ * @param readRecordsRequest The RecordReadRequest to make into a Cloudwatch Metrics Data request.
+ * @return The Cloudwatch Metrics Data request that matches the requested read operation.
+ */
+ protected static GetMetricDataRequest makeGetMetricDataRequest(ReadRecordsRequest readRecordsRequest)
+ {
+ Split split = readRecordsRequest.getSplit();
+ List<Dimension> dimensions = DimensionSerDe.deserialize(split.getProperty(DimensionSerDe.SERIALZIE_DIM_FIELD_NAME));
+ GetMetricDataRequest dataRequest = new GetMetricDataRequest();
+ com.amazonaws.services.cloudwatch.model.Metric metric = new com.amazonaws.services.cloudwatch.model.Metric();
+ metric.setNamespace(split.getProperty(NAMESPACE_FIELD));
+ metric.setMetricName(split.getProperty(METRIC_NAME_FIELD));
+
+ List<Dimension> dList = new ArrayList<>();
+ for (Dimension nextDim : dimensions) {
+ dList.add(new Dimension().withName(nextDim.getName()).withValue(nextDim.getValue()));
+ }
+ metric.setDimensions(dList);
+
+ MetricDataQuery mds = new MetricDataQuery()
+ .withMetricStat(new MetricStat()
+ .withMetric(metric)
+ .withPeriod(Integer.valueOf(split.getProperty(PERIOD_FIELD)))
+ .withStat(split.getProperty(STATISTIC_FIELD)))
+ .withId(METRIC_ID);
+
+ dataRequest.withMetricDataQueries(Collections.singletonList(mds));
+
+ ValueSet timeConstraint = readRecordsRequest.getConstraints().getSummary().get(TIMESTAMP_FIELD);
+ if (timeConstraint instanceof SortedRangeSet && !timeConstraint.isNullAllowed()) {
+ //SortedRangeSet is how >, <, between is represented which are easiest and most common when
+ //searching logs so we attempt to push that down here as an optimization. SQL can represent complex
+ //overlapping ranges which Cloudwatch can not support so this is not a replacement for applying
+ //constraints using the ConstraintEvaluator.
+
+ Range basicPredicate = ((SortedRangeSet) timeConstraint).getSpan();
+
+ if (!basicPredicate.getLow().isNullValue()) {
+ Long lowerBound = (Long) basicPredicate.getLow().getValue();
+ //TODO: confirm timezone handling
+ logger.info("makeGetMetricsRequest: with startTime " + (lowerBound * 1000) + " " + new Date(lowerBound * 1000));
+ dataRequest.withStartTime(new Date(lowerBound * 1000));
+ }
+ else {
+ //TODO: confirm timezone handling
+ dataRequest.withStartTime(new Date(0));
+ }
+
+ if (!basicPredicate.getHigh().isNullValue()) {
+ Long upperBound = (Long) basicPredicate.getHigh().getValue();
+ //TODO: confirm timezone handling
+ logger.info("makeGetMetricsRequest: with endTime " + (upperBound * 1000) + " " + new Date(upperBound * 1000));
+ dataRequest.withEndTime(new Date(upperBound * 1000));
+ }
+ else {
+ //TODO: confirm timezone handling
+ dataRequest.withEndTime(new Date(System.currentTimeMillis()));
+ }
+ }
+ else {
+ //TODO: confirm timezone handling
+ dataRequest.withStartTime(new Date(0));
+ dataRequest.withEndTime(new Date(System.currentTimeMillis()));
+ }
+
+ return dataRequest;
+ }
+}
diff --git a/athena-cloudwatch-metrics/src/main/java/com/amazonaws/athena/connectors/cloudwatch/metrics/MetricsCompositeHandler.java b/athena-cloudwatch-metrics/src/main/java/com/amazonaws/athena/connectors/cloudwatch/metrics/MetricsCompositeHandler.java
new file mode 100644
index 0000000000..6c2999ddf3
--- /dev/null
+++ b/athena-cloudwatch-metrics/src/main/java/com/amazonaws/athena/connectors/cloudwatch/metrics/MetricsCompositeHandler.java
@@ -0,0 +1,35 @@
+/*-
+ * #%L
+ * athena-cloudwatch-metrics
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+package com.amazonaws.athena.connectors.cloudwatch.metrics;
+
+import com.amazonaws.athena.connector.lambda.handlers.CompositeHandler;
+
+/**
+ * Boilerplate composite handler that allows us to use a single Lambda function for both
+ * Metadata and Data. In this case we just compose MetricsMetadataHandler and MetricsRecordHandler.
+ */
+public class MetricsCompositeHandler
+ extends CompositeHandler
+{
+ public MetricsCompositeHandler()
+ {
+ super(new MetricsMetadataHandler(), new MetricsRecordHandler());
+ }
+}
diff --git a/athena-cloudwatch-metrics/src/main/java/com/amazonaws/athena/connectors/cloudwatch/metrics/MetricsExceptionFilter.java b/athena-cloudwatch-metrics/src/main/java/com/amazonaws/athena/connectors/cloudwatch/metrics/MetricsExceptionFilter.java
new file mode 100644
index 0000000000..4810c6a017
--- /dev/null
+++ b/athena-cloudwatch-metrics/src/main/java/com/amazonaws/athena/connectors/cloudwatch/metrics/MetricsExceptionFilter.java
@@ -0,0 +1,49 @@
+/*-
+ * #%L
+ * athena-cloudwatch-metrics
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+package com.amazonaws.athena.connectors.cloudwatch.metrics;
+
+import com.amazonaws.athena.connector.lambda.ThrottlingInvoker;
+import com.amazonaws.services.cloudwatch.model.AmazonCloudWatchException;
+import com.amazonaws.services.cloudwatch.model.LimitExceededException;
+
+/**
+ * Used to identify Exceptions that are related to Cloudwatch Metrics throttling events.
+ */
+public class MetricsExceptionFilter
+ implements ThrottlingInvoker.ExceptionFilter
+{
+ public static final ThrottlingInvoker.ExceptionFilter EXCEPTION_FILTER = new MetricsExceptionFilter();
+
+ private MetricsExceptionFilter() {}
+
+ @Override
+ public boolean isMatch(Exception ex)
+ {
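+ //Cloudwatch sometimes reports throttling as a generic AmazonCloudWatchException, so we match on the
+ //error message text in addition to the typed LimitExceededException below.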
+ if (ex instanceof AmazonCloudWatchException && ex.getMessage().startsWith("Rate exceeded")) {
+ return true;
+ }
+
+ if (ex instanceof AmazonCloudWatchException && ex.getMessage().startsWith("Request has been throttled")) {
+ return true;
+ }
+
+ return (ex instanceof LimitExceededException);
+ }
+}
diff --git a/athena-cloudwatch-metrics/src/main/java/com/amazonaws/athena/connectors/cloudwatch/metrics/MetricsMetadataHandler.java b/athena-cloudwatch-metrics/src/main/java/com/amazonaws/athena/connectors/cloudwatch/metrics/MetricsMetadataHandler.java
new file mode 100644
index 0000000000..fa8e4dc7b0
--- /dev/null
+++ b/athena-cloudwatch-metrics/src/main/java/com/amazonaws/athena/connectors/cloudwatch/metrics/MetricsMetadataHandler.java
@@ -0,0 +1,286 @@
+/*-
+ * #%L
+ * athena-cloudwatch-metrics
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+package com.amazonaws.athena.connectors.cloudwatch.metrics;
+
+import com.amazonaws.athena.connector.lambda.QueryStatusChecker;
+import com.amazonaws.athena.connector.lambda.ThrottlingInvoker;
+import com.amazonaws.athena.connector.lambda.data.BlockAllocator;
+import com.amazonaws.athena.connector.lambda.data.BlockWriter;
+import com.amazonaws.athena.connector.lambda.domain.Split;
+import com.amazonaws.athena.connector.lambda.domain.TableName;
+import com.amazonaws.athena.connector.lambda.domain.predicate.ConstraintEvaluator;
+import com.amazonaws.athena.connector.lambda.domain.predicate.Constraints;
+import com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet;
+import com.amazonaws.athena.connector.lambda.handlers.MetadataHandler;
+import com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest;
+import com.amazonaws.athena.connector.lambda.metadata.GetSplitsResponse;
+import com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutRequest;
+import com.amazonaws.athena.connector.lambda.metadata.GetTableRequest;
+import com.amazonaws.athena.connector.lambda.metadata.GetTableResponse;
+import com.amazonaws.athena.connector.lambda.metadata.ListSchemasRequest;
+import com.amazonaws.athena.connector.lambda.metadata.ListSchemasResponse;
+import com.amazonaws.athena.connector.lambda.metadata.ListTablesRequest;
+import com.amazonaws.athena.connector.lambda.metadata.ListTablesResponse;
+import com.amazonaws.athena.connector.lambda.security.EncryptionKeyFactory;
+import com.amazonaws.athena.connectors.cloudwatch.metrics.tables.MetricSamplesTable;
+import com.amazonaws.athena.connectors.cloudwatch.metrics.tables.MetricsTable;
+import com.amazonaws.athena.connectors.cloudwatch.metrics.tables.Table;
+import com.amazonaws.services.athena.AmazonAthena;
+import com.amazonaws.services.cloudwatch.AmazonCloudWatch;
+import com.amazonaws.services.cloudwatch.AmazonCloudWatchClientBuilder;
+import com.amazonaws.services.cloudwatch.model.ListMetricsRequest;
+import com.amazonaws.services.cloudwatch.model.ListMetricsResult;
+import com.amazonaws.services.cloudwatch.model.Metric;
+import com.amazonaws.services.secretsmanager.AWSSecretsManager;
+import org.apache.arrow.util.VisibleForTesting;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import static com.amazonaws.athena.connectors.cloudwatch.metrics.MetricsExceptionFilter.EXCEPTION_FILTER;
+import static com.amazonaws.athena.connectors.cloudwatch.metrics.tables.Table.METRIC_NAME_FIELD;
+import static com.amazonaws.athena.connectors.cloudwatch.metrics.tables.Table.NAMESPACE_FIELD;
+import static com.amazonaws.athena.connectors.cloudwatch.metrics.tables.Table.PERIOD_FIELD;
+import static com.amazonaws.athena.connectors.cloudwatch.metrics.tables.Table.STATISTIC_FIELD;
+
+/**
+ * Handles metadata requests for the Athena Cloudwatch Metrics Connector.
+ *
+ * For more detail, please see the module's README.md. Some notable characteristics of this class include:
+ *
+ * 1. Provides two tables (metrics and metric_samples) for accessing Cloudwatch Metrics data via the "default" schema.
+ * 2. Supports Predicate Pushdown into Cloudwatch Metrics for most fields.
+ * 3. If multiple Metrics (namespace, metric, dimension(s), and statistic) are requested, they can be read in parallel.
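+ *
+ * For example (illustrative only; the catalog name depends on how the connector's Lambda function is registered),
+ * a query for metric samples might look like:
+ *   SELECT timestamp, value FROM "lambda:cloudwatch_metrics".default.metric_samples
+ *   WHERE namespace = 'AWS/Lambda' AND metric_name = 'Invocations' AND statistic = 'Sum' AND period = 60;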
+ */
+public class MetricsMetadataHandler
+ extends MetadataHandler
+{
+ private static final Logger logger = LoggerFactory.getLogger(MetricsMetadataHandler.class);
+
+ //Used to log diagnostic info about this connector
+ private static final String SOURCE_TYPE = "metrics";
+
+ //List of available statistics (AVERAGE, p90, etc...).
+ protected static final List<String> STATISTICS = new ArrayList<>();
+ //The schema (aka database) supported by this connector
+ protected static final String SCHEMA_NAME = "default";
+ //Schema for the metrics table
+ private static final Table METRIC_TABLE;
+ //Schema for the metric_samples table.
+ private static final Table METRIC_DATA_TABLE;
+ //Name of the table which contains details of available metrics.
+ private static final String METRIC_TABLE_NAME;
+ //Name of the table which contains metric samples.
+ private static final String METRIC_SAMPLES_TABLE_NAME;
+ //Lookup table for resolving table name to Schema.
+ private static final Map<String, Table> TABLES = new HashMap<>();
+ //The default metric period to query (60 seconds)
+ private static final int DEFAULT_PERIOD_SEC = 60;
+ //Used to handle throttling events by applying AIMD congestion control
+ private final ThrottlingInvoker invoker = ThrottlingInvoker.newDefaultBuilder(EXCEPTION_FILTER).build();
+
+ private final AmazonCloudWatch metrics;
+
+ static {
+ //The statistics supported by Cloudwatch Metrics by default
+ STATISTICS.add("Average");
+ STATISTICS.add("Minimum");
+ STATISTICS.add("Maximum");
+ STATISTICS.add("Sum");
+ STATISTICS.add("Sample Count");
+ STATISTICS.add("p99");
+ STATISTICS.add("p95");
+ STATISTICS.add("p90");
+ STATISTICS.add("p50");
+ STATISTICS.add("p10");
+
+ METRIC_TABLE = new MetricsTable();
+ METRIC_DATA_TABLE = new MetricSamplesTable();
+ METRIC_TABLE_NAME = METRIC_TABLE.getName();
+ METRIC_SAMPLES_TABLE_NAME = METRIC_DATA_TABLE.getName();
+ TABLES.put(METRIC_TABLE_NAME, METRIC_TABLE);
+ TABLES.put(METRIC_SAMPLES_TABLE_NAME, METRIC_DATA_TABLE);
+ }
+
+ public MetricsMetadataHandler()
+ {
+ super(SOURCE_TYPE);
+ metrics = AmazonCloudWatchClientBuilder.standard().build();
+ }
+
+ @VisibleForTesting
+ protected MetricsMetadataHandler(AmazonCloudWatch metrics,
+ EncryptionKeyFactory keyFactory,
+ AWSSecretsManager secretsManager,
+ AmazonAthena athena,
+ String spillBucket,
+ String spillPrefix)
+ {
+ super(keyFactory, secretsManager, athena, SOURCE_TYPE, spillBucket, spillPrefix);
+ this.metrics = metrics;
+ }
+
+ /**
+ * Only supports a single, static, schema defined by SCHEMA_NAME.
+ *
+ * @see MetadataHandler
+ */
+ @Override
+ public ListSchemasResponse doListSchemaNames(BlockAllocator blockAllocator, ListSchemasRequest listSchemasRequest)
+ {
+ return new ListSchemasResponse(listSchemasRequest.getCatalogName(), Collections.singletonList(SCHEMA_NAME));
+ }
+
+ /**
+ * Supports a set of static tables defined by: TABLES
+ *
+ * @see MetadataHandler
+ */
+ @Override
+ public ListTablesResponse doListTables(BlockAllocator blockAllocator, ListTablesRequest listTablesRequest)
+ {
+ List<TableName> tables = new ArrayList<>();
+ TABLES.keySet().stream().forEach(next -> tables.add(new TableName(SCHEMA_NAME, next)));
+ return new ListTablesResponse(listTablesRequest.getCatalogName(), tables);
+ }
+
+ /**
+ * Returns the details of the requested static table.
+ *
+ * @see MetadataHandler
+ */
+ @Override
+ public GetTableResponse doGetTable(BlockAllocator blockAllocator, GetTableRequest getTableRequest)
+ {
+ validateTable(getTableRequest.getTableName());
+ Table table = TABLES.get(getTableRequest.getTableName().getTableName());
+ return new GetTableResponse(getTableRequest.getCatalogName(),
+ getTableRequest.getTableName(),
+ table.getSchema(),
+ table.getPartitionColumns());
+ }
+
+ /**
+ * Our tables don't support complex layouts or partitioning, so we make this method a NoOp and the SDK
+ * automatically generates a single placeholder partition for us, since Athena needs at least 1 partition returned
+ * if there is potentially any data to read. We do this because Cloudwatch Metrics' APIs do not support the kind of
+ * filtering we would need for reasonably scoped partition pruning. Instead, we do the pruning at Split generation
+ * time and return a single partition here. The downside of doing it at Split generation time is that we sacrifice
+ * parallelizing Split generation. However, this is not a significant performance detriment for this connector since
+ * we can generate Splits rather quickly and easily.
+ *
+ * @see MetadataHandler
+ */
+ @Override
+ public void getPartitions(BlockWriter blockWriter, GetTableLayoutRequest request, QueryStatusChecker queryStatusChecker)
+ throws Exception
+ {
+ validateTable(request.getTableName());
+ //NoOp as we do not support partitioning.
+ }
+
+ /**
+ * Each 'metric' in Cloudwatch is uniquely identified by a quad of Namespace, Dimensions, MetricName, and Statistic. As such
+ * we can parallelize each metric as a unique split.
+ *
+ * @see MetadataHandler
+ */
+ @Override
+ public GetSplitsResponse doGetSplits(BlockAllocator blockAllocator, GetSplitsRequest getSplitsRequest)
+ throws Exception
+ {
+ validateTable(getSplitsRequest.getTableName());
+
+ //Handle requests for the METRIC_TABLE which requires only 1 split to list available metrics.
+ if (METRIC_TABLE_NAME.equals(getSplitsRequest.getTableName().getTableName())) {
+ //The request is just for meta-data about what metrics exist.
+ Split metricsSplit = Split.newBuilder(makeSpillLocation(getSplitsRequest), makeEncryptionKey()).build();
+ return new GetSplitsResponse(getSplitsRequest.getCatalogName(), metricsSplit);
+ }
+
+ //handle generating splits for reading actual metrics data.
+ try (ConstraintEvaluator constraintEvaluator = new ConstraintEvaluator(blockAllocator,
+ METRIC_DATA_TABLE.getSchema(),
+ getSplitsRequest.getConstraints())) {
+ ListMetricsRequest listMetricsRequest = new ListMetricsRequest();
+ MetricUtils.pushDownPredicate(getSplitsRequest.getConstraints(), listMetricsRequest);
+ listMetricsRequest.setNextToken(getSplitsRequest.getContinuationToken());
+
+ String period = getPeriodFromConstraint(getSplitsRequest.getConstraints());
+ Set<Split> splits = new HashSet<>();
+ ListMetricsResult result = invoker.invoke(() -> metrics.listMetrics(listMetricsRequest));
+ for (Metric nextMetric : result.getMetrics()) {
+ for (String nextStatistic : STATISTICS) {
+ if (MetricUtils.applyMetricConstraints(constraintEvaluator, nextMetric, nextStatistic)) {
+ splits.add(Split.newBuilder(makeSpillLocation(getSplitsRequest), makeEncryptionKey())
+ .add(DimensionSerDe.SERIALZIE_DIM_FIELD_NAME, DimensionSerDe.serialize(nextMetric.getDimensions()))
+ .add(METRIC_NAME_FIELD, nextMetric.getMetricName())
+ .add(NAMESPACE_FIELD, nextMetric.getNamespace())
+ .add(STATISTIC_FIELD, nextStatistic)
+ .add(PERIOD_FIELD, period)
+ .build());
+ }
+ }
+ }
+
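+ //Only return a continuation token if Cloudwatch gave us a new one; re-using an unchanged token would cause
+ //Athena to request the same page of metrics again.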
+ String continuationToken = null;
+ if (result.getNextToken() != null &&
+ !result.getNextToken().equalsIgnoreCase(listMetricsRequest.getNextToken())) {
+ continuationToken = result.getNextToken();
+ }
+
+ return new GetSplitsResponse(getSplitsRequest.getCatalogName(), splits, continuationToken);
+ }
+ }
+
+ /**
+ * Resolves the metric period to query, using a default if no period constraint is found.
+ */
+ private String getPeriodFromConstraint(Constraints constraints)
+ {
+ ValueSet period = constraints.getSummary().get(PERIOD_FIELD);
+ if (period != null && period.isSingleValue()) {
+ return String.valueOf(period.getSingleValue());
+ }
+
+ return String.valueOf(DEFAULT_PERIOD_SEC);
+ }
+
+ /**
+ * Validates that the requested schema and table exist in our static set of supported tables.
+ */
+ private void validateTable(TableName tableName)
+ {
+ if (!SCHEMA_NAME.equals(tableName.getSchemaName())) {
+ throw new RuntimeException("Unknown table " + tableName);
+ }
+
+ if (TABLES.get(tableName.getTableName()) == null) {
+ throw new RuntimeException("Unknown table " + tableName);
+ }
+ }
+}
diff --git a/athena-cloudwatch-metrics/src/main/java/com/amazonaws/athena/connectors/cloudwatch/metrics/MetricsRecordHandler.java b/athena-cloudwatch-metrics/src/main/java/com/amazonaws/athena/connectors/cloudwatch/metrics/MetricsRecordHandler.java
new file mode 100644
index 0000000000..73434dbda6
--- /dev/null
+++ b/athena-cloudwatch-metrics/src/main/java/com/amazonaws/athena/connectors/cloudwatch/metrics/MetricsRecordHandler.java
@@ -0,0 +1,262 @@
+/*-
+ * #%L
+ * athena-cloudwatch-metrics
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+package com.amazonaws.athena.connectors.cloudwatch.metrics;
+
+import com.amazonaws.athena.connector.lambda.QueryStatusChecker;
+import com.amazonaws.athena.connector.lambda.ThrottlingInvoker;
+import com.amazonaws.athena.connector.lambda.data.Block;
+import com.amazonaws.athena.connector.lambda.data.BlockSpiller;
+import com.amazonaws.athena.connector.lambda.domain.Split;
+import com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet;
+import com.amazonaws.athena.connector.lambda.handlers.RecordHandler;
+import com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest;
+import com.amazonaws.athena.connectors.cloudwatch.metrics.tables.MetricSamplesTable;
+import com.amazonaws.athena.connectors.cloudwatch.metrics.tables.MetricsTable;
+import com.amazonaws.athena.connectors.cloudwatch.metrics.tables.Table;
+import com.amazonaws.services.athena.AmazonAthena;
+import com.amazonaws.services.athena.AmazonAthenaClientBuilder;
+import com.amazonaws.services.cloudwatch.AmazonCloudWatch;
+import com.amazonaws.services.cloudwatch.AmazonCloudWatchClientBuilder;
+import com.amazonaws.services.cloudwatch.model.Dimension;
+import com.amazonaws.services.cloudwatch.model.GetMetricDataRequest;
+import com.amazonaws.services.cloudwatch.model.GetMetricDataResult;
+import com.amazonaws.services.cloudwatch.model.ListMetricsRequest;
+import com.amazonaws.services.cloudwatch.model.ListMetricsResult;
+import com.amazonaws.services.cloudwatch.model.Metric;
+import com.amazonaws.services.cloudwatch.model.MetricDataResult;
+import com.amazonaws.services.s3.AmazonS3;
+import com.amazonaws.services.s3.AmazonS3ClientBuilder;
+import com.amazonaws.services.secretsmanager.AWSSecretsManager;
+import com.amazonaws.services.secretsmanager.AWSSecretsManagerClientBuilder;
+import org.apache.arrow.util.VisibleForTesting;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.Date;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.concurrent.TimeoutException;
+
+import static com.amazonaws.athena.connector.lambda.data.FieldResolver.DEFAULT;
+import static com.amazonaws.athena.connectors.cloudwatch.metrics.MetricsExceptionFilter.EXCEPTION_FILTER;
+import static com.amazonaws.athena.connectors.cloudwatch.metrics.MetricsMetadataHandler.STATISTICS;
+import static com.amazonaws.athena.connectors.cloudwatch.metrics.tables.Table.DIMENSIONS_FIELD;
+import static com.amazonaws.athena.connectors.cloudwatch.metrics.tables.Table.DIMENSION_NAME_FIELD;
+import static com.amazonaws.athena.connectors.cloudwatch.metrics.tables.Table.DIMENSION_VALUE_FIELD;
+import static com.amazonaws.athena.connectors.cloudwatch.metrics.tables.Table.METRIC_NAME_FIELD;
+import static com.amazonaws.athena.connectors.cloudwatch.metrics.tables.Table.NAMESPACE_FIELD;
+import static com.amazonaws.athena.connectors.cloudwatch.metrics.tables.Table.PERIOD_FIELD;
+import static com.amazonaws.athena.connectors.cloudwatch.metrics.tables.Table.STATISTIC_FIELD;
+import static com.amazonaws.athena.connectors.cloudwatch.metrics.tables.Table.TIMESTAMP_FIELD;
+import static com.amazonaws.athena.connectors.cloudwatch.metrics.tables.Table.VALUE_FIELD;
+
+/**
+ * Handles data read record requests for the Athena Cloudwatch Metrics Connector.
+ *
+ * For more detail, please see the module's README.md. Some notable characteristics of this class include:
+ *
+ * 1. Reads and maps Cloudwatch Metrics and Metric Samples.
+ * 2. Attempts to push down time range predicates into Cloudwatch Metrics.
+ */
+public class MetricsRecordHandler
+ extends RecordHandler
+{
+ private static final Logger logger = LoggerFactory.getLogger(MetricsRecordHandler.class);
+
+ //Used to log diagnostic info about this connector
+ private static final String SOURCE_TYPE = "metrics";
+ //Schema for the metrics table.
+ private static final Table METRIC_TABLE = new MetricsTable();
+ //Schema for the metric_samples table.
+ private static final Table METRIC_DATA_TABLE = new MetricSamplesTable();
+
+ //Used to handle throttling events by applying AIMD congestion control
+ private final ThrottlingInvoker invoker = ThrottlingInvoker.newDefaultBuilder(EXCEPTION_FILTER).build();
+ private final AmazonS3 amazonS3;
+ private final AmazonCloudWatch metrics;
+
+ public MetricsRecordHandler()
+ {
+ this(AmazonS3ClientBuilder.defaultClient(),
+ AWSSecretsManagerClientBuilder.defaultClient(),
+ AmazonAthenaClientBuilder.defaultClient(),
+ AmazonCloudWatchClientBuilder.standard().build());
+ }
+
+ @VisibleForTesting
+ protected MetricsRecordHandler(AmazonS3 amazonS3, AWSSecretsManager secretsManager, AmazonAthena athena, AmazonCloudWatch metrics)
+ {
+ super(amazonS3, secretsManager, athena, SOURCE_TYPE);
+ this.amazonS3 = amazonS3;
+ this.metrics = metrics;
+ }
+
+ /**
+ * Scans Cloudwatch Metrics for the list of available metrics or the samples for a specific metric.
+ *
+ * @see RecordHandler
+ */
+ @Override
+ protected void readWithConstraint(BlockSpiller blockSpiller, ReadRecordsRequest readRecordsRequest, QueryStatusChecker queryStatusChecker)
+ throws TimeoutException
+ {
+ invoker.setBlockSpiller(blockSpiller);
+ if (readRecordsRequest.getTableName().getTableName().equalsIgnoreCase(METRIC_TABLE.getName())) {
+ readMetricsWithConstraint(blockSpiller, readRecordsRequest, queryStatusChecker);
+ }
+ else if (readRecordsRequest.getTableName().getTableName().equalsIgnoreCase(METRIC_DATA_TABLE.getName())) {
+ readMetricSamplesWithConstraint(blockSpiller, readRecordsRequest, queryStatusChecker);
+ }
+ }
+
+ /**
+ * Handles retrieving the list of available metrics, when the metrics table is queried, by calling ListMetrics on Cloudwatch Metrics.
+ */
+ private void readMetricsWithConstraint(BlockSpiller blockSpiller, ReadRecordsRequest request, QueryStatusChecker queryStatusChecker)
+ throws TimeoutException
+ {
+ ListMetricsRequest listMetricsRequest = new ListMetricsRequest();
+ MetricUtils.pushDownPredicate(request.getConstraints(), listMetricsRequest);
+ String prevToken;
+ Set<String> requiredFields = new HashSet<>();
+ request.getSchema().getFields().stream().forEach(next -> requiredFields.add(next.getName()));
+ ValueSet dimensionNameConstraint = request.getConstraints().getSummary().get(DIMENSION_NAME_FIELD);
+ ValueSet dimensionValueConstraint = request.getConstraints().getSummary().get(DIMENSION_VALUE_FIELD);
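+ //Page through ListMetrics results until Cloudwatch stops returning a new token or the query is no longer running.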
+ do {
+ prevToken = listMetricsRequest.getNextToken();
+ ListMetricsResult result = invoker.invoke(() -> metrics.listMetrics(listMetricsRequest));
+ for (Metric nextMetric : result.getMetrics()) {
+ blockSpiller.writeRows((Block block, int row) -> {
+ boolean matches = MetricUtils.applyMetricConstraints(blockSpiller.getConstraintEvaluator(), nextMetric, null);
+ if (matches) {
+ matches &= block.offerValue(METRIC_NAME_FIELD, row, nextMetric.getMetricName());
+ matches &= block.offerValue(NAMESPACE_FIELD, row, nextMetric.getNamespace());
+ matches &= block.offerComplexValue(STATISTIC_FIELD, row, DEFAULT, STATISTICS);
+
+ matches &= block.offerComplexValue(DIMENSIONS_FIELD,
+ row,
+ (Field field, Object val) -> {
+ if (field.getName().equals(DIMENSION_NAME_FIELD)) {
+ return ((Dimension) val).getName();
+ }
+ else if (field.getName().equals(DIMENSION_VALUE_FIELD)) {
+ return ((Dimension) val).getValue();
+ }
+
+ throw new RuntimeException("Unexpected field " + field.getName());
+ },
+ nextMetric.getDimensions());
+
+ //This field is 'faked' in that we just use it as a convenient way to filter single dimensions. As such
+ //we always populate it with the value of the filter if the constraint passed and the filter was singleValue
+ String dimName = (dimensionNameConstraint == null || !dimensionNameConstraint.isSingleValue())
+ ? null : (dimensionNameConstraint.getSingleValue().toString());
+ matches &= block.offerValue(DIMENSION_NAME_FIELD, row, dimName);
+
+ //This field is 'faked' in that we just use it as a convenient way to filter single dimensions. As such
+ //we always populate it with the value of the filter if the constraint passed and the filter was singleValue
+ String dimValue = (dimensionValueConstraint == null || !dimensionValueConstraint.isSingleValue())
+ ? null : dimensionValueConstraint.getSingleValue().toString();
+ matches &= block.offerValue(DIMENSION_VALUE_FIELD, row, dimValue);
+ }
+ return matches ? 1 : 0;
+ });
+ }
+ listMetricsRequest.setNextToken(result.getNextToken());
+ }
+ while (listMetricsRequest.getNextToken() != null && !listMetricsRequest.getNextToken().equalsIgnoreCase(prevToken) && queryStatusChecker.isQueryRunning());
+ }
+
+ /**
+ * Handles retrieving the samples for a specific metric from Cloudwatch Metrics.
+ */
+ private void readMetricSamplesWithConstraint(BlockSpiller blockSpiller, ReadRecordsRequest request, QueryStatusChecker queryStatusChecker)
+ throws TimeoutException
+ {
+ Split split = request.getSplit();
+ List<Dimension> dimensions = DimensionSerDe.deserialize(split.getProperty(DimensionSerDe.SERIALZIE_DIM_FIELD_NAME));
+ GetMetricDataRequest dataRequest = MetricUtils.makeGetMetricDataRequest(request);
+
+ String prevToken;
+ Set<String> requiredFields = new HashSet<>();
+ request.getSchema().getFields().stream().forEach(next -> requiredFields.add(next.getName()));
+ ValueSet dimensionNameConstraint = request.getConstraints().getSummary().get(DIMENSION_NAME_FIELD);
+ ValueSet dimensionValueConstraint = request.getConstraints().getSummary().get(DIMENSION_VALUE_FIELD);
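+ //Page through GetMetricData results until Cloudwatch stops returning a new token or the query is no longer running.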
+ do {
+ prevToken = dataRequest.getNextToken();
+ GetMetricDataResult result = invoker.invoke(() -> metrics.getMetricData(dataRequest));
+ for (MetricDataResult nextMetric : result.getMetricDataResults()) {
+ List<Date> timestamps = nextMetric.getTimestamps();
+ List<Double> values = nextMetric.getValues();
+ for (int i = 0; i < nextMetric.getValues().size(); i++) {
+ int sampleNum = i;
+ blockSpiller.writeRows((Block block, int row) -> {
+ /**
+ * Most constraints were already applied at split generation so we only need to apply
+ * a subset.
+ */
+ block.offerValue(METRIC_NAME_FIELD, row, split.getProperty(METRIC_NAME_FIELD));
+ block.offerValue(NAMESPACE_FIELD, row, split.getProperty(NAMESPACE_FIELD));
+ block.offerValue(STATISTIC_FIELD, row, split.getProperty(STATISTIC_FIELD));
+
+ block.offerComplexValue(DIMENSIONS_FIELD,
+ row,
+ (Field field, Object val) -> {
+ if (field.getName().equals(DIMENSION_NAME_FIELD)) {
+ return ((Dimension) val).getName();
+ }
+ else if (field.getName().equals(DIMENSION_VALUE_FIELD)) {
+ return ((Dimension) val).getValue();
+ }
+
+ throw new RuntimeException("Unexpected field " + field.getName());
+ },
+ dimensions);
+
+ //This field is 'faked' in that we just use it as a convenient way to filter single dimensions. As such
+ //we always populate it with the value of the filter if the constraint passed and the filter was singleValue
+ String dimName = (dimensionNameConstraint == null || !dimensionNameConstraint.isSingleValue())
+ ? null : dimensionNameConstraint.getSingleValue().toString();
+ block.offerValue(DIMENSION_NAME_FIELD, row, dimName);
+
+ //This field is 'faked' in that we just use it as a convenient way to filter single dimensions. As such
+ //we always populate it with the value of the filter if the constraint passed and the filter was singleValue
+ String dimVal = (dimensionValueConstraint == null || !dimensionValueConstraint.isSingleValue())
+ ? null : dimensionValueConstraint.getSingleValue().toString();
+ block.offerValue(DIMENSION_VALUE_FIELD, row, dimVal);
+
+ block.offerValue(PERIOD_FIELD, row, Integer.valueOf(split.getProperty(PERIOD_FIELD)));
+
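+ //Write the sample's value and its timestamp, converting the Date back to epoch seconds to match the
+ //metric_samples table's BIGINT timestamp column.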
+ boolean matches = block.offerValue(VALUE_FIELD, row, values.get(sampleNum));
+ long timestamp = timestamps.get(sampleNum).getTime() / 1000;
+ matches &= block.offerValue(TIMESTAMP_FIELD, row, timestamp);
+
+ return matches ? 1 : 0;
+ });
+ }
+ }
+ dataRequest.setNextToken(result.getNextToken());
+ }
+ while (dataRequest.getNextToken() != null && !dataRequest.getNextToken().equalsIgnoreCase(prevToken) && queryStatusChecker.isQueryRunning());
+ }
+}
diff --git a/athena-cloudwatch-metrics/src/main/java/com/amazonaws/athena/connectors/cloudwatch/metrics/tables/MetricSamplesTable.java b/athena-cloudwatch-metrics/src/main/java/com/amazonaws/athena/connectors/cloudwatch/metrics/tables/MetricSamplesTable.java
new file mode 100644
index 0000000000..02f325a9ac
--- /dev/null
+++ b/athena-cloudwatch-metrics/src/main/java/com/amazonaws/athena/connectors/cloudwatch/metrics/tables/MetricSamplesTable.java
@@ -0,0 +1,100 @@
+/*-
+ * #%L
+ * athena-cloudwatch-metrics
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+package com.amazonaws.athena.connectors.cloudwatch.metrics.tables;
+
+import com.amazonaws.athena.connector.lambda.data.FieldBuilder;
+import com.amazonaws.athena.connector.lambda.data.SchemaBuilder;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.pojo.Schema;
+
+import java.util.Collections;
+import java.util.Set;
+
+/**
+ * Defines the metadata associated with our static metric_samples table.
+ *
+ * This table contains the available metric samples for each metric named in the **metrics** table.
+ * More specifically, the table contains the following columns:
+ *
+ * **namespace** - A VARCHAR containing the namespace.
+ * **metric_name** - A VARCHAR containing the metric name.
+ * **dimensions** - A LIST of STRUCTS comprised of dim_name (VARCHAR) and dim_value (VARCHAR).
+ * **dim_name** - A VARCHAR convenience field used to easily filter on a single dimension name.
+ * **dim_value** - A VARCHAR convenience field used to easily filter on a single dimension value.
+ * **period** - An INT field representing the 'period' of the metric in seconds. (e.g. 60 second metric)
+ * **timestamp** - A BIGINT field representing the epoch time (in seconds) the metric sample is for.
+ * **value** - A FLOAT8 field containing the value of the sample.
+ * **statistic** - A VARCHAR containing the statistic type of the sample. (e.g. AVERAGE, p90, etc..)
+ */
+public class MetricSamplesTable
+ extends Table
+{
+ private final Schema schema;
+ private final String name;
+
+ public MetricSamplesTable()
+ {
+ schema = new SchemaBuilder().newBuilder()
+ .addStringField(NAMESPACE_FIELD)
+ .addStringField(METRIC_NAME_FIELD)
+ .addField(FieldBuilder.newBuilder(DIMENSIONS_FIELD, Types.MinorType.LIST.getType())
+ .addField(FieldBuilder.newBuilder(DIMENSIONS_FIELD, Types.MinorType.STRUCT.getType())
+ .addStringField(DIMENSION_NAME_FIELD)
+ .addStringField(DIMENSION_VALUE_FIELD)
+ .build())
+ .build())
+ .addStringField(DIMENSION_NAME_FIELD)
+ .addStringField(DIMENSION_VALUE_FIELD)
+ .addIntField(PERIOD_FIELD)
+ .addBigIntField(TIMESTAMP_FIELD)
+ .addFloat8Field(VALUE_FIELD)
+ .addStringField(STATISTIC_FIELD)
+ .addMetadata(NAMESPACE_FIELD, "Metric namespace")
+ .addMetadata(METRIC_NAME_FIELD, "Metric name")
+ .addMetadata(DIMENSIONS_FIELD, "Array of Dimensions for the given metric.")
+ .addMetadata(DIMENSION_NAME_FIELD, "Shortcut field that flattens dimension to allow easier filtering on a single dimension name. This field is left blank unless used in the where clause")
+ .addMetadata(DIMENSION_VALUE_FIELD, "Shortcut field that flattens dimension to allow easier filtering on a single dimension value. This field is left blank unless used in the where clause.")
+ .addMetadata(STATISTIC_FIELD, "Statistics type of this value (e.g. Maximum, Minimum, Average, Sample Count)")
+ .addMetadata(TIMESTAMP_FIELD, "The epoch time (in seconds) the value is for.")
+ .addMetadata(PERIOD_FIELD, "The period, in seconds, for the metric (e.g. 60 seconds, 120 seconds)")
+ .addMetadata(VALUE_FIELD, "The value for the sample.")
+ .build();
+
+ name = "metric_samples";
+ }
+
+ @Override
+ public String getName()
+ {
+ return name;
+ }
+
+ @Override
+ public Schema getSchema()
+ {
+ return schema;
+ }
+
+ @Override
+ public Set<String> getPartitionColumns()
+ {
+ return Collections.emptySet();
+ }
+}
diff --git a/athena-cloudwatch-metrics/src/main/java/com/amazonaws/athena/connectors/cloudwatch/metrics/tables/MetricsTable.java b/athena-cloudwatch-metrics/src/main/java/com/amazonaws/athena/connectors/cloudwatch/metrics/tables/MetricsTable.java
new file mode 100644
index 0000000000..6c76356e38
--- /dev/null
+++ b/athena-cloudwatch-metrics/src/main/java/com/amazonaws/athena/connectors/cloudwatch/metrics/tables/MetricsTable.java
@@ -0,0 +1,88 @@
+/*-
+ * #%L
+ * athena-cloudwatch-metrics
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+package com.amazonaws.athena.connectors.cloudwatch.metrics.tables;
+
+import com.amazonaws.athena.connector.lambda.data.FieldBuilder;
+import com.amazonaws.athena.connector.lambda.data.SchemaBuilder;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.pojo.Schema;
+
+import java.util.Collections;
+import java.util.Set;
+
+/**
+ * Defines the metadata associated with our static metrics table.
+ *
+ * This table contains the available metrics, as uniquely defined by a triple of namespace, dimension set, and metric name.
+ * More specifically, this table contains the following columns.
+ * * **namespace** - A VARCHAR containing the namespace.
+ * * **metric_name** - A VARCHAR containing the metric name.
+ * * **dimensions** - A LIST of STRUCTS comprised of dim_name (VARCHAR) and dim_value (VARCHAR).
+ * * **statistic** - A LIST of VARCHAR statistics (e.g. p90, AVERAGE, etc...) available for the metric.
+ */
+public class MetricsTable
+ extends Table
+{
+ private final Schema schema;
+ private final String name;
+
+ public MetricsTable()
+ {
+ schema = new SchemaBuilder().newBuilder()
+ .addStringField(NAMESPACE_FIELD)
+ .addStringField(METRIC_NAME_FIELD)
+ .addField(FieldBuilder.newBuilder(DIMENSIONS_FIELD, Types.MinorType.LIST.getType())
+ .addField(FieldBuilder.newBuilder(DIMENSIONS_FIELD, Types.MinorType.STRUCT.getType())
+ .addStringField(DIMENSION_NAME_FIELD)
+ .addStringField(DIMENSION_VALUE_FIELD)
+ .build())
+ .build())
+ .addStringField(DIMENSION_NAME_FIELD)
+ .addStringField(DIMENSION_VALUE_FIELD)
+ .addListField(STATISTIC_FIELD, Types.MinorType.VARCHAR.getType())
+ .addMetadata(NAMESPACE_FIELD, "Metric namespace")
+ .addMetadata(METRIC_NAME_FIELD, "Metric name")
+ .addMetadata(STATISTIC_FIELD, "List of statistics available for this metric (e.g. Maximum, Minimum, Average, Sample Count)")
+ .addMetadata(DIMENSIONS_FIELD, "Array of Dimensions for the given metric.")
+ .addMetadata(DIMENSION_NAME_FIELD, "Shortcut field that flattens dimension to allow easier filtering for metrics that contain the dimension name. This field is left blank unless used in the where clause.")
+ .addMetadata(DIMENSION_VALUE_FIELD, "Shortcut field that flattens dimension to allow easier filtering for metrics that contain the dimension value. This field is left blank unless used in the where clause.")
+ .build();
+
+ name = "metrics";
+ }
+
+ @Override
+ public String getName()
+ {
+ return name;
+ }
+
+ @Override
+ public Schema getSchema()
+ {
+ return schema;
+ }
+
+ @Override
+ public Set<String> getPartitionColumns()
+ {
+ return Collections.emptySet();
+ }
+}
diff --git a/athena-cloudwatch-metrics/src/main/java/com/amazonaws/athena/connectors/cloudwatch/metrics/tables/Table.java b/athena-cloudwatch-metrics/src/main/java/com/amazonaws/athena/connectors/cloudwatch/metrics/tables/Table.java
new file mode 100644
index 0000000000..812803d58c
--- /dev/null
+++ b/athena-cloudwatch-metrics/src/main/java/com/amazonaws/athena/connectors/cloudwatch/metrics/tables/Table.java
@@ -0,0 +1,53 @@
+/*-
+ * #%L
+ * athena-cloudwatch-metrics
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+package com.amazonaws.athena.connectors.cloudwatch.metrics.tables;
+
+import org.apache.arrow.vector.types.pojo.Schema;
+
+import java.util.Set;
+
+/**
+ * Defines some commonly required field names used by all tables and consumers of tables in this connector.
+ */
+public abstract class Table
+{
+ //The name of the metric name field.
+ public static final String METRIC_NAME_FIELD = "metric_name";
+ //The name of the namespace field.
+ public static final String NAMESPACE_FIELD = "namespace";
+ //The name of the dimensions field which houses a list of Cloudwatch Metrics Dimensions.
+ public static final String DIMENSIONS_FIELD = "dimensions";
+ //The name of the convenience Dimension name field which gives easy access to 1 dimension name.
+ public static final String DIMENSION_NAME_FIELD = "dim_name";
+ //The name of the convenience Dimension value field which gives easy access to 1 dimension value.
+ public static final String DIMENSION_VALUE_FIELD = "dim_value";
+ //The name of the timestamp field, denoting the time period a particular metric sample was for.
+ public static final String TIMESTAMP_FIELD = "timestamp";
+ //The name of the metric value field which holds the value of a metric sample.
+ public static final String VALUE_FIELD = "value";
+ //The name of the statistic field (e.g. AVERAGE, p90).
+ public static final String STATISTIC_FIELD = "statistic";
+ //The name of the period field (e.g. 60 seconds).
+ public static final String PERIOD_FIELD = "period";
+
+ public abstract String getName();
+ public abstract Schema getSchema();
+ public abstract Set<String> getPartitionColumns();
+}
diff --git a/athena-cloudwatch-metrics/src/test/java/com/amazonaws/athena/connectors/cloudwatch/metrics/DimensionSerDeTest.java b/athena-cloudwatch-metrics/src/test/java/com/amazonaws/athena/connectors/cloudwatch/metrics/DimensionSerDeTest.java
new file mode 100644
index 0000000000..279bd7196b
--- /dev/null
+++ b/athena-cloudwatch-metrics/src/test/java/com/amazonaws/athena/connectors/cloudwatch/metrics/DimensionSerDeTest.java
@@ -0,0 +1,51 @@
+/*-
+ * #%L
+ * athena-cloudwatch-metrics
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+package com.amazonaws.athena.connectors.cloudwatch.metrics;
+
+import com.amazonaws.services.cloudwatch.model.Dimension;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import static org.junit.Assert.*;
+
+public class DimensionSerDeTest
+{
+ private static final Logger logger = LoggerFactory.getLogger(DimensionSerDeTest.class);
+ private static final String EXPECTED_SERIALIZATION = "{\"dimensions\":[{\"name\":\"dim_name\",\"value\":\"dim_val\"}" +
+ ",{\"name\":\"dim_name1\",\"value\":\"dim_val1\"},{\"name\":\"dim_name2\",\"value\":\"dim_val2\"}]}";
+
+ @Test
+ public void serializeTest()
+ {
+ List<Dimension> expected = new ArrayList<>();
+ expected.add(new Dimension().withName("dim_name").withValue("dim_val"));
+ expected.add(new Dimension().withName("dim_name1").withValue("dim_val1"));
+ expected.add(new Dimension().withName("dim_name2").withValue("dim_val2"));
+ String actualSerialization = DimensionSerDe.serialize(expected);
+ logger.info("serializeTest: {}", actualSerialization);
+ List<Dimension> actual = DimensionSerDe.deserialize(actualSerialization);
+ assertEquals(EXPECTED_SERIALIZATION, actualSerialization);
+ assertEquals(expected, actual);
+ }
+}
diff --git a/athena-cloudwatch-metrics/src/test/java/com/amazonaws/athena/connectors/cloudwatch/metrics/MetricUtilsTest.java b/athena-cloudwatch-metrics/src/test/java/com/amazonaws/athena/connectors/cloudwatch/metrics/MetricUtilsTest.java
new file mode 100644
index 0000000000..dbd839fac1
--- /dev/null
+++ b/athena-cloudwatch-metrics/src/test/java/com/amazonaws/athena/connectors/cloudwatch/metrics/MetricUtilsTest.java
@@ -0,0 +1,203 @@
+/*-
+ * #%L
+ * athena-cloudwatch-metrics
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+package com.amazonaws.athena.connectors.cloudwatch.metrics;
+
+import com.amazonaws.athena.connector.lambda.data.BlockAllocator;
+import com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl;
+import com.amazonaws.athena.connector.lambda.data.SchemaBuilder;
+import com.amazonaws.athena.connector.lambda.domain.Split;
+import com.amazonaws.athena.connector.lambda.domain.TableName;
+import com.amazonaws.athena.connector.lambda.domain.predicate.ConstraintEvaluator;
+import com.amazonaws.athena.connector.lambda.domain.predicate.Constraints;
+import com.amazonaws.athena.connector.lambda.domain.predicate.Range;
+import com.amazonaws.athena.connector.lambda.domain.predicate.SortedRangeSet;
+import com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet;
+import com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest;
+import com.amazonaws.athena.connector.lambda.security.FederatedIdentity;
+import com.amazonaws.services.cloudwatch.model.Dimension;
+import com.amazonaws.services.cloudwatch.model.DimensionFilter;
+import com.amazonaws.services.cloudwatch.model.GetMetricDataRequest;
+import com.amazonaws.services.cloudwatch.model.ListMetricsRequest;
+import com.amazonaws.services.cloudwatch.model.Metric;
+import com.amazonaws.services.cloudwatch.model.MetricStat;
+import org.apache.arrow.vector.types.pojo.Schema;
+import com.google.common.collect.ImmutableList;
+import org.apache.arrow.vector.types.Types;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import static com.amazonaws.athena.connectors.cloudwatch.metrics.DimensionSerDe.SERIALZIE_DIM_FIELD_NAME;
+import static com.amazonaws.athena.connectors.cloudwatch.metrics.TestUtils.makeStringEquals;
+import static com.amazonaws.athena.connectors.cloudwatch.metrics.tables.Table.DIMENSION_NAME_FIELD;
+import static com.amazonaws.athena.connectors.cloudwatch.metrics.tables.Table.DIMENSION_VALUE_FIELD;
+import static com.amazonaws.athena.connectors.cloudwatch.metrics.tables.Table.METRIC_NAME_FIELD;
+import static com.amazonaws.athena.connectors.cloudwatch.metrics.tables.Table.NAMESPACE_FIELD;
+import static com.amazonaws.athena.connectors.cloudwatch.metrics.tables.Table.PERIOD_FIELD;
+import static com.amazonaws.athena.connectors.cloudwatch.metrics.tables.Table.STATISTIC_FIELD;
+import static com.amazonaws.athena.connectors.cloudwatch.metrics.tables.Table.TIMESTAMP_FIELD;
+import static org.junit.Assert.*;
+
+public class MetricUtilsTest
+{
+ private FederatedIdentity identity = new FederatedIdentity("id", "principal", "account");
+ private String catalog = "default";
+ private BlockAllocator allocator;
+
+ @Before
+ public void setup()
+ {
+ allocator = new BlockAllocatorImpl();
+ }
+
+ @After
+ public void tearDown()
+ {
+ allocator.close();
+ }
+
+ @Test
+ public void applyMetricConstraints()
+ {
+ Schema schema = SchemaBuilder.newBuilder()
+ .addStringField(NAMESPACE_FIELD)
+ .addStringField(METRIC_NAME_FIELD)
+ .addStringField(STATISTIC_FIELD)
+ .addStringField(DIMENSION_NAME_FIELD)
+ .addStringField(DIMENSION_VALUE_FIELD)
+ .build();
+
+ Map<String, ValueSet> constraintsMap = new HashMap<>();
+ constraintsMap.put(NAMESPACE_FIELD, makeStringEquals(allocator, "match1"));
+ constraintsMap.put(METRIC_NAME_FIELD, makeStringEquals(allocator, "match2"));
+ constraintsMap.put(STATISTIC_FIELD, makeStringEquals(allocator, "match3"));
+ constraintsMap.put(DIMENSION_NAME_FIELD, makeStringEquals(allocator, "match4"));
+ constraintsMap.put(DIMENSION_VALUE_FIELD, makeStringEquals(allocator, "match5"));
+
+ ConstraintEvaluator constraintEvaluator = new ConstraintEvaluator(allocator, schema, new Constraints(constraintsMap));
+
+ Metric metric = new Metric()
+ .withNamespace("match1")
+ .withMetricName("match2")
+ .withDimensions(new Dimension().withName("match4").withValue("match5"));
+ String statistic = "match3";
+ assertTrue(MetricUtils.applyMetricConstraints(constraintEvaluator, metric, statistic));
+
+ assertFalse(MetricUtils.applyMetricConstraints(constraintEvaluator, copyMetric(metric).withNamespace("no_match"), statistic));
+ assertFalse(MetricUtils.applyMetricConstraints(constraintEvaluator, copyMetric(metric).withMetricName("no_match"), statistic));
+ assertFalse(MetricUtils.applyMetricConstraints(constraintEvaluator,
+ copyMetric(metric).withDimensions(Collections.singletonList(new Dimension().withName("no_match").withValue("match5"))), statistic));
+ assertFalse(MetricUtils.applyMetricConstraints(constraintEvaluator,
+ copyMetric(metric).withDimensions(Collections.singletonList(new Dimension().withName("match4").withValue("no_match"))), statistic));
+ assertFalse(MetricUtils.applyMetricConstraints(constraintEvaluator, copyMetric(metric), "no_match"));
+ }
+
+ private Metric copyMetric(Metric metric)
+ {
+ Metric newMetric = new Metric()
+ .withNamespace(metric.getNamespace())
+ .withMetricName(metric.getMetricName());
+
+ List<Dimension> dims = new ArrayList<>();
+ for (Dimension next : metric.getDimensions()) {
+ dims.add(new Dimension().withName(next.getName()).withValue(next.getValue()));
+ }
+ return newMetric.withDimensions(dims);
+ }
+
+ @Test
+ public void pushDownPredicate()
+ {
+ Map<String, ValueSet> constraintsMap = new HashMap<>();
+ constraintsMap.put(NAMESPACE_FIELD, makeStringEquals(allocator, "match1"));
+ constraintsMap.put(METRIC_NAME_FIELD, makeStringEquals(allocator, "match2"));
+ constraintsMap.put(STATISTIC_FIELD, makeStringEquals(allocator, "match3"));
+ constraintsMap.put(DIMENSION_NAME_FIELD, makeStringEquals(allocator, "match4"));
+ constraintsMap.put(DIMENSION_VALUE_FIELD, makeStringEquals(allocator, "match5"));
+
+ ListMetricsRequest request = new ListMetricsRequest();
+ MetricUtils.pushDownPredicate(new Constraints(constraintsMap), request);
+
+ assertEquals("match1", request.getNamespace());
+ assertEquals("match2", request.getMetricName());
+ assertEquals(1, request.getDimensions().size());
+ assertEquals(new DimensionFilter().withName("match4").withValue("match5"), request.getDimensions().get(0));
+ }
+
+ @Test
+ public void makeGetMetricDataRequest()
+ {
+ String schema = "schema";
+ String table = "table";
+ Integer period = 60;
+ String statistic = "p90";
+ String metricName = "metricName";
+ String namespace = "namespace";
+
+ List<Dimension> dimensions = new ArrayList<>();
+ dimensions.add(new Dimension().withName("dim_name1").withValue("dim_value1"));
+ dimensions.add(new Dimension().withName("dim_name2").withValue("dim_value2"));
+
+ Split split = Split.newBuilder(null, null)
+ .add(NAMESPACE_FIELD, namespace)
+ .add(METRIC_NAME_FIELD, metricName)
+ .add(PERIOD_FIELD, String.valueOf(period))
+ .add(STATISTIC_FIELD, statistic)
+ .add(SERIALZIE_DIM_FIELD_NAME, DimensionSerDe.serialize(dimensions))
+ .build();
+
+ Schema schemaForRead = SchemaBuilder.newBuilder().addStringField(METRIC_NAME_FIELD).build();
+
+ Map<String, ValueSet> constraintsMap = new HashMap<>();
+
+ constraintsMap.put(TIMESTAMP_FIELD, SortedRangeSet.copyOf(Types.MinorType.BIGINT.getType(),
+ ImmutableList.of(Range.greaterThan(allocator, Types.MinorType.BIGINT.getType(), 1L)), false));
+
+ ReadRecordsRequest request = new ReadRecordsRequest(identity,
+ catalog,
+ "queryId-" + System.currentTimeMillis(),
+ new TableName(schema, table),
+ schemaForRead,
+ split,
+ new Constraints(constraintsMap),
+ 100_000_000_000L, //100GB don't expect this to spill
+ 100_000_000_000L
+ );
+
+ GetMetricDataRequest actual = MetricUtils.makeGetMetricDataRequest(request);
+ assertEquals(1, actual.getMetricDataQueries().size());
+ assertNotNull(actual.getMetricDataQueries().get(0).getId());
+ MetricStat metricStat = actual.getMetricDataQueries().get(0).getMetricStat();
+ assertNotNull(metricStat);
+ assertEquals(metricName, metricStat.getMetric().getMetricName());
+ assertEquals(namespace, metricStat.getMetric().getNamespace());
+ assertEquals(statistic, metricStat.getStat());
+ assertEquals(period, metricStat.getPeriod());
+ assertEquals(2, metricStat.getMetric().getDimensions().size());
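+ //The lower bound of 1 (epoch seconds) from the timestamp predicate should surface as a start time of 1,000 epoch millis.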
+ assertEquals(1000L, actual.getStartTime().getTime());
+ assertTrue(actual.getEndTime().getTime() <= System.currentTimeMillis() + 1_000);
+ }
+}
diff --git a/athena-cloudwatch-metrics/src/test/java/com/amazonaws/athena/connectors/cloudwatch/metrics/MetricsMetadataHandlerTest.java b/athena-cloudwatch-metrics/src/test/java/com/amazonaws/athena/connectors/cloudwatch/metrics/MetricsMetadataHandlerTest.java
new file mode 100644
index 0000000000..bbe9719a40
--- /dev/null
+++ b/athena-cloudwatch-metrics/src/test/java/com/amazonaws/athena/connectors/cloudwatch/metrics/MetricsMetadataHandlerTest.java
@@ -0,0 +1,337 @@
+/*-
+ * #%L
+ * athena-cloudwatch-metrics
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+package com.amazonaws.athena.connectors.cloudwatch.metrics;
+
+import com.amazonaws.athena.connector.lambda.data.Block;
+import com.amazonaws.athena.connector.lambda.data.BlockAllocator;
+import com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl;
+import com.amazonaws.athena.connector.lambda.data.BlockUtils;
+import com.amazonaws.athena.connector.lambda.data.SchemaBuilder;
+import com.amazonaws.athena.connector.lambda.domain.Split;
+import com.amazonaws.athena.connector.lambda.domain.TableName;
+import com.amazonaws.athena.connector.lambda.domain.predicate.Constraints;
+import com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet;
+import com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet;
+import com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest;
+import com.amazonaws.athena.connector.lambda.metadata.GetSplitsResponse;
+import com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutRequest;
+import com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutResponse;
+import com.amazonaws.athena.connector.lambda.metadata.GetTableRequest;
+import com.amazonaws.athena.connector.lambda.metadata.GetTableResponse;
+import com.amazonaws.athena.connector.lambda.metadata.ListSchemasRequest;
+import com.amazonaws.athena.connector.lambda.metadata.ListSchemasResponse;
+import com.amazonaws.athena.connector.lambda.metadata.ListTablesRequest;
+import com.amazonaws.athena.connector.lambda.metadata.ListTablesResponse;
+import com.amazonaws.athena.connector.lambda.metadata.MetadataRequestType;
+import com.amazonaws.athena.connector.lambda.metadata.MetadataResponse;
+import com.amazonaws.athena.connector.lambda.security.FederatedIdentity;
+import com.amazonaws.athena.connector.lambda.security.LocalKeyFactory;
+import com.amazonaws.services.athena.AmazonAthena;
+import com.amazonaws.services.cloudwatch.AmazonCloudWatch;
+import com.amazonaws.services.cloudwatch.model.ListMetricsRequest;
+import com.amazonaws.services.cloudwatch.model.ListMetricsResult;
+import com.amazonaws.services.cloudwatch.model.Metric;
+import com.amazonaws.services.secretsmanager.AWSSecretsManager;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.mockito.Mock;
+import org.mockito.invocation.InvocationOnMock;
+import org.mockito.runners.MockitoJUnitRunner;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import static com.amazonaws.athena.connectors.cloudwatch.metrics.DimensionSerDe.SERIALZIE_DIM_FIELD_NAME;
+import static com.amazonaws.athena.connectors.cloudwatch.metrics.tables.Table.METRIC_NAME_FIELD;
+import static com.amazonaws.athena.connectors.cloudwatch.metrics.tables.Table.NAMESPACE_FIELD;
+import static com.amazonaws.athena.connectors.cloudwatch.metrics.tables.Table.PERIOD_FIELD;
+import static com.amazonaws.athena.connectors.cloudwatch.metrics.tables.Table.STATISTIC_FIELD;
+import static org.junit.Assert.*;
+import static org.mockito.Matchers.any;
+import static org.mockito.Mockito.when;
+
+@RunWith(MockitoJUnitRunner.class)
+public class MetricsMetadataHandlerTest
+{
+ private static final Logger logger = LoggerFactory.getLogger(MetricsMetadataHandlerTest.class);
+
+ private final String defaultSchema = "default";
+ private final FederatedIdentity identity = new FederatedIdentity("id", "principal", "account");
+
+ private MetricsMetadataHandler handler;
+ private BlockAllocator allocator;
+
+ @Mock
+ private AmazonCloudWatch mockMetrics;
+
+ @Mock
+ private AWSSecretsManager mockSecretsManager;
+
+ @Mock
+ private AmazonAthena mockAthena;
+
+ @Before
+ public void setUp()
+ throws Exception
+ {
+ handler = new MetricsMetadataHandler(mockMetrics, new LocalKeyFactory(), mockSecretsManager, mockAthena, "spillBucket", "spillPrefix");
+ allocator = new BlockAllocatorImpl();
+ }
+
+ @After
+ public void tearDown()
+ throws Exception
+ {
+ allocator.close();
+ }
+
+ @Test
+ public void doListSchemaNames()
+ {
+ logger.info("doListSchemas - enter");
+
+ ListSchemasRequest req = new ListSchemasRequest(identity, "queryId", "default");
+ ListSchemasResponse res = handler.doListSchemaNames(allocator, req);
+ logger.info("doListSchemas - {}", res.getSchemas());
+
+ assertTrue(res.getSchemas().size() == 1);
+ assertEquals(defaultSchema, res.getSchemas().iterator().next());
+
+ logger.info("doListSchemas - exit");
+ }
+
+ @Test
+ public void doListTables()
+ {
+ logger.info("doListTables - enter");
+
+ ListTablesRequest req = new ListTablesRequest(identity, "queryId", "default", defaultSchema);
+ ListTablesResponse res = handler.doListTables(allocator, req);
+ logger.info("doListTables - {}", res.getTables());
+
+ assertEquals(2, res.getTables().size());
+ assertTrue(res.getTables().contains(new TableName(defaultSchema, "metrics")));
+ assertTrue(res.getTables().contains(new TableName(defaultSchema, "metric_samples")));
+
+ logger.info("doListTables - exit");
+ }
+
+ @Test
+ public void doGetMetricsTable()
+ {
+ logger.info("doGetMetricsTable - enter");
+
+ GetTableRequest metricsTableReq = new GetTableRequest(identity, "queryId", "default", new TableName(defaultSchema, "metrics"));
+ GetTableResponse metricsTableRes = handler.doGetTable(allocator, metricsTableReq);
+ logger.info("doGetMetricsTable - {} {}", metricsTableRes.getTableName(), metricsTableRes.getSchema());
+
+ assertEquals(new TableName(defaultSchema, "metrics"), metricsTableRes.getTableName());
+ assertNotNull(metricsTableRes.getSchema());
+ assertEquals(6, metricsTableRes.getSchema().getFields().size());
+
+ logger.info("doGetMetricsTable - exit");
+ }
+
+ @Test
+ public void doGetMetricSamplesTable()
+ {
+ logger.info("doGetMetricSamplesTable - enter");
+
+ GetTableRequest metricsTableReq = new GetTableRequest(identity,
+ "queryId",
+ "default",
+ new TableName(defaultSchema, "metric_samples"));
+
+ GetTableResponse metricsTableRes = handler.doGetTable(allocator, metricsTableReq);
+ logger.info("doGetMetricSamplesTable - {} {}", metricsTableRes.getTableName(), metricsTableRes.getSchema());
+
+ assertEquals(new TableName(defaultSchema, "metric_samples"), metricsTableRes.getTableName());
+ assertNotNull(metricsTableRes.getSchema());
+ assertEquals(9, metricsTableRes.getSchema().getFields().size());
+
+ logger.info("doGetMetricSamplesTable - exit");
+ }
+
+ @Test
+ public void doGetTableLayout()
+ throws Exception
+ {
+ logger.info("doGetTableLayout - enter");
+
+ Map<String, ValueSet> constraintsMap = new HashMap<>();
+
+ constraintsMap.put(METRIC_NAME_FIELD,
+ EquatableValueSet.newBuilder(allocator, Types.MinorType.VARCHAR.getType(), true, false)
+ .add("MyMetric").build());
+
+ GetTableLayoutRequest req = new GetTableLayoutRequest(identity,
+ "queryId",
+ "default",
+ new TableName(defaultSchema, "metrics"),
+ new Constraints(constraintsMap),
+ SchemaBuilder.newBuilder().build(),
+ Collections.EMPTY_SET);
+
+ GetTableLayoutResponse res = handler.doGetTableLayout(allocator, req);
+
+ logger.info("doGetTableLayout - {}", res.getPartitions().getSchema());
+ logger.info("doGetTableLayout - {}", res.getPartitions());
+
+ assertEquals(1, res.getPartitions().getRowCount());
+
+ logger.info("doGetTableLayout - exit");
+ }
+
+ @Test
+ public void doGetMetricsSplits()
+ throws Exception
+ {
+ logger.info("doGetMetricsSplits: enter");
+
+ Schema schema = SchemaBuilder.newBuilder().addIntField("partitionId").build();
+
+ Block partitions = allocator.createBlock(schema);
+ BlockUtils.setValue(partitions.getFieldVector("partitionId"), 1, 1);
+ partitions.setRowCount(1);
+
+ String continuationToken = null;
+ GetSplitsRequest originalReq = new GetSplitsRequest(identity,
+ "queryId",
+ "catalog_name",
+ new TableName(defaultSchema, "metrics"),
+ partitions,
+ Collections.singletonList("partitionId"),
+ new Constraints(new HashMap<>()),
+ continuationToken);
+ int numContinuations = 0;
+ do {
+ GetSplitsRequest req = new GetSplitsRequest(originalReq, continuationToken);
+ logger.info("doGetMetricsSplits: req[{}]", req);
+
+ MetadataResponse rawResponse = handler.doGetSplits(allocator, req);
+ assertEquals(MetadataRequestType.GET_SPLITS, rawResponse.getRequestType());
+
+ GetSplitsResponse response = (GetSplitsResponse) rawResponse;
+ continuationToken = response.getContinuationToken();
+
+ logger.info("doGetMetricsSplits: continuationToken[{}] - numSplits[{}]", continuationToken, response.getSplits().size());
+ assertEquals(1, response.getSplits().size());
+
+ if (continuationToken != null) {
+ numContinuations++;
+ }
+ }
+ while (continuationToken != null);
+
+ assertEquals(0, numContinuations);
+
+ logger.info("doGetMetricsSplits: exit");
+ }
+
+ @Test
+ public void doGetMetricSamplesSplits()
+ throws Exception
+ {
+ logger.info("doGetMetricSamplesSplits: enter");
+
+ String namespaceFilter = "MyNameSpace";
+ String statistic = "p90";
+ int numMetrics = 10;
+
+ when(mockMetrics.listMetrics(any(ListMetricsRequest.class))).thenAnswer((InvocationOnMock invocation) -> {
+ ListMetricsRequest request = invocation.getArgumentAt(0, ListMetricsRequest.class);
+
+ //assert that the namespace filter was indeed pushed down
+ assertEquals(namespaceFilter, request.getNamespace());
+ String nextToken = (request.getNextToken() == null) ? "valid" : null;
+ List<Metric> metrics = new ArrayList<>();
+
+ for (int i = 0; i < numMetrics; i++) {
+ metrics.add(new Metric().withNamespace(namespaceFilter).withMetricName("metric-" + i));
+ }
+
+ return new ListMetricsResult().withNextToken(nextToken).withMetrics(metrics);
+ });
+
+ Schema schema = SchemaBuilder.newBuilder().addIntField("partitionId").build();
+
+ Block partitions = allocator.createBlock(schema);
+ BlockUtils.setValue(partitions.getFieldVector("partitionId"), 1, 1);
+ partitions.setRowCount(1);
+
+ Map<String, ValueSet> constraintsMap = new HashMap<>();
+
+ constraintsMap.put(NAMESPACE_FIELD,
+ EquatableValueSet.newBuilder(allocator, Types.MinorType.VARCHAR.getType(), true, false)
+ .add(namespaceFilter).build());
+ constraintsMap.put(STATISTIC_FIELD,
+ EquatableValueSet.newBuilder(allocator, Types.MinorType.VARCHAR.getType(), true, false)
+ .add(statistic).build());
+
+ String continuationToken = null;
+ GetSplitsRequest originalReq = new GetSplitsRequest(identity,
+ "queryId",
+ "catalog_name",
+ new TableName(defaultSchema, "metric_samples"),
+ partitions,
+ Collections.singletonList("partitionId"),
+ new Constraints(constraintsMap),
+ continuationToken);
+
+ int numContinuations = 0;
+ do {
+ GetSplitsRequest req = new GetSplitsRequest(originalReq, continuationToken);
+ logger.info("doGetMetricSamplesSplits: req[{}]", req);
+
+ MetadataResponse rawResponse = handler.doGetSplits(allocator, req);
+ assertEquals(MetadataRequestType.GET_SPLITS, rawResponse.getRequestType());
+
+ GetSplitsResponse response = (GetSplitsResponse) rawResponse;
+ continuationToken = response.getContinuationToken();
+
+ logger.info("doGetMetricSamplesSplits: continuationToken[{}] - numSplits[{}]", continuationToken, response.getSplits().size());
+ assertEquals(numMetrics, response.getSplits().size());
+ for (Split nextSplit : response.getSplits()) {
+ assertNotNull(nextSplit.getProperty(SERIALZIE_DIM_FIELD_NAME));
+ assertNotNull(nextSplit.getProperty(METRIC_NAME_FIELD));
+ assertEquals(statistic, nextSplit.getProperty(STATISTIC_FIELD));
+ assertEquals("60", nextSplit.getProperty(PERIOD_FIELD));
+ }
+
+ if (continuationToken != null) {
+ numContinuations++;
+ }
+ }
+ while (continuationToken != null);
+
+ assertEquals(1, numContinuations);
+
+ logger.info("doGetMetricSamplesSplits: exit");
+ }
+}
diff --git a/athena-cloudwatch-metrics/src/test/java/com/amazonaws/athena/connectors/cloudwatch/metrics/MetricsRecordHandlerTest.java b/athena-cloudwatch-metrics/src/test/java/com/amazonaws/athena/connectors/cloudwatch/metrics/MetricsRecordHandlerTest.java
new file mode 100644
index 0000000000..7ae483d36b
--- /dev/null
+++ b/athena-cloudwatch-metrics/src/test/java/com/amazonaws/athena/connectors/cloudwatch/metrics/MetricsRecordHandlerTest.java
@@ -0,0 +1,343 @@
+/*-
+ * #%L
+ * athena-cloudwatch-metrics
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+package com.amazonaws.athena.connectors.cloudwatch.metrics;
+
+import com.amazonaws.athena.connector.lambda.data.BlockAllocator;
+import com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl;
+import com.amazonaws.athena.connector.lambda.data.BlockUtils;
+import com.amazonaws.athena.connector.lambda.data.S3BlockSpillReader;
+import com.amazonaws.athena.connector.lambda.domain.Split;
+import com.amazonaws.athena.connector.lambda.domain.TableName;
+import com.amazonaws.athena.connector.lambda.domain.predicate.Constraints;
+import com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet;
+import com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation;
+import com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest;
+import com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse;
+import com.amazonaws.athena.connector.lambda.records.RecordResponse;
+import com.amazonaws.athena.connector.lambda.security.EncryptionKeyFactory;
+import com.amazonaws.athena.connector.lambda.security.FederatedIdentity;
+import com.amazonaws.athena.connector.lambda.security.LocalKeyFactory;
+import com.amazonaws.athena.connectors.cloudwatch.metrics.tables.MetricSamplesTable;
+import com.amazonaws.athena.connectors.cloudwatch.metrics.tables.MetricsTable;
+import com.amazonaws.athena.connectors.cloudwatch.metrics.tables.Table;
+import com.amazonaws.services.athena.AmazonAthena;
+import com.amazonaws.services.cloudwatch.AmazonCloudWatch;
+import com.amazonaws.services.cloudwatch.model.Dimension;
+import com.amazonaws.services.cloudwatch.model.GetMetricDataRequest;
+import com.amazonaws.services.cloudwatch.model.GetMetricDataResult;
+import com.amazonaws.services.cloudwatch.model.ListMetricsRequest;
+import com.amazonaws.services.cloudwatch.model.ListMetricsResult;
+import com.amazonaws.services.cloudwatch.model.Metric;
+import com.amazonaws.services.cloudwatch.model.MetricDataQuery;
+import com.amazonaws.services.cloudwatch.model.MetricDataResult;
+import com.amazonaws.services.cloudwatch.model.MetricStat;
+import com.amazonaws.services.s3.AmazonS3;
+import com.amazonaws.services.s3.model.PutObjectResult;
+import com.amazonaws.services.s3.model.S3Object;
+import com.amazonaws.services.s3.model.S3ObjectInputStream;
+import com.amazonaws.services.secretsmanager.AWSSecretsManager;
+import com.google.common.io.ByteStreams;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.mockito.Mock;
+import org.mockito.invocation.InvocationOnMock;
+import org.mockito.runners.MockitoJUnitRunner;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.ByteArrayInputStream;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.UUID;
+import java.util.concurrent.atomic.AtomicLong;
+
+import static com.amazonaws.athena.connectors.cloudwatch.metrics.TestUtils.makeStringEquals;
+import static com.amazonaws.athena.connectors.cloudwatch.metrics.tables.Table.DIMENSION_NAME_FIELD;
+import static com.amazonaws.athena.connectors.cloudwatch.metrics.tables.Table.DIMENSION_VALUE_FIELD;
+import static com.amazonaws.athena.connectors.cloudwatch.metrics.tables.Table.METRIC_NAME_FIELD;
+import static com.amazonaws.athena.connectors.cloudwatch.metrics.tables.Table.NAMESPACE_FIELD;
+import static com.amazonaws.athena.connectors.cloudwatch.metrics.tables.Table.PERIOD_FIELD;
+import static com.amazonaws.athena.connectors.cloudwatch.metrics.tables.Table.STATISTIC_FIELD;
+import static org.junit.Assert.*;
+import static org.mockito.Matchers.any;
+import static org.mockito.Matchers.anyObject;
+import static org.mockito.Matchers.anyString;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+@RunWith(MockitoJUnitRunner.class)
+public class MetricsRecordHandlerTest
+{
+ private static final Logger logger = LoggerFactory.getLogger(MetricsRecordHandlerTest.class);
+ //Schema for the metrics table.
+ private static final Table METRIC_TABLE = new MetricsTable();
+ //Schema for the metric_samples table.
+ private static final Table METRIC_DATA_TABLE = new MetricSamplesTable();
+ private static final TableName METRICS_TABLE_NAME = new TableName("default", METRIC_TABLE.getName());
+ private static final TableName METRIC_SAMPLES_TABLE_NAME = new TableName("default", METRIC_DATA_TABLE.getName());
+
+ private FederatedIdentity identity = new FederatedIdentity("id", "principal", "account");
+ private List<ByteHolder> mockS3Storage;
+ private MetricsRecordHandler handler;
+ private S3BlockSpillReader spillReader;
+ private BlockAllocator allocator;
+ private EncryptionKeyFactory keyFactory = new LocalKeyFactory();
+
+ @Mock
+ private AmazonCloudWatch mockMetrics;
+
+ @Mock
+ private AmazonS3 mockS3;
+
+ @Mock
+ private AWSSecretsManager mockSecretsManager;
+
+ @Mock
+ private AmazonAthena mockAthena;
+
+ @Before
+ public void setUp()
+ throws Exception
+ {
+ mockS3Storage = new ArrayList<>();
+ allocator = new BlockAllocatorImpl();
+ handler = new MetricsRecordHandler(mockS3, mockSecretsManager, mockAthena, mockMetrics);
+ spillReader = new S3BlockSpillReader(mockS3, allocator);
+
+ when(mockS3.putObject(anyObject(), anyObject(), anyObject(), anyObject()))
+ .thenAnswer((InvocationOnMock invocationOnMock) ->
+ {
+ InputStream inputStream = (InputStream) invocationOnMock.getArguments()[2];
+ ByteHolder byteHolder = new ByteHolder();
+ byteHolder.setBytes(ByteStreams.toByteArray(inputStream));
+ mockS3Storage.add(byteHolder);
+ return mock(PutObjectResult.class);
+ });
+
+ when(mockS3.getObject(anyString(), anyString()))
+ .thenAnswer((InvocationOnMock invocationOnMock) ->
+ {
+ S3Object mockObject = mock(S3Object.class);
+ ByteHolder byteHolder = mockS3Storage.get(0);
+ mockS3Storage.remove(0);
+ when(mockObject.getObjectContent()).thenReturn(
+ new S3ObjectInputStream(
+ new ByteArrayInputStream(byteHolder.getBytes()), null));
+ return mockObject;
+ });
+ }
+
+ @After
+ public void tearDown()
+ throws Exception
+ {
+ allocator.close();
+ }
+
+ @Test
+ public void readMetricsWithConstraint()
+ throws Exception
+ {
+ logger.info("readMetricsWithConstraint: enter");
+
+ String namespace = "namespace";
+ String dimName = "dimName";
+ String dimValue = "dimValue";
+
+ int numMetrics = 100;
+ AtomicLong numCalls = new AtomicLong(0);
+ when(mockMetrics.listMetrics(any(ListMetricsRequest.class))).thenAnswer((InvocationOnMock invocation) -> {
+ ListMetricsRequest request = invocation.getArgumentAt(0, ListMetricsRequest.class);
+ numCalls.incrementAndGet();
+ //assert that the namespace filter was indeed pushed down
+ assertEquals(namespace, request.getNamespace());
+ String nextToken = (request.getNextToken() == null) ? "valid" : null;
+ List<Metric> metrics = new ArrayList<>();
+
+ for (int i = 0; i < numMetrics; i++) {
+ metrics.add(new Metric().withNamespace(namespace).withMetricName("metric-" + i)
+ .withDimensions(new Dimension().withName(dimName).withValue(dimValue)));
+ metrics.add(new Metric().withNamespace(namespace + i).withMetricName("metric-" + i));
+ }
+
+ return new ListMetricsResult().withNextToken(nextToken).withMetrics(metrics);
+ });
+
+ Map<String, ValueSet> constraintsMap = new HashMap<>();
+ constraintsMap.put(NAMESPACE_FIELD, makeStringEquals(allocator, namespace));
+ constraintsMap.put(DIMENSION_NAME_FIELD, makeStringEquals(allocator, dimName));
+ constraintsMap.put(DIMENSION_VALUE_FIELD, makeStringEquals(allocator, dimValue));
+
+ S3SpillLocation spillLocation = S3SpillLocation.newBuilder()
+ .withBucket(UUID.randomUUID().toString())
+ .withSplitId(UUID.randomUUID().toString())
+ .withQueryId(UUID.randomUUID().toString())
+ .withIsDirectory(true)
+ .build();
+
+ Split split = Split.newBuilder(spillLocation, keyFactory.create()).build();
+
+ ReadRecordsRequest request = new ReadRecordsRequest(identity,
+ "catalog",
+ "queryId-" + System.currentTimeMillis(),
+ METRICS_TABLE_NAME,
+ METRIC_TABLE.getSchema(),
+ split,
+ new Constraints(constraintsMap),
+ 100_000_000_000L,
+ 100_000_000_000L//100GB don't expect this to spill
+ );
+
+ RecordResponse rawResponse = handler.doReadRecords(allocator, request);
+
+ assertTrue(rawResponse instanceof ReadRecordsResponse);
+
+ ReadRecordsResponse response = (ReadRecordsResponse) rawResponse;
+ logger.info("readMetricsWithConstraint: rows[{}]", response.getRecordCount());
+
+ assertEquals(numCalls.get() * numMetrics, response.getRecords().getRowCount());
+ logger.info("readMetricsWithConstraint: {}", BlockUtils.rowToString(response.getRecords(), 0));
+
+ logger.info("readMetricsWithConstraint: exit");
+ }
+
+ @Test
+ public void readMetricSamplesWithConstraint()
+ throws Exception
+ {
+ logger.info("readMetricSamplesWithConstraint: enter");
+
+ String namespace = "namespace";
+ String metricName = "metricName";
+ String statistic = "p90";
+ String period = "60";
+ String dimName = "dimName";
+ String dimValue = "dimValue";
+ List<Dimension> dimensions = Collections.singletonList(new Dimension().withName(dimName).withValue(dimValue));
+
+ int numMetrics = 10;
+ int numSamples = 10;
+ AtomicLong numCalls = new AtomicLong(0);
+ when(mockMetrics.getMetricData(any(GetMetricDataRequest.class))).thenAnswer((InvocationOnMock invocation) -> {
+ numCalls.incrementAndGet();
+ return mockMetricData(invocation, numMetrics, numSamples);
+ });
+
+ Map<String, ValueSet> constraintsMap = new HashMap<>();
+ constraintsMap.put(NAMESPACE_FIELD, makeStringEquals(allocator, namespace));
+ constraintsMap.put(STATISTIC_FIELD, makeStringEquals(allocator, statistic));
+ constraintsMap.put(DIMENSION_NAME_FIELD, makeStringEquals(allocator, dimName));
+ constraintsMap.put(DIMENSION_VALUE_FIELD, makeStringEquals(allocator, dimValue));
+
+ S3SpillLocation spillLocation = S3SpillLocation.newBuilder()
+ .withBucket(UUID.randomUUID().toString())
+ .withSplitId(UUID.randomUUID().toString())
+ .withQueryId(UUID.randomUUID().toString())
+ .withIsDirectory(true)
+ .build();
+
+ Split split = Split.newBuilder(spillLocation, keyFactory.create())
+ .add(DimensionSerDe.SERIALZIE_DIM_FIELD_NAME, DimensionSerDe.serialize(dimensions))
+ .add(METRIC_NAME_FIELD, metricName)
+ .add(NAMESPACE_FIELD, namespace)
+ .add(STATISTIC_FIELD, statistic)
+ .add(PERIOD_FIELD, period)
+ .build();
+
+ ReadRecordsRequest request = new ReadRecordsRequest(identity,
+ "catalog",
+ "queryId-" + System.currentTimeMillis(),
+ METRIC_SAMPLES_TABLE_NAME,
+ METRIC_DATA_TABLE.getSchema(),
+ split,
+ new Constraints(constraintsMap),
+ 100_000_000_000L,
+ 100_000_000_000L//100GB don't expect this to spill
+ );
+
+ RecordResponse rawResponse = handler.doReadRecords(allocator, request);
+
+ assertTrue(rawResponse instanceof ReadRecordsResponse);
+
+ ReadRecordsResponse response = (ReadRecordsResponse) rawResponse;
+ logger.info("readMetricSamplesWithConstraint: rows[{}]", response.getRecordCount());
+
+ assertEquals(numCalls.get() * numMetrics * numSamples, response.getRecords().getRowCount());
+ logger.info("readMetricSamplesWithConstraint: {}", BlockUtils.rowToString(response.getRecords(), 0));
+
+ logger.info("readMetricSamplesWithConstraint: exit");
+ }
+
+ private GetMetricDataResult mockMetricData(InvocationOnMock invocation, int numMetrics, int numSamples)
+ {
+ GetMetricDataRequest request = invocation.getArgumentAt(0, GetMetricDataRequest.class);
+
+ /**
+ * Confirm that all available criteria were pushed down into Cloudwatch Metrics
+ */
+ List<MetricDataQuery> queries = request.getMetricDataQueries();
+ assertEquals(1, queries.size());
+ MetricStat stat = queries.get(0).getMetricStat();
+ assertNotNull(stat.getPeriod());
+ assertNotNull(stat.getMetric());
+ assertNotNull(stat.getStat());
+ assertNotNull(stat.getMetric().getMetricName());
+ assertNotNull(stat.getMetric().getNamespace());
+ assertNotNull(stat.getMetric().getDimensions());
+ assertEquals(1, stat.getMetric().getDimensions().size());
+
+ String nextToken = (request.getNextToken() == null) ? "valid" : null;
+ List<MetricDataResult> samples = new ArrayList<>();
+
+ for (int i = 0; i < numMetrics; i++) {
+ List<Double> values = new ArrayList<>();
+ List<Date> timestamps = new ArrayList<>();
+ for (double j = 0; j < numSamples; j++) {
+ values.add(j);
+ timestamps.add(new Date(System.currentTimeMillis() + (int) j));
+ }
+ samples.add(new MetricDataResult().withValues(values).withTimestamps(timestamps));
+ }
+
+ return new GetMetricDataResult().withNextToken(nextToken).withMetricDataResults(samples);
+ }
+
+ private class ByteHolder
+ {
+ private byte[] bytes;
+
+ public void setBytes(byte[] bytes)
+ {
+ this.bytes = bytes;
+ }
+
+ public byte[] getBytes()
+ {
+ return bytes;
+ }
+ }
+}
diff --git a/athena-cloudwatch-metrics/src/test/java/com/amazonaws/athena/connectors/cloudwatch/metrics/TestUtils.java b/athena-cloudwatch-metrics/src/test/java/com/amazonaws/athena/connectors/cloudwatch/metrics/TestUtils.java
new file mode 100644
index 0000000000..58fa82f59e
--- /dev/null
+++ b/athena-cloudwatch-metrics/src/test/java/com/amazonaws/athena/connectors/cloudwatch/metrics/TestUtils.java
@@ -0,0 +1,36 @@
+/*-
+ * #%L
+ * athena-cloudwatch-metrics
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+package com.amazonaws.athena.connectors.cloudwatch.metrics;
+
+import com.amazonaws.athena.connector.lambda.data.BlockAllocator;
+import com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet;
+import com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet;
+import org.apache.arrow.vector.types.Types;
+
+public class TestUtils
+{
+ private TestUtils() {}
+
+ public static ValueSet makeStringEquals(BlockAllocator allocator, String value)
+ {
+ return EquatableValueSet.newBuilder(allocator, Types.MinorType.VARCHAR.getType(), true, false)
+ .add(value).build();
+ }
+}
diff --git a/athena-cloudwatch/LICENSE.txt b/athena-cloudwatch/LICENSE.txt
new file mode 100644
index 0000000000..418de4c108
--- /dev/null
+++ b/athena-cloudwatch/LICENSE.txt
@@ -0,0 +1,174 @@
+Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
\ No newline at end of file
diff --git a/athena-cloudwatch/README.md b/athena-cloudwatch/README.md
new file mode 100644
index 0000000000..03edd2e8fc
--- /dev/null
+++ b/athena-cloudwatch/README.md
@@ -0,0 +1,60 @@
+# Amazon Athena Cloudwatch Connector
+
+This connector enables Amazon Athena to communicate with Cloudwatch, making your log data accessible via SQL.
+
+## Usage
+
+### Parameters
+
+The Athena Cloudwatch Connector exposes several configuration options via Lambda environment variables. More detail on the available parameters can be found below.
+
+1. **spill_bucket** - When the data returned by your Lambda function exceeds Lambda’s limits, this is the bucket that the data will be written to for Athena to read the excess from. (e.g. my_bucket)
+2. **spill_prefix** - (Optional) Defaults to a sub-folder in your bucket called 'athena-federation-spill'. Used in conjunction with spill_bucket, this is the path within the above bucket that large responses are spilled to. You should configure an S3 lifecycle on this location to delete old spills after X days/hours.
+3. **kms_key_id** - (Optional) By default any data that is spilled to S3 is encrypted using AES-GCM and a randomly generated key. Setting a KMS Key ID allows your Lambda function to use KMS for key generation for a stronger source of encryption keys. (e.g. a7e63k4b-8loc-40db-a2a1-4d0en2cd8331)
+4. **disable_spill_encryption** - (Optional) Defaults to False, meaning any data that is spilled to S3 is encrypted using AES-GCM, either with a randomly generated key or with keys generated via KMS. Setting this to True disables spill encryption. You may wish to disable encryption for improved performance, especially if your spill location in S3 uses S3 Server Side Encryption. (e.g. True or False)
+
+The connector also supports AIMD Congestion Control for handling throttling events from Cloudwatch via the Athena Query Federation SDK's ThrottlingInvoker construct. You can tweak the default throttling behavior by setting any of the below (optional) environment variables; an illustrative example follows the list:
+
+1. **throttle_initial_delay_ms** - (Default: 10ms) This is the initial call delay applied after the first congestion event.
+1. **throttle_max_delay_ms** - (Default: 1000ms) This is the max delay between calls. You can derive the lowest sustained call rate (TPS) by dividing 1000ms by this value.
+1. **throttle_decrease_factor** - (Default: 0.5) This is the factor by which we reduce our call rate.
+1. **throttle_increase_ms** - (Default: 10ms) This is the rate at which we decrease the call delay.
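+
+If you build and deploy the connector yourself, one way to set these values is directly in the Lambda function's environment variables, for example via the SAM template. The snippet below is only an illustrative sketch: the throttling variables are not part of the shipped athena-cloudwatch.yaml, and the values shown are placeholders.
+
+```yaml
+# Illustrative only: environment variables on the connector's Lambda function.
+Environment:
+  Variables:
+    spill_bucket: my-spill-bucket          # hypothetical bucket name
+    spill_prefix: athena-spill
+    throttle_initial_delay_ms: "10"        # AIMD tuning knobs described above
+    throttle_max_delay_ms: "1000"
+    throttle_decrease_factor: "0.5"
+    throttle_increase_ms: "10"
+```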
+
+
+### Databases & Tables
+
+The Athena Cloudwatch Connector maps your LogGroups as schemas (aka databases) and each LogStream as a table. The connector also maps a special "all_log_streams" view comprising all the LogStreams in a LogGroup. This view allows you to query all the logs in a LogGroup at once instead of searching through each LogStream individually.
+
+Every Table mapped by the Athena Cloudwatch Connector has the following schema, which matches the fields provided by Cloudwatch Logs itself (an example query follows the list).
+
+1. **log_stream** - A VARCHAR containing the name of the LogStream that the row is from.
+2. **time** - An INT64 containing the epoch time at which the log line was generated.
+3. **message** - A VARCHAR containing the log message itself.
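+
+For example, the hypothetical query below selects these three fields from the special "all_log_streams" view of an example `/var/ecs/containers` LogGroup registered under a catalog named `lambda:cloudwatch` (both names are placeholders; substitute your own function and LogGroup names):
+
+```sql
+SELECT log_stream, time, message
+FROM "lambda:cloudwatch"."/var/ecs/containers".all_log_streams
+LIMIT 100;
+```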
+
+### Required Permissions
+
+Review the "Policies" section of the athena-cloudwatch.yaml file for full details on the IAM Policies required by this connector. A brief summary is below.
+
+1. S3 Write Access - In order to successfully handle large queries, the connector requires write access to a location in S3.
+2. CloudWatch Logs Read/Write - The connector uses this access to read your log data in order to satisfy your queries, as well as to write its own diagnostic logs.
+3. Athena GetQueryExecution - The connector uses this access to fast-fail when the upstream Athena query has terminated.
+
+### Deploying The Connector
+
+To use this connector in your queries, navigate to AWS Serverless Application Repository and deploy a pre-built version of this connector. Alternatively, you can build and deploy this connector from source by following the steps below, or use the more detailed tutorial in the athena-example module:
+
+1. From the athena-federation-sdk dir, run `mvn clean install` if you haven't already.
+2. From the athena-cloudwatch dir, run `mvn clean install`.
+3. From the athena-cloudwatch dir, run `../tools/publish.sh S3_BUCKET_NAME athena-cloudwatch` to publish the connector to your private AWS Serverless Application Repository. The S3_BUCKET in the command is where a copy of the connector's code will be stored for Serverless Application Repository to retrieve it. This allows users with permission to deploy instances of the connector via a 1-Click form. Then navigate to [Serverless Application Repository](https://aws.amazon.com/serverless/serverlessrepo)
+4. Try running a query like the one below in Athena:
+```sql
+select * from "lambda:"."/aws/lambda/".all_log_streams limit 100
+```
+
+## Performance
+
+The Athena Cloudwatch Connector attempts to parallelize queries against Cloudwatch by scanning the various log_streams needed for your query in parallel. Predicate Pushdown is performed within the Lambda function, and certain time period filters are also pushed down into Cloudwatch Logs itself, as in the example below.
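+
+For instance, a time-range predicate like the one in this hypothetical query can be pushed down into Cloudwatch Logs so that only events within the window are scanned (catalog, LogGroup, and timestamp values are placeholders):
+
+```sql
+SELECT log_stream, time, message
+FROM "lambda:cloudwatch"."/var/ecs/containers".all_log_streams
+WHERE time BETWEEN 1577836800000 AND 1577923200000
+LIMIT 100;
+```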
+
+## License
+
+This project is licensed under the Apache-2.0 License.
\ No newline at end of file
diff --git a/athena-cloudwatch/athena-cloudwatch.yaml b/athena-cloudwatch/athena-cloudwatch.yaml
new file mode 100644
index 0000000000..ce4915aae4
--- /dev/null
+++ b/athena-cloudwatch/athena-cloudwatch.yaml
@@ -0,0 +1,71 @@
+Transform: 'AWS::Serverless-2016-10-31'
+Metadata:
+  'AWS::ServerlessRepo::Application':
+    Name: AthenaCloudwatchConnector
+    Description: 'This connector enables Amazon Athena to communicate with Cloudwatch, making your logs accessible via SQL.'
+    Author: 'Amazon Athena'
+    SpdxLicenseId: Apache-2.0
+    LicenseUrl: LICENSE.txt
+    ReadmeUrl: README.md
+    Labels:
+      - athena-federation
+    HomePageUrl: 'https://github.com/awslabs/aws-athena-query-federation'
+    SemanticVersion: 1.0.0
+    SourceCodeUrl: 'https://github.com/awslabs/aws-athena-query-federation'
+Parameters:
+  AthenaCatalogName:
+    Description: 'The name you will give to this catalog in Athena. It will also be used as the function name.'
+    Type: String
+  SpillBucket:
+    Description: 'The bucket where this function can spill data.'
+    Type: String
+  SpillPrefix:
+    Description: 'The bucket prefix where this function can spill large responses.'
+    Type: String
+    Default: athena-spill
+  LambdaTimeout:
+    Description: 'Maximum Lambda invocation runtime in seconds. (min 1 - 900 max)'
+    Default: 900
+    Type: Number
+  LambdaMemory:
+    Description: 'Lambda memory in MB (min 128 - 3008 max).'
+    Default: 3008
+    Type: Number
+  DisableSpillEncryption:
+    Description: "WARNING: If set to 'true' encryption for spilled data is disabled."
+    Default: 'false'
+    Type: String
+Resources:
+  ConnectorConfig:
+    Type: 'AWS::Serverless::Function'
+    Properties:
+      Environment:
+        Variables:
+          disable_spill_encryption: !Ref DisableSpillEncryption
+          spill_bucket: !Ref SpillBucket
+          spill_prefix: !Ref SpillPrefix
+      FunctionName: !Ref AthenaCatalogName
+      Handler: "com.amazonaws.athena.connectors.cloudwatch.CloudwatchCompositeHandler"
+      CodeUri: "./target/athena-cloudwatch-1.0.jar"
+      Description: "Enables Amazon Athena to communicate with Cloudwatch, making your logs accessible via SQL"
+      Runtime: java8
+      Timeout: !Ref LambdaTimeout
+      MemorySize: !Ref LambdaMemory
+      Policies:
+        - Statement:
+            - Action:
+                - logs:Describe*
+                - logs:Get*
+                - logs:List*
+                - logs:StartQuery
+                - logs:StopQuery
+                - logs:TestMetricFilter
+                - logs:FilterLogEvents
+                - athena:GetQueryExecution
+              Effect: Allow
+              Resource: '*'
+          Version: '2012-10-17'
+        #S3CrudPolicy allows our connector to spill large responses to S3. You can optionally replace this pre-made policy
+        #with one that is more restrictive and can only 'put' but not read,delete, or overwrite files.
+        - S3CrudPolicy:
+            BucketName: !Ref SpillBucket
\ No newline at end of file
diff --git a/athena-cloudwatch/pom.xml b/athena-cloudwatch/pom.xml
new file mode 100644
index 0000000000..02fa9a54d6
--- /dev/null
+++ b/athena-cloudwatch/pom.xml
@@ -0,0 +1,57 @@
+<?xml version="1.0"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <parent>
+        <artifactId>aws-athena-query-federation</artifactId>
+        <groupId>com.amazonaws</groupId>
+        <version>1.0</version>
+    </parent>
+    <modelVersion>4.0.0</modelVersion>
+
+    <artifactId>athena-cloudwatch</artifactId>
+
+    <dependencies>
+        <dependency>
+            <groupId>com.amazonaws</groupId>
+            <artifactId>aws-athena-federation-sdk</artifactId>
+            <version>${aws-athena-federation-sdk.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>com.amazonaws</groupId>
+            <artifactId>aws-java-sdk-logs</artifactId>
+            <version>1.11.490</version>
+        </dependency>
+    </dependencies>
+
+    <build>
+        <plugins>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-shade-plugin</artifactId>
+                <version>3.2.1</version>
+                <configuration>
+                    <createDependencyReducedPom>false</createDependencyReducedPom>
+                    <filters>
+                        <filter>
+                            <artifact>*:*</artifact>
+                            <excludes>
+                                <exclude>META-INF/*.SF</exclude>
+                                <exclude>META-INF/*.DSA</exclude>
+                                <exclude>META-INF/*.RSA</exclude>
+                            </excludes>
+                        </filter>
+                    </filters>
+                </configuration>
+                <executions>
+                    <execution>
+                        <phase>package</phase>
+                        <goals>
+                            <goal>shade</goal>
+                        </goals>
+                    </execution>
+                </executions>
+            </plugin>
+        </plugins>
+    </build>
+</project>
\ No newline at end of file
diff --git a/athena-cloudwatch/src/main/java/com/amazonaws/athena/connectors/cloudwatch/CloudwatchCompositeHandler.java b/athena-cloudwatch/src/main/java/com/amazonaws/athena/connectors/cloudwatch/CloudwatchCompositeHandler.java
new file mode 100644
index 0000000000..0db8b25753
--- /dev/null
+++ b/athena-cloudwatch/src/main/java/com/amazonaws/athena/connectors/cloudwatch/CloudwatchCompositeHandler.java
@@ -0,0 +1,35 @@
+/*-
+ * #%L
+ * athena-cloudwatch
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+package com.amazonaws.athena.connectors.cloudwatch;
+
+import com.amazonaws.athena.connector.lambda.handlers.CompositeHandler;
+
+/**
+ * Boilerplate composite handler that allows us to use a single Lambda function for both
+ * Metadata and Data. In this case we just compose CloudwatchMetadataHandler and CloudwatchRecordHandler.
+ */
+public class CloudwatchCompositeHandler
+ extends CompositeHandler
+{
+ public CloudwatchCompositeHandler()
+ {
+ super(new CloudwatchMetadataHandler(), new CloudwatchRecordHandler());
+ }
+}
diff --git a/athena-cloudwatch/src/main/java/com/amazonaws/athena/connectors/cloudwatch/CloudwatchExceptionFilter.java b/athena-cloudwatch/src/main/java/com/amazonaws/athena/connectors/cloudwatch/CloudwatchExceptionFilter.java
new file mode 100644
index 0000000000..c71db552cf
--- /dev/null
+++ b/athena-cloudwatch/src/main/java/com/amazonaws/athena/connectors/cloudwatch/CloudwatchExceptionFilter.java
@@ -0,0 +1,45 @@
+/*-
+ * #%L
+ * athena-cloudwatch
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+package com.amazonaws.athena.connectors.cloudwatch;
+
+import com.amazonaws.athena.connector.lambda.ThrottlingInvoker;
+import com.amazonaws.services.logs.model.AWSLogsException;
+import com.amazonaws.services.logs.model.LimitExceededException;
+
+/**
+ * Used to identify Exceptions that are related to Cloudwatch Logs throttling events.
+ */
+public class CloudwatchExceptionFilter
+ implements ThrottlingInvoker.ExceptionFilter
+{
+ public static final ThrottlingInvoker.ExceptionFilter EXCEPTION_FILTER = new CloudwatchExceptionFilter();
+
+ private CloudwatchExceptionFilter() {}
+
+ @Override
+ public boolean isMatch(Exception ex)
+ {
+ if (ex instanceof AWSLogsException && ex.getMessage().startsWith("Rate exceeded")) {
+ return true;
+ }
+
+ return (ex instanceof LimitExceededException);
+ }
+}
diff --git a/athena-cloudwatch/src/main/java/com/amazonaws/athena/connectors/cloudwatch/CloudwatchMetadataHandler.java b/athena-cloudwatch/src/main/java/com/amazonaws/athena/connectors/cloudwatch/CloudwatchMetadataHandler.java
new file mode 100644
index 0000000000..d88b39d5a2
--- /dev/null
+++ b/athena-cloudwatch/src/main/java/com/amazonaws/athena/connectors/cloudwatch/CloudwatchMetadataHandler.java
@@ -0,0 +1,345 @@
+/*-
+ * #%L
+ * athena-cloudwatch
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+package com.amazonaws.athena.connectors.cloudwatch;
+
+import com.amazonaws.athena.connector.lambda.QueryStatusChecker;
+import com.amazonaws.athena.connector.lambda.ThrottlingInvoker;
+import com.amazonaws.athena.connector.lambda.data.Block;
+import com.amazonaws.athena.connector.lambda.data.BlockAllocator;
+import com.amazonaws.athena.connector.lambda.data.BlockWriter;
+import com.amazonaws.athena.connector.lambda.data.SchemaBuilder;
+import com.amazonaws.athena.connector.lambda.domain.Split;
+import com.amazonaws.athena.connector.lambda.domain.TableName;
+import com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation;
+import com.amazonaws.athena.connector.lambda.handlers.MetadataHandler;
+import com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest;
+import com.amazonaws.athena.connector.lambda.metadata.GetSplitsResponse;
+import com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutRequest;
+import com.amazonaws.athena.connector.lambda.metadata.GetTableRequest;
+import com.amazonaws.athena.connector.lambda.metadata.GetTableResponse;
+import com.amazonaws.athena.connector.lambda.metadata.ListSchemasRequest;
+import com.amazonaws.athena.connector.lambda.metadata.ListSchemasResponse;
+import com.amazonaws.athena.connector.lambda.metadata.ListTablesRequest;
+import com.amazonaws.athena.connector.lambda.metadata.ListTablesResponse;
+import com.amazonaws.athena.connector.lambda.security.EncryptionKeyFactory;
+import com.amazonaws.services.athena.AmazonAthena;
+import com.amazonaws.services.logs.AWSLogs;
+import com.amazonaws.services.logs.AWSLogsClientBuilder;
+import com.amazonaws.services.logs.model.DescribeLogGroupsRequest;
+import com.amazonaws.services.logs.model.DescribeLogGroupsResult;
+import com.amazonaws.services.logs.model.DescribeLogStreamsRequest;
+import com.amazonaws.services.logs.model.DescribeLogStreamsResult;
+import com.amazonaws.services.logs.model.LogStream;
+import com.amazonaws.services.secretsmanager.AWSSecretsManager;
+import org.apache.arrow.util.VisibleForTesting;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.concurrent.TimeoutException;
+
+import static com.amazonaws.athena.connectors.cloudwatch.CloudwatchExceptionFilter.EXCEPTION_FILTER;
+
+/**
+ * Handles metadata requests for the Athena Cloudwatch Connector.
+ *
+ * For more detail, please see the module's README.md, some notable characteristics of this class include:
+ *
+ * 1. Each LogGroup is treated as a schema (aka database).
+ * 2. Each LogStream is treated as a table.
+ * 3. A special 'all_log_streams' view is added which allows you to query all LogStreams in a LogGroup.
+ * 4. LogStreams are treated as partitions and scanned in parallel.
+ * 5. Timestamp predicates are pushed into Cloudwatch itself.
+ */
+public class CloudwatchMetadataHandler
+ extends MetadataHandler
+{
+ private static final Logger logger = LoggerFactory.getLogger(CloudwatchMetadataHandler.class);
+
+ //Used to tag log lines generated by this connector for diagnostic purposes when interacting with Athena.
+ private static final String sourceType = "cloudwatch";
+ //some customers have a very large number of log groups and log streams. In those cases we limit
+ //the max results as a safety mechanism. They can still be queried but aren't returned in show tables or show databases.
+ private static final long MAX_RESULTS = 100_000;
+ //The maximum number of splits that will be generated by a single call to doGetSplits(...) before we paginate.
+ protected static final int MAX_SPLITS_PER_REQUEST = 1000;
+ //The name of the special table view which allows you to query all log streams in a LogGroup
+ protected static final String ALL_LOG_STREAMS_TABLE = "all_log_streams";
+ //The name of the log stream field in our response and split objects.
+ protected static final String LOG_STREAM_FIELD = "log_stream";
+ //The name of the log group field in our response and split objects.
+ protected static final String LOG_GROUP_FIELD = "log_group";
+ //The name of the log time field in our response and split objects.
+ protected static final String LOG_TIME_FIELD = "time";
+ //The name of the log message field in our response and split objects.
+ protected static final String LOG_MSG_FIELD = "message";
+ //The name of the log stream size field in our split objects.
+ protected static final String LOG_STREAM_SIZE_FIELD = "log_stream_bytes";
+ //The schema of all Cloudwatch tables.
+ protected static final Schema CLOUDWATCH_SCHEMA;
+
+ static {
+ CLOUDWATCH_SCHEMA = SchemaBuilder.newBuilder()
+ .addField(LOG_STREAM_FIELD, Types.MinorType.VARCHAR.getType())
+ .addField(LOG_TIME_FIELD, new ArrowType.Int(64, true))
+ .addField(LOG_MSG_FIELD, Types.MinorType.VARCHAR.getType())
+ //requests to read multiple log streams can be parallelized, so let's treat each one like a partition
+ .addMetadata("partitionCols", LOG_STREAM_FIELD)
+ .build();
+ }
+
+ private final AWSLogs awsLogs;
+ private final ThrottlingInvoker invoker = ThrottlingInvoker.newDefaultBuilder(EXCEPTION_FILTER).build();
+ private final CloudwatchTableResolver tableResolver;
+
+ public CloudwatchMetadataHandler()
+ {
+ super(sourceType);
+ this.awsLogs = AWSLogsClientBuilder.standard().build();
+ tableResolver = new CloudwatchTableResolver(invoker, awsLogs, MAX_RESULTS, MAX_RESULTS);
+ }
+
+ @VisibleForTesting
+ protected CloudwatchMetadataHandler(AWSLogs awsLogs,
+ EncryptionKeyFactory keyFactory,
+ AWSSecretsManager secretsManager,
+ AmazonAthena athena,
+ String spillBucket,
+ String spillPrefix)
+ {
+ super(keyFactory, secretsManager, athena, sourceType, spillBucket, spillPrefix);
+ this.awsLogs = awsLogs;
+ tableResolver = new CloudwatchTableResolver(invoker, awsLogs, MAX_RESULTS, MAX_RESULTS);
+ }
+
+ /**
+ * List LogGroups in your Cloudwatch account treating each as a 'schema' (aka database)
+ *
+ * @see MetadataHandler
+ */
+ @Override
+ public ListSchemasResponse doListSchemaNames(BlockAllocator blockAllocator, ListSchemasRequest listSchemasRequest)
+ throws TimeoutException
+ {
+ DescribeLogGroupsRequest request = new DescribeLogGroupsRequest();
+ DescribeLogGroupsResult result;
+ List<String> schemas = new ArrayList<>();
+ do {
+ if (schemas.size() > MAX_RESULTS) {
+ throw new RuntimeException("Too many log groups, exceeded max metadata results for schema count.");
+ }
+ result = invoker.invoke(() -> awsLogs.describeLogGroups(request));
+ result.getLogGroups().forEach(next -> schemas.add(next.getLogGroupName().toLowerCase()));
+ request.setNextToken(result.getNextToken());
+ logger.info("doListSchemaNames: Listing log groups {} {}", result.getNextToken(), schemas.size());
+ }
+ while (result.getNextToken() != null);
+
+ return new ListSchemasResponse(listSchemasRequest.getCatalogName(), schemas);
+ }
+
+ /**
+ * List LogStreams within the requested schema (aka LogGroup) in your Cloudwatch account treating each as a 'table'.
+ *
+ * @see MetadataHandler
+ */
+ @Override
+ public ListTablesResponse doListTables(BlockAllocator blockAllocator, ListTablesRequest listTablesRequest)
+ throws TimeoutException
+ {
+ String logGroupName = tableResolver.validateSchema(listTablesRequest.getSchemaName());
+ DescribeLogStreamsRequest request = new DescribeLogStreamsRequest(logGroupName);
+ DescribeLogStreamsResult result;
+ List<TableName> tables = new ArrayList<>();
+ do {
+ if (tables.size() > MAX_RESULTS) {
+ throw new RuntimeException("Too many log streams, exceeded max metadata results for table count.");
+ }
+ result = invoker.invoke(() -> awsLogs.describeLogStreams(request));
+ result.getLogStreams().forEach(next -> tables.add(toTableName(listTablesRequest, next)));
+ request.setNextToken(result.getNextToken());
+ logger.info("doListTables: Listing log streams {} {}", result.getNextToken(), tables.size());
+ }
+ while (result.getNextToken() != null);
+
+ //We add a special table that represents all log streams. This is helpful depending on how
+ //you have your logs organized.
+ tables.add(new TableName(listTablesRequest.getSchemaName(), ALL_LOG_STREAMS_TABLE));
+
+ return new ListTablesResponse(listTablesRequest.getCatalogName(), tables);
+ }
+
+ /**
+ * Returns the pre-set schema for the request Cloudwatch table (LogStream) and schema (LogGroup) after
+ * validating that it exists.
+ *
+ * @see MetadataHandler
+ */
+ @Override
+ public GetTableResponse doGetTable(BlockAllocator blockAllocator, GetTableRequest getTableRequest)
+ {
+ TableName tableName = getTableRequest.getTableName();
+ tableResolver.validateTable(tableName);
+ return new GetTableResponse(getTableRequest.getCatalogName(),
+ getTableRequest.getTableName(),
+ CLOUDWATCH_SCHEMA,
+ Collections.singleton(LOG_STREAM_FIELD));
+ }
+
+ /**
+ * We add one additional field to the partition schema. This field is used for our own purposes and ignored
+ * by Athena but it will get passed to calls to GetSplits(...) which is where we will set it on our Split
+ * without the need to call Cloudwatch a second time.
+ *
+ * @see MetadataHandler
+ */
+ @Override
+ public void enhancePartitionSchema(SchemaBuilder partitionSchemaBuilder, GetTableLayoutRequest request)
+ {
+ partitionSchemaBuilder.addField(LOG_STREAM_SIZE_FIELD, new ArrowType.Int(64, true));
+ partitionSchemaBuilder.addField(LOG_GROUP_FIELD, Types.MinorType.VARCHAR.getType());
+ }
+
+ /**
+ * Gets the list of LogStreams that need to be scanned to satisfy the requested table. In most cases this will be just
+ * 1 LogStream and this results in just 1 partition. If, however, the request is for the special ALL_LOG_STREAMS view
+ * then all LogStreams in the requested LogGroup (schema) are queried and turned into partitions 1:1.
+ *
+ * @note This method applies partition pruning based on the log_stream field.
+ * @see MetadataHandler
+ */
+ @Override
+ public void getPartitions(BlockWriter blockWriter, GetTableLayoutRequest request, QueryStatusChecker queryStatusChecker)
+ throws Exception
+ {
+ CloudwatchTableName cwTableName = tableResolver.validateTable(request.getTableName());
+
+ DescribeLogStreamsRequest cwRequest = new DescribeLogStreamsRequest(cwTableName.getLogGroupName());
+ if (!ALL_LOG_STREAMS_TABLE.equals(cwTableName.getLogStreamName())) {
+ cwRequest.setLogStreamNamePrefix(cwTableName.getLogStreamName());
+ }
+
+ DescribeLogStreamsResult result;
+ do {
+ result = invoker.invoke(() -> awsLogs.describeLogStreams(cwRequest));
+ for (LogStream next : result.getLogStreams()) {
+ //Each log stream that matches any possible partition pruning should be added to the partition list.
+ blockWriter.writeRows((Block block, int rowNum) -> {
+ boolean matched = block.setValue(LOG_GROUP_FIELD, rowNum, cwRequest.getLogGroupName());
+ matched &= block.setValue(LOG_STREAM_FIELD, rowNum, next.getLogStreamName());
+ matched &= block.setValue(LOG_STREAM_SIZE_FIELD, rowNum, next.getStoredBytes());
+ return matched ? 1 : 0;
+ });
+ }
+ cwRequest.setNextToken(result.getNextToken());
+ }
+ while (result.getNextToken() != null && queryStatusChecker.isQueryRunning());
+ }
+
+ /**
+ * Each partition is converted into a single Split which means we will potentially read all LogStreams required for
+ * the query in parallel.
+ *
+ * @see MetadataHandler
+ */
+ @Override
+ public GetSplitsResponse doGetSplits(BlockAllocator allocator, GetSplitsRequest request)
+ {
+ int partitionContd = decodeContinuationToken(request);
+ Set<Split> splits = new HashSet<>();
+ Block partitions = request.getPartitions();
+ for (int curPartition = partitionContd; curPartition < partitions.getRowCount(); curPartition++) {
+ FieldReader logStreamReader = partitions.getFieldReader(LOG_STREAM_FIELD);
+ logStreamReader.setPosition(curPartition);
+
+ FieldReader logGroupReader = partitions.getFieldReader(LOG_GROUP_FIELD);
+ logGroupReader.setPosition(curPartition);
+
+ FieldReader sizeReader = partitions.getFieldReader(LOG_STREAM_SIZE_FIELD);
+ sizeReader.setPosition(curPartition);
+
+ //Every split must have a unique location if we wish to spill to avoid failures
+ SpillLocation spillLocation = makeSpillLocation(request);
+
+ Split.Builder splitBuilder = Split.newBuilder(spillLocation, makeEncryptionKey())
+ .add(CloudwatchMetadataHandler.LOG_GROUP_FIELD, String.valueOf(logGroupReader.readText()))
+ .add(CloudwatchMetadataHandler.LOG_STREAM_FIELD, String.valueOf(logStreamReader.readText()))
+ .add(CloudwatchMetadataHandler.LOG_STREAM_SIZE_FIELD, String.valueOf(sizeReader.readLong()));
+
+ splits.add(splitBuilder.build());
+
+ if (splits.size() >= MAX_SPLITS_PER_REQUEST) {
+ //We exceeded the number of splits we want to return in a single request, so return what we have and provide
+ //a continuation token.
+ return new GetSplitsResponse(request.getCatalogName(),
+ splits,
+ encodeContinuationToken(curPartition));
+ }
+ }
+
+ return new GetSplitsResponse(request.getCatalogName(), splits, null);
+ }
+
+ /**
+ * Used to handle paginated requests.
+ *
+ * @return The partition number to resume with.
+ */
+ private int decodeContinuationToken(GetSplitsRequest request)
+ {
+ if (request.hasContinuationToken()) {
+ return Integer.valueOf(request.getContinuationToken());
+ }
+
+ //No continuation token present
+ return 0;
+ }
+
+ /**
+ * Used to create pagination tokens by encoding the number of the next partition to process.
+ *
+ * @param partition The number of the next partition we should process on the next call.
+ * @return The encoded continuation token.
+ */
+ private String encodeContinuationToken(int partition)
+ {
+ return String.valueOf(partition);
+ }
+
+ /**
+ * Helper that converts a LogStream into a TableName, using the schema (LogGroup) from the request and the
+ * lowercased LogStream name.
+ *
+ * @param request The ListTablesRequest to retrieve the schema name from.
+ * @param logStream The LogStream to turn into a table.
+ * @return A TableName with the schema (LogGroup) taken from the request and the table (LogStream) lowercased.
+ */
+ private TableName toTableName(ListTablesRequest request, LogStream logStream)
+ {
+ return new TableName(request.getSchemaName(), logStream.getLogStreamName().toLowerCase());
+ }
+}
diff --git a/athena-cloudwatch/src/main/java/com/amazonaws/athena/connectors/cloudwatch/CloudwatchRecordHandler.java b/athena-cloudwatch/src/main/java/com/amazonaws/athena/connectors/cloudwatch/CloudwatchRecordHandler.java
new file mode 100644
index 0000000000..78388c20bb
--- /dev/null
+++ b/athena-cloudwatch/src/main/java/com/amazonaws/athena/connectors/cloudwatch/CloudwatchRecordHandler.java
@@ -0,0 +1,172 @@
+/*-
+ * #%L
+ * athena-cloudwatch
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+package com.amazonaws.athena.connectors.cloudwatch;
+
+import com.amazonaws.athena.connector.lambda.QueryStatusChecker;
+import com.amazonaws.athena.connector.lambda.ThrottlingInvoker;
+import com.amazonaws.athena.connector.lambda.data.Block;
+import com.amazonaws.athena.connector.lambda.data.BlockSpiller;
+import com.amazonaws.athena.connector.lambda.domain.Split;
+import com.amazonaws.athena.connector.lambda.domain.TableName;
+import com.amazonaws.athena.connector.lambda.domain.predicate.Constraints;
+import com.amazonaws.athena.connector.lambda.domain.predicate.Range;
+import com.amazonaws.athena.connector.lambda.domain.predicate.SortedRangeSet;
+import com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet;
+import com.amazonaws.athena.connector.lambda.handlers.RecordHandler;
+import com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest;
+import com.amazonaws.services.athena.AmazonAthena;
+import com.amazonaws.services.athena.AmazonAthenaClientBuilder;
+import com.amazonaws.services.logs.AWSLogs;
+import com.amazonaws.services.logs.AWSLogsClientBuilder;
+import com.amazonaws.services.logs.model.GetLogEventsRequest;
+import com.amazonaws.services.logs.model.GetLogEventsResult;
+import com.amazonaws.services.logs.model.OutputLogEvent;
+import com.amazonaws.services.s3.AmazonS3;
+import com.amazonaws.services.s3.AmazonS3ClientBuilder;
+import com.amazonaws.services.secretsmanager.AWSSecretsManager;
+import com.amazonaws.services.secretsmanager.AWSSecretsManagerClientBuilder;
+import org.apache.arrow.util.VisibleForTesting;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.concurrent.TimeoutException;
+import java.util.concurrent.atomic.AtomicLong;
+
+import static com.amazonaws.athena.connectors.cloudwatch.CloudwatchExceptionFilter.EXCEPTION_FILTER;
+import static com.amazonaws.athena.connectors.cloudwatch.CloudwatchMetadataHandler.LOG_GROUP_FIELD;
+import static com.amazonaws.athena.connectors.cloudwatch.CloudwatchMetadataHandler.LOG_MSG_FIELD;
+import static com.amazonaws.athena.connectors.cloudwatch.CloudwatchMetadataHandler.LOG_STREAM_FIELD;
+import static com.amazonaws.athena.connectors.cloudwatch.CloudwatchMetadataHandler.LOG_TIME_FIELD;
+
+/**
+ * Handles data read record requests for the Athena Cloudwatch Connector.
+ *
+ * For more detail, please see the module's README.md. Some notable characteristics of this class include:
+ *
+ * 1. Reads and maps Cloudwatch Logs data for a specific LogStream (split)
+ * 2. Attempts to push down time range predicates into Cloudwatch.
+ */
+public class CloudwatchRecordHandler
+ extends RecordHandler
+{
+ private static final Logger logger = LoggerFactory.getLogger(CloudwatchRecordHandler.class);
+ //Used to tag log lines generated by this connector for diagnostic purposes when interacting with Athena.
+ private static final String sourceType = "cloudwatch";
+ //Used to handle Throttling events and apply AIMD congestion control
+ ThrottlingInvoker invoker = ThrottlingInvoker.newDefaultBuilder(EXCEPTION_FILTER).build();
+ private final AtomicLong count = new AtomicLong(0);
+ private final AWSLogs awsLogs;
+
+ public CloudwatchRecordHandler()
+ {
+ this(AmazonS3ClientBuilder.defaultClient(),
+ AWSSecretsManagerClientBuilder.defaultClient(),
+ AmazonAthenaClientBuilder.defaultClient(),
+ AWSLogsClientBuilder.defaultClient());
+ }
+
+ @VisibleForTesting
+ protected CloudwatchRecordHandler(AmazonS3 amazonS3, AWSSecretsManager secretsManager, AmazonAthena athena, AWSLogs awsLogs)
+ {
+ super(amazonS3, secretsManager, athena, sourceType);
+ this.awsLogs = awsLogs;
+ }
+
+ /**
+ * Scans Cloudwatch Logs using the LogStream and optional Time stamp filters.
+ *
+ * @see RecordHandler
+ */
+ @Override
+ protected void readWithConstraint(BlockSpiller spiller, ReadRecordsRequest recordsRequest, QueryStatusChecker queryStatusChecker)
+ throws TimeoutException
+ {
+ String continuationToken = null;
+ TableName tableName = recordsRequest.getTableName();
+ Split split = recordsRequest.getSplit();
+ invoker.setBlockSpiller(spiller);
+ do {
+ final String actualContinuationToken = continuationToken;
+ GetLogEventsResult logEventsResult = invoker.invoke(() -> awsLogs.getLogEvents(
+ pushDownConstraints(recordsRequest.getConstraints(),
+ new GetLogEventsRequest()
+ .withLogGroupName(split.getProperty(LOG_GROUP_FIELD))
+ //We use the property instead of the table name because of the special all_log_streams table
+ .withLogStreamName(split.getProperty(LOG_STREAM_FIELD))
+ .withNextToken(actualContinuationToken)
+ )));
+
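+ //Cloudwatch Logs signals the end of a LogStream by returning the same forward token that was passed in, so once the token stops changing we clear it to exit the loop.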
+ if (continuationToken == null || !continuationToken.equals(logEventsResult.getNextForwardToken())) {
+ continuationToken = logEventsResult.getNextForwardToken();
+ }
+ else {
+ continuationToken = null;
+ }
+
+ for (OutputLogEvent ole : logEventsResult.getEvents()) {
+ spiller.writeRows((Block block, int rowNum) -> {
+ boolean matched = true;
+ matched &= block.offerValue(LOG_STREAM_FIELD, rowNum, split.getProperty(LOG_STREAM_FIELD));
+ matched &= block.offerValue(LOG_TIME_FIELD, rowNum, ole.getTimestamp());
+ matched &= block.offerValue(LOG_MSG_FIELD, rowNum, ole.getMessage());
+ return matched ? 1 : 0;
+ });
+ }
+
+ logger.info("readWithConstraint: LogGroup[{}] LogStream[{}] Continuation[{}] rows[{}]",
+ tableName.getSchemaName(), tableName.getTableName(), continuationToken,
+ logEventsResult.getEvents().size());
+ }
+ while (continuationToken != null && queryStatusChecker.isQueryRunning());
+ }
+
+ /**
+ * Attempts to push down predicates into Cloudwatch Logs by decorating the Cloudwatch Logs request.
+ *
+ * @param constraints The constraints for the read as provided by Athena based on the customer's query.
+ * @param request The Cloudwatch Logs request to inject predicates to.
+ * @return The decorated Cloudwatch Logs request.
+ * @note This implementation currently only supports pushing down SortedRangeSet filters (>=, <=, between) on the log time column.
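+ * For example, a predicate like {@code WHERE time BETWEEN 100 AND 200} should arrive as a SortedRangeSet whose
+ * span has a low of 100 and a high of 200 (illustrative values), which this method translates into the request's
+ * start and end time.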
+ */
+ private GetLogEventsRequest pushDownConstraints(Constraints constraints, GetLogEventsRequest request)
+ {
+ ValueSet timeConstraint = constraints.getSummary().get(LOG_TIME_FIELD);
+ if (timeConstraint instanceof SortedRangeSet && !timeConstraint.isNullAllowed()) {
+ //SortedRangeSet is how >, <, and between predicates are represented. These are the easiest and most
+ //common filters when searching logs, so we attempt to push them down here as an optimization. SQL can
+ //represent complex, overlapping ranges which Cloudwatch cannot support, so this is not a replacement
+ //for applying constraints using the ConstraintEvaluator.
+
+ Range basicPredicate = ((SortedRangeSet) timeConstraint).getSpan();
+
+ if (!basicPredicate.getLow().isNullValue()) {
+ Long lowerBound = (Long) basicPredicate.getLow().getValue();
+ request.setStartTime(lowerBound);
+ }
+
+ if (!basicPredicate.getHigh().isNullValue()) {
+ Long upperBound = (Long) basicPredicate.getHigh().getValue();
+ request.setEndTime(upperBound);
+ }
+ }
+
+ return request;
+ }
+}
diff --git a/athena-cloudwatch/src/main/java/com/amazonaws/athena/connectors/cloudwatch/CloudwatchTableName.java b/athena-cloudwatch/src/main/java/com/amazonaws/athena/connectors/cloudwatch/CloudwatchTableName.java
new file mode 100644
index 0000000000..7e083ebc71
--- /dev/null
+++ b/athena-cloudwatch/src/main/java/com/amazonaws/athena/connectors/cloudwatch/CloudwatchTableName.java
@@ -0,0 +1,80 @@
+/*-
+ * #%L
+ * athena-cloudwatch
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+package com.amazonaws.athena.connectors.cloudwatch;
+
+import com.amazonaws.athena.connector.lambda.domain.TableName;
+
+import java.util.Objects;
+
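+/**
+ * Represents a Cloudwatch LogGroup (schema) and LogStream (table) pair in their original Cloudwatch casing,
+ * with a helper for converting the pair to the lowercased TableName form expected by the engine.
+ */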
+public class CloudwatchTableName
+{
+ private final String logGroupName;
+ private final String logStreamName;
+
+ public CloudwatchTableName(String logGroupName, String logStreamName)
+ {
+ this.logGroupName = logGroupName;
+ this.logStreamName = logStreamName;
+ }
+
+ public String getLogGroupName()
+ {
+ return logGroupName;
+ }
+
+ public String getLogStreamName()
+ {
+ return logStreamName;
+ }
+
+ public TableName toTableName()
+ {
+ return new TableName(logGroupName.toLowerCase(), logStreamName.toLowerCase());
+ }
+
+ @Override
+ public String toString()
+ {
+ return "CloudwatchTableName{" +
+ "logGroupName='" + logGroupName + '\'' +
+ ", logStreamName='" + logStreamName + '\'' +
+ '}';
+ }
+
+ @Override
+ public boolean equals(Object o)
+ {
+ if (this == o) {
+ return true;
+ }
+ if (o == null || getClass() != o.getClass()) {
+ return false;
+ }
+ CloudwatchTableName that = (CloudwatchTableName) o;
+ return Objects.equals(getLogGroupName(), that.getLogGroupName()) &&
+ Objects.equals(getLogStreamName(), that.getLogStreamName());
+ }
+
+ @Override
+ public int hashCode()
+ {
+ return Objects.hash(getLogGroupName(), getLogStreamName());
+ }
+}
diff --git a/athena-cloudwatch/src/main/java/com/amazonaws/athena/connectors/cloudwatch/CloudwatchTableResolver.java b/athena-cloudwatch/src/main/java/com/amazonaws/athena/connectors/cloudwatch/CloudwatchTableResolver.java
new file mode 100644
index 0000000000..52526f5498
--- /dev/null
+++ b/athena-cloudwatch/src/main/java/com/amazonaws/athena/connectors/cloudwatch/CloudwatchTableResolver.java
@@ -0,0 +1,289 @@
+/*-
+ * #%L
+ * athena-cloudwatch
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+package com.amazonaws.athena.connectors.cloudwatch;
+
+import com.amazonaws.athena.connector.lambda.ThrottlingInvoker;
+import com.amazonaws.athena.connector.lambda.domain.TableName;
+import com.amazonaws.services.logs.AWSLogs;
+import com.amazonaws.services.logs.model.DescribeLogGroupsRequest;
+import com.amazonaws.services.logs.model.DescribeLogGroupsResult;
+import com.amazonaws.services.logs.model.DescribeLogStreamsRequest;
+import com.amazonaws.services.logs.model.DescribeLogStreamsResult;
+import com.amazonaws.services.logs.model.LogGroup;
+import com.amazonaws.services.logs.model.LogStream;
+import com.google.common.cache.CacheBuilder;
+import com.google.common.cache.CacheLoader;
+import com.google.common.cache.LoadingCache;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.TimeoutException;
+
+import static com.amazonaws.athena.connectors.cloudwatch.CloudwatchMetadataHandler.ALL_LOG_STREAMS_TABLE;
+
+/**
+ * This class helps with resolving the differences in casing between Cloudwatch Logs and Presto. Presto expects all
+ * databases, tables, and columns to be lower case. This class allows us to use Cloudwatch LogGroups and LogStreams
+ * which may have capital letters in them without issue. It does so by caching LogGroups and LogStreams and doing
+ * a case-insensitive search over them. It will first try a targeted get to reduce the penalty for LogGroups
+ * and LogStreams which don't have capitalization issues. It also has an optimization for Lambda, a common
+ * source of capitalized LogStreams, by doing a targeted replace of Lambda's $LATEST pattern.
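+ * <p>
+ * A minimal usage sketch; the client construction, cache sizes, and the LogGroup/LogStream names below are
+ * illustrative assumptions rather than part of this class:
+ * <pre>{@code
+ *   ThrottlingInvoker invoker = ThrottlingInvoker.newDefaultBuilder(EXCEPTION_FILTER).build();
+ *   CloudwatchTableResolver resolver = new CloudwatchTableResolver(invoker, AWSLogsClientBuilder.defaultClient(), 10, 1_000);
+ *   //Resolves the lowercased names supplied by Presto back to their actual Cloudwatch casing.
+ *   CloudwatchTableName resolved = resolver.validateTable(new TableName("/aws/lambda/my-function", "2019/11/01/[$latest]abc123"));
+ * }</pre>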
+ */
+public class CloudwatchTableResolver
+{
+ private static final Logger logger = LoggerFactory.getLogger(CloudwatchTableResolver.class);
+
+ private AWSLogs awsLogs;
+ //Used to handle Throttling events using an AIMD strategy for congestion control.
+ private ThrottlingInvoker invoker;
+ //The lowercased form of the LogStream pattern that Lambda capitalizes
+ private static final String LAMBDA_PATTERN = "$latest";
+ //The actual (capitalized) pattern that Lambda uses in its LogStream names
+ private static final String LAMBDA_ACTUAL_PATTERN = "$LATEST";
+ //The schema cache, mapping Presto casing to Cloudwatch casing
+ private final LoadingCache<String, String> schemaCache;
+ //The table cache, mapping Presto casing to Cloudwatch casing
+ private final LoadingCache<TableName, CloudwatchTableName> tableCache;
+
+ /**
+ * Constructs an instance of the table resolver.
+ *
+ * @param invoker The ThrottlingInvoker to use to handle throttling events.
+ * @param awsLogs The AWSLogs client to use for cache misses.
+ * @param maxSchemaCacheSize The max number of schemas to cache.
+ * @param maxTableCacheSize The max tables to cache.
+ */
+ public CloudwatchTableResolver(ThrottlingInvoker invoker, AWSLogs awsLogs, long maxSchemaCacheSize, long maxTableCacheSize)
+ {
+ this.invoker = invoker;
+ this.awsLogs = awsLogs;
+ this.tableCache = CacheBuilder.newBuilder()
+ .maximumSize(maxTableCacheSize)
+ .build(
+ new CacheLoader<TableName, CloudwatchTableName>()
+ {
+ public CloudwatchTableName load(TableName tableName)
+ throws TimeoutException
+ {
+ return loadLogStreams(tableName.getSchemaName(), tableName.getTableName());
+ }
+ });
+
+ this.schemaCache = CacheBuilder.newBuilder()
+ .maximumSize(maxSchemaCacheSize)
+ .build(
+ new CacheLoader<String, String>()
+ {
+ public String load(String schemaName)
+ throws TimeoutException
+ {
+ return loadLogGroups(schemaName);
+ }
+ });
+ }
+
+ /**
+ * Loads the requested LogStream as identified by the given LogGroup and LogStream names.
+ *
+ * @param logGroup The properly cased schema (LogGroup) name.
+ * @param logStream The table (LogStream) name to validate.
+ * @return The matching CloudwatchTableName; throws IllegalArgumentException if no such LogStream exists.
+ * @note This method also primes the cache with other CloudwatchTableNames found along the way while scanning Cloudwatch.
+ */
+ private CloudwatchTableName loadLogStreams(String logGroup, String logStream)
+ throws TimeoutException
+ {
+ //As an optimization, see if the table name is an exact match (meaning likely no casing issues)
+ CloudwatchTableName result = loadLogStream(logGroup, logStream);
+ if (result != null) {
+ return result;
+ }
+
+ logger.info("loadLogStreams: Did not find a match for the table, falling back to LogGroup scan for {}:{}",
+ logGroup, logStream);
+ DescribeLogStreamsRequest validateTableRequest = new DescribeLogStreamsRequest(logGroup);
+ DescribeLogStreamsResult validateTableResult;
+ do {
+ validateTableResult = invoker.invoke(() -> awsLogs.describeLogStreams(validateTableRequest));
+ for (LogStream nextStream : validateTableResult.getLogStreams()) {
+ String logStreamName = nextStream.getLogStreamName();
+ CloudwatchTableName nextCloudwatch = new CloudwatchTableName(logGroup, logStreamName);
+ tableCache.put(nextCloudwatch.toTableName(), nextCloudwatch);
+ if (nextCloudwatch.getLogStreamName().equalsIgnoreCase(logStream)) {
+ //We stop loading once we find the stream we care about. This is an optimization that
+ //attempts to exploit the fact that we likely access more recent logstreams first.
+ logger.info("loadLogStreams: Matched {} for {}", nextCloudwatch, logStream);
+ return nextCloudwatch;
+ }
+ }
+ validateTableRequest.setNextToken(validateTableResult.getNextToken());
+ }
+ while (validateTableResult.getNextToken() != null);
+
+ //We could not find a match
+ throw new IllegalArgumentException("No such table " + logGroup + " " + logStream);
+ }
+
+ /**
+ * Optimization that attempts to load a specific LogStream directly by prefix, as identified by the given LogGroup and LogStream names.
+ *
+ * @param logGroup The properly cased schema name.
+ * @param logStream The table name to validate.
+ * @return The CloudwatchTableName or null if not found.
+ * @note This method also primes the cache with other CloudwatchTableNames found along the way while scanning Cloudwatch.
+ */
+ private CloudwatchTableName loadLogStream(String logGroup, String logStream)
+ throws TimeoutException
+ {
+ if (ALL_LOG_STREAMS_TABLE.equalsIgnoreCase(logStream)) {
+ return new CloudwatchTableName(logGroup, ALL_LOG_STREAMS_TABLE);
+ }
+
+ String effectiveTableName = logStream;
+ if (effectiveTableName.contains(LAMBDA_PATTERN)) {
+ logger.info("loadLogStream: Appears to be a lambda log_stream, substituting Lambda pattern {} for {}",
+ LAMBDA_PATTERN, effectiveTableName);
+ effectiveTableName = effectiveTableName.replace(LAMBDA_PATTERN, LAMBDA_ACTUAL_PATTERN);
+ }
+
+ DescribeLogStreamsRequest request = new DescribeLogStreamsRequest(logGroup)
+ .withLogStreamNamePrefix(effectiveTableName);
+ DescribeLogStreamsResult result = invoker.invoke(() -> awsLogs.describeLogStreams(request));
+ for (LogStream nextStream : result.getLogStreams()) {
+ String logStreamName = nextStream.getLogStreamName();
+ CloudwatchTableName nextCloudwatch = new CloudwatchTableName(logGroup, logStreamName);
+ if (nextCloudwatch.getLogStreamName().equalsIgnoreCase(effectiveTableName)) {
+ logger.info("loadLogStream: Matched {} for {}:{}", nextCloudwatch, logGroup, logStream);
+ return nextCloudwatch;
+ }
+ }
+
+ return null;
+ }
+
+ /**
+ * Loads the requested LogGroup as identified by the schemaName.
+ *
+ * @param schemaName The schemaName to load.
+ * @return The actual LogGroup name in cloudwatch.
+ * @note This method also primes the cache with other LogGroups found along the way while scanning Cloudwatch.
+ */
+ private String loadLogGroups(String schemaName)
+ throws TimeoutException
+ {
+ //As an optimization, see if the schema name is an exact match (meaning likely no casing issues)
+ String result = loadLogGroup(schemaName);
+ if (result != null) {
+ return result;
+ }
+
+ logger.info("loadLogGroups: Did not find a match for the schema, falling back to LogGroup scan for {}", schemaName);
+ DescribeLogGroupsRequest validateSchemaRequest = new DescribeLogGroupsRequest();
+ DescribeLogGroupsResult validateSchemaResult;
+ do {
+ validateSchemaResult = invoker.invoke(() -> awsLogs.describeLogGroups(validateSchemaRequest));
+ for (LogGroup next : validateSchemaResult.getLogGroups()) {
+ String nextLogGroupName = next.getLogGroupName();
+ schemaCache.put(nextLogGroupName.toLowerCase(), nextLogGroupName);
+ if (nextLogGroupName.equalsIgnoreCase(schemaName)) {
+ logger.info("loadLogGroups: Matched {} for {}", nextLogGroupName, schemaName);
+ return nextLogGroupName;
+ }
+ }
+ validateSchemaRequest.setNextToken(validateSchemaResult.getNextToken());
+ }
+ while (validateSchemaResult.getNextToken() != null);
+
+ //We could not find a match
+ throw new IllegalArgumentException("No such schema " + schemaName);
+ }
+
+ /**
+ * Optimization that attempts to load a specific LogGroup directly by prefix, as identified by the schemaName.
+ *
+ * @param schemaName The schemaName to load.
+ * @return The actual LogGroup name in Cloudwatch or null if not found.
+ */
+ private String loadLogGroup(String schemaName)
+ throws TimeoutException
+ {
+ DescribeLogGroupsRequest request = new DescribeLogGroupsRequest().withLogGroupNamePrefix(schemaName);
+ DescribeLogGroupsResult result = invoker.invoke(() -> awsLogs.describeLogGroups(request));
+ for (LogGroup next : result.getLogGroups()) {
+ String nextLogGroupName = next.getLogGroupName();
+ if (nextLogGroupName.equalsIgnoreCase(schemaName)) {
+ logger.info("loadLogGroup: Matched {} for {}", nextLogGroupName, schemaName);
+ return nextLogGroupName;
+ }
+ }
+
+ return null;
+ }
+
+ /**
+ * Used to validate and convert the given TableName to a properly cased and qualified CloudwatchTableName.
+ *
+ * @param tableName The TableName to validate and convert.
+ * @return The CloudwatchTableName for the provided TableName or throws if the TableName could not be resolved to a
+ * CloudwatchTableName. This method mostly handles resolving case mismatches and ensuring the input is a valid entity
+ * in Cloudwatch.
+ */
+ public CloudwatchTableName validateTable(TableName tableName)
+ {
+ String actualSchema = validateSchema(tableName.getSchemaName());
+ CloudwatchTableName actual = null;
+ try {
+ actual = tableCache.get(new TableName(actualSchema, tableName.getTableName()));
+ if (actual == null) {
+ throw new IllegalArgumentException("Unknown table[" + tableName + "]");
+ }
+
+ return actual;
+ }
+ catch (ExecutionException ex) {
+ throw new RuntimeException("Exception while attempting to validate table " + tableName, ex);
+ }
+ }
+
+ /**
+ * Used to validate and convert the given schema name to a properly cased Cloudwatch LogGroup name.
+ *
+ * @param schema The schema name (LogGroup) to validate and convert.
+ * @return The cloudwatch LogGroup (aka schema name) or throws if the schema name could not be resolved to a
+ * LogGroup. This method mostly handles resolving case mismatches and ensuring the input is a valid entity
+ * in Cloudwatch.
+ */
+ public String validateSchema(String schema)
+ {
+ String actual = null;
+ try {
+ actual = schemaCache.get(schema);
+ if (actual == null) {
+ throw new IllegalArgumentException("Unknown schema[" + schema + "]");
+ }
+
+ return actual;
+ }
+ catch (ExecutionException ex) {
+ throw new RuntimeException("Exception while attempting to validate schema " + schema, ex);
+ }
+ }
+}
diff --git a/athena-cloudwatch/src/test/java/com/amazonaws/athena/connectors/cloudwatch/CloudwatchMetadataHandlerTest.java b/athena-cloudwatch/src/test/java/com/amazonaws/athena/connectors/cloudwatch/CloudwatchMetadataHandlerTest.java
new file mode 100644
index 0000000000..a9e7ef1671
--- /dev/null
+++ b/athena-cloudwatch/src/test/java/com/amazonaws/athena/connectors/cloudwatch/CloudwatchMetadataHandlerTest.java
@@ -0,0 +1,409 @@
+/*-
+ * #%L
+ * athena-cloudwatch
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+package com.amazonaws.athena.connectors.cloudwatch;
+
+import com.amazonaws.athena.connector.lambda.data.Block;
+import com.amazonaws.athena.connector.lambda.data.BlockAllocator;
+import com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl;
+import com.amazonaws.athena.connector.lambda.data.BlockUtils;
+import com.amazonaws.athena.connector.lambda.data.SchemaBuilder;
+import com.amazonaws.athena.connector.lambda.domain.Split;
+import com.amazonaws.athena.connector.lambda.domain.TableName;
+import com.amazonaws.athena.connector.lambda.domain.predicate.Constraints;
+import com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet;
+import com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet;
+import com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest;
+import com.amazonaws.athena.connector.lambda.metadata.GetSplitsResponse;
+import com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutRequest;
+import com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutResponse;
+import com.amazonaws.athena.connector.lambda.metadata.GetTableRequest;
+import com.amazonaws.athena.connector.lambda.metadata.GetTableResponse;
+import com.amazonaws.athena.connector.lambda.metadata.ListSchemasRequest;
+import com.amazonaws.athena.connector.lambda.metadata.ListSchemasResponse;
+import com.amazonaws.athena.connector.lambda.metadata.ListTablesRequest;
+import com.amazonaws.athena.connector.lambda.metadata.ListTablesResponse;
+import com.amazonaws.athena.connector.lambda.metadata.MetadataRequestType;
+import com.amazonaws.athena.connector.lambda.metadata.MetadataResponse;
+import com.amazonaws.athena.connector.lambda.security.FederatedIdentity;
+import com.amazonaws.athena.connector.lambda.security.LocalKeyFactory;
+import com.amazonaws.services.athena.AmazonAthena;
+import com.amazonaws.services.logs.AWSLogs;
+import com.amazonaws.services.logs.model.DescribeLogGroupsRequest;
+import com.amazonaws.services.logs.model.DescribeLogGroupsResult;
+import com.amazonaws.services.logs.model.DescribeLogStreamsRequest;
+import com.amazonaws.services.logs.model.DescribeLogStreamsResult;
+import com.amazonaws.services.logs.model.LogGroup;
+import com.amazonaws.services.logs.model.LogStream;
+import com.amazonaws.services.secretsmanager.AWSSecretsManager;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.mockito.Mock;
+import org.mockito.invocation.InvocationOnMock;
+import org.mockito.runners.MockitoJUnitRunner;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.TimeoutException;
+
+import static org.junit.Assert.*;
+import static org.mockito.Matchers.any;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.verifyNoMoreInteractions;
+import static org.mockito.Mockito.when;
+
+@RunWith(MockitoJUnitRunner.class)
+public class CloudwatchMetadataHandlerTest
+{
+ private static final Logger logger = LoggerFactory.getLogger(CloudwatchMetadataHandlerTest.class);
+
+ private FederatedIdentity identity = new FederatedIdentity("id", "principal", "account");
+ private CloudwatchMetadataHandler handler;
+ private BlockAllocator allocator;
+
+ @Mock
+ private AWSLogs mockAwsLogs;
+
+ @Mock
+ private AWSSecretsManager mockSecretsManager;
+
+ @Mock
+ private AmazonAthena mockAthena;
+
+ @Before
+ public void setUp()
+ throws Exception
+ {
+ when(mockAwsLogs.describeLogStreams(any(DescribeLogStreamsRequest.class))).thenAnswer((InvocationOnMock invocationOnMock) -> {
+ return new DescribeLogStreamsResult().withLogStreams(new LogStream().withLogStreamName("table-9"),
+ new LogStream().withLogStreamName("table-10"));
+ });
+
+ when(mockAwsLogs.describeLogGroups(any(DescribeLogGroupsRequest.class))).thenAnswer((InvocationOnMock invocationOnMock) -> {
+ return new DescribeLogGroupsResult().withLogGroups(new LogGroup().withLogGroupName("schema-1"),
+ new LogGroup().withLogGroupName("schema-20"));
+ });
+ handler = new CloudwatchMetadataHandler(mockAwsLogs, new LocalKeyFactory(), mockSecretsManager, mockAthena, "spillBucket", "spillPrefix");
+ allocator = new BlockAllocatorImpl();
+ }
+
+ @After
+ public void tearDown()
+ throws Exception
+ {
+ allocator.close();
+ }
+
+ @Test
+ public void doListSchemaNames()
+ throws TimeoutException
+ {
+ logger.info("doListSchemas - enter");
+
+ when(mockAwsLogs.describeLogGroups(any(DescribeLogGroupsRequest.class))).thenAnswer((InvocationOnMock invocationOnMock) -> {
+ DescribeLogGroupsRequest request = (DescribeLogGroupsRequest) invocationOnMock.getArguments()[0];
+
+ DescribeLogGroupsResult result = new DescribeLogGroupsResult();
+
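+ //Simulate three pages of 10 LogGroups each: tokens 1 through 3 are returned before a final empty page with no token, so the handler must page through four responses.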
+ Integer nextToken;
+ if (request.getNextToken() == null) {
+ nextToken = 1;
+ }
+ else if (Integer.valueOf(request.getNextToken()) < 3) {
+ nextToken = Integer.valueOf(request.getNextToken()) + 1;
+ }
+ else {
+ nextToken = null;
+ }
+
+ List<LogGroup> logGroups = new ArrayList<>();
+ if (request.getNextToken() == null || Integer.valueOf(request.getNextToken()) < 3) {
+ for (int i = 0; i < 10; i++) {
+ LogGroup nextLogGroup = new LogGroup();
+ nextLogGroup.setLogGroupName("schema-" + String.valueOf(i));
+ logGroups.add(nextLogGroup);
+ }
+ }
+
+ result.withLogGroups(logGroups);
+ if (nextToken != null) {
+ result.setNextToken(String.valueOf(nextToken));
+ }
+
+ return result;
+ });
+
+ ListSchemasRequest req = new ListSchemasRequest(identity, "queryId", "default");
+ ListSchemasResponse res = handler.doListSchemaNames(allocator, req);
+ logger.info("doListSchemas - {}", res.getSchemas());
+
+ assertTrue(res.getSchemas().size() == 30);
+ verify(mockAwsLogs, times(4)).describeLogGroups(any(DescribeLogGroupsRequest.class));
+ verifyNoMoreInteractions(mockAwsLogs);
+
+ logger.info("doListSchemas - exit");
+ }
+
+ @Test
+ public void doListTables()
+ throws TimeoutException
+ {
+ logger.info("doListTables - enter");
+
+ when(mockAwsLogs.describeLogStreams(any(DescribeLogStreamsRequest.class))).thenAnswer((InvocationOnMock invocationOnMock) -> {
+ DescribeLogStreamsRequest request = (DescribeLogStreamsRequest) invocationOnMock.getArguments()[0];
+
+ DescribeLogStreamsResult result = new DescribeLogStreamsResult();
+
+ Integer nextToken;
+ if (request.getNextToken() == null) {
+ nextToken = 1;
+ }
+ else if (Integer.valueOf(request.getNextToken()) < 3) {
+ nextToken = Integer.valueOf(request.getNextToken()) + 1;
+ }
+ else {
+ nextToken = null;
+ }
+
+ List<LogStream> logStreams = new ArrayList<>();
+ if (request.getNextToken() == null || Integer.valueOf(request.getNextToken()) < 3) {
+ for (int i = 0; i < 10; i++) {
+ LogStream nextLogStream = new LogStream();
+ nextLogStream.setLogStreamName("table-" + String.valueOf(i));
+ logStreams.add(nextLogStream);
+ }
+ }
+
+ result.withLogStreams(logStreams);
+ if (nextToken != null) {
+ result.setNextToken(String.valueOf(nextToken));
+ }
+
+ return result;
+ });
+
+ ListTablesRequest req = new ListTablesRequest(identity, "queryId", "default", "schema-1");
+ ListTablesResponse res = handler.doListTables(allocator, req);
+ logger.info("doListTables - {}", res.getTables());
+
+ assertTrue(res.getTables().contains(new TableName("schema-1", "all_log_streams")));
+
+ assertTrue(res.getTables().size() == 31);
+
+ verify(mockAwsLogs, times(4)).describeLogStreams(any(DescribeLogStreamsRequest.class));
+ verify(mockAwsLogs, times(1)).describeLogGroups(any(DescribeLogGroupsRequest.class));
+ verifyNoMoreInteractions(mockAwsLogs);
+
+ logger.info("doListTables - exit");
+ }
+
+ @Test
+ public void doGetTable()
+ {
+ logger.info("doGetTable - enter");
+ String expectedSchema = "schema-20";
+
+ when(mockAwsLogs.describeLogStreams(any(DescribeLogStreamsRequest.class))).thenAnswer((InvocationOnMock invocationOnMock) -> {
+ DescribeLogStreamsRequest request = (DescribeLogStreamsRequest) invocationOnMock.getArguments()[0];
+
+ assertTrue(request.getLogGroupName().equals(expectedSchema));
+ DescribeLogStreamsResult result = new DescribeLogStreamsResult();
+
+ Integer nextToken;
+ if (request.getNextToken() == null) {
+ nextToken = 1;
+ }
+ else if (Integer.valueOf(request.getNextToken()) < 3) {
+ nextToken = Integer.valueOf(request.getNextToken()) + 1;
+ }
+ else {
+ nextToken = null;
+ }
+
+ List<LogStream> logStreams = new ArrayList<>();
+ if (request.getNextToken() == null || Integer.valueOf(request.getNextToken()) < 3) {
+ for (int i = 0; i < 10; i++) {
+ LogStream nextLogStream = new LogStream();
+ nextLogStream.setLogStreamName("table-" + String.valueOf(i));
+ logStreams.add(nextLogStream);
+ }
+ }
+
+ result.withLogStreams(logStreams);
+ if (nextToken != null) {
+ result.setNextToken(String.valueOf(nextToken));
+ }
+
+ return result;
+ });
+
+ GetTableRequest req = new GetTableRequest(identity, "queryId", "default", new TableName(expectedSchema, "table-9"));
+ GetTableResponse res = handler.doGetTable(allocator, req);
+ logger.info("doGetTable - {} {}", res.getTableName(), res.getSchema());
+
+ assertEquals(new TableName(expectedSchema, "table-9"), res.getTableName());
+ assertTrue(res.getSchema() != null);
+
+ verify(mockAwsLogs, times(1)).describeLogStreams(any(DescribeLogStreamsRequest.class));
+
+ logger.info("doGetTable - exit");
+ }
+
+ @Test
+ public void doGetTableLayout()
+ throws Exception
+ {
+ logger.info("doGetTableLayout - enter");
+
+ when(mockAwsLogs.describeLogStreams(any(DescribeLogStreamsRequest.class))).thenAnswer((InvocationOnMock invocationOnMock) -> {
+ DescribeLogStreamsRequest request = (DescribeLogStreamsRequest) invocationOnMock.getArguments()[0];
+
+ DescribeLogStreamsResult result = new DescribeLogStreamsResult();
+
+ Integer nextToken;
+ if (request.getNextToken() == null) {
+ nextToken = 1;
+ }
+ else if (Integer.valueOf(request.getNextToken()) < 3) {
+ nextToken = Integer.valueOf(request.getNextToken()) + 1;
+ }
+ else {
+ nextToken = null;
+ }
+
+ List<LogStream> logStreams = new ArrayList<>();
+ if (request.getNextToken() == null || Integer.valueOf(request.getNextToken()) < 3) {
+ int continuation = request.getNextToken() == null ? 0 : Integer.valueOf(request.getNextToken());
+ for (int i = 0 + continuation * 100; i < 300; i++) {
+ LogStream nextLogStream = new LogStream();
+ nextLogStream.setLogStreamName("table-" + String.valueOf(i));
+ nextLogStream.setStoredBytes(i * 1000L);
+ logStreams.add(nextLogStream);
+ }
+ }
+
+ result.withLogStreams(logStreams);
+ if (nextToken != null) {
+ result.setNextToken(String.valueOf(nextToken));
+ }
+
+ return result;
+ });
+
+ Map<String, ValueSet> constraintsMap = new HashMap<>();
+
+ constraintsMap.put("log_stream",
+ EquatableValueSet.newBuilder(allocator, Types.MinorType.VARCHAR.getType(), true, false)
+ .add("table-10").build());
+
+ Schema schema = SchemaBuilder.newBuilder().addStringField("log_stream").build();
+
+ GetTableLayoutRequest req = new GetTableLayoutRequest(identity,
+ "queryId",
+ "default",
+ new TableName("schema-1", "all_log_streams"),
+ new Constraints(constraintsMap),
+ schema,
+ Collections.singleton("log_stream"));
+
+ GetTableLayoutResponse res = handler.doGetTableLayout(allocator, req);
+
+ logger.info("doGetTableLayout - {}", res.getPartitions().getSchema());
+ logger.info("doGetTableLayout - {}", res.getPartitions());
+
+ assertTrue(res.getPartitions().getSchema().findField("log_stream") != null);
+ assertTrue(res.getPartitions().getRowCount() == 1);
+
+ verify(mockAwsLogs, times(4)).describeLogStreams(any(DescribeLogStreamsRequest.class));
+
+ logger.info("doGetTableLayout - exit");
+ }
+
+ @Test
+ public void doGetSplits()
+ {
+ logger.info("doGetSplits: enter");
+
+ Schema schema = SchemaBuilder.newBuilder()
+ .addField(CloudwatchMetadataHandler.LOG_STREAM_FIELD, new ArrowType.Utf8())
+ .addField(CloudwatchMetadataHandler.LOG_STREAM_SIZE_FIELD, new ArrowType.Int(64, true))
+ .addField(CloudwatchMetadataHandler.LOG_GROUP_FIELD, new ArrowType.Utf8())
+ .build();
+
+ Block partitions = allocator.createBlock(schema);
+
+ int num_partitions = 2_000;
+ for (int i = 0; i < num_partitions; i++) {
+ BlockUtils.setValue(partitions.getFieldVector(CloudwatchMetadataHandler.LOG_STREAM_SIZE_FIELD), i, 2016L + i);
+ BlockUtils.setValue(partitions.getFieldVector(CloudwatchMetadataHandler.LOG_STREAM_FIELD), i, "log_stream_" + i);
+ BlockUtils.setValue(partitions.getFieldVector(CloudwatchMetadataHandler.LOG_GROUP_FIELD), i, "log_group_" + i);
+ }
+ partitions.setRowCount(num_partitions);
+
+ String continuationToken = null;
+ GetSplitsRequest originalReq = new GetSplitsRequest(identity,
+ "queryId",
+ "catalog_name",
+ new TableName("schema", "all_log_streams"),
+ partitions,
+ Collections.singletonList(CloudwatchMetadataHandler.LOG_STREAM_FIELD),
+ new Constraints(new HashMap<>()),
+ continuationToken);
+ int numContinuations = 0;
+ do {
+ GetSplitsRequest req = new GetSplitsRequest(originalReq, continuationToken);
+ logger.info("doGetSplits: req[{}]", req);
+
+ MetadataResponse rawResponse = handler.doGetSplits(allocator, req);
+ assertEquals(MetadataRequestType.GET_SPLITS, rawResponse.getRequestType());
+
+ GetSplitsResponse response = (GetSplitsResponse) rawResponse;
+ continuationToken = response.getContinuationToken();
+
+ logger.info("doGetSplits: continuationToken[{}] - numSplits[{}]", continuationToken, response.getSplits().size());
+
+ for (Split nextSplit : response.getSplits()) {
+ assertNotNull(nextSplit.getProperty(CloudwatchMetadataHandler.LOG_STREAM_SIZE_FIELD));
+ assertNotNull(nextSplit.getProperty(CloudwatchMetadataHandler.LOG_STREAM_FIELD));
+ assertNotNull(nextSplit.getProperty(CloudwatchMetadataHandler.LOG_GROUP_FIELD));
+ }
+
+ if (continuationToken != null) {
+ numContinuations++;
+ }
+ }
+ while (continuationToken != null);
+
+ assertTrue(numContinuations > 0);
+
+ logger.info("doGetSplits: exit");
+ }
+}
diff --git a/athena-cloudwatch/src/test/java/com/amazonaws/athena/connectors/cloudwatch/CloudwatchRecordHandlerTest.java b/athena-cloudwatch/src/test/java/com/amazonaws/athena/connectors/cloudwatch/CloudwatchRecordHandlerTest.java
new file mode 100644
index 0000000000..0f5e82043b
--- /dev/null
+++ b/athena-cloudwatch/src/test/java/com/amazonaws/athena/connectors/cloudwatch/CloudwatchRecordHandlerTest.java
@@ -0,0 +1,293 @@
+/*-
+ * #%L
+ * athena-cloudwatch
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+package com.amazonaws.athena.connectors.cloudwatch;
+
+import com.amazonaws.athena.connector.lambda.data.Block;
+import com.amazonaws.athena.connector.lambda.data.BlockAllocator;
+import com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl;
+import com.amazonaws.athena.connector.lambda.data.BlockUtils;
+import com.amazonaws.athena.connector.lambda.data.S3BlockSpillReader;
+import com.amazonaws.athena.connector.lambda.domain.Split;
+import com.amazonaws.athena.connector.lambda.domain.TableName;
+import com.amazonaws.athena.connector.lambda.domain.predicate.Constraints;
+import com.amazonaws.athena.connector.lambda.domain.predicate.Range;
+import com.amazonaws.athena.connector.lambda.domain.predicate.SortedRangeSet;
+import com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet;
+import com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation;
+import com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation;
+import com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest;
+import com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse;
+import com.amazonaws.athena.connector.lambda.records.RecordResponse;
+import com.amazonaws.athena.connector.lambda.records.RemoteReadRecordsResponse;
+import com.amazonaws.athena.connector.lambda.security.EncryptionKeyFactory;
+import com.amazonaws.athena.connector.lambda.security.FederatedIdentity;
+import com.amazonaws.athena.connector.lambda.security.LocalKeyFactory;
+import com.amazonaws.services.athena.AmazonAthena;
+import com.amazonaws.services.logs.AWSLogs;
+import com.amazonaws.services.logs.model.GetLogEventsRequest;
+import com.amazonaws.services.logs.model.GetLogEventsResult;
+import com.amazonaws.services.logs.model.OutputLogEvent;
+import com.amazonaws.services.s3.AmazonS3;
+import com.amazonaws.services.s3.model.PutObjectResult;
+import com.amazonaws.services.s3.model.S3Object;
+import com.amazonaws.services.s3.model.S3ObjectInputStream;
+import com.amazonaws.services.secretsmanager.AWSSecretsManager;
+import com.google.common.collect.ImmutableList;
+import com.google.common.io.ByteStreams;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.mockito.Mock;
+import org.mockito.invocation.InvocationOnMock;
+import org.mockito.runners.MockitoJUnitRunner;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.ByteArrayInputStream;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.UUID;
+
+import static org.junit.Assert.*;
+import static org.mockito.Matchers.any;
+import static org.mockito.Matchers.anyObject;
+import static org.mockito.Matchers.anyString;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+@RunWith(MockitoJUnitRunner.class)
+public class CloudwatchRecordHandlerTest
+{
+ private static final Logger logger = LoggerFactory.getLogger(CloudwatchRecordHandlerTest.class);
+
+ private FederatedIdentity identity = new FederatedIdentity("id", "principal", "account");
+ private List<ByteHolder> mockS3Storage;
+ private CloudwatchRecordHandler handler;
+ private S3BlockSpillReader spillReader;
+ private BlockAllocator allocator;
+ private Schema schemaForRead;
+ private EncryptionKeyFactory keyFactory = new LocalKeyFactory();
+
+ @Mock
+ private AWSLogs mockAwsLogs;
+
+ @Mock
+ private AmazonS3 mockS3;
+
+ @Mock
+ private AWSSecretsManager mockSecretsManager;
+
+ @Mock
+ private AmazonAthena mockAthena;
+
+ @Before
+ public void setUp()
+ throws Exception
+ {
+ schemaForRead = CloudwatchMetadataHandler.CLOUDWATCH_SCHEMA;
+
+ mockS3Storage = new ArrayList<>();
+ allocator = new BlockAllocatorImpl();
+ handler = new CloudwatchRecordHandler(mockS3, mockSecretsManager, mockAthena, mockAwsLogs);
+ spillReader = new S3BlockSpillReader(mockS3, allocator);
+
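+ //Emulate S3 spill in memory: putObject captures the spilled bytes and getObject replays them so the spill reader can read the blocks back.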
+ when(mockS3.putObject(anyObject(), anyObject(), anyObject(), anyObject()))
+ .thenAnswer((InvocationOnMock invocationOnMock) ->
+ {
+ InputStream inputStream = (InputStream) invocationOnMock.getArguments()[2];
+ ByteHolder byteHolder = new ByteHolder();
+ byteHolder.setBytes(ByteStreams.toByteArray(inputStream));
+ mockS3Storage.add(byteHolder);
+ return mock(PutObjectResult.class);
+ });
+
+ when(mockS3.getObject(anyString(), anyString()))
+ .thenAnswer((InvocationOnMock invocationOnMock) ->
+ {
+ S3Object mockObject = mock(S3Object.class);
+ ByteHolder byteHolder = mockS3Storage.get(0);
+ mockS3Storage.remove(0);
+ when(mockObject.getObjectContent()).thenReturn(
+ new S3ObjectInputStream(
+ new ByteArrayInputStream(byteHolder.getBytes()), null));
+ return mockObject;
+ });
+
+ when(mockAwsLogs.getLogEvents(any(GetLogEventsRequest.class))).thenAnswer((InvocationOnMock invocationOnMock) -> {
+ GetLogEventsRequest request = (GetLogEventsRequest) invocationOnMock.getArguments()[0];
+
+ //Check that predicate pushdown was propagated to cloudwatch
+ assertNotNull(request.getStartTime());
+ assertNotNull(request.getEndTime());
+
+ GetLogEventsResult result = new GetLogEventsResult();
+
+ Integer nextToken;
+ if (request.getNextToken() == null) {
+ nextToken = 1;
+ }
+ else if (Integer.valueOf(request.getNextToken()) < 3) {
+ nextToken = Integer.valueOf(request.getNextToken()) + 1;
+ }
+ else {
+ nextToken = null;
+ }
+
+ List<OutputLogEvent> logEvents = new ArrayList<>();
+ if (request.getNextToken() == null || Integer.valueOf(request.getNextToken()) < 3) {
+ long continuation = request.getNextToken() == null ? 0 : Integer.valueOf(request.getNextToken());
+ for (int i = 0; i < 100_000; i++) {
+ OutputLogEvent outputLogEvent = new OutputLogEvent();
+ outputLogEvent.setMessage("message-" + (continuation * i));
+ outputLogEvent.setTimestamp(i * 100L);
+ logEvents.add(outputLogEvent);
+ }
+ }
+
+ result.withEvents(logEvents);
+ if (nextToken != null) {
+ result.setNextForwardToken(String.valueOf(nextToken));
+ }
+
+ return result;
+ });
+ }
+
+ @After
+ public void tearDown()
+ throws Exception
+ {
+ allocator.close();
+ }
+
+ @Test
+ public void doReadRecordsNoSpill()
+ throws Exception
+ {
+ logger.info("doReadRecordsNoSpill: enter");
+
+ Map<String, ValueSet> constraintsMap = new HashMap<>();
+ constraintsMap.put("time", SortedRangeSet.copyOf(Types.MinorType.BIGINT.getType(),
+ ImmutableList.of(Range.equal(allocator, Types.MinorType.BIGINT.getType(), 100L)), false));
+
+ ReadRecordsRequest request = new ReadRecordsRequest(identity,
+ "catalog",
+ "queryId-" + System.currentTimeMillis(),
+ new TableName("schema", "table"),
+ schemaForRead,
+ Split.newBuilder(S3SpillLocation.newBuilder()
+ .withBucket(UUID.randomUUID().toString())
+ .withSplitId(UUID.randomUUID().toString())
+ .withQueryId(UUID.randomUUID().toString())
+ .withIsDirectory(true)
+ .build(),
+ keyFactory.create()).add(CloudwatchMetadataHandler.LOG_STREAM_FIELD, "table").build(),
+ new Constraints(constraintsMap),
+ 100_000_000_000L,
+ 100_000_000_000L //100GB don't expect this to spill
+ );
+
+ RecordResponse rawResponse = handler.doReadRecords(allocator, request);
+
+ assertTrue(rawResponse instanceof ReadRecordsResponse);
+
+ ReadRecordsResponse response = (ReadRecordsResponse) rawResponse;
+ logger.info("doReadRecordsNoSpill: rows[{}]", response.getRecordCount());
+
+ assertTrue(response.getRecords().getRowCount() == 3);
+ logger.info("doReadRecordsNoSpill: {}", BlockUtils.rowToString(response.getRecords(), 0));
+
+ logger.info("doReadRecordsNoSpill: exit");
+ }
+
+ @Test
+ public void doReadRecordsSpill()
+ throws Exception
+ {
+ logger.info("doReadRecordsSpill: enter");
+
+ Map<String, ValueSet> constraintsMap = new HashMap<>();
+ constraintsMap.put("time", SortedRangeSet.of(
+ Range.range(allocator, Types.MinorType.BIGINT.getType(), 100L, true, 100_000_000L, true)));
+
+ ReadRecordsRequest request = new ReadRecordsRequest(identity,
+ "catalog",
+ "queryId-" + System.currentTimeMillis(),
+ new TableName("schema", "table"),
+ schemaForRead,
+ Split.newBuilder(S3SpillLocation.newBuilder()
+ .withBucket(UUID.randomUUID().toString())
+ .withSplitId(UUID.randomUUID().toString())
+ .withQueryId(UUID.randomUUID().toString())
+ .withIsDirectory(true)
+ .build(),
+ keyFactory.create()).add(CloudwatchMetadataHandler.LOG_STREAM_FIELD, "table").build(),
+ new Constraints(constraintsMap),
+ 1_500_000L, //~1.5MB so we should see some spill
+ 0
+ );
+
+ RecordResponse rawResponse = handler.doReadRecords(allocator, request);
+
+ assertTrue(rawResponse instanceof RemoteReadRecordsResponse);
+
+ try (RemoteReadRecordsResponse response = (RemoteReadRecordsResponse) rawResponse) {
+ logger.info("doReadRecordsSpill: remoteBlocks[{}]", response.getRemoteBlocks().size());
+
+ assertTrue(response.getNumberBlocks() > 1);
+
+ int blockNum = 0;
+ for (SpillLocation next : response.getRemoteBlocks()) {
+ S3SpillLocation spillLocation = (S3SpillLocation) next;
+ try (Block block = spillReader.read(spillLocation, response.getEncryptionKey(), response.getSchema())) {
+
+ logger.info("doReadRecordsSpill: blockNum[{}] and recordCount[{}]", blockNum++, block.getRowCount());
+ // assertTrue(++blockNum < response.getRemoteBlocks().size() && block.getRowCount() > 10_000);
+
+ logger.info("doReadRecordsSpill: {}", BlockUtils.rowToString(block, 0));
+ assertNotNull(BlockUtils.rowToString(block, 0));
+ }
+ }
+ }
+
+ logger.info("doReadRecordsSpill: exit");
+ }
+
+ private class ByteHolder
+ {
+ private byte[] bytes;
+
+ public void setBytes(byte[] bytes)
+ {
+ this.bytes = bytes;
+ }
+
+ public byte[] getBytes()
+ {
+ return bytes;
+ }
+ }
+}
diff --git a/athena-docdb/LICENSE.txt b/athena-docdb/LICENSE.txt
new file mode 100644
index 0000000000..418de4c108
--- /dev/null
+++ b/athena-docdb/LICENSE.txt
@@ -0,0 +1,174 @@
+Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
\ No newline at end of file
diff --git a/athena-docdb/README.md b/athena-docdb/README.md
new file mode 100644
index 0000000000..04268f5a62
--- /dev/null
+++ b/athena-docdb/README.md
@@ -0,0 +1,95 @@
+# Amazon Athena DocumentDB Connector
+
+This connector enables Amazon Athena to communicate with your DocumentDB instance(s), making your DocumentDB data accessible via SQL. The connector also works with any MongoDB-compatible endpoint.
+
+Unlike traditional relational data stores, DocumentDB collections do not have a set schema. Each entry can have different fields and data types. While we are investigating the best way to support schema-on-read use cases for this connector, it presently supports two mechanisms for generating traditional table schema information. The default mechanism is for the connector to scan a small number of documents in your collection in order to form a union of all fields and coerce fields with non-overlapping data types. This basic schema inference works well for collections that have mostly uniform entries. For more diverse collections, the connector supports retrieving metadata from the Glue Data Catalog. If the connector sees a database and table that match your DocumentDB database and collection names, it will use the corresponding Glue table for schema. We recommend creating your Glue table such that it is a superset of all fields you may want to access from your DocumentDB collection.
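+
+As a rough illustration of the union-style inference described above, the sketch below scans a handful of documents and merges their fields into a single schema view. This is only a simplified sketch under assumed names (the `mapToArrowType` helper and its type mappings are hypothetical), not the connector's actual SchemaUtils implementation:
+
+```java
+import com.mongodb.client.MongoCollection;
+import org.apache.arrow.vector.types.FloatingPointPrecision;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.bson.Document;
+
+import java.util.LinkedHashMap;
+import java.util.Map;
+
+public class InferenceSketch
+{
+    // Scan the first numDocs documents and union their fields into a single field -> type view.
+    public static Map<String, ArrowType> inferFields(MongoCollection<Document> collection, int numDocs)
+    {
+        Map<String, ArrowType> fields = new LinkedHashMap<>();
+        for (Document doc : collection.find().limit(numDocs)) {
+            for (Map.Entry<String, Object> entry : doc.entrySet()) {
+                ArrowType candidate = mapToArrowType(entry.getValue());
+                // When two documents disagree on a field's type, coerce the field to VARCHAR.
+                fields.merge(entry.getKey(), candidate,
+                        (existing, incoming) -> existing.equals(incoming) ? existing : new ArrowType.Utf8());
+            }
+        }
+        return fields;
+    }
+
+    // Hypothetical helper mapping a few Java value types to Arrow types; everything else becomes VARCHAR.
+    private static ArrowType mapToArrowType(Object value)
+    {
+        if (value instanceof Integer) {
+            return new ArrowType.Int(32, true);
+        }
+        if (value instanceof Long) {
+            return new ArrowType.Int(64, true);
+        }
+        if (value instanceof Double) {
+            return new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE);
+        }
+        return new ArrowType.Utf8();
+    }
+}
+```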
+
+### Parameters
+
+The Amazon Athena DocumentDB Connector exposes several configuration options via Lambda environment variables. More detail on the available parameters can be found below.
+
+1. **spill_bucket** - When the data returned by your Lambda function exceeds Lambda’s limits, this is the bucket that the data will be written to for Athena to read the excess from. (e.g. my_bucket)
+2. **spill_prefix** - (Optional) Defaults to a sub-folder in your bucket called 'athena-federation-spill'. Used in conjunction with spill_bucket, this is the path within the above bucket that large responses are spilled to. You should configure an S3 lifecycle on this location to delete old spills after X days/hours.
+3. **kms_key_id** - (Optional) By default any data that is spilled to S3 is encrypted using AES-GCM and a randomly generated key. Setting a KMS Key ID allows your Lambda function to use KMS for key generation for a stronger source of encryption keys. (e.g. a7e63k4b-8loc-40db-a2a1-4d0en2cd8331)
+4. **disable_spill_encryption** - (Optional) Defaults to False so that any data that is spilled to S3 is encrypted using AES-GCM either with a randomly generated key or using KMS to generate keys. Setting this to True will disable spill encryption. You may wish to disable this for improved performance, especially if your spill location in S3 uses S3 Server Side Encryption. (e.g. True or False)
+5. **disable_glue** - (Optional) If present, with any value, the connector will no longer attempt to retrieve supplemental metadata from Glue.
+6. **glue_catalog** - (Optional) Can be used to target a cross-account Glue catalog. By default the connector will attempt to get metadata from its own Glue account.
+7. **default_docdb** - If present, this DocDB connection string is used when there is no catalog-specific environment variable (as explained below). (e.g. mongodb://<username>:<password>@<hostname>:<port>/?ssl=true&ssl_ca_certs=rds-combined-ca-bundle.pem&replicaSet=rs0)
+
+You can also provide one or more properties which define the DocumentDB connection details for the DocumentDB instance(s) you'd like this connector to use. You can do this by setting a Lambda environment variable that corresponds to the catalog name you'd like to use in Athena. For example, if I'd like to query two different DocumentDB instances from Athena, as in the queries below:
+
+```sql
+ select * from "docdb_instance_1".database.table
+ select * from "docdb_instance_2".database.table
+ ```
+
+To support these two SQL statements we'd need to add two environment variables to our Lambda function:
+
+1. **docdb_instance_1** - The value should be the DocumentDB connection details in the format of: mongodb://<username>:<password>@<hostname>:<port>/?ssl=true&ssl_ca_certs=rds-combined-ca-bundle.pem&replicaSet=rs0
+2. **docdb_instance_2** - The value should be the DocumentDB connection details in the format of: mongodb://<username>:<password>@<hostname>:<port>/?ssl=true&ssl_ca_certs=rds-combined-ca-bundle.pem&replicaSet=rs0
+
+You can also optionally use SecretsManager for part or all of the value for the preceding connection details. For example, if I set a Lambda environment variable for **docdb_instance_1** to be "mongodb://${docdb_instance_1_creds}@myhostname.com:123/?ssl=true&ssl_ca_certs=rds-combined-ca-bundle.pem&replicaSet=rs0" the Athena Federation
+SDK will automatically attempt to retrieve a secret from AWS SecretsManager named "docdb_instance_1_creds" and inject that value in place of "${docdb_instance_1_creds}". Essentially, anything between ${...} is looked up as a secret in SecretsManager. If no such secret exists, the text isn't replaced.
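+
+As a rough sketch of the ${...} substitution described above (simplified for illustration and not the Athena Federation SDK's actual resolveSecrets implementation, which for example leaves the text unchanged when no matching secret exists), the snippet below replaces each ${secret_name} token with the corresponding SecretsManager value:
+
+```java
+import com.amazonaws.services.secretsmanager.AWSSecretsManager;
+import com.amazonaws.services.secretsmanager.AWSSecretsManagerClientBuilder;
+import com.amazonaws.services.secretsmanager.model.GetSecretValueRequest;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+public class SecretSubstitutionSketch
+{
+    private static final Pattern SECRET_PATTERN = Pattern.compile("\\$\\{([^}]+)\\}");
+
+    // Replace every ${secret_name} token in the connection string with the secret's value.
+    public static String resolve(String connStr)
+    {
+        AWSSecretsManager secrets = AWSSecretsManagerClientBuilder.defaultClient();
+        Matcher matcher = SECRET_PATTERN.matcher(connStr);
+        StringBuffer resolved = new StringBuffer();
+        while (matcher.find()) {
+            String secretValue = secrets.getSecretValue(
+                    new GetSecretValueRequest().withSecretId(matcher.group(1))).getSecretString();
+            matcher.appendReplacement(resolved, Matcher.quoteReplacement(secretValue));
+        }
+        matcher.appendTail(resolved);
+        return resolved.toString();
+    }
+}
+```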
+
+
+### Setting Up Databases & Tables
+
+To enable a Glue Table for use with DocumentDB, you simply need to have a Glue database and table that match any DocumentDB database and collection that you'd like to supply supplemental metadata for (instead of relying on the DocumentDB Connector's ability to infer schema). The connector's built-in schema inference only supports a subset of data types and scans a limited number of documents. You can enable a Glue table to be used for supplemental metadata by setting the below table property from the Glue Console when editing the table and database in question; an illustrative AWS SDK sketch follows the property list below. The only other thing you need to do is ensure you use the appropriate data types listed in a later section.
+
+1. **docdb-metadata-flag** - Flag indicating that the table can be used for supplemental meta-data by the Athena DocDB Connector. The value is unimportant as long as this key is present in the properties of the table.
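+
+For illustration, the sketch below adds this property to an existing Glue table using the AWS SDK for Java. The database and table names are placeholders, and it copies the existing table definition so the update does not drop columns or storage settings; setting the property from the Glue Console works just as well:
+
+```java
+import com.amazonaws.services.glue.AWSGlue;
+import com.amazonaws.services.glue.AWSGlueClientBuilder;
+import com.amazonaws.services.glue.model.GetTableRequest;
+import com.amazonaws.services.glue.model.Table;
+import com.amazonaws.services.glue.model.TableInput;
+import com.amazonaws.services.glue.model.UpdateTableRequest;
+
+import java.util.HashMap;
+import java.util.Map;
+
+public class EnableGlueTableSketch
+{
+    public static void main(String[] args)
+    {
+        AWSGlue glue = AWSGlueClientBuilder.defaultClient();
+
+        // Placeholder names: the Glue database/table that mirror your DocumentDB database/collection.
+        String database = "my_docdb_database";
+        String collection = "my_collection";
+
+        // Fetch the existing definition so the update preserves columns and storage settings.
+        Table table = glue.getTable(new GetTableRequest()
+                .withDatabaseName(database)
+                .withName(collection)).getTable();
+
+        Map<String, String> parameters = table.getParameters() == null
+                ? new HashMap<>() : new HashMap<>(table.getParameters());
+        parameters.put("docdb-metadata-flag", "true");
+
+        glue.updateTable(new UpdateTableRequest()
+                .withDatabaseName(database)
+                .withTableInput(new TableInput()
+                        .withName(table.getName())
+                        .withParameters(parameters)
+                        .withStorageDescriptor(table.getStorageDescriptor())
+                        .withPartitionKeys(table.getPartitionKeys())
+                        .withTableType(table.getTableType())));
+    }
+}
+```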
+
+### Data Types
+
+The schema inference feature of this connector will attempt to infer values as one of the following:
+
+|Apache Arrow DataType|Java/DocDB Type|
+|-------------|-----------------|
+|VARCHAR|String|
+|INT|Integer|
+|BIGINT|Long|
+|BIT|Boolean|
+|FLOAT4|Float|
+|FLOAT8|Double|
+|TIMESTAMPSEC|Date|
+|VARCHAR|ObjectId|
+|LIST|List|
+|STRUCT|Document|
+
+Alternatively, if you are using Glue for supplemental metadata you can configure the following types:
+
+|Glue DataType|Apache Arrow Type|
+|-------------|-----------------|
+|int|INT|
+|bigint|BIGINT|
+|double|FLOAT8|
+|float|FLOAT4|
+|boolean|BIT|
+|binary|VARBINARY|
+|string|VARCHAR|
+|List|LIST|
+|Struct|STRUCT|
+
+### Required Permissions
+
+Review the "Policies" section of the athena-docdb.yaml file for full details on the IAM Policies required by this connector. A brief summary is below.
+
+1. S3 Write Access - In order to successfully handle large queries, the connector requires write access to a location in S3.
+2. SecretsManager Read Access - If you choose to store DocumentDB connection details in SecretsManager you will need to grant the connector access to those secrets.
+3. Glue Data Catalog - Since DocumentDB does not have a metadata store, the connector requires Read-Only access to Glue's DataCatalog for supplemental table schema information.
+4. VPC Access - In order to connect to your VPC for the purposes of communicating with your DocumentDB instance(s), the connector needs the ability to attach/detach an interface to the VPC.
+5. CloudWatch Logs - This is a somewhat implicit permission when deploying a Lambda function, but the connector needs access to CloudWatch Logs for storing its logs.
+6. Athena GetQueryExecution - The connector uses this access to fast-fail when the upstream Athena query has terminated.
+
+### Deploying The Connector
+
+To use this connector in your queries, navigate to AWS Serverless Application Repository and deploy a pre-built version of this connector. Alternatively, you can build and deploy this connector from source by following the steps below, or use the more detailed tutorial in the athena-example module:
+
+1. From the athena-federation-sdk dir, run `mvn clean install` if you haven't already.
+2. From the athena-docdb dir, run `mvn clean install`.
+3. From the athena-docdb dir, run `../tools/publish.sh S3_BUCKET_NAME athena-docdb` to publish the connector to your private AWS Serverless Application Repository. The S3_BUCKET_NAME in the command is where a copy of the connector's code will be stored for the Serverless Application Repository to retrieve it. This will give users with permission the ability to deploy instances of the connector via a 1-Click form. Then navigate to [Serverless Application Repository](https://aws.amazon.com/serverless/serverlessrepo)
+
+
+## Performance
+
+The Athena DocumentDB Connector does not currently support parallel scans but will attempt to push down predicates as part of its DocumentDB queries.
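+
+As an illustration of what predicate pushdown looks like here (made-up field names, not output produced by the connector), a SQL predicate such as `WHERE year > 2000 AND color = 'red'` could be expressed as the following filter Document with the MongoDB Java driver:
+
+```java
+import org.bson.Document;
+
+import java.util.Arrays;
+
+public class PushdownSketch
+{
+    public static Document exampleFilter()
+    {
+        // Roughly equivalent to: WHERE year > 2000 AND color = 'red'
+        return new Document("$and", Arrays.asList(
+                new Document("year", new Document("$gt", 2000)),
+                new Document("color", new Document("$eq", "red"))));
+    }
+}
+```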
+
diff --git a/athena-docdb/athena-docdb.yaml b/athena-docdb/athena-docdb.yaml
new file mode 100644
index 0000000000..49b3d2e1c3
--- /dev/null
+++ b/athena-docdb/athena-docdb.yaml
@@ -0,0 +1,98 @@
+Transform: 'AWS::Serverless-2016-10-31'
+Metadata:
+  'AWS::ServerlessRepo::Application':
+    Name: AthenaDocumentDBConnector
+    Description: This connector enables Amazon Athena to communicate with your DocumentDB instance(s), making your DocumentDB data accessible via SQL.
+    Author: 'Amazon Athena'
+    SpdxLicenseId: Apache-2.0
+    LicenseUrl: LICENSE.txt
+    ReadmeUrl: README.md
+    Labels:
+      - athena-federation
+    HomePageUrl: 'https://github.com/awslabs/aws-athena-query-federation'
+    SemanticVersion: 1.0.2
+    SourceCodeUrl: 'https://github.com/awslabs/aws-athena-query-federation'
+Parameters:
+  AthenaCatalogName:
+    Description: 'The name you will give to this catalog in Athena. It will also be used as the function name.'
+    Type: String
+  SpillBucket:
+    Description: 'The bucket where this function can spill data.'
+    Type: String
+    Default: athena-federation-spill
+  SpillPrefix:
+    Description: 'The bucket prefix where this function can spill large responses.'
+    Type: String
+    Default: athena-spill
+  LambdaTimeout:
+    Description: 'Maximum Lambda invocation runtime in seconds. (min 1 - 900 max)'
+    Default: 900
+    Type: Number
+  LambdaMemory:
+    Description: 'Lambda memory in MB (min 128 - 3008 max).'
+    Default: 3008
+    Type: Number
+  DisableSpillEncryption:
+    Description: 'If set to ''false'' data spilled to S3 is encrypted with AES GCM'
+    Default: 'false'
+    Type: String
+  SecurityGroupIds:
+    Description: 'One or more SecurityGroup IDs corresponding to the SecurityGroup that should be applied to the Lambda function. (e.g. sg1,sg2,sg3)'
+    Type: 'List<AWS::EC2::SecurityGroup::Id>'
+  SubnetIds:
+    Description: 'One or more Subnet IDs corresponding to the Subnet that the Lambda function can use to access your data source. (e.g. subnet1,subnet2)'
+    Type: 'List<AWS::EC2::Subnet::Id>'
+  SecretNameOrPrefix:
+    Description: 'The name or prefix of a set of names within Secrets Manager that this function should have access to. (e.g. hbase-*).'
+    Type: String
+  DocDBConnectionString:
+    Description: 'The DocDB connection details to use by default if no catalog specific connection is defined, optionally using SecretsManager (e.g. ${secret_name}).'
+    Type: String
+    Default: "e.g. mongodb://<username>:<password>@<hostname>:<port>/?ssl=true&ssl_ca_certs=rds-combined-ca-bundle.pem&replicaSet=rs0"
+Resources:
+  ConnectorConfig:
+    Type: 'AWS::Serverless::Function'
+    Properties:
+      Environment:
+        Variables:
+          disable_spill_encryption: !Ref DisableSpillEncryption
+          spill_bucket: !Ref SpillBucket
+          spill_prefix: !Ref SpillPrefix
+          default_docdb: !Ref DocDBConnectionString
+      FunctionName: !Ref AthenaCatalogName
+      Handler: "com.amazonaws.athena.connectors.docdb.DocDBCompositeHandler"
+      CodeUri: "./target/athena-docdb-1.0.jar"
+      Description: "Enables Amazon Athena to communicate with DocumentDB, making your DocumentDB data accessible via SQL."
+      Runtime: java8
+      Timeout: !Ref LambdaTimeout
+      MemorySize: !Ref LambdaMemory
+      Policies:
+        - Statement:
+            - Action:
+                - secretsmanager:GetSecretValue
+              Effect: Allow
+              Resource: !Sub 'arn:aws:secretsmanager:*:*:secret:${SecretNameOrPrefix}'
+          Version: '2012-10-17'
+        - Statement:
+            - Action:
+                - glue:GetTableVersions
+                - glue:GetPartitions
+                - glue:GetTables
+                - glue:GetTableVersion
+                - glue:GetDatabases
+                - glue:GetTable
+                - glue:GetPartition
+                - glue:GetDatabase
+                - athena:GetQueryExecution
+              Effect: Allow
+              Resource: '*'
+          Version: '2012-10-17'
+        #S3CrudPolicy allows our connector to spill large responses to S3. You can optionally replace this pre-made policy
+        #with one that is more restrictive and can only 'put' but not read,delete, or overwrite files.
+        - S3CrudPolicy:
+            BucketName: !Ref SpillBucket
+        #VPCAccessPolicy allows our connector to run in a VPC so that it can access your data source.
+        - VPCAccessPolicy: {}
+      VpcConfig:
+        SecurityGroupIds: !Ref SecurityGroupIds
+        SubnetIds: !Ref SubnetIds
\ No newline at end of file
diff --git a/athena-docdb/pom.xml b/athena-docdb/pom.xml
new file mode 100644
index 0000000000..4efd98b3ae
--- /dev/null
+++ b/athena-docdb/pom.xml
@@ -0,0 +1,57 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <parent>
+        <artifactId>aws-athena-query-federation</artifactId>
+        <groupId>com.amazonaws</groupId>
+        <version>1.0</version>
+    </parent>
+    <modelVersion>4.0.0</modelVersion>
+
+    <artifactId>athena-docdb</artifactId>
+
+    <dependencies>
+        <dependency>
+            <groupId>com.amazonaws</groupId>
+            <artifactId>aws-athena-federation-sdk</artifactId>
+            <version>${aws-athena-federation-sdk.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>org.mongodb</groupId>
+            <artifactId>mongo-java-driver</artifactId>
+            <version>3.10.2</version>
+        </dependency>
+    </dependencies>
+
+    <build>
+        <plugins>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-shade-plugin</artifactId>
+                <version>3.2.1</version>
+                <configuration>
+                    <createDependencyReducedPom>false</createDependencyReducedPom>
+                    <filters>
+                        <filter>
+                            <artifact>*:*</artifact>
+                            <excludes>
+                                <exclude>META-INF/*.SF</exclude>
+                                <exclude>META-INF/*.DSA</exclude>
+                                <exclude>META-INF/*.RSA</exclude>
+                            </excludes>
+                        </filter>
+                    </filters>
+                </configuration>
+                <executions>
+                    <execution>
+                        <phase>package</phase>
+                        <goals>
+                            <goal>shade</goal>
+                        </goals>
+                    </execution>
+                </executions>
+            </plugin>
+        </plugins>
+    </build>
+</project>
\ No newline at end of file
diff --git a/athena-docdb/src/main/java/com/amazonaws/athena/connectors/docdb/DocDBCompositeHandler.java b/athena-docdb/src/main/java/com/amazonaws/athena/connectors/docdb/DocDBCompositeHandler.java
new file mode 100644
index 0000000000..df5342f7f5
--- /dev/null
+++ b/athena-docdb/src/main/java/com/amazonaws/athena/connectors/docdb/DocDBCompositeHandler.java
@@ -0,0 +1,35 @@
+/*-
+ * #%L
+ * athena-mongodb
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+package com.amazonaws.athena.connectors.docdb;
+
+import com.amazonaws.athena.connector.lambda.handlers.CompositeHandler;
+
+/**
+ * Boilerplate composite handler that allows us to use a single Lambda function for both
+ * Metadata and Data. In this case we just compose DocDBMetadataHandler and DocDBRecordHandler.
+ */
+public class DocDBCompositeHandler
+ extends CompositeHandler
+{
+ public DocDBCompositeHandler()
+ {
+ super(new DocDBMetadataHandler(), new DocDBRecordHandler());
+ }
+}
diff --git a/athena-docdb/src/main/java/com/amazonaws/athena/connectors/docdb/DocDBConnectionFactory.java b/athena-docdb/src/main/java/com/amazonaws/athena/connectors/docdb/DocDBConnectionFactory.java
new file mode 100644
index 0000000000..715f2a4104
--- /dev/null
+++ b/athena-docdb/src/main/java/com/amazonaws/athena/connectors/docdb/DocDBConnectionFactory.java
@@ -0,0 +1,93 @@
+/*-
+ * #%L
+ * athena-mongodb
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+package com.amazonaws.athena.connectors.docdb;
+
+import com.mongodb.client.MongoClient;
+import com.mongodb.client.MongoClients;
+import org.apache.arrow.util.VisibleForTesting;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Creates and caches DocumentDB (MongoClient) connection instances, using the connection string as the cache key.
+ *
+ * @Note Connection String format is expected to be like:
+ * mongodb://<username>:<password>@<hostname>:<port>/?ssl=true&ssl_ca_certs=<certs_file>&replicaSet=<replica_set>
+ */
+public class DocDBConnectionFactory
+{
+ private static final Logger logger = LoggerFactory.getLogger(DocDBConnectionFactory.class);
+ private final Map<String, MongoClient> clientCache = new HashMap<>();
+
+ /**
+ * Used to get an existing, pooled, connection or to create a new connection
+ * for the given connection string.
+ *
+ * @param connStr MongoClient connection details, format is expected to be:
+ * mongodb://<username>:<password>@<hostname>:<port>/?ssl=true&ssl_ca_certs=<certs_file>&replicaSet=<replica_set>
+ * @return A MongoClient connection if the connection succeeded, else the function will throw.
+ */
+ public synchronized MongoClient getOrCreateConn(String connStr)
+ {
+ logger.info("getOrCreateConn: enter");
+ MongoClient result = clientCache.get(connStr);
+
+ if (result == null || !connectionTest(result)) {
+ result = MongoClients.create(connStr);
+ clientCache.put(connStr, result);
+ }
+
+ logger.info("getOrCreateConn: exit");
+ return result;
+ }
+
+ /**
+ * Runs a 'quick' test on the connection and returns true if it passes.
+ */
+ private boolean connectionTest(MongoClient conn)
+ {
+ try {
+ logger.info("connectionTest: Testing connection started.");
+ conn.listDatabaseNames();
+ logger.info("connectionTest: Testing connection completed - success.");
+ return true;
+ }
+ catch (RuntimeException ex) {
+ logger.warn("getOrCreateConn: Exception while testing existing connection.", ex);
+ }
+ logger.info("connectionTest: Testing connection completed - fail.");
+ return false;
+ }
+
+ /**
+ * Injects a connection into the client cache.
+ *
+ * @param conStr The connection string (aka the cache key)
+ * @param conn The connection to inject into the client cache, most often a Mock used in testing.
+ */
+ @VisibleForTesting
+ protected synchronized void addConnection(String conStr, MongoClient conn)
+ {
+ clientCache.put(conStr, conn);
+ }
+}
diff --git a/athena-docdb/src/main/java/com/amazonaws/athena/connectors/docdb/DocDBFieldResolver.java b/athena-docdb/src/main/java/com/amazonaws/athena/connectors/docdb/DocDBFieldResolver.java
new file mode 100644
index 0000000000..f805333ace
--- /dev/null
+++ b/athena-docdb/src/main/java/com/amazonaws/athena/connectors/docdb/DocDBFieldResolver.java
@@ -0,0 +1,54 @@
+/*-
+ * #%L
+ * athena-mongodb
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+package com.amazonaws.athena.connectors.docdb;
+
+import com.amazonaws.athena.connector.lambda.data.FieldResolver;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.bson.Document;
+
+import java.util.List;
+
+/**
+ * Used to resolve DocDB complex structures to Apache Arrow Types.
+ *
+ * @see com.amazonaws.athena.connector.lambda.data.FieldResolver
+ */
+public class DocDBFieldResolver
+ implements FieldResolver
+{
+ protected static final FieldResolver DEFAULT_FIELD_RESOLVER = new DocDBFieldResolver();
+
+ private DocDBFieldResolver() {}
+
+ @Override
+ public Object getFieldValue(Field field, Object value)
+ {
+ Types.MinorType minorType = Types.getMinorTypeForArrowType(field.getType());
+ if (minorType == Types.MinorType.LIST) {
+ return TypeUtils.coerce(field.getChildren().get(0), ((List) value).iterator());
+ }
+ else if (value instanceof Document) {
+ Object rawVal = ((Document) value).get(field.getName());
+ return TypeUtils.coerce(field, rawVal);
+ }
+ throw new RuntimeException("Expected LIST or Document type but found " + minorType);
+ }
+}
diff --git a/athena-docdb/src/main/java/com/amazonaws/athena/connectors/docdb/DocDBMetadataHandler.java b/athena-docdb/src/main/java/com/amazonaws/athena/connectors/docdb/DocDBMetadataHandler.java
new file mode 100644
index 0000000000..5432061497
--- /dev/null
+++ b/athena-docdb/src/main/java/com/amazonaws/athena/connectors/docdb/DocDBMetadataHandler.java
@@ -0,0 +1,250 @@
+/*-
+ * #%L
+ * athena-mongodb
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+package com.amazonaws.athena.connectors.docdb;
+
+import com.amazonaws.athena.connector.lambda.QueryStatusChecker;
+import com.amazonaws.athena.connector.lambda.data.BlockAllocator;
+import com.amazonaws.athena.connector.lambda.data.BlockWriter;
+import com.amazonaws.athena.connector.lambda.domain.Split;
+import com.amazonaws.athena.connector.lambda.domain.TableName;
+import com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation;
+import com.amazonaws.athena.connector.lambda.handlers.GlueMetadataHandler;
+import com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest;
+import com.amazonaws.athena.connector.lambda.metadata.GetSplitsResponse;
+import com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutRequest;
+import com.amazonaws.athena.connector.lambda.metadata.GetTableRequest;
+import com.amazonaws.athena.connector.lambda.metadata.GetTableResponse;
+import com.amazonaws.athena.connector.lambda.metadata.ListSchemasRequest;
+import com.amazonaws.athena.connector.lambda.metadata.ListSchemasResponse;
+import com.amazonaws.athena.connector.lambda.metadata.ListTablesRequest;
+import com.amazonaws.athena.connector.lambda.metadata.ListTablesResponse;
+import com.amazonaws.athena.connector.lambda.metadata.MetadataRequest;
+import com.amazonaws.athena.connector.lambda.metadata.glue.GlueFieldLexer;
+import com.amazonaws.athena.connector.lambda.security.EncryptionKeyFactory;
+import com.amazonaws.services.athena.AmazonAthena;
+import com.amazonaws.services.glue.AWSGlue;
+import com.amazonaws.services.glue.AWSGlueClientBuilder;
+import com.amazonaws.services.glue.model.Table;
+import com.amazonaws.services.secretsmanager.AWSSecretsManager;
+import com.mongodb.client.MongoClient;
+import com.mongodb.client.MongoCursor;
+import org.apache.arrow.util.VisibleForTesting;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Handles metadata requests for the Athena DocumentDB Connector.
+ *
+ * For more detail, please see the module's README.md; some notable characteristics of this class include:
+ *
+ * 1. Uses a Glue table property (docdb-metadata-flag) to indicate that the table (whose name matches the DocDB collection
+ * name) can indeed be used to supplement metadata from DocDB itself.
+ * 2. Attempts to resolve sensitive fields such as DocDB connection strings via SecretsManager so that you can substitute
+ * variables with values by doing something like:
+ * mongodb://${docdb_instance_1_creds}@myhostname.com:123/?ssl=true&ssl_ca_certs=rds-combined-ca-bundle.pem&replicaSet=rs0
+ */
+public class DocDBMetadataHandler
+ extends GlueMetadataHandler
+{
+ private static final Logger logger = LoggerFactory.getLogger(DocDBMetadataHandler.class);
+
+ //Used to denote the 'type' of this connector for diagnostic purposes.
+ private static final String SOURCE_TYPE = "documentdb";
+ //The Env variable name used to indicate that we want to disable the use of Glue DataCatalog for supplemental
+ //metadata and instead rely solely on the connector's schema inference capabilities.
+ private static final String GLUE_ENV_VAR = "disable_glue";
+ //Field name used to store the connection string as a property on Split objects.
+ protected static final String DOCDB_CONN_STR = "connStr";
+ //The Env variable name used to store the default DocDB connection string if no catalog specific
+ //env variable is set.
+ private static final String DEFAULT_DOCDB = "default_docdb";
+ //The Glue table property that indicates that a table matching the name of a DocDB collection
+ //is indeed enabled for use by this connector.
+ private static final String DOCDB_METADATA_FLAG = "docdb-metadata-flag";
+ //Used to filter out Glue tables which lack a docdb metadata flag.
+ private static final TableFilter TABLE_FILTER = (Table table) -> table.getParameters().containsKey(DOCDB_METADATA_FLAG);
+ //The number of documents to scan when attempting to infer schema from a DocDB collection.
+ private static final int SCHEMA_INFERRENCE_NUM_DOCS = 10;
+
+ private final AWSGlue glue;
+ private final DocDBConnectionFactory connectionFactory;
+
+ public DocDBMetadataHandler()
+ {
+ super((System.getenv(GLUE_ENV_VAR) == null) ? AWSGlueClientBuilder.standard().build() : null, SOURCE_TYPE);
+ glue = getAwsGlue();
+ connectionFactory = new DocDBConnectionFactory();
+ }
+
+ @VisibleForTesting
+ protected DocDBMetadataHandler(AWSGlue glue,
+ DocDBConnectionFactory connectionFactory,
+ EncryptionKeyFactory keyFactory,
+ AWSSecretsManager secretsManager,
+ AmazonAthena athena,
+ String spillBucket,
+ String spillPrefix)
+ {
+ super(glue, keyFactory, secretsManager, athena, SOURCE_TYPE, spillBucket, spillPrefix);
+ this.glue = glue;
+ this.connectionFactory = connectionFactory;
+ }
+
+ private MongoClient getOrCreateConn(MetadataRequest request)
+ {
+ String endpoint = resolveSecrets(getConnStr(request));
+ return connectionFactory.getOrCreateConn(endpoint);
+ }
+
+ /**
+ * Retrieves the DocDB connection details from an env variable matching the catalog name, if no such
+ * env variable exists we fall back to the default env variable defined by DEFAULT_DOCDB.
+ */
+ private String getConnStr(MetadataRequest request)
+ {
+ String conStr = System.getenv(request.getCatalogName());
+ if (conStr == null) {
+ logger.info("getConnStr: No environment variable found for catalog {} , using default {}",
+ request.getCatalogName(), DEFAULT_DOCDB);
+ conStr = System.getenv(DEFAULT_DOCDB);
+ }
+ return conStr;
+ }
+
+ /**
+ * List databases in your DocumentDB instance treating each as a 'schema' (aka database)
+ *
+ * @see GlueMetadataHandler
+ */
+ @Override
+ public ListSchemasResponse doListSchemaNames(BlockAllocator blockAllocator, ListSchemasRequest request)
+ {
+ List<String> schemas = new ArrayList<>();
+ MongoClient client = getOrCreateConn(request);
+ try (MongoCursor<String> itr = client.listDatabaseNames().iterator()) {
+ while (itr.hasNext()) {
+ schemas.add(itr.next());
+ }
+
+ return new ListSchemasResponse(request.getCatalogName(), schemas);
+ }
+ }
+
+ /**
+ * List collections in the requested schema in your DocumentDB instance treating the requested schema as a DocumentDB
+ * database.
+ *
+ * @see GlueMetadataHandler
+ */
+ @Override
+ public ListTablesResponse doListTables(BlockAllocator blockAllocator, ListTablesRequest request)
+ {
+ MongoClient client = getOrCreateConn(request);
+ List<TableName> tables = new ArrayList<>();
+
+ try (MongoCursor<String> itr = client.getDatabase(request.getSchemaName()).listCollectionNames().iterator()) {
+ while (itr.hasNext()) {
+ tables.add(new TableName(request.getSchemaName(), itr.next()));
+ }
+
+ return new ListTablesResponse(request.getCatalogName(), tables);
+ }
+ }
+
+ /**
+ * If Glue is enabled as a source of supplemental metadata we look up the requested Schema/Table in Glue and
+ * filter out any results that don't have the DOCDB_METADATA_FLAG set. If no matching results were found in Glue,
+ * then we resort to inferring the schema of the DocumentDB collection using SchemaUtils.inferSchema(...). If there
+ * is no such table in DocumentDB the operation will fail.
+ *
+ * @see GlueMetadataHandler
+ */
+ @Override
+ public GetTableResponse doGetTable(BlockAllocator blockAllocator, GetTableRequest request)
+ throws Exception
+ {
+ logger.info("doGetTable: enter", request.getTableName());
+ Schema schema = null;
+ try {
+ if (glue != null) {
+ schema = super.doGetTable(blockAllocator, request, TABLE_FILTER).getSchema();
+ logger.info("doGetTable: Retrieved schema for table[{}] from AWS Glue.", request.getTableName());
+ }
+ }
+ catch (RuntimeException ex) {
+ logger.warn("doGetTable: Unable to retrieve table[{}:{}] from AWS Glue.",
+ request.getTableName().getSchemaName(),
+ request.getTableName().getTableName(),
+ ex);
+ }
+
+ if (schema == null) {
+ logger.info("doGetTable: Inferring schema for table[{}].", request.getTableName());
+ MongoClient client = getOrCreateConn(request);
+ schema = SchemaUtils.inferSchema(client, request.getTableName(), SCHEMA_INFERRENCE_NUM_DOCS);
+ }
+ return new GetTableResponse(request.getCatalogName(), request.getTableName(), schema);
+ }
+
+ /**
+ * Our connector doesn't support complex layouts or partitioning so we simply make this method a NoOp.
+ *
+ * @see GlueMetadataHandler
+ */
+ @Override
+ public void getPartitions(BlockWriter blockWriter, GetTableLayoutRequest request, QueryStatusChecker queryStatusChecker)
+ throws Exception
+ {
+ //NoOp as we do not support partitioning.
+ }
+
+ /**
+ * Since our connector does not support parallel scans we generate a single Split and include the connection details
+ * as a property on the split so that the RecordHandler has easy access to it.
+ *
+ * @see GlueMetadataHandler
+ */
+ @Override
+ public GetSplitsResponse doGetSplits(BlockAllocator blockAllocator, GetSplitsRequest request)
+ {
+ //Every split must have a unique location if we wish to spill to avoid failures
+ SpillLocation spillLocation = makeSpillLocation(request);
+
+ //Since our connector does not support parallel reads we return a fixed split.
+ return new GetSplitsResponse(request.getCatalogName(),
+ Split.newBuilder(spillLocation, makeEncryptionKey())
+ .add(DOCDB_CONN_STR, getConnStr(request))
+ .build());
+ }
+
+ /**
+ * @see GlueMetadataHandler
+ */
+ @Override
+ protected Field convertField(String name, String glueType)
+ {
+ return GlueFieldLexer.lex(name, glueType);
+ }
+}
diff --git a/athena-docdb/src/main/java/com/amazonaws/athena/connectors/docdb/DocDBRecordHandler.java b/athena-docdb/src/main/java/com/amazonaws/athena/connectors/docdb/DocDBRecordHandler.java
new file mode 100644
index 0000000000..73ea87c9f1
--- /dev/null
+++ b/athena-docdb/src/main/java/com/amazonaws/athena/connectors/docdb/DocDBRecordHandler.java
@@ -0,0 +1,169 @@
+/*-
+ * #%L
+ * athena-mongodb
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+package com.amazonaws.athena.connectors.docdb;
+
+import com.amazonaws.athena.connector.lambda.QueryStatusChecker;
+import com.amazonaws.athena.connector.lambda.data.Block;
+import com.amazonaws.athena.connector.lambda.data.BlockSpiller;
+import com.amazonaws.athena.connector.lambda.domain.Split;
+import com.amazonaws.athena.connector.lambda.domain.TableName;
+import com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet;
+import com.amazonaws.athena.connector.lambda.handlers.RecordHandler;
+import com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest;
+import com.amazonaws.services.athena.AmazonAthena;
+import com.amazonaws.services.athena.AmazonAthenaClientBuilder;
+import com.amazonaws.services.s3.AmazonS3;
+import com.amazonaws.services.s3.AmazonS3ClientBuilder;
+import com.amazonaws.services.secretsmanager.AWSSecretsManager;
+import com.amazonaws.services.secretsmanager.AWSSecretsManagerClientBuilder;
+import com.mongodb.client.MongoClient;
+import com.mongodb.client.MongoCollection;
+import com.mongodb.client.MongoCursor;
+import com.mongodb.client.MongoDatabase;
+import org.apache.arrow.util.VisibleForTesting;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.bson.Document;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.Map;
+import java.util.concurrent.atomic.AtomicLong;
+
+import static com.amazonaws.athena.connectors.docdb.DocDBFieldResolver.DEFAULT_FIELD_RESOLVER;
+import static com.amazonaws.athena.connectors.docdb.DocDBMetadataHandler.DOCDB_CONN_STR;
+
+/**
+ * Handles data read record requests for the Athena DocumentDB Connector.
+ *
+ * For more detail, please see the module's README.md; some notable characteristics of this class include:
+ *
+ * 1. Attempts to resolve sensitive configuration fields such as the DocDB connection string via SecretsManager so that you can
+ * substitute variables with values by doing something like mongodb://${docdb_instance_1_creds}@myhostname.com:123/
+ */
+public class DocDBRecordHandler
+ extends RecordHandler
+{
+ private static final Logger logger = LoggerFactory.getLogger(DocDBRecordHandler.class);
+
+ //Used to denote the 'type' of this connector for diagnostic purposes.
+ private static final String SOURCE_TYPE = "documentdb";
+ //Controls the page size for fetching batches of documents from the MongoDB client.
+ private static final int MONGO_QUERY_BATCH_SIZE = 100;
+
+ private final DocDBConnectionFactory connectionFactory;
+
+ public DocDBRecordHandler()
+ {
+ this(AmazonS3ClientBuilder.defaultClient(),
+ AWSSecretsManagerClientBuilder.defaultClient(),
+ AmazonAthenaClientBuilder.defaultClient(),
+ new DocDBConnectionFactory());
+ }
+
+ @VisibleForTesting
+ protected DocDBRecordHandler(AmazonS3 amazonS3, AWSSecretsManager secretsManager, AmazonAthena athena, DocDBConnectionFactory connectionFactory)
+ {
+ super(amazonS3, secretsManager, athena, SOURCE_TYPE);
+ this.connectionFactory = connectionFactory;
+ }
+
+ /**
+ * Gets the special DOCDB_CONN_STR property from the provided split and uses its contents to getOrCreate
+ * a MongoDB client connection.
+ *
+ * @param split The split that we need to read and thus the DocDB instance to connect to.
+ * @return A MongoClient connected to the requested DB instance.
+ * @note This method attempts to resolve any SecretsManager secrets that are used in the connection string and denoted
+ * by ${secret_name}.
+ */
+ private MongoClient getOrCreateConn(Split split)
+ {
+ String conStr = split.getProperty(DOCDB_CONN_STR);
+ if (conStr == null) {
+ throw new RuntimeException(DOCDB_CONN_STR + " Split property is null! Unable to create connection.");
+ }
+ String endpoint = resolveSecrets(conStr);
+ return connectionFactory.getOrCreateConn(endpoint);
+ }
+
+ /**
+ * Scans DocumentDB using the scan settings set on the requested Split by DocDBMetadataHandler.
+ *
+ * @see RecordHandler
+ */
+ @Override
+ protected void readWithConstraint(BlockSpiller spiller, ReadRecordsRequest recordsRequest, QueryStatusChecker queryStatusChecker)
+ {
+ TableName tableName = recordsRequest.getTableName();
+ Map<String, ValueSet> constraintSummary = recordsRequest.getConstraints().getSummary();
+
+ MongoClient client = getOrCreateConn(recordsRequest.getSplit());
+ MongoDatabase db = client.getDatabase(tableName.getSchemaName());
+ MongoCollection<Document> table = db.getCollection(tableName.getTableName());
+
+ Document query = QueryUtils.makeQuery(recordsRequest.getSchema(), constraintSummary);
+ Document output = QueryUtils.makeProjection(recordsRequest.getSchema());
+
+ logger.info("readWithConstraint: query[{}] projection[{}]", query, output);
+
+ final MongoCursor<Document> iterable = table
+ .find(query)
+ .projection(output)
+ .batchSize(MONGO_QUERY_BATCH_SIZE).iterator();
+
+ long numRows = 0;
+ AtomicLong numResultRows = new AtomicLong(0);
+ while (iterable.hasNext() && queryStatusChecker.isQueryRunning()) {
+ numRows++;
+ spiller.writeRows((Block block, int rowNum) -> {
+ Document doc = iterable.next();
+
+ boolean matched = true;
+ for (Field nextField : recordsRequest.getSchema().getFields()) {
+ Object value = TypeUtils.coerce(nextField, doc.get(nextField.getName()));
+ Types.MinorType fieldType = Types.getMinorTypeForArrowType(nextField.getType());
+ try {
+ switch (fieldType) {
+ case LIST:
+ case STRUCT:
+ matched &= block.offerComplexValue(nextField.getName(), rowNum, DEFAULT_FIELD_RESOLVER, value);
+ break;
+ default:
+ matched &= block.offerValue(nextField.getName(), rowNum, value);
+ break;
+ }
+ if (!matched) {
+ return 0;
+ }
+ }
+ catch (Exception ex) {
+ throw new RuntimeException("Error while processing field " + nextField.getName(), ex);
+ }
+ }
+
+ numResultRows.getAndIncrement();
+ return 1;
+ });
+ }
+
+ logger.info("readWithConstraint: numRows[{}] numResultRows[{}]", numRows, numResultRows.get());
+ }
+}
diff --git a/athena-docdb/src/main/java/com/amazonaws/athena/connectors/docdb/QueryUtils.java b/athena-docdb/src/main/java/com/amazonaws/athena/connectors/docdb/QueryUtils.java
new file mode 100644
index 0000000000..fdf2b17191
--- /dev/null
+++ b/athena-docdb/src/main/java/com/amazonaws/athena/connectors/docdb/QueryUtils.java
@@ -0,0 +1,247 @@
+/*-
+ * #%L
+ * athena-mongodb
+ * %%
+ * Copyright (C) 2019 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * @note Portions of this file are attributable to:
+ * https://github.com/prestodb/presto/blob/master/presto-mongodb/src/main/java/com/facebook/presto/mongodb/MongoSession.java
+ */
+package com.amazonaws.athena.connectors.docdb;
+
+import com.amazonaws.athena.connector.lambda.data.Block;
+import com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet;
+import com.amazonaws.athena.connector.lambda.domain.predicate.Range;
+import com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.apache.arrow.vector.util.Text;
+import org.bson.Document;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
+import static com.google.common.base.Preconditions.checkState;
+import static com.google.common.base.Verify.verify;
+import static java.util.stream.Collectors.toList;
+
+/**
+ * Collection of helper methods which build Documents for use in DocumentDB queries, including:
+ * 1. Projections
+ * 2. Predicates
+ * 3. Queries (a collection of predicates)
+ */
+public final class QueryUtils
+{
+ private static final String OR_OP = "$or";
+ private static final String AND_OP = "$and";
+ private static final String NOT_OP = "$not";
+ private static final String NOR_OP = "$nor";
+
+ private static final String EQ_OP = "$eq";
+ private static final String NOT_EQ_OP = "$ne";
+ private static final String EXISTS_OP = "$exists";
+ private static final String GTE_OP = "$gte";
+ private static final String GT_OP = "$gt";
+ private static final String LT_OP = "$lt";
+ private static final String LTE_OP = "$lte";
+ private static final String IN_OP = "$in";
+ private static final String NOTIN_OP = "$nin";
+
+ private QueryUtils()
+ {
+ }
+
+ /**
+ * Given a Schema create a projection document which can be used to request only specific Document fields
+ * from DocumentDB.
+ *
+ * @param schema The schema containing the requested projection.
+ * @return A Document matching the requested field projections.
+ */
+ public static Document makeProjection(Schema schema)
+ {
+ Document output = new Document();
+ for (Field field : schema.getFields()) {
+ output.append(field.getName(), 1);
+ }
+ return output;
+ }
+
+ /**
+ * Given a set of Constraints and the projection Schema, create the Query Document that can be used to
+ * push predicates into DocumentDB.
+ *
+ * @param schema The schema containing the requested projection.
+ * @param constraintSummary The set of constraints to apply to the query.
+ * @return The Document to use as the query.
+ */
+ public static Document makeQuery(Schema schema, Map<String, ValueSet> constraintSummary)
+ {
+ Document query = new Document();
+ for (Map.Entry<String, ValueSet> entry : constraintSummary.entrySet()) {
+ Document doc = makePredicate(schema.findField(entry.getKey()), entry.getValue());
+ if (doc != null) {
+ query.putAll(doc);
+ }
+ }
+
+ return query;
+ }
+
+ /**
+ * Converts a single field constraint into a Document for use in a DocumentDB query.
+ *
+ * @param field The field for the given ValueSet constraint.
+ * @param constraint The constraint to apply to the given field.
+ * @return A Document describing the constraint for pushing down into DocumentDB.
+ */
+ public static Document makePredicate(Field field, ValueSet constraint)
+ {
+ String name = field.getName();
+
+ if (constraint.isNone()) {
+ return documentOf(name, isNullPredicate());
+ }
+
+ if (constraint.isAll()) {
+ return documentOf(name, isNotNullPredicate());
+ }
+
+ if (constraint.isNullAllowed()) {
+ //TODO: support nulls mixed with discrete value constraints
+ return null;
+ }
+
+ if (constraint instanceof EquatableValueSet) {
+ Block block = ((EquatableValueSet) constraint).getValues();
+ List