From 43f6e514774a0531dbf264df9b38aaa7d8507ca0 Mon Sep 17 00:00:00 2001 From: Vincent Gromakowski Date: Wed, 12 Feb 2025 14:16:21 +0100 Subject: [PATCH] feat(catalog): add lake formation support in catalog (#810) * add lake formation support in catalog --------- Co-authored-by: vgkowski --- .gitignore | 1 + .projenrc.ts | 1 + framework/API.md | 242 ++++++++++- framework/src/governance/README.md | 15 + .../data-catalog-database-permissions.lit.ts | 26 ++ .../data-lake-catalog-permissions.lit.ts | 23 + .../lib/data-catalog-database-props.ts | 26 +- .../governance/lib/data-catalog-database.ts | 279 ++++++++++--- .../governance/lib/data-lake-catalog-props.ts | 23 + .../src/governance/lib/data-lake-catalog.ts | 33 +- framework/src/governance/lib/index.ts | 3 +- .../governance/lib/lake-formation-helpers.ts | 237 +++++++++++ framework/src/utils/lib/dsf-provider-props.ts | 2 +- framework/src/utils/lib/index.ts | 1 + framework/src/utils/lib/permission-model.ts | 11 + framework/src/utils/lib/utils.ts | 12 +- .../e2e/data-catalog-database.e2e.test.ts | 38 +- .../test/e2e/data-lake-catalog.e2e.test.ts | 63 ++- .../governance/data-catalog-database.test.ts | 393 +++++++++++++++++- .../unit/governance/data-lake-catalog.test.ts | 349 +++++++++++++++- .../nag-access-data-catalog-database.test.ts | 62 ++- .../nag-access-data-lake-catalog.test.ts | 69 ++- framework/yarn.lock | 92 +--- .../_governance-data-catalog-database.mdx | 52 +++ 24 files changed, 1864 insertions(+), 189 deletions(-) create mode 100644 framework/src/governance/examples/data-catalog-database-permissions.lit.ts create mode 100644 framework/src/governance/examples/data-lake-catalog-permissions.lit.ts create mode 100644 framework/src/governance/lib/lake-formation-helpers.ts create mode 100644 framework/src/utils/lib/permission-model.ts diff --git a/.gitignore b/.gitignore index 47b21727f..1d464c30b 100644 --- a/.gitignore +++ b/.gitignore @@ -37,6 +37,7 @@ __pycache__ cdk.out .DS_Store LICENSE.bak 
+framework/test/e2e/mytest.e2e.test.ts !/.mergify.yml !/.github/dependabot.yml .jsii.tabl.json diff --git a/.projenrc.ts b/.projenrc.ts index 4c4ca925f..31bf09a46 100644 --- a/.projenrc.ts +++ b/.projenrc.ts @@ -74,6 +74,7 @@ const rootProject = new LernaProject({ 'cdk.out', '.DS_Store', 'LICENSE.bak', + 'framework/test/e2e/mytest.e2e.test.ts', ], projenrcTs: true, diff --git a/framework/API.md b/framework/API.md index 8e794993a..792d3660e 100644 --- a/framework/API.md +++ b/framework/API.md @@ -3295,6 +3295,7 @@ public grantReadOnlyAccess(principal: IPrincipal): AddToPrincipalPolicyResult Grants read access via identity based policy to the principal. This would attach an IAM Policy to the principal allowing read access to the Glue Database and all its Glue Tables. +Only valid for IAM permission model. ###### `principal`Required @@ -3360,9 +3361,17 @@ Any object. | database | aws-cdk-lib.aws_glue.CfnDatabase | The Glue Database that's created. | | databaseName | string | The Glue Database name with the randomized suffix to prevent name collisions in the catalog. | | crawler | aws-cdk-lib.aws_glue.CfnCrawler | The Glue Crawler created when `autoCrawl` is set to `true` (default value). | +| crawlerLakeFormationDatabaseGrant | aws-cdk-lib.aws_lakeformation.CfnPrincipalPermissions | The Lake Formation grant on the database for the Crawler when Lake Formation or Hybrid is used. | +| crawlerLakeFormationLocationGrant | aws-cdk-lib.aws_lakeformation.CfnPrincipalPermissions | The Lake Formation grant on the data location for the Crawler when Lake Formation or Hybrid is used. | +| crawlerLakeFormationTablesGrant | aws-cdk-lib.aws_lakeformation.CfnPrincipalPermissions | The Lake Formation grant on the tables for the Crawler when Lake Formation or Hybrid is used. | | crawlerLogEncryptionKey | aws-cdk-lib.aws_kms.IKey | KMS encryption Key used by the Crawler. | | crawlerRole | aws-cdk-lib.aws_iam.IRole | The IAM Role used by the Glue crawler when created. 
| | crawlerSecurityConfiguration | aws-cdk-lib.aws_glue.CfnSecurityConfiguration | The Glue security configuration used by the Glue Crawler when created. | +| dataLakeLocation | aws-cdk-lib.aws_lakeformation.CfnResource | The Lake Formation data lake location. | +| dataLakeSettings | aws-cdk-lib.aws_lakeformation.CfnDataLakeSettings | The DataLakeSettings for Lake Formation. | +| lakeFormationDataAccessRole | aws-cdk-lib.aws_iam.IRole | The IAM Role used by Lake Formation to access data. | +| lakeFormationRevokeRole | aws-cdk-lib.aws_iam.IRole | The IAM Role used to revoke LakeFormation IAMAllowedPrincipals. | +| revokeIamAllowedPrincipal | aws-cdk-lib.custom_resources.AwsCustomResource | The custom resource for revoking IAM permissions from the database. | --- @@ -3416,6 +3425,42 @@ This property can be undefined if `autoCrawl` is set to `false`. --- +##### `crawlerLakeFormationDatabaseGrant`Optional + +```typescript +public readonly crawlerLakeFormationDatabaseGrant: CfnPrincipalPermissions; +``` + +- *Type:* aws-cdk-lib.aws_lakeformation.CfnPrincipalPermissions + +The Lake Formation grant on the database for the Crawler when Lake Formation or Hybrid is used. + +--- + +##### `crawlerLakeFormationLocationGrant`Optional + +```typescript +public readonly crawlerLakeFormationLocationGrant: CfnPrincipalPermissions; +``` + +- *Type:* aws-cdk-lib.aws_lakeformation.CfnPrincipalPermissions + +The Lake Formation grant on the data location for the Crawler when Lake Formation or Hybrid is used. + +--- + +##### `crawlerLakeFormationTablesGrant`Optional + +```typescript +public readonly crawlerLakeFormationTablesGrant: CfnPrincipalPermissions; +``` + +- *Type:* aws-cdk-lib.aws_lakeformation.CfnPrincipalPermissions + +The Lake Formation grant on the tables for the Crawler when Lake Formation or Hybrid is used. + +--- + ##### `crawlerLogEncryptionKey`Optional ```typescript @@ -3452,6 +3497,66 @@ The Glue security configuration used by the Glue Crawler when created. 
--- +##### `dataLakeLocation`Optional + +```typescript +public readonly dataLakeLocation: CfnResource; +``` + +- *Type:* aws-cdk-lib.aws_lakeformation.CfnResource + +The Lake Formation data lake location. + +--- + +##### `dataLakeSettings`Optional + +```typescript +public readonly dataLakeSettings: CfnDataLakeSettings; +``` + +- *Type:* aws-cdk-lib.aws_lakeformation.CfnDataLakeSettings + +The DataLakeSettings for Lake Formation. + +--- + +##### `lakeFormationDataAccessRole`Optional + +```typescript +public readonly lakeFormationDataAccessRole: IRole; +``` + +- *Type:* aws-cdk-lib.aws_iam.IRole + +The IAM Role used by Lake Formation to access data. + +--- + +##### `lakeFormationRevokeRole`Optional + +```typescript +public readonly lakeFormationRevokeRole: IRole; +``` + +- *Type:* aws-cdk-lib.aws_iam.IRole + +The IAM Role used to revoke LakeFormation IAMAllowedPrincipals. + +--- + +##### `revokeIamAllowedPrincipal`Optional + +```typescript +public readonly revokeIamAllowedPrincipal: AwsCustomResource; +``` + +- *Type:* aws-cdk-lib.custom_resources.AwsCustomResource + +The custom resource for revoking IAM permissions from the database. + +--- + #### Constants | **Name** | **Type** | **Description** | @@ -14496,8 +14601,11 @@ const dataCatalogDatabaseProps: governance.DataCatalogDatabaseProps = { ... } | jdbcPath | string | The JDBC path that would be included by the crawler. | | jdbcSecret | aws-cdk-lib.aws_secretsmanager.ISecret | The secret associated with the JDBC connection. | | jdbcSecretKMSKey | aws-cdk-lib.aws_kms.IKey | The KMS key used by the JDBC secret. | +| lakeFormationConfigurationRole | aws-cdk-lib.aws_iam.IRole | The IAM Role assumed by the construct resources to perform Lake Formation configuration. | +| lakeFormationDataAccessRole | aws-cdk-lib.aws_iam.IRole | The IAM Role used by Lake Formation for [data access](https://docs.aws.amazon.com/lake-formation/latest/dg/registration-role.html). 
The role is assumed by Lake Formation to provide temporary credentials to query engines. Only needed when permissionModel is set to Lake Formation or Hybrid. | | locationBucket | aws-cdk-lib.aws_s3.IBucket | S3 bucket where data is stored. | | locationPrefix | string | Top level location where table data is stored. | +| permissionModel | @cdklabs/aws-data-solutions-framework.utils.PermissionModel | The permission model to apply to the Glue Database. | | removalPolicy | aws-cdk-lib.RemovalPolicy | The removal policy when deleting the CDK resource. | --- @@ -14635,6 +14743,35 @@ The KMS key used by the JDBC secret. --- +##### `lakeFormationConfigurationRole`Optional + +```typescript +public readonly lakeFormationConfigurationRole: IRole; +``` + +- *Type:* aws-cdk-lib.aws_iam.IRole +- *Default:* A new role is created + +The IAM Role assumed by the construct resources to perform Lake Formation configuration. + +The role is assumed by Lambda functions to perform Lake Formation related operations. +Only needed when permissionModel is set to Lake Formation or Hybrid + +--- + +##### `lakeFormationDataAccessRole`Optional + +```typescript +public readonly lakeFormationDataAccessRole: IRole; +``` + +- *Type:* aws-cdk-lib.aws_iam.IRole +- *Default:* A new role is created + +The IAM Role used by Lake Formation for [data access](https://docs.aws.amazon.com/lake-formation/latest/dg/registration-role.html). The role is assumed by Lake Formation to provide temporary credentials to query engines. Only needed when permissionModel is set to Lake Formation or Hybrid. + +--- + ##### `locationBucket`Optional ```typescript @@ -14654,11 +14791,22 @@ public readonly locationPrefix: string; ``` - *Type:* string +- *Default:* the root of the bucket is used as the location prefix. Top level location where table data is stored. -The location prefix cannot be empty if the `locationBucket` is set. -The minimal configuration is `/` for the root level in the Bucket. 
+--- + +##### `permissionModel`Optional + +```typescript +public readonly permissionModel: PermissionModel; +``` + +- *Type:* @cdklabs/aws-data-solutions-framework.utils.PermissionModel +- *Default:* IAM permission model is used + +The permission model to apply to the Glue Database. --- @@ -14700,6 +14848,9 @@ const dataLakeCatalogProps: governance.DataLakeCatalogProps = { ... } | crawlerLogEncryptionKey | aws-cdk-lib.aws_kms.IKey | The KMS encryption Key used for the Glue Crawler logs. | | crawlerTableLevelDepth | number | Directory depth where the table folders are located. | | databaseName | string | The suffix of the Glue Data Catalog Database. | +| lakeFormationConfigurationRole | aws-cdk-lib.aws_iam.IRole | The IAM Role assumed by the construct resources to perform Lake Formation configuration. | +| lakeFormationDataAccessRole | aws-cdk-lib.aws_iam.IRole | The IAM Role used by Lake Formation for [data access](https://docs.aws.amazon.com/lake-formation/latest/dg/access-control-underlying-data.html). The role will be used for accessing all the layers of the data lake (bronze, silver, gold). Only needed when permissionModel is set to Lake Formation or Hybrid. | +| permissionModel | @cdklabs/aws-data-solutions-framework.utils.PermissionModel | The permission model to apply to the Glue Database. | | removalPolicy | aws-cdk-lib.RemovalPolicy | The removal policy when deleting the CDK resource. | --- @@ -14788,6 +14939,47 @@ The suffix is also added to the S3 location inside the data lake S3 Buckets. --- +##### `lakeFormationConfigurationRole`Optional + +```typescript +public readonly lakeFormationConfigurationRole: IRole; +``` + +- *Type:* aws-cdk-lib.aws_iam.IRole +- *Default:* A new role is created for the entire Data Lake + +The IAM Role assumed by the construct resources to perform Lake Formation configuration. 
+ +Only needed when permissionModel is set to Lake Formation or Hybrid + +--- + +##### `lakeFormationDataAccessRole`Optional + +```typescript +public readonly lakeFormationDataAccessRole: IRole; +``` + +- *Type:* aws-cdk-lib.aws_iam.IRole +- *Default:* A new role is created for the entire Data Lake + +The IAM Role used by Lake Formation for [data access](https://docs.aws.amazon.com/lake-formation/latest/dg/access-control-underlying-data.html). The role will be used for accessing all the layers of the data lake (bronze, silver, gold). Only needed when permissionModel is set to Lake Formation or Hybrid. + +--- + +##### `permissionModel`Optional + +```typescript +public readonly permissionModel: PermissionModel; +``` + +- *Type:* @cdklabs/aws-data-solutions-framework.utils.PermissionModel +- *Default:* IAM permission model is used + +The permission model to apply to the Glue Database. + +--- + ##### `removalPolicy`Optional ```typescript @@ -22041,6 +22233,7 @@ new utils.Utils() | --- | --- | | generateHash | Generate an 8 character hash from a string based on HMAC algorithm. | | generateUniqueHash | Generate a unique hash of 8 characters from the CDK scope using its path and the stack name. | +| getCdkDeploymentRole | Get CDK deployment role. | | loadYaml | Take a document stored as string and load it as YAML. | | randomize | Create a random string to be used as a seed for IAM User password. | | readYamlDocument | Read a YAML file from the path provided and return it. | @@ -22094,6 +22287,22 @@ the CDK ID of the construct. --- +##### `getCdkDeploymentRole` + +```typescript +import { utils } from '@cdklabs/aws-data-solutions-framework' + +utils.Utils.getCdkDeploymentRole(scope: Construct) +``` + +Get CDK deployment role. + +###### `scope`Required + +- *Type:* constructs.Construct + +--- + ##### `loadYaml` ```typescript @@ -23026,6 +23235,35 @@ Default Node Instances for OpenSearch cluster. --- +### PermissionModel + +List of supported permissions model. 
+ +#### Members + +| **Name** | **Description** | +| --- | --- | +| LAKE_FORMATION | *No description.* | +| HYBRID | *No description.* | +| IAM | *No description.* | + +--- + +##### `LAKE_FORMATION` + +--- + + +##### `HYBRID` + +--- + + +##### `IAM` + +--- + + ### RedshiftServerlessNamespaceLogExport Namespace log export types. diff --git a/framework/src/governance/README.md b/framework/src/governance/README.md index af70d62e9..3bd97b61d 100644 --- a/framework/src/governance/README.md +++ b/framework/src/governance/README.md @@ -9,6 +9,7 @@ AWS Glue Catalog database for an Amazon S3 dataset. - The database default location is pointing to an S3 bucket location `s3:////` - The database can store various tables structured in their respective prefixes, for example: `s3://///` - By default, a database level crawler is scheduled to run once a day (00:01h local timezone). The crawler can be disabled and the schedule/frequency of the crawler can be modified with a cron expression. +- The permission model of the database can use IAM, LakeFormation or Hybrid mode. ![Data Catalog Database](../../../website/static/img/adsf-data-catalog.png) @@ -20,6 +21,20 @@ The AWS Glue Data Catalog resources created by the `DataCatalogDatabase` constru [example default usage](./examples/data-catalog-database-default.lit.ts) +## Using Lake Formation permission model + +You can change the default permission model of the database to use [Lake Formation](https://docs.aws.amazon.com/lake-formation/latest/dg/how-it-works.html) exclusively or [hybrid mode](https://docs.aws.amazon.com/lake-formation/latest/dg/hybrid-access-mode.html). 
+ +Changing the permission model to Lake Formation or Hybrid has the following impact: +* The CDK provisioning role is added as a Lake Formation administrator so it can perform Lake Formation operations +* The `IAMAllowedPrincipals` grant is removed from the database to enforce Lake Formation as the unique permission model (only for Lake Formation permission model) + +:::caution Lake Formation Data Lake Settings +Lake Formation and Hybrid permission models are configured using the `PutDataLakeSettings` API call. Concurrent API calls can lead to throttling. If you create multiple `DataCatalogDatabase` constructs, it's recommended to create dependencies between the `dataLakeSettings` that are exposed in each database to avoid concurrent calls. See the example in the `DataLakeCatalog` construct [here](https://github.com/awslabs/data-solutions-framework-on-aws/blob/main/framework/src/governance/lib/data-lake-catalog.ts#L137) +::: + +[example lake formation permission model](./examples/data-catalog-database-permissions.lit.ts) + ## Modifying the crawler behavior You can change the default configuration of the AWS Glue Crawler to match your requirements: diff --git a/framework/src/governance/examples/data-catalog-database-permissions.lit.ts b/framework/src/governance/examples/data-catalog-database-permissions.lit.ts new file mode 100644 index 000000000..82b0b2c7c --- /dev/null +++ b/framework/src/governance/examples/data-catalog-database-permissions.lit.ts @@ -0,0 +1,26 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +import * as cdk from 'aws-cdk-lib'; +import { Bucket } from 'aws-cdk-lib/aws-s3'; +import { Construct } from 'constructs'; +import * as dsf from '../../index'; + +/// !show +class ExampleDefaultDataCatalogDatabaseStack extends cdk.Stack { + constructor(scope: Construct, id: string) { + super(scope, id); + const bucket = new Bucket(this, 'DataCatalogBucket'); + + new dsf.governance.DataCatalogDatabase(this, 'DataCatalogDatabase', { + locationBucket: bucket, + locationPrefix: '/databasePath', + name: 'example-db', + permissionModel: dsf.utils.PermissionModel.LAKE_FORMATION, + }); + } +} +/// !hide + +const app = new cdk.App(); +new ExampleDefaultDataCatalogDatabaseStack(app, 'ExampleDefaultDataCatalogDatabaseStack'); \ No newline at end of file diff --git a/framework/src/governance/examples/data-lake-catalog-permissions.lit.ts b/framework/src/governance/examples/data-lake-catalog-permissions.lit.ts new file mode 100644 index 000000000..d44868647 --- /dev/null +++ b/framework/src/governance/examples/data-lake-catalog-permissions.lit.ts @@ -0,0 +1,23 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +import * as cdk from 'aws-cdk-lib'; +import { Construct } from 'constructs'; +import * as dsf from '../../index'; + +/// !show +class ExampleDefaultDataLakeCatalogStack extends cdk.Stack { + constructor(scope: Construct, id: string) { + super(scope, id); + const storage = new dsf.storage.DataLakeStorage(this, 'MyDataLakeStorage'); + + new dsf.governance.DataLakeCatalog(this, 'DataCatalog', { + dataLakeStorage: storage, + permissionModel: dsf.utils.PermissionModel.LAKE_FORMATION, + }); + } +} +/// !hide + +const app = new cdk.App(); +new ExampleDefaultDataLakeCatalogStack(app, 'ExampleDefaultDataLakeCatalogStack'); \ No newline at end of file diff --git a/framework/src/governance/lib/data-catalog-database-props.ts b/framework/src/governance/lib/data-catalog-database-props.ts index 09a1f962c..0fae09c55 100644 --- a/framework/src/governance/lib/data-catalog-database-props.ts +++ b/framework/src/governance/lib/data-catalog-database-props.ts @@ -7,6 +7,7 @@ import { IRole } from 'aws-cdk-lib/aws-iam'; import { IKey } from 'aws-cdk-lib/aws-kms'; import { IBucket } from 'aws-cdk-lib/aws-s3'; import { ISecret } from 'aws-cdk-lib/aws-secretsmanager'; +import { PermissionModel } from '../../utils'; /** * Properties for the `DataCatalogDatabase` construct @@ -24,8 +25,7 @@ export interface DataCatalogDatabaseProps { /** * Top level location where table data is stored. - * The location prefix cannot be empty if the `locationBucket` is set. - * The minimal configuration is `/` for the root level in the Bucket. + * @default - the root of the bucket is used as the location prefix. */ readonly locationPrefix?: string; @@ -87,4 +87,26 @@ export interface DataCatalogDatabaseProps { * @default - The resources are not deleted (`RemovalPolicy.RETAIN`). */ readonly removalPolicy?: RemovalPolicy; + + /** + * The permission model to apply to the Glue Database. 
+ * @default - IAM permission model is used + */ + readonly permissionModel?: PermissionModel; + + /** + * The IAM Role used by Lake Formation for [data access](https://docs.aws.amazon.com/lake-formation/latest/dg/registration-role.html). + * The role is assumed by Lake Formation to provide temporary credentials to query engines. + * Only needed when permissionModel is set to Lake Formation or Hybrid + * @default - A new role is created + */ + readonly lakeFormationDataAccessRole?: IRole; + + /** + * The IAM Role assumed by the construct resources to perform Lake Formation configuration. + * The role is assumed by Lambda functions to perform Lake Formation related operations. + * Only needed when permissionModel is set to Lake Formation or Hybrid + * @default - A new role is created + */ + readonly lakeFormationConfigurationRole?: IRole; } \ No newline at end of file diff --git a/framework/src/governance/lib/data-catalog-database.ts b/framework/src/governance/lib/data-catalog-database.ts index f04dfa717..eb7bec2ca 100644 --- a/framework/src/governance/lib/data-catalog-database.ts +++ b/framework/src/governance/lib/data-catalog-database.ts @@ -5,9 +5,12 @@ import { Stack } from 'aws-cdk-lib'; import { CfnCrawler, CfnDatabase, CfnSecurityConfiguration } from 'aws-cdk-lib/aws-glue'; import { AddToPrincipalPolicyResult, Effect, IPrincipal, IRole, PolicyDocument, PolicyStatement, Role, ServicePrincipal } from 'aws-cdk-lib/aws-iam'; import { IKey, Key } from 'aws-cdk-lib/aws-kms'; +import { CfnDataLakeSettings, CfnPrincipalPermissions, CfnResource } from 'aws-cdk-lib/aws-lakeformation'; +import { AwsCustomResource } from 'aws-cdk-lib/custom-resources'; import { Construct } from 'constructs'; import { DataCatalogDatabaseProps } from './data-catalog-database-props'; -import { Context, TrackedConstruct, TrackedConstructProps, Utils } from '../../utils'; +import { grantCrawler, grantDataLakeLocation, putDataLakeSettings, registerS3Location, revokeIamAllowedPrincipal } from 
'./lake-formation-helpers'; +import { Context, PermissionModel, TrackedConstruct, TrackedConstructProps, Utils } from '../../utils'; /** * An AWS Glue Data Catalog Database configured with the location and a crawler. @@ -23,6 +26,10 @@ import { Context, TrackedConstruct, TrackedConstructProps, Utils } from '../../u * }); */ export class DataCatalogDatabase extends TrackedConstruct { + /** + * Default permission model for the DataCatalogDatabase + */ + private static readonly DEFAULT_PERMISSION_MODEL = PermissionModel.IAM; /** * The Glue Crawler created when `autoCrawl` is set to `true` (default value). This property can be undefined if `autoCrawl` is set to `false`. */ @@ -47,10 +54,54 @@ export class DataCatalogDatabase extends TrackedConstruct { * KMS encryption Key used by the Crawler */ readonly crawlerLogEncryptionKey?: IKey; + /** + * The DataLakeSettings for Lake Formation + */ + readonly dataLakeSettings?: CfnDataLakeSettings; + /** + * The IAM Role used by Lake Formation to access data. 
+ */ + readonly lakeFormationDataAccessRole?: IRole; + /** + * The Lake Formation data lake location + */ + readonly dataLakeLocation?: CfnResource; + /** + * The custom resource for revoking IAM permissions from the database + */ + readonly revokeIamAllowedPrincipal?: AwsCustomResource; + /** + * The Lake Formation grant on the database for the Crawler when Lake Formation or Hybrid is used + */ + readonly crawlerLakeFormationDatabaseGrant?: CfnPrincipalPermissions; + /** + * The Lake Formation grant on the tables for the Crawler when Lake Formation or Hybrid is used + */ + readonly crawlerLakeFormationTablesGrant?: CfnPrincipalPermissions; + /** + * The Lake Formation grant on the data location for the Crawler when Lake Formation or Hybrid is used + */ + readonly crawlerLakeFormationLocationGrant?: CfnPrincipalPermissions; + /** + * The IAM Role used to revoke LakeFormation IAMAllowedPrincipals + */ + readonly lakeFormationRevokeRole?: IRole; /** * Caching constructor properties for internal reuse by constructor methods */ private dataCatalogDatabaseProps: DataCatalogDatabaseProps; + /** + * The location prefix without trailing slash + */ + private cleanedLocationPrefix?: string; + /** + * The location S3 URI + */ + private s3LocationUri?: string; + /** + * The permission model + */ + private permissionModel: PermissionModel; constructor(scope: Construct, id: string, props: DataCatalogDatabaseProps) { const trackedConstructProps: TrackedConstructProps = { @@ -59,6 +110,12 @@ export class DataCatalogDatabase extends TrackedConstruct { super(scope, id, trackedConstructProps); const catalogType = this.determineCatalogType(props); + this.permissionModel = props.permissionModel || DataCatalogDatabase.DEFAULT_PERMISSION_MODEL; + const useLakeFormation = this.permissionModel === PermissionModel.LAKE_FORMATION || this.permissionModel === PermissionModel.HYBRID; + + if (!useLakeFormation && (props.lakeFormationDataAccessRole !== undefined || 
props.lakeFormationConfigurationRole !== undefined)) { + throw new Error('Lake Formation Data Access Role and Configuration Role are only used when the permission model is Lake Formation or Hybrid'); + } if (catalogType === CatalogType.INVALID) { throw new Error("Data catalog type can't be determined. Please check `DataCatalogDatabase` properties."); @@ -70,25 +127,62 @@ export class DataCatalogDatabase extends TrackedConstruct { const hash = Utils.generateUniqueHash(this); this.databaseName = props.name + '_' + hash.toLowerCase(); - let s3LocationUri: string|undefined, locationPrefix: string|undefined; - if (catalogType === CatalogType.S3) { - locationPrefix = props.locationPrefix; - if (!locationPrefix!.endsWith('/')) { - locationPrefix += '/'; - } + this.cleanedLocationPrefix = props.locationPrefix === undefined ? undefined : props.locationPrefix.replace(/\/$/g, ''); + this.s3LocationUri = props.locationBucket!.s3UrlForObject(this.cleanedLocationPrefix); + + if (useLakeFormation) { + + const lakeFormationAdmins: IRole[]=[]; + const cdkRole = Utils.getCdkDeploymentRole(this); + lakeFormationAdmins.push(cdkRole); - s3LocationUri = props.locationBucket!.s3UrlForObject(locationPrefix); + if (props.permissionModel === PermissionModel.LAKE_FORMATION) { + // Create a role for the AwsCustomResource to revoke IAMAllowedPrincipal + this.lakeFormationRevokeRole = props.lakeFormationConfigurationRole || new Role(this, 'LfRevokeRole', { + assumedBy: new ServicePrincipal('lambda.amazonaws.com'), + }); + lakeFormationAdmins.push(this.lakeFormationRevokeRole); + } + + this.dataLakeSettings = putDataLakeSettings(this, 'DataLakeSettings', lakeFormationAdmins); + + if (props.permissionModel === PermissionModel.LAKE_FORMATION) { + this.dataLakeSettings.node.addDependency(this.lakeFormationRevokeRole!); + } + + // register location + if (props.locationBucket) { + + [this.lakeFormationDataAccessRole, this.dataLakeLocation] = registerS3Location( + this, 'LakeFormationRegistration', + 
props.locationBucket, + this.cleanedLocationPrefix, + props.permissionModel, + props.lakeFormationDataAccessRole, + ); + this.lakeFormationDataAccessRole.node.addDependency(this.dataLakeSettings!); + + } + } } this.database = new CfnDatabase(this, 'GlueDatabase', { catalogId: Stack.of(this).account, databaseInput: { name: this.databaseName, - locationUri: s3LocationUri, + locationUri: this.s3LocationUri, }, }); + this.database.applyRemovalPolicy(removalPolicy); + + if (catalogType === CatalogType.S3 && props.permissionModel === PermissionModel.LAKE_FORMATION) { + + this.revokeIamAllowedPrincipal = + revokeIamAllowedPrincipal(this, 'IamRevoke', this.databaseName, this.lakeFormationRevokeRole!, removalPolicy); + this.revokeIamAllowedPrincipal.node.addDependency(this.database); + } let autoCrawl = props.autoCrawl; @@ -103,49 +197,62 @@ export class DataCatalogDatabase extends TrackedConstruct { const currentStack = Stack.of(this); if (autoCrawl) { + + const statements = [ + new PolicyStatement({ + effect: Effect.ALLOW, + actions: [ + 'glue:BatchCreatePartition', + 'glue:BatchDeletePartition', + 'glue:BatchDeleteTable', + 'glue:BatchDeleteTableVersion', + 'glue:BatchGetPartition', + 'glue:BatchUpdatePartition', + 'glue:CreatePartition', + 'glue:CreateTable', + 'glue:DeletePartition', + 'glue:DeleteTable', + 'glue:GetDatabase', + 'glue:GetDatabases', + 'glue:GetPartition', + 'glue:GetPartitions', + 'glue:GetTable', + 'glue:GetTables', + 'glue:UpdateDatabase', + 'glue:UpdatePartition', + 'glue:UpdateTable', + ], + resources: [ + `arn:aws:glue:${currentStack.region}:${currentStack.account}:catalog`, + `arn:aws:glue:${currentStack.region}:${currentStack.account}:database/${this.databaseName}`, + `arn:aws:glue:${currentStack.region}:${currentStack.account}:table/${this.databaseName}/*`, + ], + }), + new PolicyStatement({ + effect: Effect.ALLOW, + actions: [ + 'glue:GetSecurityConfigurations', + 'glue:GetSecurityConfiguration', + ], + resources: ['*'], + }), + ]; + + if 
(useLakeFormation) { + statements.push(new PolicyStatement({ + effect: Effect.ALLOW, + actions: [ + 'lakeformation:GetDataAccess', + ], + resources: ['*'], + })); + }; + this.crawlerRole = props.crawlerRole || new Role(this, 'CrawlerRole', { assumedBy: new ServicePrincipal('glue.amazonaws.com'), inlinePolicies: { crawlerPermissions: new PolicyDocument({ - statements: [ - new PolicyStatement({ - effect: Effect.ALLOW, - actions: [ - 'glue:BatchCreatePartition', - 'glue:BatchDeletePartition', - 'glue:BatchDeleteTable', - 'glue:BatchDeleteTableVersion', - 'glue:BatchGetPartition', - 'glue:BatchUpdatePartition', - 'glue:CreatePartition', - 'glue:CreateTable', - 'glue:DeletePartition', - 'glue:DeleteTable', - 'glue:GetDatabase', - 'glue:GetDatabases', - 'glue:GetPartition', - 'glue:GetPartitions', - 'glue:GetTable', - 'glue:GetTables', - 'glue:UpdateDatabase', - 'glue:UpdatePartition', - 'glue:UpdateTable', - ], - resources: [ - `arn:aws:glue:${currentStack.region}:${currentStack.account}:catalog`, - `arn:aws:glue:${currentStack.region}:${currentStack.account}:database/${this.databaseName}`, - `arn:aws:glue:${currentStack.region}:${currentStack.account}:table/${this.databaseName}/*`, - ], - }), - new PolicyStatement({ - effect: Effect.ALLOW, - actions: [ - 'glue:GetSecurityConfigurations', - 'glue:GetSecurityConfiguration', - ], - resources: ['*'], - }), - ], + statements, }), }, }); @@ -211,12 +318,13 @@ export class DataCatalogDatabase extends TrackedConstruct { const crawlerName = `${props.name}-${hash.toLowerCase()}-crawler`; if (catalogType === CatalogType.S3) { - this.crawler = this.handleS3TypeCrawler(props, { + [this.crawler, this.crawlerLakeFormationDatabaseGrant, this.crawlerLakeFormationTablesGrant, this.crawlerLakeFormationLocationGrant] = + this.handleS3TypeCrawler(props, { autoCrawlSchedule, crawlerName, crawlerSecurityConfigurationName: this.crawlerSecurityConfiguration.name, - locationPrefix: locationPrefix!, - s3LocationUri: s3LocationUri!, + 
locationPrefix: this.cleanedLocationPrefix!, + s3LocationUri: this.s3LocationUri!, }); } else if (catalogType === CatalogType.JDBC) { this.crawler = this.handleJDBCTypeCrawler(props, { @@ -229,7 +337,9 @@ export class DataCatalogDatabase extends TrackedConstruct { } /** - * Grants read access via identity based policy to the principal. This would attach an IAM Policy to the principal allowing read access to the Glue Database and all its Glue Tables. + * Grants read access via identity based policy to the principal. + * This would attach an IAM Policy to the principal allowing read access to the Glue Database and all its Glue Tables. + * Only valid for IAM permission model. * @param principal Principal to attach the Glue Database read access to * @returns `AddToPrincipalPolicyResult` */ @@ -238,7 +348,7 @@ export class DataCatalogDatabase extends TrackedConstruct { const catalogType = this.determineCatalogType(this.dataCatalogDatabaseProps); - if (catalogType === CatalogType.S3) { + if (catalogType === CatalogType.S3 || this.permissionModel === PermissionModel.IAM) { let locationPrefix = this.dataCatalogDatabaseProps.locationPrefix; if (!locationPrefix!.endsWith('/')) { @@ -273,17 +383,21 @@ export class DataCatalogDatabase extends TrackedConstruct { * @returns `number` */ private calculateDefaultTableLevelDepth(locationPrefix: string): number { - const baseCount = 2; + if (locationPrefix === undefined) { + return 1; + } else { + const baseCount = 2; - const locationTokens = locationPrefix.split('/'); + const locationTokens = locationPrefix.split('/'); - let ctrValidToken = 0; + let ctrValidToken = 0; - locationTokens.forEach((token) => { - ctrValidToken += (token) ? 1 : 0; - }); + locationTokens.forEach((token) => { + ctrValidToken += (token) ? 
1 : 0; + }); - return ctrValidToken + baseCount; + return ctrValidToken + baseCount; + } } /** @@ -292,7 +406,7 @@ export class DataCatalogDatabase extends TrackedConstruct { * @returns `CatalogType` */ private determineCatalogType(props: DataCatalogDatabaseProps): CatalogType { - if (props.locationBucket && props.locationPrefix) { + if (props.locationBucket) { return CatalogType.S3; } else if (props.glueConnectionName && props.jdbcSecret && props.jdbcSecretKMSKey && props.jdbcPath) { return CatalogType.JDBC; @@ -307,12 +421,37 @@ export class DataCatalogDatabase extends TrackedConstruct { * @param s3Props `S3CrawlerProps` * @returns `CfnCrawler` */ - private handleS3TypeCrawler(props: DataCatalogDatabaseProps, s3Props: S3CrawlerProps): CfnCrawler { + private handleS3TypeCrawler( + props: DataCatalogDatabaseProps, + s3Props: S3CrawlerProps, + ): [CfnCrawler, CfnPrincipalPermissions | undefined, CfnPrincipalPermissions | undefined, CfnPrincipalPermissions | undefined] { + const tableLevel = props.crawlerTableLevelDepth || this.calculateDefaultTableLevelDepth(s3Props.locationPrefix); const grantPrefix = s3Props.locationPrefix == '/' ? '' : s3Props.locationPrefix; - props.locationBucket!.grantRead(this.crawlerRole!, grantPrefix+'*'); - return new CfnCrawler(this, 'DatabaseAutoCrawler', { + const useLakeFormation = props.permissionModel === PermissionModel.HYBRID || props.permissionModel === PermissionModel.LAKE_FORMATION; + let lakeFormationDbGrant: CfnPrincipalPermissions | undefined; + let lakeFormationTablesGrant: CfnPrincipalPermissions | undefined; + let lakeFormationLocationGrant: CfnPrincipalPermissions | undefined; + + if (useLakeFormation) { + + lakeFormationLocationGrant = grantDataLakeLocation( + this, 'CrawlerLfLocationGrant', + this.cleanedLocationPrefix ? 
props.locationBucket!.arnForObjects(this.cleanedLocationPrefix) : props.locationBucket!.bucketArn, + this.crawlerRole!, + ); + + [lakeFormationDbGrant, lakeFormationTablesGrant] = grantCrawler(this, 'DbCrawler', this.databaseName, this.crawlerRole!); + + lakeFormationLocationGrant.node.addDependency(this.dataLakeLocation!); + lakeFormationDbGrant.node.addDependency(this.database); + lakeFormationTablesGrant.node.addDependency(this.database); + } else { + props.locationBucket!.grantRead(this.crawlerRole!, grantPrefix+'*'); + } + + const crawler = new CfnCrawler(this, 'DatabaseAutoCrawler', { role: this.crawlerRole!.roleArn, targets: { s3Targets: [{ @@ -329,7 +468,19 @@ export class DataCatalogDatabase extends TrackedConstruct { TableLevelConfiguration: tableLevel, }, }), + lakeFormationConfiguration: { + useLakeFormationCredentials: useLakeFormation, + }, }); + crawler.node.addDependency(this.database); + + if (useLakeFormation) { + crawler.node.addDependency(lakeFormationDbGrant!); + crawler.node.addDependency(lakeFormationTablesGrant!); + crawler.node.addDependency(lakeFormationLocationGrant!); + } + + return [crawler, lakeFormationDbGrant, lakeFormationTablesGrant, lakeFormationLocationGrant]; } /** diff --git a/framework/src/governance/lib/data-lake-catalog-props.ts b/framework/src/governance/lib/data-lake-catalog-props.ts index b14a1bba6..b1b437f4c 100644 --- a/framework/src/governance/lib/data-lake-catalog-props.ts +++ b/framework/src/governance/lib/data-lake-catalog-props.ts @@ -3,8 +3,10 @@ import { RemovalPolicy } from 'aws-cdk-lib'; import { CfnCrawler } from 'aws-cdk-lib/aws-glue'; +import { IRole } from 'aws-cdk-lib/aws-iam'; import { IKey } from 'aws-cdk-lib/aws-kms'; import { DataLakeStorage } from '../../storage'; +import { PermissionModel } from '../../utils'; /** * Properties for the `DataLakeCatalog` Construct @@ -54,4 +56,25 @@ export interface DataLakeCatalogProps { * @default - The resources are not deleted (`RemovalPolicy.RETAIN`). 
*/ readonly removalPolicy?: RemovalPolicy; + + /** + * The permission model to apply to the Glue Database. + * @default - IAM permission model is used + */ + readonly permissionModel?: PermissionModel; + + /** + * The IAM Role used by Lake Formation for [data access](https://docs.aws.amazon.com/lake-formation/latest/dg/access-control-underlying-data.html). + * The role will be used for accessing all the layers of the data lake (bronze, silver, gold). + * Only needed when permissionModel is set to Lake Formation or Hybrid + * @default - A new role is created for the entire Data Lake + */ + readonly lakeFormationDataAccessRole?: IRole; + + /** + * The IAM Role assumed by the construct resources to perform Lake Formation configuration. + * Only needed when permissionModel is set to Lake Formation or Hybrid + * @default - A new role is created for the entire Data Lake + */ + readonly lakeFormationConfigurationRole?: IRole; } \ No newline at end of file diff --git a/framework/src/governance/lib/data-lake-catalog.ts b/framework/src/governance/lib/data-lake-catalog.ts index ab2c34612..ac0b91cf5 100644 --- a/framework/src/governance/lib/data-lake-catalog.ts +++ b/framework/src/governance/lib/data-lake-catalog.ts @@ -2,12 +2,13 @@ // SPDX-License-Identifier: Apache-2.0 import { Fn } from 'aws-cdk-lib'; +import { IRole, Role, ServicePrincipal } from 'aws-cdk-lib/aws-iam'; import { IKey, Key } from 'aws-cdk-lib/aws-kms'; import { Construct } from 'constructs'; import { DataCatalogDatabase } from './data-catalog-database'; import { DataLakeCatalogProps } from './data-lake-catalog-props'; import { AnalyticsBucket } from '../../storage'; -import { Context, TrackedConstruct, TrackedConstructProps } from '../../utils'; +import { Context, PermissionModel, TrackedConstruct, TrackedConstructProps } from '../../utils'; /** * Creates a Data Lake Catalog on top of a `DataLakeStorage`. 
@@ -64,6 +65,22 @@ export class DataLakeCatalog extends TrackedConstruct { }); } + let dataAccessRole : IRole | undefined = undefined; + let configurationRole : IRole | undefined = undefined; + + if (props.permissionModel === PermissionModel.LAKE_FORMATION || props.permissionModel === PermissionModel.HYBRID) { + dataAccessRole = props.lakeFormationDataAccessRole || new Role(this, 'LakeFormationDataAccessRole', { + assumedBy: new ServicePrincipal('lakeformation.amazonaws.com'), + }); + props.dataLakeStorage.bronzeBucket.grantReadWrite(dataAccessRole); + props.dataLakeStorage.silverBucket.grantReadWrite(dataAccessRole); + props.dataLakeStorage.goldBucket.grantReadWrite(dataAccessRole); + + configurationRole = props.lakeFormationConfigurationRole || new Role(this, 'LakeFormationConfigurationRole', { + assumedBy: new ServicePrincipal('lambda.amazonaws.com'), + }); + } + const extractedBronzeBucketName = this.extractBucketName(props.dataLakeStorage.bronzeBucket); const extractedSilverBucketName = this.extractBucketName(props.dataLakeStorage.silverBucket); const extractedGoldBucketName = this.extractBucketName(props.dataLakeStorage.goldBucket); @@ -83,6 +100,9 @@ export class DataLakeCatalog extends TrackedConstruct { crawlerLogEncryptionKey: this.crawlerLogEncryptionKey, crawlerTableLevelDepth: props.crawlerTableLevelDepth, removalPolicy, + permissionModel: props.permissionModel, + lakeFormationDataAccessRole: dataAccessRole, + lakeFormationConfigurationRole: configurationRole, }); this.silverCatalogDatabase = new DataCatalogDatabase(this, 'SilverCatalogDatabase', { @@ -94,6 +114,9 @@ export class DataLakeCatalog extends TrackedConstruct { crawlerLogEncryptionKey: this.crawlerLogEncryptionKey, crawlerTableLevelDepth: props.crawlerTableLevelDepth, removalPolicy, + permissionModel: props.permissionModel, + lakeFormationDataAccessRole: dataAccessRole, + lakeFormationConfigurationRole: configurationRole, }); this.goldCatalogDatabase = new DataCatalogDatabase(this, 'GoldCatalogDatabase',
{ @@ -105,7 +128,15 @@ export class DataLakeCatalog extends TrackedConstruct { crawlerLogEncryptionKey: this.crawlerLogEncryptionKey, crawlerTableLevelDepth: props.crawlerTableLevelDepth, removalPolicy, + permissionModel: props.permissionModel, + lakeFormationDataAccessRole: dataAccessRole, + lakeFormationConfigurationRole: configurationRole, }); + + if (props.permissionModel === PermissionModel.HYBRID || props.permissionModel === PermissionModel.LAKE_FORMATION) { + this.silverCatalogDatabase.dataLakeSettings!.addDependency(this.bronzeCatalogDatabase.dataLakeSettings!); + this.goldCatalogDatabase.dataLakeSettings!.addDependency(this.silverCatalogDatabase.dataLakeSettings!); + } } /** diff --git a/framework/src/governance/lib/index.ts b/framework/src/governance/lib/index.ts index d025764e4..1e42c2669 100644 --- a/framework/src/governance/lib/index.ts +++ b/framework/src/governance/lib/index.ts @@ -7,4 +7,5 @@ export * from './data-catalog-database-props'; export * from './data-lake-catalog-props'; export * from './custom-authorizer-environment-helpers'; export * from './custom-authorizer-central-helpers'; -export * from './datazone'; \ No newline at end of file +export * from './datazone'; +export * from './lake-formation-helpers'; diff --git a/framework/src/governance/lib/lake-formation-helpers.ts b/framework/src/governance/lib/lake-formation-helpers.ts new file mode 100644 index 000000000..19c0ed431 --- /dev/null +++ b/framework/src/governance/lib/lake-formation-helpers.ts @@ -0,0 +1,237 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +import { Duration, RemovalPolicy, Stack } from 'aws-cdk-lib'; +import { Effect, IRole, ISamlProvider, IUser, PolicyStatement, Role, ServicePrincipal } from 'aws-cdk-lib/aws-iam'; +import { CfnDataLakeSettings, CfnPrincipalPermissions, CfnResource } from 'aws-cdk-lib/aws-lakeformation'; +import { RetentionDays } from 'aws-cdk-lib/aws-logs'; +import { IBucket } from 'aws-cdk-lib/aws-s3'; +import { AwsCustomResource, AwsCustomResourcePolicy, PhysicalResourceId } from 'aws-cdk-lib/custom-resources'; +import { Construct } from 'constructs'; +import { PermissionModel } from '../../utils'; + +/** + * Configure the DataLakeSettings of Lake Formation by appending the given principals to the list of admins (mutation type `APPEND`) + * @param scope the construct scope + * @param id the construct id + * @param principals the list of principals (IAM Role, IAM User or SAML provider with an identity string) to add as Lake Formation admins + * @returns the CfnDataLakeSettings to configure Lake Formation + */ +export function putDataLakeSettings(scope: Construct, id: string, principals: (IRole|IUser|[ISamlProvider, string])[]): CfnDataLakeSettings { + + // Resolve every principal (IAM Role, IAM User or SAML identity) to the ARN expected by Lake Formation + const principalArns: CfnDataLakeSettings.DataLakePrincipalProperty[] = []; + for (const principal of principals) { + const principalId = getPrincipalArn(principal); + principalArns.push({ + dataLakePrincipalIdentifier: principalId, + }); + } + + return new CfnDataLakeSettings(scope, id, { + admins: principalArns, + mutationType: 'APPEND', + parameters: { + CROSS_ACCOUNT_VERSION: 4, + }, + }); +} + +/** + * Register an Amazon S3 location in AWS Lake Formation. + * It creates a dedicated IAM Role for the location, unless a data access role is provided, and registers the location using either the Lake Formation or the Hybrid access model.
+ * @param scope the construct scope + * @param id the construct id + * @param locationBucket the Amazon S3 location bucket + * @param locationPrefix the Amazon S3 location prefix + * @param accessMode the permission model of the location, hybrid access is only enabled for `PermissionModel.HYBRID` + * @return a tuple with the IAM Role used for data access (the provided `dataAccessRole` or a new one) and the CfnResource registering the Amazon S3 location in AWS Lake Formation + */ +export function registerS3Location( + scope: Construct, + id: string, + locationBucket: IBucket, + locationPrefix?: string, + accessMode?: PermissionModel, + dataAccessRole?: IRole, +) : [IRole, CfnResource] { + + // reuse the provided role or create the IAM role for LF data access + const lfDataAccessRole = dataAccessRole || new Role(scope, `${id}DataAccessRole`, { + assumedBy: new ServicePrincipal('lakeformation.amazonaws.com'), + }); + + const grantReadWrite = locationBucket.grantReadWrite(lfDataAccessRole, locationPrefix); + + const dataLakeLocation = new CfnResource(scope, `${id}DataLakeLocation`, { + hybridAccessEnabled: accessMode === PermissionModel.HYBRID, + useServiceLinkedRole: false, + roleArn: lfDataAccessRole.roleArn, + resourceArn: locationPrefix ? locationBucket.arnForObjects(locationPrefix) : locationBucket.bucketArn, + }); + + dataLakeLocation.node.addDependency(grantReadWrite); + + return [lfDataAccessRole, dataLakeLocation]; + +} + +/** + * Revoke the IAMAllowedPrincipal permissions from the database.
+ * @param scope the construct scope + * @param id the construct id + * @param database the name of the database to revoke the IAMAllowedPrincipal permissions from + * @return the AwsCustomResource calling Lake Formation `RevokePermissions` on creation, executed with `execRole` and created with `removalPolicy` + */ +export function revokeIamAllowedPrincipal( + scope: Construct, + id: string, + database: string, + execRole: IRole, + removalPolicy: RemovalPolicy, +): AwsCustomResource { + + const stack = Stack.of(scope); + + // eslint-disable-next-line local-rules/no-tokens-in-construct-id + const cr = new AwsCustomResource(scope, id, { + removalPolicy, + role: execRole, + onCreate: { + service: 'LakeFormation', + action: 'RevokePermissions', + parameters: { + Permissions: ['ALL'], + Principal: { + DataLakePrincipalIdentifier: 'IAM_ALLOWED_PRINCIPALS', + }, + Resource: { + Database: { + Name: database, + }, + }, + }, + physicalResourceId: PhysicalResourceId.of(`${database}`), + }, + policy: AwsCustomResourcePolicy.fromStatements([ + new PolicyStatement({ + actions: ['lakeformation:RevokePermissions'], + effect: Effect.ALLOW, + resources: [ + `arn:${stack.partition}:lakeformation:${stack.region}:${stack.account}:catalog:${stack.account}`, + ], + }), + new PolicyStatement({ + actions: [ + 'glue:GetDatabase', + ], + effect: Effect.ALLOW, + resources: [ + `arn:${stack.partition}:glue:${stack.region}:${stack.account}:database/${database}`, + `arn:${stack.partition}:glue:${stack.region}:${stack.account}:catalog`, + + ], + }), + ]), + logRetention: RetentionDays.ONE_WEEK, + timeout: Duration.seconds(60), + }); + + return cr; +} + +/** + * Grant Lake Formation access on Data Lake Location + * @param scope the construct scope + * @param id the construct id + * @param location the Amazon S3 location in ARN format + * @param principal the IAM Role to grant Lake Formation access on Data Lake Location + * @param grantable whether the grantable permission is set.
@default - false + * @return the CfnPrincipalPermissions granting `DATA_LOCATION_ACCESS` on the Data Lake Location + */ + +export function grantDataLakeLocation( + scope: Construct, + id: string, + location: string, + principal: IRole, + grantable?: boolean, +): CfnPrincipalPermissions { + + return new CfnPrincipalPermissions(scope, id, { + permissions: ['DATA_LOCATION_ACCESS'], + permissionsWithGrantOption: grantable === true ? ['DATA_LOCATION_ACCESS']: [], + principal: { + dataLakePrincipalIdentifier: principal.roleArn, + }, + resource: { + dataLocation: { + catalogId: Stack.of(scope).account, + resourceArn: location, + }, + }, + }); +} + +/** + * Grant Lake Formation permissions required by crawlers + * @param scope the construct scope + * @param id the construct id, used as a prefix for the two grants created + * @param database the name of the database to grant Lake Formation permissions on + * @param principal the IAM Role to grant Lake Formation permissions to + * @return a tuple with the grant on the database (`CREATE_TABLE`) and the grant on all its tables (`SELECT`, `DESCRIBE`, `ALTER`) + */ +export function grantCrawler(scope: Construct, id: string, database: string, principal: IRole): [CfnPrincipalPermissions, CfnPrincipalPermissions] { + + const lfDbGrant = new CfnPrincipalPermissions(scope, `${id}LfDbGrant`, { + permissions: ['CREATE_TABLE'], + permissionsWithGrantOption: [], + principal: { + dataLakePrincipalIdentifier: getPrincipalArn(principal), + }, + resource: { + database: { + catalogId: Stack.of(scope).account, + name: database, + }, + }, + }); + + const lfTablesGrant = new CfnPrincipalPermissions(scope, `${id}LfTablesGrant`, { + permissions: ['SELECT', 'DESCRIBE', 'ALTER'], + permissionsWithGrantOption: [], + principal: { + dataLakePrincipalIdentifier: getPrincipalArn(principal), + }, + resource: { + table: { + catalogId: Stack.of(scope).account, + tableWildcard: {}, + databaseName: database, + }, + }, + }); + + return [lfDbGrant, lfTablesGrant]; +} + +/** + * Extract the principalArn (Arn) from the IAM Principal + * @param principal the IAM Principal to extract the
principal id from + * @returns the role ARN, the user ARN, or the SAML provider ARN concatenated as-is with the provided string + */ +function getPrincipalArn(principal: IRole | IUser | [ISamlProvider, string] ): string { + + let principalArn: string; + + if ((principal as IRole).roleArn) { + principalArn = (principal as IRole).roleArn; + } else if ((principal as IUser).userArn) { + principalArn = (principal as IUser).userArn; + } else { + const samlIdentity = (principal as [ISamlProvider, string]); + principalArn = samlIdentity[0].samlProviderArn + samlIdentity[1]; + } + + return principalArn; +} \ No newline at end of file diff --git a/framework/src/utils/lib/dsf-provider-props.ts b/framework/src/utils/lib/dsf-provider-props.ts index b4f3825fd..239cbf504 100644 --- a/framework/src/utils/lib/dsf-provider-props.ts +++ b/framework/src/utils/lib/dsf-provider-props.ts @@ -81,7 +81,7 @@ export interface HandlerDefinition { */ readonly bundling?: BundlingOptions; /** - * The role used by the lamnda function + * The role used by the lambda function * @default - A new role is created. */ readonly iamRole?: IRole; diff --git a/framework/src/utils/lib/index.ts b/framework/src/utils/lib/index.ts index 488e4bbb7..72d915eaf 100644 --- a/framework/src/utils/lib/index.ts +++ b/framework/src/utils/lib/index.ts @@ -19,5 +19,6 @@ export * from './client-vpn-endpoint-props'; export * from './create-service-linked-role'; export * from './create-service-linked-role-props'; export * from './service-linked-role-service'; +export * from './permission-model'; diff --git a/framework/src/utils/lib/permission-model.ts b/framework/src/utils/lib/permission-model.ts new file mode 100644 index 000000000..fcdfe65a6 --- /dev/null +++ b/framework/src/utils/lib/permission-model.ts @@ -0,0 +1,11 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +/** + * List of supported permission models.
+ */ +export enum PermissionModel { + LAKE_FORMATION = 'lake_formation', + HYBRID = 'hybrid', + IAM = 'iam' +} \ No newline at end of file diff --git a/framework/src/utils/lib/utils.ts b/framework/src/utils/lib/utils.ts index 537c57f07..71577ebf6 100644 --- a/framework/src/utils/lib/utils.ts +++ b/framework/src/utils/lib/utils.ts @@ -4,7 +4,8 @@ import { createHmac } from 'crypto'; import * as fs from 'fs'; -import { Stack } from 'aws-cdk-lib'; +import { DefaultStackSynthesizer, Fn, Stack } from 'aws-cdk-lib'; +import { IRole, Role } from 'aws-cdk-lib/aws-iam'; import { Construct } from 'constructs'; import * as yaml from 'js-yaml'; @@ -109,4 +110,13 @@ export class Utils { const accountIdRegex = /^\d{12}$/; return accountIdRegex.test(accountId); } + + /** + * Get the CloudFormation execution role of the stack containing `scope`, imported under the fixed construct id `CdkRole` (assumes the stack uses the `DefaultStackSynthesizer`) + */ + public static getCdkDeploymentRole(scope: Construct): IRole { + const stack = Stack.of(scope); + const synthesizer = stack.synthesizer as DefaultStackSynthesizer; + return Role.fromRoleArn(scope, 'CdkRole', Fn.sub(synthesizer.cloudFormationExecutionRoleArn)); + } } diff --git a/framework/test/e2e/data-catalog-database.e2e.test.ts b/framework/test/e2e/data-catalog-database.e2e.test.ts index 76dbfd77c..be497b128 100644 --- a/framework/test/e2e/data-catalog-database.e2e.test.ts +++ b/framework/test/e2e/data-catalog-database.e2e.test.ts @@ -7,6 +7,7 @@ import { Key } from 'aws-cdk-lib/aws-kms'; import { Bucket } from 'aws-cdk-lib/aws-s3'; import { TestStack } from './test-stack'; import { DataCatalogDatabase } from '../../src/governance'; +import { PermissionModel } from '../../src/utils'; /** * E2E test for DataCatalogDatabase @@ -30,9 +31,26 @@ const bucket = new Bucket(stack, 'TestBucket', { const database = new DataCatalogDatabase(stack, 'TestDatabase', { locationBucket: bucket, - locationPrefix: 'test-database', - name: 'test-database', + locationPrefix: 'test_database', + name: 'test_database', removalPolicy: RemovalPolicy.DESTROY, + permissionModel: PermissionModel.IAM, +});
+ +const database2 = new DataCatalogDatabase(stack, 'TestDatabase2', { + locationBucket: bucket, + locationPrefix: 'test_database2', + name: 'test_database2', + removalPolicy: RemovalPolicy.DESTROY, + permissionModel: PermissionModel.HYBRID, +}); + +const database3 = new DataCatalogDatabase(stack, 'TestDatabase3', { + locationBucket: bucket, + // locationPrefix: 'test_database3', + name: 'test_database3', + removalPolicy: RemovalPolicy.DESTROY, + permissionModel: PermissionModel.LAKE_FORMATION, }); const role = new Role(stack, 'TestPrincipal', { @@ -46,6 +64,16 @@ new CfnOutput(stack, 'DatabaseName', { exportName: 'DatabaseName', }); +new CfnOutput(stack, 'DatabaseName2', { + value: database2.databaseName, + exportName: 'DatabaseName2', +}); + +new CfnOutput(stack, 'DatabaseName3', { + value: database3.databaseName, + exportName: 'DatabaseName3', +}); + let deployResult: Record; beforeAll(async() => { @@ -53,9 +81,9 @@ beforeAll(async() => { }, 900000); test('Database in data catalog is created', async() => { - expect(deployResult.DatabaseName).toContain('test-database'); - - + expect(deployResult.DatabaseName).toContain('test_database'); + expect(deployResult.DatabaseName2).toContain('test_database2'); + expect(deployResult.DatabaseName3).toContain('test_database3'); }); afterAll(async () => { diff --git a/framework/test/e2e/data-lake-catalog.e2e.test.ts b/framework/test/e2e/data-lake-catalog.e2e.test.ts index 9f0526f08..077931ea5 100644 --- a/framework/test/e2e/data-lake-catalog.e2e.test.ts +++ b/framework/test/e2e/data-lake-catalog.e2e.test.ts @@ -5,10 +5,11 @@ import { CfnOutput, RemovalPolicy } from 'aws-cdk-lib'; import { TestStack } from './test-stack'; import { DataLakeCatalog } from '../../src/governance'; import { DataLakeStorage } from '../../src/storage'; +import { PermissionModel } from '../../src/utils'; /** * E2E test for DataCatalogDatabase - * @group e2e/governance/data-catalog-database + * @group e2e/governance/data-lake-catalog */ 
jest.setTimeout(6000000); @@ -20,9 +21,23 @@ stack.node.setContext('@data-solutions-framework-on-aws/removeDataOnDestroy', tr const storage = new DataLakeStorage(stack, 'ExampleDLStorage', { removalPolicy: RemovalPolicy.DESTROY, }); + const dataLakeCatalog = new DataLakeCatalog(stack, 'ExampleDLCatalog', { dataLakeStorage: storage, - databaseName: 'example-db', + removalPolicy: RemovalPolicy.DESTROY, +}); + +const lfDataLakeCatalog = new DataLakeCatalog(stack, 'ExampleLfDLCatalog', { + dataLakeStorage: storage, + databaseName: 'lakeformation_db', + permissionModel: PermissionModel.LAKE_FORMATION, + removalPolicy: RemovalPolicy.DESTROY, +}); + +const hybridDataLakeCatalog = new DataLakeCatalog(stack, 'ExampleHybridDLCatalog', { + dataLakeStorage: storage, + databaseName: 'hybrid_db', + permissionModel: PermissionModel.HYBRID, removalPolicy: RemovalPolicy.DESTROY, }); @@ -41,6 +56,36 @@ new CfnOutput(stack, 'GoldCatalogDB', { exportName: 'GoldCatalogDB', }); +new CfnOutput(stack, 'BronzeLfCatalogDB', { + value: lfDataLakeCatalog.bronzeCatalogDatabase.databaseName, + exportName: 'BronzeLfCatalogDB', +}); + +new CfnOutput(stack, 'SilverLfCatalogDB', { + value: lfDataLakeCatalog.silverCatalogDatabase.databaseName, + exportName: 'SilverLfCatalogDB', +}); + +new CfnOutput(stack, 'GoldLfCatalogDB', { + value: lfDataLakeCatalog.goldCatalogDatabase.databaseName, + exportName: 'GoldLfCatalogDB', +}); + +new CfnOutput(stack, 'BronzeHbCatalogDB', { + value: hybridDataLakeCatalog.bronzeCatalogDatabase.databaseName, + exportName: 'BronzeHbCatalogDB', +}); + +new CfnOutput(stack, 'SilverHbCatalogDB', { + value: hybridDataLakeCatalog.silverCatalogDatabase.databaseName, + exportName: 'SilverHbCatalogDB', +}); + +new CfnOutput(stack, 'GoldHbCatalogDB', { + value: hybridDataLakeCatalog.goldCatalogDatabase.databaseName, + exportName: 'GoldHbCatalogDB', +}); + let deployResult: Record; beforeAll(async() => { @@ -48,11 +93,15 @@ beforeAll(async() => { }, 900000); test('Database in data 
catalog is created', async() => { - expect(deployResult.BronzeCatalogDB).toContain('bronze_example-db'); - expect(deployResult.SilverCatalogDB).toContain('silver_example-db'); - expect(deployResult.GoldCatalogDB).toContain('gold_example-db'); - - + expect(deployResult.BronzeCatalogDB).toContain('bronze'); + expect(deployResult.SilverCatalogDB).toContain('silver'); + expect(deployResult.GoldCatalogDB).toContain('gold'); + expect(deployResult.BronzeLfCatalogDB).toContain('bronze_lakeformation_db'); + expect(deployResult.SilverLfCatalogDB).toContain('silver_lakeformation_db'); + expect(deployResult.GoldLfCatalogDB).toContain('gold_lakeformation_db'); + // expect(deployResult.BronzeHbCatalogDB).toContain('bronze_hybrid_db'); + // expect(deployResult.SilverHbCatalogDB).toContain('silver_hybrid_db'); + // expect(deployResult.GoldHbCatalogDB).toContain('gold_hybrid_db'); }); afterAll(async () => { diff --git a/framework/test/unit/governance/data-catalog-database.test.ts b/framework/test/unit/governance/data-catalog-database.test.ts index 49cc972ba..fe7a6b337 100644 --- a/framework/test/unit/governance/data-catalog-database.test.ts +++ b/framework/test/unit/governance/data-catalog-database.test.ts @@ -11,8 +11,10 @@ import { App, RemovalPolicy, Stack } from 'aws-cdk-lib'; import { Match, Template } from 'aws-cdk-lib/assertions'; import { Role, ServicePrincipal } from 'aws-cdk-lib/aws-iam'; -import { Bucket } from 'aws-cdk-lib/aws-s3'; +import { Key } from 'aws-cdk-lib/aws-kms'; +import { Bucket, BucketEncryption } from 'aws-cdk-lib/aws-s3'; import { DataCatalogDatabase } from '../../../src/governance'; +import { PermissionModel } from '../../../src/utils'; describe('DataCatalogDatabase with passed role', () => { const app = new App(); @@ -163,7 +165,7 @@ describe('DataCatalogDatabase with multiple org location prefix', () => { bucketName: dbBucketName, }); - const locationPrefix = '/org1/database/'; + const locationPrefix = '/org1/database'; const dbName = 'sample'; new 
DataCatalogDatabase(stack, 'database', { locationBucket: dbBucket, @@ -213,7 +215,7 @@ describe('DataCatalogDatabase default construct', () => { assumedBy: new ServicePrincipal('ec2.amazonaws.com'), roleName: testPrincipalRoleName, }); - const locationPrefix = '/database/'; + const locationPrefix = '/database'; const dbName = 'sample'; const catalogDb = new DataCatalogDatabase(stack, 'database', { locationBucket: dbBucket, @@ -483,7 +485,7 @@ describe('DataCatalogDatabase with disabled crawler', () => { const dbBucket = new Bucket(stack, 'dbBucket', { bucketName: dbBucketName, }); - const locationPrefix = '/database/'; + const locationPrefix = '/database'; const dbName = 'sample'; new DataCatalogDatabase(stack, 'database', { locationBucket: dbBucket, @@ -520,7 +522,7 @@ describe('DataCatalogDatabase with missing leading slash in the prefix and globa const dbBucket = new Bucket(stack, 'dbBucket', { bucketName: dbBucketName, }); - const locationPrefix = 'database/'; + const locationPrefix = 'database'; const dbName = 'sample'; new DataCatalogDatabase(stack, 'database', { locationBucket: dbBucket, @@ -610,4 +612,385 @@ describe('DataCatalogDatabase with / as location prefix', () => { }, }); }); +}); + +describe('DataCatalogDatabase with Lake Formation permission model', () => { + const app = new App(); + const stack = new Stack(app, 'Stack'); + const dbBucketName = 'sample-db'; + const bucketEncryptionKey = new Key(stack, 'bucketKey'); + const dbBucket = new Bucket(stack, 'dbBucket', { + bucketName: dbBucketName, + encryption: BucketEncryption.KMS, + encryptionKey: bucketEncryptionKey, + }); + const locationPrefix = '/'; + const dbName = 'sample'; + new DataCatalogDatabase(stack, 'database', { + locationBucket: dbBucket, + locationPrefix: locationPrefix, + name: dbName, + removalPolicy: RemovalPolicy.DESTROY, + permissionModel: PermissionModel.LAKE_FORMATION, + }); + + const template = Template.fromStack(stack); + // console.log(JSON.stringify(template.toJSON(), null, 
2)); + + test('should create correct DataLake settings', () => { + template.hasResourceProperties('AWS::LakeFormation::DataLakeSettings', { + Admins: [ + { + DataLakePrincipalIdentifier: { + 'Fn::Sub': Match.stringLikeRegexp('.*role/cdk-.*-cfn-exec-role-.*'), + }, + }, + { + DataLakePrincipalIdentifier: { + 'Fn::GetAtt': [ + Match.stringLikeRegexp('databaseLfRevokeRole.*'), + 'Arn', + ], + }, + }, + ], + MutationType: 'APPEND', + Parameters: { + CROSS_ACCOUNT_VERSION: 4, + }, + }); + }); + + test('should register the data location', () => { + template.hasResourceProperties('AWS::LakeFormation::Resource', { + HybridAccessEnabled: false, + ResourceArn: { + 'Fn::GetAtt': [ + Match.stringLikeRegexp('dbBucket.*'), + 'Arn', + ], + }, + RoleArn: { + 'Fn::GetAtt': [ + Match.stringLikeRegexp('databaseLakeFormationRegistrationDataAccessRole.*'), + 'Arn', + ], + }, + UseServiceLinkedRole: false, + }); + }); + + test('should revoke IAMAllowedPrincipal via a custom resource', () => { + template.hasResourceProperties('Custom::AWS', { + ServiceToken: { + 'Fn::GetAtt': [ + Match.stringLikeRegexp('AWS.*'), + 'Arn', + ], + }, + Create: '{"service":"LakeFormation","action":"RevokePermissions","parameters":{"Permissions":["ALL"],"Principal":{"DataLakePrincipalIdentifier":"IAM_ALLOWED_PRINCIPALS"},"Resource":{"Database":{"Name":"sample_008d4446"}}},"physicalResourceId":{"id":"sample_008d4446"}}', + InstallLatestAwsSdk: true, + }); + }); + + test('should grant Lake Formation permissions to the crawler for creating tables', () => { + template.hasResourceProperties('AWS::LakeFormation::PrincipalPermissions', { + Permissions: [ + 'CREATE_TABLE', + ], + PermissionsWithGrantOption: [], + Principal: { + DataLakePrincipalIdentifier: { + 'Fn::GetAtt': [ + Match.stringLikeRegexp('databaseCrawlerRole.*'), + 'Arn', + ], + }, + }, + Resource: { + Database: { + CatalogId: { + Ref: 'AWS::AccountId', + }, + Name: Match.stringLikeRegexp('sample_.*'), + }, + }, + }); + }); + + test('should grant Lake 
Formation permissions to the crawler for reading and altering tables', () => { + template.hasResourceProperties('AWS::LakeFormation::PrincipalPermissions', { + Permissions: [ + 'SELECT', + 'DESCRIBE', + 'ALTER', + ], + PermissionsWithGrantOption: [], + Principal: { + DataLakePrincipalIdentifier: { + 'Fn::GetAtt': [ + Match.stringLikeRegexp('databaseCrawlerRole.*'), + 'Arn', + ], + }, + }, + Resource: { + Table: { + CatalogId: { + Ref: 'AWS::AccountId', + }, + DatabaseName: Match.stringLikeRegexp('sample_.*'), + TableWildcard: {}, + }, + }, + }); + }); + + test('should create a data location permission for the crawler', () => { + template.hasResourceProperties('AWS::LakeFormation::PrincipalPermissions', { + Permissions: [ + 'DATA_LOCATION_ACCESS', + ], + PermissionsWithGrantOption: [], + Principal: { + DataLakePrincipalIdentifier: { + 'Fn::GetAtt': [ + Match.stringLikeRegexp('databaseCrawlerRole.*'), + 'Arn', + ], + }, + }, + Resource: { + DataLocation: { + CatalogId: { + Ref: 'AWS::AccountId', + }, + ResourceArn: { + 'Fn::GetAtt': [ + Match.stringLikeRegexp('dbBucket.*'), + 'Arn', + ], + }, + }, + }, + }); + }); + + test('should create an IAM role for revoking IAMAllowedPrincipals', () => { + template.hasResourceProperties('AWS::IAM::Role', { + AssumeRolePolicyDocument: { + Statement: [ + { + Action: 'sts:AssumeRole', + Effect: 'Allow', + Principal: { + Service: 'lambda.amazonaws.com', + }, + }, + ], + Version: '2012-10-17', + }, + }); + }); + + test('should create lambda function for revoking IAMAllowedPrincipals via the custom resource', () => { + template.hasResourceProperties('AWS::Lambda::Function', { + Code: { + S3Bucket: { + 'Fn::Sub': Match.stringLikeRegexp('cdk\-.*\-assets\-.*'), + }, + S3Key: Match.stringLikeRegexp('.*.zip'), + }, + Handler: 'index.handler', + Role: { + 'Fn::GetAtt': [ + Match.stringLikeRegexp('databaseLfRevokeRole.*'), + 'Arn', + ], + }, + Runtime: { + 'Fn::FindInMap': [ + 'LatestNodeRuntimeMap', + { + Ref: 'AWS::Region', + }, + 'value', + ], + }, +
Timeout: 60, + }); + }); + + test('should create an IAM policy for the revoke custom resource', () => { + template.hasResourceProperties('AWS::IAM::Policy', { + PolicyDocument: Match.objectLike({ + Statement: [ + { + Action: 'lakeformation:RevokePermissions', + Effect: 'Allow', + Resource: { + 'Fn::Join': [ + '', + [ + 'arn:', + { + Ref: 'AWS::Partition', + }, + ':lakeformation:', + { + Ref: 'AWS::Region', + }, + ':', + { + Ref: 'AWS::AccountId', + }, + ':catalog:', + { + Ref: 'AWS::AccountId', + }, + ], + ], + }, + }, + { + Action: 'glue:GetDatabase', + Effect: 'Allow', + Resource: [ + { + 'Fn::Join': [ + '', + [ + 'arn:', + { + Ref: 'AWS::Partition', + }, + ':glue:', + { + Ref: 'AWS::Region', + }, + ':', + { + Ref: 'AWS::AccountId', + }, + Match.stringLikeRegexp(':database/sample_.*'), + ], + ], + }, + { + 'Fn::Join': [ + '', + [ + 'arn:', + { + Ref: 'AWS::Partition', + }, + ':glue:', + { + Ref: 'AWS::Region', + }, + ':', + { + Ref: 'AWS::AccountId', + }, + ':catalog', + ], + ], + }, + ], + }, + ], + }), + PolicyName: Match.stringLikeRegexp('databaseIamRevokeCustomResourcePolicy.*'), + Roles: [ + { + Ref: Match.stringLikeRegexp('databaseLfRevokeRole.*'), + }, + ], + }); + }); + + test('should create a default data access role', () => { + template.hasResourceProperties('AWS::IAM::Role', { + AssumeRolePolicyDocument: Match.objectLike({ + Statement: [ + { + Action: 'sts:AssumeRole', + Effect: 'Allow', + Principal: { + Service: 'lakeformation.amazonaws.com', + }, + }, + ], + }), + }); + }); + + test('should create an IAM policy for the data access role ', () => { + template.hasResourceProperties('AWS::IAM::Policy', { + PolicyDocument: Match.objectLike({ + Statement: Match.arrayWith([ + { + Action: [ + 's3:GetObject*', + 's3:GetBucket*', + 's3:List*', + 's3:DeleteObject*', + 's3:PutObject', + 's3:PutObjectLegalHold', + 's3:PutObjectRetention', + 's3:PutObjectTagging', + 's3:PutObjectVersionTagging', + 's3:Abort*', + ], + Effect: 'Allow', + Resource: [ + { + 
'Fn::GetAtt': [ + Match.stringLikeRegexp('dbBucket.*'), + 'Arn', + ], + }, + { + 'Fn::Join': [ + '', + [ + { + 'Fn::GetAtt': [ + Match.stringLikeRegexp('dbBucket.*'), + 'Arn', + ], + }, + '/', + ], + ], + }, + ], + }, + { + Action: [ + 'kms:Decrypt', + 'kms:DescribeKey', + 'kms:Encrypt', + 'kms:ReEncrypt*', + 'kms:GenerateDataKey*', + ], + Effect: 'Allow', + Resource: { + 'Fn::GetAtt': [ + Match.stringLikeRegexp('bucketKey.*'), + 'Arn', + ], + }, + }, + ]), + }), + PolicyName: Match.stringLikeRegexp('databaseLakeFormationRegistrationDataAccessRoleDefaultPolicy.*'), + Roles: [ + { + Ref: Match.stringLikeRegexp('databaseLakeFormationRegistrationDataAccessRole.*'), + }, + ], + }, + ); + }); }); \ No newline at end of file diff --git a/framework/test/unit/governance/data-lake-catalog.test.ts b/framework/test/unit/governance/data-lake-catalog.test.ts index 02763f624..811148bcf 100644 --- a/framework/test/unit/governance/data-lake-catalog.test.ts +++ b/framework/test/unit/governance/data-lake-catalog.test.ts @@ -3,15 +3,17 @@ /** - * Tests DataCatalogDatabase construct + * Tests DataLakeCatalog construct * - * @group unit/data-catalog/data-catalog-database + * @group unit/data-catalog/data-lake-catalog */ import { App, RemovalPolicy, Stack } from 'aws-cdk-lib'; import { Match, Template } from 'aws-cdk-lib/assertions'; +import { Role, ServicePrincipal } from 'aws-cdk-lib/aws-iam'; import { DataLakeCatalog } from '../../../src/governance'; import { DataLakeStorage } from '../../../src/storage'; +import { PermissionModel } from '../../../src/utils'; describe ('Create catalog for bronze, silver, gold with no provided databaseName', () => { const app = new App(); @@ -71,7 +73,6 @@ describe ('Create catalog for bronze, silver, gold with no provided databaseName }, ], }, - '/', ], ], }, @@ -127,7 +128,6 @@ describe ('Create catalog for bronze, silver, gold with no provided databaseName }, ], }, - '/', ], ], }, @@ -183,7 +183,6 @@ describe ('Create catalog for bronze, silver, 
gold with no provided databaseName }, ], }, - '/', ], ], }, @@ -246,7 +245,7 @@ describe('Create catalog for bronze, silver, gold with no global data removal', [ Match.exact('s3://'), Match.anyValue(), - Match.exact('/exampledb/'), + Match.exact('/exampledb'), ], ], }, @@ -286,7 +285,7 @@ describe('Create catalog for bronze, silver, gold with no global data removal', [ Match.exact('s3://'), Match.anyValue(), - Match.exact('/exampledb/'), + Match.exact('/exampledb'), ], ], }, @@ -326,7 +325,7 @@ describe('Create catalog for bronze, silver, gold with no global data removal', [ Match.exact('s3://'), Match.anyValue(), - Match.exact('/exampledb/'), + Match.exact('/exampledb'), ], ], }, @@ -347,7 +346,7 @@ describe('Create catalog for bronze, silver, gold with no global data removal', }, LocationUri: { 'Fn::Join': [ - '', ['s3://', Match.anyValue(), '/exampledb/'], + '', ['s3://', Match.anyValue(), '/exampledb'], ], }, @@ -376,4 +375,336 @@ describe('Create catalog for bronze, silver, gold with global data removal', () }), ); }); +}); + +describe('Create catalog for data lake with lake formation permission and other defaults', () => { + const app = new App(); + const stack = new Stack(app, 'Stack'); + const storage = new DataLakeStorage(stack, 'ExampleDLStorage'); + new DataLakeCatalog(stack, 'ExampleDLCatalog', { + dataLakeStorage: storage, + databaseName: 'exampledb', + permissionModel: PermissionModel.LAKE_FORMATION, + }); + + const template = Template.fromStack(stack); + // console.log(JSON.stringify(template.toJSON(), null, 2)); + + test('should create one IAM role for data access', () => { + template.resourcePropertiesCountIs('AWS::IAM::Role', + Match.objectLike({ + AssumeRolePolicyDocument: { + Statement: [ + { + Action: 'sts:AssumeRole', + Effect: 'Allow', + Principal: { + Service: 'lakeformation.amazonaws.com', + }, + }, + ], + }, + }), + 1, + ); + }); + + test('should create two IAM roles for lake formation configuration', () => { + 
template.resourcePropertiesCountIs('AWS::IAM::Role', + Match.objectLike({ + AssumeRolePolicyDocument: { + Statement: [ + { + Action: 'sts:AssumeRole', + Effect: 'Allow', + Principal: { + Service: 'lambda.amazonaws.com', + }, + }, + ], + }, + }), + 2, + ); + }); + + test('should create one IAM policy for data access', () => { + template.resourcePropertiesCountIs('AWS::IAM::Policy', + Match.objectLike({ + PolicyDocument: { + Statement: [ + Match.objectLike({ + Action: [ + 's3:GetObject*', + 's3:GetBucket*', + 's3:List*', + 's3:DeleteObject*', + 's3:PutObject', + 's3:PutObjectLegalHold', + 's3:PutObjectRetention', + 's3:PutObjectTagging', + 's3:PutObjectVersionTagging', + 's3:Abort*', + ], + Resource: [ + { + 'Fn::GetAtt': [ + Match.stringLikeRegexp('ExampleDLStorageBronzeBucket.*'), + 'Arn', + ], + }, + { + 'Fn::Join': [ + '', + [ + { + 'Fn::GetAtt': [ + Match.stringLikeRegexp('ExampleDLStorageBronzeBucket.*'), + 'Arn', + ], + }, + '/*', + ], + ], + }, + ], + }), + Match.objectLike({ + Action: [ + 'kms:Decrypt', + 'kms:DescribeKey', + 'kms:Encrypt', + 'kms:ReEncrypt*', + 'kms:GenerateDataKey*', + ], + Resource: { + 'Fn::GetAtt': [ + Match.stringLikeRegexp('ExampleDLStorageDataKey.*'), + 'Arn', + ], + }, + }), + Match.objectLike({ + Action: [ + 's3:GetObject*', + 's3:GetBucket*', + 's3:List*', + 's3:DeleteObject*', + 's3:PutObject', + 's3:PutObjectLegalHold', + 's3:PutObjectRetention', + 's3:PutObjectTagging', + 's3:PutObjectVersionTagging', + 's3:Abort*', + ], + Resource: [ + { + 'Fn::GetAtt': [ + Match.stringLikeRegexp('ExampleDLStorageSilverBucket.*'), + 'Arn', + ], + }, + { + 'Fn::Join': [ + '', + [ + { + 'Fn::GetAtt': [ + Match.stringLikeRegexp('ExampleDLStorageSilverBucket.*'), + 'Arn', + ], + }, + '/*', + ], + ], + }, + ], + }), + Match.objectLike({ + Action: [ + 's3:GetObject*', + 's3:GetBucket*', + 's3:List*', + 's3:DeleteObject*', + 's3:PutObject', + 's3:PutObjectLegalHold', + 's3:PutObjectRetention', + 's3:PutObjectTagging', + 's3:PutObjectVersionTagging', 
+ 's3:Abort*', + ], + Resource: [ + { + 'Fn::GetAtt': [ + Match.stringLikeRegexp('ExampleDLStorageGoldBucket.*'), + 'Arn', + ], + }, + { + 'Fn::Join': [ + '', + [ + { + 'Fn::GetAtt': [ + Match.stringLikeRegexp('ExampleDLStorageGoldBucket.*'), + 'Arn', + ], + }, + '/*', + ], + ], + }, + ], + }), + Match.objectLike({ + Action: [ + 's3:GetObject*', + 's3:GetBucket*', + 's3:List*', + 's3:DeleteObject*', + 's3:PutObject', + 's3:PutObjectLegalHold', + 's3:PutObjectRetention', + 's3:PutObjectTagging', + 's3:PutObjectVersionTagging', + 's3:Abort*', + ], + Resource: [ + { + 'Fn::GetAtt': [ + Match.stringLikeRegexp('ExampleDLStorageBronzeBucket.*'), + 'Arn', + ], + }, + { + 'Fn::Join': [ + '', + [ + { + 'Fn::GetAtt': [ + Match.stringLikeRegexp('ExampleDLStorageBronzeBucket.*'), + 'Arn', + ], + }, + '/exampledb', + ], + ], + }, + ], + }), + Match.objectLike({ + Action: [ + 's3:GetObject*', + 's3:GetBucket*', + 's3:List*', + 's3:DeleteObject*', + 's3:PutObject', + 's3:PutObjectLegalHold', + 's3:PutObjectRetention', + 's3:PutObjectTagging', + 's3:PutObjectVersionTagging', + 's3:Abort*', + ], + Resource: [ + { + 'Fn::GetAtt': [ + Match.stringLikeRegexp('ExampleDLStorageSilverBucket.*'), + 'Arn', + ], + }, + { + 'Fn::Join': [ + '', + [ + { + 'Fn::GetAtt': [ + Match.stringLikeRegexp('ExampleDLStorageSilverBucket.*'), + 'Arn', + ], + }, + '/exampledb', + ], + ], + }, + ], + }), + Match.objectLike({ + Action: [ + 's3:GetObject*', + 's3:GetBucket*', + 's3:List*', + 's3:DeleteObject*', + 's3:PutObject', + 's3:PutObjectLegalHold', + 's3:PutObjectRetention', + 's3:PutObjectTagging', + 's3:PutObjectVersionTagging', + 's3:Abort*', + ], + Resource: [ + { + 'Fn::GetAtt': [ + Match.stringLikeRegexp('ExampleDLStorageGoldBucket.*'), + 'Arn', + ], + }, + { + 'Fn::Join': [ + '', + [ + { + 'Fn::GetAtt': [ + Match.stringLikeRegexp('ExampleDLStorageGoldBucket.*'), + 'Arn', + ], + }, + '/exampledb', + ], + ], + }, + ], + }), + ], + }, + PolicyName: 
Match.stringLikeRegexp('.*LakeFormationDataAccessRoleDefaultPolicy.*'), + Roles: [ + { + Ref: Match.stringLikeRegexp('.*LakeFormationDataAccessRole.*'), + }, + ], + }), + 1, + ); + }); +}); + +describe('Create catalog for data lake with lake formation permission and non defaults', () => { + const app = new App(); + const stack = new Stack(app, 'Stack'); + const storage = new DataLakeStorage(stack, 'ExampleDLStorage'); + const lfAccessRole = new Role(stack, 'LakeFormationAccessRole', { + assumedBy: new ServicePrincipal('lakeformation.amazonaws.com'), + }); + const lfConfigRole = new Role(stack, 'LakeFormationConfigRole', { + assumedBy: new ServicePrincipal('lambda.amazonaws.com'), + }); + new DataLakeCatalog(stack, 'ExampleDLCatalog', { + dataLakeStorage: storage, + databaseName: 'exampledb', + permissionModel: PermissionModel.LAKE_FORMATION, + lakeFormationDataAccessRole: lfAccessRole, + lakeFormationConfigurationRole: lfConfigRole, + }); + + const template = Template.fromStack(stack); + // console.log(JSON.stringify(template.toJSON(), null, 2)); + + test('should create a KMS Key with RETAIN removal policy', () => { + template.hasResource('AWS::KMS::Key', + Match.objectLike({ + UpdateReplacePolicy: 'Retain', + DeletionPolicy: 'Retain', + }), + ); + }); }); \ No newline at end of file diff --git a/framework/test/unit/nag/governance/nag-access-data-catalog-database.test.ts b/framework/test/unit/nag/governance/nag-access-data-catalog-database.test.ts index f9c5feb1b..60d50ff98 100644 --- a/framework/test/unit/nag/governance/nag-access-data-catalog-database.test.ts +++ b/framework/test/unit/nag/governance/nag-access-data-catalog-database.test.ts @@ -16,6 +16,7 @@ import { Key } from 'aws-cdk-lib/aws-kms'; import { AwsSolutionsChecks, NagSuppressions } from 'cdk-nag'; import { DataCatalogDatabase } from '../../../../src/governance'; import { AnalyticsBucket } from '../../../../src/storage'; +import { PermissionModel } from '../../../../src/utils'; const app = new App(); 
const stack = new Stack(app, 'Stack'); @@ -42,6 +43,20 @@ const db = new DataCatalogDatabase(stack, 'TestCatalogDatabase', { name: 'sample-db', }); +new DataCatalogDatabase(stack, 'TestLfCatalogDatabase', { + locationBucket: bucket, + locationPrefix: 'sample', + name: 'sample-db', + permissionModel: PermissionModel.LAKE_FORMATION, +}); + +new DataCatalogDatabase(stack, 'TestHybridCatalogDatabase', { + locationBucket: bucket, + locationPrefix: 'sample', + name: 'sample-db', + permissionModel: PermissionModel.HYBRID, +}); + db.grantReadOnlyAccess(role); Aspects.of(stack).add(new AwsSolutionsChecks()); @@ -49,19 +64,33 @@ Aspects.of(stack).add(new AwsSolutionsChecks()); NagSuppressions.addResourceSuppressionsByPath( stack, '/Stack/TestPrincipal/DefaultPolicy/Resource', - [{ id: 'AwsSolutions-IAM5', reason: 'Using AppSec approved managed policy provided by the Bucket interface' }], + [{ id: 'AwsSolutions-IAM5', reason: 'Using managed policy provided by CDK in the Bucket interface' }], +); + +NagSuppressions.addResourceSuppressionsByPath( + stack, + '/Stack/LogRetentionaae0aa3c5b4d4f87b02d85b201efdd8a/ServiceRole/DefaultPolicy/Resource', + [{ id: 'AwsSolutions-IAM5', reason: 'Log retention custom resource provided by CDK framework' }], ); NagSuppressions.addResourceSuppressionsByPath( stack, - '/Stack/TestCatalogDatabase/CrawlerRole/Resource', + [ + '/Stack/TestCatalogDatabase/CrawlerRole/Resource', + '/Stack/TestLfCatalogDatabase/CrawlerRole/Resource', + '/Stack/TestHybridCatalogDatabase/CrawlerRole/Resource', + ], [{ id: 'AwsSolutions-IAM5', reason: 'Construct allows read only access at the database level, so created policy would allow read access to all tables inside the database' }], ); NagSuppressions.addResourceSuppressionsByPath( stack, - '/Stack/TestCatalogDatabase/CrawlerRole/DefaultPolicy/Resource', - [{ id: 'AwsSolutions-IAM5', reason: 'Using AppSec approved managed policy provided by the Bucket interface' }], + [ + 
'/Stack/TestCatalogDatabase/CrawlerRole/DefaultPolicy/Resource', + '/Stack/TestLfCatalogDatabase/CrawlerRole/DefaultPolicy/Resource', + '/Stack/TestHybridCatalogDatabase/CrawlerRole/DefaultPolicy/Resource', + ], + [{ id: 'AwsSolutions-IAM5', reason: 'Using managed policy provided by CDK in the Bucket interface' }], ); NagSuppressions.addResourceSuppressionsByPath( @@ -70,6 +99,31 @@ NagSuppressions.addResourceSuppressionsByPath( [{ id: 'AwsSolutions-GL1', reason: 'Configuring with security configuration causes internal failure in CloudFormation' }], ); +NagSuppressions.addResourceSuppressionsByPath( + stack, + '/Stack/AWS679f53fac002430cb0da5b7982bd2287/Resource', + [{ id: 'CdkNagValidationFailure', reason: 'CDK custom resource provider framework is using intrinsic function to get latest node runtime per region which makes the NAG validation fails' }], +); + +NagSuppressions.addResourceSuppressionsByPath( + stack, + [ + '/Stack/TestLfCatalogDatabase/LakeFormationRegistrationDataAccessRole/DefaultPolicy/Resource', + '/Stack/TestHybridCatalogDatabase/LakeFormationRegistrationDataAccessRole/DefaultPolicy/Resource', + ], + [{ id: 'AwsSolutions-IAM5', reason: 'Using managed policy provided by CDK in the Bucket interface' }], +); + +NagSuppressions.addResourceSuppressionsByPath( + stack, + '/Stack/LogRetentionaae0aa3c5b4d4f87b02d85b201efdd8a/ServiceRole/Resource', + [ + { id: 'AwsSolutions-IAM4', reason: 'The permissions are provided by the Custom Resource framework and can\'t be updated' }, + ], + true, +); + + test('No unsuppressed Warnings', () => { const warnings = Annotations.fromStack(stack).findWarning('*', Match.stringLikeRegexp('AwsSolutions-.*')); console.log(warnings); diff --git a/framework/test/unit/nag/governance/nag-access-data-lake-catalog.test.ts b/framework/test/unit/nag/governance/nag-access-data-lake-catalog.test.ts index 7700302c3..2b01e4c4f 100644 --- a/framework/test/unit/nag/governance/nag-access-data-lake-catalog.test.ts +++ 
b/framework/test/unit/nag/governance/nag-access-data-lake-catalog.test.ts @@ -14,6 +14,7 @@ import { Annotations, Match } from 'aws-cdk-lib/assertions'; import { AwsSolutionsChecks, NagSuppressions } from 'cdk-nag'; import { DataLakeCatalog } from '../../../../src/governance'; import { DataLakeStorage } from '../../../../src/storage'; +import { PermissionModel } from '../../../../src/utils'; const app = new App(); const stack = new Stack(app, 'Stack'); @@ -26,28 +27,60 @@ new DataLakeCatalog(stack, 'ExampleDLCatalog', { dataLakeStorage: storage, }); +new DataLakeCatalog(stack, 'ExampleLfDLCatalog', { + dataLakeStorage: storage, + permissionModel: PermissionModel.LAKE_FORMATION, +}); + +new DataLakeCatalog(stack, 'ExampleHbDLCatalog', { + dataLakeStorage: storage, + permissionModel: PermissionModel.HYBRID, +}); + Aspects.of(stack).add(new AwsSolutionsChecks()); NagSuppressions.addResourceSuppressionsByPath(stack, [ - '/Stack/ExampleDLCatalog/BronzeCatalogDatabase/DatabaseAutoCrawler', - '/Stack/ExampleDLCatalog/SilverCatalogDatabase/DatabaseAutoCrawler', - '/Stack/ExampleDLCatalog/GoldCatalogDatabase/DatabaseAutoCrawler', - '/Stack/ExampleDLCatalog/BronzeCatalogDatabase/CrawlerRole/Resource', - '/Stack/ExampleDLCatalog/BronzeCatalogDatabase/CrawlerRole/DefaultPolicy/Resource', - '/Stack/ExampleDLCatalog/SilverCatalogDatabase/CrawlerRole/Resource', - '/Stack/ExampleDLCatalog/SilverCatalogDatabase/CrawlerRole/DefaultPolicy/Resource', - '/Stack/ExampleDLCatalog/GoldCatalogDatabase/CrawlerRole/Resource', - '/Stack/ExampleDLCatalog/GoldCatalogDatabase/CrawlerRole/DefaultPolicy/Resource', + '/Stack/ExampleDLCatalog/BronzeCatalogDatabase', + '/Stack/ExampleDLCatalog/SilverCatalogDatabase', + '/Stack/ExampleDLCatalog/GoldCatalogDatabase', + '/Stack/ExampleLfDLCatalog/BronzeCatalogDatabase', + '/Stack/ExampleLfDLCatalog/SilverCatalogDatabase', + '/Stack/ExampleLfDLCatalog/GoldCatalogDatabase', + '/Stack/ExampleHbDLCatalog/BronzeCatalogDatabase', + 
'/Stack/ExampleHbDLCatalog/SilverCatalogDatabase', + '/Stack/ExampleHbDLCatalog/GoldCatalogDatabase', ], [ - { - id: 'AwsSolutions-GL1', - reason: 'Configuring with security configuration causes internal failure in CloudFormation', - }, - { - id: 'AwsSolutions-IAM5', - reason: 'Construct allows read only access at the database level, so created policy would allow read access to all tables inside the database', - }, -]); + { id: 'AwsSolutions-GL1', reason: 'Already tested as part of the DataCatalogDatabase construct' }, + { id: 'AwsSolutions-IAM5', reason: 'Already tested as part of the DataCatalogDatabase construct' }, +], +true); + +NagSuppressions.addResourceSuppressionsByPath( + stack, + [ + '/Stack/AWS679f53fac002430cb0da5b7982bd2287/Resource', + ], + [{ id: 'CdkNagValidationFailure', reason: 'CDK custom resource provider framework is using intrinsic function to get latest node runtime per region which makes the NAG validation fails' }], +); + +NagSuppressions.addResourceSuppressionsByPath( + stack, + [ + '/Stack/ExampleLfDLCatalog/LakeFormationDataAccessRole/DefaultPolicy/Resource', + '/Stack/ExampleHbDLCatalog/LakeFormationDataAccessRole/DefaultPolicy/Resource', + ], + [{ id: 'AwsSolutions-IAM5', reason: 'Using managed policy provided by CDK in the Bucket interface' }], +); + +NagSuppressions.addResourceSuppressionsByPath( + stack, + '/Stack/LogRetentionaae0aa3c5b4d4f87b02d85b201efdd8a/ServiceRole/Resource', + [ + { id: 'AwsSolutions-IAM4', reason: 'The permissions are provided by the Custom Resource framework and can\'t be updated' }, + { id: 'AwsSolutions-IAM5', reason: 'The permissions are provided by the Custom Resource framework and can\'t be updated' }, + ], + true, +); test('No unsuppressed Warnings', () => { const warnings = Annotations.fromStack(stack).findWarning('*', Match.stringLikeRegexp('AwsSolutions-.*')); diff --git a/framework/yarn.lock b/framework/yarn.lock index 1f03ec434..4e4232480 100644 --- a/framework/yarn.lock +++ b/framework/yarn.lock 
@@ -10,17 +10,6 @@ "@jridgewell/gen-mapping" "^0.3.5" "@jridgewell/trace-mapping" "^0.3.24" -"@asamuzakjp/css-color@^2.8.2": - version "2.8.3" - resolved "https://registry.yarnpkg.com/@asamuzakjp/css-color/-/css-color-2.8.3.tgz#665f0f5e8edb95d8f543847529e30fe5cc437ef7" - integrity sha512-GIc76d9UI1hCvOATjZPyHFmE5qhRccp3/zGfMPapK3jBi+yocEzp6BBB0UnfRYP9NP4FANqUZYb0hnfs3TM3hw== - dependencies: - "@csstools/css-calc" "^2.1.1" - "@csstools/css-color-parser" "^3.0.7" - "@csstools/css-parser-algorithms" "^3.0.4" - "@csstools/css-tokenizer" "^3.0.3" - lru-cache "^10.4.3" - "@aws-cdk/asset-awscli-v1@^2.2.208": version "2.2.223" resolved "https://registry.yarnpkg.com/@aws-cdk/asset-awscli-v1/-/asset-awscli-v1-2.2.223.tgz#a7d4cb66fb64e5b8e5609591d2ff120898607595" @@ -323,34 +312,6 @@ resolved "https://registry.yarnpkg.com/@bcoe/v8-coverage/-/v8-coverage-0.2.3.tgz#75a2e8b51cb758a7553d6804a5932d7aace75c39" integrity sha512-0hYQ8SB4Db5zvZB4axdMHGwEaQjkZzFjQiN9LVYvIFB2nSUHW9tYpxWriPrWDASIxiaXax83REcLxuSdnGPZtw== -"@csstools/color-helpers@^5.0.1": - version "5.0.1" - resolved "https://registry.yarnpkg.com/@csstools/color-helpers/-/color-helpers-5.0.1.tgz#829f1c76f5800b79c51c709e2f36821b728e0e10" - integrity sha512-MKtmkA0BX87PKaO1NFRTFH+UnkgnmySQOvNxJubsadusqPEC2aJ9MOQiMceZJJ6oitUl/i0L6u0M1IrmAOmgBA== - -"@csstools/css-calc@^2.1.1": - version "2.1.1" - resolved "https://registry.yarnpkg.com/@csstools/css-calc/-/css-calc-2.1.1.tgz#a7dbc66627f5cf458d42aed14bda0d3860562383" - integrity sha512-rL7kaUnTkL9K+Cvo2pnCieqNpTKgQzy5f+N+5Iuko9HAoasP+xgprVh7KN/MaJVvVL1l0EzQq2MoqBHKSrDrag== - -"@csstools/css-color-parser@^3.0.7": - version "3.0.7" - resolved "https://registry.yarnpkg.com/@csstools/css-color-parser/-/css-color-parser-3.0.7.tgz#442d61d58e54ad258d52c309a787fceb33906484" - integrity sha512-nkMp2mTICw32uE5NN+EsJ4f5N+IGFeCFu4bGpiKgb2Pq/7J/MpyLBeQ5ry4KKtRFZaYs6sTmcMYrSRIyj5DFKA== - dependencies: - "@csstools/color-helpers" "^5.0.1" - "@csstools/css-calc" "^2.1.1" - 
-"@csstools/css-parser-algorithms@^3.0.4": - version "3.0.4" - resolved "https://registry.yarnpkg.com/@csstools/css-parser-algorithms/-/css-parser-algorithms-3.0.4.tgz#74426e93bd1c4dcab3e441f5cc7ba4fb35d94356" - integrity sha512-Up7rBoV77rv29d3uKHUIVubz1BTcgyUK72IvCQAbfbMv584xHcGKCKbWh7i8hPrRJ7qU4Y8IO3IY9m+iTB7P3A== - -"@csstools/css-tokenizer@^3.0.3": - version "3.0.3" - resolved "https://registry.yarnpkg.com/@csstools/css-tokenizer/-/css-tokenizer-3.0.3.tgz#a5502c8539265fecbd873c1e395a890339f119c2" - integrity sha512-UJnjoFsmxfKUdNYdWgOB0mWUypuLvAfQPH1+pyvRJs6euowbFkFC6P13w1l8mJyi3vxYMxc9kld5jZEGRQs6bw== - "@esbuild/aix-ppc64@0.21.5": version "0.21.5" resolved "https://registry.yarnpkg.com/@esbuild/aix-ppc64/-/aix-ppc64-0.21.5.tgz#c7184a326533fcdf1b8ee0733e21c713b975575f" @@ -526,12 +487,11 @@ resolved "https://registry.yarnpkg.com/@eslint/object-schema/-/object-schema-2.1.6.tgz#58369ab5b5b3ca117880c0f6c0b0f32f6950f24f" integrity sha512-RBMg5FRL0I0gs51M/guSAj5/e14VQ4tpZnQNWwuDT66P14I43ItmPfIZRhO9fUVIPOAQXU47atlywZ/czoqFPA== -"@eslint/plugin-kit@^0.2.5": - version "0.2.5" - resolved "https://registry.yarnpkg.com/@eslint/plugin-kit/-/plugin-kit-0.2.5.tgz#ee07372035539e7847ef834e3f5e7b79f09e3a81" - integrity sha512-lB05FkqEdUg2AA0xEbUz0SnkXT1LcCTa438W4IWTUh4hdOnVbQyOJ81OrDXsJk/LSiJHubgGEFoR5EHq1NsH1A== +"@eslint/plugin-kit@^0.2.3": + version "0.2.3" + resolved "https://registry.yarnpkg.com/@eslint/plugin-kit/-/plugin-kit-0.2.3.tgz#812980a6a41ecf3a8341719f92a6d1e784a2e0e8" + integrity sha512-2b/g5hRmpbb1o4GnTZax9N9m0FXzz9OV42ZzI4rDDMDuHUqigAiQCEWChBWCY4ztAGVRjoWT19v0yMmc5/L5kA== dependencies: - "@eslint/core" "^0.10.0" levn "^0.4.1" "@humanfs/core@^0.19.1": @@ -923,9 +883,9 @@ "@sinonjs/commons" "^3.0.0" "@stylistic/eslint-plugin@^2": - version "2.13.0" - resolved "https://registry.yarnpkg.com/@stylistic/eslint-plugin/-/eslint-plugin-2.13.0.tgz#53bf175dac8c1ec055b370a6ff77d491cae9a70d" - integrity 
sha512-RnO1SaiCFHn666wNz2QfZEFxvmiNRqhzaMXHXxXXKt+MEP7aajlPxUSMIQpKAaJfverpovEYqjBOXDq6dDcaOQ== + version "2.11.0" + resolved "https://registry.yarnpkg.com/@stylistic/eslint-plugin/-/eslint-plugin-2.11.0.tgz#50d0289f36f7201055b7fa1729fdc1d8c46e93fa" + integrity sha512-PNRHbydNG5EH8NK4c+izdJlxajIR6GxcUhzsYNRsn6Myep4dsZt0qFCz3rCPnkvgO5FYibDcMqgNHUT+zvjYZw== dependencies: "@typescript-eslint/utils" "^8.13.0" eslint-visitor-keys "^4.2.0" @@ -1988,7 +1948,7 @@ create-jest@^29.7.0: jest-util "^29.7.0" prompts "^2.0.1" -cross-spawn@^7.0.0, cross-spawn@^7.0.3, cross-spawn@^7.0.6: +cross-spawn@^7.0.0, cross-spawn@^7.0.3, cross-spawn@^7.0.5: version "7.0.6" resolved "https://registry.yarnpkg.com/cross-spawn/-/cross-spawn-7.0.6.tgz#8a58fe78f00dcd70c370451759dfbfaf03e8ee9f" integrity sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA== @@ -1998,12 +1958,11 @@ cross-spawn@^7.0.0, cross-spawn@^7.0.3, cross-spawn@^7.0.6: which "^2.0.1" cssstyle@^4.1.0: - version "4.2.1" - resolved "https://registry.yarnpkg.com/cssstyle/-/cssstyle-4.2.1.tgz#5142782410fea95db66fb68147714a652a7c2381" - integrity sha512-9+vem03dMXG7gDmZ62uqmRiMRNtinIZ9ZyuF6BdxzfOD+FdN5hretzynkn0ReS2DO2GSw76RWHs0UmJPI2zUjw== + version "4.1.0" + resolved "https://registry.yarnpkg.com/cssstyle/-/cssstyle-4.1.0.tgz#161faee382af1bafadb6d3867a92a19bcb4aea70" + integrity sha512-h66W1URKpBS5YMI/V8PyXvTMFT8SupJ1IzoIV8IeBC/ji8WVmrO8dGlTi+2dh6whmdk6BiKJLD/ZBkhWbcg6nA== dependencies: - "@asamuzakjp/css-color" "^2.8.2" - rrweb-cssom "^0.8.0" + rrweb-cssom "^0.7.1" dargs@^7.0.0: version "7.0.0" @@ -2088,9 +2047,9 @@ decamelize@^5.0.1: integrity sha512-VfxadyCECXgQlkoEAjeghAr5gY3Hf+IKjKb+X8tGVDtveCjN+USwprd2q3QXBR9T1+x2DG0XZF5/w+7HAtSaXA== decimal.js@^10.4.3: - version "10.5.0" - resolved "https://registry.yarnpkg.com/decimal.js/-/decimal.js-10.5.0.tgz#0f371c7cf6c4898ce0afb09836db73cd82010f22" - integrity 
sha512-8vDa8Qxvr/+d94hSh5P3IJwI5t8/c0KsMp+g8bNw9cY2icONa5aPfvKeieW1WlG0WQYwwhJ7mjui2xtiePQSXw== + version "10.4.3" + resolved "https://registry.yarnpkg.com/decimal.js/-/decimal.js-10.4.3.tgz#1044092884d245d1b7f65725fa4ad4c6f781cc23" + integrity sha512-VBBaLc1MgL5XpzgIP7ny5Z6Nx3UrRkIViUkPUdtl9aya5amy3De1gsUUSB1g3+3sExYNjCAsAznmukyxCb1GRA== dedent@^1.0.0: version "1.5.3" @@ -2535,7 +2494,7 @@ eslint@^9: "@types/json-schema" "^7.0.15" ajv "^6.12.4" chalk "^4.0.0" - cross-spawn "^7.0.6" + cross-spawn "^7.0.5" debug "^4.3.2" escape-string-regexp "^4.0.0" eslint-scope "^8.2.0" @@ -4293,7 +4252,7 @@ log4js@^6.9.1: rfdc "^1.3.0" streamroller "^3.1.5" -lru-cache@^10.0.1, lru-cache@^10.2.0, lru-cache@^10.4.3: +lru-cache@^10.0.1, lru-cache@^10.2.0: version "10.4.3" resolved "https://registry.yarnpkg.com/lru-cache/-/lru-cache-10.4.3.tgz#410fc8a17b70e598013df257c2446b7f3383f119" integrity sha512-JNAzZcXrCt42VGLuYz0zfAzDfAvJWW6AfYlDBQyDV5DClI2m5sAmK+OIO7s59XfsRsWHp02jAJrRadPRGTt6SQ== @@ -4679,9 +4638,9 @@ npmlog@^5.0.1: set-blocking "^2.0.0" nwsapi@^2.2.12: - version "2.2.16" - resolved "https://registry.yarnpkg.com/nwsapi/-/nwsapi-2.2.16.tgz#177760bba02c351df1d2644e220c31dfec8cdb43" - integrity sha512-F1I/bimDpj3ncaNDhfyMWuFqmQDBwDB0Fogc2qpL3BWvkQteFD/8BzWuIRl83rq0DXfm8SGt/HFhLXZyljTXcQ== + version "2.2.13" + resolved "https://registry.yarnpkg.com/nwsapi/-/nwsapi-2.2.13.tgz#e56b4e98960e7a040e5474536587e599c4ff4655" + integrity sha512-cTGB9ptp9dY9A5VbMSe7fQBcl/tt22Vcqdq8+eN93rblOuE0aCFu4aZ2vMwct/2t+lFnosm8RkQW1I0Omb1UtQ== object-assign@^4.1.1: version "4.1.1" @@ -5216,11 +5175,6 @@ rrweb-cssom@^0.7.1: resolved "https://registry.yarnpkg.com/rrweb-cssom/-/rrweb-cssom-0.7.1.tgz#c73451a484b86dd7cfb1e0b2898df4b703183e4b" integrity sha512-TrEMa7JGdVm0UThDJSx7ddw5nVm3UJS9o9CCIZ72B1vSyEZoziDqBYP3XIoi/12lKrJR8rE3jeFHMok2F/Mnsg== -rrweb-cssom@^0.8.0: - version "0.8.0" - resolved "https://registry.yarnpkg.com/rrweb-cssom/-/rrweb-cssom-0.8.0.tgz#3021d1b4352fbf3b614aaeed0bc0d5739abe0bc2" - 
integrity sha512-guoltQEx+9aMf2gDZ0s62EcV8lsXR+0w8915TC3ITdn2YueuNjdAYh/levpU9nFaoChh9RUS5ZdQMrKfVEN9tw== - run-parallel@^1.1.9: version "1.2.0" resolved "https://registry.yarnpkg.com/run-parallel/-/run-parallel-1.2.0.tgz#66d1368da7bdf921eb9d95bd1a9229e7f21a43ee" @@ -6299,9 +6253,9 @@ yaml@1.10.2: integrity sha512-r3vXyErRCYJ7wg28yvBY5VSoAF8ZvlcW9/BwUzEtUsjvX/DKs24dIkuwjtuprwJJHsbyUbLApepYTR1BN4uHrg== yaml@^2.4.1: - version "2.7.0" - resolved "https://registry.yarnpkg.com/yaml/-/yaml-2.7.0.tgz#aef9bb617a64c937a9a748803786ad8d3ffe1e98" - integrity sha512-+hSoy/QHluxmC9kCIJyL/uyFmLmc+e5CFR5Wa+bpIhIj85LVb9ZH2nVnqrHoSvKogwODv0ClqZkmiSSaIH5LTA== + version "2.6.1" + resolved "https://registry.yarnpkg.com/yaml/-/yaml-2.6.1.tgz#42f2b1ba89203f374609572d5349fb8686500773" + integrity sha512-7r0XPzioN/Q9kXBro/XPnA6kznR73DHq+GXh5ON7ZozRO6aMjbmiBuKste2wslTFkC5d1dw0GooOCepZXJ2SAg== yargs-parser@^20.2.2, yargs-parser@^20.2.3: version "20.2.9" diff --git a/website/docs/constructs/library/generated/_governance-data-catalog-database.mdx b/website/docs/constructs/library/generated/_governance-data-catalog-database.mdx index a5835bf57..66201b7f7 100644 --- a/website/docs/constructs/library/generated/_governance-data-catalog-database.mdx +++ b/website/docs/constructs/library/generated/_governance-data-catalog-database.mdx @@ -12,6 +12,7 @@ AWS Glue Catalog database for an Amazon S3 dataset. - The database default location is pointing to an S3 bucket location `s3:////` - The database can store various tables structured in their respective prefixes, for example: `s3://///` - By default, a database level crawler is scheduled to run once a day (00:01h local timezone). The crawler can be disabled and the schedule/frequency of the crawler can be modified with a cron expression. +- The permission model of the database can use IAM, LakeFormation or Hybrid mode. 
![Data Catalog Database](../../../../static/img/adsf-data-catalog.png) @@ -58,6 +59,57 @@ class ExampleDefaultDataCatalogDatabaseStack(cdk.Stack): +## Using Lake Formation permission model + +You can change the default permission model of the database to use [Lake Formation](https://docs.aws.amazon.com/lake-formation/latest/dg/how-it-works.html) exclusively or [hybrid mode](https://docs.aws.amazon.com/lake-formation/latest/dg/hybrid-access-mode.html). + +Changing the permission model to Lake Formation or Hybrid has the following impact: +* The CDK provisioning role is added as a Lake Formation administrator so it can perform Lake Formation operations +* The `IAMAllowedPrincipals` grant is removed from the database to enforce Lake Formation as the only permission model (this only applies to the Lake Formation permission model) + +:::caution Lake Formation Data Lake Settings +The Lake Formation and Hybrid permission models are configured using the `PutDataLakeSettings` API call. Concurrent API calls can lead to throttling. If you create multiple `DataCatalogDatabase` constructs, it's recommended to create dependencies between the `dataLakeSettings` resources exposed by each database to avoid concurrent calls. 
See the example in the `DataLakeCatalog` construct [here](https://github.com/awslabs/data-solutions-framework-on-aws/blob/main/framework/src/governance/lib/data-lake-catalog.ts#L137) +::: + + + + + ```typescript +class ExampleDefaultDataCatalogDatabaseStack extends cdk.Stack { + constructor(scope: Construct, id: string) { + super(scope, id); + const bucket = new Bucket(this, 'DataCatalogBucket'); + + new dsf.governance.DataCatalogDatabase(this, 'DataCatalogDatabase', { + locationBucket: bucket, + locationPrefix: '/databasePath', + name: 'example-db', + permissionModel: dsf.utils.PermissionModel.LAKE_FORMATION, + }); + } +} + ``` + + + + + ```python +class ExampleDefaultDataCatalogDatabaseStack(cdk.Stack): + def __init__(self, scope, id): + super().__init__(scope, id) + bucket = Bucket(self, "DataCatalogBucket") + + dsf.governance.DataCatalogDatabase(self, "DataCatalogDatabase", + location_bucket=bucket, + location_prefix="/databasePath", + name="example-db", + permission_model=dsf.utils.PermissionModel.LAKE_FORMATION + ) + ``` + + + + ## Modifying the crawler behavior You can change the default configuration of the AWS Glue Crawler to match your requirements: