From be25c2c708fcc7a1a0467cd2bf9f7b6fcff96ef2 Mon Sep 17 00:00:00 2001 From: Flook Peter Date: Mon, 27 Nov 2023 15:30:53 +0800 Subject: [PATCH] Initial commit --- .gitattributes | 9 + .github/dependabot.yml | 11 + .github/workflows/build.yml | 31 + .gitignore | 28 + Dockerfile | 24 + LICENSE | 201 ++++ README.md | 147 +++ api/build.gradle.kts | 174 +++ api/gradle.properties | 2 + .../pflooky/datacaterer/java/api/PlanRun.java | 347 ++++++ .../api/DataCatererConfigurationBuilder.scala | 422 +++++++ .../api/MetadataSourceBuilder.scala | 74 ++ .../pflooky/datacaterer/api/PlanBuilder.scala | 105 ++ .../pflooky/datacaterer/api/PlanRun.scala | 481 ++++++++ .../datacaterer/api/SinkOptionsBuilder.scala | 55 + .../pflooky/datacaterer/api/TaskBuilder.scala | 849 ++++++++++++++ .../datacaterer/api/ValidationBuilder.scala | 505 ++++++++ .../api/connection/ConnectionBuilder.scala | 193 ++++ .../api/converter/Converters.scala | 19 + .../datacaterer/api/model/ConfigModels.scala | 56 + .../datacaterer/api/model/Constants.scala | 347 ++++++ .../datacaterer/api/model/DataType.scala | 132 +++ .../api/model/MetadataSourceModels.scala | 33 + .../datacaterer/api/model/PlanModels.scala | 81 ++ .../api/model/ValidationModels.scala | 78 ++ .../api/model/generator/BaseGenerator.scala | 12 + .../api/parser/ValidationIdResolver.scala | 47 + .../java/api/DocumentationJavaPlanRun.java | 98 ++ .../java/api/ExampleJavaPlanRun.java | 17 + .../DataCatererConfigurationBuilderTest.scala | 203 ++++ .../datacaterer/api/ExamplePlanRun.scala | 295 +++++ .../api/MetadataSourceBuilderTest.scala | 55 + .../datacaterer/api/PlanBuilderTest.scala | 201 ++++ .../pflooky/datacaterer/api/PlanRunTest.scala | 133 +++ .../api/SinkOptionsBuilderTest.scala | 32 + .../datacaterer/api/TasksBuilderTest.scala | 289 +++++ .../ValidationConfigurationBuilderTest.scala | 553 +++++++++ app/build.gradle.kts | 134 +++ ...pache.spark.sql.sources.DataSourceRegister | 0 app/src/main/resources/application.conf | 166 +++ app/src/main/resources/log4j2.properties | 51 + .../report/data_catering_transparent.svg | 1 + app/src/main/resources/report/main.css | 173 +++ .../com/github/pflooky/datagen/App.scala | 24 + .../datagen/core/config/ConfigParser.scala | 85 ++ .../datagen/core/exception/Exceptions.scala | 34 + .../core/generator/BatchDataProcessor.scala | 101 ++ .../core/generator/DataGeneratorFactory.scala | 135 +++ .../generator/DataGeneratorProcessor.scala | 70 ++ .../generator/provider/DataGenerator.scala | 120 ++ .../provider/OneOfDataGenerator.scala | 45 + .../provider/RandomDataGenerator.scala | 410 +++++++ .../provider/RegexDataGenerator.scala | 32 + .../result/DataGenerationResultWriter.scala | 117 ++ .../generator/result/ResultHtmlWriter.scala | 777 +++++++++++++ .../core/listener/SparkRecordListener.scala | 25 + .../datagen/core/model/Constants.scala | 58 + .../datagen/core/model/ForeignKeyModels.scala | 6 + .../datagen/core/model/ResultModels.scala | 51 + .../datagen/core/model/ValidationModels.scala | 30 + .../datagen/core/parser/PlanParser.scala | 66 ++ .../core/parser/ValidationParser.scala | 11 + .../datagen/core/parser/YamlFileParser.scala | 58 + .../datagen/core/plan/PlanProcessor.scala | 67 ++ .../datagen/core/sink/SinkFactory.scala | 121 ++ .../datagen/core/sink/SinkProcessor.scala | 16 + .../core/util/CombinationCalculator.scala | 91 ++ .../datagen/core/util/ConfigUtil.scala | 16 + .../pflooky/datagen/core/util/FileUtil.scala | 58 + .../datagen/core/util/ForeignKeyUtil.scala | 287 +++++ .../datagen/core/util/GeneratorUtil.scala | 79 ++ 
.../pflooky/datagen/core/util/HttpUtil.scala | 19 + .../datagen/core/util/MetadataUtil.scala | 154 +++ .../datagen/core/util/ObjectMapperUtil.scala | 17 + .../datagen/core/util/ProtobufUtil.scala | 75 ++ .../datagen/core/util/RecordCountUtil.scala | 58 + .../pflooky/datagen/core/util/RowUtil.scala | 31 + .../datagen/core/util/SchemaUtil.scala | 340 ++++++ .../datagen/core/util/SparkProvider.scala | 15 + .../datagen/core/util/UniqueFieldsUtil.scala | 55 + .../core/validator/ValidationOperations.scala | 99 ++ .../core/validator/ValidationProcessor.scala | 140 +++ .../validator/ValidationWaitImplicits.scala | 96 ++ .../test/resources/application-cassandra.conf | 66 ++ app/src/test/resources/application-s3.conf | 59 + app/src/test/resources/application.conf | 43 + .../test/resources/datafaker/expressions.txt | 1024 +++++++++++++++++ app/src/test/resources/log4j2.properties | 55 + app/src/test/resources/sample/conf/mysql.conf | 46 + .../test/resources/sample/cql/customer.cql | 28 + .../sample/files/avro/avro_schema.avsc | 61 + .../resources/sample/files/avro/users.avro | Bin 0 -> 334 bytes .../sample/files/csv/account/account.csv | 2 + .../sample/files/csv/json/sample.json | 13 + ...c895a-43b3-4649-88f2-318c4bd69f8b-c000.csv | 701 +++++++++++ .../resources/sample/files/json/sample.json | 13 + ...4de7-9135-765936fa55b8-c000.snappy.parquet | Bin 0 -> 11370 bytes ...4de7-9135-765936fa55b8-c000.snappy.parquet | Bin 0 -> 11370 bytes .../files/parquet/transactions/._SUCCESS.crc | Bin 0 -> 8 bytes ...-9135-765936fa55b8-c000.snappy.parquet.crc | Bin 0 -> 100 bytes .../files/parquet/transactions/_SUCCESS | 0 ...4de7-9135-765936fa55b8-c000.snappy.parquet | Bin 0 -> 11370 bytes .../sample/files/protobuf/example.desc | Bin 0 -> 963 bytes .../sample/files/protobuf/example.proto | 35 + .../sample/files/protobuf/simple.desc | 6 + .../sample/files/protobuf/simple/simple.proto | 6 + .../sample/http/openapi/petstore.json | 266 +++++ .../resources/sample/http/openapi/uspto.json | 253 ++++ .../sample/jms/solace/setup_solace.sh | 15 + .../resources/sample/kafka/setup-kafka.sh | 7 + .../marquez/get_dataset_api_response.json | 85 ++ .../marquez/list_datasets_api_response.json | 90 ++ .../openmetadata/get_table_response.json | 313 +++++ .../sample/plan/account-create-plan.yaml | 23 + .../sample/plan/customer-create-plan.yaml | 20 + .../plan/example-account-create-plan.yaml | 18 + .../resources/sample/plan/large-plan.yaml | 14 + .../sample/plan/simple-json-plan.yaml | 6 + .../sample/plan/transaction-create-plan.yaml | 6 + .../resources/sample/sql/mysql/customer.sql | 47 + .../sample/sql/postgres/customer.sql | 46 + .../cassandra/cassandra-customer-task.yaml | 48 + .../task/file/csv-transaction-task.yaml | 51 + .../sample/task/file/json-account-task.yaml | 148 +++ .../sample/task/file/large-csv-task.yaml | 50 + .../task/file/large-json-account-task.yaml | 102 ++ .../task/file/parquet-transaction-task.yaml | 44 + .../sample/task/file/simple-json-task.yaml | 98 ++ .../sample/task/http/http-account-task.yaml | 72 ++ .../sample/task/jms/jms-account-task.yaml | 74 ++ .../sample/task/kafka/kafka-account-task.yaml | 96 ++ .../task/postgres/postgres-customer-task.yaml | 39 + .../postgres/postgres-transaction-task.yaml | 22 + .../sample/validation/simple-validation.yaml | 13 + .../generator/DataGeneratorFactoryTest.scala | 95 ++ .../DataGeneratorProcessorTest.scala | 33 + .../provider/OneOfDataGeneratorTest.scala | 45 + .../provider/RandomDataGeneratorTest.scala | 332 ++++++ .../provider/RegexDataGeneratorTest.scala | 32 + 
.../track/RecordTrackingProcessorTest.scala | 32 + .../model/ForeignKeyRelationHelperTest.scala | 25 + .../core/model/PlanImplicitsTest.scala | 22 + .../core/model/ValidationOperationsTest.scala | 68 ++ .../datagen/core/parser/PlanParserTest.scala | 26 + .../datagen/core/plan/ExampleJavaPlanRun.java | 79 ++ .../datagen/core/plan/PlanProcessorTest.scala | 251 ++++ .../core/util/CombinationCalculatorTest.scala | 26 + .../datagen/core/util/FileUtilTest.scala | 17 + .../core/util/ForeignKeyUtilTest.scala | 224 ++++ .../datagen/core/util/MetadataUtilTest.scala | 25 + .../core/util/RecordCountUtilTest.scala | 142 +++ .../datagen/core/util/SparkSuite.scala | 32 + .../core/util/UniqueFieldsUtilTest.scala | 65 ++ .../test/scala/spark/datagen/AppSuite.scala | 15 + build.gradle.kts | 92 ++ design/data_flow_flags.drawio | 1 + design/data_flow_flags.drawio.png | Bin 0 -> 39094 bytes design/foreign_keys.drawio | 1 + design/foreign_keys.drawio.png | Bin 0 -> 30114 bytes design/high-level-design.png | Bin 0 -> 37509 bytes docker-action.sh | 30 + docker-compose.yaml | 39 + gradle.properties | 6 + gradle/wrapper/gradle-wrapper.jar | Bin 0 -> 60756 bytes gradle/wrapper/gradle-wrapper.properties | 5 + gradlew | 240 ++++ gradlew.bat | 91 ++ load-test/RESULTS.md | 44 + misc/banner/logo_landscape_banner.svg | 1 + run-docker.sh | 5 + script/plan/cassandra-plan.yaml | 6 + script/plan/csv-plan.yaml | 6 + script/plan/foreign-key-plan.yaml | 14 + script/plan/http-plan.yaml | 6 + script/plan/kafka-plan.yaml | 6 + script/plan/mysql-plan.yaml | 6 + script/plan/parquet-plan.yaml | 6 + .../plan/postgres-multiple-tables-plan.yaml | 11 + script/plan/postgres-plan.yaml | 6 + script/plan/simple-json-plan.yaml | 6 + script/plan/solace-plan.yaml | 6 + script/run-data-caterer.sh | 20 + .../cassandra/cassandra-customer-task.yaml | 48 + .../task/file/csv/csv-transaction-task.yaml | 52 + script/task/file/json/json-account-task.yaml | 98 ++ .../parquet/parquet-transaction-task.yaml | 43 + script/task/http/http-account-task.yaml | 68 ++ .../task/jdbc/mysql/mysql-account-task.yaml | 38 + .../jdbc/postgres/postgres-account-task.yaml | 38 + .../postgres/postgres-multi-table-task.yaml | 60 + script/task/jms/solace/jms-account-task.yaml | 65 ++ script/task/kafka/kafka-account-task.yaml | 97 ++ settings.gradle.kts | 13 + workspace.xml | 105 ++ 194 files changed, 18142 insertions(+) create mode 100644 .gitattributes create mode 100644 .github/dependabot.yml create mode 100644 .github/workflows/build.yml create mode 100644 .gitignore create mode 100644 Dockerfile create mode 100644 LICENSE create mode 100644 README.md create mode 100644 api/build.gradle.kts create mode 100644 api/gradle.properties create mode 100644 api/src/main/java/com/github/pflooky/datacaterer/java/api/PlanRun.java create mode 100644 api/src/main/scala/com/github/pflooky/datacaterer/api/DataCatererConfigurationBuilder.scala create mode 100644 api/src/main/scala/com/github/pflooky/datacaterer/api/MetadataSourceBuilder.scala create mode 100644 api/src/main/scala/com/github/pflooky/datacaterer/api/PlanBuilder.scala create mode 100644 api/src/main/scala/com/github/pflooky/datacaterer/api/PlanRun.scala create mode 100644 api/src/main/scala/com/github/pflooky/datacaterer/api/SinkOptionsBuilder.scala create mode 100644 api/src/main/scala/com/github/pflooky/datacaterer/api/TaskBuilder.scala create mode 100644 api/src/main/scala/com/github/pflooky/datacaterer/api/ValidationBuilder.scala create mode 100644 
api/src/main/scala/com/github/pflooky/datacaterer/api/connection/ConnectionBuilder.scala create mode 100644 api/src/main/scala/com/github/pflooky/datacaterer/api/converter/Converters.scala create mode 100644 api/src/main/scala/com/github/pflooky/datacaterer/api/model/ConfigModels.scala create mode 100644 api/src/main/scala/com/github/pflooky/datacaterer/api/model/Constants.scala create mode 100644 api/src/main/scala/com/github/pflooky/datacaterer/api/model/DataType.scala create mode 100644 api/src/main/scala/com/github/pflooky/datacaterer/api/model/MetadataSourceModels.scala create mode 100644 api/src/main/scala/com/github/pflooky/datacaterer/api/model/PlanModels.scala create mode 100644 api/src/main/scala/com/github/pflooky/datacaterer/api/model/ValidationModels.scala create mode 100644 api/src/main/scala/com/github/pflooky/datacaterer/api/model/generator/BaseGenerator.scala create mode 100644 api/src/main/scala/com/github/pflooky/datacaterer/api/parser/ValidationIdResolver.scala create mode 100644 api/src/test/java/com/github/pflooky/datacaterer/java/api/DocumentationJavaPlanRun.java create mode 100644 api/src/test/java/com/github/pflooky/datacaterer/java/api/ExampleJavaPlanRun.java create mode 100644 api/src/test/scala/com/github/pflooky/datacaterer/api/DataCatererConfigurationBuilderTest.scala create mode 100644 api/src/test/scala/com/github/pflooky/datacaterer/api/ExamplePlanRun.scala create mode 100644 api/src/test/scala/com/github/pflooky/datacaterer/api/MetadataSourceBuilderTest.scala create mode 100644 api/src/test/scala/com/github/pflooky/datacaterer/api/PlanBuilderTest.scala create mode 100644 api/src/test/scala/com/github/pflooky/datacaterer/api/PlanRunTest.scala create mode 100644 api/src/test/scala/com/github/pflooky/datacaterer/api/SinkOptionsBuilderTest.scala create mode 100644 api/src/test/scala/com/github/pflooky/datacaterer/api/TasksBuilderTest.scala create mode 100644 api/src/test/scala/com/github/pflooky/datacaterer/api/ValidationConfigurationBuilderTest.scala create mode 100644 app/build.gradle.kts create mode 100644 app/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister create mode 100644 app/src/main/resources/application.conf create mode 100644 app/src/main/resources/log4j2.properties create mode 100644 app/src/main/resources/report/data_catering_transparent.svg create mode 100644 app/src/main/resources/report/main.css create mode 100644 app/src/main/scala/com/github/pflooky/datagen/App.scala create mode 100644 app/src/main/scala/com/github/pflooky/datagen/core/config/ConfigParser.scala create mode 100644 app/src/main/scala/com/github/pflooky/datagen/core/exception/Exceptions.scala create mode 100644 app/src/main/scala/com/github/pflooky/datagen/core/generator/BatchDataProcessor.scala create mode 100644 app/src/main/scala/com/github/pflooky/datagen/core/generator/DataGeneratorFactory.scala create mode 100644 app/src/main/scala/com/github/pflooky/datagen/core/generator/DataGeneratorProcessor.scala create mode 100644 app/src/main/scala/com/github/pflooky/datagen/core/generator/provider/DataGenerator.scala create mode 100644 app/src/main/scala/com/github/pflooky/datagen/core/generator/provider/OneOfDataGenerator.scala create mode 100644 app/src/main/scala/com/github/pflooky/datagen/core/generator/provider/RandomDataGenerator.scala create mode 100644 app/src/main/scala/com/github/pflooky/datagen/core/generator/provider/RegexDataGenerator.scala create mode 100644 
app/src/main/scala/com/github/pflooky/datagen/core/generator/result/DataGenerationResultWriter.scala create mode 100644 app/src/main/scala/com/github/pflooky/datagen/core/generator/result/ResultHtmlWriter.scala create mode 100644 app/src/main/scala/com/github/pflooky/datagen/core/listener/SparkRecordListener.scala create mode 100644 app/src/main/scala/com/github/pflooky/datagen/core/model/Constants.scala create mode 100644 app/src/main/scala/com/github/pflooky/datagen/core/model/ForeignKeyModels.scala create mode 100644 app/src/main/scala/com/github/pflooky/datagen/core/model/ResultModels.scala create mode 100644 app/src/main/scala/com/github/pflooky/datagen/core/model/ValidationModels.scala create mode 100644 app/src/main/scala/com/github/pflooky/datagen/core/parser/PlanParser.scala create mode 100644 app/src/main/scala/com/github/pflooky/datagen/core/parser/ValidationParser.scala create mode 100644 app/src/main/scala/com/github/pflooky/datagen/core/parser/YamlFileParser.scala create mode 100644 app/src/main/scala/com/github/pflooky/datagen/core/plan/PlanProcessor.scala create mode 100644 app/src/main/scala/com/github/pflooky/datagen/core/sink/SinkFactory.scala create mode 100644 app/src/main/scala/com/github/pflooky/datagen/core/sink/SinkProcessor.scala create mode 100644 app/src/main/scala/com/github/pflooky/datagen/core/util/CombinationCalculator.scala create mode 100644 app/src/main/scala/com/github/pflooky/datagen/core/util/ConfigUtil.scala create mode 100644 app/src/main/scala/com/github/pflooky/datagen/core/util/FileUtil.scala create mode 100644 app/src/main/scala/com/github/pflooky/datagen/core/util/ForeignKeyUtil.scala create mode 100644 app/src/main/scala/com/github/pflooky/datagen/core/util/GeneratorUtil.scala create mode 100644 app/src/main/scala/com/github/pflooky/datagen/core/util/HttpUtil.scala create mode 100644 app/src/main/scala/com/github/pflooky/datagen/core/util/MetadataUtil.scala create mode 100644 app/src/main/scala/com/github/pflooky/datagen/core/util/ObjectMapperUtil.scala create mode 100644 app/src/main/scala/com/github/pflooky/datagen/core/util/ProtobufUtil.scala create mode 100644 app/src/main/scala/com/github/pflooky/datagen/core/util/RecordCountUtil.scala create mode 100644 app/src/main/scala/com/github/pflooky/datagen/core/util/RowUtil.scala create mode 100644 app/src/main/scala/com/github/pflooky/datagen/core/util/SchemaUtil.scala create mode 100644 app/src/main/scala/com/github/pflooky/datagen/core/util/SparkProvider.scala create mode 100644 app/src/main/scala/com/github/pflooky/datagen/core/util/UniqueFieldsUtil.scala create mode 100644 app/src/main/scala/com/github/pflooky/datagen/core/validator/ValidationOperations.scala create mode 100644 app/src/main/scala/com/github/pflooky/datagen/core/validator/ValidationProcessor.scala create mode 100644 app/src/main/scala/com/github/pflooky/datagen/core/validator/ValidationWaitImplicits.scala create mode 100644 app/src/test/resources/application-cassandra.conf create mode 100644 app/src/test/resources/application-s3.conf create mode 100644 app/src/test/resources/application.conf create mode 100644 app/src/test/resources/datafaker/expressions.txt create mode 100644 app/src/test/resources/log4j2.properties create mode 100644 app/src/test/resources/sample/conf/mysql.conf create mode 100644 app/src/test/resources/sample/cql/customer.cql create mode 100644 app/src/test/resources/sample/files/avro/avro_schema.avsc create mode 100644 app/src/test/resources/sample/files/avro/users.avro create mode 100644 
app/src/test/resources/sample/files/csv/account/account.csv create mode 100644 app/src/test/resources/sample/files/csv/json/sample.json create mode 100644 app/src/test/resources/sample/files/csv/transactions/part-00000-867c895a-43b3-4649-88f2-318c4bd69f8b-c000.csv create mode 100644 app/src/test/resources/sample/files/json/sample.json create mode 100644 app/src/test/resources/sample/files/parquet/account/country=AU/part-00000-3862dc01-3d49-4de7-9135-765936fa55b8-c000.snappy.parquet create mode 100644 app/src/test/resources/sample/files/parquet/customer/country=AU/date=2023-01-01/part-00000-3862dc01-3d49-4de7-9135-765936fa55b8-c000.snappy.parquet create mode 100644 app/src/test/resources/sample/files/parquet/transactions/._SUCCESS.crc create mode 100644 app/src/test/resources/sample/files/parquet/transactions/.part-00000-3862dc01-3d49-4de7-9135-765936fa55b8-c000.snappy.parquet.crc create mode 100644 app/src/test/resources/sample/files/parquet/transactions/_SUCCESS create mode 100644 app/src/test/resources/sample/files/parquet/transactions/part-00000-3862dc01-3d49-4de7-9135-765936fa55b8-c000.snappy.parquet create mode 100644 app/src/test/resources/sample/files/protobuf/example.desc create mode 100644 app/src/test/resources/sample/files/protobuf/example.proto create mode 100644 app/src/test/resources/sample/files/protobuf/simple.desc create mode 100644 app/src/test/resources/sample/files/protobuf/simple/simple.proto create mode 100644 app/src/test/resources/sample/http/openapi/petstore.json create mode 100644 app/src/test/resources/sample/http/openapi/uspto.json create mode 100644 app/src/test/resources/sample/jms/solace/setup_solace.sh create mode 100644 app/src/test/resources/sample/kafka/setup-kafka.sh create mode 100644 app/src/test/resources/sample/metadata/marquez/get_dataset_api_response.json create mode 100644 app/src/test/resources/sample/metadata/marquez/list_datasets_api_response.json create mode 100644 app/src/test/resources/sample/metadata/openmetadata/get_table_response.json create mode 100644 app/src/test/resources/sample/plan/account-create-plan.yaml create mode 100644 app/src/test/resources/sample/plan/customer-create-plan.yaml create mode 100644 app/src/test/resources/sample/plan/example-account-create-plan.yaml create mode 100644 app/src/test/resources/sample/plan/large-plan.yaml create mode 100644 app/src/test/resources/sample/plan/simple-json-plan.yaml create mode 100644 app/src/test/resources/sample/plan/transaction-create-plan.yaml create mode 100644 app/src/test/resources/sample/sql/mysql/customer.sql create mode 100644 app/src/test/resources/sample/sql/postgres/customer.sql create mode 100644 app/src/test/resources/sample/task/cassandra/cassandra-customer-task.yaml create mode 100644 app/src/test/resources/sample/task/file/csv-transaction-task.yaml create mode 100644 app/src/test/resources/sample/task/file/json-account-task.yaml create mode 100644 app/src/test/resources/sample/task/file/large-csv-task.yaml create mode 100644 app/src/test/resources/sample/task/file/large-json-account-task.yaml create mode 100644 app/src/test/resources/sample/task/file/parquet-transaction-task.yaml create mode 100644 app/src/test/resources/sample/task/file/simple-json-task.yaml create mode 100644 app/src/test/resources/sample/task/http/http-account-task.yaml create mode 100644 app/src/test/resources/sample/task/jms/jms-account-task.yaml create mode 100644 app/src/test/resources/sample/task/kafka/kafka-account-task.yaml create mode 100644 
app/src/test/resources/sample/task/postgres/postgres-customer-task.yaml create mode 100644 app/src/test/resources/sample/task/postgres/postgres-transaction-task.yaml create mode 100644 app/src/test/resources/sample/validation/simple-validation.yaml create mode 100644 app/src/test/scala/com/github/pflooky/datagen/core/generator/DataGeneratorFactoryTest.scala create mode 100644 app/src/test/scala/com/github/pflooky/datagen/core/generator/DataGeneratorProcessorTest.scala create mode 100644 app/src/test/scala/com/github/pflooky/datagen/core/generator/provider/OneOfDataGeneratorTest.scala create mode 100644 app/src/test/scala/com/github/pflooky/datagen/core/generator/provider/RandomDataGeneratorTest.scala create mode 100644 app/src/test/scala/com/github/pflooky/datagen/core/generator/provider/RegexDataGeneratorTest.scala create mode 100644 app/src/test/scala/com/github/pflooky/datagen/core/generator/track/RecordTrackingProcessorTest.scala create mode 100644 app/src/test/scala/com/github/pflooky/datagen/core/model/ForeignKeyRelationHelperTest.scala create mode 100644 app/src/test/scala/com/github/pflooky/datagen/core/model/PlanImplicitsTest.scala create mode 100644 app/src/test/scala/com/github/pflooky/datagen/core/model/ValidationOperationsTest.scala create mode 100644 app/src/test/scala/com/github/pflooky/datagen/core/parser/PlanParserTest.scala create mode 100644 app/src/test/scala/com/github/pflooky/datagen/core/plan/ExampleJavaPlanRun.java create mode 100644 app/src/test/scala/com/github/pflooky/datagen/core/plan/PlanProcessorTest.scala create mode 100644 app/src/test/scala/com/github/pflooky/datagen/core/util/CombinationCalculatorTest.scala create mode 100644 app/src/test/scala/com/github/pflooky/datagen/core/util/FileUtilTest.scala create mode 100644 app/src/test/scala/com/github/pflooky/datagen/core/util/ForeignKeyUtilTest.scala create mode 100644 app/src/test/scala/com/github/pflooky/datagen/core/util/MetadataUtilTest.scala create mode 100644 app/src/test/scala/com/github/pflooky/datagen/core/util/RecordCountUtilTest.scala create mode 100644 app/src/test/scala/com/github/pflooky/datagen/core/util/SparkSuite.scala create mode 100644 app/src/test/scala/com/github/pflooky/datagen/core/util/UniqueFieldsUtilTest.scala create mode 100644 app/src/test/scala/spark/datagen/AppSuite.scala create mode 100644 build.gradle.kts create mode 100644 design/data_flow_flags.drawio create mode 100644 design/data_flow_flags.drawio.png create mode 100644 design/foreign_keys.drawio create mode 100644 design/foreign_keys.drawio.png create mode 100644 design/high-level-design.png create mode 100644 docker-action.sh create mode 100644 docker-compose.yaml create mode 100644 gradle.properties create mode 100644 gradle/wrapper/gradle-wrapper.jar create mode 100644 gradle/wrapper/gradle-wrapper.properties create mode 100755 gradlew create mode 100644 gradlew.bat create mode 100644 load-test/RESULTS.md create mode 100644 misc/banner/logo_landscape_banner.svg create mode 100644 run-docker.sh create mode 100644 script/plan/cassandra-plan.yaml create mode 100644 script/plan/csv-plan.yaml create mode 100644 script/plan/foreign-key-plan.yaml create mode 100644 script/plan/http-plan.yaml create mode 100644 script/plan/kafka-plan.yaml create mode 100644 script/plan/mysql-plan.yaml create mode 100644 script/plan/parquet-plan.yaml create mode 100644 script/plan/postgres-multiple-tables-plan.yaml create mode 100644 script/plan/postgres-plan.yaml create mode 100644 script/plan/simple-json-plan.yaml create mode 100644 
script/plan/solace-plan.yaml create mode 100644 script/run-data-caterer.sh create mode 100644 script/task/cassandra/cassandra-customer-task.yaml create mode 100644 script/task/file/csv/csv-transaction-task.yaml create mode 100644 script/task/file/json/json-account-task.yaml create mode 100644 script/task/file/parquet/parquet-transaction-task.yaml create mode 100644 script/task/http/http-account-task.yaml create mode 100644 script/task/jdbc/mysql/mysql-account-task.yaml create mode 100644 script/task/jdbc/postgres/postgres-account-task.yaml create mode 100644 script/task/jdbc/postgres/postgres-multi-table-task.yaml create mode 100644 script/task/jms/solace/jms-account-task.yaml create mode 100644 script/task/kafka/kafka-account-task.yaml create mode 100644 settings.gradle.kts create mode 100644 workspace.xml diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 00000000..097f9f98 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,9 @@ +# +# https://help.github.com/articles/dealing-with-line-endings/ +# +# Linux start script should use lf +/gradlew text eol=lf + +# These are Windows script files and should use crlf +*.bat text eol=crlf + diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 00000000..13f8d2d0 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,11 @@ +# To get started with Dependabot version updates, you'll need to specify which +# package ecosystems to update and where the package manifests are located. +# Please see the documentation for all configuration options: +# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates + +version: 2 +updates: + - package-ecosystem: "gradle" # See documentation for possible values + directory: "/" # Location of package manifests + schedule: + interval: "daily" diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml new file mode 100644 index 00000000..b3c97351 --- /dev/null +++ b/.github/workflows/build.yml @@ -0,0 +1,31 @@ +name: Build docker images + +on: + push: + branches: + - main + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 2 + + - name: Login to DockerHub + uses: docker/login-action@v2 + with: + username: ${{ secrets.DOCKER_HUB_USER }} + password: ${{ secrets.DOCKER_HUB_TOKEN }} + + - name: Build and push images + run: bash docker-action.sh + env: + PACKAGE_TOKEN: ${{ secrets.PACKAGE_TOKEN }} + MAVEN_USERNAME: ${{ secrets.MAVEN_USERNAME }} + MAVEN_PASSWORD: ${{ secrets.MAVEN_PASSWORD }} + ORG_GRADLE_PROJECT_signingKey: ${{ secrets.ORG_GRADLE_PROJECT_SIGNINGKEY }} + ORG_GRADLE_PROJECT_signingKeyId: ${{ secrets.ORG_GRADLE_PROJECT_SIGNINGKEYID }} + ORG_GRADLE_PROJECT_signingPassword: ${{ secrets.ORG_GRADLE_PROJECT_SIGNINGPASSWORD }} diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..b20ab697 --- /dev/null +++ b/.gitignore @@ -0,0 +1,28 @@ +# Ignore Gradle project-specific cache directory +.gradle + +# Ignore Gradle build output directory +build +.idea +.DS_Store +*.iml +*.ipr +*.iws +tmp + +app/out +app/src/test/resources/sample/parquet +app/src/test/resources/sample/json +app/src/test/resources/sample/data +app/src/test/resources/sample/large +app/src/test/resources/sample/html +app/src/test/resources/sample/documentation +app/src/test/resources/sample/java +app/src/test/resources/sample/report +app/src/test/resources/sample/plan-gen + +api/out +api/src/test/resources/sample/documentation + +*.class +*.log diff --git a/Dockerfile 
b/Dockerfile new file mode 100644 index 00000000..a3685948 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,24 @@ +ARG SPARK_VERSION=3.4.1 +FROM apache/spark:$SPARK_VERSION + +USER root +RUN groupadd -g 1001 app && useradd -m -u 1001 -g app app +RUN mkdir -p /opt/app +RUN chown -R app:app /opt/app +COPY --chown=app:app script /opt/app +COPY --chown=app:app app/src/main/resources/application.conf /opt/app/application.conf +COPY --chown=app:app app/src/main/resources/log4j2.properties /opt/app/log4j2.properties +COPY --chown=app:app app/src/main/resources/report /opt/app/report + +ARG APP_VERSION=0.1 +COPY --chown=app:app app/build/libs/datacaterer-basic-${APP_VERSION}.jar /opt/app/job.jar +COPY --chown=app:app api/build/libs/datacaterer-api-${APP_VERSION}.jar /opt/spark/jars/datacaterer-api-${APP_VERSION}.jar +RUN chmod 755 -R /opt/app + +RUN mkdir -p /opt/app/data-caterer/sample/json +RUN chown -R app:app /opt/app/data-caterer/sample/json + +USER app +ENV APPLICATION_CONFIG_PATH=/opt/app/application.conf + +ENTRYPOINT ["/opt/app/run-data-caterer.sh"] diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..f49a4e16 --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 00000000..1b3727c6 --- /dev/null +++ b/README.md @@ -0,0 +1,147 @@ +# Data Caterer - Data Generation and Validation + +![Data Catering](misc/banner/logo_landscape_banner.svg) + +## Overview + +Generate data for databases, files, JMS or HTTP requests through YAML-based input, executed via Spark. + +Full docs can be found [**here**](https://pflooky.github.io/data-caterer-docs/). + +## Flow + +![Data Caterer high level design](design/high-level-design.png "High level design") + +## Generate data + +### Quickest start +1. `mkdir /tmp/datagen` +2. `docker run -v /tmp/datagen:/opt/app/data-caterer pflookyy/data-caterer:0.1` +3. `head /tmp/datagen/sample/json/account-gen/part-0000*` + +### Quick start +1. Run [App.scala](app/src/main/scala/com/github/pflooky/datagen/App.scala) +2. Set environment variables `ENABLE_GENERATE_PLAN_AND_TASKS=false;PLAN_FILE_PATH=/plan/account-create-plan.yaml` +3. 
Check the generated data [here](app/src/test/resources/sample/json) + +## Configuration/Customisation + +### Supported data sources + +Data Caterer supports the following data sources: + +1. Database + 1. JDBC + 1. Postgres + 2. MySQL + 2. Cassandra + 3. ElasticSearch (soon) +2. HTTP +3. Files (local or remote like S3) + 1. CSV + 2. Parquet + 3. ORC + 4. Delta (soon) + 5. JSON +4. JMS + 1. Solace +5. Kafka + +#### Supported use cases + +1. Insert into single data sink +2. Insert into multiple data sinks + 1. Foreign keys associated between data sources + 2. Number of records per column value +3. Set random seed at column level +4. Generate real-looking data (via DataFaker) and edge cases + 1. Names, addresses, places etc. + 2. Edge cases for each data type (e.g. newline character in string, maximum integer, NaN, 0) + 3. Nullability +5. Send events progressively +6. Automatically insert data into data source + 1. Read metadata from data source and insert for all sub data sources (e.g. tables) + 2. Get statistics from existing data in the data source, if it exists +7. Track and delete generated data +8. Extract data profiling and metadata from given data sources + 1. Calculate the total number of combinations + +## Improvements + +- UI to see dashboard of metadata and data generated +- Read in schema files (such as protobuf, openapi) and convert to tasks + - Ability to convert sample data into a task + - Read from metadata sources like Amundsen, DataHub, etc. +- Pass in data attributes to HTTP URL as parameters +- Auto-generate regex and/or faker expressions +- Track each data generation run along with statistics +- Fine-grained control over deleting a particular run of data +- Demo for each type of data source + - Demonstrate what modifications are needed for different use cases + - Preloaded/preconfigured datasets within Docker images +- ML model to assist in metadata gathering (either via API or self-hosted) + - Regex and SQL generation + - Foreign key detection across datasets + - Documentation for the datasets + - Via API could be a problem as sensitive data could be shared + - Via self-hosted requires a large image (10+ GB) +- Allow for delete from queue or API + - Ability to define a queue or endpoint that can delete the corresponding records +- Postgres data type related errors + - Spark converts to the wrong data type when reading from Postgres so it fails to write back to Postgres + open_date_interval INTERVAL, + ERROR: column "open_date_interval" is of type interval but expression is of type character varying + open_id UUID, + balance MONEY, + payload_json JSONB + +## Challenges + +- How to apply foreign keys across datasets +- Providing functions for data generators +- Setting out the Plan -> Task -> Step model +- How to process the data in batches +- Data cleanup after run + - Save data into parquet files. Can read and delete when needed + - Have option to delete directly + - Have to do in particular order due to foreign keys +- Relationships/constraints between fields + - e.g. 
if a transaction has type purchase, then it is a debit + - if country is Australia, then country code should be AU + - could be a one-to-one, one-to-many or many-to-many mapping +- Predict the type of string expression to use from DataFaker + - Utilise the metadata for the field +- Having intermediate fields and not including them in the output + - Allow for SQL expressions +- Issues with Spark Streaming when writing real-time data + - Using rate format, have to manage the connection to the data source yourself + - Connection per batch, stopped working for Solace after 125 messages (5 per second) +- Generating a regex pattern given data samples +- Database-generated column values + - Auto increment + - On update current_timestamp + - Omit generating columns (only if they are not used as foreign keys) +- Metadata storage and referencing + - How will it interact with a data dictionary? + - Updated schema/metadata + +## UI + +- UI for a no/low-code solution +- Run as the same image + - Option to execute jobs separately + - Interface through YAML files? +- Pages + - Data sources + - Generation + - Validation + +## Resources + +[Spark test data generator](https://github.com/apache/spark/blob/master/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala) + +### Java 17 VM Options + +```shell +--add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED +``` \ No newline at end of file diff --git a/api/build.gradle.kts b/api/build.gradle.kts new file mode 100644 index 00000000..b62afa12 --- /dev/null +++ b/api/build.gradle.kts @@ -0,0 +1,174 @@ +import org.scoverage.ScoverageExtension + +/* + * This file was generated by the Gradle 'init' task. + * + * This generated file contains a sample Scala application project to get you started. + * For more details take a look at the 'Building Java & JVM projects' chapter in the Gradle + * User Manual available at https://docs.gradle.org/7.5.1/userguide/building_java_projects.html + * This project uses @Incubating APIs which are subject to change. + */ +val apiGroupId: String by project +val apiArtifactId: String by project +val scalaVersion: String by project +val scalaSpecificVersion: String by project + +project.base.archivesName.set(apiArtifactId) + +plugins { + scala + `java-library` + `maven-publish` + signing + + id("org.scoverage") version "8.0.3" + id("com.github.johnrengelman.shadow") version "8.1.1" +} + +repositories { + // Use Maven Central for resolving dependencies. 
+ mavenCentral() + maven { + url = uri("https://plugins.gradle.org/m2/") + } +} + +val basicImpl: Configuration by configurations.creating +val advancedImpl: Configuration by configurations.creating + +configurations { + implementation { + extendsFrom(basicImpl) + extendsFrom(advancedImpl) + } +} + +dependencies { + compileOnly("org.scala-lang:scala-library:$scalaSpecificVersion") + compileOnly("com.fasterxml.jackson.dataformat:jackson-dataformat-yaml:2.16.0") + compileOnly("com.fasterxml.jackson.module:jackson-module-scala_$scalaVersion:2.15.2") + + api("com.softwaremill.quicklens:quicklens_$scalaVersion:1.9.6") { + exclude(group = "org.scala-lang") + } +} + +testing { + suites { + // Configure the built-in test suite + val test by getting(JvmTestSuite::class) { + // Use JUnit4 test framework + useJUnit("4.13.2") + + dependencies { + // Use Scalatest for testing our library + implementation("org.scalatest:scalatest_$scalaVersion:3.2.10") + implementation("org.scalatestplus:junit-4-13_$scalaVersion:3.2.2.0") + implementation("org.scalamock:scalamock_$scalaVersion:5.2.0") + + // Need scala-xml at test runtime + runtimeOnly("org.scala-lang.modules:scala-xml_$scalaVersion:1.2.0") + } + } + } +} + +sourceSets { + main { + scala { + setSrcDirs(listOf("src/main/scala", "src/main/java")) + } + java { + setSrcDirs(emptyList()) + } + } + test { + scala { + setSrcDirs(listOf("src/test/scala", "src/test/java")) + } + java { + setSrcDirs(emptyList()) + } + resources { + setSrcDirs(listOf("src/test/resources")) + } + } +} + +java { + withJavadocJar() + withSourcesJar() +} + +tasks.withType<ScalaCompile> { + targetCompatibility = "11" +} + +tasks.shadowJar { + archiveBaseName.set("datacaterer") + archiveAppendix.set("api") + archiveVersion.set(project.version.toString()) + archiveClassifier.set("") + isZip64 = true +} + +tasks.test { + finalizedBy(tasks.reportScoverage) +} + +configure<ScoverageExtension> { + scoverageScalaVersion.set(scalaSpecificVersion) +} + +publishing { + repositories { + maven { + name = "OSSRH" + url = uri("https://s01.oss.sonatype.org/service/local/staging/deploy/maven2/") + credentials { + username = System.getenv("MAVEN_USERNAME") + password = System.getenv("MAVEN_PASSWORD") + } + } + } + publications { + create<MavenPublication>("mavenScala") { + artifact(tasks.shadowJar) + artifact(tasks["sourcesJar"]) + artifact(tasks["javadocJar"]) + groupId = apiGroupId + artifactId = apiArtifactId + + pom { + name.set("Data Caterer API") + description.set("API for discovering, generating and validating data using Data Caterer") + url.set("https://pflooky.github.io/data-caterer-docs/") + scm { + url.set("https://github.com/pflooky/data-caterer-example") + developerConnection.set("git@github.com:pflooky/data-caterer-example.git") + } + developers { + developer { + id.set("pflooky") + name.set("Peter Flook") + email.set("peter.flook@data.catering") + } + } + licenses { + license { + name.set("Apache 2.0") + url.set("https://opensource.org/license/apache-2-0/") + } + } + } + } + } +} + +signing { + val signingKey: String? by project + val signingKeyId: String? by project + val signingPassword: String? 
by project + useInMemoryPgpKeys(signingKeyId, signingKey, signingPassword) + sign(publishing.publications["mavenScala"]) +} diff --git a/api/gradle.properties b/api/gradle.properties new file mode 100644 index 00000000..2a2ab8ed --- /dev/null +++ b/api/gradle.properties @@ -0,0 +1,2 @@ +apiGroupId=io.github.pflooky +apiArtifactId=data-caterer-api \ No newline at end of file diff --git a/api/src/main/java/com/github/pflooky/datacaterer/java/api/PlanRun.java b/api/src/main/java/com/github/pflooky/datacaterer/java/api/PlanRun.java new file mode 100644 index 00000000..b03ce802 --- /dev/null +++ b/api/src/main/java/com/github/pflooky/datacaterer/java/api/PlanRun.java @@ -0,0 +1,347 @@ +package com.github.pflooky.datacaterer.java.api; + + +import com.github.pflooky.datacaterer.api.BasePlanRun; +import com.github.pflooky.datacaterer.api.CountBuilder; +import com.github.pflooky.datacaterer.api.DataCatererConfigurationBuilder; +import com.github.pflooky.datacaterer.api.DataSourceValidationBuilder; +import com.github.pflooky.datacaterer.api.FieldBuilder; +import com.github.pflooky.datacaterer.api.GeneratorBuilder; +import com.github.pflooky.datacaterer.api.MetadataSourceBuilder; +import com.github.pflooky.datacaterer.api.PlanBuilder; +import com.github.pflooky.datacaterer.api.SchemaBuilder; +import com.github.pflooky.datacaterer.api.StepBuilder; +import com.github.pflooky.datacaterer.api.TaskBuilder; +import com.github.pflooky.datacaterer.api.TaskSummaryBuilder; +import com.github.pflooky.datacaterer.api.TasksBuilder; +import com.github.pflooky.datacaterer.api.ValidationBuilder; +import com.github.pflooky.datacaterer.api.ValidationConfigurationBuilder; +import com.github.pflooky.datacaterer.api.WaitConditionBuilder; +import com.github.pflooky.datacaterer.api.connection.CassandraBuilder; +import com.github.pflooky.datacaterer.api.connection.ConnectionTaskBuilder; +import com.github.pflooky.datacaterer.api.connection.FileBuilder; +import com.github.pflooky.datacaterer.api.connection.HttpBuilder; +import com.github.pflooky.datacaterer.api.connection.KafkaBuilder; +import com.github.pflooky.datacaterer.api.connection.MySqlBuilder; +import com.github.pflooky.datacaterer.api.connection.PostgresBuilder; +import com.github.pflooky.datacaterer.api.connection.SolaceBuilder; +import com.github.pflooky.datacaterer.api.model.ForeignKeyRelation; + +import java.util.Collections; +import java.util.List; +import java.util.Map; + +import static com.github.pflooky.datacaterer.api.converter.Converters.toScalaList; +import static com.github.pflooky.datacaterer.api.converter.Converters.toScalaMap; + +public abstract class PlanRun { + + private com.github.pflooky.datacaterer.api.PlanRun basePlanRun = new BasePlanRun(); + + public com.github.pflooky.datacaterer.api.PlanRun getPlan() { + return basePlanRun; + } + + public PlanBuilder plan() { + return new PlanBuilder(); + } + + public TaskSummaryBuilder taskSummary() { + return new TaskSummaryBuilder(); + } + + public TasksBuilder tasks() { + return new TasksBuilder(); + } + + public TaskBuilder task() { + return new TaskBuilder(); + } + + public StepBuilder step() { + return new StepBuilder(); + } + + public SchemaBuilder schema() { + return new SchemaBuilder(); + } + + public FieldBuilder field() { + return new FieldBuilder(); + } + + public GeneratorBuilder generator() { + return new GeneratorBuilder(); + } + + public CountBuilder count() { + return new CountBuilder(); + } + + public DataCatererConfigurationBuilder configuration() { + return new 
DataCatererConfigurationBuilder(); + } + + public WaitConditionBuilder waitCondition() { + return new WaitConditionBuilder(); + } + + public ValidationBuilder validation() { + return new ValidationBuilder(); + } + + public DataSourceValidationBuilder dataSourceValidation() { + return new DataSourceValidationBuilder(); + } + + public ValidationConfigurationBuilder validationConfig() { + return new ValidationConfigurationBuilder(); + } + + public MetadataSourceBuilder metadataSource() { return new MetadataSourceBuilder(); } + + public ForeignKeyRelation foreignField(String dataSource, String step, String column) { + return new ForeignKeyRelation(dataSource, step, column); + } + + public ForeignKeyRelation foreignField(String dataSource, String step, List columns) { + return new ForeignKeyRelation(dataSource, step, toScalaList(columns)); + } + + public ForeignKeyRelation foreignField(ConnectionTaskBuilder connectionTaskBuilder, String step, List columns) { + return new ForeignKeyRelation(connectionTaskBuilder.connectionConfigWithTaskBuilder().dataSourceName(), step, toScalaList(columns)); + } + + public FileBuilder csv( + String name, String path, Map options + ) { + return basePlanRun.csv(name, path, toScalaMap(options)); + } + + public FileBuilder csv(String name, String path) { + return csv(name, path, Collections.emptyMap()); + } + + + public FileBuilder json(String name, String path, Map options) { + return basePlanRun.json(name, path, toScalaMap(options)); + } + + public FileBuilder json(String name, String path) { + return json(name, path, Collections.emptyMap()); + } + + + public FileBuilder orc(String name, String path, Map options) { + return basePlanRun.orc(name, path, toScalaMap(options)); + } + + public FileBuilder orc(String name, String path) { + return orc(name, path, Collections.emptyMap()); + } + + public FileBuilder parquet(String name, String path, Map options) { + return basePlanRun.parquet(name, path, toScalaMap(options)); + } + + public FileBuilder parquet(String name, String path) { + return parquet(name, path, Collections.emptyMap()); + } + + public PostgresBuilder postgres( + String name, + String url, + String username, + String password, + Map options + ) { + return basePlanRun.postgres(name, url, username, password, toScalaMap(options)); + } + + public PostgresBuilder postgres(String name, String url) { + return basePlanRun.postgresJava(name, url); + } + + public PostgresBuilder postgres( + ConnectionTaskBuilder connectionTaskBuilder + ) { + return basePlanRun.postgres(connectionTaskBuilder); + } + + public MySqlBuilder mysql( + String name, + String url, + String username, + String password, + Map options + ) { + return basePlanRun.mysql(name, url, username, password, toScalaMap(options)); + } + + public MySqlBuilder mysql(String name, String url) { + return basePlanRun.mysqlJava(name, url); + } + + public MySqlBuilder mysql( + ConnectionTaskBuilder connectionTaskBuilder + ) { + return basePlanRun.mysql(connectionTaskBuilder); + } + + public CassandraBuilder cassandra( + String name, + String url, + String username, + String password, + Map options + ) { + return basePlanRun.cassandra(name, url, username, password, toScalaMap(options)); + } + + public CassandraBuilder cassandra(String name, String url) { + return basePlanRun.cassandraJava(name, url); + } + + public CassandraBuilder cassandra( + ConnectionTaskBuilder connectionTaskBuilder + ) { + return basePlanRun.cassandra(connectionTaskBuilder); + } + + public SolaceBuilder solace( + String name, + String url, 
+ String username, + String password, + String vpnName, + String connectionFactory, + String initialContextFactory, + Map<String, String> options + ) { + return basePlanRun.solace(name, url, username, password, vpnName, connectionFactory, initialContextFactory, toScalaMap(options)); + } + + public SolaceBuilder solace( + String name, + String url, + String username, + String password, + String vpnName + ) { + return basePlanRun.solaceJava(name, url, username, password, vpnName); + } + + public SolaceBuilder solace(String name, String url) { + return basePlanRun.solaceJava(name, url); + } + + public SolaceBuilder solace( + ConnectionTaskBuilder<SolaceBuilder> connectionTaskBuilder + ) { + return basePlanRun.solace(connectionTaskBuilder); + } + + public KafkaBuilder kafka(String name, String url, Map<String, String> options) { + return basePlanRun.kafka(name, url, toScalaMap(options)); + } + + public KafkaBuilder kafka(String name, String url) { + return basePlanRun.kafkaJava(name, url); + } + + public KafkaBuilder kafka( + ConnectionTaskBuilder<KafkaBuilder> connectionTaskBuilder + ) { + return basePlanRun.kafka(connectionTaskBuilder); + } + + public HttpBuilder http(String name, String username, String password, Map<String, String> options) { + return basePlanRun.http(name, username, password, toScalaMap(options)); + } + + public HttpBuilder http(String name, Map<String, String> options) { + return basePlanRun.http(name, "", "", toScalaMap(options)); + } + + public HttpBuilder http(String name) { + return basePlanRun.httpJava(name); + } + + public HttpBuilder http( + ConnectionTaskBuilder<HttpBuilder> connectionTaskBuilder + ) { + return basePlanRun.http(connectionTaskBuilder); + } + + + public void execute( + ConnectionTaskBuilder<?> connectionTaskBuilder, + ConnectionTaskBuilder<?>... connectionTaskBuilders + ) { + execute(plan(), configuration(), Collections.emptyList(), connectionTaskBuilder, connectionTaskBuilders); + } + + public void execute( + DataCatererConfigurationBuilder configurationBuilder, + ConnectionTaskBuilder<?> connectionTaskBuilder, + ConnectionTaskBuilder<?>... connectionTaskBuilders + ) { + execute(plan(), configurationBuilder, Collections.emptyList(), connectionTaskBuilder, connectionTaskBuilders); + } + + public void execute( + PlanBuilder planBuilder, + DataCatererConfigurationBuilder configurationBuilder, + ConnectionTaskBuilder<?> connectionTaskBuilder, + ConnectionTaskBuilder<?>... connectionTaskBuilders + ) { + execute(planBuilder, configurationBuilder, Collections.emptyList(), connectionTaskBuilder, connectionTaskBuilders); + } + + public void execute( + PlanBuilder planBuilder, + DataCatererConfigurationBuilder configurationBuilder, + List<ValidationConfigurationBuilder> validations, + ConnectionTaskBuilder<?> connectionTaskBuilder, + ConnectionTaskBuilder<?>...
connectionTaskBuilders + ) { + var planWithConfig = getPlan(); + planWithConfig.execute( + planBuilder, + configurationBuilder, + toScalaList(validations), + connectionTaskBuilder, + connectionTaskBuilders + ); + this.basePlanRun = planWithConfig; + } + + public void execute(TasksBuilder tasks) { + execute(List.of(tasks), plan(), configuration(), Collections.emptyList()); + } + + public void execute(DataCatererConfigurationBuilder configurationBuilder) { + execute(Collections.emptyList(), plan(), configurationBuilder, Collections.emptyList()); + } + + public void execute(PlanBuilder plan, DataCatererConfigurationBuilder configuration) { + execute(Collections.emptyList(), plan, configuration, Collections.emptyList()); + } + + public void execute( + List<TasksBuilder> tasks, + PlanBuilder plan, + DataCatererConfigurationBuilder configuration, + List<ValidationConfigurationBuilder> validations + ) { + var planWithConfig = getPlan(); + planWithConfig.execute( + toScalaList(tasks), + plan, + configuration, + toScalaList(validations) + ); + this.basePlanRun = planWithConfig; + } + +} diff --git a/api/src/main/scala/com/github/pflooky/datacaterer/api/DataCatererConfigurationBuilder.scala b/api/src/main/scala/com/github/pflooky/datacaterer/api/DataCatererConfigurationBuilder.scala new file mode 100644 index 00000000..17cf1dad --- /dev/null +++ b/api/src/main/scala/com/github/pflooky/datacaterer/api/DataCatererConfigurationBuilder.scala @@ -0,0 +1,422 @@ +package com.github.pflooky.datacaterer.api + +import com.github.pflooky.datacaterer.api.connection.{CassandraBuilder, ConnectionTaskBuilder, FileBuilder, HttpBuilder, KafkaBuilder, MySqlBuilder, PostgresBuilder, SolaceBuilder} +import com.github.pflooky.datacaterer.api.converter.Converters.toScalaMap +import com.github.pflooky.datacaterer.api.model.Constants._ +import com.github.pflooky.datacaterer.api.model.DataCatererConfiguration +import com.softwaremill.quicklens.ModifyPimp + +case class DataCatererConfigurationBuilder(build: DataCatererConfiguration = DataCatererConfiguration()) { + def this() = this(DataCatererConfiguration()) + + def master(master: String): DataCatererConfigurationBuilder = + this.modify(_.build.master).setTo(master) + + def runtimeConfig(conf: Map[String, String]): DataCatererConfigurationBuilder = + this.modify(_.build.runtimeConfig)(_ ++ conf) + + def runtimeConfig(conf: java.util.Map[String, String]): DataCatererConfigurationBuilder = + runtimeConfig(toScalaMap(conf)) + + def addRuntimeConfig(conf: (String, String)): DataCatererConfigurationBuilder = + this.modify(_.build.runtimeConfig)(_ ++ Map(conf)) + + def addRuntimeConfig(key: String, value: String): DataCatererConfigurationBuilder = + addRuntimeConfig(key -> value) + + + def connectionConfig(connectionConfigByName: Map[String, Map[String, String]]): DataCatererConfigurationBuilder = + this.modify(_.build.connectionConfigByName)(_ ++ connectionConfigByName) + + def connectionConfig(connectionConfigByName: java.util.Map[String, java.util.Map[String, String]]): DataCatererConfigurationBuilder = { + val scalaConf = toScalaMap(connectionConfigByName) + val mappedConf = scalaConf.map(c => (c._1, toScalaMap(c._2))) + connectionConfig(mappedConf) + } + + def addConnectionConfig(name: String, format: String, connectionConfig: Map[String, String]): DataCatererConfigurationBuilder = + this.modify(_.build.connectionConfigByName)(_ ++ Map(name -> (connectionConfig ++ Map(FORMAT -> format)))) + + def addConnectionConfigJava(name: String, format: String, connectionConfig: java.util.Map[String, String]):
DataCatererConfigurationBuilder = + addConnectionConfig(name, format, toScalaMap(connectionConfig)) + + def addConnectionConfig(name: String, format: String, path: String, connectionConfig: Map[String, String]): DataCatererConfigurationBuilder = { + val pathConf = if (path.nonEmpty) Map(PATH -> path) else Map() + this.modify(_.build.connectionConfigByName)(_ ++ Map(name -> (connectionConfig ++ Map(FORMAT -> format) ++ pathConf))) + } + + def addConnectionConfigJava(name: String, format: String, path: String, connectionConfig: java.util.Map[String, String]): DataCatererConfigurationBuilder = + addConnectionConfig(name, format, path, toScalaMap(connectionConfig)) + + def csv(name: String, path: String = "", options: Map[String, String] = Map()): DataCatererConfigurationBuilder = + addConnectionConfig(name, CSV, path, options) + + def csv(name: String, path: String, options: java.util.Map[String, String]): DataCatererConfigurationBuilder = + csv(name, path, toScalaMap(options)) + + def parquet(name: String, path: String = "", options: Map[String, String] = Map()): DataCatererConfigurationBuilder = + addConnectionConfig(name, PARQUET, path, options) + + def parquet(name: String, path: String, options: java.util.Map[String, String]): DataCatererConfigurationBuilder = + parquet(name, path, toScalaMap(options)) + + def orc(name: String, path: String = "", options: Map[String, String] = Map()): DataCatererConfigurationBuilder = + addConnectionConfig(name, ORC, path, options) + + def orc(name: String, path: String, options: java.util.Map[String, String]): DataCatererConfigurationBuilder = + orc(name, path, toScalaMap(options)) + + def json(name: String, path: String = "", options: Map[String, String] = Map()): DataCatererConfigurationBuilder = + addConnectionConfig(name, JSON, path, options) + + def json(name: String, path: String, options: java.util.Map[String, String]): DataCatererConfigurationBuilder = + json(name, path, toScalaMap(options)) + + def postgres( + name: String, + url: String = DEFAULT_POSTGRES_URL, + username: String = DEFAULT_POSTGRES_USERNAME, + password: String = DEFAULT_POSTGRES_PASSWORD, + options: Map[String, String] = Map() + ): DataCatererConfigurationBuilder = + addConnection(name, JDBC, url, username, password, options ++ Map(DRIVER -> POSTGRES_DRIVER)) + + def postgres( + name: String, + url: String, + username: String, + password: String, + options: java.util.Map[String, String] + ): DataCatererConfigurationBuilder = + postgres(name, url, username, password, toScalaMap(options)) + + def postgres( + name: String, + url: String, + options: java.util.Map[String, String] + ): DataCatererConfigurationBuilder = + postgres(name, url, options = toScalaMap(options)) + + def postgres( + name: String, + url: String + ): DataCatererConfigurationBuilder = + postgres(name, url, DEFAULT_POSTGRES_USERNAME) + + def mysql( + name: String, + url: String = DEFAULT_MYSQL_URL, + username: String = DEFAULT_MYSQL_USERNAME, + password: String = DEFAULT_MYSQL_PASSWORD, + options: Map[String, String] = Map() + ): DataCatererConfigurationBuilder = + addConnection(name, JDBC, url, username, password, options ++ Map(DRIVER -> MYSQL_DRIVER)) + + def mysql( + name: String, + url: String, + username: String, + password: String, + options: java.util.Map[String, String] + ): DataCatererConfigurationBuilder = + mysql(name, url, username, password, toScalaMap(options)) + + def mysql( + name: String, + url: String, + options: java.util.Map[String, String] + ): DataCatererConfigurationBuilder = + mysql(name, 
url, options = toScalaMap(options)) + + def mysql( + name: String, + url: String + ): DataCatererConfigurationBuilder = + mysql(name, url, DEFAULT_MYSQL_USERNAME) + + def cassandra( + name: String, + url: String = DEFAULT_CASSANDRA_URL, + username: String = DEFAULT_CASSANDRA_USERNAME, + password: String = DEFAULT_CASSANDRA_PASSWORD, + options: Map[String, String] = Map() + ): DataCatererConfigurationBuilder = { + val sptUrl = url.split(":") + assert(sptUrl.size == 2, "url should have format ':'") + val allOptions = Map( + "spark.cassandra.connection.host" -> sptUrl.head, + "spark.cassandra.connection.port" -> sptUrl.last, + "spark.cassandra.auth.username" -> username, + "spark.cassandra.auth.password" -> password, + ) ++ options + addConnectionConfig(name, CASSANDRA, allOptions) + } + + def cassandra( + name: String, + url: String, + username: String, + password: String, + options: java.util.Map[String, String] + ): DataCatererConfigurationBuilder = + cassandra(name, url, username, password, toScalaMap(options)) + + def cassandra( + name: String, + url: String, + options: java.util.Map[String, String] + ): DataCatererConfigurationBuilder = + cassandra(name, url, options = toScalaMap(options)) + + def cassandra( + name: String, + url: String + ): DataCatererConfigurationBuilder = + cassandra(name, url, DEFAULT_CASSANDRA_USERNAME) + + def jms(name: String, url: String, username: String, password: String, options: Map[String, String] = Map()): DataCatererConfigurationBuilder = + addConnection(name, JMS, url, username, password, options) + + def solace( + name: String, + url: String = DEFAULT_SOLACE_URL, + username: String = DEFAULT_SOLACE_USERNAME, + password: String = DEFAULT_SOLACE_PASSWORD, + vpnName: String = DEFAULT_SOLACE_VPN_NAME, + connectionFactory: String = DEFAULT_SOLACE_CONNECTION_FACTORY, + initialContextFactory: String = DEFAULT_SOLACE_INITIAL_CONTEXT_FACTORY, + options: Map[String, String] = Map() + ): DataCatererConfigurationBuilder = + jms(name, url, username, password, Map( + JMS_VPN_NAME -> vpnName, + JMS_CONNECTION_FACTORY -> connectionFactory, + JMS_INITIAL_CONTEXT_FACTORY -> initialContextFactory, + ) ++ options) + + def solace( + name: String, + url: String, + username: String, + password: String, + vpnName: String, + connectionFactory: String, + initialContextFactory: String, + options: java.util.Map[String, String] + ): DataCatererConfigurationBuilder = + solace(name, url, username, password, vpnName, connectionFactory, initialContextFactory, toScalaMap(options)) + + def solace( + name: String, + url: String, + username: String, + password: String, + vpnName: String + ): DataCatererConfigurationBuilder = + solace(name, url, username, password, vpnName, DEFAULT_SOLACE_CONNECTION_FACTORY) + + def solace( + name: String, + url: String + ): DataCatererConfigurationBuilder = + solace(name, url, DEFAULT_SOLACE_USERNAME) + + def kafka(name: String, url: String = DEFAULT_KAFKA_URL, options: Map[String, String] = Map()): DataCatererConfigurationBuilder = { + addConnectionConfig(name, KAFKA, Map( + "kafka.bootstrap.servers" -> url, + ) ++ options) + } + + def kafka(name: String, url: String, options: java.util.Map[String, String]): DataCatererConfigurationBuilder = + kafka(name, url, toScalaMap(options)) + + def http(name: String, username: String = "", password: String = "", options: Map[String, String] = Map()): DataCatererConfigurationBuilder = { + val authOptions = if (username.nonEmpty && password.nonEmpty) Map(USERNAME -> username, PASSWORD -> password) else Map() + 
addConnectionConfig(name, HTTP, authOptions ++ options) + } + + def http(name: String, username: String, password: String, options: java.util.Map[String, String]): DataCatererConfigurationBuilder = + http(name, username, password, toScalaMap(options)) + + private def addConnection(name: String, format: String, url: String, username: String, + password: String, options: Map[String, String]): DataCatererConfigurationBuilder = { + addConnectionConfig(name, format, Map( + URL -> url, + USERNAME -> username, + PASSWORD -> password + ) ++ options) + } + + + def enableGenerateData(enable: Boolean): DataCatererConfigurationBuilder = + this.modify(_.build.flagsConfig.enableGenerateData).setTo(enable) + + def enableCount(enable: Boolean): DataCatererConfigurationBuilder = + this.modify(_.build.flagsConfig.enableCount).setTo(enable) + + def enableValidation(enable: Boolean): DataCatererConfigurationBuilder = + this.modify(_.build.flagsConfig.enableValidation).setTo(enable) + + def enableFailOnError(enable: Boolean): DataCatererConfigurationBuilder = + this.modify(_.build.flagsConfig.enableFailOnError).setTo(enable) + + def enableUniqueCheck(enable: Boolean): DataCatererConfigurationBuilder = + this.modify(_.build.flagsConfig.enableUniqueCheck).setTo(enable) + + def enableSaveReports(enable: Boolean): DataCatererConfigurationBuilder = + this.modify(_.build.flagsConfig.enableSaveReports).setTo(enable) + + def enableSinkMetadata(enable: Boolean): DataCatererConfigurationBuilder = + this.modify(_.build.flagsConfig.enableSinkMetadata).setTo(enable) + + def enableDeleteGeneratedRecords(enable: Boolean): DataCatererConfigurationBuilder = + this.modify(_.build.flagsConfig.enableDeleteGeneratedRecords).setTo(enable) + + def enableGeneratePlanAndTasks(enable: Boolean): DataCatererConfigurationBuilder = + this.modify(_.build.flagsConfig.enableGeneratePlanAndTasks).setTo(enable) + + def enableRecordTracking(enable: Boolean): DataCatererConfigurationBuilder = + this.modify(_.build.flagsConfig.enableRecordTracking).setTo(enable) + + def enableGenerateValidations(enable: Boolean): DataCatererConfigurationBuilder = + this.modify(_.build.flagsConfig.enableGenerateValidations).setTo(enable) + + + def planFilePath(path: String): DataCatererConfigurationBuilder = + this.modify(_.build.foldersConfig.planFilePath).setTo(path) + + def taskFolderPath(path: String): DataCatererConfigurationBuilder = + this.modify(_.build.foldersConfig.taskFolderPath).setTo(path) + + def recordTrackingFolderPath(path: String): DataCatererConfigurationBuilder = + this.modify(_.build.foldersConfig.recordTrackingFolderPath).setTo(path) + + def validationFolderPath(path: String): DataCatererConfigurationBuilder = + this.modify(_.build.foldersConfig.validationFolderPath).setTo(path) + + def generatedReportsFolderPath(path: String): DataCatererConfigurationBuilder = + this.modify(_.build.foldersConfig.generatedReportsFolderPath).setTo(path) + + def generatedPlanAndTaskFolderPath(path: String): DataCatererConfigurationBuilder = + this.modify(_.build.foldersConfig.generatedPlanAndTaskFolderPath).setTo(path) + + def recordTrackingForValidationFolderPath(path: String): DataCatererConfigurationBuilder = + this.modify(_.build.foldersConfig.recordTrackingForValidationFolderPath).setTo(path) + + + def numRecordsFromDataSourceForDataProfiling(numRecords: Int): DataCatererConfigurationBuilder = + this.modify(_.build.metadataConfig.numRecordsFromDataSource).setTo(numRecords) + + def numRecordsForAnalysisForDataProfiling(numRecords: Int): 
DataCatererConfigurationBuilder = + this.modify(_.build.metadataConfig.numRecordsForAnalysis).setTo(numRecords) + + def numGeneratedSamples(numSamples: Int): DataCatererConfigurationBuilder = + this.modify(_.build.metadataConfig.numGeneratedSamples).setTo(numSamples) + + def oneOfMinCount(minCount: Long): DataCatererConfigurationBuilder = + this.modify(_.build.metadataConfig.oneOfMinCount).setTo(minCount) + + def oneOfDistinctCountVsCountThreshold(threshold: Double): DataCatererConfigurationBuilder = + this.modify(_.build.metadataConfig.oneOfDistinctCountVsCountThreshold).setTo(threshold) + + + def numRecordsPerBatch(numRecords: Long): DataCatererConfigurationBuilder = + this.modify(_.build.generationConfig.numRecordsPerBatch).setTo(numRecords) + + def numRecordsPerStep(numRecords: Long): DataCatererConfigurationBuilder = + this.modify(_.build.generationConfig.numRecordsPerStep).setTo(Some(numRecords)) + + + def numErrorSampleRecords(numRecords: Int): DataCatererConfigurationBuilder = + this.modify(_.build.validationConfig.numSampleErrorRecords).setTo(numRecords) +} + +final case class ConnectionConfigWithTaskBuilder( + dataSourceName: String = DEFAULT_DATA_SOURCE_NAME, + options: Map[String, String] = Map() + ) { + def this() = this(DEFAULT_DATA_SOURCE_NAME, Map()) + + def file(name: String, format: String, path: String = "", options: Map[String, String] = Map()): FileBuilder = { + val configBuilder = DataCatererConfigurationBuilder() + val fileConnectionConfig = format match { + case CSV => configBuilder.csv(name, path, options) + case JSON => configBuilder.json(name, path, options) + case ORC => configBuilder.orc(name, path, options) + case PARQUET => configBuilder.parquet(name, path, options) + } + setConnectionConfig(name, fileConnectionConfig, FileBuilder()) + } + + def postgres( + name: String, + url: String, + username: String, + password: String, + options: Map[String, String] = Map() + ): PostgresBuilder = { + val configBuilder = DataCatererConfigurationBuilder().postgres(name, url, username, password, options) + setConnectionConfig(name, configBuilder, PostgresBuilder()) + } + + def mysql( + name: String, + url: String, + username: String, + password: String, + options: Map[String, String] = Map() + ): MySqlBuilder = { + val configBuilder = DataCatererConfigurationBuilder().mysql(name, url, username, password, options) + setConnectionConfig(name, configBuilder, MySqlBuilder()) + } + + def cassandra( + name: String, + url: String, + username: String, + password: String, + options: Map[String, String] = Map() + ): CassandraBuilder = { + val configBuilder = DataCatererConfigurationBuilder().cassandra(name, url, username, password, options) + setConnectionConfig(name, configBuilder, CassandraBuilder()) + } + + def solace( + name: String, + url: String, + username: String, + password: String, + vpnName: String, + connectionFactory: String, + initialContextFactory: String, + options: Map[String, String] = Map() + ): SolaceBuilder = { + val configBuilder = DataCatererConfigurationBuilder().solace(name, url, username, password, vpnName, connectionFactory, initialContextFactory, options) + setConnectionConfig(name, configBuilder, SolaceBuilder()) + } + + def kafka(name: String, url: String, options: Map[String, String] = Map()): KafkaBuilder = { + val configBuilder = DataCatererConfigurationBuilder().kafka(name, url, options) + setConnectionConfig(name, configBuilder, KafkaBuilder()) + } + + def http(name: String, username: String, password: String, options: Map[String, String] = Map()): 
HttpBuilder = { + val configBuilder = DataCatererConfigurationBuilder().http(name, username, password, options) + setConnectionConfig(name, configBuilder, HttpBuilder()) + } + + def options(options: Map[String, String]): ConnectionConfigWithTaskBuilder = { + this.modify(_.options)(_ ++ options) + } + + def metadataSource(metadataSourceBuilder: MetadataSourceBuilder): ConnectionConfigWithTaskBuilder = { + this.modify(_.options)(_ ++ metadataSourceBuilder.metadataSource.allOptions) + } + + private def setConnectionConfig[T <: ConnectionTaskBuilder[_]](name: String, configBuilder: DataCatererConfigurationBuilder, connectionBuilder: T): T = { + val modifiedConnectionConfig = this.modify(_.dataSourceName).setTo(name) + .modify(_.options).setTo(configBuilder.build.connectionConfigByName(name)) + connectionBuilder.connectionConfigWithTaskBuilder = modifiedConnectionConfig + connectionBuilder + } +} + diff --git a/api/src/main/scala/com/github/pflooky/datacaterer/api/MetadataSourceBuilder.scala b/api/src/main/scala/com/github/pflooky/datacaterer/api/MetadataSourceBuilder.scala new file mode 100644 index 00000000..29f856b3 --- /dev/null +++ b/api/src/main/scala/com/github/pflooky/datacaterer/api/MetadataSourceBuilder.scala @@ -0,0 +1,74 @@ +package com.github.pflooky.datacaterer.api + +import com.github.pflooky.datacaterer.api.converter.Converters.toScalaMap +import com.github.pflooky.datacaterer.api.model.Constants.{METADATA_SOURCE_URL, OPEN_LINEAGE_DATASET, OPEN_LINEAGE_NAMESPACE, OPEN_METADATA_API_VERSION, OPEN_METADATA_AUTH_TYPE, OPEN_METADATA_AUTH_TYPE_OPEN_METADATA, OPEN_METADATA_DEFAULT_API_VERSION, OPEN_METADATA_HOST, OPEN_METADATA_JWT_TOKEN, SCHEMA_LOCATION} +import com.github.pflooky.datacaterer.api.model.{MarquezMetadataSource, MetadataSource, OpenAPISource, OpenMetadataSource} +import com.softwaremill.quicklens.ModifyPimp + +case class MetadataSourceBuilder(metadataSource: MetadataSource = MarquezMetadataSource()) { + def this() = this(MarquezMetadataSource()) + + def marquez(url: String, namespace: String, optDataset: Option[String] = None, options: Map[String, String] = Map()): MetadataSourceBuilder = { + val baseOptions = Map( + METADATA_SOURCE_URL -> url, + OPEN_LINEAGE_NAMESPACE -> namespace, + ) ++ options + val optionsWithDataset = optDataset.map(ds => baseOptions ++ Map(OPEN_LINEAGE_DATASET -> ds)).getOrElse(baseOptions) + val marquezMetadataSource = MarquezMetadataSource(optionsWithDataset) + this.modify(_.metadataSource).setTo(marquezMetadataSource) + } + + def marquezJava(url: String, namespace: String, dataset: String, options: java.util.Map[String, String]): MetadataSourceBuilder = + marquez(url, namespace, Some(dataset), toScalaMap(options)) + + def marquez(url: String, namespace: String, dataset: String): MetadataSourceBuilder = + marquez(url, namespace, Some(dataset), Map()) + + def marquez(url: String, namespace: String): MetadataSourceBuilder = + marquez(url, namespace, None, Map()) + + def openMetadata(url: String, apiVersion: String, authProvider: String, options: Map[String, String]): MetadataSourceBuilder = { + val baseOptions = Map( + OPEN_METADATA_HOST -> url, + OPEN_METADATA_API_VERSION -> apiVersion, + OPEN_METADATA_AUTH_TYPE -> authProvider + ) ++ options + val openMetadataSource = OpenMetadataSource(baseOptions) + this.modify(_.metadataSource).setTo(openMetadataSource) + } + + /** + * authProvider is one of: + * - no-auth + * - basic + * - azure + * - google + * - okta + * - auth0 + * - aws-cognito + * - custom-oidc + * - ldap + * - saml + * - openmetadata + 
* + * options can contain additional authentication related configuration values. + * Check under {{{Constants}}} openmetadata section for more details. + * + * @param url URL to OpenMetadata server + * @param authProvider See above for list of auth providers + * @param options Additional auth configuration + * @return + */ + def openMetadata(url: String, authProvider: String, options: Map[String, String]): MetadataSourceBuilder = + openMetadata(url, OPEN_METADATA_DEFAULT_API_VERSION, authProvider, options) + + def openMetadataWithToken(url: String, openMetadataToken: String, options: Map[String, String] = Map()): MetadataSourceBuilder = + openMetadata(url, OPEN_METADATA_DEFAULT_API_VERSION, OPEN_METADATA_AUTH_TYPE_OPEN_METADATA, options ++ Map(OPEN_METADATA_JWT_TOKEN -> openMetadataToken)) + + def openMetadataJava(url: String, authProvider: String, options: java.util.Map[String, String]): MetadataSourceBuilder = + openMetadata(url, OPEN_METADATA_DEFAULT_API_VERSION, authProvider, toScalaMap(options)) + + def openApi(schemaLocation: String): MetadataSourceBuilder = { + this.modify(_.metadataSource).setTo(OpenAPISource(Map(SCHEMA_LOCATION -> schemaLocation))) + } +} diff --git a/api/src/main/scala/com/github/pflooky/datacaterer/api/PlanBuilder.scala b/api/src/main/scala/com/github/pflooky/datacaterer/api/PlanBuilder.scala new file mode 100644 index 00000000..2a943995 --- /dev/null +++ b/api/src/main/scala/com/github/pflooky/datacaterer/api/PlanBuilder.scala @@ -0,0 +1,105 @@ +package com.github.pflooky.datacaterer.api + +import com.github.pflooky.datacaterer.api.connection.ConnectionTaskBuilder +import com.github.pflooky.datacaterer.api.converter.Converters.toScalaList +import com.github.pflooky.datacaterer.api.model.Constants.METADATA_SOURCE_TYPE +import com.github.pflooky.datacaterer.api.model.{ForeignKeyRelation, Plan, SinkOptions} +import com.softwaremill.quicklens.ModifyPimp + +import scala.annotation.varargs + +case class PlanBuilder(plan: Plan = Plan(), tasks: List[TasksBuilder] = List()) { + def this() = this(Plan(), List()) + + def name(name: String): PlanBuilder = + this.modify(_.plan.name).setTo(name) + + def description(desc: String): PlanBuilder = + this.modify(_.plan.description).setTo(desc) + + def taskSummaries(taskSummaries: TaskSummaryBuilder*): PlanBuilder = { + val tasksToAdd = taskSummaries.filter(_.task.isDefined) + .map(x => TasksBuilder(List(x.task.get), x.taskSummary.dataSourceName)) + .toList + this.modify(_.plan.tasks)(_ ++ taskSummaries.map(_.taskSummary)) + .modify(_.tasks)(_ ++ tasksToAdd) + } + + def sinkOptions(sinkOptionsBuilder: SinkOptionsBuilder): PlanBuilder = + this.modify(_.plan.sinkOptions).setTo(Some(sinkOptionsBuilder.sinkOptions)) + + def seed(seed: Long): PlanBuilder = + this.modify(_.plan.sinkOptions).setTo(Some(getSinkOpt.seed(seed).sinkOptions)) + + def locale(locale: String): PlanBuilder = + this.modify(_.plan.sinkOptions).setTo(Some(getSinkOpt.locale(locale).sinkOptions)) + + @varargs def addForeignKeyRelationship(foreignKey: ForeignKeyRelation, relations: ForeignKeyRelation*): PlanBuilder = + this.modify(_.plan.sinkOptions).setTo(Some(getSinkOpt.foreignKey(foreignKey, relations.toList).sinkOptions)) + + def addForeignKeyRelationship(connectionTaskBuilder: ConnectionTaskBuilder[_], columns: List[String], + relations: List[(ConnectionTaskBuilder[_], List[String])]): PlanBuilder = { + val baseRelation = toForeignKeyRelation(connectionTaskBuilder, columns) + val otherRelations = relations.map(r => toForeignKeyRelation(r._1, r._2)) + 
addForeignKeyRelationship(baseRelation, otherRelations: _*) + } + + def addForeignKeyRelationship(connectionTaskBuilder: ConnectionTaskBuilder[_], columns: java.util.List[String], + relations: java.util.List[java.util.Map.Entry[ConnectionTaskBuilder[_], java.util.List[String]]]): PlanBuilder = { + val scalaListRelations = toScalaList(relations) + val mappedRelations = scalaListRelations.map(r => (r.getKey, toScalaList(r.getValue))) + addForeignKeyRelationship(connectionTaskBuilder, toScalaList(columns), mappedRelations) + } + + def addForeignKeyRelationship(connectionTaskBuilder: ConnectionTaskBuilder[_], column: String, + relations: List[(ConnectionTaskBuilder[_], String)]): PlanBuilder = + addForeignKeyRelationship(connectionTaskBuilder, List(column), relations.map(r => (r._1, List(r._2)))) + + def addForeignKeyRelationship(connectionTaskBuilder: ConnectionTaskBuilder[_], column: String, + relations: java.util.List[java.util.Map.Entry[ConnectionTaskBuilder[_], String]]): PlanBuilder = { + val scalaListRelations = toScalaList(relations) + val mappedRelations = scalaListRelations.map(r => (r.getKey, List(r.getValue))) + addForeignKeyRelationship(connectionTaskBuilder, List(column), mappedRelations) + } + + def addForeignKeyRelationships(connectionTaskBuilder: ConnectionTaskBuilder[_], columns: List[String], + relations: List[ForeignKeyRelation]): PlanBuilder = { + val baseRelation = toForeignKeyRelation(connectionTaskBuilder, columns) + addForeignKeyRelationship(baseRelation, relations: _*) + } + + def addForeignKeyRelationships(connectionTaskBuilder: ConnectionTaskBuilder[_], columns: java.util.List[String], + relations: java.util.List[ForeignKeyRelation]): PlanBuilder = + addForeignKeyRelationships(connectionTaskBuilder, toScalaList(columns), toScalaList(relations)) + + def addForeignKeyRelationship(foreignKey: ForeignKeyRelation, + relations: List[(ConnectionTaskBuilder[_], List[String])]): PlanBuilder = + addForeignKeyRelationship(foreignKey, relations.map(r => toForeignKeyRelation(r._1, r._2)): _*) + + def addForeignKeyRelationship(foreignKey: ForeignKeyRelation, + relations: java.util.List[(ConnectionTaskBuilder[_], java.util.List[String])]): PlanBuilder = + addForeignKeyRelationship(foreignKey, toScalaList(relations).map(r => toForeignKeyRelation(r._1, toScalaList(r._2))): _*) + + private def toForeignKeyRelation(connectionTaskBuilder: ConnectionTaskBuilder[_], columns: List[String]) = { + val dataSource = connectionTaskBuilder.connectionConfigWithTaskBuilder.dataSourceName + val colNames = columns.mkString(",") + connectionTaskBuilder.step match { + case Some(value) => + val fields = value.step.schema.fields.getOrElse(List()) + val hasColumns = columns.forall(c => fields.exists(_.name == c)) + if (!hasColumns && !value.step.options.contains(METADATA_SOURCE_TYPE)) { + throw new RuntimeException(s"Column name defined in foreign key relationship does not exist, data-source=$dataSource, column-name=$colNames") + } + ForeignKeyRelation(dataSource, value.step.name, columns) + case None => + throw new RuntimeException(s"No schema defined for data source. 
Cannot create foreign key relationship, data-source=$dataSource, column-name=$colNames") + } + } + + private def getSinkOpt: SinkOptionsBuilder = { + plan.sinkOptions match { + case Some(value) => SinkOptionsBuilder(value) + case None => SinkOptionsBuilder() + } + } +} diff --git a/api/src/main/scala/com/github/pflooky/datacaterer/api/PlanRun.scala b/api/src/main/scala/com/github/pflooky/datacaterer/api/PlanRun.scala new file mode 100644 index 00000000..8525b63b --- /dev/null +++ b/api/src/main/scala/com/github/pflooky/datacaterer/api/PlanRun.scala @@ -0,0 +1,481 @@ +package com.github.pflooky.datacaterer.api + +import com.github.pflooky.datacaterer.api.connection.{CassandraBuilder, ConnectionTaskBuilder, FileBuilder, HttpBuilder, KafkaBuilder, MySqlBuilder, PostgresBuilder, SolaceBuilder} +import com.github.pflooky.datacaterer.api.converter.Converters.toScalaList +import com.github.pflooky.datacaterer.api.model.Constants._ +import com.github.pflooky.datacaterer.api.model.{DataCatererConfiguration, ForeignKeyRelation, Plan, Task, ValidationConfiguration} + +import scala.annotation.varargs + + +trait PlanRun { + var _plan: Plan = Plan() + var _tasks: List[Task] = List() + var _configuration: DataCatererConfiguration = DataCatererConfiguration() + var _validations: List[ValidationConfiguration] = List() + var _connectionTaskBuilders: Seq[ConnectionTaskBuilder[_]] = Seq() + + def plan: PlanBuilder = PlanBuilder() + + def taskSummary: TaskSummaryBuilder = TaskSummaryBuilder() + + def tasks: TasksBuilder = TasksBuilder() + + def task: TaskBuilder = TaskBuilder() + + def step: StepBuilder = StepBuilder() + + def schema: SchemaBuilder = SchemaBuilder() + + def field: FieldBuilder = FieldBuilder() + + def generator: GeneratorBuilder = GeneratorBuilder() + + def count: CountBuilder = CountBuilder() + + def configuration: DataCatererConfigurationBuilder = DataCatererConfigurationBuilder() + + def waitCondition: WaitConditionBuilder = WaitConditionBuilder() + + def validation: ValidationBuilder = ValidationBuilder() + + def dataSourceValidation: DataSourceValidationBuilder = DataSourceValidationBuilder() + + def validationConfig: ValidationConfigurationBuilder = ValidationConfigurationBuilder() + + def foreignField(dataSource: String, step: String, column: String): ForeignKeyRelation = + new ForeignKeyRelation(dataSource, step, column) + + def foreignField(dataSource: String, step: String, columns: List[String]): ForeignKeyRelation = + ForeignKeyRelation(dataSource, step, columns) + + def foreignField(connectionTask: ConnectionTaskBuilder[_], step: String, columns: List[String]): ForeignKeyRelation = + ForeignKeyRelation(connectionTask.connectionConfigWithTaskBuilder.dataSourceName, step, columns) + + def metadataSource: MetadataSourceBuilder = MetadataSourceBuilder() + + /** + * Create new CSV generation step with configurations + * + * @param name Data source name + * @param path File path to generated CSV + * @param options Additional options for CSV generation + * @return FileBuilder + */ + def csv(name: String, path: String, options: Map[String, String] = Map()): FileBuilder = + ConnectionConfigWithTaskBuilder().file(name, CSV, path, options) + + /** + * Create new JSON generation step with configurations + * + * @param name Data source name + * @param path File path to generated JSON + * @param options Additional options for JSON generation + * @return FileBuilder + */ + def json(name: String, path: String, options: Map[String, String] = Map()): FileBuilder = + 
ConnectionConfigWithTaskBuilder().file(name, JSON, path, options) + + /** + * Create new ORC generation step with configurations + * + * @param name Data source name + * @param path File path to generated ORC + * @param options Additional options for ORC generation + * @return FileBuilder + */ + def orc(name: String, path: String, options: Map[String, String] = Map()): FileBuilder = + ConnectionConfigWithTaskBuilder().file(name, ORC, path, options) + + /** + * Create new PARQUET generation step with configurations + * + * @param name Data source name + * @param path File path to generated PARQUET + * @param options Additional options for PARQUET generation + * @return FileBuilder + */ + def parquet(name: String, path: String, options: Map[String, String] = Map()): FileBuilder = + ConnectionConfigWithTaskBuilder().file(name, PARQUET, path, options) + + /** + * Create new POSTGRES generation step with connection configuration + * + * @param name Data source name + * @param url Postgres url in format: jdbc:postgresql://_host_:_port_/_database_ + * @param username Postgres username + * @param password Postgres password + * @param options Additional driver options + * @return PostgresBuilder + */ + def postgres( + name: String, + url: String = DEFAULT_POSTGRES_URL, + username: String = DEFAULT_POSTGRES_USERNAME, + password: String = DEFAULT_POSTGRES_PASSWORD, + options: Map[String, String] = Map() + ): PostgresBuilder = + ConnectionConfigWithTaskBuilder().postgres(name, url, username, password, options) + + /** + * Create new POSTGRES generation step with only Postgres URL and default username and password of 'postgres' + * + * @param name Data source name + * @param url Postgres url in format: jdbc:postgresql://_host_:_port_/_database_ + * @return PostgresBuilder + */ + def postgresJava(name: String, url: String): PostgresBuilder = postgres(name, url) + + /** + * Create new POSTGRES generation step using the same connection configuration from another PostgresBuilder + * + * @param connectionTaskBuilder Postgres builder with connection configuration + * @return PostgresBuilder + */ + def postgres(connectionTaskBuilder: ConnectionTaskBuilder[PostgresBuilder]): PostgresBuilder = + PostgresBuilder().fromBaseConfig(connectionTaskBuilder) + + /** + * Create new MYSQL generation step with connection configuration + * + * @param name Data source name + * @param url Mysql url in format: jdbc:mysql://_host_:_port_/_database_ + * @param username Mysql username + * @param password Mysql password + * @param options Additional driver options + * @return MySqlBuilder + */ + def mysql( + name: String, + url: String = DEFAULT_MYSQL_URL, + username: String = DEFAULT_MYSQL_USERNAME, + password: String = DEFAULT_MYSQL_PASSWORD, + options: Map[String, String] = Map() + ): MySqlBuilder = + ConnectionConfigWithTaskBuilder().mysql(name, url, username, password, options) + + + /** + * Create new MYSQL generation step with only Mysql URL and default username and password of 'root' + * + * @param name Data source name + * @param url Mysql url in format: jdbc:mysql://_host_:_port_/_dbname_ + * @return MySqlBuilder + */ + def mysqlJava(name: String, url: String): MySqlBuilder = mysql(name, url) + + + /** + * Create new MYSQL generation step using the same connection configuration from another MySqlBuilder + * + * @param connectionTaskBuilder Mysql builder with connection configuration + * @return MySqlBuilder + */ + def mysql(connectionTaskBuilder: ConnectionTaskBuilder[MySqlBuilder]): MySqlBuilder = + 
MySqlBuilder().fromBaseConfig(connectionTaskBuilder) + + + /** + * Create new CASSANDRA generation step with connection configuration + * + * @param name Data source name + * @param url Cassandra url with format: _host_:_port_ + * @param username Cassandra username + * @param password Cassandra password + * @param options Additional connection options + * @return CassandraBuilder + */ + def cassandra( + name: String, + url: String = DEFAULT_CASSANDRA_URL, + username: String = DEFAULT_CASSANDRA_USERNAME, + password: String = DEFAULT_CASSANDRA_PASSWORD, + options: Map[String, String] = Map() + ): CassandraBuilder = + ConnectionConfigWithTaskBuilder().cassandra(name, url, username, password, options) + + + /** + * Create new CASSANDRA generation step with only Cassandra URL and default username and password of 'cassandra' + * + * @param name Data source name + * @param url Cassandra url with format: _host_:_port_ + * @return CassandraBuilder + */ + def cassandraJava(name: String, url: String): CassandraBuilder = cassandra(name, url) + + + /** + * Create new Cassandra generation step using the same connection configuration from another CassandraBuilder + * + * @param connectionTaskBuilder Cassandra builder with connection configuration + * @return CassandraBuilder + */ + def cassandra(connectionTaskBuilder: ConnectionTaskBuilder[CassandraBuilder]): CassandraBuilder = + CassandraBuilder().fromBaseConfig(connectionTaskBuilder) + + + /** + * Create new SOLACE generation step with connection configuration + * + * @param name Data source name + * @param url Solace url + * @param username Solace username + * @param password Solace password + * @param vpnName VPN name in Solace to connect to + * @param connectionFactory Connection factory + * @param initialContextFactory Initial context factory + * @param options Additional connection options + * @return SolaceBuilder + */ + def solace( + name: String, + url: String = DEFAULT_SOLACE_URL, + username: String = DEFAULT_SOLACE_USERNAME, + password: String = DEFAULT_SOLACE_PASSWORD, + vpnName: String = DEFAULT_SOLACE_VPN_NAME, + connectionFactory: String = DEFAULT_SOLACE_CONNECTION_FACTORY, + initialContextFactory: String = DEFAULT_SOLACE_INITIAL_CONTEXT_FACTORY, + options: Map[String, String] = Map() + ): SolaceBuilder = + ConnectionConfigWithTaskBuilder().solace(name, url, username, password, vpnName, connectionFactory, initialContextFactory, options) + + + /** + * Create new SOLACE generation step with Solace URL, username, password and vpnName. Default connection factory and + * initial context factory used + * + * @param name Data source name + * @param url Solace url + * @param username Solace username + * @param password Solace password + * @param vpnName VPN name in Solace to connect to + * @return SolaceBuilder + */ + def solaceJava(name: String, url: String, username: String, password: String, vpnName: String): SolaceBuilder = + solace(name, url, username, password, vpnName) + + + /** + * Create new SOLACE generation step with Solace URL. 
Other configurations are set to default values + * + * @param name Data source name + * @param url Solace url + * @return SolaceBuilder + */ + def solaceJava(name: String, url: String): SolaceBuilder = solace(name, url) + + + /** + * Create new Solace generation step using the same connection configuration from another SolaceBuilder + * + * @param connectionTaskBuilder Solace step with connection configuration + * @return SolaceBuilder + */ + def solace(connectionTaskBuilder: ConnectionTaskBuilder[SolaceBuilder]): SolaceBuilder = + SolaceBuilder().fromBaseConfig(connectionTaskBuilder) + + /** + * Create new KAFKA generation step with connection configuration + * + * @param name Data source name + * @param url Kafka url + * @param options Additional connection options + * @return KafkaBuilder + */ + def kafka(name: String, url: String = DEFAULT_KAFKA_URL, options: Map[String, String] = Map()): KafkaBuilder = + ConnectionConfigWithTaskBuilder().kafka(name, url, options) + + + /** + * Create new KAFKA generation step with url + * + * @param name Data source name + * @param url Kafka url + * @return KafkaBuilder + */ + def kafkaJava(name: String, url: String): KafkaBuilder = kafka(name, url) + + /** + * Create new Kafka generation step using the same connection configuration from another KafkaBuilder + * + * @param connectionTaskBuilder Kafka step with connection configuration + * @return KafkaBuilder + */ + def kafka(connectionTaskBuilder: ConnectionTaskBuilder[KafkaBuilder]): KafkaBuilder = + KafkaBuilder().fromBaseConfig(connectionTaskBuilder) + + /** + * Create new HTTP generation step using connection configuration + * + * @param name Data source name + * @param username HTTP username + * @param password HTTP password + * @param options Additional connection options + * @return HttpBuilder + */ + def http(name: String, username: String = "", password: String = "", options: Map[String, String] = Map()): HttpBuilder = + ConnectionConfigWithTaskBuilder().http(name, username, password, options) + + /** + * Create new HTTP generation step without authentication + * + * @param name Data source name + * @return HttpBuilder + */ + def httpJava(name: String): HttpBuilder = http(name) + + /** + * Create new HTTP generation step using the same connection configuration from another HttpBuilder + * + * @param connectionTaskBuilder Http step with connection configuration + * @return HttpBuilder + */ + def http(connectionTaskBuilder: ConnectionTaskBuilder[HttpBuilder]): HttpBuilder = + HttpBuilder().fromBaseConfig(connectionTaskBuilder) + + + /** + * Execute with the following connections and tasks defined + * + * @param connectionTaskBuilder First connection and task + * @param connectionTaskBuilders Other connections and tasks + */ + def execute(connectionTaskBuilder: ConnectionTaskBuilder[_], connectionTaskBuilders: ConnectionTaskBuilder[_]*): Unit = { + execute(configuration, connectionTaskBuilder, connectionTaskBuilders: _*) + } + + /** + * Execute with non-default configurations for a set of tasks + * + * @param baseConfiguration Runtime configurations + * @param connectionTaskBuilder First connection and task + * @param connectionTaskBuilders Other connections and tasks + */ + def execute( + baseConfiguration: DataCatererConfigurationBuilder, + connectionTaskBuilder: ConnectionTaskBuilder[_], + connectionTaskBuilders: ConnectionTaskBuilder[_]* + ): Unit = { + execute(plan, baseConfiguration, List(), connectionTaskBuilder, connectionTaskBuilders: _*) + } + + /** + * Execute with non-default 
configurations with validations and tasks + * + * @param planBuilder Plan to set high level task configurations + * @param baseConfiguration Runtime configurations + * @param connectionTaskBuilder First connection and task + * @param connectionTaskBuilders Other connections and tasks + */ + def execute( + planBuilder: PlanBuilder, + baseConfiguration: DataCatererConfigurationBuilder, + connectionTaskBuilder: ConnectionTaskBuilder[_], + connectionTaskBuilders: ConnectionTaskBuilder[_]* + ): Unit = { + execute(planBuilder, baseConfiguration, List(), connectionTaskBuilder, connectionTaskBuilders: _*) + } + + /** + * Execute with non-default configurations with validations and tasks. Validations have to be enabled before running + * (see [[DataCatererConfigurationBuilder.enableValidation()]]. + * + * @param planBuilder Plan to set high level task configurations + * @param baseConfiguration Runtime configurations + * @param validations Validations to run if enabled + * @param connectionTask First connection and task + * @param connectionTasks Other connections and tasks + */ + @varargs def execute( + planBuilder: PlanBuilder, + baseConfiguration: DataCatererConfigurationBuilder, + validations: List[ValidationConfigurationBuilder], + connectionTask: ConnectionTaskBuilder[_], + connectionTasks: ConnectionTaskBuilder[_]* + ): Unit = { + val allConnectionTasks = connectionTask +: connectionTasks + val connectionConfig = allConnectionTasks.map(x => { + val connectionConfigWithTaskBuilder = x.connectionConfigWithTaskBuilder + (connectionConfigWithTaskBuilder.dataSourceName, connectionConfigWithTaskBuilder.options) + }).toMap + val withConnectionConfig = baseConfiguration.connectionConfig(connectionConfig) + val allValidations = validations ++ getValidations(allConnectionTasks) + val allTasks = allConnectionTasks.map(_.toTasksBuilder).filter(_.isDefined).map(_.get).toList + + _connectionTaskBuilders = allConnectionTasks + execute(allTasks, planBuilder, withConnectionConfig, allValidations) + } + + /** + * Execute with set of tasks and default configurations + * + * @param tasks Tasks to generate data + */ + def execute(tasks: TasksBuilder): Unit = execute(List(tasks)) + + /** + * Execute with plan and non-default configuration + * + * @param planBuilder Plan to set high level task configurations + * @param configuration Runtime configuration + */ + def execute(planBuilder: PlanBuilder, configuration: DataCatererConfigurationBuilder): Unit = { + execute(planBuilder.tasks, planBuilder, configuration) + } + + /** + * Execute with tasks, plan, runtime configurations and validations defined + * + * @param tasks Set of generation tasks + * @param plan Plan to set high level task configurations + * @param configuration Runtime configurations + * @param validations Validations on data sources + */ + def execute( + tasks: List[TasksBuilder] = List(), + plan: PlanBuilder = PlanBuilder(), + configuration: DataCatererConfigurationBuilder = DataCatererConfigurationBuilder(), + validations: List[ValidationConfigurationBuilder] = List() + ): Unit = { + val taskToDataSource = tasks.flatMap(x => x.tasks.map(t => (t.name, x.dataSourceName, t))) + val planWithTaskToDataSource = plan.taskSummaries(taskToDataSource.map(t => taskSummary.name(t._1).dataSource(t._2)): _*) + + _plan = planWithTaskToDataSource.plan + _tasks = taskToDataSource.map(_._3) + _configuration = configuration.build + _validations = validations.map(_.validationConfiguration) + } + + private def getValidations(allConnectionTasks: 
Seq[ConnectionTaskBuilder[_]]) = { + val validationsByDataSource = allConnectionTasks.map(x => { + val dataSource = x.connectionConfigWithTaskBuilder.dataSourceName + val optValidation = x.step + .flatMap(_.optValidation) + .map(dsValid => { + DataSourceValidationBuilder() + .options(x.step.map(_.step.options).getOrElse(Map()) ++ x.connectionConfigWithTaskBuilder.options) + .wait(dsValid.dataSourceValidation.waitCondition) + .validations(dsValid.dataSourceValidation.validations: _*) + }) + (dataSource, optValidation) + }) + .filter(_._2.isDefined) + .map(ds => (ds._1, validationConfig.addDataSourceValidation(ds._1, ds._2.get))) + + validationsByDataSource + .groupBy(_._1) + .map(x => { + val dataSourceName = x._1 + val validationsToMerge = x._2.tail.flatMap(_._2.validationConfiguration.dataSources(dataSourceName)) + if (validationsToMerge.nonEmpty) { + x._2.head._2.addDataSourceValidation(dataSourceName, validationsToMerge) + } else { + x._2.head._2 + } + }) + .filter(vc => vc.validationConfiguration.dataSources.exists(_._2.nonEmpty)) + } +} + +class BasePlanRun extends PlanRun diff --git a/api/src/main/scala/com/github/pflooky/datacaterer/api/SinkOptionsBuilder.scala b/api/src/main/scala/com/github/pflooky/datacaterer/api/SinkOptionsBuilder.scala new file mode 100644 index 00000000..c5fcf9bd --- /dev/null +++ b/api/src/main/scala/com/github/pflooky/datacaterer/api/SinkOptionsBuilder.scala @@ -0,0 +1,55 @@ +package com.github.pflooky.datacaterer.api + +import com.github.pflooky.datacaterer.api.model.{ForeignKeyRelation, SinkOptions} +import com.softwaremill.quicklens.ModifyPimp + +import scala.annotation.varargs + +/** + * Configurations that get applied across all generated data. This includes the random seed value, locale and foreign keys + */ +case class SinkOptionsBuilder(sinkOptions: SinkOptions = SinkOptions()) { + + /** + * Random seed value to be used across all generated data + * + * @param seed Used as seed argument when creating Random instance + * @return SinkOptionsBuilder + */ + def seed(seed: Long): SinkOptionsBuilder = this.modify(_.sinkOptions.seed).setTo(Some(seed.toString)) + + /** + * Locale used when generating data via DataFaker expressions + * + * @param locale Locale for DataFaker data generated + * @return SinkOptionsBuilder + * @see Docs for details + */ + def locale(locale: String): SinkOptionsBuilder = this.modify(_.sinkOptions.locale).setTo(Some(locale)) + + /** + * Define a foreign key relationship between columns across any data source. + * To define which column to use, it is defined by the following:
+ * dataSourceName + stepName + columnName + * + * @param foreignKey Base foreign key + * @param relations Foreign key relations + * @return SinkOptionsBuilder + * @see Docs for details + */ + @varargs def foreignKey(foreignKey: ForeignKeyRelation, relations: ForeignKeyRelation*): SinkOptionsBuilder = + this.modify(_.sinkOptions.foreignKeys)(_ ++ Map(foreignKey.toString -> relations.map(_.toString).toList)) + + /** + * Define a foreign key relationship between columns across any data source. + * To define which column to use, it is defined by the following:
+ * dataSourceName + stepName + columnName + * + * @param foreignKey Base foreign key + * @param relations Foreign key relations + * @return SinkOptionsBuilder + * @see Docs for details + */ + def foreignKey(foreignKey: ForeignKeyRelation, relations: List[ForeignKeyRelation]): SinkOptionsBuilder = + this.foreignKey(foreignKey, relations: _*) +} diff --git a/api/src/main/scala/com/github/pflooky/datacaterer/api/TaskBuilder.scala b/api/src/main/scala/com/github/pflooky/datacaterer/api/TaskBuilder.scala new file mode 100644 index 00000000..76bf3a9c --- /dev/null +++ b/api/src/main/scala/com/github/pflooky/datacaterer/api/TaskBuilder.scala @@ -0,0 +1,849 @@ +package com.github.pflooky.datacaterer.api + +import com.github.pflooky.datacaterer.api.converter.Converters.toScalaMap +import com.github.pflooky.datacaterer.api.model.Constants._ +import com.github.pflooky.datacaterer.api.model.{Count, DataType, Field, Generator, PerColumnCount, Schema, Step, StringType, Task, TaskSummary} +import com.softwaremill.quicklens.ModifyPimp + +import scala.annotation.varargs + +case class TaskSummaryBuilder( + taskSummary: TaskSummary = TaskSummary(DEFAULT_TASK_NAME, "myDefaultDataSource"), + task: Option[Task] = None + ) { + def this() = this(TaskSummary(DEFAULT_TASK_NAME, DEFAULT_DATA_SOURCE_NAME), None) + + def name(name: String): TaskSummaryBuilder = { + if (task.isEmpty) this.modify(_.taskSummary.name).setTo(name) else this + } + + def task(taskBuilder: TaskBuilder): TaskSummaryBuilder = { + this.modify(_.taskSummary.name).setTo(taskBuilder.task.name) + .modify(_.task).setTo(Some(taskBuilder.task)) + } + + def task(task: Task): TaskSummaryBuilder = { + this.modify(_.taskSummary.name).setTo(task.name) + .modify(_.task).setTo(Some(task)) + } + + def dataSource(name: String): TaskSummaryBuilder = + this.modify(_.taskSummary.dataSourceName).setTo(name) + + def enabled(enabled: Boolean): TaskSummaryBuilder = + this.modify(_.taskSummary.enabled).setTo(enabled) + +} + +case class TasksBuilder(tasks: List[Task] = List(), dataSourceName: String = DEFAULT_DATA_SOURCE_NAME) { + def this() = this(List(), DEFAULT_DATA_SOURCE_NAME) + + @varargs def addTasks(dataSourceName: String, taskBuilders: TaskBuilder*): TasksBuilder = + this.modify(_.tasks)(_ ++ taskBuilders.map(_.task)) + .modify(_.dataSourceName).setTo(dataSourceName) + + @varargs def addTask(name: String, dataSourceName: String, stepBuilders: StepBuilder*): TasksBuilder = + this.modify(_.tasks)(_ ++ List(TaskBuilder(Task(name, stepBuilders.map(_.step).toList)).task)) + .modify(_.dataSourceName).setTo(dataSourceName) + + def addTask(name: String, dataSourceName: String, steps: List[Step]): TasksBuilder = + this.modify(_.tasks)(_ ++ List(TaskBuilder(Task(name, steps)).task)) + .modify(_.dataSourceName).setTo(dataSourceName) +} + +/** + * A task can be seen as a representation of a data source. + * A task can contain steps which represent sub data sources within it.
+ * For example, you can define a Postgres task for database 'customer' with steps to generate data for + * tables 'public.account' and 'public.transactions' within it. + */ +case class TaskBuilder(task: Task = Task()) { + def this() = this(Task()) + + def name(name: String): TaskBuilder = this.modify(_.task.name).setTo(name) + + @varargs def steps(steps: StepBuilder*): TaskBuilder = this.modify(_.task.steps)(_ ++ steps.map(_.step)) +} + +case class StepBuilder(step: Step = Step(), optValidation: Option[DataSourceValidationBuilder] = None) { + def this() = this(Step(), None) + + /** + * Define name of step. + * Used as part of foreign key definitions + * + * @param name Step name + * @return StepBuilder + */ + def name(name: String): StepBuilder = + this.modify(_.step.name).setTo(name) + + /** + * Define type of step. For example, csv, json, parquet. + * Used to determine how to save the generated data + * + * @param type Can be one of the supported types + * @return StepBuilder + */ + def `type`(`type`: String): StepBuilder = + this.modify(_.step.`type`).setTo(`type`) + + /** + * Enable/disable the step + * + * @param enabled Boolean flag + * @return StepBuilder + */ + def enabled(enabled: Boolean): StepBuilder = + this.modify(_.step.enabled).setTo(enabled) + + /** + * Add in generic option to the step. + * This can be used to configure the sub data source details such as table, topic, and file path. + * It is used as part of the options passed to Spark when connecting to the data source. + * Can also be used for attaching metadata to the step + * + * @param option Key and value of the data used for retrieval + * @return StepBuilder + */ + def option(option: (String, String)): StepBuilder = + this.modify(_.step.options)(_ ++ Map(option)) + + /** + * Map of configurations used by Spark to connect to the data source + * + * @param options Map of key value pairs to connect to data source + * @return StepBuilder + */ + def options(options: Map[String, String]): StepBuilder = + this.modify(_.step.options)(_ ++ options) + + /** + * Wrapper for Java Map + * + * @param options Map of key value pairs to connect to data source + * @return StepBuilder + */ + def options(options: java.util.Map[String, String]): StepBuilder = + this.options(toScalaMap(options)) + + /** + * Define table name to connect for JDBC data source. + * + * @param table Table name + * @return StepBuilder + */ + def jdbcTable(table: String): StepBuilder = + this.modify(_.step.options)(_ ++ Map(JDBC_TABLE -> table)) + + /** + * Define schema and table name for JDBC data source. + * + * @param schema Schema name + * @param table Table name + * @return StepBuilder + */ + def jdbcTable(schema: String, table: String): StepBuilder = + this.modify(_.step.options)(_ ++ Map(JDBC_TABLE -> s"$schema.$table")) + + /** + * Keyspace and table name for Cassandra data source + * + * @param keyspace Keyspace name + * @param table Table name + * @return StepBuilder + */ + def cassandraTable(keyspace: String, table: String): StepBuilder = + this.modify(_.step.options)(_ ++ Map(CASSANDRA_KEYSPACE -> keyspace, CASSANDRA_TABLE -> table)) + + /** + * The queue/topic name for a JMS data source. 
+ * This is used as part of connecting to a JMS destination as a JNDI resource + * + * @param destination Destination name + * @return StepBuilder + */ + def jmsDestination(destination: String): StepBuilder = + this.modify(_.step.options)(_ ++ Map(JMS_DESTINATION_NAME -> destination)) + + /** + * Kafka topic to push data to for Kafka data source + * + * @param topic Topic name + * @return StepBuilder + */ + def kafkaTopic(topic: String): StepBuilder = + this.modify(_.step.options)(_ ++ Map(KAFKA_TOPIC -> topic)) + + /** + * File pathway used for file data source. + * Can be defined as a local file system path or cloud based path (i.e. s3a://my-bucket/file/path) + * + * @param path File path + * @return StepBuilder + */ + def path(path: String): StepBuilder = + this.modify(_.step.options)(_ ++ Map(PATH -> path)) + + /** + * The columns within the generated data to use as partitions for a file data source. + * Order of partition columns defined is used to define order of partitions.
+ * For example, {{{partitionBy("year", "account_id")}}} + * will ensure that `year` is used as the top level partition + * before `account_id`. + * + * @param partitionsBy Partition column names in order + * @return StepBuilder + */ + @varargs def partitionBy(partitionsBy: String*): StepBuilder = + this.modify(_.step.options)(_ ++ Map(PARTITION_BY -> partitionsBy.map(_.trim).mkString(","))) + + /** + * Number of partitions to use when saving data to the data source. + * This can be used to help fine tune performance depending on your data source.
+ * For example, if you are facing timeout errors when saving to your database, you can reduce the number of + * partitions to help reduce the number of concurrent saves to your database. + * + * @param partitions Number of partitions when saving data to data source + * @return StepBuilder + */ + def numPartitions(partitions: Int): StepBuilder = + this.modify(_.step.options)(_ ++ Map(PARTITIONS -> partitions.toString)) + + /** + * Number of rows pushed to data source per second. + * Only used for real time data sources such as JMS, Kafka and HTTP.
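+ * For example, {{{rowsPerSecond(100)}}} aims to push roughly 100 records per second (the value shown here is purely illustrative).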
+ * If the number of rows per second is not reaching as high as expected, it may be limited by the number + * of partitions used when saving data. In that case, increase the number of partitions via
+ * {{{.numPartitions(20)}}} or some higher number + * + * @param rowsPerSecond Number of rows per second to generate + * @return StepBuilder + */ + def rowsPerSecond(rowsPerSecond: Int): StepBuilder = + this.modify(_.step.options)(_ ++ Map(ROWS_PER_SECOND -> rowsPerSecond.toString)) + + /** + * Define number of records to be generated for the sub data source via CountBuilder + * + * @param countBuilder Configure number of records to generate + * @return StepBuilder + */ + def count(countBuilder: CountBuilder): StepBuilder = + this.modify(_.step.count).setTo(countBuilder.count) + + /** + * Define number of records to be generated. + * If you also have defined a per column count, this value will not represent the full number of records generated. + * + * @param records Number of records to generate + * @return StepBuilder + * @see Count definition for details + */ + def count(records: Long): StepBuilder = + this.modify(_.step.count).setTo(CountBuilder().records(records).count) + + /** + * Define a generator to be used for determining the number of records to generate. + * If you also have defined a per column count, the value generated will be combined with the per column count to + * determine the total number of records + * + * @param generator Generator builder for determining number of records to generate + * @return StepBuilder + * @see Count definition for details + */ + def count(generator: GeneratorBuilder): StepBuilder = + this.modify(_.step.count).setTo(CountBuilder().generator(generator).count) + + /** + * Define the number of records to generate based off certain columns.
+ * For example, if you had a data set with columns account_id and amount, you can set that 10 records to be generated + * per account_id via {{{.count(new PerColumnCountBuilder().total(10, "account_id")}}}. + * The total number of records generated is also influenced by other count configurations. + * + * @param perColumnCountBuilder Per column count builder + * @return StepBuilder + * @see Count definition for details + */ + def count(perColumnCountBuilder: PerColumnCountBuilder): StepBuilder = + this.modify(_.step.count).setTo(CountBuilder().perColumn(perColumnCountBuilder).count) + + /** + * Schema to use when generating data for data source. + * The schema includes various metadata about each field to guide the data generator on what the data should look + * like. + * + * @param schemaBuilder Schema builder + * @return StepBuilder + */ + def schema(schemaBuilder: SchemaBuilder): StepBuilder = + this.modify(_.step.schema).setTo(schemaBuilder.schema) + + /** + * Define fields of the schema of the data source to use when generating data. + * + * @param fields Fields of the schema + * @return StepBuilder + */ + @varargs def schema(fields: FieldBuilder*): StepBuilder = + this.modify(_.step.schema).setTo(SchemaBuilder().addFields(fields: _*).schema) + + /** + * Define data validations once data has been generated. The result of the validations is logged out and included + * as part of the HTML report. + * + * @param validations All validations + * @return StepBuilder + */ + @varargs def validations(validations: ValidationBuilder*): StepBuilder = + this.modify(_.optValidation).setTo(Some(getValidation.validations(validations: _*))) + + /** + * Define a wait condition that is used before executing validations on the data source + * + * @param waitConditionBuilder Builder for wait condition + * @return StepBuilder + */ + def wait(waitConditionBuilder: WaitConditionBuilder): StepBuilder = + this.modify(_.optValidation).setTo(Some(getValidation.wait(waitConditionBuilder))) + + private def getValidation: DataSourceValidationBuilder = optValidation.getOrElse(DataSourceValidationBuilder()) +} + +case class CountBuilder(count: Count = Count()) { + def this() = this(Count()) + + def records(records: Long): CountBuilder = + this.modify(_.count.records).setTo(Some(records)) + + def generator(generator: GeneratorBuilder): CountBuilder = + this.modify(_.count.generator).setTo(Some(generator.generator)) + .modify(_.count.records).setTo(None) + + def perColumn(perColumnCountBuilder: PerColumnCountBuilder): CountBuilder = + this.modify(_.count.perColumn).setTo(Some(perColumnCountBuilder.perColumnCount)) + + @varargs def recordsPerColumn(records: Long, cols: String*): CountBuilder = + this.modify(_.count.perColumn).setTo(Some(perColCount.records(records, cols: _*).perColumnCount)) + + @varargs def recordsPerColumnGenerator(generator: GeneratorBuilder, cols: String*): CountBuilder = + this.modify(_.count.perColumn).setTo(Some(perColCount.generator(generator, cols: _*).perColumnCount)) + + @varargs def recordsPerColumnGenerator(records: Long, generator: GeneratorBuilder, cols: String*): CountBuilder = + this.modify(_.count.records).setTo(Some(records)) + .modify(_.count.perColumn).setTo(Some(perColCount.generator(generator, cols: _*).perColumnCount)) + + private def perColCount: PerColumnCountBuilder = { + count.perColumn match { + case Some(value) => PerColumnCountBuilder(value) + case None => PerColumnCountBuilder() + } + } +} + +/** + * Define number of records to generate based on certain column values. 
This is used in situations where + * you want to generate multiple records for a given set of column values to closer represent the real production + * data setting. For example, you may have a data set containing bank transactions where you want to generate + * multiple transactions per account. + */ +case class PerColumnCountBuilder(perColumnCount: PerColumnCount = PerColumnCount()) { + + /** + * Define the set of columns that should have multiple records generated for. + * + * @param cols Column names + * @return PerColumnCountBuilder + */ + @varargs def columns(cols: String*): PerColumnCountBuilder = + this.modify(_.perColumnCount.columnNames).setTo(cols.toList) + + /** + * Number of records to generate per set of column values defined + * + * @param records Number of records + * @param cols Column names + * @return PerColumnCountBuilder + */ + @varargs def records(records: Long, cols: String*): PerColumnCountBuilder = + columns(cols: _*).modify(_.perColumnCount.count).setTo(Some(records)) + + /** + * Define a generator to determine the number of records to generate per set of column value defined + * + * @param generator Generator for number of records + * @param cols Column names + * @return PerColumnCountBuilder + */ + @varargs def generator(generator: GeneratorBuilder, cols: String*): PerColumnCountBuilder = + columns(cols: _*).modify(_.perColumnCount.generator).setTo(Some(generator.generator)) +} + +case class SchemaBuilder(schema: Schema = Schema()) { + def this() = this(Schema()) + + def addField(name: String, `type`: DataType = StringType): SchemaBuilder = + addFields(FieldBuilder().name(name).`type`(`type`)) + + @varargs def addFields(fields: FieldBuilder*): SchemaBuilder = + this.modify(_.schema.fields).setTo(schema.fields match { + case Some(value) => Some(value ++ fields.map(_.field)) + case None => Some(fields.map(_.field).toList) + }) +} + +case class FieldBuilder(field: Field = Field()) { + def this() = this(Field()) + + def name(name: String): FieldBuilder = + this.modify(_.field.name).setTo(name) + + def `type`(`type`: DataType): FieldBuilder = + this.modify(_.field.`type`).setTo(Some(`type`.toString)) + + def schema(schema: SchemaBuilder): FieldBuilder = + this.modify(_.field.schema).setTo(Some(schema.schema)) + + def schema(schema: Schema): FieldBuilder = + this.modify(_.field.schema).setTo(Some(schema)) + + @varargs def schema(fields: FieldBuilder*): FieldBuilder = + this.modify(_.field.schema).setTo(Some(getSchema.addFields(fields: _*).schema)) + + def schema(metadataSourceBuilder: MetadataSourceBuilder): FieldBuilder = + this.modify(_.field.generator).setTo(Some(getGenBuilder.options(metadataSourceBuilder.metadataSource.allOptions).generator)) + + def nullable(nullable: Boolean): FieldBuilder = + this.modify(_.field.nullable).setTo(nullable) + + def generator(generator: GeneratorBuilder): FieldBuilder = + this.modify(_.field.generator).setTo(Some(generator.generator)) + + def generator(generator: Generator): FieldBuilder = + this.modify(_.field.generator).setTo(Some(generator)) + + def random: FieldBuilder = + this.modify(_.field.generator).setTo(Some(getGenBuilder.random.generator)) + + def sql(sql: String): FieldBuilder = + this.modify(_.field.generator).setTo(Some(getGenBuilder.sql(sql).generator)) + + def regex(regex: String): FieldBuilder = + this.modify(_.field.generator).setTo(Some(getGenBuilder.regex(regex).generator)) + + @varargs def oneOf(values: Any*): FieldBuilder = + this.modify(_.field.generator).setTo(Some(getGenBuilder.oneOf(values: 
_*).generator)) + .modify(_.field.`type`) + .setTo( + values match { + case Seq(_: Double, _*) => Some("double") + case Seq(_: String, _*) => Some("string") + case Seq(_: Int, _*) => Some("integer") + case Seq(_: Long, _*) => Some("long") + case Seq(_: Boolean, _*) => Some("boolean") + case _ => None + } + ) + + def options(options: Map[String, Any]): FieldBuilder = + this.modify(_.field.generator).setTo(Some(getGenBuilder.options(options).generator)) + + def option(option: (String, Any)): FieldBuilder = + this.modify(_.field.generator).setTo(Some(getGenBuilder.option(option).generator)) + + def seed(seed: Long): FieldBuilder = this.modify(_.field.generator).setTo(Some(getGenBuilder.seed(seed).generator)) + + def enableNull(enable: Boolean): FieldBuilder = + this.modify(_.field.generator).setTo(Some(getGenBuilder.enableNull(enable).generator)) + + def nullProbability(probability: Double): FieldBuilder = + this.modify(_.field.generator).setTo(Some(getGenBuilder.nullProbability(probability).generator)) + + def enableEdgeCases(enable: Boolean): FieldBuilder = + this.modify(_.field.generator).setTo(Some(getGenBuilder.enableEdgeCases(enable).generator)) + + def edgeCaseProbability(probability: Double): FieldBuilder = + this.modify(_.field.generator).setTo(Some(getGenBuilder.edgeCaseProbability(probability).generator)) + + def static(value: Any): FieldBuilder = + this.modify(_.field.generator).setTo(Some(getGenBuilder.static(value).generator)) + + def staticValue(value: Any): FieldBuilder = static(value) + + def unique(isUnique: Boolean): FieldBuilder = + this.modify(_.field.generator).setTo(Some(getGenBuilder.unique(isUnique).generator)) + + def arrayType(`type`: String): FieldBuilder = + this.modify(_.field.generator).setTo(Some(getGenBuilder.arrayType(`type`).generator)) + + def expression(expr: String): FieldBuilder = + this.modify(_.field.generator).setTo(Some(getGenBuilder.expression(expr).generator)) + + def avgLength(length: Int): FieldBuilder = + this.modify(_.field.generator).setTo(Some(getGenBuilder.avgLength(length).generator)) + + def min(min: Any): FieldBuilder = + this.modify(_.field.generator).setTo(Some(getGenBuilder.min(min).generator)) + + def minLength(length: Int): FieldBuilder = + this.modify(_.field.generator).setTo(Some(getGenBuilder.minLength(length).generator)) + + def arrayMinLength(length: Int): FieldBuilder = + this.modify(_.field.generator).setTo(Some(getGenBuilder.arrayMinLength(length).generator)) + + def max(max: Any): FieldBuilder = + this.modify(_.field.generator).setTo(Some(getGenBuilder.max(max).generator)) + + def maxLength(length: Int): FieldBuilder = + this.modify(_.field.generator).setTo(Some(getGenBuilder.maxLength(length).generator)) + + def arrayMaxLength(length: Int): FieldBuilder = + this.modify(_.field.generator).setTo(Some(getGenBuilder.arrayMaxLength(length).generator)) + + def numericPrecision(precision: Int): FieldBuilder = + this.modify(_.field.generator).setTo(Some(getGenBuilder.numericPrecision(precision).generator)) + + def numericScale(scale: Int): FieldBuilder = + this.modify(_.field.generator).setTo(Some(getGenBuilder.numericScale(scale).generator)) + + def omit(omit: Boolean): FieldBuilder = + this.modify(_.field.generator).setTo(Some(getGenBuilder.omit(omit).generator)) + + def primaryKey(isPrimaryKey: Boolean): FieldBuilder = + this.modify(_.field.generator).setTo(Some(getGenBuilder.primaryKey(isPrimaryKey).generator)) + + def primaryKeyPosition(position: Int): FieldBuilder = + 
this.modify(_.field.generator).setTo(Some(getGenBuilder.primaryKeyPosition(position).generator)) + + def clusteringPosition(position: Int): FieldBuilder = + this.modify(_.field.generator).setTo(Some(getGenBuilder.clusteringPosition(position).generator)) + + def standardDeviation(stddev: Double): FieldBuilder = + this.modify(_.field.generator).setTo(Some(getGenBuilder.standardDeviation(stddev).generator)) + + def mean(mean: Double): FieldBuilder = + this.modify(_.field.generator).setTo(Some(getGenBuilder.mean(mean).generator)) + + private def getGenBuilder: GeneratorBuilder = { + field.generator match { + case Some(gen) => GeneratorBuilder(gen) + case None => GeneratorBuilder() + } + } + + private def getSchema: SchemaBuilder = { + field.schema match { + case Some(schema) => SchemaBuilder(schema) + case None => SchemaBuilder() + } + } +} + +/** + * Data generator contains all the metadata, related to either a field or count generation, required to create new data. + */ +case class GeneratorBuilder(generator: Generator = Generator()) { + def this() = this(Generator()) + + /** + * Create a random data generator. Depending on the data type, particular defaults are set for the metadata + * + * @return GeneratorBuilder GeneratorBuilder + * @see Data generator default details here + */ + def random: GeneratorBuilder = + this.modify(_.generator.`type`).setTo(RANDOM_GENERATOR) + + /** + * Create a SQL based generator. You can reference other columns and SQL functions to generate data. The output data + * type from the SQL expression should also match the data type defined otherwise a runtime error will be thrown + * + * @param sql SQL expression + * @return GeneratorBuilder + */ + def sql(sql: String): GeneratorBuilder = + this.modify(_.generator.`type`).setTo(SQL_GENERATOR) + .modify(_.generator.options)(_ ++ Map(SQL_GENERATOR -> sql)) + + /** + * Create a generator based on a particular regex + * + * @param regex Regex data should adhere to + * @return GeneratorBuilder + */ + def regex(regex: String): GeneratorBuilder = + this.modify(_.generator.`type`).setTo(REGEX_GENERATOR) + .modify(_.generator.options)(_ ++ Map(REGEX_GENERATOR -> regex)) + + /** + * Create a generator that can only generate values from a set of values defined. + * + * @param values Set of valid values + * @return GeneratorBuilder + */ + @varargs def oneOf(values: Any*): GeneratorBuilder = this.modify(_.generator.`type`).setTo(ONE_OF_GENERATOR) + .modify(_.generator.options)(_ ++ Map(ONE_OF_GENERATOR -> values)) + + /** + * Define metadata map for your generator. Add/overwrites existing metadata + * + * @param options Metadata map + * @return GeneratorBuilder + */ + def options(options: Map[String, Any]): GeneratorBuilder = + this.modify(_.generator.options)(_ ++ options) + + /** + * Wrapper for Java Map + * + * @param options Metadata map + * @return + */ + def options(options: java.util.Map[String, Any]): GeneratorBuilder = + this.options(toScalaMap(options)) + + /** + * Define metadata for your generator. Add/overwrites existing metadata + * + * @param option Key and value for metadata + * @return GeneratorBuilder + */ + def option(option: (String, Any)): GeneratorBuilder = + this.modify(_.generator.options)(_ ++ Map(option)) + + /** + * Seed to use for random generator. 
If you want to generate a consistent set of values, use this method + * + * @param seed Random seed + * @return GeneratorBuilder + */ + def seed(seed: Long): GeneratorBuilder = + this.modify(_.generator.options)(_ ++ Map(RANDOM_SEED -> seed.toString)) + + /** + * Enable/disable null values to be generated for this field + * + * @param enable Enable/disable null values + * @return GeneratorBuilder + */ + def enableNull(enable: Boolean): GeneratorBuilder = + this.modify(_.generator.options)(_ ++ Map(ENABLED_NULL -> enable.toString)) + + /** + * If [[enableNull]] is enabled, the generator will generate null values with the probability defined. + * Value needs to be between 0.0 and 1.0. + * + * @param probability Probability of null values generated + * @return GeneratorBuilder + */ + def nullProbability(probability: Double): GeneratorBuilder = + this.modify(_.generator.options)(_ ++ Map(PROBABILITY_OF_NULL -> probability.toString)) + + /** + * Enable/disable edge case values to be generated. The edge cases are based on the data type defined. + * + * @param enable Enable/disable edge case values + * @return GeneratorBuilder + * @see Generator details here + */ + def enableEdgeCases(enable: Boolean): GeneratorBuilder = + this.modify(_.generator.options)(_ ++ Map(ENABLED_EDGE_CASE -> enable.toString)) + + + /** + * If [[enableEdgeCases]] is enabled, the generator will generate edge case values with the probability + * defined. Value needs to be between 0.0 and 1.0. + * + * @param probability Probability of edge case values generated + * @return GeneratorBuilder + */ + def edgeCaseProbability(probability: Double): GeneratorBuilder = + this.modify(_.generator.options)(_ ++ Map(PROBABILITY_OF_EDGE_CASE -> probability.toString)) + + /** + * Generator will always give back the static value, ignoring all other metadata defined + * + * @param value Always generate this value + * @return GeneratorBuilder + */ + def static(value: Any): GeneratorBuilder = + this.modify(_.generator.options)(_ ++ Map(STATIC -> value.toString)) + + /** + * Wrapper for Java given `static` is a keyword + * + * @param value Always generate this value + * @return GeneratorBuilder + */ + def staticValue(value: Any): GeneratorBuilder = static(value) + + /** + * Unique values within the generated data will be generated. This does not take into account values already existing + * in the data source defined. It also requires the flag + * [[DataCatererConfigurationBuilder.enableUniqueCheck]] + * to be enabled (disabled by default as it is an expensive operation). + * + * @param isUnique Enable/disable generating unique values + * @return GeneratorBuilder + */ + def unique(isUnique: Boolean): GeneratorBuilder = + this.modify(_.generator.options)(_ ++ Map(IS_UNIQUE -> isUnique.toString)) + + /** + * If data type is array, define the inner data type of the array + * + * @param type Type of array + * @return GeneratorBuilder + */ + def arrayType(`type`: String): GeneratorBuilder = + this.modify(_.generator.options)(_ ++ Map(ARRAY_TYPE -> `type`)) + + /** + * Use a DataFaker expression to generate data. If you want to know what is possible to use as an expression, follow + * the below link. + * + * @param expr DataFaker expression + * @return GeneratorBuilder + * @see Expression details + */ + def expression(expr: String): GeneratorBuilder = + this.modify(_.generator.options)(_ ++ Map(EXPRESSION -> expr)) + + /** + * Average length of data generated. 
Length is specifically used for String data type and is ignored for other data types + * + * @param length Average length + * @return GeneratorBuilder + */ + def avgLength(length: Int): GeneratorBuilder = + this.modify(_.generator.options)(_ ++ Map(AVERAGE_LENGTH -> length.toString)) + + /** + * Minimum value to be generated. This can be used for any data type except for Struct and Array. + * + * @param min Minimum value + * @return GeneratorBuilder + */ + def min(min: Any): GeneratorBuilder = + this.modify(_.generator.options)(_ ++ Map(MINIMUM -> min.toString)) + + /** + * Minimum length of data generated. Length is specifically used for String data type and is ignored for other data types + * + * @param length Minimum length + * @return GeneratorBuilder + */ + def minLength(length: Int): GeneratorBuilder = + this.modify(_.generator.options)(_ ++ Map(MINIMUM_LENGTH -> length.toString)) + + /** + * Minimum length of array generated. Only used when data type is Array + * + * @param length Minimum length of array + * @return GeneratorBuilder + */ + def arrayMinLength(length: Int): GeneratorBuilder = + this.modify(_.generator.options)(_ ++ Map(ARRAY_MINIMUM_LENGTH -> length.toString)) + + /** + * Maximum value to be generated. This can be used for any data type except for Struct and Array. Can be ignored in + * scenario where database column is auto increment where values generated start from the max value. + * + * @param max Maximum value + * @return GeneratorBuilder + */ + def max(max: Any): GeneratorBuilder = + this.modify(_.generator.options)(_ ++ Map(MAXIMUM -> max.toString)) + + /** + * Maximum length of data generated. Length is specifically used for String data type and is ignored for other data types + * + * @param length Maximum length + * @return GeneratorBuilder + */ + def maxLength(length: Int): GeneratorBuilder = + this.modify(_.generator.options)(_ ++ Map(MAXIMUM_LENGTH -> length.toString)) + + /** + * Maximum length of array generated. Only used when data type is Array + * + * @param length Maximum length of array + * @return GeneratorBuilder + */ + def arrayMaxLength(length: Int): GeneratorBuilder = + this.modify(_.generator.options)(_ ++ Map(ARRAY_MAXIMUM_LENGTH -> length.toString)) + + /** + * Numeric precision used for Decimal data type + * + * @param precision Decimal precision + * @return GeneratorBuilder + */ + def numericPrecision(precision: Int): GeneratorBuilder = + this.modify(_.generator.options)(_ ++ Map(NUMERIC_PRECISION -> precision.toString)) + + /** + * Numeric scale for Decimal data type + * + * @param scale Decimal scale + * @return GeneratorBuilder + */ + def numericScale(scale: Int): GeneratorBuilder = + this.modify(_.generator.options)(_ ++ Map(NUMERIC_SCALE -> scale.toString)) + + /** + * Enable/disable including the value in the final output to the data source. Allows you to define intermediate values + * that can be used to generate other columns + * + * @param omit Enable/disable the value being in output to data source + * @return GeneratorBuilder + */ + def omit(omit: Boolean): GeneratorBuilder = + this.modify(_.generator.options)(_ ++ Map(OMIT -> omit.toString)) + + /** + * Field is a primary key of the data source. + * + * @param isPrimaryKey Enable/disable field being a primary key + * @return GeneratorBuilder + */ + def primaryKey(isPrimaryKey: Boolean): GeneratorBuilder = + this.modify(_.generator.options)(_ ++ Map(IS_PRIMARY_KEY -> isPrimaryKey.toString)) + + /** + * If [[primaryKey]] is enabled, this defines the position of the primary key. 
Starts at 1. + * + * @param position Position of primary key + * @return GeneratorBuilder + */ + def primaryKeyPosition(position: Int): GeneratorBuilder = + this.modify(_.generator.options)(_ ++ Map(PRIMARY_KEY_POSITION -> position.toString)) + + /** + * If the data source supports clustering order (like Cassandra), this represents the order of the clustering key. + * Starts at 1. + * + * @param position Position of clustering key + * @return GeneratorBuilder + */ + def clusteringPosition(position: Int): GeneratorBuilder = + this.modify(_.generator.options)(_ ++ Map(CLUSTERING_POSITION -> position.toString)) + + /** + * The standard deviation of the data if it follows a normal distribution. + * + * @param stddev Standard deviation + * @return GeneratorBuilder + */ + def standardDeviation(stddev: Double): GeneratorBuilder = + this.modify(_.generator.options)(_ ++ Map(STANDARD_DEVIATION -> stddev.toString)) + + /** + * The mean of the data if it follows a normal distribution. + * + * @param mean Mean + * @return GeneratorBuilder + */ + def mean(mean: Double): GeneratorBuilder = + this.modify(_.generator.options)(_ ++ Map(MEAN -> mean.toString)) +} \ No newline at end of file diff --git a/api/src/main/scala/com/github/pflooky/datacaterer/api/ValidationBuilder.scala b/api/src/main/scala/com/github/pflooky/datacaterer/api/ValidationBuilder.scala new file mode 100644 index 00000000..43fad55c --- /dev/null +++ b/api/src/main/scala/com/github/pflooky/datacaterer/api/ValidationBuilder.scala @@ -0,0 +1,505 @@ +package com.github.pflooky.datacaterer.api + +import com.fasterxml.jackson.databind.annotation.JsonSerialize +import com.github.pflooky.datacaterer.api.connection.{ConnectionTaskBuilder, FileBuilder} +import com.github.pflooky.datacaterer.api.model.Constants.{AGGREGATION_AVG, AGGREGATION_COUNT, AGGREGATION_MAX, AGGREGATION_MIN, AGGREGATION_STDDEV, AGGREGATION_SUM, DEFAULT_VALIDATION_JOIN_TYPE, DEFAULT_VALIDATION_WEBHOOK_HTTP_DATA_SOURCE_NAME, VALIDATION_PREFIX_JOIN_EXPRESSION, VALIDATION_UNIQUE} +import com.github.pflooky.datacaterer.api.model.{DataExistsWaitCondition, DataSourceValidation, ExpressionValidation, FileExistsWaitCondition, GroupByValidation, PauseWaitCondition, UpstreamDataSourceValidation, Validation, ValidationConfiguration, WaitCondition, WebhookWaitCondition} +import com.github.pflooky.datacaterer.api.parser.ValidationBuilderSerializer +import com.softwaremill.quicklens.ModifyPimp + +import java.sql.{Date, Timestamp} +import scala.annotation.varargs + + +case class ValidationConfigurationBuilder(validationConfiguration: ValidationConfiguration = ValidationConfiguration()) { + def this() = this(ValidationConfiguration()) + + def name(name: String): ValidationConfigurationBuilder = + this.modify(_.validationConfiguration.name).setTo(name) + + def description(description: String): ValidationConfigurationBuilder = + this.modify(_.validationConfiguration.description).setTo(description) + + def addDataSourceValidation( + dataSourceName: String, + validations: Seq[DataSourceValidation] + ): ValidationConfigurationBuilder = { + val mergedDataSourceValidations = mergeDataSourceValidations(dataSourceName, validations) + this.modify(_.validationConfiguration.dataSources)(_ ++ Map(dataSourceName -> mergedDataSourceValidations)) + } + + def addDataSourceValidation( + dataSourceName: String, + validation: DataSourceValidationBuilder + ): ValidationConfigurationBuilder = { + val mergedDataSourceValidations = mergeDataSourceValidations(dataSourceName, validation) + 
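+ //append to any validations already registered under this data source name, so repeated calls accumulate rather than overwrite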
this.modify(_.validationConfiguration.dataSources)(_ ++ Map(dataSourceName -> mergedDataSourceValidations)) + } + + @varargs def addValidations( + dataSourceName: String, + options: Map[String, String], + validations: ValidationBuilder* + ): ValidationConfigurationBuilder = + addValidations(dataSourceName, options, WaitConditionBuilder(), validations: _*) + + @varargs def addValidations( + dataSourceName: String, + options: Map[String, String], + waitCondition: WaitConditionBuilder, + validationBuilders: ValidationBuilder* + ): ValidationConfigurationBuilder = { + val newDsValidation = DataSourceValidationBuilder().options(options).wait(waitCondition).validations(validationBuilders: _*) + addDataSourceValidation(dataSourceName, newDsValidation) + } + + private def mergeDataSourceValidations(dataSourceName: String, validation: DataSourceValidationBuilder): List[DataSourceValidation] = { + validationConfiguration.dataSources.get(dataSourceName) + .map(listDsValidations => listDsValidations ++ List(validation.dataSourceValidation)) + .getOrElse(List(validation.dataSourceValidation)) + } + + private def mergeDataSourceValidations(dataSourceName: String, validations: Seq[DataSourceValidation]): List[DataSourceValidation] = { + validationConfiguration.dataSources.get(dataSourceName) + .map(listDsValidations => listDsValidations ++ validations) + .getOrElse(validations.toList) + } +} + +case class DataSourceValidationBuilder(dataSourceValidation: DataSourceValidation = DataSourceValidation()) { + def this() = this(DataSourceValidation()) + + def options(options: Map[String, String]): DataSourceValidationBuilder = + this.modify(_.dataSourceValidation.options)(_ ++ options) + + def option(option: (String, String)): DataSourceValidationBuilder = + this.modify(_.dataSourceValidation.options)(_ ++ Map(option)) + + @varargs def validations(validations: ValidationBuilder*): DataSourceValidationBuilder = + this.modify(_.dataSourceValidation.validations)(_ ++ validations) + + def wait(waitCondition: WaitConditionBuilder): DataSourceValidationBuilder = + this.modify(_.dataSourceValidation.waitCondition).setTo(waitCondition.waitCondition) + + def wait(waitCondition: WaitCondition): DataSourceValidationBuilder = + this.modify(_.dataSourceValidation.waitCondition).setTo(waitCondition) +} + +@JsonSerialize(using = classOf[ValidationBuilderSerializer]) +case class ValidationBuilder(validation: Validation = ExpressionValidation()) { + def this() = this(ExpressionValidation()) + + def description(description: String): ValidationBuilder = { + this.validation.description = Some(description) + this + } + + /** + * Define the number of records or percentage of records that do not meet the validation rule before marking the validation + * as failed. If no error threshold is defined, any failures will mark the whole validation as failed.
+ * For example, if there are 10 records and 4 have failed:
+ * {{{errorThreshold(2) #marked as failed as more than 2 records have failed}}} + * {{{errorThreshold(0.1) #marked as failed as more than 10% of records have failed}}} + * {{{errorThreshold(4) #marked as success as less than or equal to 4 records have failed}}} + * {{{errorThreshold(0.4) #marked as success as less than or equal to 40% of records have failed}}} + * + * @param threshold Number or percentage of failed records which is acceptable before marking as failed + * @return ValidationBuilder + */ + def errorThreshold(threshold: Double): ValidationBuilder = { + this.validation.errorThreshold = Some(threshold) + this + } + + /** + * SQL expression used to check if data is adhering to specified condition. Return result from SQL expression is + * required to be boolean. Can use any columns in the validation logic. + * + * For example, + * {{{validation.expr("CASE WHEN status == 'open' THEN balance > 0 ELSE balance == 0 END}}} + * + * @param expr SQL expression which returns a boolean + * @return ValidationBuilder + * @see SQL expressions + */ + def expr(expr: String): ValidationBuilder = { + validation match { + case GroupByValidation(grpCols, aggCol, aggType, _) => + val grpWithExpr = GroupByValidation(grpCols, aggCol, aggType, expr) + grpWithExpr.description = this.validation.description + grpWithExpr.errorThreshold = this.validation.errorThreshold + this.modify(_.validation).setTo(grpWithExpr) + case expressionValidation: ExpressionValidation => + val withExpr = expressionValidation.modify(_.expr).setTo(expr) + withExpr.description = this.validation.description + withExpr.errorThreshold = this.validation.errorThreshold + this.modify(_.validation).setTo(withExpr) + } + } + + /** + * Define a column validation that can cover validations for any type of data. + * + * @param column Name of the column to run validation against + * @return ColumnValidationBuilder + */ + def col(column: String): ColumnValidationBuilder = { + ColumnValidationBuilder(this, column) + } + + /** + * Define columns to group by, so that validation can be run on grouped by dataset + * + * @param columns Name of the column to run validation against + * @return ColumnValidationBuilder + */ + @varargs def groupBy(columns: String*): GroupByValidationBuilder = { + GroupByValidationBuilder(this, columns) + } + + /** + * Check row count of dataset + * + * @return ColumnValidationBuilder to apply validation on row count + */ + def count(): ColumnValidationBuilder = { + GroupByValidationBuilder().count() + } + + /** + * Check if column(s) values are unique + * + * @param columns One or more columns whose values will be checked for uniqueness + * @return ValidationBuilder + */ + @varargs def unique(columns: String*): ValidationBuilder = { + this.modify(_.validation).setTo(GroupByValidation(columns, VALIDATION_UNIQUE, AGGREGATION_COUNT)) + .expr("count == 1") + } + + /** + * Define validations based on data in another data source. + * json(...) 
+ * .validations( + * validation.upstreamData(csvTask).joinCols("account_id").withValidation(validation.col("upstream_name").isEqualCol("name")), + * validation.upstreamData(csvTask).joinCols("account_id").withValidation(validation.expr("upstream_name == name")), + * validation.upstreamData(csvTask).joinCols("account_id").withValidation(validation.groupBy("account_id").sum("amount").lessThanCol("balance")), + * ) + * + * @param connectionTaskBuilder + * @return + */ + def upstreamData(connectionTaskBuilder: ConnectionTaskBuilder[_]): UpstreamDataSourceValidationBuilder = { + UpstreamDataSourceValidationBuilder(this, connectionTaskBuilder) + } +} + +case class ColumnValidationBuilder(validationBuilder: ValidationBuilder = ValidationBuilder(), column: String = "") { + def this() = this(ValidationBuilder(), "") + + def isEqual(value: Any): ValidationBuilder = { + validationBuilder.expr(s"$column == ${colValueToString(value)}") + } + + def isEqualCol(value: String): ValidationBuilder = { + validationBuilder.expr(s"$column == $value") + } + + def isNotEqual(value: Any): ValidationBuilder = { + validationBuilder.expr(s"$column != ${colValueToString(value)}") + } + + def isNotEqualCol(value: String): ValidationBuilder = { + validationBuilder.expr(s"$column != $value") + } + + def isNull: ValidationBuilder = { + validationBuilder.expr(s"ISNULL($column)") + } + + def isNotNull: ValidationBuilder = { + validationBuilder.expr(s"ISNOTNULL($column)") + } + + def contains(value: String): ValidationBuilder = { + validationBuilder.expr(s"CONTAINS($column, '$value')") + } + + def notContains(value: String): ValidationBuilder = { + validationBuilder.expr(s"!CONTAINS($column, '$value')") + } + + def lessThan(value: Any): ValidationBuilder = { + validationBuilder.expr(s"$column < ${colValueToString(value)}") + } + + def lessThanCol(value: String): ValidationBuilder = { + validationBuilder.expr(s"$column < $value") + } + + def lessThanOrEqual(value: Any): ValidationBuilder = { + validationBuilder.expr(s"$column <= ${colValueToString(value)}") + } + + def lessThanOrEqualCol(value: String): ValidationBuilder = { + validationBuilder.expr(s"$column <= $value") + } + + def greaterThan(value: Any): ValidationBuilder = { + validationBuilder.expr(s"$column > ${colValueToString(value)}") + } + + def greaterThanCol(value: String): ValidationBuilder = { + validationBuilder.expr(s"$column > $value") + } + + def greaterThanOrEqual(value: Any): ValidationBuilder = { + validationBuilder.expr(s"$column >= ${colValueToString(value)}") + } + + def greaterThanOrEqualCol(value: String): ValidationBuilder = { + validationBuilder.expr(s"$column >= $value") + } + + def between(minValue: Any, maxValue: Any): ValidationBuilder = { + validationBuilder.expr(s"$column BETWEEN ${colValueToString(minValue)} AND ${colValueToString(maxValue)}") + } + + def betweenCol(minValue: String, maxValue: String): ValidationBuilder = { + validationBuilder.expr(s"$column BETWEEN $minValue AND $maxValue") + } + + def notBetween(minValue: Any, maxValue: Any): ValidationBuilder = { + validationBuilder.expr(s"$column NOT BETWEEN ${colValueToString(minValue)} AND ${colValueToString(maxValue)}") + } + + def notBetweenCol(minValue: String, maxValue: String): ValidationBuilder = { + validationBuilder.expr(s"$column NOT BETWEEN $minValue AND $maxValue") + } + + @varargs def in(values: Any*): ValidationBuilder = { + validationBuilder.expr(s"$column IN (${values.map(colValueToString).mkString(",")})") + } + + @varargs def notIn(values: Any*): ValidationBuilder = { 
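+ //builds a SQL predicate of the form NOT <column> IN (<values>), quoting strings, dates and timestamps via colValueToString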
+ validationBuilder.expr(s"NOT $column IN (${values.map(colValueToString).mkString(",")})") + } + + def matches(regex: String): ValidationBuilder = { + validationBuilder.expr(s"REGEXP($column, '$regex')") + } + + def notMatches(regex: String): ValidationBuilder = { + validationBuilder.expr(s"!REGEXP($column, '$regex')") + } + + def startsWith(value: String): ValidationBuilder = { + validationBuilder.expr(s"STARTSWITH($column, '$value')") + } + + def notStartsWith(value: String): ValidationBuilder = { + validationBuilder.expr(s"!STARTSWITH($column, '$value')") + } + + def endsWith(value: String): ValidationBuilder = { + validationBuilder.expr(s"ENDSWITH($column, '$value')") + } + + def notEndsWith(value: String): ValidationBuilder = { + validationBuilder.expr(s"!ENDSWITH($column, '$value')") + } + + def size(size: Int): ValidationBuilder = { + validationBuilder.expr(s"SIZE($column) == $size") + } + + def notSize(size: Int): ValidationBuilder = { + validationBuilder.expr(s"SIZE($column) != $size") + } + + def lessThanSize(size: Int): ValidationBuilder = { + validationBuilder.expr(s"SIZE($column) < $size") + } + + def lessThanOrEqualSize(size: Int): ValidationBuilder = { + validationBuilder.expr(s"SIZE($column) <= $size") + } + + def greaterThanSize(size: Int): ValidationBuilder = { + validationBuilder.expr(s"SIZE($column) > $size") + } + + def greaterThanOrEqualSize(size: Int): ValidationBuilder = { + validationBuilder.expr(s"SIZE($column) >= $size") + } + + def luhnCheck: ValidationBuilder = { + validationBuilder.expr(s"LUHN_CHECK($column)") + } + + def hasType(`type`: String): ValidationBuilder = { + validationBuilder.expr(s"TYPEOF($column) == '${`type`}'") + } + + def expr(expr: String): ValidationBuilder = { + validationBuilder.expr(expr) + } + + private def colValueToString(value: Any): String = { + value match { + case _: String => s"'$value'" + case _: Date => s"DATE('$value')" + case _: Timestamp => s"TIMESTAMP('$value')" + case _ => s"$value" + } + } +} + +case class GroupByValidationBuilder( + validationBuilder: ValidationBuilder = ValidationBuilder(), + groupByCols: Seq[String] = Seq() + ) { + def this() = this(ValidationBuilder(), Seq()) + + def sum(column: String): ColumnValidationBuilder = { + setGroupValidation(column, AGGREGATION_SUM) + } + + def count(column: String): ColumnValidationBuilder = { + setGroupValidation(column, AGGREGATION_COUNT) + } + + def count(): ColumnValidationBuilder = { + setGroupValidation("", AGGREGATION_COUNT) + } + + def min(column: String): ColumnValidationBuilder = { + setGroupValidation(column, AGGREGATION_MIN) + } + + def max(column: String): ColumnValidationBuilder = { + setGroupValidation(column, AGGREGATION_MAX) + } + + def avg(column: String): ColumnValidationBuilder = { + setGroupValidation(column, AGGREGATION_AVG) + } + + def stddev(column: String): ColumnValidationBuilder = { + setGroupValidation(column, AGGREGATION_STDDEV) + } + + private def setGroupValidation(column: String, aggType: String): ColumnValidationBuilder = { + val groupByValidation = GroupByValidation(groupByCols, column, aggType) + groupByValidation.errorThreshold = validationBuilder.validation.errorThreshold + groupByValidation.description = validationBuilder.validation.description + val colName = if (column.isEmpty) aggType else s"$aggType($column)" + ColumnValidationBuilder(validationBuilder.modify(_.validation).setTo(groupByValidation), colName) + } +} + +case class UpstreamDataSourceValidationBuilder( + validationBuilder: ValidationBuilder = ValidationBuilder(), + 
connectionTaskBuilder: ConnectionTaskBuilder[_] = FileBuilder(), + readOptions: Map[String, String] = Map(), + joinColumns: List[String] = List(), + joinType: String = DEFAULT_VALIDATION_JOIN_TYPE + ) { + def this() = this(ValidationBuilder(), FileBuilder(), Map(), List(), DEFAULT_VALIDATION_JOIN_TYPE) + + def readOptions(readOptions: Map[String, String]): UpstreamDataSourceValidationBuilder = { + this.modify(_.readOptions).setTo(readOptions) + } + + @varargs def joinColumns(joinCols: String*): UpstreamDataSourceValidationBuilder = { + this.modify(_.joinColumns).setTo(joinCols.toList) + } + + def joinExpr(expr: String): UpstreamDataSourceValidationBuilder = { + this.modify(_.joinColumns).setTo(List(s"$VALIDATION_PREFIX_JOIN_EXPRESSION$expr")) + } + + def joinType(joinType: String): UpstreamDataSourceValidationBuilder = { + this.modify(_.joinType).setTo(joinType) + } + + def withValidation(validationBuilder: ValidationBuilder): ValidationBuilder = { + validationBuilder.modify(_.validation).setTo(UpstreamDataSourceValidation(validationBuilder, connectionTaskBuilder, readOptions, joinColumns, joinType)) + } +} + +case class WaitConditionBuilder(waitCondition: WaitCondition = PauseWaitCondition()) { + def this() = this(PauseWaitCondition()) + + /** + * Pause for configurable number of seconds, before starting data validations. + * + * @param pauseInSeconds Seconds to pause + * @return WaitConditionBuilder + */ + def pause(pauseInSeconds: Int): WaitConditionBuilder = this.modify(_.waitCondition).setTo(PauseWaitCondition(pauseInSeconds)) + + /** + * Wait until file exists within path before starting data validations. + * + * @param path Path to file + * @return WaitConditionBuilder + */ + def file(path: String): WaitConditionBuilder = this.modify(_.waitCondition).setTo(FileExistsWaitCondition(path)) + + /** + * Wait until a specific data condition is met before starting data validations. Specific data condition to be defined + * as a SQL expression that returns a boolean value. Need to use a data source that is already defined. + * + * @param dataSourceName Name of data source that is already defined + * @param options Additional data source connection options to use to get data + * @param expr SQL expression that returns a boolean + * @return WaitConditionBuilder + */ + def dataExists(dataSourceName: String, options: Map[String, String], expr: String): WaitConditionBuilder = + this.modify(_.waitCondition).setTo(DataExistsWaitCondition(dataSourceName, options, expr)) + + /** + * Wait until GET request to URL returns back 200 status code, then will start data validations + * + * @param url URL for HTTP GET request + * @return WaitConditionBuilder + */ + def webhook(url: String): WaitConditionBuilder = + webhook(DEFAULT_VALIDATION_WEBHOOK_HTTP_DATA_SOURCE_NAME, url) + + /** + * Wait until URL returns back one of the status codes provided before starting data validations. + * + * @param url URL for HTTP request + * @param method HTTP method (i.e. GET, PUT, POST) + * @param statusCodes HTTP status codes that are treated as successful + * @return WaitConditionBuilder + */ + @varargs def webhook(url: String, method: String, statusCodes: Int*): WaitConditionBuilder = + webhook(DEFAULT_VALIDATION_WEBHOOK_HTTP_DATA_SOURCE_NAME, url, method, statusCodes: _*) + + /** + * Wait until pre-defined HTTP data source with URL, returns back 200 status code from GET request before starting + * data validations. 
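+ * For example, {{{webhook("my_http_source", "http://localhost:8080/health")}}} where the data source name and URL are illustrative placeholders.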
+ * + * @param dataSourceName Name of data source already defined + * @param url URL for HTTP GET request + * @return WaitConditionBuilder + */ + def webhook(dataSourceName: String, url: String): WaitConditionBuilder = + this.modify(_.waitCondition).setTo(WebhookWaitCondition(dataSourceName, url)) + + /** + * Wait until pre-defined HTTP data source with URL, HTTP method and set of successful status codes, return back one + * of the successful status codes before starting data validations. + * + * @param dataSourceName Name of data source already defined + * @param url URL for HTTP request + * @param method HTTP method (i.e. GET, PUT, POST) + * @param statusCode HTTP status codes that are treated as successful + * @return WaitConditionBuilder + */ + @varargs def webhook(dataSourceName: String, url: String, method: String, statusCode: Int*): WaitConditionBuilder = + this.modify(_.waitCondition).setTo(WebhookWaitCondition(dataSourceName, url, method, statusCode.toList)) +} \ No newline at end of file diff --git a/api/src/main/scala/com/github/pflooky/datacaterer/api/connection/ConnectionBuilder.scala b/api/src/main/scala/com/github/pflooky/datacaterer/api/connection/ConnectionBuilder.scala new file mode 100644 index 00000000..061c29ea --- /dev/null +++ b/api/src/main/scala/com/github/pflooky/datacaterer/api/connection/ConnectionBuilder.scala @@ -0,0 +1,193 @@ +package com.github.pflooky.datacaterer.api.connection + +import com.github.pflooky.datacaterer.api.model.Constants.FORMAT +import com.github.pflooky.datacaterer.api.model.{Step, Task} +import com.github.pflooky.datacaterer.api.{ConnectionConfigWithTaskBuilder, CountBuilder, FieldBuilder, GeneratorBuilder, MetadataSourceBuilder, SchemaBuilder, StepBuilder, TaskBuilder, TasksBuilder, ValidationBuilder, WaitConditionBuilder} + +import scala.annotation.varargs + +trait ConnectionTaskBuilder[T] { + var connectionConfigWithTaskBuilder: ConnectionConfigWithTaskBuilder = ConnectionConfigWithTaskBuilder() + var task: Option[TaskBuilder] = None + var step: Option[StepBuilder] = None + + def apply(builder: ConnectionConfigWithTaskBuilder, optTask: Option[Task], optStep: Option[Step]): ConnectionTaskBuilder[T] = { + this.connectionConfigWithTaskBuilder = builder + this.task = optTask.map(TaskBuilder) + this.step = optStep.map(s => StepBuilder(s)) + this + } + + def fromBaseConfig(connectionTaskBuilder: ConnectionTaskBuilder[T]): T + + def step(stepBuilder: StepBuilder): ConnectionTaskBuilder[T] = { + this.step = Some(stepBuilder) + this + } + + @varargs def schema(fields: FieldBuilder*): ConnectionTaskBuilder[T] = { + this.step = Some(getStep.schema(fields: _*)) + this + } + + def schema(schemaBuilder: SchemaBuilder): ConnectionTaskBuilder[T] = { + this.step = Some(getStep.schema(schemaBuilder)) + this + } + + def schema(metadataSourceBuilder: MetadataSourceBuilder): ConnectionTaskBuilder[T] = { + this.connectionConfigWithTaskBuilder = this.connectionConfigWithTaskBuilder.metadataSource(metadataSourceBuilder) + this.step = Some(getStep.options(metadataSourceBuilder.metadataSource.allOptions)) + this + } + + def count(countBuilder: CountBuilder): ConnectionTaskBuilder[T] = { + this.step = Some(getStep.count(countBuilder)) + this + } + + def count(generatorBuilder: GeneratorBuilder): ConnectionTaskBuilder[T] = { + this.step = Some(getStep.count(generatorBuilder)) + this + } + + def numPartitions(numPartitions: Int): ConnectionTaskBuilder[T] = { + this.step = Some(getStep.numPartitions(numPartitions)) + this + } + + @varargs def 
validations(validationBuilders: ValidationBuilder*): ConnectionTaskBuilder[T] = { + this.step = Some(getStep.validations(validationBuilders: _*)) + this + } + + def validationWait(waitConditionBuilder: WaitConditionBuilder): ConnectionTaskBuilder[T] = { + this.step = Some(getStep.wait(waitConditionBuilder)) + this + } + + def validationWaitDataExists(expr: String): ConnectionTaskBuilder[T] = { + val waitConditionBuilder = new WaitConditionBuilder() + .dataExists(this.connectionConfigWithTaskBuilder.dataSourceName, this.connectionConfigWithTaskBuilder.options, expr) + this.step = Some(getStep.wait(waitConditionBuilder)) + this + } + + def task(taskBuilder: TaskBuilder): ConnectionTaskBuilder[T] = { + this.task = Some(taskBuilder) + this + } + + @varargs def task(stepBuilders: StepBuilder*): ConnectionTaskBuilder[T] = { + this.task = Some(TaskBuilder().steps(stepBuilders: _*)) + this + } + + def toTasksBuilder: Option[TasksBuilder] = { + val dataSourceName = connectionConfigWithTaskBuilder.dataSourceName + val format = connectionConfigWithTaskBuilder.options(FORMAT) + val optBaseTask = (task, step) match { + case (Some(task), Some(step)) => Some(task.steps(step.`type`(format))) + case (Some(task), None) => Some(task) + case (None, Some(step)) => Some(TaskBuilder().steps(step.`type`(format))) + case _ => None + } + + optBaseTask.map(TasksBuilder().addTasks(dataSourceName, _)) + } + + protected def getStep: StepBuilder = step match { + case Some(value) => value + case None => StepBuilder() + } + + protected def getTask: TaskBuilder = task match { + case Some(value) => value + case None => TaskBuilder() + } +} + +case class FileBuilder() extends ConnectionTaskBuilder[FileBuilder] { + override def fromBaseConfig(connectionTaskBuilder: ConnectionTaskBuilder[FileBuilder]): FileBuilder = { + this.connectionConfigWithTaskBuilder = connectionTaskBuilder.connectionConfigWithTaskBuilder + this + } + + @varargs def partitionBy(partitionsBy: String*): FileBuilder = { + this.step = Some(getStep.partitionBy(partitionsBy: _*)) + this + } +} + +trait JdbcBuilder[T] extends ConnectionTaskBuilder[T] { + + def table(table: String): JdbcBuilder[T] = { + this.step = Some(getStep.jdbcTable(table)) + this + } + + def table(schema: String, table: String): JdbcBuilder[T] = { + this.step = Some(getStep.jdbcTable(schema, table)) + this + } +} + +case class PostgresBuilder() extends JdbcBuilder[PostgresBuilder] { + override def fromBaseConfig(connectionTaskBuilder: ConnectionTaskBuilder[PostgresBuilder]): PostgresBuilder = { + this.connectionConfigWithTaskBuilder = connectionTaskBuilder.connectionConfigWithTaskBuilder + this + } +} + +case class MySqlBuilder() extends JdbcBuilder[MySqlBuilder] { + override def fromBaseConfig(connectionTaskBuilder: ConnectionTaskBuilder[MySqlBuilder]): MySqlBuilder = { + this.connectionConfigWithTaskBuilder = connectionTaskBuilder.connectionConfigWithTaskBuilder + this + } +} + +case class CassandraBuilder() extends ConnectionTaskBuilder[CassandraBuilder] { + override def fromBaseConfig(connectionTaskBuilder: ConnectionTaskBuilder[CassandraBuilder]): CassandraBuilder = { + this.connectionConfigWithTaskBuilder = connectionTaskBuilder.connectionConfigWithTaskBuilder + this + } + + def table(keyspace: String, table: String): CassandraBuilder = { + this.step = Some(getStep.cassandraTable(keyspace, table)) + this + } + +} + +case class SolaceBuilder() extends ConnectionTaskBuilder[SolaceBuilder] { + override def fromBaseConfig(connectionTaskBuilder: ConnectionTaskBuilder[SolaceBuilder]): 
SolaceBuilder = { + this.connectionConfigWithTaskBuilder = connectionTaskBuilder.connectionConfigWithTaskBuilder + this + } + + def destination(destination: String): SolaceBuilder = { + this.step = Some(getStep.jmsDestination(destination)) + this + } + +} + +case class KafkaBuilder() extends ConnectionTaskBuilder[KafkaBuilder] { + override def fromBaseConfig(connectionTaskBuilder: ConnectionTaskBuilder[KafkaBuilder]): KafkaBuilder = { + this.connectionConfigWithTaskBuilder = connectionTaskBuilder.connectionConfigWithTaskBuilder + this + } + + def topic(topic: String): KafkaBuilder = { + this.step = Some(getStep.kafkaTopic(topic)) + this + } + +} + +case class HttpBuilder() extends ConnectionTaskBuilder[HttpBuilder] { + override def fromBaseConfig(connectionTaskBuilder: ConnectionTaskBuilder[HttpBuilder]): HttpBuilder = { + this.connectionConfigWithTaskBuilder = connectionTaskBuilder.connectionConfigWithTaskBuilder + this + } +} diff --git a/api/src/main/scala/com/github/pflooky/datacaterer/api/converter/Converters.scala b/api/src/main/scala/com/github/pflooky/datacaterer/api/converter/Converters.scala new file mode 100644 index 00000000..c0b694c1 --- /dev/null +++ b/api/src/main/scala/com/github/pflooky/datacaterer/api/converter/Converters.scala @@ -0,0 +1,19 @@ +package com.github.pflooky.datacaterer.api.converter + +import java.util.Optional +import scala.collection.JavaConverters.{collectionAsScalaIterableConverter, mapAsScalaMapConverter, seqAsJavaListConverter} + +object Converters { + + def toScalaMap[T, K](m: java.util.Map[T, K]): Map[T, K] = m.asScala.toMap + + def toScalaList[T](list: java.util.List[T]): List[T] = list.asScala.toList + + def toScalaSeq[T](list: java.util.List[T]): Seq[T] = list.asScala.toSeq + + def toScalaTuple[T, K](key: T, value: K): (T, K) = (key, value) + + def toScalaOption[T](opt: Optional[T]): Option[T] = if (opt.isPresent) Some(opt.get()) else None + + def toJavaList[T](seq: Seq[T]): java.util.List[T] = seq.asJava +} diff --git a/api/src/main/scala/com/github/pflooky/datacaterer/api/model/ConfigModels.scala b/api/src/main/scala/com/github/pflooky/datacaterer/api/model/ConfigModels.scala new file mode 100644 index 00000000..5792d841 --- /dev/null +++ b/api/src/main/scala/com/github/pflooky/datacaterer/api/model/ConfigModels.scala @@ -0,0 +1,56 @@ +package com.github.pflooky.datacaterer.api.model + +import com.github.pflooky.datacaterer.api.model.Constants._ + +case class FlagsConfig( + enableCount: Boolean = DEFAULT_ENABLE_COUNT, + enableGenerateData: Boolean = DEFAULT_ENABLE_GENERATE_DATA, + enableRecordTracking: Boolean = DEFAULT_ENABLE_RECORD_TRACKING, + enableDeleteGeneratedRecords: Boolean = DEFAULT_ENABLE_DELETE_GENERATED_RECORDS, + enableGeneratePlanAndTasks: Boolean = DEFAULT_ENABLE_GENERATE_PLAN_AND_TASKS, + enableFailOnError: Boolean = DEFAULT_ENABLE_FAIL_ON_ERROR, + enableUniqueCheck: Boolean = DEFAULT_ENABLE_UNIQUE_CHECK, + enableSinkMetadata: Boolean = DEFAULT_ENABLE_SINK_METADATA, + enableSaveReports: Boolean = DEFAULT_ENABLE_SAVE_REPORTS, + enableValidation: Boolean = DEFAULT_ENABLE_VALIDATION, + enableGenerateValidations: Boolean = DEFAULT_ENABLE_SUGGEST_VALIDATIONS, + ) + +case class FoldersConfig( + planFilePath: String = DEFAULT_PLAN_FILE_PATH, + taskFolderPath: String = DEFAULT_TASK_FOLDER_PATH, + generatedPlanAndTaskFolderPath: String = DEFAULT_GENERATED_PLAN_AND_TASK_FOLDER_PATH, + generatedReportsFolderPath: String = DEFAULT_GENERATED_REPORTS_FOLDER_PATH, + recordTrackingFolderPath: String = DEFAULT_RECORD_TRACKING_FOLDER_PATH, + 
validationFolderPath: String = DEFAULT_VALIDATION_FOLDER_PATH, + recordTrackingForValidationFolderPath: String = DEFAULT_RECORD_TRACKING_VALIDATION_FOLDER_PATH, + ) + +case class MetadataConfig( + numRecordsFromDataSource: Int = DEFAULT_NUM_RECORD_FROM_DATA_SOURCE, + numRecordsForAnalysis: Int = DEFAULT_NUM_RECORD_FOR_ANALYSIS, + oneOfDistinctCountVsCountThreshold: Double = DEFAULT_ONE_OF_DISTINCT_COUNT_VS_COUNT_THRESHOLD, + oneOfMinCount: Long = DEFAULT_ONE_OF_MIN_COUNT, + numGeneratedSamples: Int = DEFAULT_NUM_GENERATED_SAMPLES, + ) + +case class GenerationConfig( + numRecordsPerBatch: Long = DEFAULT_NUM_RECORDS_PER_BATCH, + numRecordsPerStep: Option[Long] = None, + ) + +case class ValidationConfig( + numSampleErrorRecords: Int = DEFAULT_VALIDATION_NUM_ERROR_RECORDS, + enableDeleteRecordTrackingFiles: Boolean = DEFAULT_VALIDATION_DELETE_RECORD_TRACKING_FILES, + ) + +case class DataCatererConfiguration( + flagsConfig: FlagsConfig = FlagsConfig(), + foldersConfig: FoldersConfig = FoldersConfig(), + metadataConfig: MetadataConfig = MetadataConfig(), + generationConfig: GenerationConfig = GenerationConfig(), + validationConfig: ValidationConfig = ValidationConfig(), + connectionConfigByName: Map[String, Map[String, String]] = Map(), + runtimeConfig: Map[String, String] = DEFAULT_RUNTIME_CONFIG, + master: String = DEFAULT_MASTER + ) diff --git a/api/src/main/scala/com/github/pflooky/datacaterer/api/model/Constants.scala b/api/src/main/scala/com/github/pflooky/datacaterer/api/model/Constants.scala new file mode 100644 index 00000000..5a809ff9 --- /dev/null +++ b/api/src/main/scala/com/github/pflooky/datacaterer/api/model/Constants.scala @@ -0,0 +1,347 @@ +package com.github.pflooky.datacaterer.api.model + +import java.util.UUID + +object Constants { + + lazy val PLAN_CLASS = "PLAN_CLASS" + + //supported data formats + lazy val CASSANDRA = "org.apache.spark.sql.cassandra" + lazy val JDBC = "jdbc" + lazy val POSTGRES = "postgres" + lazy val MYSQL = "mysql" + lazy val HTTP = "http" + lazy val JMS = "jms" + lazy val KAFKA = "kafka" + lazy val RATE = "rate" + //file formats + lazy val CSV = "csv" + lazy val DELTA = "delta" + lazy val JSON = "json" + lazy val ORC = "orc" + lazy val PARQUET = "parquet" + lazy val XML = "xml" + //jdbc drivers + lazy val POSTGRES_DRIVER = "org.postgresql.Driver" + lazy val MYSQL_DRIVER = "com.mysql.cj.jdbc.Driver" + + //spark data options + lazy val FORMAT = "format" + lazy val PATH = "path" + lazy val SAVE_MODE = "saveMode" + lazy val CASSANDRA_KEYSPACE = "keyspace" + lazy val CASSANDRA_TABLE = "table" + lazy val JDBC_TABLE = "dbtable" + lazy val JDBC_QUERY = "query" + lazy val URL = "url" + lazy val USERNAME = "user" + lazy val PASSWORD = "password" + lazy val DRIVER = "driver" + lazy val PARTITIONS = "partitions" + lazy val PARTITION_BY = "partitionBy" + lazy val BODY_FIELD = "bodyField" + lazy val JMS_DESTINATION_NAME = "destinationName" + lazy val KAFKA_TOPIC = "topic" + lazy val JMS_INITIAL_CONTEXT_FACTORY = "initialContextFactory" + lazy val JMS_CONNECTION_FACTORY = "connectionFactory" + lazy val JMS_VPN_NAME = "vpnName" + lazy val SCHEMA_LOCATION = "schemaLocation" + lazy val ROWS_PER_SECOND = "rowsPerSecond" + lazy val HTTP_METHOD = "httpMethod" + lazy val HTTP_CONTENT_TYPE = "httpContentType" + lazy val HTTP_HEADER = "httpHeader" + + //field metadata + lazy val FIELD_DATA_TYPE = "type" + lazy val FIELD_DESCRIPTION = "description" + lazy val RANDOM_SEED = "seed" + lazy val ENABLED_NULL = "enableNull" + lazy val PROBABILITY_OF_NULL = "nullProb" + lazy val 
ENABLED_EDGE_CASE = "enableEdgeCase" + lazy val PROBABILITY_OF_EDGE_CASE = "edgeCaseProb" + lazy val AVERAGE_LENGTH = "avgLen" + lazy val MINIMUM_LENGTH = "minLen" + lazy val ARRAY_MINIMUM_LENGTH = "arrayMinLen" + lazy val MAXIMUM_LENGTH = "maxLen" + lazy val ARRAY_MAXIMUM_LENGTH = "arrayMaxLen" + lazy val SOURCE_MAXIMUM_LENGTH = "sourceMaxLen" + lazy val MINIMUM = "min" + lazy val MAXIMUM = "max" + lazy val STANDARD_DEVIATION = "stddev" + lazy val MEAN = "mean" + lazy val ARRAY_TYPE = "arrayType" + lazy val EXPRESSION = "expression" + lazy val DISTINCT_COUNT = "distinctCount" + lazy val ROW_COUNT = "count" + lazy val IS_PRIMARY_KEY = "isPrimaryKey" + lazy val PRIMARY_KEY_POSITION = "primaryKeyPos" + lazy val IS_UNIQUE = "isUnique" + lazy val IS_NULLABLE = "isNullable" + lazy val NULL_COUNT = "nullCount" + lazy val HISTOGRAM = "histogram" + lazy val SOURCE_COLUMN_DATA_TYPE = "sourceDataType" + lazy val NUMERIC_PRECISION = "precision" + lazy val NUMERIC_SCALE = "scale" + lazy val DEFAULT_VALUE = "defaultValue" + lazy val DATA_SOURCE_GENERATION = "dataSourceGeneration" + lazy val OMIT = "omit" + lazy val CONSTRAINT_TYPE = "constraintType" + lazy val STATIC = "static" + lazy val CLUSTERING_POSITION = "clusteringPos" + lazy val METADATA_IDENTIFIER = "metadataIdentifier" + lazy val FIELD_LABEL = "label" + lazy val IS_PII = "isPII" + lazy val HTTP_PARAMETER_TYPE = "httpParamType" + lazy val POST_SQL_EXPRESSION = "postSqlExpression" + + //field labels + lazy val LABEL_NAME = "name" + lazy val LABEL_USERNAME = "username" + lazy val LABEL_ADDRESS = "address" + lazy val LABEL_APP = "app" + lazy val LABEL_NATION = "nation" + lazy val LABEL_MONEY = "money" + lazy val LABEL_INTERNET = "internet" + lazy val LABEL_FOOD = "food" + lazy val LABEL_JOB = "job" + lazy val LABEL_RELATIONSHIP = "relationship" + lazy val LABEL_WEATHER = "weather" + lazy val LABEL_PHONE = "phone" + lazy val LABEL_GEO = "geo" + + //expressions + lazy val FAKER_EXPR_FIRST_NAME = "Name.firstname" + lazy val FAKER_EXPR_LAST_NAME = "Name.lastname" + lazy val FAKER_EXPR_USERNAME = "Name.username" + lazy val FAKER_EXPR_NAME = "Name.name" + lazy val FAKER_EXPR_CITY = "Address.city" + lazy val FAKER_EXPR_COUNTRY = "Address.country" + lazy val FAKER_EXPR_COUNTRY_CODE = "Address.countryCode" + lazy val FAKER_EXPR_NATIONALITY = "Nation.nationality" + lazy val FAKER_EXPR_LANGUAGE = "Nation.language" + lazy val FAKER_EXPR_CAPITAL = "Nation.capitalCity" + lazy val FAKER_EXPR_APP_VERSION = "App.version" + lazy val FAKER_EXPR_PAYMENT_METHODS = "Subscription.paymentMethods" + lazy val FAKER_EXPR_MAC_ADDRESS = "Internet.macAddress" + lazy val FAKER_EXPR_CURRENCY = "Money.currency" + lazy val FAKER_EXPR_CURRENCY_CODE = "Money.currencyCode" + lazy val FAKER_EXPR_CREDIT_CARD = "Finance.creditCard" + lazy val FAKER_EXPR_FOOD = "Food.dish" + lazy val FAKER_EXPR_FOOD_INGREDIENT = "Food.ingredient" + lazy val FAKER_EXPR_JOB_FIELD = "Job.field" + lazy val FAKER_EXPR_JOB_POSITION = "Job.position" + lazy val FAKER_EXPR_JOB_TITLE = "Job.title" + lazy val FAKER_EXPR_RELATIONSHIP = "Relationship.any" + lazy val FAKER_EXPR_WEATHER = "Weather.description" + lazy val FAKER_EXPR_PHONE = "PhoneNumber.cellPhone" + lazy val FAKER_EXPR_EMAIL = "Internet.emailAddress" + lazy val FAKER_EXPR_IPV4 = "Internet.ipV4Address" + lazy val FAKER_EXPR_IPV6 = "Internet.ipV6Address" + lazy val FAKER_EXPR_ADDRESS = "Address.fullAddress" + lazy val FAKER_EXPR_ADDRESS_POSTCODE = "Address.postcode" + + //generator types + lazy val RANDOM_GENERATOR = "random" + lazy val ONE_OF_GENERATOR 
= "oneOf" + lazy val REGEX_GENERATOR = "regex" + lazy val SQL_GENERATOR = "sql" + + //flags defaults + lazy val DEFAULT_ENABLE_COUNT = true + lazy val DEFAULT_ENABLE_GENERATE_DATA = true + lazy val DEFAULT_ENABLE_RECORD_TRACKING = false + lazy val DEFAULT_ENABLE_DELETE_GENERATED_RECORDS = false + lazy val DEFAULT_ENABLE_GENERATE_PLAN_AND_TASKS = false + lazy val DEFAULT_ENABLE_FAIL_ON_ERROR = true + lazy val DEFAULT_ENABLE_UNIQUE_CHECK = false + lazy val DEFAULT_ENABLE_SINK_METADATA = false + lazy val DEFAULT_ENABLE_SAVE_REPORTS = true + lazy val DEFAULT_ENABLE_VALIDATION = true + lazy val DEFAULT_ENABLE_SUGGEST_VALIDATIONS = false + + //folders defaults + lazy val DEFAULT_PLAN_FILE_PATH = "/opt/app/plan/customer-create-plan.yaml" + lazy val DEFAULT_TASK_FOLDER_PATH = "/opt/app/task" + lazy val DEFAULT_GENERATED_PLAN_AND_TASK_FOLDER_PATH = "/tmp" + lazy val DEFAULT_GENERATED_REPORTS_FOLDER_PATH = "/opt/app/report" + lazy val DEFAULT_RECORD_TRACKING_FOLDER_PATH = "/opt/app/record-tracking" + lazy val DEFAULT_VALIDATION_FOLDER_PATH = "/opt/app/validation" + lazy val DEFAULT_RECORD_TRACKING_VALIDATION_FOLDER_PATH = "/opt/app/record-tracking-validation" + + //metadata defaults + lazy val DEFAULT_NUM_RECORD_FROM_DATA_SOURCE = 10000 + lazy val DEFAULT_NUM_RECORD_FOR_ANALYSIS = 10000 + lazy val DEFAULT_ONE_OF_DISTINCT_COUNT_VS_COUNT_THRESHOLD = 0.2 + lazy val DEFAULT_ONE_OF_MIN_COUNT = 1000 + lazy val DEFAULT_NUM_GENERATED_SAMPLES = 10 + + //generation defaults + lazy val DEFAULT_NUM_RECORDS_PER_BATCH = 100000 + + //spark defaults + lazy val DEFAULT_MASTER = "local[*]" + lazy val DEFAULT_RUNTIME_CONFIG = Map( + "spark.sql.cbo.enabled" -> "true", + "spark.sql.adaptive.enabled" -> "true", + "spark.sql.cbo.planStats.enabled" -> "true", + "spark.sql.legacy.allowUntypedScalaUDF" -> "true", + "spark.sql.legacy.allowParameterlessCount" -> "true", + "spark.sql.statistics.histogram.enabled" -> "true", + "spark.sql.shuffle.partitions" -> "10", + "spark.sql.catalog.postgres" -> "", + "spark.sql.catalog.cassandra" -> "com.datastax.spark.connector.datasource.CassandraCatalog", + "spark.hadoop.fs.s3a.directory.marker.retention" -> "keep", + "spark.hadoop.fs.s3a.bucket.all.committer.magic.enabled" -> "true" + ) + + //jdbc defaults + lazy val DEFAULT_POSTGRES_URL = "jdbc:postgresql://postgresserver:5432/customer" + lazy val DEFAULT_POSTGRES_USERNAME = "postgres" + lazy val DEFAULT_POSTGRES_PASSWORD = "postgres" + lazy val DEFAULT_MYSQL_URL = "jdbc:mysql://mysqlserver:3306/customer" + lazy val DEFAULT_MYSQL_USERNAME = "root" + lazy val DEFAULT_MYSQL_PASSWORD = "root" + + //cassandra defaults + lazy val DEFAULT_CASSANDRA_URL = "cassandraserver:9042" + lazy val DEFAULT_CASSANDRA_USERNAME = "cassandra" + lazy val DEFAULT_CASSANDRA_PASSWORD = "cassandra" + + //solace defaults + lazy val DEFAULT_SOLACE_URL = "smf://solaceserver:55554" + lazy val DEFAULT_SOLACE_USERNAME = "admin" + lazy val DEFAULT_SOLACE_PASSWORD = "admin" + lazy val DEFAULT_SOLACE_VPN_NAME = "default" + lazy val DEFAULT_SOLACE_CONNECTION_FACTORY = "/jms/cf/default" + lazy val DEFAULT_SOLACE_INITIAL_CONTEXT_FACTORY = "com.solacesystems.jndi.SolJNDIInitialContextFactory" + + //kafka defaults + lazy val DEFAULT_KAFKA_URL = "kafkaserver:9092" + + //http defaults + lazy val DEFAULT_REAL_TIME_HEADERS_INNER_DATA_TYPE = "struct" + lazy val DEFAULT_REAL_TIME_HEADERS_DATA_TYPE = s"array<$DEFAULT_REAL_TIME_HEADERS_INNER_DATA_TYPE>" + lazy val HTTP_PATH_PARAMETER = "path" + lazy val HTTP_QUERY_PARAMETER = "query" + lazy val HTTP_HEADER_PARAMETER = "header" + + 
//foreign key defaults + lazy val DEFAULT_FOREIGN_KEY_COLUMN = "default_column" + lazy val FOREIGN_KEY_DELIMITER = "||" + lazy val FOREIGN_KEY_DELIMITER_REGEX = "\\|\\|" + + //task defaults + def DEFAULT_TASK_NAME: String = UUID.randomUUID().toString + + lazy val DEFAULT_DATA_SOURCE_NAME = "json" + lazy val DEFAULT_TASK_SUMMARY_ENABLE = true + + //step defaults + def DEFAULT_STEP_NAME: String = UUID.randomUUID().toString + + lazy val DEFAULT_STEP_TYPE = "json" + lazy val DEFAULT_STEP_ENABLED = true + + //field defaults + def DEFAULT_FIELD_NAME: String = UUID.randomUUID().toString + + lazy val DEFAULT_FIELD_TYPE = "string" + lazy val DEFAULT_FIELD_NULLABLE = true + lazy val ONE_OF_GENERATOR_DELIMITER = "," + + //generator defaults + lazy val DEFAULT_GENERATOR_TYPE = "random" + + //count defaults + lazy val DEFAULT_COUNT_RECORDS = 1000L + lazy val DEFAULT_PER_COLUMN_COUNT_RECORDS = 10L + + //validation defaults + lazy val DEFAULT_VALIDATION_CONFIG_NAME = "default_validation" + lazy val DEFAULT_VALIDATION_DESCRIPTION = "Validation of data sources after generating data" + lazy val DEFAULT_VALIDATION_JOIN_TYPE = "outer" + lazy val DEFAULT_VALIDATION_NUM_ERROR_RECORDS = 5 + lazy val DEFAULT_VALIDATION_DELETE_RECORD_TRACKING_FILES = true + lazy val DEFAULT_VALIDATION_WEBHOOK_HTTP_DATA_SOURCE_NAME = "tmp_http_data_source" + lazy val DEFAULT_VALIDATION_WEBHOOK_HTTP_METHOD = "GET" + lazy val DEFAULT_VALIDATION_WEBHOOK_HTTP_STATUS_CODES = List(200) + + //metadata source + lazy val METADATA_SOURCE_TYPE = "metadata_source_type" + lazy val METADATA_SOURCE_NAME = "metadata_source_name" + lazy val METADATA_SOURCE_HAS_OPEN_LINEAGE_SUPPORT = "metadata_source_has_open_lineage_support" + lazy val METADATA_SOURCE_URL = "metadata_source_url" + lazy val MARQUEZ = "marquez" + lazy val OPEN_METADATA = "open_metadata" + lazy val OPEN_API = "open_api" + lazy val AMUNDSEN = "amundsen" + lazy val DATAHUB = "datahub" + lazy val DEFAULT_METADATA_SOURCE_NAME = "default_metadata_source" + + //openlineage + lazy val OPEN_LINEAGE_NAMESPACE = "namespace" + lazy val OPEN_LINEAGE_DATASET = "dataset" + lazy val DATASET_NAME = "name" + lazy val FACET_DATA_SOURCE = "dataSource" + lazy val DATA_SOURCE_NAME = "dataSourceName" + lazy val URI = "uri" + lazy val FACET_DATA_QUALITY_METRICS = "dataQualityMetrics" + lazy val FACET_DATA_QUALITY_ASSERTIONS = "dataQualityAssertions" + + //openmetadata + lazy val OPEN_METADATA_HOST = "host" + lazy val OPEN_METADATA_API_VERSION = "apiVersion" + lazy val OPEN_METADATA_DEFAULT_API_VERSION = "v1" + lazy val OPEN_METADATA_AUTH_TYPE = "authType" + lazy val OPEN_METADATA_AUTH_TYPE_BASIC = "basic" + lazy val OPEN_METADATA_AUTH_TYPE_NO_AUTH = "no-auth" + lazy val OPEN_METADATA_AUTH_TYPE_AZURE = "azure" + lazy val OPEN_METADATA_AUTH_TYPE_GOOGLE = "google" + lazy val OPEN_METADATA_AUTH_TYPE_OKTA = "okta" + lazy val OPEN_METADATA_AUTH_TYPE_AUTH0 = "auth0" + lazy val OPEN_METADATA_AUTH_TYPE_AWS_COGNITO = "aws-cognito" + lazy val OPEN_METADATA_AUTH_TYPE_CUSTOM_OIDC = "custom-oidc" + lazy val OPEN_METADATA_AUTH_TYPE_OPEN_METADATA = "openmetadata" + lazy val OPEN_METADATA_BASIC_AUTH_USERNAME = "basicAuthUsername" + lazy val OPEN_METADATA_BASIC_AUTH_PASSWORD = "basicAuthPassword" + lazy val OPEN_METADATA_GOOGLE_AUTH_AUDIENCE = "googleAudience" + lazy val OPEN_METADATA_GOOGLE_AUTH_SECRET_KEY = "googleSecretKey" + lazy val OPEN_METADATA_OKTA_AUTH_CLIENT_ID = "oktaClientId" + lazy val OPEN_METADATA_OKTA_AUTH_ORG_URL = "oktaOrgUrl" + lazy val OPEN_METADATA_OKTA_AUTH_EMAIL = "oktaEmail" + lazy val 
OPEN_METADATA_OKTA_AUTH_SCOPES = "oktaScopes" + lazy val OPEN_METADATA_OKTA_AUTH_PRIVATE_KEY = "oktaPrivateKey" + lazy val OPEN_METADATA_AUTH0_CLIENT_ID = "auth0ClientId" + lazy val OPEN_METADATA_AUTH0_SECRET_KEY = "auth0SecretKey" + lazy val OPEN_METADATA_AUTH0_DOMAIN = "auth0Domain" + lazy val OPEN_METADATA_AZURE_CLIENT_ID = "azureClientId" + lazy val OPEN_METADATA_AZURE_CLIENT_SECRET = "azureClientSecret" + lazy val OPEN_METADATA_AZURE_SCOPES = "azureScopes" + lazy val OPEN_METADATA_AZURE_AUTHORITY = "azureAuthority" + lazy val OPEN_METADATA_JWT_TOKEN = "openMetadataJwtToken" + lazy val OPEN_METADATA_CUSTOM_OIDC_CLIENT_ID = "customOidcClientId" + lazy val OPEN_METADATA_CUSTOM_OIDC_SECRET_KEY = "customOidcSecretKey" + lazy val OPEN_METADATA_CUSTOM_OIDC_TOKEN_ENDPOINT = "customOidcTokenEndpoint" + lazy val OPEN_METADATA_DATABASE = "database" + lazy val OPEN_METADATA_DATABASE_SCHEMA = "databaseSchema" + lazy val OPEN_METADATA_TABLE_FQN = "tableFqn" + lazy val OPEN_METADATA_SERVICE = "service" + + //aggregation types + lazy val AGGREGATION_SUM = "sum" + lazy val AGGREGATION_COUNT = "count" + lazy val AGGREGATION_MAX = "max" + lazy val AGGREGATION_MIN = "min" + lazy val AGGREGATION_AVG = "avg" + lazy val AGGREGATION_STDDEV = "stddev" + + //validation types + lazy val VALIDATION_EXPRESSION = "expr" + lazy val VALIDATION_GROUP_BY = "groupBy" + lazy val VALIDATION_UNIQUE = "unique" + lazy val VALIDATION_PREFIX_JOIN_EXPRESSION = "expr:" + + //trial + lazy val API_KEY = "API_KEY" +} diff --git a/api/src/main/scala/com/github/pflooky/datacaterer/api/model/DataType.scala b/api/src/main/scala/com/github/pflooky/datacaterer/api/model/DataType.scala new file mode 100644 index 00000000..a94d5ef2 --- /dev/null +++ b/api/src/main/scala/com/github/pflooky/datacaterer/api/model/DataType.scala @@ -0,0 +1,132 @@ +package com.github.pflooky.datacaterer.api.model + +import scala.collection.JavaConverters.iterableAsScalaIterableConverter + +trait DataType { + override def toString: String = getClass.getSimpleName.toLowerCase.stripSuffix("type$") +} + +object DataType { + def fromString(str: String): DataType = { + str.toLowerCase match { + case "string" => StringType + case "int" | "integer" => IntegerType + case "long" => LongType + case "short" | "tinyint" | "smallint" => ShortType + case "decimal" => DecimalType + case "double" => DoubleType + case "float" => FloatType + case "date" => DateType + case "date-time" | "datetime" | "timestamp" => TimestampType + case "boolean" | "bool" => BooleanType + case "binary" => BinaryType + case "byte" => ByteType + case "array" | "list" | "seq" => ArrayType + case _ => StructType + } + } +} + +class StringType extends DataType + +case object StringType extends StringType { + def instance: StringType.type = this +} + +class IntegerType extends DataType + +case object IntegerType extends IntegerType { + def instance: IntegerType.type = this +} + +class LongType extends DataType + +case object LongType extends LongType { + def instance: LongType.type = this +} + +class ShortType extends DataType + +case object ShortType extends ShortType { + def instance: ShortType.type = this +} + +class DecimalType(precision: Int = 10, scale: Int = 0) extends DataType { + assert(scale < precision, "Scale required to be less than precision") + + override def toString: String = s"decimal($precision, $scale)" +} + +case object DecimalType extends DecimalType(10, 0) { + def instance: DecimalType.type = this +} + +class DoubleType extends DataType + +case object DoubleType extends DoubleType 
{ + def instance: DoubleType.type = this +} + +class FloatType extends DataType + +case object FloatType extends FloatType { + def instance: FloatType.type = this +} + +class DateType extends DataType + +case object DateType extends DateType { + def instance: DateType.type = this +} + +class TimestampType extends DataType + +case object TimestampType extends TimestampType { + def instance: TimestampType.type = this +} + +class BooleanType extends DataType + +case object BooleanType extends BooleanType { + def instance: BooleanType.type = this +} + +class BinaryType extends DataType + +case object BinaryType extends BinaryType { + def instance: BinaryType.type = this +} + +class ByteType extends DataType + +case object ByteType extends ByteType { + def instance: ByteType.type = this +} + +class ArrayType(`type`: DataType = StringType) extends DataType { + override def toString: String = s"array<${`type`.toString}>" +} + +case object ArrayType extends ArrayType(StringType) { + def instance: ArrayType.type = this +} + +class StructType(innerType: List[(String, DataType)] = List()) extends DataType { + + def this(innerType: java.util.List[java.util.Map.Entry[String, DataType]]) = { + this(innerType.asScala.map(entry => entry.getKey -> entry.getValue).toList) + } + + override def toString: String = { + val innerStructType = innerType.map(t => s"${t._1}: ${t._2.toString}").mkString(",") + s"struct<$innerStructType>" + } +} + +case object StructType extends StructType(List()) { + def instance: StructType.type = this +} + +object HeaderType { + def getType: DataType = new ArrayType(new StructType(List("key" -> StringType, "value" -> BinaryType))) +} \ No newline at end of file diff --git a/api/src/main/scala/com/github/pflooky/datacaterer/api/model/MetadataSourceModels.scala b/api/src/main/scala/com/github/pflooky/datacaterer/api/model/MetadataSourceModels.scala new file mode 100644 index 00000000..9bde5c3a --- /dev/null +++ b/api/src/main/scala/com/github/pflooky/datacaterer/api/model/MetadataSourceModels.scala @@ -0,0 +1,33 @@ +package com.github.pflooky.datacaterer.api.model + +import com.github.pflooky.datacaterer.api.model.Constants.{MARQUEZ, METADATA_SOURCE_HAS_OPEN_LINEAGE_SUPPORT, METADATA_SOURCE_TYPE, OPEN_API, OPEN_METADATA} + +trait MetadataSource { + + val `type`: String + val connectionOptions: Map[String, String] = Map() + + def allOptions: Map[String, String] = { + connectionOptions ++ Map( + METADATA_SOURCE_TYPE -> `type` + ) + } +} + +case class MarquezMetadataSource(override val connectionOptions: Map[String, String] = Map(METADATA_SOURCE_HAS_OPEN_LINEAGE_SUPPORT -> "true")) extends MetadataSource { + + override val `type`: String = MARQUEZ + +} + +case class OpenMetadataSource(override val connectionOptions: Map[String, String] = Map()) extends MetadataSource { + + override val `type`: String = OPEN_METADATA + +} + +case class OpenAPISource(override val connectionOptions: Map[String, String] = Map()) extends MetadataSource { + + override val `type`: String = OPEN_API + +} diff --git a/api/src/main/scala/com/github/pflooky/datacaterer/api/model/PlanModels.scala b/api/src/main/scala/com/github/pflooky/datacaterer/api/model/PlanModels.scala new file mode 100644 index 00000000..98712414 --- /dev/null +++ b/api/src/main/scala/com/github/pflooky/datacaterer/api/model/PlanModels.scala @@ -0,0 +1,81 @@ +package com.github.pflooky.datacaterer.api.model + +import com.fasterxml.jackson.databind.annotation.JsonDeserialize +import 
com.github.pflooky.datacaterer.api.model.Constants.{DEFAULT_COUNT_RECORDS, DEFAULT_DATA_SOURCE_NAME, DEFAULT_FIELD_NAME, DEFAULT_FIELD_NULLABLE, DEFAULT_FIELD_TYPE, DEFAULT_GENERATOR_TYPE, DEFAULT_PER_COLUMN_COUNT_RECORDS, DEFAULT_STEP_ENABLED, DEFAULT_STEP_NAME, DEFAULT_STEP_TYPE, DEFAULT_TASK_NAME, DEFAULT_TASK_SUMMARY_ENABLE, FOREIGN_KEY_DELIMITER} + +import scala.language.implicitConversions + +case class Plan( + name: String = "Default plan", + description: String = "Data generation plan", + tasks: List[TaskSummary] = List(), + sinkOptions: Option[SinkOptions] = None, + validations: List[String] = List() + ) + +case class SinkOptions( + seed: Option[String] = None, + locale: Option[String] = None, + foreignKeys: List[(String, List[String])] = List() + ) + +case class ForeignKeyRelation( + dataSource: String = DEFAULT_DATA_SOURCE_NAME, + step: String = DEFAULT_STEP_NAME, + columns: List[String] = List() + ) { + + def this(dataSource: String, step: String, column: String) = this(dataSource, step, List(column)) + + override def toString: String = s"$dataSource$FOREIGN_KEY_DELIMITER$step$FOREIGN_KEY_DELIMITER${columns.mkString(",")}" +} + +case class TaskSummary( + name: String, + dataSourceName: String, + enabled: Boolean = DEFAULT_TASK_SUMMARY_ENABLE + ) + +case class Task( + name: String = DEFAULT_TASK_NAME, + steps: List[Step] = List() + ) + +case class Step( + name: String = DEFAULT_STEP_NAME, + `type`: String = DEFAULT_STEP_TYPE, + count: Count = Count(), + options: Map[String, String] = Map(), + schema: Schema = Schema(), + enabled: Boolean = DEFAULT_STEP_ENABLED + ) + +case class Count( + @JsonDeserialize(contentAs = classOf[java.lang.Long]) records: Option[Long] = Some(DEFAULT_COUNT_RECORDS), + perColumn: Option[PerColumnCount] = None, + generator: Option[Generator] = None + ) + +case class PerColumnCount( + columnNames: List[String] = List(), + @JsonDeserialize(contentAs = classOf[java.lang.Long]) count: Option[Long] = Some(DEFAULT_PER_COLUMN_COUNT_RECORDS), + generator: Option[Generator] = None + ) + +case class Schema( + fields: Option[List[Field]] = None + ) + +case class Field( + name: String = DEFAULT_FIELD_NAME, + `type`: Option[String] = Some(DEFAULT_FIELD_TYPE), + generator: Option[Generator] = Some(Generator()), + nullable: Boolean = DEFAULT_FIELD_NULLABLE, + static: Option[String] = None, + schema: Option[Schema] = None + ) + +case class Generator( + `type`: String = DEFAULT_GENERATOR_TYPE, + options: Map[String, Any] = Map() + ) diff --git a/api/src/main/scala/com/github/pflooky/datacaterer/api/model/ValidationModels.scala b/api/src/main/scala/com/github/pflooky/datacaterer/api/model/ValidationModels.scala new file mode 100644 index 00000000..8fd0074f --- /dev/null +++ b/api/src/main/scala/com/github/pflooky/datacaterer/api/model/ValidationModels.scala @@ -0,0 +1,78 @@ +package com.github.pflooky.datacaterer.api.model + +import com.fasterxml.jackson.annotation.JsonTypeInfo.Id +import com.fasterxml.jackson.annotation.{JsonIgnoreProperties, JsonTypeInfo} +import com.fasterxml.jackson.databind.annotation.{JsonDeserialize, JsonTypeIdResolver} +import com.github.pflooky.datacaterer.api.{ColumnValidationBuilder, ValidationBuilder} +import com.github.pflooky.datacaterer.api.connection.{ConnectionTaskBuilder, FileBuilder} +import com.github.pflooky.datacaterer.api.model.Constants.{AGGREGATION_SUM, DEFAULT_VALIDATION_CONFIG_NAME, DEFAULT_VALIDATION_DESCRIPTION, DEFAULT_VALIDATION_JOIN_TYPE, DEFAULT_VALIDATION_WEBHOOK_HTTP_METHOD, DEFAULT_VALIDATION_WEBHOOK_HTTP_STATUS_CODES} 
+import com.github.pflooky.datacaterer.api.parser.ValidationIdResolver + + +@JsonTypeInfo(use = Id.CUSTOM, defaultImpl = classOf[ExpressionValidation]) +@JsonTypeIdResolver(classOf[ValidationIdResolver]) +@JsonIgnoreProperties(ignoreUnknown = true) +trait Validation { + var description: Option[String] = None + @JsonDeserialize(contentAs = classOf[java.lang.Double]) var errorThreshold: Option[Double] = None +} + +case class ExpressionValidation( + expr: String = "true" + ) extends Validation + +case class GroupByValidation( + groupByCols: Seq[String] = Seq(), + aggCol: String = "", + aggType: String = AGGREGATION_SUM, + expr: String = "true" + ) extends Validation + +case class UpstreamDataSourceValidation( + validationBuilder: ValidationBuilder = ValidationBuilder(), + upstreamDataSource: ConnectionTaskBuilder[_] = FileBuilder(), + upstreamReadOptions: Map[String, String] = Map(), + joinCols: List[String] = List(), + joinType: String = DEFAULT_VALIDATION_JOIN_TYPE, + ) extends Validation + +case class ValidationConfiguration( + name: String = DEFAULT_VALIDATION_CONFIG_NAME, + description: String = DEFAULT_VALIDATION_DESCRIPTION, + dataSources: Map[String, List[DataSourceValidation]] = Map() + ) + +case class DataSourceValidation( + options: Map[String, String] = Map(), + waitCondition: WaitCondition = PauseWaitCondition(), + validations: List[ValidationBuilder] = List() + ) + +trait WaitCondition { + val isRetryable: Boolean = true + val maxRetries: Int = 10 + val waitBeforeRetrySeconds: Int = 2 +} + +case class PauseWaitCondition( + pauseInSeconds: Int = 0, + ) extends WaitCondition { + override val isRetryable: Boolean = false +} + +case class FileExistsWaitCondition( + path: String, + ) extends WaitCondition + +case class DataExistsWaitCondition( + dataSourceName: String, + options: Map[String, String], + expr: String, + ) extends WaitCondition + +case class WebhookWaitCondition( + dataSourceName: String, + url: String, + method: String = DEFAULT_VALIDATION_WEBHOOK_HTTP_METHOD, + statusCodes: List[Int] = DEFAULT_VALIDATION_WEBHOOK_HTTP_STATUS_CODES + ) extends WaitCondition \ No newline at end of file diff --git a/api/src/main/scala/com/github/pflooky/datacaterer/api/model/generator/BaseGenerator.scala b/api/src/main/scala/com/github/pflooky/datacaterer/api/model/generator/BaseGenerator.scala new file mode 100644 index 00000000..96492aa6 --- /dev/null +++ b/api/src/main/scala/com/github/pflooky/datacaterer/api/model/generator/BaseGenerator.scala @@ -0,0 +1,12 @@ +package com.github.pflooky.datacaterer.api.model.generator + +trait BaseGenerator[T] { + val options: Map[String, Any] = Map() + + val edgeCases: List[T] = List() + + def generateSqlExpression: String = "" + + //TODO how to set default for generic trait method + def generate: T +} diff --git a/api/src/main/scala/com/github/pflooky/datacaterer/api/parser/ValidationIdResolver.scala b/api/src/main/scala/com/github/pflooky/datacaterer/api/parser/ValidationIdResolver.scala new file mode 100644 index 00000000..e9e0978e --- /dev/null +++ b/api/src/main/scala/com/github/pflooky/datacaterer/api/parser/ValidationIdResolver.scala @@ -0,0 +1,47 @@ +package com.github.pflooky.datacaterer.api.parser + +import com.fasterxml.jackson.annotation.JsonTypeInfo.Id +import com.fasterxml.jackson.core.JsonGenerator +import com.fasterxml.jackson.databind.jsontype.impl.TypeIdResolverBase +import com.fasterxml.jackson.databind.{DatabindContext, JavaType, JsonSerializer, SerializerProvider} +import com.github.pflooky.datacaterer.api.ValidationBuilder 
+import com.github.pflooky.datacaterer.api.model.{ExpressionValidation, GroupByValidation} + +class ValidationIdResolver extends TypeIdResolverBase { + private var superType: JavaType = null + + override def init(bt: JavaType): Unit = { + superType = bt + } + + override def idFromValue(value: Any): String = null + + override def idFromValueAndType(value: Any, suggestedType: Class[_]): String = null + + override def getMechanism: Id = null + + override def typeFromId(context: DatabindContext, id: String): JavaType = { + val subType = classOf[ExpressionValidation] + context.constructSpecializedType(superType, subType) + } +} + +class ValidationBuilderSerializer extends JsonSerializer[ValidationBuilder] { + override def serialize(value: ValidationBuilder, gen: JsonGenerator, serializers: SerializerProvider): Unit = { + val validation = value.validation + gen.writeStartObject() + validation match { + case ExpressionValidation(expr) => + gen.writeStringField("expr", expr) + case GroupByValidation(groupByCols, aggCol, aggType, expr) => + gen.writeArrayFieldStart("groupByCols") + groupByCols.foreach(gen.writeObject) + gen.writeEndArray() + gen.writeStringField("aggCol", aggCol) + gen.writeStringField("aggType", aggType) + gen.writeStringField("expr", expr) + case _ => + } + gen.writeEndObject() + } +} diff --git a/api/src/test/java/com/github/pflooky/datacaterer/java/api/DocumentationJavaPlanRun.java b/api/src/test/java/com/github/pflooky/datacaterer/java/api/DocumentationJavaPlanRun.java new file mode 100644 index 00000000..b36d5744 --- /dev/null +++ b/api/src/test/java/com/github/pflooky/datacaterer/java/api/DocumentationJavaPlanRun.java @@ -0,0 +1,98 @@ +//package com.github.pflooky.datacaterer.java.api; +// +//import com.github.pflooky.datacaterer.api.model.ArrayType; +//import com.github.pflooky.datacaterer.api.model.Constants; +//import com.github.pflooky.datacaterer.api.model.DateType; +//import com.github.pflooky.datacaterer.api.model.DoubleType; +//import com.github.pflooky.datacaterer.api.model.IntegerType; +//import com.github.pflooky.datacaterer.api.model.TimestampType; +// +//import java.sql.Date; +//import java.sql.Timestamp; +//import java.util.List; +//import java.util.Map; +// +//public class DocumentationJavaPlanRun extends PlanRun { +// { +// String baseFolder = "src/test/resources/sample/java/documentation"; +// String[] accountStatus = {"open", "closed", "pending", "suspended"}; +// var jsonTask = json("account_info", baseFolder + "/json", Map.of(Constants.SAVE_MODE(), "overwrite")) +// .schema( +// field().name("account_id").regex("ACC[0-9]{8}"), +// field().name("year").type(IntegerType.instance()).sql("YEAR(date)"), +// field().name("balance").type(DoubleType.instance()).min(10).max(1000), +// field().name("date").type(DateType.instance()).min(Date.valueOf("2022-01-01")), +// field().name("status").sql("element_at(sort_array(update_history.updated_time), 1)"), +// field().name("update_history") +// .type(ArrayType.instance()) +// .schema( +// field().name("updated_time").type(TimestampType.instance()).min(Timestamp.valueOf("2022-01-01 00:00:00")), +// field().name("status").oneOf(accountStatus) +// ), +// field().name("customer_details") +// .schema( +// field().name("name").sql("_join_txn_name"), +// field().name("age").type(IntegerType.instance()).min(18).max(90), +// field().name("city").expression("#{Address.city}") +// ), +// field().name("_join_txn_name").expression("#{Name.name}").omit(true) +// ) +// .count(count().records(100)); +// +// var csvTxns = 
csv("transactions", baseFolder + "/csv", Map.of(Constants.SAVE_MODE(), "overwrite", "header", "true")) +// .schema( +// field().name("account_id"), +// field().name("txn_id"), +// field().name("name"), +// field().name("amount").type(DoubleType.instance()).min(10).max(100), +// field().name("merchant").expression("#{Company.name}") +// ) +// .count( +// count() +// .records(100) +// .recordsPerColumnGenerator(generator().min(1).max(2), "account_id", "name") +// ); +// +// var foreignKeySetup = plan() +// .addForeignKeyRelationship( +// jsonTask, List.of("account_id", "_join_txn_name"), +// List.of(Map.entry(csvTxns, List.of("account_id", "name"))) +// ); +// +// var postgresAcc = postgres("my_postgres", "jdbc:...") +// .table("public.accounts") +// .schema( +// field().name("account_id") +// ); +// var postgresTxn = postgres(postgresAcc) +// .table("public.transactions") +// .schema( +// field().name("account_id").type(DoubleType.instance()).enableEdgeCases(true).edgeCaseProbability(0.1) +// ); +// plan().addForeignKeyRelationship( +// postgresAcc, List.of("account_id", "name"), +// List.of(Map.entry(postgresTxn, List.of("account_id", "name"))) +// ); +// +// var csvTask = csv("my_csv", "s3a://my-bucket/csv/accounts") +// .schema( +// field().name("account_id") +// ); +// +// var s3Configuration = configuration() +// .runtimeConfig(Map.of( +// "spark.hadoop.fs.s3a.directory.marker.retention", "keep", +// "spark.hadoop.fs.s3a.bucket.all.committer.magic.enabled", "true", +// "spark.hadoop.fs.defaultFS", "s3a://my-bucket", +// //can change to other credential providers as shown here +// //https://hadoop.apache.org/docs/stable/hadoop-aws/tools/hadoop-aws/index.html#Changing_Authentication_Providers +// "spark.hadoop.fs.s3a.aws.credentials.provider", "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider", +// "spark.hadoop.fs.s3a.access.key", "access_key", +// "spark.hadoop.fs.s3a.secret.key", "secret_key" +// )); +// +// execute(s3Configuration, csvTask); +// +// execute(foreignKeySetup, configuration().generatedReportsFolderPath(baseFolder + "/report"), jsonTask, csvTxns); +// } +//} diff --git a/api/src/test/java/com/github/pflooky/datacaterer/java/api/ExampleJavaPlanRun.java b/api/src/test/java/com/github/pflooky/datacaterer/java/api/ExampleJavaPlanRun.java new file mode 100644 index 00000000..0ce03402 --- /dev/null +++ b/api/src/test/java/com/github/pflooky/datacaterer/java/api/ExampleJavaPlanRun.java @@ -0,0 +1,17 @@ +//package com.github.pflooky.datacaterer.java.api; +// +//public class ExampleJavaPlanRun extends PlanRun { +// { +// var myJson = json("minimal_json", "app/src/test/resources/sample/json/minimal") +// .schema(schema().addFields(field().name("account_id"), field().name("peter"))); +// var myPostgres = postgres("my_postgres", "url") +// .table("my.table") +// .schema(field().name("account_id")) +// .count(count() +// .recordsPerColumn(10, "account_id", "name") +// .generator(generator().min(10).max(100)) +// ); +// +// execute(myJson); +// } +//} diff --git a/api/src/test/scala/com/github/pflooky/datacaterer/api/DataCatererConfigurationBuilderTest.scala b/api/src/test/scala/com/github/pflooky/datacaterer/api/DataCatererConfigurationBuilderTest.scala new file mode 100644 index 00000000..4fad949b --- /dev/null +++ b/api/src/test/scala/com/github/pflooky/datacaterer/api/DataCatererConfigurationBuilderTest.scala @@ -0,0 +1,203 @@ +package com.github.pflooky.datacaterer.api + +import com.github.pflooky.datacaterer.api.model.Constants.{DEFAULT_CASSANDRA_PASSWORD, 
DEFAULT_CASSANDRA_USERNAME, DEFAULT_KAFKA_URL, DEFAULT_MYSQL_PASSWORD, DEFAULT_MYSQL_URL, DEFAULT_MYSQL_USERNAME, DEFAULT_POSTGRES_PASSWORD, DEFAULT_POSTGRES_URL, DEFAULT_POSTGRES_USERNAME, DEFAULT_SOLACE_CONNECTION_FACTORY, DEFAULT_SOLACE_INITIAL_CONTEXT_FACTORY, DEFAULT_SOLACE_PASSWORD, DEFAULT_SOLACE_URL, DEFAULT_SOLACE_USERNAME, DEFAULT_SOLACE_VPN_NAME} +import com.github.pflooky.datacaterer.api.model.{FlagsConfig, FoldersConfig, GenerationConfig, MetadataConfig} +import org.junit.runner.RunWith +import org.scalatest.funsuite.AnyFunSuite +import org.scalatestplus.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +class DataCatererConfigurationBuilderTest extends AnyFunSuite { + + test("Can create basic configuration with defaults") { + val result = DataCatererConfigurationBuilder().build + + assert(result.flagsConfig == FlagsConfig()) + assert(result.foldersConfig == FoldersConfig()) + assert(result.metadataConfig == MetadataConfig()) + assert(result.generationConfig == GenerationConfig()) + assert(result.connectionConfigByName.isEmpty) + assert(result.runtimeConfig.size == 11) + assert(result.master == "local[*]") + } + + test("Can create postgres connection configuration") { + val result = DataCatererConfigurationBuilder() + .postgres("my_postgres") + .build + .connectionConfigByName + + assert(result.size == 1) + assert(result.contains("my_postgres")) + val config = result("my_postgres") + assert(config("url") == DEFAULT_POSTGRES_URL) + assert(config("user") == DEFAULT_POSTGRES_USERNAME) + assert(config("password") == DEFAULT_POSTGRES_PASSWORD) + assert(config("format") == "jdbc") + assert(config("driver") == "org.postgresql.Driver") + } + + test("Can create postgres connection with custom configuration") { + val result = DataCatererConfigurationBuilder() + .postgres("my_postgres", "jdbc:postgresql://localhost:5432/customer", options = Map("stringtype" -> "undefined")) + .build + .connectionConfigByName + + assert(result.size == 1) + assert(result.contains("my_postgres")) + val config = result("my_postgres") + assert(config.size == 6) + assert(config("url") == "jdbc:postgresql://localhost:5432/customer") + assert(config("stringtype") == "undefined") + } + + test("Can create mysql connection configuration") { + val result = DataCatererConfigurationBuilder() + .mysql("my_mysql") + .build + .connectionConfigByName + + assert(result.size == 1) + assert(result.contains("my_mysql")) + val config = result("my_mysql") + assert(config("url") == DEFAULT_MYSQL_URL) + assert(config("user") == DEFAULT_MYSQL_USERNAME) + assert(config("password") == DEFAULT_MYSQL_PASSWORD) + assert(config("format") == "jdbc") + assert(config("driver") == "com.mysql.cj.jdbc.Driver") + } + + test("Can create cassandra connection configuration") { + val result = DataCatererConfigurationBuilder() + .cassandra("my_cassandra") + .build + .connectionConfigByName + + assert(result.size == 1) + assert(result.contains("my_cassandra")) + val config = result("my_cassandra") + assert(config("spark.cassandra.connection.host") == "cassandraserver") + assert(config("spark.cassandra.connection.port") == "9042") + assert(config("spark.cassandra.auth.username") == DEFAULT_CASSANDRA_USERNAME) + assert(config("spark.cassandra.auth.password") == DEFAULT_CASSANDRA_PASSWORD) + assert(config("format") == "org.apache.spark.sql.cassandra") + } + + test("Can create solace connection configuration") { + val result = DataCatererConfigurationBuilder() + .solace("my_solace") + .build + .connectionConfigByName + + assert(result.size == 1) + 
assert(result.contains("my_solace")) + val config = result("my_solace") + assert(config("url") == DEFAULT_SOLACE_URL) + assert(config("user") == DEFAULT_SOLACE_USERNAME) + assert(config("password") == DEFAULT_SOLACE_PASSWORD) + assert(config("format") == "jms") + assert(config("vpnName") == DEFAULT_SOLACE_VPN_NAME) + assert(config("connectionFactory") == DEFAULT_SOLACE_CONNECTION_FACTORY) + assert(config("initialContextFactory") == DEFAULT_SOLACE_INITIAL_CONTEXT_FACTORY) + } + + test("Can create kafka connection configuration") { + val result = DataCatererConfigurationBuilder() + .kafka("my_kafka") + .build + .connectionConfigByName + + assert(result.size == 1) + assert(result.contains("my_kafka")) + val config = result("my_kafka") + assert(config("kafka.bootstrap.servers") == DEFAULT_KAFKA_URL) + assert(config("format") == "kafka") + } + + test("Can create http connection configuration") { + val result = DataCatererConfigurationBuilder() + .http("my_http", "user", "pw") + .build + .connectionConfigByName + + assert(result.size == 1) + assert(result.contains("my_http")) + val config = result("my_http") + assert(config("user") == "user") + assert(config("password") == "pw") + } + + test("Can enable/disable flags") { + val result = DataCatererConfigurationBuilder() + .enableCount(false) + .enableGenerateData(false) + .enableDeleteGeneratedRecords(true) + .enableGeneratePlanAndTasks(true) + .enableUniqueCheck(true) + .enableFailOnError(false) + .enableRecordTracking(true) + .enableSaveReports(true) + .enableSinkMetadata(true) + .enableValidation(true) + .build + .flagsConfig + + assert(!result.enableCount) + assert(!result.enableGenerateData) + assert(result.enableDeleteGeneratedRecords) + assert(result.enableGeneratePlanAndTasks) + assert(result.enableUniqueCheck) + assert(!result.enableFailOnError) + assert(result.enableRecordTracking) + assert(result.enableSaveReports) + assert(result.enableSinkMetadata) + assert(result.enableValidation) + } + + test("Can alter folder paths") { + val result = DataCatererConfigurationBuilder() + .planFilePath("/my_plan") + .taskFolderPath("/my_task") + .recordTrackingFolderPath("/my_record_tracking") + .validationFolderPath("/my_validation") + .generatedReportsFolderPath("/my_generation_results") + .generatedPlanAndTaskFolderPath("/my_generated_plan_tasks") + .build + .foldersConfig + + assert(result.planFilePath == "/my_plan") + assert(result.taskFolderPath == "/my_task") + assert(result.recordTrackingFolderPath == "/my_record_tracking") + assert(result.validationFolderPath == "/my_validation") + assert(result.generatedReportsFolderPath == "/my_generation_results") + assert(result.generatedPlanAndTaskFolderPath == "/my_generated_plan_tasks") + } + + test("Can alter metadata configurations") { + val result = DataCatererConfigurationBuilder() + .numRecordsFromDataSourceForDataProfiling(1) + .numRecordsForAnalysisForDataProfiling(2) + .numGeneratedSamples(3) + .oneOfMinCount(100) + .oneOfDistinctCountVsCountThreshold(0.3) + .build + .metadataConfig + + assert(result.numRecordsFromDataSource == 1) + assert(result.numRecordsForAnalysis == 2) + assert(result.numGeneratedSamples == 3) + assert(result.oneOfMinCount == 100) + assert(result.oneOfDistinctCountVsCountThreshold == 0.3) + } + + test("Can alter generation configurations") { + val result = DataCatererConfigurationBuilder() + .numRecordsPerBatch(100) + .numRecordsPerStep(10) + .build + .generationConfig + + assert(result.numRecordsPerBatch == 100) + assert(result.numRecordsPerStep.contains(10)) + } +} diff 
--git a/api/src/test/scala/com/github/pflooky/datacaterer/api/ExamplePlanRun.scala b/api/src/test/scala/com/github/pflooky/datacaterer/api/ExamplePlanRun.scala
new file mode 100644
index 00000000..0a864d93
--- /dev/null
+++ b/api/src/test/scala/com/github/pflooky/datacaterer/api/ExamplePlanRun.scala
@@ -0,0 +1,295 @@
+package com.github.pflooky.datacaterer.api
+
+import com.github.pflooky.datacaterer.api.model.{ArrayType, DateType, DoubleType, IntegerType}
+
+import java.sql.Date
+
+class ExamplePlanRun extends PlanRun {
+
+  val planBuilder = plan.name("sample plan")
+
+  val tasksBuilder = tasks.
+    addTask("account_json", "fs_json",
+      step.name("account")
+        .option(("path", "app/src/test/resources/sample/json/account"))
+        .schema(schema.addFields(
+          field.name("account_id"),
+          field.name("year").`type`(IntegerType).min(2022),
+          field.name("name").static("peter")
+        ))
+    )
+
+  execute(List(tasksBuilder), planBuilder)
+}
+
+class MinimalPlanRun extends PlanRun {
+  execute(configuration =
+    configuration
+      .enableGeneratePlanAndTasks(true)
+      .addConnectionConfig("account_json", "json", Map("path" -> "app/src/test/resources/sample/json/account"))
+  )
+}
+
+class MinimalPlanWithManualTaskRun extends PlanRun {
+  val tasksBuilder = tasks.addTask("my_task", "minimal_json",
+    step
+      .option(("path", "app/src/test/resources/sample/json/minimal"))
+      .schema(schema.addFields(field.name("account_id")))
+  )
+  execute(tasksBuilder)
+}
+
+
+class LargeCountRun extends PlanRun {
+  val jsonTask = json("minimal_json", "app/src/test/resources/sample/json/large")
+    .schema(schema.addFields(
+      field.name("account_id"),
+      field.name("year").`type`(IntegerType).min(2022),
+      field.name("name").expression("#{Name.name}"),
+      field.name("amount").`type`(DoubleType).max(1000.0),
+      field.name("date").`type`(DateType).min(Date.valueOf("2022-01-01")),
+      field.name("status").oneOf("open", "closed"),
+      field.name("txn_list")
+        .`type`(ArrayType)
+        .schema(schema.addFields(
+          field.name("id"),
+          field.name("date").`type`(DateType).min(Date.valueOf("2022-01-01")),
+          field.name("amount").`type`(DoubleType)
+        ))
+    ))
+    .count(count
+      .records(10000)
+      .recordsPerColumn(100, "account_id")
+    )
+
+  val conf = configuration
+    .enableCount(true)
+    .generatedReportsFolderPath("app/src/test/resources/sample/report")
+  execute(conf, jsonTask)
+}
+
+class DocsPlanRun extends PlanRun {
+
+  validationConfig
+    .name("account_checks")
+    .description("Check account-related fields have gone through the system correctly")
+    .addValidations(
+      "accountJson",
+      Map("path" -> "sample/json/txn-gen"),
+      validation.expr("amount < 100"),
+      validation.expr("year == 2021").errorThreshold(0.1),
+      validation.expr("regexp_like(name, 'Peter .*')").errorThreshold(200).description("Should be lots of Peters")
+    )
+  val t = task
+    .name("csv_file")
+    .steps(
+      step
+        .name("transactions")
+        .`type`("csv")
+        .option("path", "app/src/test/resources/sample/csv/transactions")
+        .count(
+          count
+            .records(1000)
+            .recordsPerColumnGenerator(
+              generator.min(1).max(2),
+              "account_id"
+            )
+        )
+        .schema(schema.addField("account_id"))
+    )
+}
+
+class FullExamplePlanRun extends PlanRun {
+
+  val startDate = Date.valueOf("2022-01-01")
+  val accountIdField = field.name("account_id").regex("ACC[0-9]{8}")
+  val nameField = field.name("name").expression("#{Name.name}")
+
+  val postgresTask = task.name("postgres_account_details")
+    .steps(
+      step
+        .name("transaction")
+        .jdbcTable("account.transaction")
+        .schema(schema.addFields(
+          accountIdField, +
field.name("txn_id").regex("txn_[0-9]{5}"), + field.name("year").`type`(IntegerType).sql("YEAR(date)"), + nameField, + field.name("date").`type`(DateType).min(startDate), + field.name("amount").`type`(DoubleType).max(10000), + field.name("credit_debit").sql("CASE WHEN amount < 0 THEN 'C' ELSE 'D' END") + )), + step + .name("account") + .jdbcTable("account.account") + .schema(schema.addFields( + accountIdField, + nameField, + field.name("open_date").`type`(DateType).min(startDate), + field.name("status").oneOf("open", "closed", "pending") + )) + ) + + val jsonTask = task.name("json_account_details") + .steps( + step + .name("account_info") + .path("/tmp/src/main/resources/sample/json") + .schema(schema.addFields( + accountIdField, + nameField, + field.name("txn_list") + .`type`(ArrayType) + .schema(schema.addFields( + field.name("id"), + field.name("date").`type`(DateType).min(startDate), + field.name("amount").`type`(DoubleType), + )) + )) + ) + + val conf = configuration + .postgres("customer_postgres") + .json("account_json") + + val p = plan.taskSummaries( + taskSummary.dataSource("customer_postgres").task(postgresTask), + taskSummary.dataSource("account_json").task(jsonTask), + ).addForeignKeyRelationship( + foreignField("customer_postgres", "account", "account_id"), + foreignField("customer_postgres", "transaction", "account_id") + ).addForeignKeyRelationship( + foreignField("customer_postgres", "account", "account_id"), + foreignField("account_json", "account_info", "account_id") + ) + + execute(p, conf) +} + +class ConnectionBasedApiPlanRun extends PlanRun { + + val csvGenerate = csv("my_csv", "app/src/test/resources/sample/connection-api/csv") + .schema( + field.name("account_id"), + field.name("year").`type`(IntegerType).min(2022) + ) + .count(count.records(100)) + + val jsonGenerate = json("my_json", "app/src/test/resources/sample/connection-api/json") + .partitionBy("age") + .schema( + field.name("name").expression("#{Name.name}"), + field.name("age").`type`(IntegerType).min(18).max(20), + ) + .count(count.records(100)) + + val x = json("account_info", "/tmp/data-caterer/json") + .schema( + field.name("account_id"), + field.name("year").`type`(IntegerType).min(2022), + field.name("name").expression("#{Name.name}"), + field.name("amount").`type`(DoubleType).max(1000.0), + field.name("date").`type`(DateType).min(Date.valueOf("2022-01-01")), + field.name("status").oneOf("open", "closed"), + field.name("txn_list") + .`type`(ArrayType) + .schema(schema.addFields( + field.name("id"), + field.name("date").`type`(DateType).min(Date.valueOf("2022-01-01")), + field.name("amount").`type`(DoubleType), + )) + ) + .count(count.records(100)) + + val postgresGenerate = postgres("my_postgres") + .task( + step + .jdbcTable("public.accounts") + .schema( + field.name("account_id"), + field.name("name").expression("#{Name.name}"), + ), + step + .jdbcTable("public.transactions") + .schema( + field.name("account_id"), + field.name("amount").`type`(DoubleType).max(1000) + ) + .count(count.recordsPerColumn(10, "account_id")) + ) + + val postgresAcc = postgres("my_postgres") + .table("public.accounts") + .schema( + field.name("account_id") + ) + var jsonTask = json("my_json", "/tmp/json") + .schema( + field.name("account_id"), + field.name("customer_details") + .schema( + field.name("name").sql("_join_txn_name").`type`(DoubleType).enableEdgeCases(true).edgeCaseProbability(0.1) + ), + field.name("_join_txn_name").omit(true) + ) + plan.addForeignKeyRelationship( + postgresAcc, List("account_id", ""), + 
List(jsonTask -> List("account_id", "")) + ) + val csvTask = csv("my_csv", "s3a://my-bucket/csv/accounts") + .schema( + field.name("account_id"), + ) + val conf = configuration + .generatedReportsFolderPath("s3a://my-bucket/data-caterer/generated") + .planFilePath("s3a://my-bucket/data-caterer/generated/plan/customer-create-plan.yaml") + .taskFolderPath("s3a://my-bucket/data-caterer/generated/task") + .runtimeConfig(Map( + "spark.hadoop.fs.s3a.directory.marker.retention" -> "keep", + "spark.hadoop.fs.s3a.bucket.all.committer.magic.enabled" -> "true", + "spark.hadoop.fs.defaultFS" -> "s3a://my-bucket", + //can change to other credential providers as shown here + //https://hadoop.apache.org/docs/stable/hadoop-aws/tools/hadoop-aws/index.html#Changing_Authentication_Providers + "spark.hadoop.fs.s3a.aws.credentials.provider" -> "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider", + "spark.hadoop.fs.s3a.access.key" -> "access_key", + "spark.hadoop.fs.s3a.secret.key" -> "secret_key" + )) + execute(conf, csvGenerate, jsonGenerate) +} + +class DocumentationPlanRun extends PlanRun { + val jsonTask = json("account_info", "/opt/app/data/json") + .schema( + field.name("account_id").regex("ACC[0-9]{8}"), + field.name("year").`type`(IntegerType).sql("YEAR(date)"), + field.name("name").expression("#{Name.name}"), + field.name("amount").`type`(DoubleType).min(10).max(1000), + field.name("date").`type`(DateType).min(Date.valueOf("2022-01-01")), + field.name("status").oneOf("open", "closed"), + field.name("txn_list") + .`type`(ArrayType) + .schema(schema.addFields( + field.name("id").sql("_join_txn_id"), + field.name("date").`type`(DateType).min(Date.valueOf("2022-01-01")), + field.name("amount").`type`(DoubleType) + )), + field.name("_join_txn_id").omit(true) + ) + .count(count.records(100)) + + val csvTxns = csv("transactions", "/opt/app/data/csv") + .schema( + field.name("account_id"), + field.name("txn_id"), + field.name("amount"), + field.name("merchant").expression("#{Company.name}"), + ) + .count(count.recordsPerColumnGenerator(generator.min(1).max(5), "account_id")) + + val foreignKeySetup = plan + .addForeignKeyRelationship(jsonTask, "account_id", List((csvTxns, "account_id"))) + .addForeignKeyRelationship(jsonTask, "_join_txn_id", List((csvTxns, "txn_id"))) + .addForeignKeyRelationship(jsonTask, "amount", List((csvTxns, "amount"))) + + execute(foreignKeySetup, configuration, jsonTask, csvTxns) +} \ No newline at end of file diff --git a/api/src/test/scala/com/github/pflooky/datacaterer/api/MetadataSourceBuilderTest.scala b/api/src/test/scala/com/github/pflooky/datacaterer/api/MetadataSourceBuilderTest.scala new file mode 100644 index 00000000..820c438d --- /dev/null +++ b/api/src/test/scala/com/github/pflooky/datacaterer/api/MetadataSourceBuilderTest.scala @@ -0,0 +1,55 @@ +package com.github.pflooky.datacaterer.api + +import com.github.pflooky.datacaterer.api.model.Constants.{METADATA_SOURCE_URL, OPEN_LINEAGE_DATASET, OPEN_LINEAGE_NAMESPACE, OPEN_METADATA_API_VERSION, OPEN_METADATA_AUTH_TYPE, OPEN_METADATA_AUTH_TYPE_BASIC, OPEN_METADATA_AUTH_TYPE_OPEN_METADATA, OPEN_METADATA_BASIC_AUTH_PASSWORD, OPEN_METADATA_BASIC_AUTH_USERNAME, OPEN_METADATA_DEFAULT_API_VERSION, OPEN_METADATA_HOST, OPEN_METADATA_JWT_TOKEN, SCHEMA_LOCATION} +import com.github.pflooky.datacaterer.api.model.{MarquezMetadataSource, OpenAPISource, OpenMetadataSource} +import org.junit.runner.RunWith +import org.scalatest.funsuite.AnyFunSuite +import org.scalatestplus.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +class 
MetadataSourceBuilderTest extends AnyFunSuite { + + test("Can create Marquez metadata source") { + val result = MetadataSourceBuilder().marquez("localhost:8080", "food_delivery").metadataSource + + assert(result.isInstanceOf[MarquezMetadataSource]) + assert(result.asInstanceOf[MarquezMetadataSource].connectionOptions == + Map(METADATA_SOURCE_URL -> "localhost:8080", OPEN_LINEAGE_NAMESPACE -> "food_delivery")) + } + + test("Can create Marquez metadata source with dataset") { + val result = MetadataSourceBuilder().marquez("localhost:8080", "food_delivery", "public.delivery").metadataSource + + assert(result.isInstanceOf[MarquezMetadataSource]) + assert(result.asInstanceOf[MarquezMetadataSource].connectionOptions == + Map(METADATA_SOURCE_URL -> "localhost:8080", OPEN_LINEAGE_NAMESPACE -> "food_delivery", OPEN_LINEAGE_DATASET -> "public.delivery")) + } + + test("Can create OpenMetadata metadata source") { + val result = MetadataSourceBuilder().openMetadataWithToken("localhost:8080", "my_token").metadataSource + + assert(result.isInstanceOf[OpenMetadataSource]) + assert(result.asInstanceOf[OpenMetadataSource].connectionOptions == + Map(OPEN_METADATA_HOST -> "localhost:8080", OPEN_METADATA_API_VERSION -> OPEN_METADATA_DEFAULT_API_VERSION, + OPEN_METADATA_AUTH_TYPE -> OPEN_METADATA_AUTH_TYPE_OPEN_METADATA, OPEN_METADATA_JWT_TOKEN -> "my_token")) + } + + test("Can create OpenMetadata metadata source with basic auth") { + val result = MetadataSourceBuilder().openMetadata("localhost:8080", OPEN_METADATA_AUTH_TYPE_BASIC, + Map(OPEN_METADATA_BASIC_AUTH_USERNAME -> "username", OPEN_METADATA_BASIC_AUTH_PASSWORD -> "password")).metadataSource + + assert(result.isInstanceOf[OpenMetadataSource]) + assert(result.asInstanceOf[OpenMetadataSource].connectionOptions == + Map(OPEN_METADATA_HOST -> "localhost:8080", OPEN_METADATA_API_VERSION -> OPEN_METADATA_DEFAULT_API_VERSION, + OPEN_METADATA_AUTH_TYPE -> OPEN_METADATA_AUTH_TYPE_BASIC, OPEN_METADATA_BASIC_AUTH_USERNAME -> "username", + OPEN_METADATA_BASIC_AUTH_PASSWORD -> "password")) + } + + test("Can create OpenAPI metadata source") { + val result = MetadataSourceBuilder().openApi("localhost:8080").metadataSource + + assert(result.isInstanceOf[OpenAPISource]) + assert(result.asInstanceOf[OpenAPISource].connectionOptions == Map(SCHEMA_LOCATION -> "localhost:8080")) + } + +} diff --git a/api/src/test/scala/com/github/pflooky/datacaterer/api/PlanBuilderTest.scala b/api/src/test/scala/com/github/pflooky/datacaterer/api/PlanBuilderTest.scala new file mode 100644 index 00000000..131a29b0 --- /dev/null +++ b/api/src/test/scala/com/github/pflooky/datacaterer/api/PlanBuilderTest.scala @@ -0,0 +1,201 @@ +package com.github.pflooky.datacaterer.api + +import com.github.pflooky.datacaterer.api.connection.FileBuilder +import com.github.pflooky.datacaterer.api.model.Constants.FOREIGN_KEY_DELIMITER +import com.github.pflooky.datacaterer.api.model.{DataCatererConfiguration, ExpressionValidation, ForeignKeyRelation, PauseWaitCondition} +import org.junit.runner.RunWith +import org.scalatest.funsuite.AnyFunSuite +import org.scalatestplus.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +class PlanBuilderTest extends AnyFunSuite { + + test("Can create Plan") { + val planBuilder = PlanBuilder() + val name = "basic plan" + val desc = "basic desc" + val taskSummaries = TaskSummaryBuilder() + .name("account_json_task") + .dataSource("account_json") + + val result = planBuilder.name(name) + .description(desc) + .taskSummaries(taskSummaries) + + assert(result.plan.name == name) + 
assert(result.plan.description == desc) + assert(result.plan.tasks.size == 1) + assert(result.plan.tasks.head == taskSummaries.taskSummary) + } + + test("Can implement PlanRun") { + val result: PlanRun = new PlanRun { + val dataSourceName = "account_json" + val t = tasks.addTask( + "my task", + dataSourceName, + step.schema(schema.addFields(field.name("account_id"))) + ) + + val p = plan.name("my plan") + .seed(1) + .locale("en") + .addForeignKeyRelationship( + new ForeignKeyRelation("account_json", "default_step", "account_id"), + new ForeignKeyRelation("txn_db", "txn_step", "account_number") + ) + .addForeignKeyRelationship( + new ForeignKeyRelation("account_json", "default_step", "customer_number"), + new ForeignKeyRelation("acc_db", "acc_step", "customer_number") + ) + + val c = configuration + .addRuntimeConfig("spark.sql.shuffle.partitions" -> "2") + .enableGeneratePlanAndTasks(true) + .enableValidation(true) + .addConnectionConfig(dataSourceName, "json", Map()) + .addConnectionConfig("txn_db", "postgres", Map()) + + val v = validationConfig + .name("account_validation") + .description("account checks") + .addDataSourceValidation( + dataSourceName, + dataSourceValidation + .validations( + validation + .description("name is equal to Peter") + .errorThreshold(0.1) + .expr("name == 'Peter'") + ).option(("path", "test/path/json")) + ) + + execute(List(t), p, c, List(v)) + } + + assert(result._tasks.size == 1) + assert(result._tasks.head.name == "my task") + assert(result._tasks.head.steps.head.schema.fields.get.head.name == "account_id") + + assert(result._plan.name == "my plan") + assert(result._plan.tasks.size == 1) + assert(result._plan.tasks.head.name == "my task") + assert(result._plan.tasks.head.dataSourceName == "account_json") + assert(result._plan.tasks.head.enabled) + assert(result._plan.sinkOptions.get.seed.contains("1")) + assert(result._plan.sinkOptions.get.locale.contains("en")) + val fk = result._plan.sinkOptions.get.foreignKeys + assert(fk.exists(f => f._1.equalsIgnoreCase(s"account_json${FOREIGN_KEY_DELIMITER}default_step${FOREIGN_KEY_DELIMITER}account_id"))) + assert( + fk.find(f => f._1.equalsIgnoreCase(s"account_json${FOREIGN_KEY_DELIMITER}default_step${FOREIGN_KEY_DELIMITER}account_id")).get._2 == + List(s"txn_db${FOREIGN_KEY_DELIMITER}txn_step${FOREIGN_KEY_DELIMITER}account_number") + ) + assert(fk.exists(f => f._1.equalsIgnoreCase(s"account_json${FOREIGN_KEY_DELIMITER}default_step${FOREIGN_KEY_DELIMITER}customer_number"))) + assert( + fk.find(f => f._1.equalsIgnoreCase(s"account_json${FOREIGN_KEY_DELIMITER}default_step${FOREIGN_KEY_DELIMITER}customer_number")).get._2 == + List(s"acc_db${FOREIGN_KEY_DELIMITER}acc_step${FOREIGN_KEY_DELIMITER}customer_number") + ) + + assert(result._configuration.flagsConfig.enableCount) + assert(result._configuration.flagsConfig.enableGenerateData) + assert(!result._configuration.flagsConfig.enableRecordTracking) + assert(!result._configuration.flagsConfig.enableDeleteGeneratedRecords) + assert(result._configuration.flagsConfig.enableGeneratePlanAndTasks) + assert(result._configuration.flagsConfig.enableFailOnError) + assert(!result._configuration.flagsConfig.enableUniqueCheck) + assert(!result._configuration.flagsConfig.enableSinkMetadata) + assert(result._configuration.flagsConfig.enableSaveReports) + assert(result._configuration.flagsConfig.enableValidation) + assert(result._configuration.connectionConfigByName.size == 2) + assert(result._configuration.connectionConfigByName.contains("account_json")) + 
assert(result._configuration.connectionConfigByName("account_json") == Map("format" -> "json")) + assert(result._configuration.connectionConfigByName.contains("txn_db")) + assert(result._configuration.connectionConfigByName("txn_db") == Map("format" -> "postgres")) + assert(result._configuration.runtimeConfig == DataCatererConfiguration().runtimeConfig ++ Map("spark.sql.shuffle.partitions" -> "2")) + + assert(result._validations.size == 1) + assert(result._validations.head.dataSources.size == 1) + val dataSourceHead = result._validations.head.dataSources.head + assert(dataSourceHead._1 == "account_json") + assert(dataSourceHead._2.size == 1) + assert(dataSourceHead._2.head.validations.size == 1) + val validationHead = dataSourceHead._2.head.validations.head.validation + assert(validationHead.description.contains("name is equal to Peter")) + assert(validationHead.errorThreshold.contains(0.1)) + assert(validationHead.isInstanceOf[ExpressionValidation]) + assert(validationHead.asInstanceOf[ExpressionValidation].expr == "name == 'Peter'") + assert(dataSourceHead._2.head.options == Map("path" -> "test/path/json")) + assert(dataSourceHead._2.head.waitCondition == PauseWaitCondition()) + } + + test("Can define random seed and locale that get used across all data generators") { + val result = PlanBuilder().sinkOptions(SinkOptionsBuilder().locale("es").seed(1)).plan + + assert(result.sinkOptions.isDefined) + assert(result.sinkOptions.get.locale.contains("es")) + assert(result.sinkOptions.get.seed.contains("1")) + } + + test("Can define foreign key via connection task builder") { + val jsonTask = ConnectionConfigWithTaskBuilder().file("my_json", "json") + .schema(FieldBuilder().name("account_id")) + val csvTask = ConnectionConfigWithTaskBuilder().file("my_csv", "csv") + .schema(FieldBuilder().name("account_id")) + val result = PlanBuilder().addForeignKeyRelationship( + jsonTask, List("account_id"), + List(csvTask -> List("account_id")) + ).plan + + assert(result.sinkOptions.isDefined) + val fk = result.sinkOptions.get.foreignKeys + assert(fk.nonEmpty) + assert(fk.size == 1) + assert(fk.exists(f => f._1.startsWith("my_json") && f._1.endsWith("account_id") && + f._2.size == 1 && f._2.head.startsWith("my_csv") && f._2.head.endsWith("account_id") + )) + + val result2 = PlanBuilder().addForeignKeyRelationship( + jsonTask, "account_id", + List(csvTask -> "account_id") + ).plan + + assert(result2.sinkOptions.isDefined) + val fk2 = result2.sinkOptions.get.foreignKeys + assert(fk2.nonEmpty) + assert(fk2.size == 1) + } + + test("Throw runtime exception when foreign key column is not defined in data sources") { + val jsonTask = ConnectionConfigWithTaskBuilder().file("my_json", "json") + val csvTask = ConnectionConfigWithTaskBuilder().file("my_csv", "csv") + + assertThrows[RuntimeException](PlanBuilder().addForeignKeyRelationship( + jsonTask, List("account_id"), + List(csvTask -> List("account_id")) + ).plan) + } + + test("Throw runtime exception when foreign key column is not defined in data sources with other columns") { + val jsonTask = ConnectionConfigWithTaskBuilder().file("my_json", "json").schema(FieldBuilder().name("account_number")) + val csvTask = ConnectionConfigWithTaskBuilder().file("my_csv", "csv").schema(FieldBuilder().name("account_type")) + + assertThrows[RuntimeException](PlanBuilder().addForeignKeyRelationship( + jsonTask, List("account_id"), + List(csvTask -> List("account_id")) + ).plan) + } + + test("Don't throw runtime exception when data source schema is defined from metadata source") { 
+ val jsonTask = ConnectionConfigWithTaskBuilder().file("my_json", "json").schema(MetadataSourceBuilder().openApi("localhost:8080")) + val csvTask = ConnectionConfigWithTaskBuilder().file("my_csv", "csv").schema(MetadataSourceBuilder().openApi("localhost:8080")) + val result = PlanBuilder().addForeignKeyRelationship( + jsonTask, List("account_id"), + List(csvTask -> List("account_id")) + ).plan + + assert(result.sinkOptions.isDefined) + val fk = result.sinkOptions.get.foreignKeys + assert(fk.nonEmpty) + assert(fk.size == 1) + } +} diff --git a/api/src/test/scala/com/github/pflooky/datacaterer/api/PlanRunTest.scala b/api/src/test/scala/com/github/pflooky/datacaterer/api/PlanRunTest.scala new file mode 100644 index 00000000..cf61d6ba --- /dev/null +++ b/api/src/test/scala/com/github/pflooky/datacaterer/api/PlanRunTest.scala @@ -0,0 +1,133 @@ +package com.github.pflooky.datacaterer.api + +import com.github.pflooky.datacaterer.api.model.Constants.{CSV, FORMAT, JDBC_TABLE, PATH, URL} +import com.github.pflooky.datacaterer.api.model.ExpressionValidation +import org.junit.runner.RunWith +import org.scalatest.funsuite.AnyFunSuite +import org.scalatestplus.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +class PlanRunTest extends AnyFunSuite { + + test("Can create plan with each type of connection") { + val result = new PlanRun { + val mySchema = schema.addFields(field.name("account_id")) + val myCsv = csv("my_csv", "/my/csv").schema(mySchema) + val myJson = json("my_json", "/my/json").schema(mySchema) + val myParquet = parquet("my_parquet", "/my/parquet").schema(mySchema) + val myOrc = orc("my_orc", "/my/orc").schema(mySchema) + val myPostgres = postgres("my_postgres").table("account").schema(mySchema) + val myMySql = mysql("my_mysql").table("transaction").schema(mySchema) + val myCassandra = cassandra("my_cassandra").table("account", "accounts").schema(mySchema) + val mySolace = solace("my_solace").destination("solace_topic").schema(mySchema) + val myKafka = kafka("my_kafka").topic("kafka_topic").schema(mySchema) + val myHttp = http("my_http").schema(mySchema) + + execute(myCsv, myJson, myParquet, myOrc, myPostgres, myMySql, myCassandra, mySolace, myKafka, myHttp) + } + + val dsNames = List("my_csv", "my_json", "my_parquet", "my_orc", "my_postgres", "my_mysql", "my_cassandra", "my_solace", "my_kafka", "my_http") + assert(result._plan.tasks.size == 10) + assert(result._plan.tasks.map(_.dataSourceName) == dsNames) + assert(result._configuration.connectionConfigByName.size == 10) + assert(result._configuration.connectionConfigByName.keys.forall(dsNames.contains)) + assert(result._tasks.size == 10) + } + + test("Can create plan using same connection details from another step") { + val result = new PlanRun { + val myPostgresAccount = postgres("my_postgres", "my_postgres_url") + .table("account.accounts") + .schema(field.name("account_id")) + val myPostgresTransaction = postgres(myPostgresAccount) + .table("account.transactions") + .schema(field.name("txn_id")) + + execute(myPostgresAccount, myPostgresTransaction) + } + + assert(result._plan.tasks.size == 2) + assert(result._plan.tasks.map(_.dataSourceName).forall(_ == "my_postgres")) + assert(result._configuration.connectionConfigByName.size == 1) + assert(result._configuration.connectionConfigByName.contains("my_postgres")) + assert(result._configuration.connectionConfigByName("my_postgres").contains(URL)) + assert(result._configuration.connectionConfigByName("my_postgres").get(URL).contains("my_postgres_url")) + assert(result._tasks.size == 2) + 
val steps = result._tasks.flatMap(_.steps) + val resAccount = steps.filter(s => s.options.get(JDBC_TABLE).contains("account.accounts")).head + assert(resAccount.schema.fields.isDefined) + assert(resAccount.schema.fields.get.size == 1) + assert(resAccount.schema.fields.get.head.name == "account_id") + val resTxn = steps.filter(s => s.options.get(JDBC_TABLE).contains("account.transactions")).head + assert(resTxn.schema.fields.isDefined) + assert(resTxn.schema.fields.get.size == 1) + assert(resTxn.schema.fields.get.head.name == "txn_id") + assert(result._validations.isEmpty) + } + + test("Can create plan with validations for one data source") { + val result = new PlanRun { + val myCsv = csv("my_csv", "/my/data/path") + .schema(field.name("account_id")) + .validations(validation.expr("account_id != ''")) + + execute(myCsv) + } + + assert(result._validations.size == 1) + assert(result._validations.head.dataSources.size == 1) + assert(result._validations.head.dataSources.head._2.size == 1) + val dsValidation = result._validations.head.dataSources.head + assert(dsValidation._1 == "my_csv") + assert(dsValidation._2.head.options.nonEmpty) + assert(dsValidation._2.head.options == Map(FORMAT -> "csv", PATH -> "/my/data/path")) + assert(dsValidation._2.head.validations.size == 1) + assert(dsValidation._2.head.validations.head.validation.isInstanceOf[ExpressionValidation]) + val expressionValidation = dsValidation._2.head.validations.head.validation.asInstanceOf[ExpressionValidation] + assert(expressionValidation.expr == "account_id != ''") + } + + test("Can create plan with multiple validations for one data source") { + val result = new PlanRun { + val myPostgresAccount = postgres("my_postgres") + .table("account.accounts") + .validations(validation.expr("account_id != ''")) + val myPostgresTransaction = postgres("my_postgres") + .table("account", "transactions") + .validations(validation.expr("txn_id IS NOT NULL")) + + execute(myPostgresAccount, myPostgresTransaction) + } + + assert(result._validations.size == 1) + assert(result._validations.head.dataSources.size == 1) + val dsValidation = result._validations.head.dataSources.head + assert(dsValidation._1 == "my_postgres") + val accountValid = dsValidation._2.filter(_.options.get(JDBC_TABLE).contains("account.accounts")).head + assert(accountValid.validations.size == 1) + assert(accountValid.validations.exists(v => v.validation.asInstanceOf[ExpressionValidation].expr == "account_id != ''")) + val txnValid = dsValidation._2.filter(_.options.get(JDBC_TABLE).contains("account.transactions")).head + assert(txnValid.validations.size == 1) + assert(txnValid.validations.exists(v => v.validation.asInstanceOf[ExpressionValidation].expr == "txn_id IS NOT NULL")) + } + + test("Can create plan with validations only defined") { + val result = new PlanRun { + val myCsv = csv("my_csv", "/my/csv") + .validations(validation.expr("account_id != 'acc123'")) + + execute(myCsv) + } + + assert(result._tasks.size == 1) + assert(result._validations.size == 1) + assert(result._validations.head.dataSources.contains("my_csv")) + val validRes = result._validations.head.dataSources("my_csv").head + assert(validRes.validations.size == 1) + assert(validRes.validations.head.validation.asInstanceOf[ExpressionValidation].expr == "account_id != 'acc123'") + assert(validRes.options.nonEmpty) + assert(validRes.options == Map(FORMAT -> "csv", PATH -> "/my/csv")) + } + + +} \ No newline at end of file diff --git 
a/api/src/test/scala/com/github/pflooky/datacaterer/api/SinkOptionsBuilderTest.scala b/api/src/test/scala/com/github/pflooky/datacaterer/api/SinkOptionsBuilderTest.scala new file mode 100644 index 00000000..9b978ed3 --- /dev/null +++ b/api/src/test/scala/com/github/pflooky/datacaterer/api/SinkOptionsBuilderTest.scala @@ -0,0 +1,32 @@ +package com.github.pflooky.datacaterer.api + +import com.github.pflooky.datacaterer.api.model.Constants.FOREIGN_KEY_DELIMITER +import com.github.pflooky.datacaterer.api.model.ForeignKeyRelation +import org.junit.runner.RunWith +import org.scalatest.funsuite.AnyFunSuite +import org.scalatestplus.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +class SinkOptionsBuilderTest extends AnyFunSuite { + + test("Can create sink options with random seed, locale and foreign keys") { + val result = SinkOptionsBuilder() + .seed(10) + .locale("id") + .foreignKey(new ForeignKeyRelation("my_postgres", "account", "account_id"), + new ForeignKeyRelation("my_json", "account", "account_id")) + .foreignKey(new ForeignKeyRelation("my_postgres", "account", "customer_number"), + new ForeignKeyRelation("my_json", "account", "customer_number"), + new ForeignKeyRelation("my_parquet", "transaction", "cust_num")) + .sinkOptions + + assert(result.seed.contains("10")) + assert(result.locale.contains("id")) + assert(result.foreignKeys.size == 2) + assert(result.foreignKeys.contains(s"my_postgres${FOREIGN_KEY_DELIMITER}account${FOREIGN_KEY_DELIMITER}account_id" -> + List(s"my_json${FOREIGN_KEY_DELIMITER}account${FOREIGN_KEY_DELIMITER}account_id"))) + assert(result.foreignKeys.contains(s"my_postgres${FOREIGN_KEY_DELIMITER}account${FOREIGN_KEY_DELIMITER}customer_number" -> + List(s"my_json${FOREIGN_KEY_DELIMITER}account${FOREIGN_KEY_DELIMITER}customer_number", s"my_parquet${FOREIGN_KEY_DELIMITER}transaction${FOREIGN_KEY_DELIMITER}cust_num"))) + } + +} diff --git a/api/src/test/scala/com/github/pflooky/datacaterer/api/TasksBuilderTest.scala b/api/src/test/scala/com/github/pflooky/datacaterer/api/TasksBuilderTest.scala new file mode 100644 index 00000000..38a6c73f --- /dev/null +++ b/api/src/test/scala/com/github/pflooky/datacaterer/api/TasksBuilderTest.scala @@ -0,0 +1,289 @@ +package com.github.pflooky.datacaterer.api + +import com.github.pflooky.datacaterer.api.model.{ArrayType, Count, DateType, Field, Generator, IntegerType, StringType} +import org.junit.runner.RunWith +import org.scalatest.funsuite.AnyFunSuite +import org.scalatestplus.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +class TasksBuilderTest extends AnyFunSuite { + + test("Can create a task summary when given a task") { + val result = TaskSummaryBuilder() + .task(TaskBuilder().name("my task")) + .enabled(false) + .dataSource("account_json") + .taskSummary + + assert(result.name == "my task") + assert(result.dataSourceName == "account_json") + assert(!result.enabled) + } + + test("Can create a step with details") { + val result = StepBuilder() + .name("my step") + .`type`("csv") + .enabled(false) + .schema(SchemaBuilder()) + .count(CountBuilder()) + .option("dbtable" -> "account.history") + .options(Map("stringtype" -> "undefined")) + .step + + assert(result.name == "my step") + assert(result.`type` == "csv") + assert(!result.enabled) + assert(result.schema.fields.isEmpty) + assert(result.count == Count()) + assert(result.options == Map( + "dbtable" -> "account.history", + "stringtype" -> "undefined" + )) + } + + test("Can create simple count") { + val result = CountBuilder().records(20).count + + 
assert(result.records.contains(20)) + assert(result.perColumn.isEmpty) + assert(result.generator.isEmpty) + } + + test("Can create per column count") { + val result = CountBuilder() + .perColumn(PerColumnCountBuilder() + .records(20, "account_id") + ) + .count + + assert(result.records.contains(1000)) + assert(result.perColumn.isDefined) + assert(result.perColumn.get.count.contains(20)) + assert(result.perColumn.get.columnNames == List("account_id")) + assert(result.perColumn.get.generator.isEmpty) + assert(result.generator.isEmpty) + } + + test("Can create records per column from count builder") { + val result = CountBuilder() + .recordsPerColumn(20, "account_id") + .count + + assert(result.records.contains(1000)) + assert(result.perColumn.isDefined) + assert(result.perColumn.get.count.contains(20)) + assert(result.perColumn.get.columnNames == List("account_id")) + assert(result.perColumn.get.generator.isEmpty) + assert(result.generator.isEmpty) + } + + test("Can create generated records per column from count builder") { + val result = CountBuilder() + .recordsPerColumnGenerator(GeneratorBuilder(), "account_id") + .count + + assert(result.records.contains(1000)) + assert(result.perColumn.isDefined) + assert(result.perColumn.get.count.contains(10)) + assert(result.perColumn.get.columnNames == List("account_id")) + assert(result.perColumn.get.generator.isDefined) + assert(result.generator.isEmpty) + } + + test("Can create generated records per column with total records from count builder") { + val result = CountBuilder() + .recordsPerColumnGenerator(100, GeneratorBuilder(), "account_id") + .count + + assert(result.records.contains(100)) + assert(result.perColumn.isDefined) + assert(result.perColumn.get.count.contains(10)) + assert(result.perColumn.get.columnNames == List("account_id")) + assert(result.perColumn.get.generator.isDefined) + assert(result.generator.isEmpty) + } + + test("Can create per column count with generator") { + val result = CountBuilder() + .perColumn(PerColumnCountBuilder() + .generator( + GeneratorBuilder().min(5), + "account_id" + ) + ).count + + assert(result.records.contains(1000)) + assert(result.perColumn.isDefined) + assert(result.perColumn.get.count.contains(10)) + assert(result.perColumn.get.columnNames == List("account_id")) + assert(result.perColumn.get.generator.isDefined) + assert(result.perColumn.get.generator.get.`type` == "random") + assert(result.perColumn.get.generator.get.options("min") == "5") + assert(result.generator.isEmpty) + } + + test("Can create schema with add fields") { + val result = SchemaBuilder() + .addField("account_id") + .addField("year", IntegerType) + .addFields(FieldBuilder().name("name")) + .schema + + assert(result.fields.isDefined) + assert(result.fields.get.size == 3) + assert(result.fields.get.contains(Field("account_id", Some("string")))) + assert(result.fields.get.contains(Field("year", Some("integer")))) + assert(result.fields.get.contains(Field("name", Some("string")))) + } + + test("Can create field") { + val result = FieldBuilder() + .name("account_id") + .`type`(StringType) + .nullable(false) + .generator(GeneratorBuilder()) + .field + + assert(result.name == "account_id") + assert(result.`type`.contains("string")) + assert(!result.nullable) + assert(result.generator.isDefined) + assert(result.generator.contains(Generator())) + } + + test("Can create field generated from sql expression") { + val result = FieldBuilder() + .name("account_id") + .sql("SUBSTRING(account, 1, 5)") + .field + + assert(result.name == 
"account_id") + assert(result.`type`.contains("string")) + assert(result.generator.isDefined) + assert(result.generator.get.`type` == "sql") + assert(result.generator.get.options("sql") == "SUBSTRING(account, 1, 5)") + } + + test("Can create field generated from one of list of doubles") { + val result = FieldBuilder().name("account_id").oneOf(123.1, 789.2).field + + assert(result.name == "account_id") + assert(result.`type`.contains("double")) + assert(result.generator.isDefined) + assert(result.generator.get.options("oneOf") == List(123.1, 789.2)) + } + + test("Can create field generated from one of list of strings") { + val result = FieldBuilder().name("status").oneOf("open", "closed").field + + assert(result.name == "status") + assert(result.`type`.contains("string")) + assert(result.generator.get.options("oneOf") == List("open", "closed")) + } + + test("Can create field generated from one of list of long") { + val result = FieldBuilder().name("amount").oneOf(100L, 200L).field + + assert(result.name == "amount") + assert(result.`type`.contains("long")) + assert(result.generator.get.options("oneOf") == List(100L, 200L)) + } + + test("Can create field generated from one of list of int") { + val result = FieldBuilder().name("amount").oneOf(100, 200).field + + assert(result.name == "amount") + assert(result.`type`.contains("integer")) + assert(result.generator.get.options("oneOf") == List(100, 200)) + } + + test("Can create field generated from one of list of boolean") { + val result = FieldBuilder().name("is_open").oneOf(true, false).field + + assert(result.name == "is_open") + assert(result.`type`.contains("boolean")) + assert(result.generator.get.options("oneOf") == List(true, false)) + } + + test("Can create field with nested schema") { + val result = FieldBuilder() + .name("txn_list") + .`type`(new ArrayType(DateType)) + .schema(SchemaBuilder().addFields( + FieldBuilder().name("date").`type`(DateType) + )) + .field + + assert(result.name == "txn_list") + assert(result.`type`.contains("array")) + } + + test("Can create field with metadata") { + val result = FieldBuilder() + .name("account_id") + .regex("acc[0-9]{3}") + .seed(1) + .min(2) + .max(10) + .minLength(3) + .maxLength(4) + .avgLength(3) + .arrayMinLength(2) + .arrayMaxLength(2) + .expression("hello") + .nullable(false) + .static("acc123") + .arrayType("boolean") + .numericPrecision(10) + .numericScale(1) + .enableEdgeCases(true) + .edgeCaseProbability(0.5) + .enableNull(true) + .nullProbability(0.1) + .unique(true) + .omit(false) + .primaryKey(true) + .primaryKeyPosition(1) + .clusteringPosition(1) + .standardDeviation(0.1) + .mean(5.1) + .options(Map("customMetadata" -> "yes")) + .option("data" -> "big") + .field + + assert(result.name == "account_id") + assert(result.`type`.contains("string")) + assert(!result.nullable) + assert(result.generator.get.`type` == "regex") + val gen = result.generator.get.options + assert(gen("regex") == "acc[0-9]{3}") + assert(gen("seed") == "1") + assert(gen("min") == "2") + assert(gen("max") == "10") + assert(gen("minLen") == "3") + assert(gen("maxLen") == "4") + assert(gen("avgLen") == "3") + assert(gen("arrayMinLen") == "2") + assert(gen("arrayMaxLen") == "2") + assert(gen("expression") == "hello") + assert(gen("static") == "acc123") + assert(gen("arrayType") == "boolean") + assert(gen("precision") == "10") + assert(gen("scale") == "1") + assert(gen("enableEdgeCase") == "true") + assert(gen("edgeCaseProb") == "0.5") + assert(gen("enableNull") == "true") + assert(gen("nullProb") == "0.1") + 
assert(gen("isUnique") == "true") + assert(gen("omit") == "false") + assert(gen("isPrimaryKey") == "true") + assert(gen("primaryKeyPos") == "1") + assert(gen("clusteringPos") == "1") + assert(gen("customMetadata") == "yes") + assert(gen("data") == "big") + assert(gen("stddev") == "0.1") + assert(gen("mean") == "5.1") + } + +} diff --git a/api/src/test/scala/com/github/pflooky/datacaterer/api/ValidationConfigurationBuilderTest.scala b/api/src/test/scala/com/github/pflooky/datacaterer/api/ValidationConfigurationBuilderTest.scala new file mode 100644 index 00000000..cf7f3a60 --- /dev/null +++ b/api/src/test/scala/com/github/pflooky/datacaterer/api/ValidationConfigurationBuilderTest.scala @@ -0,0 +1,553 @@ +package com.github.pflooky.datacaterer.api + +import com.github.pflooky.datacaterer.api.model.Constants.{DEFAULT_VALIDATION_JOIN_TYPE, DEFAULT_VALIDATION_WEBHOOK_HTTP_DATA_SOURCE_NAME, DEFAULT_VALIDATION_WEBHOOK_HTTP_METHOD, DEFAULT_VALIDATION_WEBHOOK_HTTP_STATUS_CODES, PATH} +import com.github.pflooky.datacaterer.api.model.{DataExistsWaitCondition, ExpressionValidation, FileExistsWaitCondition, GroupByValidation, PauseWaitCondition, UpstreamDataSourceValidation, WebhookWaitCondition} +import org.junit.runner.RunWith +import org.scalatest.funsuite.AnyFunSuite +import org.scalatestplus.junit.JUnitRunner + +import java.sql.{Date, Timestamp} + +@RunWith(classOf[JUnitRunner]) +class ValidationConfigurationBuilderTest extends AnyFunSuite { + + test("Can create simple validation for data source") { + val result = ValidationConfigurationBuilder() + .name("my validations") + .description("check account data") + .addValidations( + "account_json", + Map("path" -> "/my/data/path"), + ValidationBuilder().expr("amount < 100"), + ValidationBuilder().expr("name == 'Peter'") + ).validationConfiguration + + assert(result.name == "my validations") + assert(result.description == "check account data") + assert(result.dataSources.size == 1) + assert(result.dataSources.head._1 == "account_json") + val headDsValid = result.dataSources.head._2.head + assert(headDsValid.options == Map("path" -> "/my/data/path")) + assert(headDsValid.waitCondition == PauseWaitCondition()) + assert(headDsValid.validations.size == 2) + assert(headDsValid.validations.map(_.validation).contains(ExpressionValidation("amount < 100"))) + assert(headDsValid.validations.map(_.validation).contains(ExpressionValidation("name == 'Peter'"))) + } + + test("Can create column specific validation") { + val result = ValidationBuilder().col("my_col").greaterThan(10) + + assert(result.validation.isInstanceOf[ExpressionValidation]) + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "my_col > 10") + } + + test("Can create column equal to validation") { + val result = ValidationBuilder().col("my_col").isEqual(10) + + assert(result.validation.isInstanceOf[ExpressionValidation]) + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "my_col == 10") + + val resultStr = ValidationBuilder().col("my_col").isEqual("created") + + assert(resultStr.validation.isInstanceOf[ExpressionValidation]) + assert(resultStr.validation.asInstanceOf[ExpressionValidation].expr == "my_col == 'created'") + } + + test("Can create column equal to another column validation") { + val result = ValidationBuilder().col("my_col").isEqualCol("other_col") + + assert(result.validation.isInstanceOf[ExpressionValidation]) + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "my_col == other_col") + } + + test("Can create column not equal to 
validation") { + val result = ValidationBuilder().col("my_col").isNotEqual(10) + + assert(result.validation.isInstanceOf[ExpressionValidation]) + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "my_col != 10") + + val resultStr = ValidationBuilder().col("my_col").isNotEqual("created") + + assert(resultStr.validation.isInstanceOf[ExpressionValidation]) + assert(resultStr.validation.asInstanceOf[ExpressionValidation].expr == "my_col != 'created'") + } + + test("Can create column not equal to another column validation") { + val result = ValidationBuilder().col("my_col").isNotEqualCol("other_col") + + assert(result.validation.isInstanceOf[ExpressionValidation]) + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "my_col != other_col") + } + + test("Can create column is null validation") { + val result = ValidationBuilder().col("my_col").isNull + + assert(result.validation.isInstanceOf[ExpressionValidation]) + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "ISNULL(my_col)") + } + + test("Can create column is not null validation") { + val result = ValidationBuilder().col("my_col").isNotNull + + assert(result.validation.isInstanceOf[ExpressionValidation]) + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "ISNOTNULL(my_col)") + } + + test("Can create column contains validation") { + val result = ValidationBuilder().col("my_col").contains("apple") + + assert(result.validation.isInstanceOf[ExpressionValidation]) + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "CONTAINS(my_col, 'apple')") + } + + test("Can create column not contains validation") { + val result = ValidationBuilder().col("my_col").notContains("apple") + + assert(result.validation.isInstanceOf[ExpressionValidation]) + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "!CONTAINS(my_col, 'apple')") + } + + test("Can create column less than validation") { + val result = ValidationBuilder().col("my_col").lessThan(Date.valueOf("2023-01-01")) + + assert(result.validation.isInstanceOf[ExpressionValidation]) + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "my_col < DATE('2023-01-01')") + } + + test("Can create column less than other column validation") { + val result = ValidationBuilder().col("my_col").lessThanCol("other_col") + + assert(result.validation.isInstanceOf[ExpressionValidation]) + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "my_col < other_col") + } + + test("Can create column less than or equal validation") { + val result = ValidationBuilder().col("my_col").lessThanOrEqual(Timestamp.valueOf("2023-01-01 00:00:00.0")) + + assert(result.validation.isInstanceOf[ExpressionValidation]) + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "my_col <= TIMESTAMP('2023-01-01 00:00:00.0')") + } + + test("Can create column less than or equal other column validation") { + val result = ValidationBuilder().col("my_col").lessThanOrEqualCol("other_col") + + assert(result.validation.isInstanceOf[ExpressionValidation]) + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "my_col <= other_col") + } + + test("Can create column greater than validation") { + val result = ValidationBuilder().col("my_col").greaterThan(10) + + assert(result.validation.isInstanceOf[ExpressionValidation]) + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "my_col > 10") + } + + test("Can create column greater than other column validation") { + val result = 
ValidationBuilder().col("my_col").greaterThanCol("other_col") + + assert(result.validation.isInstanceOf[ExpressionValidation]) + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "my_col > other_col") + } + + test("Can create column greater than or equal validation") { + val result = ValidationBuilder().col("my_col").greaterThanOrEqual(10) + + assert(result.validation.isInstanceOf[ExpressionValidation]) + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "my_col >= 10") + } + + test("Can create column greater than or equal other column validation") { + val result = ValidationBuilder().col("my_col").greaterThanOrEqualCol("other_col") + + assert(result.validation.isInstanceOf[ExpressionValidation]) + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "my_col >= other_col") + } + + test("Can create column between validation") { + val result = ValidationBuilder().col("my_col").between(10, 20) + + assert(result.validation.isInstanceOf[ExpressionValidation]) + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "my_col BETWEEN 10 AND 20") + } + + test("Can create column between other col validation") { + val result = ValidationBuilder().col("my_col").betweenCol("other_col", "another_col") + + assert(result.validation.isInstanceOf[ExpressionValidation]) + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "my_col BETWEEN other_col AND another_col") + } + + test("Can create column not between validation") { + val result = ValidationBuilder().col("my_col").notBetween(10, 20) + + assert(result.validation.isInstanceOf[ExpressionValidation]) + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "my_col NOT BETWEEN 10 AND 20") + } + + test("Can create column not between other col validation") { + val result = ValidationBuilder().col("my_col").notBetweenCol("other_col", "another_col") + + assert(result.validation.isInstanceOf[ExpressionValidation]) + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "my_col NOT BETWEEN other_col AND another_col") + } + + test("Can create column in validation") { + val result = ValidationBuilder().col("my_col").in("open", "closed") + + assert(result.validation.isInstanceOf[ExpressionValidation]) + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "my_col IN ('open','closed')") + } + + test("Can create column not in validation") { + val result = ValidationBuilder().col("my_col").notIn("open", "closed") + + assert(result.validation.isInstanceOf[ExpressionValidation]) + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "NOT my_col IN ('open','closed')") + } + + test("Can create column matches validation") { + val result = ValidationBuilder().col("my_col").matches("ACC[0-9]{8}") + + assert(result.validation.isInstanceOf[ExpressionValidation]) + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "REGEXP(my_col, 'ACC[0-9]{8}')") + } + + test("Can create column not matches validation") { + val result = ValidationBuilder().col("my_col").notMatches("ACC[0-9]{8}") + + assert(result.validation.isInstanceOf[ExpressionValidation]) + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "!REGEXP(my_col, 'ACC[0-9]{8}')") + } + + test("Can create column starts with validation") { + val result = ValidationBuilder().col("my_col").startsWith("ACC") + + assert(result.validation.isInstanceOf[ExpressionValidation]) + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "STARTSWITH(my_col, 
'ACC')") + } + + test("Can create column not starts with validation") { + val result = ValidationBuilder().col("my_col").notStartsWith("ACC") + + assert(result.validation.isInstanceOf[ExpressionValidation]) + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "!STARTSWITH(my_col, 'ACC')") + } + + test("Can create column ends with validation") { + val result = ValidationBuilder().col("my_col").endsWith("ACC") + + assert(result.validation.isInstanceOf[ExpressionValidation]) + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "ENDSWITH(my_col, 'ACC')") + } + + test("Can create column not ends with validation") { + val result = ValidationBuilder().col("my_col").notEndsWith("ACC") + + assert(result.validation.isInstanceOf[ExpressionValidation]) + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "!ENDSWITH(my_col, 'ACC')") + } + + test("Can create column size validation") { + val result = ValidationBuilder().col("my_col").size(2) + + assert(result.validation.isInstanceOf[ExpressionValidation]) + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "SIZE(my_col) == 2") + } + + test("Can create column not size validation") { + val result = ValidationBuilder().col("my_col").notSize(5) + + assert(result.validation.isInstanceOf[ExpressionValidation]) + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "SIZE(my_col) != 5") + } + + test("Can create column less than size validation") { + val result = ValidationBuilder().col("my_col").lessThanSize(5) + + assert(result.validation.isInstanceOf[ExpressionValidation]) + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "SIZE(my_col) < 5") + } + + test("Can create column less than or equal size validation") { + val result = ValidationBuilder().col("my_col").lessThanOrEqualSize(5) + + assert(result.validation.isInstanceOf[ExpressionValidation]) + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "SIZE(my_col) <= 5") + } + + test("Can create column greater than size validation") { + val result = ValidationBuilder().col("my_col").greaterThanSize(5) + + assert(result.validation.isInstanceOf[ExpressionValidation]) + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "SIZE(my_col) > 5") + } + + test("Can create column greater than or equal size validation") { + val result = ValidationBuilder().col("my_col").greaterThanOrEqualSize(5) + + assert(result.validation.isInstanceOf[ExpressionValidation]) + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "SIZE(my_col) >= 5") + } + + test("Can create column greater luhn check validation") { + val result = ValidationBuilder().col("my_col").luhnCheck + + assert(result.validation.isInstanceOf[ExpressionValidation]) + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "LUHN_CHECK(my_col)") + } + + test("Can create column type validation") { + val result = ValidationBuilder().col("my_col").hasType("double") + + assert(result.validation.isInstanceOf[ExpressionValidation]) + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "TYPEOF(my_col) == 'double'") + } + + test("Can create column generic expression validation") { + val result = ValidationBuilder().col("my_col").expr("my_col * 2 < other_col / 4") + + assert(result.validation.isInstanceOf[ExpressionValidation]) + assert(result.validation.asInstanceOf[ExpressionValidation].expr == "my_col * 2 < other_col / 4") + } + + test("Can create group by column validation") { + val result = 
ValidationBuilder() + .description("my_description") + .errorThreshold(0.5) + .groupBy("account_id", "year") + .sum("amount") + .lessThan(100) + + assert(result.validation.isInstanceOf[GroupByValidation]) + val validation = result.validation.asInstanceOf[GroupByValidation] + assert(validation.groupByCols == Seq("account_id", "year")) + assert(validation.aggCol == "amount") + assert(validation.aggType == "sum") + assert(validation.expr == "sum(amount) < 100") + assert(validation.description.contains("my_description")) + assert(validation.errorThreshold.contains(0.5)) + } + + test("Can create dataset count validation") { + val result = ValidationBuilder().count().lessThan(10) + + assert(result.validation.isInstanceOf[GroupByValidation]) + val validation = result.validation.asInstanceOf[GroupByValidation] + assert(validation.groupByCols.isEmpty) + assert(validation.aggCol.isEmpty) + assert(validation.aggType == "count") + assert(validation.expr == "count < 10") + } + + test("Can create group by then get count column validation") { + val result = ValidationBuilder() + .groupBy("account_id") + .count("amount") + .lessThan(100) + + assert(result.validation.isInstanceOf[GroupByValidation]) + val validation = result.validation.asInstanceOf[GroupByValidation] + assert(validation.groupByCols == Seq("account_id")) + assert(validation.aggCol == "amount") + assert(validation.aggType == "count") + assert(validation.expr == "count(amount) < 100") + } + + test("Can create group by then get max column validation") { + val result = ValidationBuilder() + .groupBy("account_id") + .max("amount") + .lessThan(100) + + assert(result.validation.isInstanceOf[GroupByValidation]) + val validation = result.validation.asInstanceOf[GroupByValidation] + assert(validation.groupByCols == Seq("account_id")) + assert(validation.aggCol == "amount") + assert(validation.aggType == "max") + assert(validation.expr == "max(amount) < 100") + } + + test("Can create group by then get min column validation") { + val result = ValidationBuilder() + .groupBy("account_id") + .min("amount") + .lessThan(100) + + assert(result.validation.isInstanceOf[GroupByValidation]) + val validation = result.validation.asInstanceOf[GroupByValidation] + assert(validation.groupByCols == Seq("account_id")) + assert(validation.aggCol == "amount") + assert(validation.aggType == "min") + assert(validation.expr == "min(amount) < 100") + } + + test("Can create group by then get average column validation") { + val result = ValidationBuilder() + .groupBy("account_id") + .avg("amount") + .lessThan(100) + + assert(result.validation.isInstanceOf[GroupByValidation]) + val validation = result.validation.asInstanceOf[GroupByValidation] + assert(validation.groupByCols == Seq("account_id")) + assert(validation.aggCol == "amount") + assert(validation.aggType == "avg") + assert(validation.expr == "avg(amount) < 100") + } + + test("Can create group by then get stddev column validation") { + val result = ValidationBuilder() + .groupBy("account_id") + .stddev("amount") + .lessThan(100) + + assert(result.validation.isInstanceOf[GroupByValidation]) + val validation = result.validation.asInstanceOf[GroupByValidation] + assert(validation.groupByCols == Seq("account_id")) + assert(validation.aggCol == "amount") + assert(validation.aggType == "stddev") + assert(validation.expr == "stddev(amount) < 100") + } + + test("Can create unique column validation") { + val result = ValidationBuilder().unique("account_id").description("my_description").errorThreshold(0.2) + + 
assert(result.validation.isInstanceOf[GroupByValidation]) + val validation = result.validation.asInstanceOf[GroupByValidation] + assert(validation.groupByCols == Seq("account_id")) + assert(validation.aggCol == "unique") + assert(validation.aggType == "count") + assert(validation.expr == "count == 1") + assert(validation.description.contains("my_description")) + assert(validation.errorThreshold.contains(0.2)) + } + + test("Can create unique column validation with multiple columns") { + val result = ValidationBuilder().unique("account_id", "year", "name") + + assert(result.validation.isInstanceOf[GroupByValidation]) + val validation = result.validation.asInstanceOf[GroupByValidation] + assert(validation.groupByCols == Seq("account_id", "year", "name")) + assert(validation.aggCol == "unique") + assert(validation.aggType == "count") + assert(validation.expr == "count == 1") + } + + test("Can create validation based on data from another data source") { + val upstreamDataSource = ConnectionConfigWithTaskBuilder().file("other_data_source", "json") + val result = ValidationBuilder() + .upstreamData(upstreamDataSource) + .joinColumns("account_id") + .withValidation(ValidationBuilder().col("amount").lessThanOrEqualCol("other_data_source_balance")) + + assert(result.validation.isInstanceOf[UpstreamDataSourceValidation]) + val validation = result.validation.asInstanceOf[UpstreamDataSourceValidation] + assert(validation.upstreamDataSource.connectionConfigWithTaskBuilder.dataSourceName == "other_data_source") + assert(validation.joinType == DEFAULT_VALIDATION_JOIN_TYPE) + assert(validation.joinCols == List("account_id")) + assert(validation.validationBuilder.validation.isInstanceOf[ExpressionValidation]) + assert(validation.validationBuilder.validation.asInstanceOf[ExpressionValidation].expr == "amount <= other_data_source_balance") + } + + test("Can create validation based on data from another data source as an anti-join") { + val upstreamDataSource = ConnectionConfigWithTaskBuilder().file("other_data_source", "json") + val result = ValidationBuilder() + .upstreamData(upstreamDataSource) + .joinColumns("account_id") + .joinType("anti-join") + .withValidation(ValidationBuilder().count().isEqual(0)) + + assert(result.validation.isInstanceOf[UpstreamDataSourceValidation]) + val validation = result.validation.asInstanceOf[UpstreamDataSourceValidation] + assert(validation.upstreamDataSource.connectionConfigWithTaskBuilder.dataSourceName == "other_data_source") + assert(validation.joinType == "anti-join") + assert(validation.joinCols == List("account_id")) + assert(validation.validationBuilder.validation.isInstanceOf[GroupByValidation]) + assert(validation.validationBuilder.validation.asInstanceOf[GroupByValidation].expr == "count == 0") + } + + test("Can create validation based on data from another data source with expression for join logic") { + val upstreamDataSource = ConnectionConfigWithTaskBuilder().file("other_data_source", "json") + val result = ValidationBuilder() + .upstreamData(upstreamDataSource) + .joinExpr("account_id == CONCAT('ACC', other_data_source_account_number)") + .withValidation(ValidationBuilder().count().isEqual(0)) + + assert(result.validation.isInstanceOf[UpstreamDataSourceValidation]) + val validation = result.validation.asInstanceOf[UpstreamDataSourceValidation] + assert(validation.upstreamDataSource.connectionConfigWithTaskBuilder.dataSourceName == "other_data_source") + assert(validation.joinType == DEFAULT_VALIDATION_JOIN_TYPE) + assert(validation.joinCols == 
List("expr:account_id == CONCAT('ACC', other_data_source_account_number)")) + assert(validation.validationBuilder.validation.isInstanceOf[GroupByValidation]) + assert(validation.validationBuilder.validation.asInstanceOf[GroupByValidation].expr == "count == 0") + } + + test("Can create validation pause wait condition") { + val result = WaitConditionBuilder().pause(10).waitCondition + + assert(result.isInstanceOf[PauseWaitCondition]) + assert(result.asInstanceOf[PauseWaitCondition].pauseInSeconds == 10) + } + + test("Can create validation file exists wait condition") { + val result = WaitConditionBuilder().file("/my/file/path").waitCondition + + assert(result.isInstanceOf[FileExistsWaitCondition]) + assert(result.asInstanceOf[FileExistsWaitCondition].path == "/my/file/path") + } + + test("Can create validation data exists wait condition") { + val result = WaitConditionBuilder().dataExists("my_json", Map(PATH -> "/my/json"), "created_date > '2023-01-01'").waitCondition + + assert(result.isInstanceOf[DataExistsWaitCondition]) + val waitCondition = result.asInstanceOf[DataExistsWaitCondition] + assert(waitCondition.dataSourceName == "my_json") + assert(waitCondition.options.nonEmpty) + assert(waitCondition.options == Map(PATH -> "/my/json")) + assert(waitCondition.expr == "created_date > '2023-01-01'") + } + + test("Can create validation webhook wait condition") { + val result = WaitConditionBuilder().webhook("localhost:8080/ready").waitCondition + + assert(result.isInstanceOf[WebhookWaitCondition]) + val waitCondition = result.asInstanceOf[WebhookWaitCondition] + assert(waitCondition.url == "localhost:8080/ready") + assert(waitCondition.method == DEFAULT_VALIDATION_WEBHOOK_HTTP_METHOD) + assert(waitCondition.dataSourceName == DEFAULT_VALIDATION_WEBHOOK_HTTP_DATA_SOURCE_NAME) + assert(waitCondition.statusCodes == DEFAULT_VALIDATION_WEBHOOK_HTTP_STATUS_CODES) + } + + test("Can create validation webhook wait condition with PUT method and 202 status code") { + val result = WaitConditionBuilder().webhook("localhost:8080/ready", "PUT", 202).waitCondition + + assert(result.isInstanceOf[WebhookWaitCondition]) + val waitCondition = result.asInstanceOf[WebhookWaitCondition] + assert(waitCondition.url == "localhost:8080/ready") + assert(waitCondition.method == "PUT") + assert(waitCondition.dataSourceName == DEFAULT_VALIDATION_WEBHOOK_HTTP_DATA_SOURCE_NAME) + assert(waitCondition.statusCodes == List(202)) + } + + test("Can create validation webhook wait condition using pre-defined HTTP data source name") { + val result = WaitConditionBuilder().webhook("my_http", "localhost:8080/ready").waitCondition + + assert(result.isInstanceOf[WebhookWaitCondition]) + val waitCondition = result.asInstanceOf[WebhookWaitCondition] + assert(waitCondition.url == "localhost:8080/ready") + assert(waitCondition.method == DEFAULT_VALIDATION_WEBHOOK_HTTP_METHOD) + assert(waitCondition.dataSourceName == "my_http") + assert(waitCondition.statusCodes == DEFAULT_VALIDATION_WEBHOOK_HTTP_STATUS_CODES) + } + + test("Can create validation webhook wait condition using pre-defined HTTP data source name, with different method and status code") { + val result = WaitConditionBuilder().webhook("my_http", "localhost:8080/ready", "PUT", 202).waitCondition + + assert(result.isInstanceOf[WebhookWaitCondition]) + val waitCondition = result.asInstanceOf[WebhookWaitCondition] + assert(waitCondition.url == "localhost:8080/ready") + assert(waitCondition.method == "PUT") + assert(waitCondition.dataSourceName == "my_http") + 
assert(waitCondition.statusCodes == List(202)) + } +} diff --git a/app/build.gradle.kts b/app/build.gradle.kts new file mode 100644 index 00000000..9c067e45 --- /dev/null +++ b/app/build.gradle.kts @@ -0,0 +1,134 @@ +import org.scoverage.ScoverageExtension + +/* + * This file was generated by the Gradle 'init' task. + * + * This generated file contains a sample Scala application project to get you started. + * For more details take a look at the 'Building Java & JVM projects' chapter in the Gradle + * User Manual available at https://docs.gradle.org/7.5.1/userguide/building_java_projects.html + * This project uses @Incubating APIs which are subject to change. + */ +val scalaVersion: String by project +val scalaSpecificVersion: String by project +val sparkVersion: String by project + + +plugins { + scala + application + + id("org.scoverage") version "8.0.3" + id("com.github.johnrengelman.shadow") version "8.1.1" +} + +repositories { + // Use Maven Central for resolving dependencies. + mavenCentral() + maven { + url = uri("https://plugins.gradle.org/m2/") + } +} + +tasks.withType<ScalaCompile> { + targetCompatibility = "11" +} + +val basicImpl: Configuration by configurations.creating + +configurations { + implementation { + extendsFrom(basicImpl) + } +} + +dependencies { + compileOnly("org.scala-lang:scala-library:$scalaSpecificVersion") + compileOnly("org.apache.spark:spark-sql_$scalaVersion:$sparkVersion") + compileOnly(project(":api")) + + // connectors + // postgres + basicImpl("org.postgresql:postgresql:42.6.0") + // mysql + basicImpl("mysql:mysql-connector-java:8.0.33") + // cassandra + basicImpl("com.datastax.spark:spark-cassandra-connector_$scalaVersion:3.3.0") { + exclude(group = "org.scala-lang") + } + // cloud file storage + basicImpl("org.apache.spark:spark-hadoop-cloud_$scalaVersion:$sparkVersion") { + exclude(group = "org.scala-lang") + } + + // data generation helpers + basicImpl("net.datafaker:datafaker:1.9.0") + basicImpl("org.reflections:reflections:0.10.2") + + // misc + basicImpl("joda-time:joda-time:2.12.5") + basicImpl("com.google.guava:guava:32.1.3-jre") + basicImpl("org.asynchttpclient:async-http-client:2.12.3") + basicImpl("com.github.pureconfig:pureconfig_$scalaVersion:0.17.2") { + exclude(group = "org.scala-lang") + } + basicImpl("com.fasterxml.jackson.core:jackson-databind:2.15.3") + basicImpl("com.fasterxml.jackson.dataformat:jackson-dataformat-yaml:2.15.3") + basicImpl("com.fasterxml.jackson.module:jackson-module-scala_$scalaVersion:2.15.3") { + exclude(group = "org.scala-lang") + } + basicImpl("org.scala-lang.modules:scala-xml_$scalaVersion:2.2.0") { + exclude(group = "org.scala-lang") + } +} + +testing { + suites { + // Configure the built-in test suite + val test by getting(JvmTestSuite::class) { + // Use JUnit4 test framework + useJUnit("4.13.2") + + dependencies { + // Use Scalatest for testing our library + implementation("org.scalatest:scalatest_$scalaVersion:3.2.17") + implementation("org.scalatestplus:junit-4-13_$scalaVersion:3.2.17.0") + implementation("org.scalamock:scalamock_$scalaVersion:5.2.0") + implementation("org.apache.spark:spark-sql_$scalaVersion:$sparkVersion") + implementation("org.apache.spark:spark-avro_$scalaVersion:$sparkVersion") + implementation("org.apache.spark:spark-protobuf_$scalaVersion:$sparkVersion") + implementation(project(":api")) + + // Need scala-xml at test runtime + runtimeOnly("org.scala-lang.modules:scala-xml_$scalaVersion:1.2.0") + } + } + } +} + +application { + // Define the main class for the application. 
+ mainClass.set("com.github.pflooky.datagen.App") +} + +sourceSets { + test { + resources { + setSrcDirs(listOf("src/test/resources")) + } + } +} + +tasks.shadowJar { + isZip64 = true + relocate("com.google.common", "shadow.com.google.common") +} + +tasks.test { + finalizedBy(tasks.reportScoverage) +} + +configure<ScoverageExtension> { + scoverageScalaVersion.set(scalaSpecificVersion) + excludedFiles.add(".*CombinationCalculator.*") + excludedPackages.add("com.github.pflooky.datagen.core.exception.*") +} diff --git a/app/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister b/app/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister new file mode 100644 index 00000000..e69de29b diff --git a/app/src/main/resources/application.conf b/app/src/main/resources/application.conf new file mode 100644 index 00000000..d9d844a8 --- /dev/null +++ b/app/src/main/resources/application.conf @@ -0,0 +1,166 @@ +flags { + enableGeneratePlanAndTasks = false + enableGeneratePlanAndTasks = ${?ENABLE_GENERATE_PLAN_AND_TASKS} + enableCount = true + enableCount = ${?ENABLE_COUNT} + enableGenerateData = true + enableGenerateData = ${?ENABLE_GENERATE_DATA} + enableRecordTracking = false + enableRecordTracking = ${?ENABLE_RECORD_TRACKING} + enableDeleteGeneratedRecords = false + enableDeleteGeneratedRecords = ${?ENABLE_DELETE_GENERATED_RECORDS} + enableFailOnError = true + enableFailOnError = ${?ENABLE_FAIL_ON_ERROR} + enableSinkMetadata = true + enableSinkMetadata = ${?ENABLED_SINK_METADATA} + enableSaveReports = true + enableSaveReports = ${?ENABLED_SAVE_REPORTS} + enableValidation = false + enableValidation = ${?ENABLED_VALIDATION} +} + +folders { + generatedPlanAndTaskFolderPath = "/tmp" + generatedPlanAndTaskFolderPath = ${?GENERATED_PLAN_AND_TASK_FOLDER_PATH} + planFilePath = "/plan/customer-create-plan.yaml" + planFilePath = ${?PLAN_FILE_PATH} + taskFolderPath = "/task" + taskFolderPath = ${?TASK_FOLDER_PATH} + recordTrackingFolderPath = "/tmp/data/generated/recordTracking" + recordTrackingFolderPath = ${?RECORD_TRACKING_FOLDER_PATH} + generatedReportsFolderPath = "app/src/test/resources/sample/html" + generatedReportsFolderPath = ${?GENERATED_REPORTS_FOLDER_PATH} + validationFolderPath = "app/src/test/resources/sample/validation" + validationFolderPath = ${?VALIDATION_FOLDER_PATH} +} + +metadata { + numRecordsFromDataSource = 10000 + numRecordsFromDataSource = ${?METADATA_NUM_RECORDS_FROM_DATA_SOURCE} + numRecordsForAnalysis = 10000 + numRecordsForAnalysis = ${?METADATA_NUM_RECORDS_FOR_ANALYSIS} + oneOfDistinctCountVsCountThreshold = 0.1 + oneOfDistinctCountVsCountThreshold = ${?METADATA_ONE_OF_DISTINCT_COUNT_VS_COUNT_THRESHOLD} +} + +generation { + numRecordsPerBatch = 100000 + numRecordsPerBatch = ${?GENERATION_NUM_RECORDS_PER_BATCH} +} + +runtime { + master = "local[*]" + master = ${?DATA_CATERER_MASTER} + config { + "spark.sql.cbo.enabled" = "true" + "spark.sql.adaptive.enabled" = "true" + "spark.sql.cbo.planStats.enabled" = "true" + "spark.sql.legacy.allowUntypedScalaUDF" = "true" + "spark.sql.statistics.histogram.enabled" = "true" + "spark.sql.shuffle.partitions" = "10" + "spark.sql.catalog.postgres" = "" + "spark.sql.catalog.cassandra" = "com.datastax.spark.connector.datasource.CassandraCatalog" + "spark.hadoop.fs.s3a.directory.marker.retention" = "keep" + "spark.hadoop.fs.s3a.bucket.all.committer.magic.enabled" = "true" + #"spark.hadoop.fs.defaultFS" = "s3a://my-bucket" + } +} + +# connection type +jdbc { +# connection name + postgres { +# connection details + 
url = "jdbc:postgresql://localhost:5432/customer" + url = ${?POSTGRES_URL} + user = "postgres" + user = ${?POSTGRES_USERNAME} + password = "postgres" + password = ${?POSTGRES_PASSWORD} + driver = "org.postgresql.Driver" + } + postgresDvd { + url = "jdbc:postgresql://localhost:5432/dvdrental" + url = ${?POSTGRES_URL} + user = "postgres" + user = ${?POSTGRES_USERNAME} + password = "postgres" + password = ${?POSTGRES_PASSWORD} + driver = "org.postgresql.Driver" + stringtype = "unspecified" + } + mysql { + url = "jdbc:mysql://localhost:3306/customer" + url = ${?MYSQL_URL} + user = "root" + user = ${?MYSQL_USERNAME} + password = "root" + password = ${?MYSQL_PASSWORD} + driver = "com.mysql.cj.jdbc.Driver" + } +} + +org.apache.spark.sql.cassandra { + cassandra { + spark.cassandra.connection.host = "localhost" + spark.cassandra.connection.host = ${?CASSANDRA_HOST} + spark.cassandra.connection.port = "9042" + spark.cassandra.connection.port = ${?CASSANDRA_PORT} + spark.cassandra.auth.username = "cassandra" + spark.cassandra.auth.username = ${?CASSANDRA_USERNAME} + spark.cassandra.auth.password = "cassandra" + spark.cassandra.auth.password = ${?CASSANDRA_PASSWORD} + } +} + +http { + httpbin { + } +} + +jms { + solace { + initialContextFactory = "com.solacesystems.jndi.SolJNDIInitialContextFactory" + initialContextFactory = ${?SOLACE_INITIAL_CONTEXT_FACTORY} + connectionFactory = "/jms/cf/default" + connectionFactory = ${?SOLACE_CONNECTION_FACTORY} + url = "smf://localhost:55554" + url = ${?SOLACE_URL} + user = "admin" + user = ${?SOLACE_USER} + password = "admin" + password = ${?SOLACE_PASSWORD} + vpnName = "default" + vpnName = ${?SOLACE_VPN} + } +} + +kafka { + kafka { + kafka.bootstrap.servers = "localhost:9092" + kafka.bootstrap.servers = ${?KAFKA_BOOTSTRAP_SERVERS} + } +} + +parquet { + parquet { + path = "app/src/test/resources/sample" + path = ${?PARQUET_PATH} + } +} + +json { + json { + path = "app/src/test/resources/sample" + path = ${?JSON_PATH} + } +} + +csv { + csv { + path = "app/src/test/resources/sample" + path = ${?CSV_PATH} + } +} + +datastax-java-driver.advanced.metadata.schema.refreshed-keyspaces = [ "/.*/" ] diff --git a/app/src/main/resources/log4j2.properties b/app/src/main/resources/log4j2.properties new file mode 100644 index 00000000..b20fecac --- /dev/null +++ b/app/src/main/resources/log4j2.properties @@ -0,0 +1,51 @@ +rootLogger.level=${env:LOG_LEVEL:-info} +rootLogger.appenderRef.stdout.ref=console +appender.console.type=Console +appender.console.name=console +appender.console.target=SYSTEM_OUT +appender.console.layout.type=PatternLayout +appender.console.layout.pattern=%d{dd/MM/yyyy HH:mm:ss} [%-5p] %c: %m%n%ex +# Settings to quiet third party logs that are too verbose +logger.spark.name=org.apache.spark +logger.spark.level=error +logger.sparktask.name=org.apache.spark.scheduler.TaskSetManager +logger.sparktask.level=error +logger.jetty.name=org.sparkproject.jetty +logger.jetty.level=warn +logger.jetty2.name=org.sparkproject.jetty.util.component.AbstractLifeCycle +logger.jetty2.level=error +logger.repl1.name=org.apache.spark.repl.SparkIMain$exprTyper +logger.repl1.level=info +logger.repl2.name=org.apache.spark.repl.SparkILoop$SparkILoopInterpreter +logger.repl2.level=info +# Set the default spark-shell log level to WARN. When running the spark-shell, the +# log level for this class is used to overwrite the root logger's log level, so that +# the user can have different defaults for the shell and regular Spark apps. 
+logger.repl.name=org.apache.spark.repl.Main +logger.repl.level=warn +# SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs +# in SparkSQL with Hive support +logger.hadoop.name=org.apache.hadoop +logger.hadoop.level=error +logger.metastore.name=org.apache.hadoop.hive.metastore.RetryingHMSHandler +logger.metastore.level=fatal +logger.hive_functionregistry.name=org.apache.hadoop.hive.ql.exec.FunctionRegistry +logger.hive_functionregistry.level=error +# Parquet related logging +logger.parquet.name=org.apache.parquet +logger.parquet.level=warn +logger.parquet1.name=org.apache.parquet.CorruptStatistics +logger.parquet1.level=error +logger.parquet2.name=parquet.CorruptStatistics +logger.parquet2.level=error +# Datastax logging +logger.dse.name=com.datastax +logger.dse.level=warn +logger.dseauth.name=com.datastax.oss.driver.api.core.auth.PlainTextAuthProviderBase +logger.dseauth.level=error +# Solace logging +logger.solace.name=com.solacesystems.jcsmp +logger.solace.level=warn +# Kafka logging +logger.kafka.name=org.apache.kafka +logger.kafka.level=warn diff --git a/app/src/main/resources/report/data_catering_transparent.svg b/app/src/main/resources/report/data_catering_transparent.svg new file mode 100644 index 00000000..cc6a8d60 --- /dev/null +++ b/app/src/main/resources/report/data_catering_transparent.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/app/src/main/resources/report/main.css b/app/src/main/resources/report/main.css new file mode 100644 index 00000000..e07128fc --- /dev/null +++ b/app/src/main/resources/report/main.css @@ -0,0 +1,173 @@ +.box-iframe { + float: left; + margin-right: 10px; +} + +body { + margin: 0; +} + +.top-banner { + height: fit-content; + background-color: #ff6e42; + padding: 0 .2rem; + display: flex; +} + +.top-banner span { + color: #f2f2f2; + font-size: 17px; + padding: 5px 6px; + display: flex; + align-items: center; +} + +.logo { + padding: 5px; + height: 45px; + width: auto; + display: flex; + align-items: center; + justify-content: center; +} + +.logo:hover { + background-color: #ff9100; + color: black; +} + +.top-banner img { + height: 35px; + width: auto; + display: flex; + justify-content: center; + vertical-align: middle; +} + +.topnav { + overflow: hidden; + background-color: #ff6e42; +} + +.topnav a { + float: left; + color: #f2f2f2; + text-align: center; + padding: 8px 10px; + text-decoration: none; + font-size: 17px; +} + +.topnav a:hover { + background-color: #ff9100; + color: black; +} + +.topnav a.active { + color: black; +} + +table { + overflow: hidden; + transition: max-height 0.2s ease-out; +} + +table.codegrid { + font-family: monospace; + font-size: 12px; + width: auto !important; +} + +table.statementlist { + width: auto !important; + font-size: 13px; +} + +table.codegrid td { + padding: 0 !important; + border: 0 !important +} + +table td.linenumber { + width: 40px !important; +} + +td { + white-space: normal +} + +.table thead th { + position: sticky; + top: 0; + z-index: 1; +} + +table, tr, td, th { + border-collapse: collapse; +} + +.table-collapsible { + max-height: 0; + overflow: hidden; + transition: max-height 0.2s ease-out; +} + +.collapsible { + background-color: lightgray; + color: black; + cursor: pointer; + width: 100%; + border: none; + text-align: left; + outline: none; +} + +.collapsible:after { + content: "\02795"; /* Unicode character for "plus" sign (+) */ + color: white; + float: right; +} + +.active:after { + content: "\2796"; /* Unicode character for "minus" sign (-) */ +} + 
+.outer-container { + display: flex; + flex-direction: column; + height: 100vh; +} + +.top-container { + height: 50%; + overflow: auto; + resize: vertical; +} + +.bottom-container { + flex: 1; + min-height: 0; + height: 50%; + overflow: auto; + resize: vertical; +} + +.slider { + text-align: center; + background-color: #dee2e6; + cursor: row-resize; + user-select: none; +} + +.selected-row { + background-color: #ff6e42 !important; +} + +.progress { + white-space: normal; + background-color: #d9534f; +} + +.progress-bar { + color: black; +} diff --git a/app/src/main/scala/com/github/pflooky/datagen/App.scala b/app/src/main/scala/com/github/pflooky/datagen/App.scala new file mode 100644 index 00000000..002508a8 --- /dev/null +++ b/app/src/main/scala/com/github/pflooky/datagen/App.scala @@ -0,0 +1,24 @@ +/* + * This Scala source file was generated by the Gradle 'init' task. + */ +package com.github.pflooky.datagen + +import com.github.pflooky.datagen.core.plan.PlanProcessor +import org.apache.log4j.Logger + +import java.time.{Duration, LocalDateTime} + +object App { + + private val LOGGER = Logger.getLogger(getClass.getName) + + def main(args: Array[String]): Unit = { + val startTime = LocalDateTime.now() + LOGGER.info("Starting Data Caterer") + PlanProcessor.determineAndExecutePlan() + val endTime = LocalDateTime.now() + val duration = Duration.between(startTime, endTime) + LOGGER.info(s"Completed in ${duration.toSeconds}s") + System.exit(0) + } +} diff --git a/app/src/main/scala/com/github/pflooky/datagen/core/config/ConfigParser.scala b/app/src/main/scala/com/github/pflooky/datagen/core/config/ConfigParser.scala new file mode 100644 index 00000000..97769768 --- /dev/null +++ b/app/src/main/scala/com/github/pflooky/datagen/core/config/ConfigParser.scala @@ -0,0 +1,85 @@ +package com.github.pflooky.datagen.core.config + +import com.github.pflooky.datacaterer.api.model.Constants.FORMAT +import com.github.pflooky.datacaterer.api.model.{DataCatererConfiguration, FlagsConfig, FoldersConfig, GenerationConfig, MetadataConfig, ValidationConfig} +import com.github.pflooky.datagen.core.model.Constants.{APPLICATION_CONFIG_PATH, RUNTIME_MASTER, SUPPORTED_CONNECTION_FORMATS} +import com.github.pflooky.datagen.core.util.ObjectMapperUtil +import com.typesafe.config.{Config, ConfigFactory, ConfigValueType} +import org.apache.log4j.Logger + +import java.io.File +import scala.collection.JavaConverters.collectionAsScalaIterableConverter +import scala.util.Try + +object ConfigParser { + + private val LOGGER = Logger.getLogger(getClass.getName) + + lazy val config: Config = getConfig + lazy val flagsConfig: FlagsConfig = ObjectMapperUtil.jsonObjectMapper.convertValue(config.getObject("flags").unwrapped(), classOf[FlagsConfig]) + lazy val foldersConfig: FoldersConfig = ObjectMapperUtil.jsonObjectMapper.convertValue(config.getObject("folders").unwrapped(), classOf[FoldersConfig]) + lazy val metadataConfig: MetadataConfig = ObjectMapperUtil.jsonObjectMapper.convertValue(config.getObject("metadata").unwrapped(), classOf[MetadataConfig]) + lazy val generationConfig: GenerationConfig = ObjectMapperUtil.jsonObjectMapper.convertValue(config.getObject("generation").unwrapped(), classOf[GenerationConfig]) + lazy val validationConfig: ValidationConfig = ObjectMapperUtil.jsonObjectMapper.convertValue(config.getObject("validation").unwrapped(), classOf[ValidationConfig]) + lazy val baseRuntimeConfig: Map[String, String] = ObjectMapperUtil.jsonObjectMapper.convertValue(config.getObject("runtime.config").unwrapped(), 
classOf[Map[String, String]]) + lazy val master: String = config.getString(RUNTIME_MASTER) + lazy val connectionConfigsByName: Map[String, Map[String, String]] = getConnectionConfigsByName + lazy val sparkConnectionConfig: Map[String, String] = getSparkConnectionConfig + + def getConfig: Config = { + val appConfEnv = System.getenv(APPLICATION_CONFIG_PATH) + val appConfProp = System.getProperty(APPLICATION_CONFIG_PATH) + val applicationConfPath = (appConfEnv, appConfProp) match { + case (null, null) => "application.conf" + case (env, _) if env != null => env + case (_, prop) if prop != null => prop + case _ => "application.conf" + } + LOGGER.debug(s"Using application config file path, path=$applicationConfPath") + val applicationConfFile = new File(applicationConfPath) + if (!applicationConfFile.exists()) { + val confFromClassPath = getClass.getClassLoader.getResource(applicationConfPath) + ConfigFactory.parseURL(confFromClassPath).resolve() + } else { + ConfigFactory.parseFile(applicationConfFile).resolve() + } + } + + def getConnectionConfigsByName: Map[String, Map[String, String]] = { + SUPPORTED_CONNECTION_FORMATS.map(format => { + val tryBaseConfig = Try(config.getConfig(format)) + tryBaseConfig.map(baseConfig => { + val connectionNames = baseConfig.root().keySet().asScala + connectionNames.flatMap(name => { + baseConfig.getValue(name).valueType() match { + case ConfigValueType.OBJECT => + val connectionConfig = baseConfig.getConfig(name) + val configValueMap = connectionConfig.entrySet().asScala + .map(e => (e.getKey, e.getValue.render().replaceAll("\"", ""))) + .toMap + Map(name -> (configValueMap ++ Map(FORMAT -> format))) + case _ => Map[String, Map[String, String]]() + } + }).toMap + }).getOrElse(Map()) + }).reduce((x, y) => x ++ y) + } + + def getSparkConnectionConfig: Map[String, String] = { + connectionConfigsByName.flatMap(connectionConf => connectionConf._2.filter(_._1.startsWith("spark"))) + } + + def toDataCatererConfiguration: DataCatererConfiguration = { + DataCatererConfiguration( + flagsConfig, + foldersConfig, + metadataConfig, + generationConfig, + validationConfig, + connectionConfigsByName, + baseRuntimeConfig ++ sparkConnectionConfig, + master + ) + } + +} \ No newline at end of file diff --git a/app/src/main/scala/com/github/pflooky/datagen/core/exception/Exceptions.scala b/app/src/main/scala/com/github/pflooky/datagen/core/exception/Exceptions.scala new file mode 100644 index 00000000..6841356a --- /dev/null +++ b/app/src/main/scala/com/github/pflooky/datagen/core/exception/Exceptions.scala @@ -0,0 +1,34 @@ +package com.github.pflooky.datagen.core.exception + +import com.github.pflooky.datacaterer.api.model.{Count, Field, Step} +import org.apache.spark.sql.types.{DataType, StructField} + +class ParseFileException(filePath: String, parseToType: String, throwable: Throwable) extends RuntimeException(throwable) { + override def getMessage: String = s"Failed to parse file to expected type, file=$filePath, parse-to-type=$parseToType" +} + +class UnsupportedDataGeneratorType(returnType: String) extends RuntimeException { + override def getMessage: String = s"Unsupported return type for data generator: type=$returnType" +} + +class UnsupportedRealTimeDataSourceFormat(format: String) extends RuntimeException { + override def getMessage: String = s"Unsupported data source format for creating real-time data, format=$format" +} + +class InvalidDataGeneratorConfigurationException(structField: StructField, undefinedMetadataField: String) extends RuntimeException { + override 
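// Editor's sketch (hypothetical example, not part of this commit): ConfigParser.getConnectionConfigsByName above
// flattens every connection block found under a supported format in application.conf into a Map keyed by the
// connection name and tags it with its format. Assuming a connection named "my_postgres" defined under a
// "jdbc" block (the key names and format value below are illustrative only), connectionConfigsByName would
// contain roughly the following:
object ConnectionConfigSketch {
  val expected: Map[String, Map[String, String]] = Map(
    "my_postgres" -> Map(
      "url" -> "jdbc:postgresql://localhost:5432/customer",
      "user" -> "postgres",
      "format" -> "jdbc" // appended from the FORMAT constant by getConnectionConfigsByName
    )
  )
}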
def getMessage: String = s"Undefined configuration in metadata for the data generator defined. Please help to define 'undefined-metadata-field' " + + s"in field 'metadata' to allow data to be generated, " + + s"name=${structField.name}, data-type=${structField.dataType}, undefined-metadata-field=$undefinedMetadataField, metadata=${structField.metadata}" +} + +class InvalidStepCountGeneratorConfigurationException(step: Step) extends RuntimeException { + override def getMessage: String = s"'total' or 'generator' needs to be defined in count for step, step-name=${step.name}, schema=${step.schema}, count=${step.count}" +} + +class InvalidFieldConfigurationException(field: Field) extends RuntimeException { + override def getMessage: String = s"Field should have ('generator' and 'type' defined) or 'schema' defined, name=${field.name}" +} + +class InvalidWaitConditionException(waitCondition: String) extends RuntimeException { + override def getMessage: String = s"Invalid wait condition for validation, wait-condition=$waitCondition" +} diff --git a/app/src/main/scala/com/github/pflooky/datagen/core/generator/BatchDataProcessor.scala b/app/src/main/scala/com/github/pflooky/datagen/core/generator/BatchDataProcessor.scala new file mode 100644 index 00000000..00a7e0f6 --- /dev/null +++ b/app/src/main/scala/com/github/pflooky/datagen/core/generator/BatchDataProcessor.scala @@ -0,0 +1,101 @@ +package com.github.pflooky.datagen.core.generator + +import com.github.pflooky.datacaterer.api.model.{FlagsConfig, FoldersConfig, GenerationConfig, MetadataConfig, Plan, Task, TaskSummary} +import com.github.pflooky.datagen.core.model.DataSourceResult +import com.github.pflooky.datagen.core.util.PlanImplicits.StepOps +import com.github.pflooky.datagen.core.sink.SinkFactory +import com.github.pflooky.datagen.core.util.GeneratorUtil.getDataSourceName +import com.github.pflooky.datagen.core.util.RecordCountUtil.calculateNumBatches +import com.github.pflooky.datagen.core.util.{ForeignKeyUtil, UniqueFieldsUtil} +import net.datafaker.Faker +import org.apache.log4j.Logger +import org.apache.spark.sql.{DataFrame, SparkSession} + +import java.io.Serializable +import java.time.LocalDateTime +import java.util.{Locale, Random} +import scala.util.{Failure, Success, Try} + +class BatchDataProcessor(connectionConfigsByName: Map[String, Map[String, String]], foldersConfig: FoldersConfig, + metadataConfig: MetadataConfig, flagsConfig: FlagsConfig, generationConfig: GenerationConfig)(implicit sparkSession: SparkSession) { + + private val LOGGER = Logger.getLogger(getClass.getName) + private lazy val sinkFactory = new SinkFactory(flagsConfig, metadataConfig) + + def splitAndProcess(plan: Plan, executableTasks: List[(TaskSummary, Task)]) + (implicit sparkSession: SparkSession): List[DataSourceResult] = { + val faker = getDataFaker(plan) + val dataGeneratorFactory = new DataGeneratorFactory(faker) + val uniqueFieldUtil = new UniqueFieldsUtil(executableTasks) + val tasks = executableTasks.map(_._2) + var (numBatches, trackRecordsPerStep) = calculateNumBatches(tasks, generationConfig) + + val dataSourceResults = (1 to numBatches).flatMap(batch => { + val startTime = LocalDateTime.now() + LOGGER.info(s"Starting batch, batch=$batch, num-batches=$numBatches") + val generatedDataForeachTask = executableTasks.flatMap(task => + task._2.steps.filter(_.enabled).map(s => { + val dataSourceStepName = getDataSourceName(task._1, s) + val recordStepName = s"${task._2.name}_${s.name}" + val stepRecords = trackRecordsPerStep(recordStepName) + val 
startIndex = stepRecords.currentNumRecords + val endIndex = stepRecords.currentNumRecords + stepRecords.numRecordsPerBatch + + val genDf = dataGeneratorFactory.generateDataForStep(s, task._1.dataSourceName, startIndex, endIndex) + val df = if (s.gatherPrimaryKeys.nonEmpty && flagsConfig.enableUniqueCheck) uniqueFieldUtil.getUniqueFieldsValues(dataSourceStepName, genDf) else genDf + + if (!df.storageLevel.useMemory) df.cache() + val dfRecordCount = if (flagsConfig.enableCount) df.count() else stepRecords.numRecordsPerBatch + LOGGER.debug(s"Step record count for batch, batch=$batch, step-name=${s.name}, target-num-records=${stepRecords.numRecordsPerBatch}, actual-num-records=$dfRecordCount") + trackRecordsPerStep = trackRecordsPerStep ++ Map(recordStepName -> stepRecords.copy(currentNumRecords = dfRecordCount)) + (dataSourceStepName, df) + }) + ).toMap + + val sinkDf = plan.sinkOptions + .map(_ => ForeignKeyUtil.getDataFramesWithForeignKeys(plan, generatedDataForeachTask)) + .getOrElse(generatedDataForeachTask.toList) + val sinkResults = pushDataToSinks(executableTasks, sinkDf, batch, startTime) + sinkDf.foreach(_._2.unpersist()) + LOGGER.info(s"Finished batch, batch=$batch, num-batches=$numBatches") + sinkResults + }).toList + LOGGER.debug(s"Completed all batches, num-batches=$numBatches") + dataSourceResults + } + + def pushDataToSinks(executableTasks: List[(TaskSummary, Task)], sinkDf: List[(String, DataFrame)], batchNum: Int, + startTime: LocalDateTime): List[DataSourceResult] = { + val stepAndTaskByDataSourceName = executableTasks.flatMap(task => + task._2.steps.map(s => (getDataSourceName(task._1, s), (s, task._2))) + ).toMap + + sinkDf.map(df => { + val dataSourceName = df._1.split("\\.").head + val (step, task) = stepAndTaskByDataSourceName(df._1) + val dataSourceConfig = connectionConfigsByName.getOrElse(dataSourceName, Map()) + val stepWithDataSourceConfig = step.copy(options = dataSourceConfig ++ step.options) + val sinkResult = sinkFactory.pushToSink(df._2, dataSourceName, stepWithDataSourceConfig, flagsConfig, startTime) + DataSourceResult(dataSourceName, task, stepWithDataSourceConfig, sinkResult, batchNum) + }) + } + + private def getDataFaker(plan: Plan): Faker with Serializable = { + val optSeed = plan.sinkOptions.flatMap(_.seed) + val optLocale = plan.sinkOptions.flatMap(_.locale) + val trySeed = Try(optSeed.map(_.toInt).get) + + (optSeed, trySeed, optLocale) match { + case (None, _, Some(locale)) => + LOGGER.info(s"Locale defined at plan level. All data will be generated with the set locale, locale=$locale") + new Faker(Locale.forLanguageTag(locale)) with Serializable + case (Some(_), Success(seed), Some(locale)) => + LOGGER.info(s"Seed and locale defined at plan level. 
All data will be generated with the set seed and locale, seed-value=$seed, locale=$locale") + new Faker(Locale.forLanguageTag(locale), new Random(seed)) with Serializable + case (Some(_), Failure(exception), _) => + throw new RuntimeException("Failed to get seed value from plan sink options", exception) + case _ => new Faker() with Serializable + } + } +} + diff --git a/app/src/main/scala/com/github/pflooky/datagen/core/generator/DataGeneratorFactory.scala b/app/src/main/scala/com/github/pflooky/datagen/core/generator/DataGeneratorFactory.scala new file mode 100644 index 00000000..7470014a --- /dev/null +++ b/app/src/main/scala/com/github/pflooky/datagen/core/generator/DataGeneratorFactory.scala @@ -0,0 +1,135 @@ +package com.github.pflooky.datagen.core.generator + +import com.github.pflooky.datacaterer.api.model.Constants.SQL_GENERATOR +import com.github.pflooky.datacaterer.api.model.{Field, PerColumnCount, Step} +import com.github.pflooky.datagen.core.exception.InvalidStepCountGeneratorConfigurationException +import com.github.pflooky.datagen.core.generator.provider.DataGenerator +import com.github.pflooky.datagen.core.model.Constants._ +import com.github.pflooky.datagen.core.util.PlanImplicits.FieldOps +import com.github.pflooky.datagen.core.sink.SinkProcessor +import com.github.pflooky.datagen.core.util.GeneratorUtil.{applySqlExpressions, getDataGenerator} +import com.github.pflooky.datagen.core.util.ObjectMapperUtil +import net.datafaker.Faker +import org.apache.spark.sql.expressions.UserDefinedFunction +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.types._ +import org.apache.spark.sql.{DataFrame, Row, SparkSession} + +import scala.util.Random + + +case class Holder(__index_inc: Long) + +class DataGeneratorFactory(faker: Faker)(implicit val sparkSession: SparkSession) { + + private val OBJECT_MAPPER = ObjectMapperUtil.jsonObjectMapper + private val RANDOM = new Random() + registerSparkFunctions + + def generateDataForStep(step: Step, dataSourceName: String, startIndex: Long, endIndex: Long): DataFrame = { + val structFieldsWithDataGenerators = step.schema.fields.map(getStructWithGenerators).getOrElse(List()) + val indexedDf = sparkSession.createDataFrame(Seq.range(startIndex, endIndex).map(Holder)) + generateDataViaSql(structFieldsWithDataGenerators, step, indexedDf) + .alias(s"$dataSourceName.${step.name}") + } + + def generateDataViaSql(dataGenerators: List[DataGenerator[_]], step: Step, indexedDf: DataFrame): DataFrame = { + val structType = StructType(dataGenerators.map(_.structField)) + val genSqlExpression = dataGenerators.map(dg => s"${dg.generateSqlExpressionWrapper} AS `${dg.structField.name}`") + val df = indexedDf.selectExpr(genSqlExpression: _*) + + val perColDf = step.count.perColumn + .map(perCol => generateRecordsPerColumn(dataGenerators, step, perCol, df)) + .getOrElse(df) + if (!perColDf.storageLevel.useMemory) perColDf.cache() + + val dfWithMetadata = attachMetadata(perColDf, structType) + val dfAllFields = attachMetadata(applySqlExpressions(dfWithMetadata), structType) + if (!dfAllFields.storageLevel.useMemory) dfAllFields.cache() + dfAllFields + } + + def generateData(dataGenerators: List[DataGenerator[_]], step: Step): DataFrame = { + val structType = StructType(dataGenerators.map(_.structField)) + val count = step.count + + val generatedData = if (count.generator.isDefined) { + val metadata = Metadata.fromJson(OBJECT_MAPPER.writeValueAsString(count.generator.get.options)) + val countStructField = StructField(RECORD_COUNT_GENERATOR_COL, 
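// Editor's sketch (hypothetical, not part of this commit): the seed/locale handling in getDataFaker above implies
// that runs are reproducible when both a seed and a locale are set on the plan's sink options, because the
// underlying datafaker instance is constructed with an explicit java.util.Random.
import java.util.{Locale, Random}
import net.datafaker.Faker

object SeededFakerSketch extends App {
  val first = new Faker(Locale.forLanguageTag("en-AU"), new Random(42L))
  val second = new Faker(Locale.forLanguageTag("en-AU"), new Random(42L))
  // Same locale and seed should yield the same generated sequence
  assert(first.name().firstName() == second.name().firstName())
}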
IntegerType, false, metadata) + val generatedCount = getDataGenerator(count.generator, countStructField, faker).generate.asInstanceOf[Int].toLong + (1L to generatedCount).map(_ => Row.fromSeq(dataGenerators.map(_.generateWrapper()))) + } else if (count.records.isDefined) { + (1L to count.records.get.asInstanceOf[Number].longValue()).map(_ => Row.fromSeq(dataGenerators.map(_.generateWrapper()))) + } else { + throw new InvalidStepCountGeneratorConfigurationException(step) + } + + val rddGeneratedData = sparkSession.sparkContext.parallelize(generatedData) + val df = sparkSession.createDataFrame(rddGeneratedData, structType) + + var dfPerCol = count.perColumn + .map(perCol => generateRecordsPerColumn(dataGenerators, step, perCol, df)) + .getOrElse(df) + val sqlGeneratedFields = structType.fields.filter(f => f.metadata.contains(SQL_GENERATOR)) + sqlGeneratedFields.foreach(field => { + val allFields = structType.fields.filter(_ != field).map(_.name) ++ Array(s"${field.metadata.getString(SQL_GENERATOR)} AS `${field.name}`") + dfPerCol = dfPerCol.selectExpr(allFields: _*) + }) + dfPerCol + } + + private def generateRecordsPerColumn(dataGenerators: List[DataGenerator[_]], step: Step, + perColumnCount: PerColumnCount, df: DataFrame): DataFrame = { + val fieldsToBeGenerated = dataGenerators.filter(x => !perColumnCount.columnNames.contains(x.structField.name)) + + val perColumnRange = if (perColumnCount.generator.isDefined) { + val metadata = Metadata.fromJson(OBJECT_MAPPER.writeValueAsString(perColumnCount.generator.get.options)) + val countStructField = StructField(RECORD_COUNT_GENERATOR_COL, IntegerType, false, metadata) + val generatedCount = getDataGenerator(perColumnCount.generator, countStructField, faker).asInstanceOf[DataGenerator[Int]] + val numList = generateDataWithSchema(generatedCount, fieldsToBeGenerated) + df.withColumn(PER_COLUMN_COUNT, numList()) + } else if (perColumnCount.count.isDefined) { + val numList = generateDataWithSchema(perColumnCount.count.get, fieldsToBeGenerated) + df.withColumn(PER_COLUMN_COUNT, numList()) + } else { + throw new InvalidStepCountGeneratorConfigurationException(step) + } + + val explodeCount = perColumnRange.withColumn(PER_COLUMN_INDEX_COL, explode(col(PER_COLUMN_COUNT))) + .drop(col(PER_COLUMN_COUNT)) + explodeCount.select(PER_COLUMN_INDEX_COL + ".*", perColumnCount.columnNames: _*) + } + + private def generateDataWithSchema(countGenerator: DataGenerator[Int], dataGenerators: List[DataGenerator[_]]): UserDefinedFunction = { + udf(() => { + (1L to countGenerator.generate) + .toList + .map(_ => Row.fromSeq(dataGenerators.map(_.generateWrapper()))) + }, ArrayType(StructType(dataGenerators.map(_.structField)))) + } + + private def generateDataWithSchema(count: Long, dataGenerators: List[DataGenerator[_]]): UserDefinedFunction = { + udf(() => { + (1L to count) + .toList + .map(_ => Row.fromSeq(dataGenerators.map(_.generateWrapper()))) + }, ArrayType(StructType(dataGenerators.map(_.structField)))) + } + + private def getStructWithGenerators(fields: List[Field]): List[DataGenerator[_]] = { + fields.map(field => getDataGenerator(field.generator, field.toStructField, faker)) + } + + private def registerSparkFunctions = { + sparkSession.udf.register(GENERATE_REGEX_UDF, udf((s: String) => faker.regexify(s)).asNondeterministic()) + sparkSession.udf.register(GENERATE_FAKER_EXPRESSION_UDF, udf((s: String) => faker.expression(s)).asNondeterministic()) + sparkSession.udf.register(GENERATE_RANDOM_ALPHANUMERIC_STRING_UDF, udf((minLength: Int, maxLength: Int) => { + val 
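// Editor's sketch (hypothetical, not part of this commit): generateRecordsPerColumn above reduces to a common
// Spark pattern: attach an array of child rows to each parent row, then explode it so every parent yields
// N child records while the per-column values (e.g. account_id) stay constant.
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.{col, explode, expr}

object PerColumnExplodeSketch extends App {
  val spark = SparkSession.builder().master("local[*]").appName("per-column-sketch").getOrCreate()
  import spark.implicits._

  val accounts = Seq("ACC1", "ACC2").toDF("account_id")
  // Two child records per account, mirroring a per-column count of 2 on column "account_id"
  val exploded = accounts
    .withColumn("_per_col", expr("TRANSFORM(SEQUENCE(1, 2), i -> NAMED_STRUCT('txn_seq', i))"))
    .withColumn("_txn", explode(col("_per_col")))
    .select(col("account_id"), col("_txn.txn_seq"))
  exploded.show(false)
  spark.stop()
}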
length = RANDOM.nextInt(maxLength + 1) + minLength + RANDOM.alphanumeric.take(length).mkString("") + }).asNondeterministic()) + } + + private def attachMetadata(df: DataFrame, structType: StructType): DataFrame = { + sparkSession.createDataFrame(df.selectExpr(structType.fieldNames: _*).rdd, structType) + } +} diff --git a/app/src/main/scala/com/github/pflooky/datagen/core/generator/DataGeneratorProcessor.scala b/app/src/main/scala/com/github/pflooky/datagen/core/generator/DataGeneratorProcessor.scala new file mode 100644 index 00000000..86769017 --- /dev/null +++ b/app/src/main/scala/com/github/pflooky/datagen/core/generator/DataGeneratorProcessor.scala @@ -0,0 +1,70 @@ +package com.github.pflooky.datagen.core.generator + +import com.github.pflooky.datacaterer.api.model.{DataCatererConfiguration, Plan, Task, TaskSummary, ValidationConfiguration} +import com.github.pflooky.datagen.core.generator.result.DataGenerationResultWriter +import com.github.pflooky.datagen.core.listener.SparkRecordListener +import com.github.pflooky.datagen.core.util.PlanImplicits.TaskOps +import com.github.pflooky.datagen.core.parser.PlanParser +import com.github.pflooky.datagen.core.validator.ValidationProcessor +import org.apache.log4j.Logger +import org.apache.spark.sql.SparkSession + +class DataGeneratorProcessor(dataCatererConfiguration: DataCatererConfiguration)(implicit sparkSession: SparkSession) { + + private val LOGGER = Logger.getLogger(getClass.getName) + private val connectionConfigsByName = dataCatererConfiguration.connectionConfigByName + private val foldersConfig = dataCatererConfiguration.foldersConfig + private val metadataConfig = dataCatererConfiguration.metadataConfig + private val flagsConfig = dataCatererConfiguration.flagsConfig + private val generationConfig = dataCatererConfiguration.generationConfig + private lazy val dataGenerationResultWriter = new DataGenerationResultWriter(metadataConfig, foldersConfig, flagsConfig) + private lazy val batchDataProcessor = new BatchDataProcessor(connectionConfigsByName, foldersConfig, metadataConfig, flagsConfig, generationConfig) + private lazy val sparkRecordListener = new SparkRecordListener(flagsConfig.enableCount) + sparkSession.sparkContext.addSparkListener(sparkRecordListener) + + def generateData(): Unit = { + val plan = PlanParser.parsePlan(foldersConfig.planFilePath) + val enabledPlannedTasks = plan.tasks.filter(_.enabled) + val enabledTaskMap = enabledPlannedTasks.map(t => (t.name, t)).toMap + val tasks = PlanParser.parseTasks(foldersConfig.taskFolderPath) + val enabledTasks = tasks.filter(t => enabledTaskMap.contains(t.name)).toList + + generateData(plan.copy(tasks = enabledPlannedTasks), enabledTasks, None) + } + + def generateData(plan: Plan, tasks: List[Task], optValidations: Option[List[ValidationConfiguration]]): Unit = { + val tasksByName = tasks.map(t => (t.name, t)).toMap + val summaryWithTask = plan.tasks.map(t => (t, tasksByName(t.name))) + generateDataWithResult(plan, summaryWithTask, optValidations) + } + + private def generateDataWithResult(plan: Plan, summaryWithTask: List[(TaskSummary, Task)], optValidations: Option[List[ValidationConfiguration]]): Unit = { + if (flagsConfig.enableDeleteGeneratedRecords) { + LOGGER.warn("Both enableGenerateData and enableDeleteGeneratedData are true. Please only enable one at a time. 
Will continue with generating data") + } + if (LOGGER.isDebugEnabled) { + LOGGER.debug(s"Following tasks are enabled and will be executed: num-tasks=${summaryWithTask.size}, tasks=($summaryWithTask)") + summaryWithTask.foreach(t => LOGGER.debug(s"Enabled task details: ${t._2.toTaskDetailString}")) + } + val stepNames = summaryWithTask.map(t => s"task=${t._2.name}, num-steps=${t._2.steps.size}, steps=${t._2.steps.map(_.name).mkString(",")}").mkString("||") + + if (summaryWithTask.isEmpty) { + LOGGER.warn("No tasks found or no tasks enabled. No data will be generated") + } else { + val generationResult = if (flagsConfig.enableGenerateData) { + LOGGER.info(s"Following tasks are enabled and will be executed: num-tasks=${summaryWithTask.size}, tasks=$stepNames") + batchDataProcessor.splitAndProcess(plan, summaryWithTask) + } else List() + + val validationResults = if (flagsConfig.enableValidation) { + new ValidationProcessor(connectionConfigsByName, optValidations, dataCatererConfiguration.validationConfig, foldersConfig) + .executeValidations + } else List() + + if (flagsConfig.enableSaveReports) { + dataGenerationResultWriter.writeResult(plan, generationResult, validationResults, sparkRecordListener) + } + } + } + +} diff --git a/app/src/main/scala/com/github/pflooky/datagen/core/generator/provider/DataGenerator.scala b/app/src/main/scala/com/github/pflooky/datagen/core/generator/provider/DataGenerator.scala new file mode 100644 index 00000000..0a93b0b5 --- /dev/null +++ b/app/src/main/scala/com/github/pflooky/datagen/core/generator/provider/DataGenerator.scala @@ -0,0 +1,120 @@ +package com.github.pflooky.datagen.core.generator.provider + +import com.github.pflooky.datacaterer.api.model.Constants.{ARRAY_MAXIMUM_LENGTH, ARRAY_MINIMUM_LENGTH, ENABLED_EDGE_CASE, ENABLED_NULL, IS_UNIQUE, PROBABILITY_OF_EDGE_CASE, PROBABILITY_OF_NULL, RANDOM_SEED, STATIC} +import com.github.pflooky.datacaterer.api.model.generator.BaseGenerator +import net.datafaker.Faker +import org.apache.spark.sql.Column +import org.apache.spark.sql.functions.{expr, lit, rand, when} +import org.apache.spark.sql.types.StructField + +import scala.annotation.tailrec +import scala.collection.mutable +import scala.language.higherKinds +import scala.util.Random + +trait DataGenerator[T] extends BaseGenerator[T] with Serializable { + + val structField: StructField + val faker: Faker + + lazy val optRandomSeed: Option[Long] = if (structField.metadata.contains(RANDOM_SEED)) Some(structField.metadata.getString(RANDOM_SEED).toLong) else None + lazy val sqlRandom: String = optRandomSeed.map(seed => s"RAND($seed)").getOrElse("RAND()") + lazy val random: Random = if (structField.metadata.contains(RANDOM_SEED)) new Random(structField.metadata.getString(RANDOM_SEED).toLong) else new Random() + lazy val enabledNull: Boolean = if (structField.metadata.contains(ENABLED_NULL)) structField.metadata.getString(ENABLED_NULL).toBoolean else false + lazy val enabledEdgeCases: Boolean = if (structField.metadata.contains(ENABLED_EDGE_CASE)) structField.metadata.getString(ENABLED_EDGE_CASE).toBoolean else false + lazy val isUnique: Boolean = if (structField.metadata.contains(IS_UNIQUE)) structField.metadata.getString(IS_UNIQUE).toBoolean else false + lazy val probabilityOfNull: Double = if (structField.metadata.contains(PROBABILITY_OF_NULL)) structField.metadata.getString(PROBABILITY_OF_NULL).toDouble else 0.1 + lazy val probabilityOfEdgeCases: Double = if (structField.metadata.contains(PROBABILITY_OF_EDGE_CASE)) 
structField.metadata.getString(PROBABILITY_OF_EDGE_CASE).toDouble else 0.5 + lazy val prevGenerated: mutable.Set[T] = mutable.Set[T]() + lazy val optStatic: Option[String] = if (structField.metadata.contains(STATIC)) Some(structField.metadata.getString(STATIC)) else None + + def generateSqlExpressionWrapper: String = { + if (optStatic.isDefined) { + return s"'${optStatic.get}'" + } + val baseSqlExpression = replaceLambdaFunction(generateSqlExpression) + val caseRandom = optRandomSeed.map(s => rand(s)).getOrElse(rand()) + val expression = (enabledEdgeCases, enabledNull) match { + case (true, true) => + when(caseRandom.leq(probabilityOfEdgeCases), edgeCases(random.nextInt(edgeCases.size))) + .otherwise(when(caseRandom.leq(probabilityOfEdgeCases + probabilityOfNull), null)) + .otherwise(expr(baseSqlExpression)) + .expr.sql + case (true, false) => + when(caseRandom.leq(probabilityOfEdgeCases), edgeCases(random.nextInt(edgeCases.size))) + .otherwise(expr(baseSqlExpression)) + .expr.sql + case (false, true) => + when(caseRandom.leq(probabilityOfNull), null) + .otherwise(expr(baseSqlExpression)) + .expr.sql + case _ => baseSqlExpression + } + replaceLambdaFunction(expression) + } + + def generateWrapper(count: Int = 0): T = { + val randDouble = random.nextDouble() + val generatedValue = if (enabledEdgeCases && randDouble <= probabilityOfEdgeCases) { + edgeCases(random.nextInt(edgeCases.size)) + } else { + generate + } + if (count > 10) { + //TODO: logic doesn't work when field is auto_increment, need to be aware if data system automatically takes care of it (therefore, field can be omitted from generation) + throw new RuntimeException(s"Failed to generate new unique value for field, retries=$count, name=${structField.name}, " + + s"metadata=${structField.metadata}, sample-previously-generated=${prevGenerated.take(3)}") + } else if (isUnique) { + if (prevGenerated.contains(generatedValue)) { + generateWrapper(count + 1) + } else { + prevGenerated.add(generatedValue) + generatedValue + } + } else { + generatedValue + } + } + + @tailrec + private def replaceLambdaFunction(sql: String): String = { + val lambdaRegex = ".*lambdafunction\\((.+?), i\\).*".r.pattern + val matcher = lambdaRegex.matcher(sql) + if (matcher.matches()) { + val innerFunction = matcher.group(1) + val replace = sql.replace(s"lambdafunction($innerFunction, i)", s"i -> $innerFunction") + replaceLambdaFunction(replace) + } else sql + } +} + +trait NullableDataGenerator[T >: Null] extends DataGenerator[T] { + + override def generateWrapper(count: Int = 0): T = { + val randDouble = random.nextDouble() + if (enabledNull && structField.nullable && randDouble <= probabilityOfNull) { + null + } else if (enabledEdgeCases && edgeCases.nonEmpty && + ((structField.nullable && randDouble <= probabilityOfEdgeCases + probabilityOfNull) || + (!structField.nullable && randDouble <= probabilityOfEdgeCases))) { + edgeCases(random.nextInt(edgeCases.size)) + } else { + generate + } + } +} + +trait ArrayDataGenerator[T] extends NullableDataGenerator[List[T]] { + + lazy val arrayMaxSize: Int = if (structField.metadata.contains(ARRAY_MAXIMUM_LENGTH)) structField.metadata.getString(ARRAY_MAXIMUM_LENGTH).toInt else 5 + lazy val arrayMinSize: Int = if (structField.metadata.contains(ARRAY_MINIMUM_LENGTH)) structField.metadata.getString(ARRAY_MINIMUM_LENGTH).toInt else 0 + + def elementGenerator: DataGenerator[T] + + override def generate: List[T] = { + val listSize = random.nextInt(arrayMaxSize) + arrayMinSize + (arrayMinSize to listSize) + .map(_ => 
elementGenerator.generate) + .toList + } +} \ No newline at end of file diff --git a/app/src/main/scala/com/github/pflooky/datagen/core/generator/provider/OneOfDataGenerator.scala b/app/src/main/scala/com/github/pflooky/datagen/core/generator/provider/OneOfDataGenerator.scala new file mode 100644 index 00000000..e4d46dee --- /dev/null +++ b/app/src/main/scala/com/github/pflooky/datagen/core/generator/provider/OneOfDataGenerator.scala @@ -0,0 +1,45 @@ +package com.github.pflooky.datagen.core.generator.provider + +import com.github.pflooky.datacaterer.api.model.Constants.{ONE_OF_GENERATOR, ONE_OF_GENERATOR_DELIMITER} +import net.datafaker.Faker +import org.apache.spark.sql.types.StructField + +import scala.util.{Failure, Success, Try} + +object OneOfDataGenerator { + + def getGenerator(structField: StructField, faker: Faker = new Faker()): DataGenerator[Any] = { + new RandomOneOfDataGenerator(structField, faker) + } + + class RandomOneOfDataGenerator(val structField: StructField, val faker: Faker = new Faker()) extends DataGenerator[Any] { + private lazy val oneOfValues = getOneOfList + private lazy val oneOfArrayLength = oneOfValues.length + assert(structField.metadata.contains(ONE_OF_GENERATOR), s"$ONE_OF_GENERATOR not defined for data generator in metadata, unable to generate data, name=${structField.name}, " + + s"type=${structField.dataType}, metadata=${structField.metadata}") + + override def generate: Any = { + oneOfValues(random.nextInt(oneOfArrayLength)) + } + + override def generateSqlExpression: String = { + val oneOfValuesString = oneOfValues.mkString("||") + s"CAST(SPLIT('$oneOfValuesString', '\\\\|\\\\|')[CAST(RAND() * $oneOfArrayLength AS INT)] AS ${structField.dataType.sql})" + } + + private def getOneOfList: Array[String] = { + val tryStringArray = Try(structField.metadata.getStringArray(ONE_OF_GENERATOR)) + tryStringArray match { + case Failure(_) => + val tryString = Try(structField.metadata.getString(ONE_OF_GENERATOR)) + tryString match { + case Failure(exception) => throw new RuntimeException(s"Failed to get $ONE_OF_GENERATOR from field metadata, " + + s"field-name=${structField.name}, field-type=${structField.dataType.typeName}", exception) + case Success(value) => value.split(ONE_OF_GENERATOR_DELIMITER) + } + case Success(value) => value + } + } + } + +} diff --git a/app/src/main/scala/com/github/pflooky/datagen/core/generator/provider/RandomDataGenerator.scala b/app/src/main/scala/com/github/pflooky/datagen/core/generator/provider/RandomDataGenerator.scala new file mode 100644 index 00000000..c3238d5b --- /dev/null +++ b/app/src/main/scala/com/github/pflooky/datagen/core/generator/provider/RandomDataGenerator.scala @@ -0,0 +1,410 @@ +package com.github.pflooky.datagen.core.generator.provider + +import com.github.pflooky.datacaterer.api.model.Constants.{ARRAY_MAXIMUM_LENGTH, ARRAY_MINIMUM_LENGTH, DEFAULT_VALUE, DISTINCT_COUNT, EXPRESSION, MAXIMUM, MAXIMUM_LENGTH, MEAN, MINIMUM, MINIMUM_LENGTH, NUMERIC_PRECISION, NUMERIC_SCALE, ROW_COUNT, STANDARD_DEVIATION} +import com.github.pflooky.datagen.core.exception.UnsupportedDataGeneratorType +import com.github.pflooky.datagen.core.model.Constants._ +import com.github.pflooky.datagen.core.util.GeneratorUtil +import net.datafaker.Faker +import org.apache.spark.sql.functions.{array_repeat, expr, lit} +import org.apache.spark.sql.{Row, functions} +import org.apache.spark.sql.types._ + +import java.sql.{Date, Timestamp} +import java.time.temporal.ChronoUnit +import java.time.{Instant, LocalDate} +import scala.util.Try + +object 
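// Editor's sketch (hypothetical generator, not part of this commit): a minimal custom DataGenerator following the
// trait contract above: provide the StructField and Faker, an in-JVM generate used by generateWrapper(), and a
// Spark SQL expression used by generateSqlExpressionWrapper. GENERATE_REGEX_UDF refers to the regex UDF
// registered in DataGeneratorFactory.registerSparkFunctions.
import com.github.pflooky.datagen.core.model.Constants.GENERATE_REGEX_UDF
import net.datafaker.Faker
import org.apache.spark.sql.types.{StringType, StructField}

class UppercaseCodeDataGenerator(val structField: StructField = StructField("code", StringType),
                                 val faker: Faker = new Faker()) extends DataGenerator[String] {
  override val edgeCases: List[String] = List("")

  override def generate: String = faker.regexify("[A-Z]{8}")

  override def generateSqlExpression: String = s"$GENERATE_REGEX_UDF('[A-Z]{8}')"
}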
RandomDataGenerator { + + def getGeneratorForStructType(structType: StructType, faker: Faker = new Faker()): Array[DataGenerator[_]] = { + structType.fields.map(getGeneratorForStructField(_, faker)) + } + + def getGeneratorForStructField(structField: StructField, faker: Faker = new Faker()): DataGenerator[_] = { + structField.dataType match { + case StringType => new RandomStringDataGenerator(structField, faker) + case IntegerType => new RandomIntDataGenerator(structField, faker) + case LongType => new RandomLongDataGenerator(structField, faker) + case ShortType => new RandomShortDataGenerator(structField, faker) + case DecimalType() => new RandomDecimalDataGenerator(structField, faker) + case DoubleType => new RandomDoubleDataGenerator(structField, faker) + case FloatType => new RandomFloatDataGenerator(structField, faker) + case DateType => new RandomDateDataGenerator(structField, faker) + case TimestampType => new RandomTimestampDataGenerator(structField, faker) + case BooleanType => new RandomBooleanDataGenerator(structField, faker) + case BinaryType => new RandomBinaryDataGenerator(structField, faker) + case ByteType => new RandomByteDataGenerator(structField, faker) + case ArrayType(dt, _) => new RandomArrayDataGenerator(structField, dt, faker) + case StructType(_) => new RandomStructTypeDataGenerator(structField, faker) + case x => throw new UnsupportedDataGeneratorType(s"Unsupported type for random data generation: name=${structField.name}, type=${x.typeName}") + } + } + + class RandomStringDataGenerator(val structField: StructField, val faker: Faker = new Faker()) extends NullableDataGenerator[String] { + private val minLength = tryGetValue(structField.metadata, MINIMUM_LENGTH, 1) + private val maxLength = tryGetValue(structField.metadata, MAXIMUM_LENGTH, 20) + assert(minLength <= maxLength, s"minLength has to be less than or equal to maxLength, field-name=${structField.name}, minLength=$minLength, maxLength=$maxLength") + private lazy val tryExpression = Try(structField.metadata.getString(EXPRESSION)) + private val characterSet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789 " + private val characterSetSize = characterSet.length + + override val edgeCases: List[String] = List("", "\n", "\r", "\t", " ", "\\u0000", "\\ufff", + "İyi günler", "Спасибо", "Καλημέρα", "صباح الخير", "Förlåt", "你好吗", "Nhà vệ sinh ở đâu", "こんにちは", "नमस्ते", "Բարեւ", "Здравейте") + + override def generate: String = { + if (tryExpression.isSuccess) { + faker.expression(tryExpression.get) + } else { + val stringLength = (random.nextDouble() * (maxLength - minLength) + minLength).toInt + random.alphanumeric.take(stringLength).mkString + } + } + + override def generateSqlExpression: String = { + if (tryExpression.isSuccess) { + s"$GENERATE_FAKER_EXPRESSION_UDF('${tryExpression.get}')" + } else { + val randLength = s"CAST(ROUND($sqlRandom * ${maxLength - minLength} + $minLength, 0) AS INT)" + s"CONCAT_WS('', TRANSFORM(SEQUENCE(1, $randLength), i -> SUBSTR('$characterSet', CEIL(RAND() * $characterSetSize), 1)))" + } + } + } + + class RandomIntDataGenerator(val structField: StructField, val faker: Faker = new Faker()) extends DataGenerator[Int] { + private val min = tryGetValue(structField.metadata, MINIMUM, 0) + private val max = tryGetValue(structField.metadata, MAXIMUM, 100000) + assert(min <= max, s"min has to be less than or equal to max, field-name=${structField.name}, min=$min, max=$max") + + override val edgeCases: List[Int] = List(Int.MaxValue, Int.MinValue, 0) + + override def generate: 
Int = { + faker.random().nextInt(min, max) + } + + override def generateSqlExpression: String = sqlExpressionForNumeric(structField.metadata, "INT", sqlRandom) + } + + class RandomShortDataGenerator(val structField: StructField, val faker: Faker = new Faker()) extends DataGenerator[Short] { + private val min = tryGetValue(structField.metadata, MINIMUM, 0) + private val max = tryGetValue(structField.metadata, MAXIMUM, 1000) + assert(min <= max, s"min has to be less than or equal to max, field-name=${structField.name}, min=$min, max=$max") + + override val edgeCases: List[Short] = List(Short.MaxValue, Short.MinValue, 0) + + override def generate: Short = { + (random.nextDouble() * (max - min) + min).toShort + } + + override def generateSqlExpression: String = sqlExpressionForNumeric(structField.metadata, "SHORT", sqlRandom) + } + + class RandomLongDataGenerator(val structField: StructField, val faker: Faker = new Faker()) extends DataGenerator[Long] { + private lazy val min = tryGetValue(structField.metadata, MINIMUM, 0L) + private lazy val max = tryGetValue(structField.metadata, MAXIMUM, 100000L) + assert(min <= max, s"min has to be less than or equal to max, field-name=${structField.name}, min=$min, max=$max") + + override val edgeCases: List[Long] = List(Long.MaxValue, Long.MinValue, 0) + + override def generate: Long = { + faker.random().nextLong(min, max) + } + + override def generateSqlExpression: String = sqlExpressionForNumeric(structField.metadata, "LONG", sqlRandom) + } + + class RandomDecimalDataGenerator(val structField: StructField, val faker: Faker = new Faker()) extends DataGenerator[BigDecimal] { + private lazy val min = tryGetValue(structField.metadata, MINIMUM, BigDecimal.valueOf(0)) + private lazy val max = tryGetValue(structField.metadata, MAXIMUM, BigDecimal.valueOf(100000)) + private val decimalType = structField.dataType.asInstanceOf[DecimalType] + private lazy val precision = tryGetValue(structField.metadata, NUMERIC_PRECISION, decimalType.precision) + private lazy val scale = tryGetValue(structField.metadata, NUMERIC_SCALE, decimalType.scale) + assert(min <= max, s"min has to be less than or equal to max, field-name=${structField.name}, min=$min, max=$max") + + override val edgeCases: List[BigDecimal] = List(Long.MaxValue, Long.MinValue, 0) + + override def generate: BigDecimal = { + random.nextDouble() * (max - min) + min + } + + override def generateSqlExpression: String = sqlExpressionForNumeric(structField.metadata, s"DECIMAL($precision, $scale)", sqlRandom) + } + + class RandomDoubleDataGenerator(val structField: StructField, val faker: Faker = new Faker()) extends DataGenerator[Double] { + private lazy val min = tryGetValue(structField.metadata, MINIMUM, 0.0) + private lazy val max = tryGetValue(structField.metadata, MAXIMUM, 100000.0) + assert(min <= max, s"min has to be less than or equal to max, field-name=${structField.name}, min=$min, max=$max") + + override val edgeCases: List[Double] = List(Double.PositiveInfinity, Double.MaxValue, Double.MinPositiveValue, + 0.0, -0.0, Double.MinValue, Double.NegativeInfinity, Double.NaN) + + override def generate: Double = { + faker.random().nextDouble(min, max) + } + + override def generateSqlExpression: String = sqlExpressionForNumeric(structField.metadata, "DOUBLE", sqlRandom) + } + + class RandomFloatDataGenerator(val structField: StructField, val faker: Faker = new Faker()) extends DataGenerator[Float] { + private lazy val min = tryGetValue(structField.metadata, MINIMUM, 0.0.toFloat) + private lazy val max = 
tryGetValue(structField.metadata, MAXIMUM, 100000.0.toFloat) + assert(min <= max, s"min has to be less than or equal to max, field-name=${structField.name}, min=$min, max=$max") + + override val edgeCases: List[Float] = List(Float.PositiveInfinity, Float.MaxValue, Float.MinPositiveValue, + 0.0f, -0.0f, Float.MinValue, Float.NegativeInfinity, Float.NaN) + + override def generate: Float = { + faker.random().nextDouble(min, max).toFloat + } + + override def generateSqlExpression: String = sqlExpressionForNumeric(structField.metadata, "FLOAT", sqlRandom) + } + + class RandomDateDataGenerator(val structField: StructField, val faker: Faker = new Faker()) extends NullableDataGenerator[Date] { + private lazy val min = getMinValue + private lazy val max = getMaxValue + assert(min.isBefore(max) || min.isEqual(max), s"min has to be less than or equal to max, field-name=${structField.name}, min=$min, max=$max") + private lazy val maxDays = java.time.temporal.ChronoUnit.DAYS.between(min, max).toInt + + //from here: https://github.com/apache/spark/blob/master/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala#L206 + override val edgeCases: List[Date] = List( + Date.valueOf("0001-01-01"), + Date.valueOf("1582-10-15"), + Date.valueOf("1970-01-01"), + Date.valueOf("9999-12-31") + ) + + override def generate: Date = { + Date.valueOf(min.plusDays(random.nextInt(maxDays))) + } + + private def getMinValue: LocalDate = { + Try(structField.metadata.getString(MINIMUM)).map(LocalDate.parse) + .getOrElse(LocalDate.now().minusDays(365)) + } + + private def getMaxValue: LocalDate = { + Try(structField.metadata.getString(MAXIMUM)).map(LocalDate.parse) + .getOrElse(LocalDate.now()) + } + + override def generateSqlExpression: String = { + s"DATE_ADD('${min.toString}', CAST($sqlRandom * $maxDays AS INT))" + } + } + + class RandomTimestampDataGenerator(val structField: StructField, val faker: Faker = new Faker()) extends NullableDataGenerator[Timestamp] { + private lazy val min = getMinValue + private lazy val max = getMaxValue + assert(min <= max, s"min has to be less than or equal to max, field-name=${structField.name}, min=$min, max=$max") + + //from here: https://github.com/apache/spark/blob/master/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala#L159 + override val edgeCases: List[Timestamp] = List( + Timestamp.valueOf("0001-01-01 00:00:00"), + Timestamp.valueOf("1582-10-15 23:59:59"), + Timestamp.valueOf("1970-01-01 00:00:00"), + Timestamp.valueOf("9999-12-31 23:59:59") + ) + + override def generate: Timestamp = { + val milliSecondsSinceEpoch = (random.nextDouble() * (max - min) + min).toLong + Timestamp.from(Instant.ofEpochMilli(milliSecondsSinceEpoch)) + } + + private def getMinValue: Long = { + Try(structField.metadata.getString(MINIMUM)).map(Timestamp.valueOf) + .getOrElse(Timestamp.from(Instant.now().minus(365, ChronoUnit.DAYS))) + .toInstant.toEpochMilli + } + + private def getMaxValue: Long = { + Try(structField.metadata.getString(MAXIMUM)).map(Timestamp.valueOf) + .getOrElse(Timestamp.from(Instant.now())) + .toInstant.toEpochMilli + 1L + } + + override def generateSqlExpression: String = { + s"CAST(TIMESTAMP_MILLIS(CAST($sqlRandom * ${max - min} + $min AS LONG)) AS TIMESTAMP)" + } + } + + class RandomBooleanDataGenerator(val structField: StructField, val faker: Faker = new Faker()) extends DataGenerator[Boolean] { + override def generate: Boolean = { + random.nextBoolean() + } + + override def generateSqlExpression: String = { + s"BOOLEAN(ROUND($sqlRandom))" 
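// Editor's sketch (hypothetical usage, not part of this commit): field metadata drives every generator in this
// file. Here the MINIMUM/MAXIMUM metadata keys bound the RandomIntDataGenerator chosen by getGeneratorForStructField.
import com.github.pflooky.datacaterer.api.model.Constants.{MAXIMUM, MINIMUM}
import org.apache.spark.sql.types.{IntegerType, MetadataBuilder, StructField}

object MetadataDrivenGeneratorSketch {
  val ageField = StructField("age", IntegerType, nullable = false,
    new MetadataBuilder().putString(MINIMUM, "18").putString(MAXIMUM, "65").build())

  val ageGenerator = RandomDataGenerator.getGeneratorForStructField(ageField)
  // ageGenerator.generate returns an Int between 18 and 65;
  // ageGenerator.generateSqlExpressionWrapper returns the equivalent Spark SQL snippet used for batch generation.
}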
+ } + } + + class RandomBinaryDataGenerator(val structField: StructField, val faker: Faker = new Faker()) extends NullableDataGenerator[Array[Byte]] { + private lazy val minLength = tryGetValue(structField.metadata, MINIMUM_LENGTH, 1) + private lazy val maxLength = tryGetValue(structField.metadata, MAXIMUM_LENGTH, 20) + assert(minLength <= maxLength, s"minLength has to be less than or equal to maxLength, field-name=${structField.name}, minLength=$minLength, maxLength=$maxLength") + + override val edgeCases: List[Array[Byte]] = List(Array(), "\n".getBytes, "\r".getBytes, "\t".getBytes, + " ".getBytes, "\\u0000".getBytes, "\\ufff".getBytes, Array(Byte.MinValue), Array(Byte.MaxValue)) + + override def generate: Array[Byte] = { + val byteLength = (random.nextDouble() * (maxLength - minLength) + minLength).toInt + faker.random().nextRandomBytes(byteLength) + } + + override def generateSqlExpression: String = { + s"TO_BINARY(ARRAY_JOIN(TRANSFORM(ARRAY_REPEAT(1, CAST($sqlRandom * ${maxLength - minLength} + $minLength AS INT)), i -> CHAR(ROUND($sqlRandom * 94 + 32, 0))), ''), 'utf-8')" + } + } + + class RandomByteDataGenerator(val structField: StructField, val faker: Faker = new Faker()) extends DataGenerator[Byte] { + override val edgeCases: List[Byte] = List(Byte.MinValue, Byte.MaxValue) + + override def generate: Byte = { + faker.random().nextRandomBytes(1).head + } + + override def generateSqlExpression: String = { + s"TO_BINARY(CHAR(ROUND($sqlRandom * 94 + 32, 0)))" + } + } + + class RandomArrayDataGenerator[T](val structField: StructField, val dataType: DataType, val faker: Faker = new Faker()) extends ArrayDataGenerator[T] { + override lazy val arrayMinSize: Int = tryGetValue(structField.metadata, ARRAY_MINIMUM_LENGTH, 0) + override lazy val arrayMaxSize: Int = tryGetValue(structField.metadata, ARRAY_MAXIMUM_LENGTH, 5) + + override def elementGenerator: DataGenerator[T] = { + dataType match { + case structType: StructType => + new RandomStructTypeDataGenerator(StructField(structField.name, structType), faker).asInstanceOf[DataGenerator[T]] + case _ => + getGeneratorForStructField(structField.copy(dataType = dataType), faker).asInstanceOf[DataGenerator[T]] + } + } + + override def generateSqlExpression: String = { + val nestedSqlExpressions = dataType match { + case structType: StructType => + val structGen = new RandomStructTypeDataGenerator(StructField(structField.name, structType)) + structGen.generateSqlExpressionWrapper + case _ => + getGeneratorForStructField(structField.copy(dataType = dataType)).generateSqlExpressionWrapper + } + s"TRANSFORM(ARRAY_REPEAT(1, CAST($sqlRandom * ${arrayMaxSize - arrayMinSize} + $arrayMinSize AS INT)), i -> $nestedSqlExpressions)" + } + } + + class RandomStructTypeDataGenerator(val structField: StructField, val faker: Faker = new Faker()) extends DataGenerator[Row] { + override def generate: Row = { + structField.dataType match { + case ArrayType(dt, _) => + val listGenerator = new RandomArrayDataGenerator(structField, dt, faker) + Row.fromSeq(listGenerator.generate) + case StructType(fields) => + val dataGenerators = fields.map(field => getGeneratorForStructField(field, faker)) + Row.fromSeq(dataGenerators.map(_.generateWrapper())) + } + } + + override def generateSqlExpression: String = { + val nestedSqlExpression = structField.dataType match { + case ArrayType(dt, _) => + val listGenerator = new RandomArrayDataGenerator(structField, dt) + listGenerator.generateSqlExpressionWrapper + case StructType(fields) => + fields.map(f => 
GeneratorUtil.getDataGenerator(f, faker)) + .map(f => s"'${f.structField.name}', ${f.generateSqlExpressionWrapper}") + .mkString(",") + case _ => + getGeneratorForStructField(structField).generateSqlExpressionWrapper + } + s"NAMED_STRUCT($nestedSqlExpression)" + } + } + + + def sqlExpressionForNumeric(metadata: Metadata, typeName: String, sqlRand: String): String = { + val (min, max, diff, mean) = typeName match { + case "INT" => + val min = tryGetValue(metadata, MINIMUM, 0) + val max = tryGetValue(metadata, MAXIMUM, 100000) + val diff = max - min + val mean = tryGetValue(metadata, MEAN, diff.toDouble) + (min, max, diff, mean) + case "SHORT" => + val min = tryGetValue(metadata, MINIMUM, 0) + val max = tryGetValue(metadata, MAXIMUM, 1000) + val diff = max - min + val mean = tryGetValue(metadata, MEAN, diff.toDouble) + (min, max, diff, mean) + case "LONG" => + val min = tryGetValue(metadata, MINIMUM, 0L) + val max = tryGetValue(metadata, MAXIMUM, 100000L) + val diff = max - min + val mean = tryGetValue(metadata, MEAN, diff.toDouble) + (min, max, diff, mean) + case x if x.startsWith("DECIMAL") => + val min = tryGetValue(metadata, MINIMUM, BigDecimal.valueOf(0)) + val max = tryGetValue(metadata, MAXIMUM, BigDecimal.valueOf(100000)) + val diff = max - min + val mean = tryGetValue(metadata, MEAN, diff.toDouble) + (min, max, diff, mean) + case "DOUBLE" => + val min = tryGetValue(metadata, MINIMUM, 0.0) + val max = tryGetValue(metadata, MAXIMUM, 100000.0) + val diff = max - min + val mean = tryGetValue(metadata, MEAN, diff) + (min, max, diff, mean) + case "FLOAT" => + val min = tryGetValue(metadata, MINIMUM, 0.0.toFloat) + val max = tryGetValue(metadata, MAXIMUM, 100000.0.toFloat) + val diff = max - min + val mean = tryGetValue(metadata, MEAN, diff) + (min, max, diff, mean) + } + val defaultValue = tryGetValue(metadata, DEFAULT_VALUE, "") + val standardDeviation = tryGetValue(metadata, STANDARD_DEVIATION, 1.0) + val distinctCount = tryGetValue(metadata, DISTINCT_COUNT, 0) + val count = tryGetValue(metadata, ROW_COUNT, 0) + + val baseFormula = if (defaultValue.toLowerCase.startsWith("nextval") || (distinctCount == count && distinctCount > 0)) { + s"$max + $INDEX_INC_COL + 1" //index col starts at 0 + } else if (metadata.contains(STANDARD_DEVIATION) && metadata.contains(MEAN)) { + val randNormal = sqlRand.replace("RAND", "RANDN") + s"$randNormal * $standardDeviation + $mean" + } else { + s"$sqlRand * $diff + $min" + } + + if (!baseFormula.contains(INDEX_INC_COL) && (typeName == "INT" || typeName == "SHORT" || typeName == "LONG")) { + s"CAST(ROUND($baseFormula, 0) AS $typeName)" + } else { + s"CAST($baseFormula AS $typeName)" + } + } + + def tryGetValue[T](metadata: Metadata, key: String, default: T)(implicit converter: Converter[T]): T = { + Try(converter.convert(metadata.getString(key))) + .getOrElse(default) + } + + trait Converter[T] { + self => + def convert(v: String): T + } + + object Converter { + implicit val stringLoader: Converter[String] = (v: String) => v + + implicit val intLoader: Converter[Int] = (v: String) => v.toInt + + implicit val longLoader: Converter[Long] = (v: String) => v.toLong + + implicit val shortLoader: Converter[Short] = (v: String) => v.toShort + + implicit val doubleLoader: Converter[Double] = (v: String) => v.toDouble + + implicit val floatLoader: Converter[Float] = (v: String) => v.toFloat + + implicit val decimalLoader: Converter[BigDecimal] = (v: String) => BigDecimal(v) + } +} + diff --git 
a/app/src/main/scala/com/github/pflooky/datagen/core/generator/provider/RegexDataGenerator.scala b/app/src/main/scala/com/github/pflooky/datagen/core/generator/provider/RegexDataGenerator.scala new file mode 100644 index 00000000..6829b26c --- /dev/null +++ b/app/src/main/scala/com/github/pflooky/datagen/core/generator/provider/RegexDataGenerator.scala @@ -0,0 +1,32 @@ +package com.github.pflooky.datagen.core.generator.provider + +import com.github.pflooky.datacaterer.api.model.Constants.REGEX_GENERATOR +import com.github.pflooky.datagen.core.exception.InvalidDataGeneratorConfigurationException +import com.github.pflooky.datagen.core.model.Constants.GENERATE_REGEX_UDF +import net.datafaker.Faker +import org.apache.spark.sql.types.StructField + +import scala.util.Try + +object RegexDataGenerator { + + def getGenerator(structField: StructField, faker: Faker = new Faker()): DataGenerator[_] = { + new RandomRegexDataGenerator(structField, faker) + } + + class RandomRegexDataGenerator(val structField: StructField, val faker: Faker = new Faker()) extends NullableDataGenerator[String] { + private val regex = Try(structField.metadata.getString(REGEX_GENERATOR)) + .getOrElse(throw new InvalidDataGeneratorConfigurationException(structField, REGEX_GENERATOR)) + + override val edgeCases: List[String] = List() + + override def generate: String = { + faker.regexify(regex) + } + + override def generateSqlExpression: String = { + s"$GENERATE_REGEX_UDF('$regex')" + } + } + +} diff --git a/app/src/main/scala/com/github/pflooky/datagen/core/generator/result/DataGenerationResultWriter.scala b/app/src/main/scala/com/github/pflooky/datagen/core/generator/result/DataGenerationResultWriter.scala new file mode 100644 index 00000000..232392ac --- /dev/null +++ b/app/src/main/scala/com/github/pflooky/datagen/core/generator/result/DataGenerationResultWriter.scala @@ -0,0 +1,117 @@ +package com.github.pflooky.datagen.core.generator.result + +import com.fasterxml.jackson.annotation.JsonInclude.Include +import com.github.pflooky.datacaterer.api.model.Constants.DEFAULT_GENERATED_REPORTS_FOLDER_PATH +import com.github.pflooky.datacaterer.api.model.{Field, FlagsConfig, FoldersConfig, MetadataConfig, Plan, Step, Task} +import com.github.pflooky.datagen.core.listener.SparkRecordListener +import com.github.pflooky.datagen.core.model.Constants.{REPORT_DATA_SOURCES_HTML, REPORT_FIELDS_HTML, REPORT_HOME_HTML, REPORT_VALIDATIONS_HTML} +import com.github.pflooky.datagen.core.model.{DataSourceResult, DataSourceResultSummary, StepResultSummary, TaskResultSummary, ValidationConfigResult} +import com.github.pflooky.datagen.core.util.FileUtil.writeStringToFile +import com.github.pflooky.datagen.core.util.ObjectMapperUtil +import org.apache.hadoop.fs.{FileSystem, Path} +import org.apache.log4j.Logger +import org.apache.spark.sql.SparkSession + +import java.io.File +import scala.util.{Failure, Success, Try} +import scala.xml.Node + +class DataGenerationResultWriter(metadataConfig: MetadataConfig, foldersConfig: FoldersConfig, flagsConfig: FlagsConfig)(implicit sparkSession: SparkSession) { + + private lazy val LOGGER = Logger.getLogger(getClass.getName) + private lazy val OBJECT_MAPPER = ObjectMapperUtil.jsonObjectMapper + + def writeResult( + plan: Plan, + generationResult: List[DataSourceResult], + validationResults: List[ValidationConfigResult], + sparkRecordListener: SparkRecordListener + ): Unit = { + OBJECT_MAPPER.setSerializationInclusion(Include.NON_ABSENT) + val (stepSummary, taskSummary, dataSourceSummary) = 
getSummaries(generationResult) + val fileSystem = FileSystem.get(sparkSession.sparkContext.hadoopConfiguration) + fileSystem.setWriteChecksum(false) + + LOGGER.info(s"Writing data generation summary to HTML files, folder-path=${foldersConfig.generatedReportsFolderPath}") + val htmlWriter = new ResultHtmlWriter() + val fileWriter = writeToFile(fileSystem, foldersConfig.generatedReportsFolderPath) _ + + try { + fileWriter(REPORT_HOME_HTML, htmlWriter.index(plan, stepSummary, taskSummary, dataSourceSummary, validationResults, flagsConfig, sparkRecordListener)) + fileWriter("tasks.html", htmlWriter.taskDetails(taskSummary)) + fileWriter(REPORT_FIELDS_HTML, htmlWriter.stepDetails(stepSummary)) + fileWriter(REPORT_DATA_SOURCES_HTML, htmlWriter.dataSourceDetails(stepSummary.flatMap(_.dataSourceResults))) + fileWriter(REPORT_VALIDATIONS_HTML, htmlWriter.validations(validationResults)) + + copyHtmlResources(fileSystem) + } catch { + case ex: Exception => + LOGGER.error("Failed to write data generation summary to HTML files", ex) + } + } + + private def copyHtmlResources(fileSystem: FileSystem): Unit = { + val resources = List("main.css", "data_catering_transparent.svg") + if (!foldersConfig.generatedReportsFolderPath.equalsIgnoreCase(DEFAULT_GENERATED_REPORTS_FOLDER_PATH)) { + resources.foreach(resource => { + val defaultResourcePath = new Path(s"file:///$DEFAULT_GENERATED_REPORTS_FOLDER_PATH/$resource") + val tryLocalUri = Try(new Path(getClass.getResource(s"/report/$resource").toURI)) + val resourcePath = tryLocalUri match { + case Failure(_) => + defaultResourcePath + case Success(value) => + Try(value.getName) match { + case Failure(_) => defaultResourcePath + case Success(name) => + if (name.startsWith("jar:")) defaultResourcePath else value + } + } + val destination = s"file:///${foldersConfig.generatedReportsFolderPath}/$resource" + fileSystem.copyFromLocalFile(resourcePath, new Path(destination)) + }) + } + } + + private def writeToFile(fileSystem: FileSystem, folderPath: String)(fileName: String, content: Node): Unit = { + writeStringToFile(fileSystem, s"$folderPath/$fileName", content.toString()) + } + + private def getSummaries(generationResult: List[DataSourceResult]): (List[StepResultSummary], List[TaskResultSummary], List[DataSourceResultSummary]) = { + val resultByStep = generationResult.groupBy(_.step).map(getResultSummary).toList + val resultByTask = generationResult.groupBy(_.task).map(getResultSummary).toList + val resultByDataSource = generationResult.groupBy(_.name).map(getResultSummary).toList + (resultByStep, resultByTask, resultByDataSource) + } + + private def getResultSummary(result: (Step, List[DataSourceResult])): StepResultSummary = { + val (totalRecords, isSuccess, _, _) = summariseDataSourceResult(result._2) + StepResultSummary(result._1, totalRecords, isSuccess, result._2) + } + + private def getResultSummary(result: (Task, List[DataSourceResult])): TaskResultSummary = { + val (totalRecords, isSuccess, _, _) = summariseDataSourceResult(result._2) + val stepResults = result._1.steps.map(step => getResultSummary((step, result._2.filter(_.step == step)))) + TaskResultSummary(result._1, totalRecords, isSuccess, stepResults) + } + + private def getResultSummary(result: (String, List[DataSourceResult])): DataSourceResultSummary = { + val (totalRecords, isSuccess, _, _) = summariseDataSourceResult(result._2) + DataSourceResultSummary(result._1, totalRecords, isSuccess, result._2) + } + + private def summariseDataSourceResult(dataSourceResults: List[DataSourceResult]): 
(Long, Boolean, List[String], List[Field]) = { + val totalRecords = dataSourceResults.map(_.sinkResult.count).sum + val isSuccess = dataSourceResults.forall(_.sinkResult.isSuccess) + val sample = dataSourceResults.flatMap(_.sinkResult.sample).take(metadataConfig.numGeneratedSamples) + val fieldMetadata = dataSourceResults.flatMap(_.sinkResult.generatedMetadata) + .groupBy(_.name) + .map(field => { + val metadataList = field._2.map(_.generator.map(gen => gen.options).getOrElse(Map())) + //TODO combine the metadata from each batch together to show summary + field._2.head + }).toList + (totalRecords, isSuccess, sample, fieldMetadata) + } + +} + diff --git a/app/src/main/scala/com/github/pflooky/datagen/core/generator/result/ResultHtmlWriter.scala b/app/src/main/scala/com/github/pflooky/datagen/core/generator/result/ResultHtmlWriter.scala new file mode 100644 index 00000000..2e9a2772 --- /dev/null +++ b/app/src/main/scala/com/github/pflooky/datagen/core/generator/result/ResultHtmlWriter.scala @@ -0,0 +1,777 @@ +package com.github.pflooky.datagen.core.generator.result + +import com.github.pflooky.datacaterer.api.model.Constants.HISTOGRAM +import com.github.pflooky.datacaterer.api.model.{ExpressionValidation, FlagsConfig, Generator, GroupByValidation, Plan, Step, UpstreamDataSourceValidation, Validation} +import com.github.pflooky.datagen.core.listener.{SparkRecordListener, SparkTaskRecordSummary} +import com.github.pflooky.datagen.core.model.Constants.{REPORT_DATA_SOURCES_HTML, REPORT_FIELDS_HTML, REPORT_HOME_HTML, REPORT_VALIDATIONS_HTML} +import com.github.pflooky.datagen.core.util.PlanImplicits.CountOps +import com.github.pflooky.datagen.core.model.{DataSourceResult, DataSourceResultSummary, StepResultSummary, TaskResultSummary, ValidationConfigResult} +import org.joda.time.DateTime + +import scala.math.BigDecimal.RoundingMode +import scala.xml.{Node, NodeBuffer, NodeSeq} + +class ResultHtmlWriter { + + def index(plan: Plan, stepResultSummary: List[StepResultSummary], taskResultSummary: List[TaskResultSummary], + dataSourceResultSummary: List[DataSourceResultSummary], validationResults: List[ValidationConfigResult], + flagsConfig: FlagsConfig, sparkRecordListener: SparkRecordListener): Node = { + + + + Data Caterer{plugins} + + + {topNavBar}{overview(plan, stepResultSummary, taskResultSummary, dataSourceResultSummary, validationResults, flagsConfig, sparkRecordListener)} + {bodyScripts} + + } + + def overview(plan: Plan, stepResultSummary: List[StepResultSummary], taskResultSummary: List[TaskResultSummary], + dataSourceResultSummary: List[DataSourceResultSummary], validationResults: List[ValidationConfigResult], + flagsConfig: FlagsConfig, sparkRecordListener: SparkRecordListener): Node = { +
+

Data Caterer Summary

+

Flags

{flagsSummary(flagsConfig)}

Plan

{planSummary(plan, stepResultSummary, taskResultSummary, dataSourceResultSummary)}

Tasks

{tasksSummary(taskResultSummary)}

Validations

{validationSummary(validationResults)}

Output Rows Per Second

{createLineGraph("outputRowsPerSecond", sparkRecordListener.outputRows.toList)}
+ Generated at + {DateTime.now()} +
+
+ } + + def topNavBar: NodeBuffer = { +
+ + + Data Caterer + +
+ + } + + def flagsSummary(flagsConfig: FlagsConfig): Node = { + + + + + + + + + + + + + + + + + + + + + + + +
Generate MetadataGenerate DataRecord TrackingDelete DataCalculate Generated Records MetadataValidate DataUnique Check
+ {checkMark(flagsConfig.enableGeneratePlanAndTasks)} + + {checkMark(flagsConfig.enableGenerateData)} + + {checkMark(flagsConfig.enableRecordTracking)} + + {checkMark(flagsConfig.enableDeleteGeneratedRecords)} + + {checkMark(flagsConfig.enableSinkMetadata)} + + {checkMark(flagsConfig.enableValidation)} + + {checkMark(flagsConfig.enableUniqueCheck)} +
+ } + + def planSummary(plan: Plan, stepResultSummary: List[StepResultSummary], + taskResultSummary: List[TaskResultSummary], dataSourceResultSummary: List[DataSourceResultSummary]): Node = { + val totalRecords = stepResultSummary.map(_.numRecords).sum + val isSuccess = stepResultSummary.forall(_.isSuccess) + + + + + + + + + + + + + + + + + + + + + + + +
Plan NameNum RecordsSuccessTasksStepsData SourcesForeign Keys
+ {plan.name} + + {totalRecords} + + {checkMark(isSuccess)} + + {taskResultSummary.size} + + {stepResultSummary.size} + + {dataSourceResultSummary.size} + + {plan.sinkOptions.map(_.foreignKeys).getOrElse(Map())} +
+ } + + def tasksSummary(taskResultSummary: List[TaskResultSummary]): Node = { + + + + + + + + + + + {taskResultSummary.map(res => { + val taskRef = s"tasks.html#${res.task.name}" + + + + + + + })} + +
NameNum RecordsSuccessSteps
+ + {res.task.name} + + + {res.numRecords} + + {checkMark(res.isSuccess)} + + {toStepLinks(res.task.steps)} +
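// Example (illustrative): toStepLinks renders one link per step in the task, pointing at that step's
// section of the steps report. For a step named "transactions" the link target would be
//   s"$REPORT_FIELDS_HTML#${step.name}"   // i.e. "steps.html#transactions", since REPORT_FIELDS_HTML = "steps.html"
// Data source links follow the same convention via REPORT_DATA_SOURCES_HTML ("data-sources.html").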
+ } + + def taskDetails(taskResultSummary: List[TaskResultSummary]): Node = { + + + + Task Details - Data Caterer + {plugins} + + + {topNavBar}

Tasks

+ + + + + + + + + {taskResultSummary.map(res => { + + + + + })} + +
NameSteps
+ {res.task.name} + + {toStepLinks(res.task.steps)} +
+ {bodyScripts} + + } + + def stepsSummary(stepResultSummary: List[StepResultSummary]): Node = { + + + + + + + + + + + + + {stepResultSummary.map(res => { + val stepLink = s"$REPORT_FIELDS_HTML#${res.step.name}" + + + + + + + + + })} + +
NameNum RecordsSuccessOptionsNum BatchesTime Taken (s)
+ + {res.step.name} + + + {res.numRecords} + + {checkMark(res.isSuccess)} + + {optionsString(res)} + + {res.dataSourceResults.map(_.batchNum).max} + + {res.dataSourceResults.map(_.sinkResult.durationInSeconds).sum} +
+ } + + def stepDetails(stepResultSummary: List[StepResultSummary]): Node = { + + + + Step Details - Data Caterer + {plugins} + + + {topNavBar}
+
+

Steps

+ + + + + + + + + + + + + + + {stepResultSummary.map(res => { + val fieldMetadataOnClick = s"showFieldMetadata('${res.step.name}', this)" + + + + + + + + + + + })} + +
NameNum RecordsSuccessTypeEnabledOptionsCountFields
+ {res.step.name} + + {res.numRecords} + + {checkMark(res.isSuccess)} + + {res.step.`type`} + + {checkMark(res.step.enabled)} + + {optionsString(res)} + + {keyValueTable(res.step.count.numRecordsString._2)} + + +
+ {fieldMetadata(res.step, res.dataSourceResults)} +
+
+
+
...
{if (stepResultSummary.nonEmpty) { +
+ {fieldMetadata(stepResultSummary.head.step, stepResultSummary.head.dataSourceResults)} +
+ }} +
+ {bodyScripts} + + } + + def fieldMetadata(step: Step, dataSourceResults: List[DataSourceResult]): Node = { + val originalFields = step.schema.fields.getOrElse(List()) + val generatedFields = dataSourceResults.head.sinkResult.generatedMetadata + val metadataMatch = originalFields.map(field => { + val optGenField = generatedFields.find(f => f.name == field.name) + val genMetadata = optGenField.map(_.generator.getOrElse(Generator()).options).getOrElse(Map()) + val originalMetadata = field.generator.getOrElse(Generator()).options + val metadataCompare = (originalMetadata.keys ++ genMetadata.keys).filter(_ != HISTOGRAM).toList.distinct + .map(key => { + List(key, originalMetadata.getOrElse(key, "").toString, genMetadata.getOrElse(key, "").toString) + }) + (field.name, metadataCompare) + }).toMap + val fieldMetadataId = s"field-metadata-${step.name}" + +
+

Field Details: + {step.name} +

+ + + + + + + + + + + + {originalFields.map(field => { + val generator = field.generator.getOrElse(Generator()) + + + + + + + + })} + +
NameTypeNullableGenerator TypeGenerated Records Metadata Comparison
+ {field.name} + + {field.`type`.getOrElse("string")} + + {checkMark(field.nullable)} + + {generator.`type`} + + {keyValueTable(metadataMatch(field.name), Some(List("Metadata Field", "Original Value", "Generated Value")), true)} +
+
+ } + + def dataSourceDetails(dataSourceResults: List[DataSourceResult]): Node = { + val resByDataSource = dataSourceResults.groupBy(_.sinkResult.name) + + + + Data Source Details - Data Caterer + {plugins} + + + {topNavBar}

Data Sources

+ + + + + + + + + + + + {resByDataSource.map(ds => { + val numRecords = ds._2.map(_.sinkResult.count).sum + val success = ds._2.forall(_.sinkResult.isSuccess) + + + + + + + + })} + +
NameNum RecordsSuccessFormatOptions
+ {ds._1} + + {numRecords} + + {checkMark(success)} + + {ds._2.map(_.sinkResult.format).distinct.mkString("\n")} + + {keyValueTable(ds._2.flatMap(x => x.sinkResult.options.map(y => List(y._1, y._2))))} +
+ {bodyScripts} + + } + + def validations(validationResults: List[ValidationConfigResult]): Node = { + + + + Validations - Data Caterer + {plugins} + + + {topNavBar}

Validations

{validationSummary(validationResults)}

Details

+ + + + + + + + + + + + + + {validationResults.flatMap(validationConfRes => { + validationConfRes.dataSourceValidationResults.flatMap(dataSourceValidationRes => { + val dataSourceLink = s"$REPORT_DATA_SOURCES_HTML#${dataSourceValidationRes.dataSourceName}" + dataSourceValidationRes.validationResults.map(validationRes => { + val numSuccess = validationRes.total - validationRes.numErrors + + + + + + + + + + }) + }) + })} + +
DescriptionData SourceOptionsSuccessWithin Error ThresholdValidationError Sample
+ {validationRes.validation.description.getOrElse("Validate")} + + + {dataSourceValidationRes.dataSourceName} + + + {formatOptions(dataSourceValidationRes.options)} + + {progressBar(numSuccess, validationRes.total)} + + {checkMark(validationRes.isSuccess)} + + {keyValueTable(getValidationOptions(validationRes.validation))} + + {if (validationRes.isSuccess) "" else keyValueTable(validationRes.sampleErrorValues.get.take(5).map(e => List(e.json)).toList)} +
+ {bodyScripts} + + } + + def validationSummary(validationResults: List[ValidationConfigResult]): Node = { + + + + + + + + + + + {validationResults.map(validationConfRes => { + val validationLink = s"$REPORT_VALIDATIONS_HTML#${validationConfRes.name}" + val resultsForDataSource = validationConfRes.dataSourceValidationResults.flatMap(_.validationResults) + val numSuccess = resultsForDataSource.count(_.isSuccess) + + + + + + + })} + +
NameData SourcesDescriptionSuccess
+ + {validationConfRes.name} + + + {toDataSourceLinks(validationConfRes.dataSourceValidationResults.map(_.dataSourceName).distinct)} + + {validationConfRes.description} + + {progressBar(numSuccess, resultsForDataSource.size)} +
+ } + + def createLineGraph(name: String, recordSummary: List[SparkTaskRecordSummary]): NodeBuffer = { + if (recordSummary.nonEmpty) { + val sumRowsPerFinishTime = recordSummary + .map(x => { + val roundFinishTimeToSecond = x.finishTime - (x.finishTime % 1000) + 1000 + (roundFinishTimeToSecond, x.numRecords) + }) + .groupBy(_._1) + .map(t => (t._1, t._2.map(_._2).sum)) + val sortedSumRows = sumRowsPerFinishTime.toList.sortBy(_._1) + val timeSeriesValues = (sortedSumRows.head._1 to sortedSumRows.last._1 by 1000) + .map(t => (t, sumRowsPerFinishTime.getOrElse(t, 0L))) + .toList + + val xValues = timeSeriesValues.map(x => new DateTime(x._1).toString("HH:mm:ss")).map(s => "\"" + s + "\"") + val yValues = timeSeriesValues.map(_._2) + val (yMin, yMax) = if (yValues.nonEmpty) (Math.max(0, yValues.min - 2), yValues.max + 2) else (0, 0) + createChart(name, xValues, yValues, yMin, yMax) + } else { +
+

No data found

+
+

+ } + } + + private def createChart[T, K](name: String, xValues: List[T], yValues: List[K], minY: K, maxY: K): NodeBuffer = { + val xValuesStr = s"[${xValues.mkString(",")}]" + val yValuesStr = s"[${yValues.mkString(",")}]" + + + } + + private def checkMark(isSuccess: Boolean): NodeSeq = if (isSuccess) xml.EntityRef("#9989") else xml.EntityRef("#10060") + + private def progressBar(success: Long, total: Long): NodeBuffer = { + val percent = if (success > 0 && total > 0) BigDecimal(success.toDouble / total * 100).setScale(2, RoundingMode.HALF_UP).toString() else "0" + val width = s"width:$percent%" + val progressBarText = s"$success/$total ($percent%)" +
+
+
+
+ {progressBarText} +
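// Worked example (illustrative): progressBar(3, 4) computes percent = BigDecimal(3.0 / 4 * 100).setScale(2, HALF_UP) = 75.00,
// so the bar is styled with "width:75.00%" and labelled "3/4 (75.00%)".
// With no successes, e.g. progressBar(0, 4), the percent string falls back to "0" and the label reads "0/4 (0%)".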
+ } + + private def keyValueTable(keyValues: List[List[String]], optHeader: Option[List[String]] = None, isCollapsible: Boolean = false): Node = { + val baseTable = + + {optHeader.map(headers => { + + + {headers.map(header => { + + })} + + + }).getOrElse(List())} + {keyValues.map(kv => { + + {kv.tail.map(kvt => { + + })} + + })} + +
+ {header} +
+ {if (kv.size == 1) { + { + kv.head + } + } else { + + {kv.head} + + }} + + {kvt} +
+ + if (isCollapsible) { + { + xml.Group(Seq( + , +
+ {baseTable} +
+ )) + } + } else { + { + baseTable + } + } + } + + private def optionsString(res: StepResultSummary): Node = { + val dataSourceResult = res.dataSourceResults + val baseOptions = if (dataSourceResult.nonEmpty) { + dataSourceResult.head.sinkResult.options + } else { + res.step.options + } + val optionsToList = baseOptions.map(x => List(x._1, x._2)).toList + keyValueTable(optionsToList) + } + + private def formatOptions(options: Map[String, String]): String = options.map(s => s"${s._1} -> ${s._2}").mkString("\n") + + private def toStepLinks(steps: List[Step]): Node = { + { + xml.Group(steps.map(step => { + val stepLink = s"$REPORT_FIELDS_HTML#${step.name}" + + {s"${step.name}"} + + })) + } + } + + private def toDataSourceLinks(dataSourceNames: List[String]): Node = { + { + xml.Group(dataSourceNames.map(dataSource => { + val dataSourceLink = s"$REPORT_DATA_SOURCES_HTML#$dataSource" + + {dataSource} + + })) + } + } + + private def getValidationOptions(validation: Validation): List[List[String]] = { + val options = validation match { + case ExpressionValidation(expr) => + List( + List("expr", expr), + List("errorThreshold", validation.errorThreshold.getOrElse(0.0).toString) + ) + case GroupByValidation(groupByCols, aggCol, aggType, expr) => + List( + List("expr", expr), + List("groupByColumns", groupByCols.mkString(",")), + List("aggregationColumn", aggCol), + List("aggregationType", aggType), + List("errorThreshold", validation.errorThreshold.getOrElse(0.0).toString) + ) + case UpstreamDataSourceValidation(validationBuilder, upstreamDataSource, _, joinCols, joinType) => + val nestedValidation = getValidationOptions(validationBuilder.validation) + List( + List("upstreamDataSource", upstreamDataSource.connectionConfigWithTaskBuilder.dataSourceName), + List("joinColumns", joinCols.mkString(",")), + List("joinType", joinType), + ) ++ nestedValidation + case _ => List() + } + options.filter(_.forall(_.nonEmpty)) + } + + def bodyScripts: NodeBuffer = { + + + } + + def plugins: NodeBuffer = { + + + + + + + + + } +} diff --git a/app/src/main/scala/com/github/pflooky/datagen/core/listener/SparkRecordListener.scala b/app/src/main/scala/com/github/pflooky/datagen/core/listener/SparkRecordListener.scala new file mode 100644 index 00000000..b3954b19 --- /dev/null +++ b/app/src/main/scala/com/github/pflooky/datagen/core/listener/SparkRecordListener.scala @@ -0,0 +1,25 @@ +package com.github.pflooky.datagen.core.listener + +import org.apache.spark.scheduler.{SparkListener, SparkListenerTaskEnd} + +import scala.collection.mutable.ListBuffer + +class SparkRecordListener(enableCount: Boolean = true) extends SparkListener { + + var outputRows: ListBuffer[SparkTaskRecordSummary] = ListBuffer() + + override def onTaskEnd(taskEnd: SparkListenerTaskEnd): Unit = { + if (taskEnd.taskType.equalsIgnoreCase("resulttask") && enableCount) { + synchronized { + + outputRows.append(SparkTaskRecordSummary( + taskEnd.taskInfo.finishTime, + taskEnd.taskMetrics.outputMetrics.recordsWritten, + taskEnd.taskExecutorMetrics.getMetricValue("OnHeapExecutionMemory") + )) + } + } + } +} + +case class SparkTaskRecordSummary(finishTime: Long, numRecords: Long, onHeapExecutionMemory: Long) diff --git a/app/src/main/scala/com/github/pflooky/datagen/core/model/Constants.scala b/app/src/main/scala/com/github/pflooky/datagen/core/model/Constants.scala new file mode 100644 index 00000000..0cce408b --- /dev/null +++ b/app/src/main/scala/com/github/pflooky/datagen/core/model/Constants.scala @@ -0,0 +1,58 @@ +package 
com.github.pflooky.datagen.core.model + +import com.github.pflooky.datacaterer.api.model.Constants.{CASSANDRA, CSV, HTTP, JDBC, JMS, JSON, KAFKA, ORC, PARQUET} + +object Constants { + + //base config + lazy val RUNTIME_MASTER = "runtime.master" + + //supported data formats + lazy val SUPPORTED_CONNECTION_FORMATS: List[String] = List(CSV, JSON, ORC, PARQUET, CASSANDRA, JDBC, HTTP, JMS, KAFKA) + + //special column names + lazy val PER_COLUMN_COUNT = "_per_col_count" + lazy val JOIN_FOREIGN_KEY_COL = "_join_foreign_key" + lazy val PER_COLUMN_INDEX_COL = "_per_col_index" + lazy val RECORD_COUNT_GENERATOR_COL = "record_count_generator" + lazy val INDEX_INC_COL = "__index_inc" + lazy val REAL_TIME_BODY_COL = "value" + lazy val REAL_TIME_BODY_CONTENT_COL = "bodyContent" + lazy val REAL_TIME_PARTITION_COL = "partition" + lazy val REAL_TIME_HEADERS_COL = "headers" + lazy val REAL_TIME_METHOD_COL = "method" + lazy val REAL_TIME_CONTENT_TYPE_COL = "content_type" + lazy val REAL_TIME_URL_COL = "url" + lazy val HTTP_HEADER_COL_PREFIX = "header" + lazy val HTTP_PATH_PARAM_COL_PREFIX = "pathParam" + lazy val HTTP_QUERY_PARAM_COL_PREFIX = "queryParam" + + //spark udf + lazy val GENERATE_REGEX_UDF = "GENERATE_REGEX" + lazy val GENERATE_FAKER_EXPRESSION_UDF = "GENERATE_FAKER_EXPRESSION" + lazy val GENERATE_RANDOM_ALPHANUMERIC_STRING_UDF = "GENERATE_RANDOM_ALPHANUMERIC_STRING" + + //status + lazy val STARTED = "started" + lazy val FINISHED = "finished" + lazy val FAILED = "failed" + + //count + lazy val COUNT_TYPE = "countType" + lazy val COUNT_BASIC = "basic-count" + lazy val COUNT_GENERATED = "generated-count" + lazy val COUNT_PER_COLUMN = "per-column-count" + lazy val COUNT_GENERATED_PER_COLUMN = "generated-per-column-count" + lazy val COUNT_COLUMNS = "columns" + lazy val COUNT_NUM_RECORDS = "numRecords" + + //report + lazy val REPORT_DATA_SOURCES_HTML = "data-sources.html" + lazy val REPORT_FIELDS_HTML = "steps.html" + lazy val REPORT_HOME_HTML = "index.html" + lazy val REPORT_VALIDATIONS_HTML = "validations.html" + + //misc + lazy val APPLICATION_CONFIG_PATH = "APPLICATION_CONFIG_PATH" + +} diff --git a/app/src/main/scala/com/github/pflooky/datagen/core/model/ForeignKeyModels.scala b/app/src/main/scala/com/github/pflooky/datagen/core/model/ForeignKeyModels.scala new file mode 100644 index 00000000..313b1000 --- /dev/null +++ b/app/src/main/scala/com/github/pflooky/datagen/core/model/ForeignKeyModels.scala @@ -0,0 +1,6 @@ +package com.github.pflooky.datagen.core.model + +import com.github.pflooky.datacaterer.api.model.ForeignKeyRelation + +case class ForeignKeyRelationship(key: ForeignKeyRelation, foreignKey: ForeignKeyRelation) + diff --git a/app/src/main/scala/com/github/pflooky/datagen/core/model/ResultModels.scala b/app/src/main/scala/com/github/pflooky/datagen/core/model/ResultModels.scala new file mode 100644 index 00000000..e3be59ff --- /dev/null +++ b/app/src/main/scala/com/github/pflooky/datagen/core/model/ResultModels.scala @@ -0,0 +1,51 @@ +package com.github.pflooky.datagen.core.model + +import com.github.pflooky.datacaterer.api.model.{Field, Step, Task} + +import java.time.{Duration, LocalDateTime} + +case class DataSourceResultSummary( + name: String, + numRecords: Long, + isSuccess: Boolean, + dataSourceResults: List[DataSourceResult] + ) + +case class DataSourceResult( + name: String, + task: Task, + step: Step, + sinkResult: SinkResult, + batchNum: Int = 0 + ) + +case class TaskResultSummary( + task: Task, + numRecords: Long, + isSuccess: Boolean, + stepResults: List[StepResultSummary] + 
) + +case class StepResultSummary( + step: Step, + numRecords: Long, + isSuccess: Boolean, + dataSourceResults: List[DataSourceResult] + ) + +case class SinkResult( + name: String, + format: String, + saveMode: String, + options: Map[String, String] = Map(), + count: Long = -1, + isSuccess: Boolean = true, + sample: Array[String] = Array(), + startTime: LocalDateTime = LocalDateTime.now(), + endTime: LocalDateTime = LocalDateTime.now(), + generatedMetadata: Array[Field] = Array(), + exception: Option[Throwable] = None + ) { + + def durationInSeconds: Long = Duration.between(startTime, endTime).toSeconds +} diff --git a/app/src/main/scala/com/github/pflooky/datagen/core/model/ValidationModels.scala b/app/src/main/scala/com/github/pflooky/datagen/core/model/ValidationModels.scala new file mode 100644 index 00000000..25afd6a8 --- /dev/null +++ b/app/src/main/scala/com/github/pflooky/datagen/core/model/ValidationModels.scala @@ -0,0 +1,30 @@ +package com.github.pflooky.datagen.core.model + +import com.github.pflooky.datacaterer.api.model.{ExpressionValidation, Validation, ValidationConfiguration} +import org.apache.spark.sql.DataFrame + +case class ValidationConfigResult( + name: String = "default_validation_result", + description: String = "Validation result for data sources", + dataSourceValidationResults: List[DataSourceValidationResult] = List() + ) + +case class DataSourceValidationResult( + dataSourceName: String = "default_data_source", + options: Map[String, String] = Map(), + validationResults: List[ValidationResult] = List() + ) + +case class ValidationResult( + validation: Validation = ExpressionValidation(), + isSuccess: Boolean = true, + numErrors: Long = 0, + total: Long = 0, + sampleErrorValues: Option[DataFrame] = None + ) + +object ValidationResult { + def fromValidationWithBaseResult(validation: Validation, validationResult: ValidationResult): ValidationResult = { + ValidationResult(validation, validationResult.isSuccess, validationResult.numErrors, validationResult.total, validationResult.sampleErrorValues) + } +} diff --git a/app/src/main/scala/com/github/pflooky/datagen/core/parser/PlanParser.scala b/app/src/main/scala/com/github/pflooky/datagen/core/parser/PlanParser.scala new file mode 100644 index 00000000..0212c1cb --- /dev/null +++ b/app/src/main/scala/com/github/pflooky/datagen/core/parser/PlanParser.scala @@ -0,0 +1,66 @@ +package com.github.pflooky.datagen.core.parser + +import com.github.pflooky.datacaterer.api.model.Constants.ONE_OF_GENERATOR +import com.github.pflooky.datacaterer.api.model.{Plan, Schema, Task} +import com.github.pflooky.datagen.core.util.FileUtil.{getFileContentFromFileSystem, isCloudStoragePath} +import com.github.pflooky.datagen.core.util.{FileUtil, ObjectMapperUtil} +import org.apache.hadoop.fs.FileSystem +import org.apache.log4j.Logger +import org.apache.spark.sql.SparkSession + +object PlanParser { + + private val LOGGER = Logger.getLogger(getClass.getName) + private val OBJECT_MAPPER = ObjectMapperUtil.yamlObjectMapper + + def parsePlan(planFilePath: String)(implicit sparkSession: SparkSession): Plan = { + val parsedPlan = if (isCloudStoragePath(planFilePath)) { + val fileContent = getFileContentFromFileSystem(FileSystem.get(sparkSession.sparkContext.hadoopConfiguration), planFilePath) + OBJECT_MAPPER.readValue(fileContent, classOf[Plan]) + } else { + val planFile = FileUtil.getFile(planFilePath) + OBJECT_MAPPER.readValue(planFile, classOf[Plan]) + } + LOGGER.info(s"Found plan file and parsed successfully, plan-file-path=$planFilePath, 
plan-name=${parsedPlan.name}, plan-description=${parsedPlan.description}") + parsedPlan + } + + def parseTasks(taskFolderPath: String)(implicit sparkSession: SparkSession): Array[Task] = { + val parsedTasks = YamlFileParser.parseFiles[Task](taskFolderPath) + parsedTasks.map(convertTaskNumbersToString) + } + + private def convertTaskNumbersToString(task: Task): Task = { + val stringSteps = task.steps.map(step => { + val countPerColGenerator = step.count.perColumn.map(perColumnCount => { + val generator = perColumnCount.generator.map(gen => gen.copy(options = toStringValues(gen.options))) + perColumnCount.copy(generator = generator) + }) + val countGenerator = step.count.generator.map(gen => gen.copy(options = toStringValues(gen.options))) + val mappedSchema = schemaToString(step.schema) + step.copy( + count = step.count.copy(perColumn = countPerColGenerator, generator = countGenerator), + schema = mappedSchema + ) + }) + task.copy(steps = stringSteps) + } + + private def schemaToString(schema: Schema): Schema = { + val mappedFields = schema.fields.map(fields => { + fields.map(field => { + if (field.generator.isDefined && field.generator.get.`type` != ONE_OF_GENERATOR) { + val fieldGenOpt = toStringValues(field.generator.get.options) + field.copy(generator = Some(field.generator.get.copy(options = fieldGenOpt))) + } else { + field.copy(schema = field.schema.map(schemaToString)) + } + }) + }) + schema.copy(fields = mappedFields) + } + + private def toStringValues(options: Map[String, Any]): Map[String, Any] = { + options.map(x => (x._1, x._2.toString)) + } +} diff --git a/app/src/main/scala/com/github/pflooky/datagen/core/parser/ValidationParser.scala b/app/src/main/scala/com/github/pflooky/datagen/core/parser/ValidationParser.scala new file mode 100644 index 00000000..6f760a00 --- /dev/null +++ b/app/src/main/scala/com/github/pflooky/datagen/core/parser/ValidationParser.scala @@ -0,0 +1,11 @@ +package com.github.pflooky.datagen.core.parser + +import com.github.pflooky.datacaterer.api.model.ValidationConfiguration +import org.apache.spark.sql.SparkSession + +object ValidationParser { + + def parseValidation(validationFolderPath: String)(implicit sparkSession: SparkSession): Array[ValidationConfiguration] = { + YamlFileParser.parseFiles[ValidationConfiguration](validationFolderPath) + } +} diff --git a/app/src/main/scala/com/github/pflooky/datagen/core/parser/YamlFileParser.scala b/app/src/main/scala/com/github/pflooky/datagen/core/parser/YamlFileParser.scala new file mode 100644 index 00000000..f2e726c0 --- /dev/null +++ b/app/src/main/scala/com/github/pflooky/datagen/core/parser/YamlFileParser.scala @@ -0,0 +1,58 @@ +package com.github.pflooky.datagen.core.parser + +import com.github.pflooky.datagen.core.exception.ParseFileException +import com.github.pflooky.datagen.core.util.FileUtil.{getDirectory, getFileContentFromFileSystem, isCloudStoragePath} +import com.github.pflooky.datagen.core.util.ObjectMapperUtil +import org.apache.hadoop.fs.{FileSystem, Path} +import org.apache.log4j.Logger +import org.apache.spark.sql.SparkSession + +import java.io.File +import scala.reflect.ClassTag +import scala.util.{Failure, Success, Try} + +object YamlFileParser { + + private val LOGGER = Logger.getLogger(getClass.getName) + private val OBJECT_MAPPER = ObjectMapperUtil.yamlObjectMapper + + def parseFiles[T](folderPath: String)(implicit sparkSession: SparkSession, tag: ClassTag[T]): Array[T] = { + if (isCloudStoragePath(folderPath)) { + val fileSystem = 
FileSystem.get(sparkSession.sparkContext.hadoopConfiguration) + val allFiles = fileSystem.listFiles(new Path(folderPath), true) + val cls = tag.runtimeClass.asInstanceOf[Class[T]] + + val parsedFileArray = scala.collection.mutable.ArrayBuffer[T]() + while (allFiles.hasNext) { + val currentFile = allFiles.next().getPath.toString + val fileContent = getFileContentFromFileSystem(fileSystem, currentFile) + val parsedFile = OBJECT_MAPPER.readValue[T](fileContent, cls) + parsedFileArray.append(parsedFile) + } + parsedFileArray.toArray + } else { + val directory = getDirectory(folderPath) + getNestedFiles(directory).map(f => parseFile[T](f)) + } + } + + private def getNestedFiles(folder: File): Array[File] = { + if (!folder.isDirectory) { + LOGGER.warn(s"Folder is not a directory, unable to list files, path=${folder.getPath}") + Array() + } else { + val current = folder.listFiles().filter(_.getName.endsWith(".yaml")) + current ++ folder.listFiles + .filter(_.isDirectory) + .flatMap(getNestedFiles) + } + } + + private def parseFile[T](file: File)(implicit tag: ClassTag[T]): T = { + val cls = tag.runtimeClass.asInstanceOf[Class[T]] + Try(OBJECT_MAPPER.readValue[T](file, cls)) match { + case Failure(exception) => throw new ParseFileException(file.getAbsolutePath, cls.getName, exception) + case Success(value) => value + } + } +} diff --git a/app/src/main/scala/com/github/pflooky/datagen/core/plan/PlanProcessor.scala b/app/src/main/scala/com/github/pflooky/datagen/core/plan/PlanProcessor.scala new file mode 100644 index 00000000..7f4e8d60 --- /dev/null +++ b/app/src/main/scala/com/github/pflooky/datagen/core/plan/PlanProcessor.scala @@ -0,0 +1,67 @@ +package com.github.pflooky.datagen.core.plan + +import com.github.pflooky.datacaterer.api.PlanRun +import com.github.pflooky.datacaterer.api.model.Constants.PLAN_CLASS +import com.github.pflooky.datacaterer.api.model.DataCatererConfiguration +import com.github.pflooky.datagen.core.config.ConfigParser +import com.github.pflooky.datagen.core.generator.DataGeneratorProcessor +import com.github.pflooky.datagen.core.util.SparkProvider +import org.apache.spark.sql.SparkSession + +import scala.util.{Success, Try} + +object PlanProcessor { + + def determineAndExecutePlan(optPlanRun: Option[PlanRun] = None): Unit = { + val optPlanClass = getPlanClass + optPlanClass.map(Class.forName) + .map(cls => { + cls.getDeclaredConstructor().newInstance() + val tryScalaPlan = Try(cls.getDeclaredConstructor().newInstance().asInstanceOf[PlanRun]) + val tryJavaPlan = Try(cls.getDeclaredConstructor().newInstance().asInstanceOf[com.github.pflooky.datacaterer.java.api.PlanRun]) + (tryScalaPlan, tryJavaPlan) match { + case (Success(value), _) => value + case (_, Success(value)) => value.getPlan + case _ => throw new RuntimeException(s"Failed to load class as either Java or Scala PlanRun, class=${optPlanClass.get}") + } + }) + .map(executePlan) + .getOrElse( + optPlanRun.map(executePlan) + .getOrElse(executePlan) + ) + } + + def determineAndExecutePlanJava(planRun: com.github.pflooky.datacaterer.java.api.PlanRun): Unit = + determineAndExecutePlan(Some(planRun.getPlan)) + + private def executePlan(planRun: PlanRun): Unit = { + val dataCatererConfiguration = planRun._configuration + executePlanWithConfig(dataCatererConfiguration, Some(planRun)) + } + + private def executePlan: Unit = { + val dataCatererConfiguration = ConfigParser.toDataCatererConfiguration + executePlanWithConfig(dataCatererConfiguration, None) + } + + private def executePlanWithConfig(dataCatererConfiguration: 
DataCatererConfiguration, optPlan: Option[PlanRun]): Unit = { + implicit val sparkSession: SparkSession = new SparkProvider(dataCatererConfiguration.master, dataCatererConfiguration.runtimeConfig).getSparkSession + + val dataGeneratorProcessor = new DataGeneratorProcessor(dataCatererConfiguration) + optPlan match { + case Some(plan) => dataGeneratorProcessor.generateData(plan._plan, plan._tasks, Some(plan._validations)) + case _ => dataGeneratorProcessor.generateData() + } + } + + private def getPlanClass: Option[String] = { + val envPlanClass = System.getenv(PLAN_CLASS) + val propPlanClass = System.getProperty(PLAN_CLASS) + (envPlanClass, propPlanClass) match { + case (env, _) if env != null && env.nonEmpty => Some(env) + case (_, prop) if prop != null && prop.nonEmpty => Some(prop) + case _ => None + } + } +} diff --git a/app/src/main/scala/com/github/pflooky/datagen/core/sink/SinkFactory.scala b/app/src/main/scala/com/github/pflooky/datagen/core/sink/SinkFactory.scala new file mode 100644 index 00000000..05a575ac --- /dev/null +++ b/app/src/main/scala/com/github/pflooky/datagen/core/sink/SinkFactory.scala @@ -0,0 +1,121 @@ +package com.github.pflooky.datagen.core.sink + +import com.github.pflooky.datacaterer.api.model.Constants.{DRIVER, FORMAT, JDBC, OMIT, PARTITIONS, PARTITION_BY, POSTGRES_DRIVER, SAVE_MODE} +import com.github.pflooky.datacaterer.api.model.{FlagsConfig, MetadataConfig, Step} +import com.github.pflooky.datagen.core.model.Constants.{FAILED, FINISHED, STARTED} +import com.github.pflooky.datagen.core.model.SinkResult +import com.github.pflooky.datagen.core.util.ConfigUtil +import com.github.pflooky.datagen.core.util.MetadataUtil.getFieldMetadata +import org.apache.log4j.Logger +import org.apache.spark.sql.{DataFrame, DataFrameWriter, Row, SaveMode, SparkSession} + +import java.time.LocalDateTime +import scala.util.{Failure, Success, Try} + +class SinkFactory(val flagsConfig: FlagsConfig, val metadataConfig: MetadataConfig)(implicit val sparkSession: SparkSession) { + + private val LOGGER = Logger.getLogger(getClass.getName) + private var HAS_LOGGED_COUNT_DISABLE_WARNING = false + + def pushToSink(df: DataFrame, dataSourceName: String, step: Step, flagsConfig: FlagsConfig, startTime: LocalDateTime): SinkResult = { + val dfWithoutOmitFields = removeOmitFields(df) + val saveMode = step.options.get(SAVE_MODE).map(_.toLowerCase.capitalize).map(SaveMode.valueOf).getOrElse(SaveMode.Append) + val format = step.options(FORMAT) + val enrichedConnectionConfig = additionalConnectionConfig(format, step.options) + + val count = if (flagsConfig.enableCount) { + dfWithoutOmitFields.count().toString + } else if (!HAS_LOGGED_COUNT_DISABLE_WARNING) { + LOGGER.warn("Count is disabled. It will help with performance. 
Defaulting to -1") + HAS_LOGGED_COUNT_DISABLE_WARNING = true + "-1" + } else "-1" + LOGGER.info(s"Pushing data to sink, data-source-name=$dataSourceName, step-name=${step.name}, save-mode=${saveMode.name()}, num-records=$count, status=$STARTED") + saveData(dfWithoutOmitFields, dataSourceName, step, enrichedConnectionConfig, saveMode, format, count, flagsConfig.enableFailOnError, startTime) + } + + private def saveData(df: DataFrame, dataSourceName: String, step: Step, connectionConfig: Map[String, String], + saveMode: SaveMode, format: String, count: String, enableFailOnError: Boolean, startTime: LocalDateTime): SinkResult = { + val baseSinkResult = SinkResult(dataSourceName, format, saveMode.name()) + //TODO might have use case where empty data can be tested, is it okay just to check for empty schema? + val sinkResult = if (df.schema.isEmpty) { + LOGGER.debug(s"Generated data schema is empty, not saving to data source, data-source-name=$dataSourceName, format=$format") + baseSinkResult + } else { + saveBatchData(dataSourceName, df, saveMode, connectionConfig, count, startTime) + } + + val finalSinkResult = (sinkResult.isSuccess, sinkResult.exception) match { + case (false, Some(exception)) => + LOGGER.error(s"Failed to save data for sink, data-source-name=$dataSourceName, step-name=${step.name}, save-mode=${saveMode.name()}, " + + s"num-records=$count, status=$FAILED, exception=${exception.getMessage.take(500)}") + if (enableFailOnError) throw new RuntimeException(exception) else baseSinkResult + case (true, None) => + LOGGER.info(s"Successfully saved data to sink, data-source-name=$dataSourceName, step-name=${step.name}, save-mode=${saveMode.name()}, " + + s"num-records=$count, status=$FINISHED") + sinkResult + case (isSuccess, optException) => + LOGGER.warn(s"Unexpected sink result scenario, is-success=$isSuccess, exception-exists=${optException.isDefined}") + sinkResult + } + df.unpersist() + finalSinkResult + } + + private def saveBatchData(dataSourceName: String, df: DataFrame, saveMode: SaveMode, connectionConfig: Map[String, String], + count: String, startTime: LocalDateTime): SinkResult = { + val format = connectionConfig(FORMAT) + val partitionedDf = partitionDf(df, connectionConfig) + val trySaveData = Try(partitionedDf + .format(format) + .mode(saveMode) + .options(connectionConfig) + .save()) + val optException = trySaveData match { + case Failure(exception) => Some(exception) + case Success(_) => None + } + mapToSinkResult(dataSourceName, df, saveMode, connectionConfig, count, format, trySaveData.isSuccess, startTime, optException) + } + + private def partitionDf(df: DataFrame, stepOptions: Map[String, String]): DataFrameWriter[Row] = { + val partitionDf = stepOptions.get(PARTITIONS) + .map(partitionNum => df.repartition(partitionNum.toInt)).getOrElse(df) + stepOptions.get(PARTITION_BY) + .map(partitionCols => partitionDf.write.partitionBy(partitionCols.split(",").map(_.trim): _*)) + .getOrElse(partitionDf.write) + } + + private def additionalConnectionConfig(format: String, connectionConfig: Map[String, String]): Map[String, String] = { + format match { + case JDBC => if (connectionConfig(DRIVER).equalsIgnoreCase(POSTGRES_DRIVER) && !connectionConfig.contains("stringtype")) { + connectionConfig ++ Map("stringtype" -> "unspecified") + } else connectionConfig + case _ => connectionConfig + } + } + + private def mapToSinkResult(dataSourceName: String, df: DataFrame, saveMode: SaveMode, connectionConfig: Map[String, String], + count: String, format: String, isSuccess: Boolean, 
startTime: LocalDateTime, + optException: Option[Throwable]): SinkResult = { + val cleansedOptions = ConfigUtil.cleanseOptions(connectionConfig) + val sinkResult = SinkResult(dataSourceName, format, saveMode.name(), cleansedOptions, count.toLong, isSuccess, Array(), startTime, exception = optException) + + if (flagsConfig.enableSinkMetadata) { + val sample = df.take(metadataConfig.numGeneratedSamples).map(_.json) + val fields = getFieldMetadata(dataSourceName, df, connectionConfig, metadataConfig) + sinkResult.copy(generatedMetadata = fields, sample = sample) + } else { + sinkResult + } + } + + private def removeOmitFields(df: DataFrame) = { + val dfOmitFields = df.schema.fields + .filter(field => field.metadata.contains(OMIT) && field.metadata.getString(OMIT).equalsIgnoreCase("true")) + .map(_.name) + val dfWithoutOmitFields = df.selectExpr(df.columns.filter(c => !dfOmitFields.contains(c)): _*) + if (!dfWithoutOmitFields.storageLevel.useMemory) dfWithoutOmitFields.cache() + dfWithoutOmitFields + } +} diff --git a/app/src/main/scala/com/github/pflooky/datagen/core/sink/SinkProcessor.scala b/app/src/main/scala/com/github/pflooky/datagen/core/sink/SinkProcessor.scala new file mode 100644 index 00000000..91c70b10 --- /dev/null +++ b/app/src/main/scala/com/github/pflooky/datagen/core/sink/SinkProcessor.scala @@ -0,0 +1,16 @@ +package com.github.pflooky.datagen.core.sink + +import com.github.pflooky.datacaterer.api.model.Step +import org.apache.spark.sql.Row + +trait SinkProcessor[T] { + + var connectionConfig: Map[String, String] + var step: Step + + def createConnection(connectionConfig: Map[String, String], step: Step): T + + def pushRowToSink(row: Row): Unit + + def close: Unit +} diff --git a/app/src/main/scala/com/github/pflooky/datagen/core/util/CombinationCalculator.scala b/app/src/main/scala/com/github/pflooky/datagen/core/util/CombinationCalculator.scala new file mode 100644 index 00000000..44edd6db --- /dev/null +++ b/app/src/main/scala/com/github/pflooky/datagen/core/util/CombinationCalculator.scala @@ -0,0 +1,91 @@ +package com.github.pflooky.datagen.core.util + +import com.github.pflooky.datacaterer.api.model.Constants.{EXPRESSION, ONE_OF_GENERATOR} +import com.github.pflooky.datacaterer.api.model.Schema +import net.datafaker.Faker +import org.apache.log4j.Logger + +import java.util +import scala.collection.JavaConverters.mapAsScalaMapConverter +import scala.collection.convert.ImplicitConversions.`collection AsScalaIterable` + +object CombinationCalculator { + + private val LOGGER = Logger.getLogger(getClass.getName) + private val FAKER_EXPRESSION_REGEX = "#\\{(.+?)}".r + + def totalCombinationsForSchema(schema: Schema, faker: Faker): Option[BigInt] = { + schema.fields.map(fields => + fields.map(field => { + if (field.generator.isDefined) { + val generator = field.generator.get + if (generator.options.contains(EXPRESSION)) { + val expression = field.generator.get.options(EXPRESSION).toString + val totalCombinations = getNumberCombinationsForFakerExpression(expression, faker) + LOGGER.info(s"Total combinations for faker expression, expression=$expression, combinations=$totalCombinations") + totalCombinations + } else if (generator.`type` == ONE_OF_GENERATOR && generator.options.contains(ONE_OF_GENERATOR)) { + BigInt(generator.options(ONE_OF_GENERATOR).asInstanceOf[List[_]].size) + } else { + BigInt(1) + } + } else if (field.schema.isDefined) { + totalCombinationsForSchema(field.schema.get, faker).getOrElse(BigInt(1)) + } else { + BigInt(1) + } + }).product + ) + } + + private def 
getNumberCombinationsForFakerExpression(expression: String, faker: Faker): BigInt = { + val allMatches = FAKER_EXPRESSION_REGEX.findAllMatchIn(expression).toList + LOGGER.info(s"Found faker expression matches, num-matches=${allMatches.size}, matches=$allMatches") + val totalCombinations: BigInt = allMatches.map(m => fetchNumValues(m.group(1), faker)).product + totalCombinations + } + + /* + different scenarios for faker expressions + 1. #{Name.name} => Map[String, List[String]] + 2. #{first_name} #{last_name} => List[String] that contains #{} pattern + 3. #{male_first_name} => List[String] + */ + private def fetchNumValues(key: String, faker: Faker, baseMap: Map[String, util.List[String]] = Map()): BigInt = { + val spt = key.toLowerCase.split("\\.") + if (baseMap.nonEmpty) { + val expressionValues = baseMap(key).toList + if (containsFakerExpression(expressionValues)) { + expressionValues.map(exp => { + val allMatches = FAKER_EXPRESSION_REGEX.findAllMatchIn(exp).toList + allMatches.map(expMatch => { + val mapMatch = baseMap(expMatch.group(1)).toList + if (containsFakerExpression(mapMatch)) { + mapMatch.map(m => { + val innerMatch = FAKER_EXPRESSION_REGEX.findAllMatchIn(m).toList + innerMatch.map(i => fetchNumValues(i.group(1), faker, baseMap)).product + }).sum + } else { + LOGGER.debug(s"Inner expression match, expression=$exp, inner-expression=${expMatch.group(1)}, size=${mapMatch.size}") + BigInt(mapMatch.size) + } + }).product + }).sum + } else { + LOGGER.debug(s"Simple list match, expression=$key, size=${expressionValues.size}") + BigInt(expressionValues.size) + } + } else { + if (spt.length < 2) throw new RuntimeException("Expressions require '.' in name, check test/resources/datafaker/expressions.txt for reference") + val fileObject = faker.fakeValuesService.fetchObject(spt.head, faker.getContext) + fileObject match { + case stringToStrings: util.Map[String, util.List[String]] => + val mapFakerExpressions = stringToStrings.asScala.toMap + fetchNumValues(spt.last, faker, mapFakerExpressions) + case _ => throw new RuntimeException(s"Unexpected return type from faker object, key=$key") + } + } + } + + private def containsFakerExpression(expressions: List[String]): Boolean = expressions.exists(FAKER_EXPRESSION_REGEX.pattern.matcher(_).matches()) +} diff --git a/app/src/main/scala/com/github/pflooky/datagen/core/util/ConfigUtil.scala b/app/src/main/scala/com/github/pflooky/datagen/core/util/ConfigUtil.scala new file mode 100644 index 00000000..5946ed56 --- /dev/null +++ b/app/src/main/scala/com/github/pflooky/datagen/core/util/ConfigUtil.scala @@ -0,0 +1,16 @@ +package com.github.pflooky.datagen.core.util + +object ConfigUtil { + + def cleanseOptions(config: Map[String, String]): Map[String, String] = { + config.filter(o => + !( + o._1.toLowerCase.contains("password") || o._2.toLowerCase.contains("password") || + o._1.toLowerCase.contains("token") || + o._1.toLowerCase.contains("secret") || + o._1.toLowerCase.contains("private") + ) + ) + } + +} diff --git a/app/src/main/scala/com/github/pflooky/datagen/core/util/FileUtil.scala b/app/src/main/scala/com/github/pflooky/datagen/core/util/FileUtil.scala new file mode 100644 index 00000000..e6be6fec --- /dev/null +++ b/app/src/main/scala/com/github/pflooky/datagen/core/util/FileUtil.scala @@ -0,0 +1,58 @@ +package com.github.pflooky.datagen.core.util + +import org.apache.hadoop.fs.{FileSystem, Path} +import org.apache.spark.sql.SparkSession + +import java.io.File +import java.nio.charset.StandardCharsets +import scala.util.matching.Regex 
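/* Illustrative sketch of how ConfigUtil.cleanseOptions above is expected to behave: options whose key contains
   "password", "token", "secret" or "private", or whose value contains "password", are dropped before connection
   details are logged or rendered in the generated HTML report. The option values below are made-up examples.

     val connectionOptions = Map(
       "url" -> "jdbc:postgresql://localhost:5432/customer",
       "user" -> "postgres",
       "password" -> "postgres",   // removed: key contains "password"
       "accessToken" -> "abc123",  // removed: key contains "token"
       "format" -> "jdbc"
     )
     ConfigUtil.cleanseOptions(connectionOptions)
     // == Map("url" -> "jdbc:postgresql://localhost:5432/customer", "user" -> "postgres", "format" -> "jdbc")
*/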
+import scala.util.{Success, Try} + +object FileUtil { + + val CLOUD_STORAGE_REGEX: Regex = "^(s3(a|n?)://|wasb(s?)://|gs://).*".r + + def isCloudStoragePath(path: String): Boolean = { + CLOUD_STORAGE_REGEX.pattern.matcher(path).matches() + } + + def getFile(filePath: String)(implicit sparkSession: SparkSession): File = { + val (directFile, classFile, classLoaderFile) = getDirectAndClassFiles(filePath) + (directFile.exists(), classFile.map(_.exists), classLoaderFile.map(_.exists)) match { + case (true, _, _) => directFile + case (_, Success(true), _) => classFile.get + case (_, _, Success(true)) => classLoaderFile.get + case _ => throw new RuntimeException(s"Failed to find file, path=$filePath") + } + } + + def getDirectory(folderPath: String): File = { + val (directFile, classFile, classLoaderFile) = getDirectAndClassFiles(folderPath) + (directFile.isDirectory, classFile.map(_.isDirectory), classLoaderFile.map(_.isDirectory)) match { + case (true, _, _) => directFile + case (_, Success(true), _) => classFile.get + case (_, _, Success(true)) => classLoaderFile.get + case _ => throw new RuntimeException(s"Failed to find directory, path=$folderPath") + } + } + + def writeStringToFile(fileSystem: FileSystem, filePath: String, fileContent: String): Unit = { + val fsOutput = fileSystem.create(new Path(filePath)) + fsOutput.writeBytes(fileContent) + fsOutput.flush() + fsOutput.close() + } + + def getFileContentFromFileSystem(fileSystem: FileSystem, filePath: String): String = { + val fileContentBytes = fileSystem.open(new Path(filePath)).readAllBytes() + new String(fileContentBytes, StandardCharsets.UTF_8) + } + + private def getDirectAndClassFiles(filePath: String): (File, Try[File], Try[File]) = { + val directFile = new File(filePath) + val classFile = Try(new File(getClass.getResource(filePath).getPath)) + val classLoaderFile = Try(new File(getClass.getClassLoader.getResource(filePath).getPath)) + (directFile, classFile, classLoaderFile) + } + +} diff --git a/app/src/main/scala/com/github/pflooky/datagen/core/util/ForeignKeyUtil.scala b/app/src/main/scala/com/github/pflooky/datagen/core/util/ForeignKeyUtil.scala new file mode 100644 index 00000000..367080a7 --- /dev/null +++ b/app/src/main/scala/com/github/pflooky/datagen/core/util/ForeignKeyUtil.scala @@ -0,0 +1,287 @@ +package com.github.pflooky.datagen.core.util + +import com.github.pflooky.datacaterer.api.PlanRun +import com.github.pflooky.datacaterer.api.model.Constants.OMIT +import com.github.pflooky.datacaterer.api.model.{ForeignKeyRelation, Plan} +import ForeignKeyRelationHelper.updateForeignKeyName +import com.github.pflooky.datagen.core.model.ForeignKeyRelationship +import PlanImplicits.{ForeignKeyRelationOps, SinkOptionsOps} +import com.github.pflooky.datagen.core.util.GeneratorUtil.applySqlExpressions +import org.apache.log4j.Logger +import org.apache.spark.sql.functions.col +import org.apache.spark.sql.types.{ArrayType, DataType, LongType, Metadata, MetadataBuilder, StructField, StructType} +import org.apache.spark.sql.{DataFrame, Dataset, Row} + +import scala.annotation.tailrec +import scala.collection.mutable + +object ForeignKeyUtil { + + private val LOGGER = Logger.getLogger(getClass.getName) + + /** + * Apply same values from source data frame columns to target foreign key columns + * + * @param plan where foreign key definitions are defined + * @param generatedDataForeachTask map of dataSourceName.stepName => generated data as dataframe + * @return map of dataSourceName.stepName 
=> dataframe + */ + def getDataFramesWithForeignKeys(plan: Plan, generatedDataForeachTask: Map[String, DataFrame]): List[(String, DataFrame)] = { + val enabledSources = plan.tasks.filter(_.enabled).map(_.dataSourceName) + val sinkOptions = plan.sinkOptions.get + val foreignKeyRelations = sinkOptions.foreignKeys + .map(fk => sinkOptions.gatherForeignKeyRelations(fk._1)) + val enabledForeignKeys = foreignKeyRelations + .filter(fkr => isValidForeignKeyRelation(generatedDataForeachTask, enabledSources, fkr)) + var taskDfs = generatedDataForeachTask + + val foreignKeyAppliedDfs = enabledForeignKeys.flatMap(foreignKeyDetails => { + val sourceDfName = foreignKeyDetails._1.dataFrameName + LOGGER.debug(s"Getting source dataframe, source=$sourceDfName") + if (!taskDfs.contains(sourceDfName)) { + throw new RuntimeException(s"Cannot create target foreign key as one of the data sources not created. " + + s"Please ensure there exists a data source with name (.): $sourceDfName") + } + val sourceDf = taskDfs(sourceDfName) + + val sourceDfsWithForeignKey = foreignKeyDetails._2.map(target => { + val targetDfName = target.dataFrameName + LOGGER.debug(s"Getting target dataframe, source=$targetDfName") + val targetDf = taskDfs(targetDfName) + if (target.columns.forall(targetDf.columns.contains)) { + val dfWithForeignKeys = applyForeignKeysToTargetDf(sourceDf, targetDf, foreignKeyDetails._1.columns, target.columns) + if (!dfWithForeignKeys.storageLevel.useMemory) dfWithForeignKeys.cache() + (targetDfName, dfWithForeignKeys) + } else { + LOGGER.warn("Foreign key data source does not contain foreign key defined in plan, defaulting to base generated data") + (targetDfName, targetDf) + } + }) + taskDfs ++= sourceDfsWithForeignKey.toMap + sourceDfsWithForeignKey + }) + + val insertOrder = getInsertOrder(foreignKeyRelations.map(f => (f._1.dataFrameName, f._2.map(_.dataFrameName)))) + val insertOrderDfs = insertOrder + .filter(i => foreignKeyAppliedDfs.exists(f => f._1.equalsIgnoreCase(i))) + .map(s => (s, foreignKeyAppliedDfs.filter(f => f._1.equalsIgnoreCase(s)).head._2)) + taskDfs.toList.filter(t => !insertOrderDfs.exists(_._1.equalsIgnoreCase(t._1))) ++ insertOrderDfs + } + + private def isValidForeignKeyRelation(generatedDataForeachTask: Map[String, DataFrame], enabledSources: List[String], fkr: (ForeignKeyRelation, List[ForeignKeyRelation])) = { + val isMainForeignKeySourceEnabled = enabledSources.contains(fkr._1.dataSource) + val subForeignKeySources = fkr._2.map(_.dataSource) + val isSubForeignKeySourceEnabled = subForeignKeySources.forall(enabledSources.contains) + val disabledSubSources = subForeignKeySources.filter(s => !enabledSources.contains(s)) + val mainDfFields = generatedDataForeachTask(fkr._1.dataFrameName).schema.fields + val columnExistsMain = fkr._1.columns.forall(c => hasDfContainColumn(c, mainDfFields)) + + if (!isMainForeignKeySourceEnabled) { + LOGGER.warn(s"Foreign key data source is not enabled. 
Data source needs to be enabled for foreign key relationship " + + s"to exist from generated data, data-source-name=${fkr._1.dataSource}") + } + if (!isSubForeignKeySourceEnabled) { + LOGGER.warn(s"Sub data sources within foreign key relationship are not enabled, disabled-task=${disabledSubSources.mkString(",")}") + } + if (!columnExistsMain) { + LOGGER.warn(s"Main column for foreign key references is not created, data-source-name=${fkr._1.dataSource}, column=${fkr._1.columns}") + } + isMainForeignKeySourceEnabled && isSubForeignKeySourceEnabled && columnExistsMain + } + + def hasDfContainColumn(column: String, fields: Array[StructField]): Boolean = { + if (column.contains(".")) { + val spt = column.split("\\.") + fields.find(_.name == spt.head) + .exists(field => checkNestedFields(spt, field.dataType)) + } else { + fields.exists(_.name == column) + } + } + + @tailrec + private def checkNestedFields(spt: Array[String], dataType: DataType): Boolean = { + val tailColName = spt.tail + dataType match { + case StructType(nestedFields) => + hasDfContainColumn(tailColName.mkString("."), nestedFields) + case ArrayType(elementType, _) => + checkNestedFields(spt, elementType) + case _ => false + } + } + + private def applyForeignKeysToTargetDf(sourceDf: DataFrame, targetDf: DataFrame, sourceColumns: List[String], targetColumns: List[String]): DataFrame = { + if (!sourceDf.storageLevel.useMemory) sourceDf.cache() //TODO do we checkpoint instead of cache? checkpoint based on total number of records? + if (!targetDf.storageLevel.useMemory) targetDf.cache() + val sourceColRename = sourceColumns.map(c => { + if (c.contains(".")) { + val lastCol = c.split("\\.").last + (lastCol, s"_src_$lastCol") + } else { + (c, s"_src_$c") + } + }).toMap + val distinctSourceKeys = zipWithIndex( + sourceDf.selectExpr(sourceColumns: _*).distinct() + .withColumnsRenamed(sourceColRename) + ) + val distinctTargetKeys = zipWithIndex(targetDf.selectExpr(targetColumns: _*).distinct()) + + LOGGER.debug(s"Attempting to join source DF keys with target DF, source=${sourceColumns.mkString(",")}, target=${targetColumns.mkString(",")}") + val joinDf = distinctSourceKeys.join(distinctTargetKeys, Seq("_join_foreign_key")) + .drop("_join_foreign_key") + val targetColRename = targetColumns.zip(sourceColumns).map(c => { + if (c._2.contains(".")) { + val lastCol = c._2.split("\\.").last + (c._1, col(s"_src_$lastCol")) + } else { + (c._1, col(s"_src_${c._2}")) + } + }).toMap + val res = targetDf.join(joinDf, targetColumns) + .withColumns(targetColRename) + .drop(sourceColRename.values.toList: _*) + + LOGGER.debug(s"Applied source DF keys with target DF, source=${sourceColumns.mkString(",")}, target=${targetColumns.mkString(",")}") + if (!res.storageLevel.useMemory) res.cache() + //need to add back original metadata as it will use the metadata from the sourceDf and override the targetDf metadata + val dfMetadata = combineMetadata(sourceDf, sourceColumns, targetDf, targetColumns, res) + applySqlExpressions(dfMetadata, targetColumns, false) + } + + /** + * Consolidate all the foreign key relationships into a list of foreign keys to a list of their relationships. + * Foreign key relationships string follows the pattern of .. 
+ * + * @param dataSourceForeignKeys Foreign key relationships for each data source + * @return Map of data source columns to respective foreign key columns (which may be in other data sources) + */ + def getAllForeignKeyRelationships( + dataSourceForeignKeys: List[Dataset[ForeignKeyRelationship]], + optPlanRun: Option[PlanRun], + stepNameMapping: Map[String, String] + ): List[(String, List[String])] = { + val generatedForeignKeys = dataSourceForeignKeys.flatMap(_.collect()) + .groupBy(_.key) + .map(x => (x._1.toString, x._2.map(_.foreignKey.toString))) + .toList + val userForeignKeys = optPlanRun.flatMap(planRun => planRun._plan.sinkOptions.map(_.foreignKeys)) + .getOrElse(List()) + .map(userFk => { + val fkMapped = updateForeignKeyName(stepNameMapping, userFk._1) + val subFkNamesMapped = userFk._2.map(subFk => updateForeignKeyName(stepNameMapping, subFk)) + (fkMapped, subFkNamesMapped) + }) + + val mergedForeignKeys = generatedForeignKeys.map(genFk => { + userForeignKeys.find(userFk => userFk._1 == genFk._1) + .map(matchUserFk => { + //generated foreign key takes precedence due to constraints from underlying data source need to be adhered + (matchUserFk._1, matchUserFk._2 ++ genFk._2) + }) + .getOrElse(genFk) + }) + val allForeignKeys = mergedForeignKeys ++ userForeignKeys.filter(userFk => !generatedForeignKeys.exists(_._1 == userFk._1)) + allForeignKeys + } + + //get delete order + def getInsertOrder(foreignKeys: List[(String, List[String])]): List[String] = { + val result = mutable.ListBuffer.empty[String] + val visited = mutable.Set.empty[String] + + def visit(table: String): Unit = { + if (!visited.contains(table)) { + visited.add(table) + foreignKeys.find(f => f._1 == table).map(_._2).getOrElse(List.empty).foreach(visit) + result.prepend(table) + } + } + + foreignKeys.map(_._1).foreach(visit) + result.toList + } + + def getDeleteOrder(foreignKeys: List[(String, List[String])]): List[String] = { + //given map of foreign key relationships, need to order the foreign keys by leaf nodes first, parents after + //could be nested foreign keys + //e.g. 
key1 -> key2 + //key2 -> key3 + //resulting order of deleting should be key3, key2, key1 + val fkMap = foreignKeys.toMap + var visited = Set[String]() + + def getForeignKeyOrder(currKey: String): List[String] = { + if (!visited.contains(currKey)) { + visited = visited ++ Set(currKey) + + if (fkMap.contains(currKey)) { + val children = foreignKeys.find(f => f._1 == currKey).map(_._2).getOrElse(List()) + val nested = children.flatMap(c => { + if (!visited.contains(c)) { + val nestedChildren = getForeignKeyOrder(c) + visited = visited ++ Set(c) + nestedChildren + } else { + List() + } + }) + nested ++ List(currKey) + } else { + List(currKey) + } + } else { + List() + } + } + + foreignKeys.flatMap(x => getForeignKeyOrder(x._1)) + } + + private def zipWithIndex(df: DataFrame): DataFrame = { + if (!df.storageLevel.useMemory) df.cache() + df.sqlContext.createDataFrame( + df.rdd.zipWithIndex.map(ln => + Row.fromSeq(ln._1.toSeq ++ Seq(ln._2)) + ), + StructType( + df.schema.fields ++ Array(StructField("_join_foreign_key", LongType, false)) + ) + ) + } + + private def combineMetadata(sourceDf: DataFrame, sourceCols: List[String], targetDf: DataFrame, targetCols: List[String], df: DataFrame): DataFrame = { + val sourceColsMetadata = sourceCols.map(c => { + val baseMetadata = getMetadata(c, sourceDf.schema.fields) + new MetadataBuilder().withMetadata(baseMetadata).remove(OMIT).build() + }) + val targetColsMetadata = targetCols.map(c => (c, getMetadata(c, targetDf.schema.fields))) + val newMetadata = sourceColsMetadata.zip(targetColsMetadata).map(meta => (meta._2._1, new MetadataBuilder().withMetadata(meta._2._2).withMetadata(meta._1).build())) + //also should apply any further sql statements + newMetadata.foldLeft(df)((metaDf, meta) => metaDf.withMetadata(meta._1, meta._2)) + } + + private def getMetadata(column: String, fields: Array[StructField]): Metadata = { + val optMetadata = if (column.contains(".")) { + val spt = column.split("\\.") + val optField = fields.find(_.name == spt.head) + optField.map(field => checkNestedForMetadata(spt, field.dataType)) + } else { + fields.find(_.name == column).map(_.metadata) + } + if (optMetadata.isEmpty) { + LOGGER.warn(s"Unable to find metadata for column, defaulting to empty metadata, column-name=$column") + Metadata.empty + } else optMetadata.get + } + + @tailrec + private def checkNestedForMetadata(spt: Array[String], dataType: DataType): Metadata = { + dataType match { + case StructType(nestedFields) => getMetadata(spt.tail.mkString("."), nestedFields) + case ArrayType(elementType, _) => checkNestedForMetadata(spt, elementType) + case _ => Metadata.empty + } + } +} diff --git a/app/src/main/scala/com/github/pflooky/datagen/core/util/GeneratorUtil.scala b/app/src/main/scala/com/github/pflooky/datagen/core/util/GeneratorUtil.scala new file mode 100644 index 00000000..5b3f248c --- /dev/null +++ b/app/src/main/scala/com/github/pflooky/datagen/core/util/GeneratorUtil.scala @@ -0,0 +1,79 @@ +package com.github.pflooky.datagen.core.util + +import com.github.pflooky.datacaterer.api.model.Constants.{ONE_OF_GENERATOR, RANDOM_GENERATOR, REGEX_GENERATOR, SQL_GENERATOR} +import com.github.pflooky.datacaterer.api.model.{Generator, Step, TaskSummary} +import com.github.pflooky.datagen.core.exception.UnsupportedDataGeneratorType +import com.github.pflooky.datagen.core.generator.provider.{DataGenerator, OneOfDataGenerator, RandomDataGenerator, RegexDataGenerator} +import net.datafaker.Faker +import org.apache.log4j.Logger +import org.apache.spark.sql.types.{LongType, 
StructField, StructType} +import org.apache.spark.sql.{DataFrame, Row} + +object GeneratorUtil { + + private val LOGGER = Logger.getLogger(getClass.getName) + + def getDataGenerator(structField: StructField, faker: Faker): DataGenerator[_] = { + val hasRegex = structField.metadata.contains(REGEX_GENERATOR) + val hasOneOf = structField.metadata.contains(ONE_OF_GENERATOR) + (hasRegex, hasOneOf) match { + case (true, _) => RegexDataGenerator.getGenerator(structField, faker) + case (_, true) => OneOfDataGenerator.getGenerator(structField, faker) + case _ => RandomDataGenerator.getGeneratorForStructField(structField, faker) + } + } + + def getDataGenerator(optGenerator: Option[Generator], structField: StructField, faker: Faker): DataGenerator[_] = { + if (optGenerator.isDefined) { + optGenerator.get.`type` match { + //TODO: Slightly abusing random data generator giving back correct data type for sql type generated data + case RANDOM_GENERATOR | SQL_GENERATOR => RandomDataGenerator.getGeneratorForStructField(structField, faker) + case ONE_OF_GENERATOR => OneOfDataGenerator.getGenerator(structField, faker) + case REGEX_GENERATOR => RegexDataGenerator.getGenerator(structField, faker) + case x => throw new UnsupportedDataGeneratorType(x) + } + } else { + LOGGER.debug(s"No generator defined, will default to random generator, field-name=${structField.name}") + RandomDataGenerator.getGeneratorForStructField(structField, faker) + } + } + + def zipWithIndex(df: DataFrame, colName: String): DataFrame = { + df.sqlContext.createDataFrame( + df.rdd.zipWithIndex.map(ln => + Row.fromSeq(ln._1.toSeq ++ Seq(ln._2)) + ), + StructType( + df.schema.fields ++ Array(StructField(colName, LongType, false)) + ) + ) + } + + def getDataSourceName(taskSummary: TaskSummary, step: Step): String = { + s"${taskSummary.dataSourceName}.${step.name}" + } + + def applySqlExpressions(df: DataFrame, foreignKeyCols: List[String] = List(), isIgnoreForeignColExists: Boolean = true): DataFrame = { + def getSqlExpr(field: StructField): String = { + field.dataType match { + case StructType(fields) => + val nestedSqlExpr = fields.map(f => s"'${f.name}', ${getSqlExpr(f.copy(name = s"${field.name}.${f.name}"))}").mkString(",") + s"NAMED_STRUCT($nestedSqlExpr)" + case _ => + if (field.metadata.contains(SQL_GENERATOR) && + (isIgnoreForeignColExists || foreignKeyCols.exists(col => field.metadata.getString(SQL_GENERATOR).contains(col)))) { + field.metadata.getString(SQL_GENERATOR) + } else { + field.name + } + } + } + + val sqlExpressions = df.schema.fields.map(f => s"${getSqlExpr(f)} as ${f.name}") + val res = df.selectExpr(sqlExpressions: _*) + .selectExpr(sqlExpressions: _*) //fix for nested SQL references but I don't think it would work longer term + //TODO have to figure out the order of the SQL expressions and execute accordingly + res + } + +} diff --git a/app/src/main/scala/com/github/pflooky/datagen/core/util/HttpUtil.scala b/app/src/main/scala/com/github/pflooky/datagen/core/util/HttpUtil.scala new file mode 100644 index 00000000..c7b77136 --- /dev/null +++ b/app/src/main/scala/com/github/pflooky/datagen/core/util/HttpUtil.scala @@ -0,0 +1,19 @@ +package com.github.pflooky.datagen.core.util + +import com.github.pflooky.datacaterer.api.model.Constants.{PASSWORD, USERNAME} + +import java.util.Base64 + +object HttpUtil { + + def getAuthHeader(connectionConfig: Map[String, String]): Map[String, String] = { + if (connectionConfig.contains(USERNAME) && connectionConfig.contains(PASSWORD)) { + val user = connectionConfig(USERNAME) + val 
password = connectionConfig(PASSWORD) + val encodedUserPassword = Base64.getEncoder.encodeToString(s"$user:$password".getBytes) + Map("Authorization" -> s"Basic $encodedUserPassword") + } else { + Map() + } + } +} diff --git a/app/src/main/scala/com/github/pflooky/datagen/core/util/MetadataUtil.scala b/app/src/main/scala/com/github/pflooky/datagen/core/util/MetadataUtil.scala new file mode 100644 index 00000000..1c57b6eb --- /dev/null +++ b/app/src/main/scala/com/github/pflooky/datagen/core/util/MetadataUtil.scala @@ -0,0 +1,154 @@ +package com.github.pflooky.datagen.core.util + +import com.github.pflooky.datacaterer.api.model.Constants._ +import com.github.pflooky.datacaterer.api.model.{Field, MetadataConfig} +import org.apache.log4j.Logger +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.catalog.CatalogColumnStat +import org.apache.spark.sql.execution.command.AnalyzeColumnCommand +import org.apache.spark.sql.types.{BinaryType, BooleanType, DataType, DateType, DecimalType, DoubleType, FloatType, IntegerType, LongType, Metadata, MetadataBuilder, ShortType, StringType, StructField, TimestampType} +import org.apache.spark.sql.{DataFrame, SparkSession} + +import scala.util.{Failure, Success, Try} + +object MetadataUtil { + + private val LOGGER = Logger.getLogger(getClass.getName) + private val OBJECT_MAPPER = ObjectMapperUtil.jsonObjectMapper + private val mapStringToAnyClass = Map[String, Any]() + private val TEMP_CACHED_TABLE_NAME = "__temp_table" + + def metadataToMap(metadata: Metadata): Map[String, Any] = { + OBJECT_MAPPER.readValue(metadata.json, mapStringToAnyClass.getClass) + } + + def mapToMetadata(mapMetadata: Map[String, Any]): Metadata = { + Metadata.fromJson(OBJECT_MAPPER.writeValueAsString(mapMetadata)) + } + + def mapToStructFields(sourceData: DataFrame, columnDataProfilingMetadata: List[DataProfilingMetadata]) + (implicit sparkSession: SparkSession): Array[StructField] = { + val fieldsWithMetadata = sourceData.schema.fields.map(field => { + val baseMetadata = new MetadataBuilder().withMetadata(field.metadata) + columnDataProfilingMetadata.find(_.columnName == field.name).foreach(c => baseMetadata.withMetadata(mapToMetadata(c.metadata))) + field.copy(metadata = baseMetadata.build()) + }) + + if (sparkSession.catalog.tableExists(TEMP_CACHED_TABLE_NAME)) { + sparkSession.catalog.uncacheTable(TEMP_CACHED_TABLE_NAME) + } + fieldsWithMetadata + } + + def getFieldDataProfilingMetadata( + sourceData: DataFrame, + dataSourceReadOptions: Map[String, String], + dataSourceName: String, + metadataConfig: MetadataConfig + )(implicit sparkSession: SparkSession): List[DataProfilingMetadata] = { + val dataSourceFormat = dataSourceReadOptions(FORMAT) + computeColumnStatistics(sourceData, dataSourceReadOptions, dataSourceName, dataSourceFormat) + val columnLevelStatistics = sparkSession.sharedState.cacheManager.lookupCachedData(sourceData).get.cachedRepresentation.stats + val rowCount = columnLevelStatistics.rowCount.getOrElse(BigInt(0)) + LOGGER.info(s"Computed metadata statistics for data source, name=$dataSourceName, format=$dataSourceFormat, " + + s"details=$dataSourceReadOptions, rows-analysed=$rowCount, size-in-bytes=${columnLevelStatistics.sizeInBytes}, " + + s"num-columns-analysed=${columnLevelStatistics.attributeStats.size}") + + columnLevelStatistics.attributeStats.map(x => { + val columnName = x._1.name + val statisticsMap = columnStatToMap(x._2.toCatalogColumnStat(columnName, x._1.dataType)) ++ Map(ROW_COUNT -> rowCount.toString) + val 
optOneOfColumn = determineIfOneOfColumn(sourceData, columnName, statisticsMap, metadataConfig) + val optionalMetadataMap = optOneOfColumn.map(oneOf => Map(ONE_OF_GENERATOR -> oneOf)).getOrElse(Map()) + val statWithOptionalMetadata = statisticsMap ++ optionalMetadataMap + + LOGGER.debug(s"Column summary statistics, name=$dataSourceName, format=$dataSourceFormat, column-name=$columnName, " + + s"statistics=${statWithOptionalMetadata - s"$columnName.$HISTOGRAM"}") + DataProfilingMetadata(columnName, statWithOptionalMetadata) + }).toList + } + + private def computeColumnStatistics( + sourceData: DataFrame, + dataSourceReadOptions: Map[String, String], + dataSourceName: String, + dataSourceFormat: String + )(implicit sparkSession: SparkSession): Unit = { + //have to create temp view then analyze the column stats which can be found in the cached data + sourceData.createOrReplaceTempView(TEMP_CACHED_TABLE_NAME) + if (!sparkSession.catalog.isCached(TEMP_CACHED_TABLE_NAME)) sparkSession.catalog.cacheTable(TEMP_CACHED_TABLE_NAME) + val optColumnsToAnalyze = Some(sourceData.schema.fields.filter(f => analyzeSupportsType(f.dataType)).map(_.name).toSeq) + val tryAnalyzeData = Try(AnalyzeColumnCommand(TableIdentifier(TEMP_CACHED_TABLE_NAME), optColumnsToAnalyze, false).run(sparkSession)) + tryAnalyzeData match { + case Failure(exception) => + LOGGER.error(s"Failed to analyze all columns in data source, name=$dataSourceName, format=$dataSourceFormat, " + + s"options=$dataSourceReadOptions, error-message=${exception.getMessage}") + case Success(_) => + LOGGER.debug(s"Successfully analyzed all columns in data source, name=$dataSourceName, " + + s"format=$dataSourceFormat, options=$dataSourceReadOptions") + } + } + + def determineIfOneOfColumn( + sourceData: DataFrame, + columnName: String, + statisticsMap: Map[String, String], + metadataConfig: MetadataConfig + ): Option[Array[String]] = { + val columnDataType = sourceData.schema.fields.find(_.name == columnName).map(_.dataType) + val count = statisticsMap(ROW_COUNT).toLong + (columnDataType, count) match { + case (Some(DateType), _) => None + case (_, 0) => None + case (Some(_), c) if c >= metadataConfig.oneOfMinCount => + val distinctCount = statisticsMap(DISTINCT_COUNT).toDouble + if (distinctCount / count <= metadataConfig.oneOfDistinctCountVsCountThreshold) { + LOGGER.debug(s"Identified column as a 'oneOf' column as distinct count / total count is below threshold, threshold=${metadataConfig.oneOfDistinctCountVsCountThreshold}") + Some(sourceData.select(columnName).distinct().collect().map(_.mkString)) + } else { + None + } + case _ => None + } + } + + def getFieldMetadata( + dataSourceName: String, + df: DataFrame, + connectionConfig: Map[String, String], + metadataConfig: MetadataConfig + )(implicit sparkSession: SparkSession): Array[Field] = { + val fieldMetadata = getFieldDataProfilingMetadata(df, connectionConfig, dataSourceName, metadataConfig) + val structFields = mapToStructFields(df, fieldMetadata) + structFields.map(FieldHelper.fromStructField) + } + + private def analyzeSupportsType(dataType: DataType): Boolean = dataType match { + case IntegerType | ShortType | LongType | DecimalType() | DoubleType | FloatType => true + case BooleanType => true + case BinaryType | StringType => true + case TimestampType | DateType => true + case _ => false + } + + /** + * Rename Spark column statistics to be aligned with Data Caterer statistic names. 
Remove 'version' + * + * @param catalogColumnStat Spark column statistics + * @return Map of statistics for column + */ + private def columnStatToMap(catalogColumnStat: CatalogColumnStat): Map[String, String] = { + catalogColumnStat.toMap("col") + .map(kv => { + val baseStatName = kv._1.replaceFirst("col\\.", "") + if (baseStatName.equalsIgnoreCase("minvalue")) { + ("min", kv._2) + } else if (baseStatName.equalsIgnoreCase("maxvalue")) { + ("max", kv._2) + } else (baseStatName, kv._2) + }) + .filter(_._1 != "version") + } +} + + +case class DataProfilingMetadata(columnName: String, metadata: Map[String, Any], nestedProfiling: List[DataProfilingMetadata] = List()) diff --git a/app/src/main/scala/com/github/pflooky/datagen/core/util/ObjectMapperUtil.scala b/app/src/main/scala/com/github/pflooky/datagen/core/util/ObjectMapperUtil.scala new file mode 100644 index 00000000..26100ef5 --- /dev/null +++ b/app/src/main/scala/com/github/pflooky/datagen/core/util/ObjectMapperUtil.scala @@ -0,0 +1,17 @@ +package com.github.pflooky.datagen.core.util + +import com.fasterxml.jackson.annotation.JsonInclude.Include +import com.fasterxml.jackson.databind.ObjectMapper +import com.fasterxml.jackson.dataformat.yaml.YAMLFactory +import com.fasterxml.jackson.module.scala.DefaultScalaModule + +object ObjectMapperUtil { + + val yamlObjectMapper = new ObjectMapper(new YAMLFactory()) + yamlObjectMapper.registerModule(DefaultScalaModule) + yamlObjectMapper.setSerializationInclusion(Include.NON_ABSENT) + + val jsonObjectMapper = new ObjectMapper() + jsonObjectMapper.registerModule(DefaultScalaModule) + +} diff --git a/app/src/main/scala/com/github/pflooky/datagen/core/util/ProtobufUtil.scala b/app/src/main/scala/com/github/pflooky/datagen/core/util/ProtobufUtil.scala new file mode 100644 index 00000000..9c151c06 --- /dev/null +++ b/app/src/main/scala/com/github/pflooky/datagen/core/util/ProtobufUtil.scala @@ -0,0 +1,75 @@ +package com.github.pflooky.datagen.core.util + +import com.google.protobuf.DescriptorProtos +import com.google.protobuf.DescriptorProtos.FieldDescriptorProto +import com.google.protobuf.Descriptors.FieldDescriptor +import com.google.protobuf.Descriptors.FieldDescriptor.JavaType +import org.apache.spark.sql.types.{DataType, DataTypes, StructField, StructType} + +import java.io.{BufferedInputStream, FileInputStream} +import scala.collection.JavaConverters.asScalaBufferConverter + +object ProtobufUtil { + + def toStructType(descriptorFile: String): Map[String, StructType] = { + val file = new BufferedInputStream(new FileInputStream(descriptorFile)) + val fileDescriptorSet = DescriptorProtos.FileDescriptorSet.parseFrom(file) + fileDescriptorSet.getFileList.asScala + .flatMap(fd => { + fd.getMessageTypeList.asScala.toList.map(message => { + (message.getName, StructType(getSchemaFromFieldsProto(message.getFieldList.asScala.toList))) + }) + // (fd.getName, StructType(getSchemaFromFields(fd.getMessageTypeList.asScala.toList))) + }).toMap + } + + private def getSchemaFromFields(fields: List[FieldDescriptor]): Array[StructField] = { + fields.map(field => { + val dataType = getDataTypeForField(field) + StructField(field.getName, dataType, !field.isRequired) + }).toArray + } + + private def getSchemaFromFieldsProto(fields: List[FieldDescriptorProto]): Array[StructField] = { + fields.map(field => { + val dataType = getDataTypeForField(field) + StructField(field.getName, dataType) + }).toArray + } + + private def getDataTypeForField(fieldDescriptor: FieldDescriptor): DataType = { + fieldDescriptor.getJavaType 
match { + case JavaType.BOOLEAN => DataTypes.BooleanType + case JavaType.INT => DataTypes.IntegerType + case JavaType.LONG => DataTypes.LongType + case JavaType.DOUBLE => DataTypes.DoubleType + case JavaType.FLOAT => DataTypes.FloatType + case JavaType.STRING => DataTypes.StringType + case JavaType.ENUM => DataTypes.StringType + case JavaType.BYTE_STRING => DataTypes.BinaryType + case JavaType.MESSAGE => { + new StructType(getSchemaFromFields(fieldDescriptor.getMessageType.getFields.asScala.toList)) + } + case _ => throw new RuntimeException(s"Unable to parse proto type, type=${fieldDescriptor.getType}") + } + } + + private def getDataTypeForField(fieldDescriptor: FieldDescriptorProto): DataType = { + // val nonProtoField = FieldDescriptor.Type.valueOf(fieldDescriptor.getType) + FieldDescriptor.Type.valueOf(fieldDescriptor.getType).getJavaType match { + case JavaType.BOOLEAN => DataTypes.BooleanType + case JavaType.INT => DataTypes.IntegerType + case JavaType.LONG => DataTypes.LongType + case JavaType.DOUBLE => DataTypes.DoubleType + case JavaType.FLOAT => DataTypes.FloatType + case JavaType.STRING => DataTypes.StringType + case JavaType.ENUM => DataTypes.StringType + case JavaType.BYTE_STRING => DataTypes.BinaryType + case JavaType.MESSAGE => { + new StructType(getSchemaFromFields(fieldDescriptor.getDescriptorForType.getFields.asScala.toList)) + } + case _ => throw new RuntimeException(s"Unable to parse proto type, type=${fieldDescriptor}") + } + } + +} diff --git a/app/src/main/scala/com/github/pflooky/datagen/core/util/RecordCountUtil.scala b/app/src/main/scala/com/github/pflooky/datagen/core/util/RecordCountUtil.scala new file mode 100644 index 00000000..0f02ee1a --- /dev/null +++ b/app/src/main/scala/com/github/pflooky/datagen/core/util/RecordCountUtil.scala @@ -0,0 +1,58 @@ +package com.github.pflooky.datagen.core.util + +import com.github.pflooky.datacaterer.api.model.{GenerationConfig, Task} +import PlanImplicits.{CountOps, PerColumnCountOps} +import org.apache.log4j.Logger + +object RecordCountUtil { + + private val LOGGER = Logger.getLogger(getClass.getName) + + def calculateNumBatches(tasks: List[Task], generationConfig: GenerationConfig): (Int, Map[String, StepRecordCount]) = { + if (tasks.isEmpty) return (0, Map()) + val countPerStep = getCountPerStep(tasks, generationConfig).toMap + val totalRecordsToGenerate = countPerStep.values.sum + if (totalRecordsToGenerate <= generationConfig.numRecordsPerBatch) { + LOGGER.debug(s"Generating all records for all steps in single batch, total-records=$totalRecordsToGenerate, configured-records-per-batch=${generationConfig.numRecordsPerBatch}") + } + + val numBatches = Math.max(Math.ceil(totalRecordsToGenerate / generationConfig.numRecordsPerBatch.toDouble).toInt, 1) + LOGGER.info(s"Number of batches for data generation, num-batches=$numBatches, num-records-per-batch=${generationConfig.numRecordsPerBatch}, total-records=$totalRecordsToGenerate") + val trackRecordsPerStep = stepToRecordCountMap(tasks, generationConfig, numBatches) + (numBatches, trackRecordsPerStep) + } + + private def stepToRecordCountMap(tasks: List[Task], generationConfig: GenerationConfig, numBatches: Long): Map[String, StepRecordCount] = { + tasks.flatMap(task => + task.steps + .map(step => { + val stepRecords = generationConfig.numRecordsPerStep.map(r => step.count.copy(records = Some(r)).numRecords).getOrElse(step.count.numRecords) + val averagePerCol = step.count.perColumn.map(_.averageCountPerColumn).getOrElse(1L) + ( + s"${task.name}_${step.name}", + 
StepRecordCount(0L, (stepRecords / averagePerCol) / numBatches, stepRecords) + ) + })).toMap + } + + private def getCountPerStep(tasks: List[Task], generationConfig: GenerationConfig): List[(String, Long)] = { + //TODO need to take into account the foreign keys defined + //the main foreign key controls the number of records produced by the children data sources + val baseStepCounts = tasks.flatMap(task => { + task.steps.map(step => { + val stepName = s"${task.name}_${step.name}" + val stepCount = generationConfig.numRecordsPerStep + .map(c => { + LOGGER.debug(s"Step count total is defined in generation config, overriding count total defined in step, " + + s"task-name=${task.name}, step-name=${step.name}, records-per-step=$c") + step.count.copy(records = Some(c)) + }) + .getOrElse(step.count) + (stepName, stepCount.numRecords) + }) + }) + baseStepCounts + } +} + +case class StepRecordCount(currentNumRecords: Long, numRecordsPerBatch: Long, numTotalRecords: Long) diff --git a/app/src/main/scala/com/github/pflooky/datagen/core/util/RowUtil.scala b/app/src/main/scala/com/github/pflooky/datagen/core/util/RowUtil.scala new file mode 100644 index 00000000..e8ea5d70 --- /dev/null +++ b/app/src/main/scala/com/github/pflooky/datagen/core/util/RowUtil.scala @@ -0,0 +1,31 @@ +package com.github.pflooky.datagen.core.util + +import org.apache.log4j.Logger +import org.apache.spark.sql.Row + +import scala.util.{Failure, Success, Try} + +object RowUtil { + + private val LOGGER = Logger.getLogger(getClass.getName) + + def getRowValue[T](row: Row, colName: String, default: T = null): T = { + val hasColumn = row.schema.fields.exists(_.name.equalsIgnoreCase(colName)) + if (hasColumn) { + val tryGetAsType = Try(row.getAs[T](colName)) + tryGetAsType match { + case Failure(exception) => + val message = s"Failed to get column as data type, column-name=$colName, exception=$exception" + LOGGER.error(message) + throw new RuntimeException(message, exception) + case Success(value) => value + } + } else if (default == null) { + throw new RuntimeException(s"Invalid schema definition due to missing column, column-name=$colName") + } else { + LOGGER.debug(s"Column missing from schema definition, will revert to default value, column-name=$colName, default=$default") + default + } + } + +} diff --git a/app/src/main/scala/com/github/pflooky/datagen/core/util/SchemaUtil.scala b/app/src/main/scala/com/github/pflooky/datagen/core/util/SchemaUtil.scala new file mode 100644 index 00000000..82b5d79c --- /dev/null +++ b/app/src/main/scala/com/github/pflooky/datagen/core/util/SchemaUtil.scala @@ -0,0 +1,340 @@ +package com.github.pflooky.datagen.core.util + +import com.github.pflooky.datacaterer.api.model.Constants.{DEFAULT_FIELD_NULLABLE, FOREIGN_KEY_DELIMITER, FOREIGN_KEY_DELIMITER_REGEX, IS_PRIMARY_KEY, IS_UNIQUE, MAXIMUM, MINIMUM, ONE_OF_GENERATOR, PRIMARY_KEY_POSITION, RANDOM_GENERATOR, REGEX_GENERATOR, STATIC} +import com.github.pflooky.datacaterer.api.model.{Count, Field, ForeignKeyRelation, Generator, PerColumnCount, Schema, SinkOptions, Step, Task} +import com.github.pflooky.datagen.core.exception.InvalidFieldConfigurationException +import com.github.pflooky.datagen.core.model.Constants.{COUNT_BASIC, COUNT_COLUMNS, COUNT_GENERATED, COUNT_GENERATED_PER_COLUMN, COUNT_NUM_RECORDS, COUNT_PER_COLUMN, COUNT_TYPE} +import org.apache.log4j.Logger +import org.apache.spark.sql.types.{ArrayType, DataType, Metadata, MetadataBuilder, StructField, StructType} + +import scala.language.implicitConversions + + +object 
ForeignKeyRelationHelper { + def fromString(str: String): ForeignKeyRelation = { + val strSpt = str.split(FOREIGN_KEY_DELIMITER_REGEX, 3) + if (strSpt.length == 2) { + ForeignKeyRelation(strSpt.head, strSpt.last, List()) + } else { + ForeignKeyRelation(strSpt.head, strSpt(1), strSpt.last.split(",").toList) + } + } + + def updateForeignKeyName(stepNameMapping: Map[String, String], foreignKey: String): String = { + val fkDataSourceStep = foreignKey.split(FOREIGN_KEY_DELIMITER_REGEX).take(2).mkString(FOREIGN_KEY_DELIMITER) + stepNameMapping.get(fkDataSourceStep) + .map(newName => foreignKey.replace(fkDataSourceStep, newName)) + .getOrElse(foreignKey) + } +} + +object SchemaHelper { + private val LOGGER = Logger.getLogger(getClass.getName) + + def fromStructType(structType: StructType): Schema = { + val fields = structType.fields.map(FieldHelper.fromStructField).toList + Schema(Some(fields)) + } + + /** + * Merge the field definitions together, taking schema2 field definition as preference + * + * @param schema1 First schema all fields defined + * @param schema2 Second schema which may have all or subset of fields defined where it will override if same + * options defined in schema1 + * @return Merged schema + */ + def mergeSchemaInfo(schema1: Schema, schema2: Schema): Schema = { + (schema1.fields, schema2.fields) match { + case (Some(fields1), Some(fields2)) => + val mergedFields = fields1.map(field => { + val filterInSchema2 = fields2.filter(f2 => f2.name == field.name) + val optFieldToMerge = if (filterInSchema2.nonEmpty) { + if (filterInSchema2.size > 1) { + LOGGER.warn(s"Multiple field definitions found. Only taking the first definition, field-name=${field.name}") + } + Some(filterInSchema2.head) + } else { + None + } + optFieldToMerge.map(f2 => { + val fieldSchema = (field.schema, f2.schema) match { + case (Some(fSchema), Some(f2Schema)) => Some(mergeSchemaInfo(fSchema, f2Schema)) + case (Some(fSchema), None) => Some(fSchema) + case (None, Some(_)) => + LOGGER.warn(s"Schema from metadata source or from data source has no nested schema for field but has nested schema defined by user. " + + s"Ignoring user defined nested schema, field-name=${field.name}") + None + case _ => None + } + val fieldType = mergeFieldType(field, f2) + val fieldGenerator = mergeGenerator(field, f2) + val fieldNullable = mergeNullable(field, f2) + val fieldStatic = mergeStaticValue(field, f2) + Field(field.name, fieldType, fieldGenerator, fieldNullable, fieldStatic, fieldSchema) + }).getOrElse(field) + }) + Schema(Some(mergedFields)) + case (Some(_), None) => schema1 + case (None, Some(_)) => schema2 + case _ => + throw new RuntimeException("Schema not defined from auto generation, metadata source or from user") + } + } + + private def mergeStaticValue(field: Field, f2: Field) = { + (field.static, f2.static) match { + case (Some(fStatic), Some(f2Static)) => + if (fStatic.equalsIgnoreCase(f2Static)) { + field.static + } else { + LOGGER.warn(s"User has defined static value different to metadata source or from data source. 
" + + s"Using user defined static value, field-name=${field.name}, user-static-value=$f2Static, data-static-value=$fStatic") + f2.static + } + case (Some(_), None) => field.static + case (None, Some(_)) => f2.static + case _ => None + } + } + + private def mergeNullable(field: Field, f2: Field) = { + (field.nullable, f2.nullable) match { + case (false, _) => false + case (true, false) => false + case _ => DEFAULT_FIELD_NULLABLE + } + } + + private def mergeGenerator(field: Field, f2: Field) = { + (field.generator, f2.generator) match { + case (Some(fGen), Some(f2Gen)) => + val genType = if (fGen.`type`.equalsIgnoreCase(f2Gen.`type`)) fGen.`type` else f2Gen.`type` + val options = fGen.options ++ f2Gen.options + Some(Generator(genType, options)) + case (Some(_), None) => field.generator + case (None, Some(_)) => f2.generator + case _ => None + } + } + + private def mergeFieldType(field: Field, f2: Field) = { + (field.`type`, f2.`type`) match { + case (Some(fType), Some(f2Type)) => + if (fType.equalsIgnoreCase(f2Type)) { + field.`type` + } else { + LOGGER.warn(s"User has defined data type different to metadata source or from data source. " + + s"Using data source defined type, field-name=${field.name}, user-type=$f2Type, data-source-type=$fType") + field.`type` + } + case (Some(_), None) => field.`type` + case (None, Some(_)) => f2.`type` + case _ => field.`type` + } + } +} + +object FieldHelper { + + def fromStructField(structField: StructField): Field = { + val metadataOptions = MetadataUtil.metadataToMap(structField.metadata) + val generatorType = if (structField.metadata.contains(ONE_OF_GENERATOR)) { + ONE_OF_GENERATOR + } else if (structField.metadata.contains(REGEX_GENERATOR)) { + REGEX_GENERATOR + } else { + RANDOM_GENERATOR + } + val generator = Generator(generatorType, metadataOptions) + val optStatic = if (structField.metadata.contains(STATIC)) Some(structField.metadata.getString(STATIC)) else None + val optSchema = if (structField.dataType.typeName == "struct") { + Some(SchemaHelper.fromStructType(structField.dataType.asInstanceOf[StructType])) + } else if (structField.dataType.typeName == "array" && structField.dataType.asInstanceOf[ArrayType].elementType.typeName == "struct") { + Some(SchemaHelper.fromStructType(structField.dataType.asInstanceOf[ArrayType].elementType.asInstanceOf[StructType])) + } else { + None + } + Field(structField.name, Some(structField.dataType.sql.toLowerCase), Some(generator), structField.nullable, optStatic, optSchema) + } +} + +object PlanImplicits { + + implicit class ForeignKeyRelationOps(foreignKeyRelation: ForeignKeyRelation) { + def dataFrameName = s"${foreignKeyRelation.dataSource}.${foreignKeyRelation.step}" + } + + implicit class SinkOptionsOps(sinkOptions: SinkOptions) { + def gatherForeignKeyRelations(key: String): (ForeignKeyRelation, List[ForeignKeyRelation]) = { + val source = ForeignKeyRelationHelper.fromString(key) + val targets = sinkOptions.foreignKeys.filter(f => f._1.equalsIgnoreCase(key)).flatMap(_._2) + val targetForeignKeys = targets.map(ForeignKeyRelationHelper.fromString) + (source, targetForeignKeys) + } + + def foreignKeysWithoutColumnNames: List[(String, List[String])] = { + sinkOptions.foreignKeys.map(foreignKey => { + val mainFk = foreignKey._1.split(FOREIGN_KEY_DELIMITER_REGEX).take(2).mkString(FOREIGN_KEY_DELIMITER) + val subFks = foreignKey._2.map(sFk => sFk.split(FOREIGN_KEY_DELIMITER_REGEX).take(2).mkString(FOREIGN_KEY_DELIMITER)) + (mainFk, subFks) + }) + } + } + + implicit class TaskOps(task: Task) { + def 
toTaskDetailString: String = { + val enabledSteps = task.steps.filter(_.enabled) + val stepSummary = enabledSteps.map(_.toStepDetailString).mkString(",") + s"name=${task.name}, num-steps=${task.steps.size}, num-enabled-steps=${enabledSteps.size}, enabled-steps-summary=($stepSummary)" + } + } + + implicit class StepOps(step: Step) { + def toStepDetailString: String = { + s"name=${step.name}, type=${step.`type`}, options=${step.options}, step-num-records=(${step.count.numRecordsString._1}), schema-summary=(${step.schema.toString})" + } + + def gatherPrimaryKeys: List[String] = { + if (step.schema.fields.isDefined) { + val fields = step.schema.fields.get + fields.filter(field => { + if (field.generator.isDefined) { + val metadata = field.generator.get.options + metadata.contains(IS_PRIMARY_KEY) && metadata(IS_PRIMARY_KEY).toString.toBoolean + } else false + }) + .map(field => (field.name, field.generator.get.options.getOrElse(PRIMARY_KEY_POSITION, "1").toString.toInt)) + .sortBy(_._2) + .map(_._1) + } else List() + } + + def gatherUniqueFields: List[String] = { + step.schema.fields.map(fields => { + fields.filter(field => { + field.generator + .flatMap(gen => gen.options.get(IS_UNIQUE).map(_.toString.toBoolean)) + .getOrElse(false) + }).map(_.name) + }).getOrElse(List()) + } + } + + implicit class CountOps(count: Count) { + def numRecordsString: (String, List[List[String]]) = { + if (count.records.isDefined && count.perColumn.isDefined && count.perColumn.get.count.isDefined && count.perColumn.get.generator.isEmpty) { + val records = (count.records.get * count.perColumn.get.count.get).toString + val columns = count.perColumn.get.columnNames.mkString(",") + val str = s"per-column-count: columns=$columns, num-records=$records" + val list = List( + List(COUNT_TYPE, COUNT_PER_COLUMN), + List(COUNT_COLUMNS, columns), + List(COUNT_NUM_RECORDS, records) + ) + (str, list) + } else if (count.perColumn.isDefined && count.perColumn.get.generator.isDefined) { + val records = (count.records.get * count.perColumn.get.count.get).toString + val columns = count.perColumn.get.columnNames.mkString(",") + val str = s"per-column-count: columns=$columns, num-records-via-generator=$records" + val list = List( + List(COUNT_TYPE, COUNT_GENERATED_PER_COLUMN), + List(COUNT_COLUMNS, columns), + List(COUNT_NUM_RECORDS, records) + ) + (str, list) + } else if (count.records.isDefined) { + val records = count.records.get.toString + val str = s"basic-count: num-records=$records" + val list = List( + List(COUNT_TYPE, COUNT_BASIC), + List(COUNT_NUM_RECORDS, records) + ) + (str, list) + } else if (count.generator.isDefined) { + val records = count.generator.toString + val str = s"generated-count: num-records=$records" + val list = List( + List(COUNT_TYPE, COUNT_GENERATED), + List(COUNT_NUM_RECORDS, records) + ) + (str, list) + } else { + //TODO: should throw error here? 
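+        //falls back to reporting zero records with an empty count summary when neither a record count nor a generator is defined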
+ ("0", List()) + } + } + + def numRecords: Long = { + (count.records, count.generator, count.perColumn, count.perColumn.flatMap(_.generator)) match { + case (Some(t), None, Some(perCol), Some(_)) => + perCol.averageCountPerColumn * t + case (Some(t), None, Some(perCol), None) => + perCol.count.get * t + case (Some(t), Some(gen), None, None) => + gen.averageCount * t + case (None, Some(gen), None, None) => + gen.averageCount + case (Some(t), None, None, None) => + t + case _ => 1000L + } + } + } + + implicit class PerColumnCountOps(perColumnCount: PerColumnCount) { + def averageCountPerColumn: Long = { + perColumnCount.generator.map(_.averageCount).getOrElse(perColumnCount.count.map(identity).getOrElse(1L)) + } + } + + implicit class SchemaOps(schema: Schema) { + def toStructType: StructType = { + if (schema.fields.isDefined) { + val structFields = schema.fields.get.map(_.toStructField) + StructType(structFields) + } else { + StructType(Seq()) + } + } + } + + implicit class FieldOps(field: Field) { + def toStructField: StructField = { + if (field.static.isDefined) { + val metadata = new MetadataBuilder().withMetadata(getMetadata).putString(STATIC, field.static.get).build() + StructField(field.name, DataType.fromDDL(field.`type`.get), field.nullable, metadata) + } else if (field.schema.isDefined) { + val innerStructFields = field.schema.get.toStructType + StructField( + field.name, + if (field.`type`.isDefined && field.`type`.get.toLowerCase.startsWith("array")) ArrayType(innerStructFields, field.nullable) else innerStructFields, + field.nullable, + getMetadata + ) + } else if (field.`type`.isDefined) { + StructField(field.name, DataType.fromDDL(field.`type`.get), field.nullable, getMetadata) + } else { + throw new InvalidFieldConfigurationException(this.field) + } + } + + private def getMetadata: Metadata = { + if (field.generator.isDefined) { + Metadata.fromJson(ObjectMapperUtil.jsonObjectMapper.writeValueAsString(field.generator.get.options)) + } else { + Metadata.empty + } + } + } + + implicit class GeneratorOps(generator: Generator) { + def averageCount: Long = { + if (generator.`type`.equalsIgnoreCase(RANDOM_GENERATOR)) { + val min = generator.options.get(MINIMUM).map(_.toString.toLong).getOrElse(1L) + val max = generator.options.get(MAXIMUM).map(_.toString.toLong).getOrElse(10L) + (max + min + 1) / 2 + } else 1L + } + } +} diff --git a/app/src/main/scala/com/github/pflooky/datagen/core/util/SparkProvider.scala b/app/src/main/scala/com/github/pflooky/datagen/core/util/SparkProvider.scala new file mode 100644 index 00000000..8d761711 --- /dev/null +++ b/app/src/main/scala/com/github/pflooky/datagen/core/util/SparkProvider.scala @@ -0,0 +1,15 @@ +package com.github.pflooky.datagen.core.util + +import org.apache.spark.sql.SparkSession + +class SparkProvider(master: String, config: Map[String, String]) { + + def getSparkSession: SparkSession = { + SparkSession.builder() + .master(master) + .appName("data-caterer") + .config(config) + .getOrCreate() + } + +} diff --git a/app/src/main/scala/com/github/pflooky/datagen/core/util/UniqueFieldsUtil.scala b/app/src/main/scala/com/github/pflooky/datagen/core/util/UniqueFieldsUtil.scala new file mode 100644 index 00000000..7c39c8b6 --- /dev/null +++ b/app/src/main/scala/com/github/pflooky/datagen/core/util/UniqueFieldsUtil.scala @@ -0,0 +1,55 @@ +package com.github.pflooky.datagen.core.util + +import com.github.pflooky.datacaterer.api.model.{Task, TaskSummary} +import PlanImplicits.StepOps +import org.apache.spark.sql.{DataFrame, SparkSession} + 
+class UniqueFieldsUtil(executableTasks: List[(TaskSummary, Task)])(implicit sparkSession: SparkSession) { + + var uniqueFieldsDf: Map[UniqueFields, DataFrame] = getUniqueFields + + def getUniqueFieldsValues(dataSourceStep: String, df: DataFrame): DataFrame = { + //get all the unique values that have been generated for each column so far + val existingFieldValues = uniqueFieldsDf.filter(uniqueDf => uniqueDf._1.getDataSourceName == dataSourceStep) + var finalDf = df + if (!finalDf.storageLevel.useMemory) finalDf.cache() + + //drop duplicate records for data via dropDuplicates and then anti join with previously generated values + existingFieldValues.foreach(existingCol => { + val columns = existingCol._1.columns + val dfWithUnique = finalDf.dropDuplicates(columns) + finalDf = if (existingCol._2.columns.nonEmpty) dfWithUnique.join(existingCol._2, columns, "left_anti") else dfWithUnique + }) + + //update the map with the latest values + existingFieldValues.foreach(col => { + val existingDf = uniqueFieldsDf(col._1) + val newFieldValuesDf = finalDf.selectExpr(col._1.columns: _*) + if (!existingDf.storageLevel.useMemory) existingDf.cache() + if (!newFieldValuesDf.storageLevel.useMemory) newFieldValuesDf.cache() + val combinedValuesDf = if (existingDf.isEmpty) newFieldValuesDf else newFieldValuesDf.union(existingDf) + if (!combinedValuesDf.storageLevel.useMemory) combinedValuesDf.cache() + uniqueFieldsDf = uniqueFieldsDf ++ Map(col._1 -> combinedValuesDf) + }) + finalDf + } + + private def getUniqueFields: Map[UniqueFields, DataFrame] = { + val uniqueFields = executableTasks.flatMap(t => { + t._2.steps + .flatMap(step => { + val primaryKeys = step.gatherPrimaryKeys + val primaryKeyUf = if (primaryKeys.nonEmpty) List(UniqueFields(t._1.dataSourceName, step.name, primaryKeys)) else List() + val uniqueKeys = step.gatherUniqueFields + val uniqueKeyUf = if (uniqueKeys.nonEmpty) uniqueKeys.map(u => UniqueFields(t._1.dataSourceName, step.name, List(u))) else List() + primaryKeyUf ++ uniqueKeyUf + }) + }) + uniqueFields.map(uc => (uc, sparkSession.emptyDataFrame)).toMap + } + +} + +case class UniqueFields(dataSource: String, step: String, columns: List[String]) { + def getDataSourceName: String = s"$dataSource.$step" +} diff --git a/app/src/main/scala/com/github/pflooky/datagen/core/validator/ValidationOperations.scala b/app/src/main/scala/com/github/pflooky/datagen/core/validator/ValidationOperations.scala new file mode 100644 index 00000000..20993d05 --- /dev/null +++ b/app/src/main/scala/com/github/pflooky/datagen/core/validator/ValidationOperations.scala @@ -0,0 +1,99 @@ +package com.github.pflooky.datagen.core.validator + +import com.github.pflooky.datacaterer.api.model.Constants.{AGGREGATION_COUNT, FORMAT, VALIDATION_PREFIX_JOIN_EXPRESSION, VALIDATION_UNIQUE} +import com.github.pflooky.datacaterer.api.model.{ExpressionValidation, GroupByValidation, UpstreamDataSourceValidation, Validation} +import com.github.pflooky.datagen.core.model.ValidationResult +import org.apache.spark.sql.functions.{col, expr} +import org.apache.spark.sql.{DataFrame, Dataset, Row, SparkSession} + +abstract class ValidationOps(validation: Validation) { + def validate(df: DataFrame, dfCount: Long): ValidationResult + + def validateWithExpression(df: DataFrame, dfCount: Long, expression: String): ValidationResult = { + val notEqualDf = df.where(s"!($expression)") + val (isSuccess, sampleErrors, numErrors) = getIsSuccessAndSampleErrors(notEqualDf, dfCount) + ValidationResult(validation, isSuccess, numErrors, dfCount, 
sampleErrors) + } + + def getIsSuccessAndSampleErrors(notEqualDf: Dataset[Row], dfCount: Long): (Boolean, Option[DataFrame], Long) = { + val numErrors = notEqualDf.count() + val (isSuccess, sampleErrors) = (numErrors, validation.errorThreshold) match { + case (c, Some(threshold)) if c > 0 => + if ((threshold >= 1 && c > threshold) || (threshold < 1 && c.toDouble / dfCount > threshold)) { + (false, Some(notEqualDf)) + } else (true, None) + case (c, None) if c > 0 => (false, Some(notEqualDf)) + case _ => (true, None) + } + (isSuccess, sampleErrors, numErrors) + } +} + +class ExpressionValidationOps(expressionValidation: ExpressionValidation) extends ValidationOps(expressionValidation) { + override def validate(df: DataFrame, dfCount: Long): ValidationResult = { + validateWithExpression(df, dfCount, expressionValidation.expr) + } +} + +class GroupByValidationOps(groupByValidation: GroupByValidation) extends ValidationOps(groupByValidation) { + override def validate(df: DataFrame, dfCount: Long): ValidationResult = { + val groupByDf = df.groupBy(groupByValidation.groupByCols.map(col): _*) + val (aggregateDf, validationCount) = if ((groupByValidation.aggCol == VALIDATION_UNIQUE || groupByValidation.aggCol.isEmpty) && groupByValidation.aggType == AGGREGATION_COUNT) { + (groupByDf.count(), 1L) + } else { + val aggDf = groupByDf.agg(Map( + groupByValidation.aggCol -> groupByValidation.aggType + )) + (aggDf, aggDf.count()) + } + validateWithExpression(aggregateDf, validationCount, groupByValidation.expr) + } +} + +class UpstreamDataSourceValidationOps( + upstreamDataSourceValidation: UpstreamDataSourceValidation, + recordTrackingForValidationFolderPath: String + ) extends ValidationOps(upstreamDataSourceValidation) { + override def validate(df: DataFrame, dfCount: Long): ValidationResult = { + val upstreamDf = getUpstreamData(df.sparkSession) + val joinedDf = getJoinedDf(df, upstreamDf) + val joinedCount = joinedDf.count() + + val baseValidationOp = upstreamDataSourceValidation.validationBuilder.validation match { + case expr: ExpressionValidation => new ExpressionValidationOps(expr) + case grp: GroupByValidation => new GroupByValidationOps(grp) + case up: UpstreamDataSourceValidation => new UpstreamDataSourceValidationOps(up, recordTrackingForValidationFolderPath) + case x => throw new RuntimeException(s"Unsupported validation type, validation=$x") + } + val result = baseValidationOp.validate(joinedDf, joinedCount) + ValidationResult.fromValidationWithBaseResult(upstreamDataSourceValidation, result) + } + + private def getJoinedDf(df: DataFrame, upstreamDf: DataFrame): DataFrame = { + val joinCols = upstreamDataSourceValidation.joinCols + val joinType = upstreamDataSourceValidation.joinType + val upstreamName = upstreamDataSourceValidation.upstreamDataSource.connectionConfigWithTaskBuilder.dataSourceName + + val upstreamColsToRename = upstreamDf.columns.filter(c => !joinCols.contains(c)) + .map(c => c -> s"${upstreamName}_$c").toMap + val renamedUpstreamDf = upstreamDf.withColumnsRenamed(upstreamColsToRename) + + val joinedDf = if (joinCols.size == 1 && joinCols.head.startsWith(VALIDATION_PREFIX_JOIN_EXPRESSION)) { + df.join(renamedUpstreamDf, expr(joinCols.head.replaceFirst(VALIDATION_PREFIX_JOIN_EXPRESSION, "")), joinType) + } else { + df.join(renamedUpstreamDf, joinCols, joinType) + } + if (!joinedDf.storageLevel.useMemory) joinedDf.cache() + joinedDf + } + + private def getUpstreamData(sparkSession: SparkSession): DataFrame = { + val upstreamConnectionOptions = 
upstreamDataSourceValidation.upstreamDataSource.connectionConfigWithTaskBuilder.options ++ + upstreamDataSourceValidation.upstreamReadOptions + val upstreamFormat = upstreamConnectionOptions(FORMAT) + sparkSession.read + .format(upstreamFormat) + .options(upstreamConnectionOptions) + .load() + } +} diff --git a/app/src/main/scala/com/github/pflooky/datagen/core/validator/ValidationProcessor.scala b/app/src/main/scala/com/github/pflooky/datagen/core/validator/ValidationProcessor.scala new file mode 100644 index 00000000..27d992fd --- /dev/null +++ b/app/src/main/scala/com/github/pflooky/datagen/core/validator/ValidationProcessor.scala @@ -0,0 +1,140 @@ +package com.github.pflooky.datagen.core.validator + +import com.github.pflooky.datacaterer.api.model.Constants.{FORMAT, HTTP, JMS} +import com.github.pflooky.datacaterer.api.model.{DataSourceValidation, ExpressionValidation, FoldersConfig, GroupByValidation, UpstreamDataSourceValidation, ValidationConfig, ValidationConfiguration} +import com.github.pflooky.datagen.core.model.{DataSourceValidationResult, ValidationConfigResult} +import com.github.pflooky.datagen.core.parser.ValidationParser +import com.github.pflooky.datagen.core.validator.ValidationWaitImplicits.WaitConditionOps +import org.apache.log4j.Logger +import org.apache.spark.sql.{DataFrame, SparkSession} + +import java.io.File +import scala.reflect.io.Directory + +/* +Given a list of validations, check and report on the success and failure of each +Flag to enable +Validations can occur on any data source defined in application config +Validations will only occur on datasets not on the response from the data source (i.e. no HTTP status code validations) +Defined at plan level what validations are run post data generation +Validations lie within separate files +Validations have a wait condition. 
Wait for: webhook, pause, file exists, data exists +Different types of validations: +- simple column validations (amount < 100) +- aggregates (sum of amount per account is > 500) +- ordering (transactions are ordered by date) +- relationship (one account entry in history table per account in accounts table) +- data profile (how close the generated data profile is compared to the expected data profile) + */ +class ValidationProcessor( + connectionConfigsByName: Map[String, Map[String, String]], + optValidationConfigs: Option[List[ValidationConfiguration]], + validationConfig: ValidationConfig, + foldersConfig: FoldersConfig + )(implicit sparkSession: SparkSession) { + + private val LOGGER = Logger.getLogger(getClass.getName) + + def executeValidations: List[ValidationConfigResult] = { + LOGGER.info("Executing data validations") + val validationResults = getValidations.map(vc => { + val dataSourceValidationResults = vc.dataSources.flatMap(dataSource => { + val dataSourceName = dataSource._1 + val dataSourceValidations = dataSource._2 + val numValidations = dataSourceValidations.flatMap(_.validations).size + + LOGGER.info(s"Executing data validations for data source, name=${vc.name}," + + s"data-source-name=$dataSourceName, num-validations=$numValidations") + dataSourceValidations.map(dataSourceValidation => executeDataValidations(vc, dataSourceName, dataSourceValidation)) + }).toList + ValidationConfigResult(vc.name, vc.description, dataSourceValidationResults) + }).toList + + logValidationErrors(validationResults) + validationResults + } + + private def executeDataValidations( + vc: ValidationConfiguration, + dataSourceName: String, + dataSourceValidation: DataSourceValidation + ): DataSourceValidationResult = { + LOGGER.debug(s"Waiting for validation condition to be successful before running validations, name=${vc.name}," + + s"data-source-name=$dataSourceName, details=${dataSourceValidation.options}, num-validations=${dataSourceValidation.validations.size}") + dataSourceValidation.waitCondition.waitForCondition(connectionConfigsByName) + + val df = getDataFrame(dataSourceName, dataSourceValidation.options) + if (df.isEmpty) { + LOGGER.info("No data found to run validations") + DataSourceValidationResult(dataSourceName, dataSourceValidation.options, List()) + } else { + val count = df.count() + val results = dataSourceValidation.validations.map(validBuilder => { + val validationOps = validBuilder.validation match { + case exprValid: ExpressionValidation => new ExpressionValidationOps(exprValid) + case grpValid: GroupByValidation => new GroupByValidationOps(grpValid) + case upValid: UpstreamDataSourceValidation => new UpstreamDataSourceValidationOps(upValid, foldersConfig.recordTrackingForValidationFolderPath) + case x => throw new RuntimeException(s"Unsupported validation type, validation=$x") + } + validationOps.validate(df, count) + }) + df.unpersist() + LOGGER.debug(s"Finished data validations, name=${vc.name}," + + s"data-source-name=$dataSourceName, details=${dataSourceValidation.options}, num-validations=${dataSourceValidation.validations.size}") + cleanRecordTrackingFiles() + DataSourceValidationResult(dataSourceName, dataSourceValidation.options, results) + } + } + + private def cleanRecordTrackingFiles(): Unit = { + if (validationConfig.enableDeleteRecordTrackingFiles) { + LOGGER.debug(s"Deleting all record tracking files from directory, " + + s"record-tracking-for-validation-directory=${foldersConfig.recordTrackingForValidationFolderPath}") + new Directory(new 
File(foldersConfig.recordTrackingForValidationFolderPath)).deleteRecursively() + } + } + + private def getValidations: Array[ValidationConfiguration] = { + optValidationConfigs.map(_.toArray).getOrElse(ValidationParser.parseValidation(foldersConfig.validationFolderPath)) + } + + private def getDataFrame(dataSourceName: String, options: Map[String, String]): DataFrame = { + val connectionConfig = connectionConfigsByName(dataSourceName) + val format = connectionConfig(FORMAT) + if (format == HTTP || format == JMS) { + LOGGER.warn("No support for HTTP or JMS data validations, will skip validations") + sparkSession.emptyDataFrame + } else { + val df = sparkSession.read + .format(format) + .options(connectionConfig ++ options) + .load() + if (!df.storageLevel.useMemory) df.cache() + df + } + } + + private def logValidationErrors(validationResults: List[ValidationConfigResult]): Unit = { + validationResults.foreach(vcr => vcr.dataSourceValidationResults.map(dsr => { + val failedValidations = dsr.validationResults.filter(r => !r.isSuccess) + + if (failedValidations.isEmpty) { + LOGGER.info(s"Data validations successful for validation, name=${vcr.name}, description=${vcr.description}, data-source-name=${dsr.dataSourceName}, " + + s"data-source-options=${dsr.options}, is-success=true") + } else { + failedValidations.foreach(validationRes => { + val (validationType, validationCheck) = validationRes.validation match { + case ExpressionValidation(expr) => ("expression", expr) + case GroupByValidation(_, _, _, expr) => ("groupByAggregate", expr) + //TODO get validationCheck from validationBuilder -> make this a recursive method to get validationCheck + case UpstreamDataSourceValidation(validationBuilder, upstreamDataSource, _, _, _) => ("upstreamDataSource", "") + case _ => ("Unknown", "") + } + val sampleErrors = validationRes.sampleErrorValues.get.take(validationConfig.numSampleErrorRecords).map(_.json).mkString(",") + LOGGER.error(s"Failed validation: validation-name=${vcr.name}, description=${vcr.description}, data-source-name=${dsr.dataSourceName}, " + + s"data-source-options=${dsr.options}, is-success=${validationRes.isSuccess}, validation-type=$validationType, check=$validationCheck, sample-errors=$sampleErrors") + }) + } + })) + } +} diff --git a/app/src/main/scala/com/github/pflooky/datagen/core/validator/ValidationWaitImplicits.scala b/app/src/main/scala/com/github/pflooky/datagen/core/validator/ValidationWaitImplicits.scala new file mode 100644 index 00000000..8e9e6044 --- /dev/null +++ b/app/src/main/scala/com/github/pflooky/datagen/core/validator/ValidationWaitImplicits.scala @@ -0,0 +1,96 @@ +package com.github.pflooky.datagen.core.validator + +import com.github.pflooky.datacaterer.api.model.Constants.FORMAT +import com.github.pflooky.datacaterer.api.model.{DataExistsWaitCondition, FileExistsWaitCondition, PauseWaitCondition, WaitCondition, WebhookWaitCondition} +import com.github.pflooky.datagen.core.exception.InvalidWaitConditionException +import com.github.pflooky.datagen.core.util.HttpUtil.getAuthHeader +import org.apache.hadoop.fs.{FileSystem, Path} +import org.apache.log4j.Logger +import org.apache.spark.sql.SparkSession +import org.asynchttpclient.Dsl.asyncHttpClient + +import scala.util.{Failure, Success, Try} + + +object ValidationWaitImplicits { + implicit class WaitConditionOps(waitCondition: WaitCondition = PauseWaitCondition()) { + def checkCondition(implicit sparkSession: SparkSession): Boolean = true + + def checkCondition(connectionConfigByName: Map[String, Map[String, 
String]])(implicit sparkSession: SparkSession): Boolean = true + + def waitForCondition(connectionConfigByName: Map[String, Map[String, String]])(implicit sparkSession: SparkSession): Unit = { + if (waitCondition.isRetryable) { + var retries = 0 + while (retries < waitCondition.maxRetries) { + val isDataAvailable = waitCondition match { + case DataExistsWaitCondition(_, _, _) | WebhookWaitCondition(_, _, _, _) => this.checkCondition(connectionConfigByName) + case FileExistsWaitCondition(_) => this.checkCondition + case x => throw new InvalidWaitConditionException(x.getClass.getName) + } + if (!isDataAvailable) { + Thread.sleep(waitCondition.waitBeforeRetrySeconds * 1000) + retries += 1 + } else { + return + } + } + } else { + this.checkCondition + } + } + } + + implicit class PauseWaitConditionOps(pauseWaitCondition: PauseWaitCondition) extends WaitConditionOps(pauseWaitCondition) { + override def checkCondition(implicit sparkSession: SparkSession): Boolean = { + Thread.sleep(pauseWaitCondition.pauseInSeconds * 1000) + true + } + } + + implicit class FileExistsWaitConditionOps(fileExistsWaitCondition: FileExistsWaitCondition) extends WaitConditionOps(fileExistsWaitCondition) { + override def checkCondition(implicit sparkSession: SparkSession): Boolean = { + val fs = FileSystem.get(sparkSession.sparkContext.hadoopConfiguration) + fs.exists(new Path(fileExistsWaitCondition.path)) + } + } + + implicit class DataExistsWaitConditionOps(dataExistsWaitCondition: DataExistsWaitCondition) extends WaitConditionOps(dataExistsWaitCondition) { + override def checkCondition(connectionConfigByName: Map[String, Map[String, String]])(implicit sparkSession: SparkSession): Boolean = { + val connectionOptions = connectionConfigByName(dataExistsWaitCondition.dataSourceName) + val loadData = sparkSession.read + .format(connectionOptions(FORMAT)) + .options(connectionOptions ++ dataExistsWaitCondition.options) + .load() + .where(dataExistsWaitCondition.expr) + !loadData.isEmpty + } + } + + implicit class WebhookWaitConditionOps(webhookWaitCondition: WebhookWaitCondition) extends WaitConditionOps(webhookWaitCondition) { + private val LOGGER = Logger.getLogger(getClass.getName) + + override def checkCondition(connectionConfigByName: Map[String, Map[String, String]])(implicit sparkSession: SparkSession): Boolean = { + val webhookOptions = connectionConfigByName.getOrElse(webhookWaitCondition.dataSourceName, Map()) + val request = asyncHttpClient().prepare(webhookWaitCondition.method, webhookWaitCondition.url) + val authHeader = getAuthHeader(webhookOptions) + val requestWithAuth = if (authHeader.nonEmpty) request.setHeader(authHeader.head._1, authHeader.head._2) else request + + val tryResponse = Try(requestWithAuth.execute().get()) + + tryResponse match { + case Failure(exception) => + LOGGER.error(s"Failed to execute HTTP wait condition request, url=${webhookWaitCondition.url}", exception) + false + case Success(value) => + if (webhookWaitCondition.statusCodes.contains(value.getStatusCode)) { + true + } else { + LOGGER.debug(s"HTTP wait condition status code did not match expected status code, url=${webhookWaitCondition.url}, " + + s"expected-status-code=${webhookWaitCondition.statusCodes}, actual-status-code=${value.getStatusCode}, " + + s"response-body=${value.getResponseBody}") + false + } + } + } + } +} diff --git a/app/src/test/resources/application-cassandra.conf b/app/src/test/resources/application-cassandra.conf new file mode 100644 index 00000000..90ad54f3 --- /dev/null +++ 
b/app/src/test/resources/application-cassandra.conf @@ -0,0 +1,66 @@ +flags { + enableGeneratePlanAndTasks = true + enableGeneratePlanAndTasks = ${?ENABLE_GENERATE_PLAN_AND_TASKS} + enableCount = true + enableCount = ${?ENABLE_COUNT} + enableGenerateData = true + enableGenerateData = ${?ENABLE_GENERATE_DATA} + enableRecordTracking = true + enableRecordTracking = ${?ENABLE_RECORD_TRACKING} + enableDeleteGeneratedRecords = false + enableDeleteGeneratedRecords = ${?ENABLE_DELETE_GENERATED_RECORDS} +} + +folders { + generatedPlanAndTaskFolderPath = "/tmp" + generatedPlanAndTaskFolderPath = ${?GENERATED_PLAN_AND_TASK_FOLDER_PATH} + planFilePath = "/plan/customer-create-plan.yaml" + planFilePath = ${?PLAN_FILE_PATH} + taskFolderPath = "/task" + taskFolderPath = ${?TASK_FOLDER_PATH} + recordTrackingFolderPath = "/tmp/data/generated/recordTracking" + recordTrackingFolderPath = ${?RECORD_TRACKING_FOLDER_PATH} +} + +metadata { + numRecordsFromDataSource = 10000 + numRecordsFromDataSource = ${?METADATA_NUM_RECORDS_FROM_DATA_SOURCE} + numRecordsForAnalysis = 10000 + numRecordsForAnalysis = ${?METADATA_NUM_RECORDS_FOR_ANALYSIS} + oneOfDistinctCountVsCountThreshold = 0.1 + oneOfDistinctCountVsCountThreshold = ${?METADATA_ONE_OF_DISTINCT_COUNT_VS_COUNT_THRESHOLD} +} + +generation {} + +runtime{ + master = "local[*]" + master = ${?DATA_CATERER_MASTER} + config { + "spark.sql.cbo.enabled" = "true" + "spark.sql.adaptive.enabled" = "true" + "spark.sql.cbo.planStats.enabled" = "true" + "spark.sql.legacy.allowUntypedScalaUDF" = "true" + "spark.sql.statistics.histogram.enabled" = "true" + "spark.sql.shuffle.partitions" = "10" + "spark.sql.catalog.postgres" = "" + "spark.sql.catalog.cassandra" = "com.datastax.spark.connector.datasource.CassandraCatalog" + "spark.hadoop.fs.s3a.directory.marker.retention" = "keep" + "spark.hadoop.fs.s3a.bucket.all.committer.magic.enabled" = "true" + } +} + +org.apache.spark.sql.cassandra { + cassandra { + spark.cassandra.connection.host = "localhost" + spark.cassandra.connection.host = ${?CASSANDRA_HOST} + spark.cassandra.connection.port = "9042" + spark.cassandra.connection.port = ${?CASSANDRA_PORT} + spark.cassandra.auth.username = "cassandra" + spark.cassandra.auth.username = ${?CASSANDRA_USERNAME} + spark.cassandra.auth.password = "cassandra" + spark.cassandra.auth.password = ${?CASSANDRA_PASSWORD} + } +} + +datastax-java-driver.advanced.metadata.schema.refreshed-keyspaces = [ "/.*/" ] diff --git a/app/src/test/resources/application-s3.conf b/app/src/test/resources/application-s3.conf new file mode 100644 index 00000000..4cb3387a --- /dev/null +++ b/app/src/test/resources/application-s3.conf @@ -0,0 +1,59 @@ +flags { + enableGeneratePlanAndTasks = true + enableGeneratePlanAndTasks = ${?ENABLE_GENERATE_PLAN_AND_TASKS} + enableCount = false + enableCount = ${?ENABLE_COUNT} + enableGenerateData = true + enableGenerateData = ${?ENABLE_GENERATE_DATA} + enableRecordTracking = true + enableRecordTracking = ${?ENABLE_RECORD_TRACKING} + enableDeleteGeneratedRecords = false + enableDeleteGeneratedRecords = ${?ENABLE_DELETE_GENERATED_RECORDS} +} + +folders { + generatedPlanAndTaskFolderPath = "s3a://my-bucket/test/generated" + generatedPlanAndTaskFolderPath = ${?GENERATED_PLAN_AND_TASK_FOLDER_PATH} + planFilePath = "s3a://my-bucket/test/generated/plan/plan_2023-08-02_12:15.yaml" + planFilePath = ${?PLAN_FILE_PATH} + taskFolderPath = "s3a://my-bucket/test/generated/task" + taskFolderPath = ${?TASK_FOLDER_PATH} + recordTrackingFolderPath = "s3a://my-bucket/test/generated/record-tracking" + 
recordTrackingFolderPath = ${?RECORD_TRACKING_FOLDER_PATH} +} + +metadata { + numRecordsFromDataSource = 10000 + numRecordsFromDataSource = ${?METADATA_NUM_RECORDS_FROM_DATA_SOURCE} + numRecordsForAnalysis = 10000 + numRecordsForAnalysis = ${?METADATA_NUM_RECORDS_FOR_ANALYSIS} + oneOfDistinctCountVsCountThreshold = 0.1 + oneOfDistinctCountVsCountThreshold = ${?METADATA_ONE_OF_DISTINCT_COUNT_VS_COUNT_THRESHOLD} +} + +generation {} + +runtime{ + master = "local[*]" + master = ${?DATA_CATERER_MASTER} + config { + "spark.sql.cbo.enabled" = "true" + "spark.sql.adaptive.enabled" = "true" + "spark.sql.cbo.planStats.enabled" = "true" + "spark.sql.legacy.allowUntypedScalaUDF" = "true" + "spark.sql.statistics.histogram.enabled" = "true" + "spark.sql.shuffle.partitions" = "10" + "spark.sql.catalog.postgres" = "" + "spark.sql.catalog.cassandra" = "com.datastax.spark.connector.datasource.CassandraCatalog" + "spark.hadoop.fs.s3a.directory.marker.retention" = "keep" + "spark.hadoop.fs.s3a.bucket.all.committer.magic.enabled" = "true" + "spark.hadoop.fs.defaultFS" = "s3a://my-bucket" + } +} + +csv { + csv { + path = "s3a://my-bucket/test/files/csv/transactions" + path = ${?CSV_PATH} + } +} \ No newline at end of file diff --git a/app/src/test/resources/application.conf b/app/src/test/resources/application.conf new file mode 100644 index 00000000..324db98d --- /dev/null +++ b/app/src/test/resources/application.conf @@ -0,0 +1,43 @@ +folders { + generatedPlanAndTaskFolderPath = "src/test/resources/sample" + planFilePath = "sample/plan/simple-json-plan.yaml" + taskFolderPath = "sample/task" + recordTrackingFolderPath = "src/test/resources/sample/data/recordTracking" +} + +flags { + enableCount = true + enableGenerateData = true + enableGeneratePlanAndTasks = true + enableRecordTracking = true + enableDeleteGeneratedRecords = false +} + +metadata { + numRecordsFromDataSource = 10000 + numRecordsForAnalysis = 10000 + oneOfDistinctCountVsCountThreshold = 0.1 +} + +generation {} + +runtime{ + master = "local[*]" + config { + "spark.sql.cbo.enabled" = "true" + "spark.sql.adaptive.enabled" = "true" + "spark.sql.cbo.planStats.enabled" = "true" + "spark.sql.legacy.allowUntypedScalaUDF" = "true" + "spark.sql.statistics.histogram.enabled" = "true" + "spark.sql.shuffle.partitions" = "10" + "spark.sql.catalog.postgres" = "" + "spark.sql.catalog.cassandra" = "com.datastax.spark.connector.datasource.CassandraCatalog" + "spark.hadoop.fs.s3a.directory.marker.retention" = "keep" + "spark.hadoop.fs.s3a.bucket.all.committer.magic.enabled" = "true" + } +} + +json { + account_json { + } +} \ No newline at end of file diff --git a/app/src/test/resources/datafaker/expressions.txt b/app/src/test/resources/datafaker/expressions.txt new file mode 100644 index 00000000..55799fb8 --- /dev/null +++ b/app/src/test/resources/datafaker/expressions.txt @@ -0,0 +1,1024 @@ +Address.buildingNumber +Address.city +Address.cityName +Address.cityPrefix +Address.citySuffix +Address.country +Address.countryCode +Address.countyByZipCode +Address.fullAddress +Address.latLon +Address.latitude +Address.lonLat +Address.longitude +Address.mailBox +Address.postcode +Address.secondaryAddress +Address.state +Address.stateAbbr +Address.streetAddress +Address.streetAddressNumber +Address.streetName +Address.streetPrefix +Address.streetSuffix +Address.timeZone +Address.zipCode +Address.zipCodeByState +Address.zipCodePlus4 +Ancient.god +Ancient.hero +Ancient.primordial +Ancient.titan +Animal.genus +Animal.name +Animal.scientificName +Animal.species +App.author 
+App.name +App.version +Appliance.brand +Appliance.equipment +AquaTeenHungerForce.character +Artist.name +Australia.animals +Australia.locations +Australia.states +Avatar.image +Aviation.METAR +Aviation.aircraft +Aviation.airline +Aviation.airport +Aviation.flight +Aws.accountId +Aws.acmARN +Aws.albARN +Aws.albTargetGroupARN +Aws.region +Aws.route53ZoneId +Aws.securityGroupId +Aws.subnetId +Aws.vpcId +Azure.appServiceEnvironment +Azure.appServicePlan +Azure.applicationGateway +Azure.bastionHost +Azure.containerApps +Azure.containerAppsEnvironment +Azure.containerInstance +Azure.containerRegistry +Azure.cosmosDBDatabase +Azure.firewall +Azure.keyVault +Azure.loadBalancer +Azure.loadTesting +Azure.logAnalytics +Azure.managementGroup +Azure.mysqlDatabase +Azure.networkSecurityGroup +Azure.postgreSQLDatabase +Azure.region +Azure.resourceGroup +Azure.serviceBus +Azure.serviceBusQueue +Azure.serviceBusTopic +Azure.sqlDatabase +Azure.staticWebApp +Azure.storageAccount +Azure.subscriptionId +Azure.tenantId +Azure.virtualMachine +Azure.virtualNetwork +Azure.virtualWan +Babylon5.character +Babylon5.quote +BackToTheFuture.character +BackToTheFuture.date +BackToTheFuture.quote +Barcode.type +Baseball.coaches +Baseball.players +Baseball.positions +Baseball.teams +Basketball.coaches +Basketball.players +Basketball.positions +Basketball.teams +Battlefield1.classes +Battlefield1.faction +Battlefield1.map +Battlefield1.vehicle +Battlefield1.weapon +Beer.brand +Beer.hop +Beer.malt +Beer.name +Beer.style +Beer.yeast +BigBangTheory.character +BigBangTheory.quote +BloodType.aboTypes +BloodType.bloodGroup +BloodType.pTypes +BloodType.rhTypes +BojackHorseman.characters +BojackHorseman.quotes +BojackHorseman.tongueTwisters +Book.author +Book.genre +Book.publisher +Book.title +BossaNova.artist +BossaNova.song +Brand.car +Brand.sport +Brand.watch +BreakingBad.character +BreakingBad.episode +BrooklynNineNine.characters +BrooklynNineNine.quotes +Buffy.bigBads +Buffy.celebrities +Buffy.characters +Buffy.episodes +Buffy.quotes +Business.creditCardExpiry +Business.creditCardNumber +Business.creditCardType +Business.securityCode +CNPJ.invalid +CNPJ.valid +CPF.invalid +CPF.valid +Camera.brand +Camera.brandWithModel +Camera.model +Cannabis.brands +Cannabis.buzzwords +Cannabis.cannabinoidAbbreviations +Cannabis.cannabinoids +Cannabis.categories +Cannabis.healthBenefits +Cannabis.medicalUses +Cannabis.strains +Cannabis.terpenes +Cannabis.types +Cat.breed +Cat.name +Cat.registry +Chess.opening +Chess.player +Chess.title +Chess.tournament +Chiquito.expressions +Chiquito.jokes +Chiquito.sentences +Chiquito.terms +ChuckNorris.fact +ClashOfClans.defensiveBuilding +ClashOfClans.rank +ClashOfClans.troop +Code.asin +Code.ean13 +Code.ean8 +Code.gtin13 +Code.gtin8 +Code.imei +Code.isbn10 +Code.isbn13 +Code.isbnGroup +Code.isbnGs1 +Code.isbnRegistrant +Coffee.blendName +Coffee.body +Coffee.country +Coffee.descriptor +Coffee.intensifier +Coffee.name1 +Coffee.name2 +Coffee.notes +Coffee.region +Coffee.variety +Coin.flip +Color.hex +Color.name +Commerce.brand +Commerce.department +Commerce.material +Commerce.price +Commerce.productName +Commerce.promotionCode +Commerce.vendor +Community.character +Community.quote +Company.bs +Company.buzzword +Company.catchPhrase +Company.industry +Company.logo +Company.name +Company.profession +Company.suffix +Company.url +Compass.abbreviation +Compass.azimuth +Compass.word +Computer.brand +Computer.linux +Computer.macos +Computer.operatingSystem +Computer.platform +Computer.type +Computer.windows 
+Construction.heavyEquipment +Construction.materials +Construction.roles +Construction.standardCostCodes +Construction.subcontractCategories +Construction.trades +Control.alteredItem +Control.alteredWorldEvent +Control.character +Control.hiss +Control.location +Control.objectOfPower +Control.quote +Control.theBoard +Cosmere.allomancers +Cosmere.aons +Cosmere.feruchemists +Cosmere.heralds +Cosmere.knightsRadiant +Cosmere.metals +Cosmere.shardWorlds +Cosmere.shards +Cosmere.sprens +Cosmere.surges +Country.capital +Country.countryCode2 +Country.countryCode3 +Country.currency +Country.currencyCode +Country.flag +Country.name +CowboyBebop.character +CowboyBebop.episode +CowboyBebop.quote +CowboyBebop.song +Cricket.formats +Cricket.players +Cricket.teams +Cricket.tournaments +CryptoCoin.coin +CultureSeries.books +CultureSeries.civs +CultureSeries.cultureShipClassAbvs +CultureSeries.cultureShipClasses +CultureSeries.cultureShips +CultureSeries.planets +Currency.code +Currency.name +DarkSouls.classes +DarkSouls.covenants +DarkSouls.shield +DarkSouls.stats +DateAndTime.between +DateAndTime.birthday +DateAndTime.future +DateAndTime.past +DcComics.hero +DcComics.heroine +DcComics.name +DcComics.title +DcComics.villain +Demographic.demonym +Demographic.educationalAttainment +Demographic.maritalStatus +Demographic.race +Demographic.sex +Departed.actor +Departed.character +Departed.quote +Dessert.flavor +Dessert.topping +Dessert.variety +DetectiveConan.characters +DetectiveConan.gadgets +DetectiveConan.vehicles +Device.manufacturer +Device.modelName +Device.platform +Device.serial +Disease.dermatology +Disease.dermatolory +Disease.gynecologyAndObstetrics +Disease.internalDisease +Disease.neurology +Disease.ophthalmologyAndOtorhinolaryngology +Disease.paediatrics +Disease.surgery +DoctorWho.actor +DoctorWho.catchPhrase +DoctorWho.character +DoctorWho.doctor +DoctorWho.quote +DoctorWho.species +DoctorWho.villain +Dog.age +Dog.breed +Dog.coatLength +Dog.gender +Dog.memePhrase +Dog.name +Dog.size +Dog.sound +Domain.firstLevelDomain +Domain.fullDomain +Domain.secondLevelDomain +Domain.validDomain +Doraemon.character +Doraemon.gadget +Doraemon.location +DragonBall.character +DrivingLicense.drivingLicense +Drone.batteryCapacity +Drone.batteryType +Drone.batteryVoltage +Drone.batteryWeight +Drone.chargingTemperature +Drone.flightTime +Drone.iso +Drone.maxAltitude +Drone.maxAngularVelocity +Drone.maxAscentSpeed +Drone.maxChargingPower +Drone.maxDescentSpeed +Drone.maxFlightDistance +Drone.maxResolution +Drone.maxShutterSpeed +Drone.maxSpeed +Drone.maxTiltAngle +Drone.maxWindResistance +Drone.minShutterSpeed +Drone.name +Drone.operatingTemperature +Drone.photoFormat +Drone.shutterSpeedUnits +Drone.videoFormat +Drone.weight +DumbAndDumber.actor +DumbAndDumber.character +DumbAndDumber.quote +Dune.character +Dune.planet +Dune.quote +Dune.saying +Dune.title +DungeonsAndDragons.alignments +DungeonsAndDragons.backgrounds +DungeonsAndDragons.cities +DungeonsAndDragons.klasses +DungeonsAndDragons.languages +DungeonsAndDragons.meleeWeapons +DungeonsAndDragons.monsters +DungeonsAndDragons.races +DungeonsAndDragons.rangedWeapons +Educator.campus +Educator.course +Educator.secondarySchool +Educator.subjectWithNumber +Educator.university +EldenRing.location +EldenRing.npc +EldenRing.skill +EldenRing.spell +EldenRing.weapon +ElderScrolls.city +ElderScrolls.creature +ElderScrolls.dragon +ElderScrolls.firstName +ElderScrolls.lastName +ElderScrolls.quote +ElderScrolls.race +ElderScrolls.region +ElectricalComponents.active 
+ElectricalComponents.electromechanical +ElectricalComponents.passive +Emoji.cat +Emoji.smiley +EnglandFootBall.league +EnglandFootBall.team +Esports.event +Esports.game +Esports.league +Esports.player +Esports.team +Fallout.character +Fallout.faction +Fallout.location +Fallout.quote +FamilyGuy.character +FamilyGuy.location +FamilyGuy.quote +FamousLastWords.lastWords +File.extension +File.fileName +File.mimeType +FinalSpace.character +FinalSpace.quote +FinalSpace.vehicle +Finance.bic +Finance.creditCard +Finance.iban +Finance.nasdaqTicker +Finance.nyseTicker +Finance.stockMarket +Food.dish +Food.fruit +Food.ingredient +Food.measurement +Food.spice +Food.sushi +Food.vegetable +Football.coaches +Football.competitions +Football.players +Football.positions +Football.teams +Formula1.circuit +Formula1.driver +Formula1.grandPrix +Formula1.team +FreshPrinceOfBelAir.celebrities +FreshPrinceOfBelAir.characters +FreshPrinceOfBelAir.quotes +Friends.character +Friends.location +Friends.quote +FullmetalAlchemist.character +FullmetalAlchemist.city +FullmetalAlchemist.country +FunnyName.name +Futurama.character +Futurama.hermesCatchPhrase +Futurama.location +Futurama.quote +GameOfThrones.character +GameOfThrones.city +GameOfThrones.dragon +GameOfThrones.house +GameOfThrones.quote +GarmentSize.size +Gender.binaryTypes +Gender.shortBinaryTypes +Gender.types +Ghostbusters.actor +Ghostbusters.character +Ghostbusters.quote +GratefulDead.players +GratefulDead.songs +GreekPhilosopher.name +GreekPhilosopher.quote +Hacker.abbreviation +Hacker.adjective +Hacker.ingverb +Hacker.noun +Hacker.verb +HalfLife.character +HalfLife.enemy +HalfLife.location +HarryPotter.book +HarryPotter.character +HarryPotter.house +HarryPotter.location +HarryPotter.quote +HarryPotter.spell +Hashing.md2 +Hashing.md5 +Hashing.sha1 +Hashing.sha256 +Hashing.sha384 +Hashing.sha512 +Hearthstone.mainCharacter +Hearthstone.mainPattern +Hearthstone.mainProfession +Hearthstone.standardRank +Hearthstone.wildRank +HeroesOfTheStorm.battleground +HeroesOfTheStorm.hero +HeroesOfTheStorm.heroClass +HeroesOfTheStorm.quote +HeyArnold.characters +HeyArnold.locations +HeyArnold.quotes +Hipster.word +HitchhikersGuideToTheGalaxy.character +HitchhikersGuideToTheGalaxy.location +HitchhikersGuideToTheGalaxy.marvinQuote +HitchhikersGuideToTheGalaxy.planet +HitchhikersGuideToTheGalaxy.quote +HitchhikersGuideToTheGalaxy.species +HitchhikersGuideToTheGalaxy.starship +Hobbit.character +Hobbit.location +Hobbit.quote +Hobbit.thorinsCompany +Hobby.activity +Hololive.talent +Horse.breed +Horse.name +House.furniture +House.room +HowIMetYourMother.catchPhrase +HowIMetYourMother.character +HowIMetYourMother.highFive +HowIMetYourMother.quote +HowToTrainYourDragon.characters +HowToTrainYourDragon.dragons +HowToTrainYourDragon.locations +IdNumber.inValidEnZaSsn +IdNumber.invalid +IdNumber.invalidEsMXSsn +IdNumber.invalidPtNif +IdNumber.invalidSvSeSsn +IdNumber.peselNumber +IdNumber.singaporeanFin +IdNumber.singaporeanFinBefore2000 +IdNumber.singaporeanUin +IdNumber.singaporeanUinBefore2000 +IdNumber.ssnValid +IdNumber.valid +IdNumber.validEnZaSsn +IdNumber.validEsMXSsn +IdNumber.validKoKrRrn +IdNumber.validPtNif +IdNumber.validSvSeSsn +IdNumber.validZhCNSsn +IndustrySegments.industry +IndustrySegments.sector +IndustrySegments.subSector +IndustrySegments.superSector +Internet.botUserAgent +Internet.botUserAgentAny +Internet.domainName +Internet.domainSuffix +Internet.domainWord +Internet.emailAddress +Internet.httpMethod +Internet.image +Internet.ipV4Address +Internet.ipV4Cidr 
+Internet.ipV6Address +Internet.ipV6Cidr +Internet.macAddress +Internet.password +Internet.privateIpV4Address +Internet.publicIpV4Address +Internet.safeEmailAddress +Internet.slug +Internet.url +Internet.userAgent +Internet.uuid +Internet.uuidv3 +Job.field +Job.keySkills +Job.position +Job.seniority +Job.title +Kaamelott.character +Kaamelott.quote +Kpop.boyBands +Kpop.girlGroups +Kpop.iGroups +Kpop.iiGroups +Kpop.iiiGroups +Kpop.solo +LeagueOfLegends.champion +LeagueOfLegends.location +LeagueOfLegends.masteries +LeagueOfLegends.quote +LeagueOfLegends.rank +LeagueOfLegends.summonerSpell +Lebowski.actor +Lebowski.character +Lebowski.quote +Locality.displayName +Locality.localeString +Locality.localeStringWithRandom +Locality.localeStringWithoutReplacement +LordOfTheRings.character +LordOfTheRings.location +Lorem.characters +Lorem.fixedString +Lorem.maxLengthSentence +Lorem.paragraph +Lorem.sentence +Lorem.word +Marketing.buzzwords +MarvelSnap.character +MarvelSnap.event +MarvelSnap.rank +MarvelSnap.zone +MassEffect.character +MassEffect.cluster +MassEffect.planet +MassEffect.quote +MassEffect.specie +Matz.quote +Mbti.characteristic +Mbti.merit +Mbti.name +Mbti.personage +Mbti.type +Mbti.weakness +Measurement.height +Measurement.length +Measurement.metricHeight +Measurement.metricLength +Measurement.metricVolume +Measurement.metricWeight +Measurement.volume +Measurement.weight +Medical.diagnosisCode +Medical.diseaseName +Medical.hospitalName +Medical.medicineName +Medical.procedureCode +Medical.symptoms +Military.airForceRank +Military.armyRank +Military.dodPaygrade +Military.marinesRank +Military.navyRank +Minecraft.animalName +Minecraft.entityName +Minecraft.itemName +Minecraft.monsterName +Minecraft.tileItemName +Minecraft.tileName +Money.currency +Money.currencyCode +MoneyHeist.character +MoneyHeist.heist +MoneyHeist.quote +Mood.emotion +Mood.feeling +Mood.tone +Mountain.name +Mountain.range +Mountaineering.mountaineer +Movie.quote +Music.chord +Music.genre +Music.instrument +Music.key +Myst.ages +Myst.characters +Myst.creatures +Myst.games +Myst.quotes +Name.firstName +Name.fullName +Name.lastName +Name.name +Name.nameWithMiddle +Name.prefix +Name.suffix +Name.title +Name.username +Naruto.character +Naruto.demon +Naruto.eye +Naruto.village +Nation.capitalCity +Nation.flag +Nation.isoCountry +Nation.isoLanguage +Nation.language +Nation.nationality +NatoPhoneticAlphabet.codeWord +NewGirl.characters +NewGirl.quotes +Nigeria.celebrities +Nigeria.food +Nigeria.name +Nigeria.places +Nigeria.schools +Number.digit +Number.digits +OlympicSport.ancientOlympics +OlympicSport.summerOlympics +OlympicSport.summerParalympics +OlympicSport.unusual +OlympicSport.winterOlympics +OlympicSport.winterParalympics +OnePiece.akumasNoMi +OnePiece.character +OnePiece.island +OnePiece.location +OnePiece.quote +OnePiece.sea +Options.option +OscarMovie.actor +OscarMovie.character +OscarMovie.getChoice +OscarMovie.getYear +OscarMovie.movieName +OscarMovie.quote +OscarMovie.releaseDate +Overwatch.hero +Overwatch.location +Overwatch.quote +Passport.valid +PhoneNumber.cellPhone +PhoneNumber.extension +PhoneNumber.phoneNumber +PhoneNumber.phoneNumberInternational +PhoneNumber.phoneNumberNational +PhoneNumber.subscriberNumber +Photography.aperture +Photography.brand +Photography.camera +Photography.genre +Photography.imageTag +Photography.iso +Photography.lens +Photography.shutter +Photography.term +Pokemon.location +Pokemon.move +Pokemon.name +Pokemon.type +PrincessBride.character +PrincessBride.quote 
+ProgrammingLanguage.creator +ProgrammingLanguage.name +Relationship.any +Relationship.direct +Relationship.extended +Relationship.inLaw +Relationship.parent +Relationship.sibling +Relationship.spouse +ResidentEvil.biologicalAgent +ResidentEvil.character +ResidentEvil.creature +ResidentEvil.equipment +ResidentEvil.location +Restaurant.description +Restaurant.name +Restaurant.namePrefix +Restaurant.nameSuffix +Restaurant.review +Restaurant.type +RickAndMorty.character +RickAndMorty.location +RickAndMorty.quote +Robin.quote +RockBand.name +RuPaulDragRace.queen +RuPaulDragRace.quote +Science.bosons +Science.element +Science.elementSymbol +Science.leptons +Science.quark +Science.scientist +Science.tool +Science.unit +Seinfeld.business +Seinfeld.character +Seinfeld.quote +Shakespeare.asYouLikeItQuote +Shakespeare.hamletQuote +Shakespeare.kingRichardIIIQuote +Shakespeare.romeoAndJulietQuote +Show.adultMusical +Show.kidsMusical +Show.play +SiliconValley.app +SiliconValley.character +SiliconValley.company +SiliconValley.email +SiliconValley.invention +SiliconValley.motto +SiliconValley.quote +SiliconValley.url +Simpsons.character +Simpsons.location +Simpsons.quote +Sip.bodyString +Sip.clientErrorResponsePhrase +Sip.contentType +Sip.globalErrorResponsePhrase +Sip.method +Sip.nameAddress +Sip.provisionalResponsePhrase +Sip.redirectResponsePhrase +Sip.serverErrorResponsePhrase +Sip.successResponsePhrase +Size.adjective +SlackEmoji.activity +SlackEmoji.celebration +SlackEmoji.custom +SlackEmoji.emoji +SlackEmoji.foodAndDrink +SlackEmoji.nature +SlackEmoji.objectsAndSymbols +SlackEmoji.people +SlackEmoji.travelAndPlaces +SonicTheHedgehog.character +SonicTheHedgehog.game +SonicTheHedgehog.zone +SoulKnight.bosses +SoulKnight.buffs +SoulKnight.characters +SoulKnight.enemies +SoulKnight.statues +SoulKnight.weapons +SouthPark.characters +SouthPark.quotes +Space.agency +Space.agencyAbbreviation +Space.company +Space.constellation +Space.distanceMeasurement +Space.galaxy +Space.meteorite +Space.moon +Space.nasaSpaceCraft +Space.nebula +Space.planet +Space.star +Space.starCluster +Spongebob.characters +Spongebob.episodes +Spongebob.quotes +StarCraft.building +StarCraft.character +StarCraft.planet +StarCraft.unit +StarTrek.character +StarTrek.klingon +StarTrek.location +StarTrek.species +StarTrek.villain +StarWars.alternateCharacterSpelling +StarWars.callSign +StarWars.character +StarWars.droids +StarWars.planets +StarWars.quotes +StarWars.species +StarWars.vehicles +StarWars.wookieWords +Stargate.characters +Stargate.planets +Stargate.quotes +Stock.nsdqSymbol +Stock.nyseSymbol +StrangerThings.character +StrangerThings.quote +StreetFighter.characters +StreetFighter.moves +StreetFighter.quotes +StreetFighter.stages +StudioGhibli.character +StudioGhibli.movie +StudioGhibli.quote +Subscription.paymentMethods +Subscription.paymentTerms +Subscription.plans +Subscription.statuses +Subscription.subscriptionTerms +Suits.characters +Suits.quotes +SuperMario.characters +SuperMario.games +SuperMario.locations +SuperSmashBros.fighter +SuperSmashBros.stage +Superhero.descriptor +Superhero.name +Superhero.power +Superhero.prefix +Superhero.suffix +Supernatural.character +Supernatural.creature +Supernatural.weapon +SwordArtOnline.gameName +SwordArtOnline.item +SwordArtOnline.location +SwordArtOnline.realName +Tea.type +Tea.variety +Team.creature +Team.name +Team.sport +Team.state +Text.text +TheExpanse.characters +TheExpanse.locations +TheExpanse.quotes +TheExpanse.ships +TheItCrowd.actors +TheItCrowd.characters 
+TheItCrowd.emails +TheItCrowd.quotes +TheKingkillerChronicle.book +TheKingkillerChronicle.character +TheKingkillerChronicle.creature +TheKingkillerChronicle.location +TheRoom.actors +TheRoom.characters +TheRoom.locations +TheRoom.quotes +TheThickOfIt.characters +TheThickOfIt.departments +TheThickOfIt.positions +TheVentureBros.character +TheVentureBros.organization +TheVentureBros.quote +TheVentureBros.vehicle +Time.between +Time.future +Time.past +Touhou.characterFirstName +Touhou.characterLastName +Touhou.characterName +Touhou.gameName +Touhou.trackName +Tron.alternateCharacterSpelling +Tron.character +Tron.game +Tron.location +Tron.quote +Tron.tagline +Tron.vehicle +TwinPeaks.character +TwinPeaks.location +TwinPeaks.quote +Twitter.getLink +Twitter.text +Twitter.twitterId +Twitter.userId +Twitter.userName +Unique.fetchFromYaml +University.name +University.prefix +University.suffix +VForVendetta.characters +VForVendetta.quotes +VForVendetta.speeches +Vehicle.carType +Vehicle.color +Vehicle.doors +Vehicle.driveType +Vehicle.engine +Vehicle.fuelType +Vehicle.licensePlate +Vehicle.make +Vehicle.makeAndModel +Vehicle.manufacturer +Vehicle.model +Vehicle.style +Vehicle.transmission +Vehicle.upholstery +Vehicle.upholsteryColor +Vehicle.upholsteryFabric +Vehicle.vin +Verb.base +Verb.ingForm +Verb.past +Verb.pastParticiple +Verb.simplePresent +VideoGame.genre +VideoGame.platform +VideoGame.title +Volleyball.coach +Volleyball.formation +Volleyball.player +Volleyball.position +Volleyball.team +WarhammerFantasy.creatures +WarhammerFantasy.factions +WarhammerFantasy.heros +WarhammerFantasy.locations +WarhammerFantasy.quotes +Weather.description +Weather.temperatureCelsius +Weather.temperatureFahrenheit +Witcher.book +Witcher.character +Witcher.location +Witcher.monster +Witcher.potion +Witcher.quote +Witcher.school +Witcher.sign +Witcher.witcher +WorldOfWarcraft.hero +WorldOfWarcraft.quotes +Yoda.quote +Zelda.character +Zelda.game +Zodiac.sign \ No newline at end of file diff --git a/app/src/test/resources/log4j2.properties b/app/src/test/resources/log4j2.properties new file mode 100644 index 00000000..1a02e10d --- /dev/null +++ b/app/src/test/resources/log4j2.properties @@ -0,0 +1,55 @@ +rootLogger.level = ${env:LOG_LEVEL:-info} +rootLogger.appenderRef.stdout.ref = console + +appender.console.type = Console +appender.console.name = console +appender.console.target = SYSTEM_OUT +appender.console.layout.type = PatternLayout +appender.console.layout.pattern = %d{dd/MM/yyyy HH:mm:ss} [%-5p] %c: %m%n%ex + +# Settings to quiet third party logs that are too verbose +logger.spark.name = org.apache.spark +logger.spark.level = error +logger.jetty.name = org.sparkproject.jetty +logger.jetty.level = warn +logger.jetty2.name = org.sparkproject.jetty.util.component.AbstractLifeCycle +logger.jetty2.level = error +logger.repl1.name = org.apache.spark.repl.SparkIMain$exprTyper +logger.repl1.level = info +logger.repl2.name = org.apache.spark.repl.SparkILoop$SparkILoopInterpreter +logger.repl2.level = info + +# Set the default spark-shell log level to WARN. When running the spark-shell, the +# log level for this class is used to overwrite the root logger's log level, so that +# the user can have different defaults for the shell and regular Spark apps. 
+logger.repl.name = org.apache.spark.repl.Main +logger.repl.level = warn + +# SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs +# in SparkSQL with Hive support +logger.hadoop.name = org.apache.hadoop +logger.hadoop.level = error +logger.metastore.name = org.apache.hadoop.hive.metastore.RetryingHMSHandler +logger.metastore.level = fatal +logger.hive_functionregistry.name = org.apache.hadoop.hive.ql.exec.FunctionRegistry +logger.hive_functionregistry.level = error + +# Parquet +logger.parquet.name = org.apache.parquet +logger.parquet.level = warn +logger.parquet1.name = org.apache.parquet.CorruptStatistics +logger.parquet1.level = error +logger.parquet2.name = parquet.CorruptStatistics +logger.parquet2.level = error + +# Datastax +logger.dse.name = com.datastax +logger.dse.level = warn + +# Solace +logger.solace.name = com.solacesystems.jcsmp +logger.solace.level = warn + +# Kafka +logger.kafka.name = org.apache.kafka +logger.kafka.level = warn diff --git a/app/src/test/resources/sample/conf/mysql.conf b/app/src/test/resources/sample/conf/mysql.conf new file mode 100644 index 00000000..8f8d81c7 --- /dev/null +++ b/app/src/test/resources/sample/conf/mysql.conf @@ -0,0 +1,46 @@ +folders { + generatedPlanAndTaskFolderPath = "/tmp" + generatedPlanAndTaskFolderPath = ${?GENERATED_PLAN_AND_TASK_FOLDER_PATH} + planFilePath = "/plan/customer-create-plan.yaml" + planFilePath = ${?PLAN_FILE_PATH} + taskFolderPath = "/task" + taskFolderPath = ${?TASK_FOLDER_PATH} + recordTrackingFolderPath = "/data/generated/recordTracking" + recordTrackingFolderPath = ${?RECORD_TRACKING_FOLDER_PATH} +} + +flags { + enableCount = false + enableCount = ${?ENABLE_COUNT} + enableGenerateData = true + enableGenerateData = ${?ENABLE_GENERATE_DATA} + enableGeneratePlanAndTasks = true + enableGeneratePlanAndTasks = ${?ENABLE_GENERATE_PLAN_AND_TASKS} + enableRecordTracking = true + enableRecordTracking = ${?ENABLE_RECORD_TRACKING} + enableDeleteGeneratedRecords = false + enableDeleteGeneratedRecords = ${?ENABLE_DELETE_GENERATED_RECORDS} +} + +metadata { + numRecordsFromDataSource = 10000 + numRecordsForAnalysis = 10000 + oneOfDistinctCountVsCountThreshold = 0.1 +} + +runtime{ + master = "local[*]" + master = ${?DATA_CATERER_MASTER} +} + +jdbc { + mysql { + url = "jdbc:mysql://localhost:3306/customer" + url = ${?MYSQL_URL} + user = "root" + user = ${?MYSQL_USERNAME} + password = "root" + password = ${?MYSQL_PASSWORD} + driver = "com.mysql.cj.jdbc.Driver" + } +} diff --git a/app/src/test/resources/sample/cql/customer.cql b/app/src/test/resources/sample/cql/customer.cql new file mode 100644 index 00000000..75683c1f --- /dev/null +++ b/app/src/test/resources/sample/cql/customer.cql @@ -0,0 +1,28 @@ +CREATE +KEYSPACE IF NOT EXISTS account +WITH REPLICATION = { + 'class' : 'SimpleStrategy', + 'replication_factor' : 1 +}; +CREATE TABLE IF NOT EXISTS account.accounts +( + account_id TEXT, + amount DOUBLE, + created_by TEXT, + name TEXT, + open_time TIMESTAMP, + status TEXT, + PRIMARY KEY (account_id) +); +CREATE TABLE IF NOT EXISTS account.account_status_history +( + account_id TEXT, + eod_date DATE, + status TEXT, + updated_by TEXT, + updated_time TIMESTAMP, + PRIMARY KEY ((account_id), + eod_date +) + ); + diff --git a/app/src/test/resources/sample/files/avro/avro_schema.avsc b/app/src/test/resources/sample/files/avro/avro_schema.avsc new file mode 100644 index 00000000..53560d9b --- /dev/null +++ b/app/src/test/resources/sample/files/avro/avro_schema.avsc @@ -0,0 +1,61 @@ +{ + "type": "record", + 
"name": "userInfo", + "namespace": "my.example", + "fields": [ + { + "name": "username", + "type": "string", + "default": "NONE" + }, + { + "name": "age", + "type": "int", + "default": -1 + }, + { + "name": "phone", + "type": "string", + "default": "NONE" + }, + { + "name": "housenum", + "type": "string", + "default": "NONE" + }, + { + "name": "address", + "type": { + "type": "record", + "name": "mailing_address", + "fields": [ + { + "name": "street", + "type": "string", + "default": "NONE" + }, + { + "name": "city", + "type": "string", + "default": "NONE" + }, + { + "name": "state_prov", + "type": "string", + "default": "NONE" + }, + { + "name": "country", + "type": "string", + "default": "NONE" + }, + { + "name": "zip", + "type": "string", + "default": "NONE" + } + ] + } + } + ] +} \ No newline at end of file diff --git a/app/src/test/resources/sample/files/avro/users.avro b/app/src/test/resources/sample/files/avro/users.avro new file mode 100644 index 0000000000000000000000000000000000000000..27c526ab114b2f42f6d4e13325c373706ba0f880 GIT binary patch literal 334 zcmeZI%3@>@ODrqO*DFrWNX<=rz+A0VQdy9yWTl`~l$xAhl%k}gpp=)Gn_66um<$$9 ztw_u*$Vt@$>4Hgul!q3l7J>L_nW;G`#Xym0gi*yMMVWc&$f`j`D%I*Jz|}-6At@@& z$x(`hS`0EfEwL=WD6=FrJ~=-pzX(NNwGvP~7i6DOW?l)%3Yhy7i;5B}L2AM7M=>U^ zG&d==s932swpIk}`{ewT)MSo4puG%vlk4vPb+WF0^sw`-e)omlECxJ|IhDo5iA)@9 TLUI}mY)+|p3~WWIDHtjNiNSH? literal 0 HcmV?d00001 diff --git a/app/src/test/resources/sample/files/csv/account/account.csv b/app/src/test/resources/sample/files/csv/account/account.csv new file mode 100644 index 00000000..ca911a53 --- /dev/null +++ b/app/src/test/resources/sample/files/csv/account/account.csv @@ -0,0 +1,2 @@ +account_number,name +acc123,peter \ No newline at end of file diff --git a/app/src/test/resources/sample/files/csv/json/sample.json b/app/src/test/resources/sample/files/csv/json/sample.json new file mode 100644 index 00000000..b94ab013 --- /dev/null +++ b/app/src/test/resources/sample/files/csv/json/sample.json @@ -0,0 +1,13 @@ +{ + "account-id": "ACC213813", + "name": "Peter", + "balance": 10.2, + "details": { + "date-of-birth": "01-01-1970", + "phone": "123456789", + "updated": { + "user": "peter", + "time": "01-01-2023 00:00:00" + } + } +} \ No newline at end of file diff --git a/app/src/test/resources/sample/files/csv/transactions/part-00000-867c895a-43b3-4649-88f2-318c4bd69f8b-c000.csv b/app/src/test/resources/sample/files/csv/transactions/part-00000-867c895a-43b3-4649-88f2-318c4bd69f8b-c000.csv new file mode 100644 index 00000000..1a358152 --- /dev/null +++ b/app/src/test/resources/sample/files/csv/transactions/part-00000-867c895a-43b3-4649-88f2-318c4bd69f8b-c000.csv @@ -0,0 +1,701 @@ +2021,87.13784836161501,2021-03-16,ACC1134921116,Lavona Thiel +2021,47.39039885805337,2021-01-16,ACC1134921116,Lavona Thiel +2022,28.7360236730764,2021-03-04,ACC1134921116,Lavona Thiel +2022,20.110288063865347,2021-08-12,ACC1134921116,Lavona Thiel +2022,97.2569591029022,2021-07-26,ACC1134921116,Lavona Thiel +2022,44.35501272950884,2021-10-27,ACC1134921116,Lavona Thiel +2022,85.36841943799475,2021-03-04,ACC1320511295,Khalilah Boyle +2021,35.51624404723664,2021-02-27,ACC1320511295,Khalilah Boyle +2022,74.43627395955482,2021-09-24,ACC1320511295,Khalilah Boyle +2021,39.1562195979956,2021-08-31,ACC1320511295,Khalilah Boyle +2021,64.69639489544096,2021-11-02,ACC1320511295,Khalilah Boyle +2021,95.4082599282694,2021-05-17,ACC1320511295,Khalilah Boyle +2021,23.18757061533575,2021-01-17,ACC1320511295,Khalilah Boyle 
+2022,53.08039433420156,2021-05-04,ACC1320511295,Khalilah Boyle +2022,70.76051305794785,2021-02-01,ACC1320511295,Khalilah Boyle +2021,21.206426759088806,2021-08-04,ACC1014210858,Mrs. Isreal Donnelly +2021,66.38853741114673,2021-10-18,ACC1014210858,Mrs. Isreal Donnelly +2022,21.061669006907323,2021-02-19,ACC1014210858,Mrs. Isreal Donnelly +2021,88.15532063066172,2021-10-15,ACC1014210858,Mrs. Isreal Donnelly +2022,17.89590895302367,2021-08-29,ACC1229428673,Carylon Buckridge +2022,61.72587535535875,2021-08-22,ACC1229428673,Carylon Buckridge +2022,46.96507037276361,2021-10-29,ACC1022589278,Karl King +2021,45.19595915616625,2021-08-10,ACC1022589278,Karl King +2022,46.18922777372392,2021-05-26,ACC1022589278,Karl King +2022,11.772056371725427,2021-01-06,ACC1022589278,Karl King +2021,16.441216136140415,2021-12-20,ACC1449097039,Dagmar VonRueden +2021,15.301641058859952,2021-06-15,ACC1449097039,Dagmar VonRueden +2021,54.32778787546308,2021-06-08,ACC1449097039,Dagmar VonRueden +2022,51.98049699851675,2021-05-02,ACC1449097039,Dagmar VonRueden +2021,66.77331894766256,2021-07-25,ACC1449097039,Dagmar VonRueden +2022,78.0125707080575,2021-09-17,ACC1449097039,Dagmar VonRueden +2022,64.8777516004898,2021-01-27,ACC1449097039,Dagmar VonRueden +2022,93.4802010970121,2021-10-02,ACC1449097039,Dagmar VonRueden +2022,11.158587987618917,2021-02-07,ACC1449097039,Dagmar VonRueden +2022,76.35893774464338,2021-06-24,ACC1449097039,Dagmar VonRueden +2021,53.05200007156093,2021-09-27,ACC1473100208,Garth Doyle Sr. +2021,94.73189338589717,2021-12-04,ACC1473100208,Garth Doyle Sr. +2021,68.16114201979659,2021-05-17,ACC1473100208,Garth Doyle Sr. +2021,81.64807479514467,2021-05-23,ACC1473100208,Garth Doyle Sr. +2021,31.038660975853386,2021-12-18,ACC1473100208,Garth Doyle Sr. +2021,67.60166085456098,2021-08-07,ACC1473100208,Garth Doyle Sr. +2022,13.600486818401922,2021-01-16,ACC1473100208,Garth Doyle Sr. +2022,11.868809974088318,2021-11-24,ACC1473100208,Garth Doyle Sr. +2021,89.20584415057519,2021-02-25,ACC1473100208,Garth Doyle Sr. +2021,85.61278831991136,2021-12-11,ACC1473100208,Garth Doyle Sr. 
+2021,79.78883604038658,2021-08-31,ACC1129706063,Thora Kerluke +2022,53.78422603951637,2021-11-06,ACC1129706063,Thora Kerluke +2022,59.09876359798169,2021-07-23,ACC1129706063,Thora Kerluke +2021,91.27972446136735,2021-06-17,ACC1129706063,Thora Kerluke +2021,56.90410110663898,2021-06-09,ACC1129706063,Thora Kerluke +2022,66.84301949821881,2021-03-14,ACC1129706063,Thora Kerluke +2022,48.94891405142438,2021-03-20,ACC1129706063,Thora Kerluke +2022,37.718940225057125,2021-05-01,ACC1129706063,Thora Kerluke +2021,71.94593362463843,2021-05-26,ACC1360966995,Genaro Ullrich +2022,17.63761555658976,2021-02-24,ACC1360966995,Genaro Ullrich +2022,28.274944783090415,2021-02-22,ACC1360966995,Genaro Ullrich +2022,94.54701618323666,2021-06-27,ACC1360966995,Genaro Ullrich +2022,28.402321363500423,2021-08-06,ACC1360966995,Genaro Ullrich +2022,79.22759122653807,2021-05-25,ACC1360966995,Genaro Ullrich +2021,87.73332918869038,2021-06-01,ACC1708993707,Leandro Weimann MD +2022,74.48942921003018,2021-07-26,ACC1708993707,Leandro Weimann MD +2021,74.53661948035507,2021-04-25,ACC1529152288,Mi Bernier +2022,74.43591156202785,2021-02-16,ACC1529152288,Mi Bernier +2021,78.38622216798169,2021-06-13,ACC1529152288,Mi Bernier +2022,38.87664931057654,2021-03-03,ACC1529152288,Mi Bernier +2022,84.48491739717542,2021-09-30,ACC1529152288,Mi Bernier +2022,99.0672895196366,2021-09-06,ACC1529152288,Mi Bernier +2021,95.62595612489272,2021-09-09,ACC1932281107,Shondra Balistreri +2021,85.106412124324,2021-09-28,ACC1932281107,Shondra Balistreri +2021,60.05338963199713,2021-11-21,ACC1932281107,Shondra Balistreri +2022,33.84521295267906,2021-10-13,ACC1932281107,Shondra Balistreri +2022,82.92876967656665,2021-10-10,ACC1932281107,Shondra Balistreri +2022,21.281469915488735,2021-10-08,ACC1932281107,Shondra Balistreri +2021,40.357190367338475,2021-06-15,ACC1932281107,Shondra Balistreri +2021,34.753967395289976,2021-06-30,ACC1932281107,Shondra Balistreri +2022,87.52910682285474,2021-07-09,ACC1413828100,Mr. Willie Boyle +2022,15.321993496659978,2021-12-01,ACC1413828100,Mr. Willie Boyle +2022,79.35442790611636,2021-08-21,ACC1413828100,Mr. Willie Boyle +2022,78.03747711337073,2021-01-19,ACC1413828100,Mr. Willie Boyle +2022,34.70499162582453,2021-04-01,ACC1413828100,Mr. Willie Boyle +2021,13.172254172572195,2021-06-19,ACC1413828100,Mr. Willie Boyle +2021,63.69389782610725,2021-12-01,ACC1413828100,Mr. Willie Boyle +2022,80.77216211229373,2021-11-24,ACC1413828100,Mr. Willie Boyle +2021,79.04045324683683,2021-10-22,ACC1413828100,Mr. Willie Boyle +2021,65.73082951012324,2021-03-28,ACC1975063388,Solomon Lockman +2022,32.32983740608609,2021-07-11,ACC1975063388,Solomon Lockman +2021,13.170648665193003,2021-08-23,ACC1277468038,Ms. Fritz Hessel +2022,41.76100258656753,2021-03-13,ACC1277468038,Ms. Fritz Hessel +2021,94.92064172188755,2021-10-14,ACC1714384035,Mikel Nicolas +2022,95.23213625443033,2021-08-07,ACC1714384035,Mikel Nicolas +2021,15.721071320535977,2021-11-18,ACC1714384035,Mikel Nicolas +2022,26.282392262913362,2021-01-12,ACC1714384035,Mikel Nicolas +2021,16.838068507891634,2021-06-22,ACC1714384035,Mikel Nicolas +2021,57.017133216711976,2021-05-02,ACC1229093509,Dr. Venus Walker +2021,59.58289694537715,2021-07-27,ACC1229093509,Dr. Venus Walker +2021,23.58372780123149,2021-08-26,ACC1229093509,Dr. Venus Walker +2022,60.116851344640835,2021-11-14,ACC1229093509,Dr. Venus Walker +2021,31.0230330648585,2021-08-01,ACC1229093509,Dr. Venus Walker +2022,16.75814499850788,2021-02-11,ACC1229093509,Dr. 
Venus Walker +2022,73.32908798765487,2021-08-01,ACC1229093509,Dr. Venus Walker +2021,36.31674540968571,2021-08-12,ACC1229093509,Dr. Venus Walker +2022,22.372135769505526,2021-02-04,ACC1984138884,Omar Crona +2021,54.770809139850634,2021-06-26,ACC1984138884,Omar Crona +2022,31.17980511048593,2021-08-12,ACC1984138884,Omar Crona +2022,79.13008566100929,2021-06-11,ACC1984138884,Omar Crona +2021,29.74706184500333,2021-12-02,ACC1984138884,Omar Crona +2022,72.990820631233,2021-05-04,ACC1984138884,Omar Crona +2021,26.336670822364542,2021-06-08,ACC1984138884,Omar Crona +2022,79.7254427226794,2021-07-17,ACC1984138884,Omar Crona +2021,98.26688408209212,2021-02-08,ACC1984138884,Omar Crona +2021,55.32358553289935,2021-11-26,ACC1299264368,Ms. Ramiro Heller +2022,32.045266026915634,2021-05-07,ACC1299264368,Ms. Ramiro Heller +2021,71.50566185185198,2021-05-08,ACC1673970850,Demetrius Durgan +2021,21.727386501646293,2021-08-31,ACC1673970850,Demetrius Durgan +2022,24.829442014523558,2021-01-07,ACC1673970850,Demetrius Durgan +2021,96.33811593131489,2021-06-16,ACC1673970850,Demetrius Durgan +2022,43.92705974797744,2021-01-30,ACC1931202909,Anthony Barton DVM +2022,25.869253858667733,2021-01-25,ACC1931202909,Anthony Barton DVM +2021,77.27774179524901,2021-06-27,ACC1931202909,Anthony Barton DVM +2021,64.76590887421055,2021-10-05,ACC1931202909,Anthony Barton DVM +2021,70.8958518177289,2021-01-29,ACC1035534293,Mrs. Sam Hickle +2021,65.6960924661859,2021-10-23,ACC1035534293,Mrs. Sam Hickle +2021,88.74167865882275,2021-04-01,ACC1035534293,Mrs. Sam Hickle +2022,50.518742243960226,2021-10-11,ACC1035534293,Mrs. Sam Hickle +2022,43.8417042903331,2021-04-21,ACC1985622913,Jonathan Nader +2021,41.663669306913036,2021-10-25,ACC1985622913,Jonathan Nader +2021,94.0796712846888,2021-11-21,ACC1985622913,Jonathan Nader +2022,65.90921872993553,2021-07-05,ACC1985622913,Jonathan Nader +2022,28.289247657949048,2021-05-20,ACC1985622913,Jonathan Nader +2021,49.907458249009224,2021-04-03,ACC1985622913,Jonathan Nader +2021,95.13215441583519,2021-05-31,ACC1985622913,Jonathan Nader +2021,37.63225429052132,2021-03-06,ACC1985622913,Jonathan Nader +2021,17.050082004654442,2021-09-11,ACC1985622913,Jonathan Nader +2022,71.17204946730581,2021-04-23,ACC1985622913,Jonathan Nader +2021,41.95162798729256,2021-04-11,ACC1491260688,Ms. Necole Abernathy +2022,93.6645126261991,2021-07-10,ACC1491260688,Ms. Necole Abernathy +2022,70.7476258861858,2021-02-21,ACC1491260688,Ms. Necole Abernathy +2022,10.693390106402274,2021-12-07,ACC1491260688,Ms. Necole Abernathy +2022,18.695018702293453,2021-03-14,ACC1491260688,Ms. Necole Abernathy +2022,21.763421042658113,2021-05-26,ACC1491260688,Ms. 
Necole Abernathy +2021,78.3668477412702,2021-08-19,ACC1471710938,Luvenia Dickens +2021,67.63919495243789,2021-12-08,ACC1471710938,Luvenia Dickens +2022,27.433201596687773,2021-05-22,ACC1471710938,Luvenia Dickens +2021,90.89509247999509,2021-12-21,ACC1471710938,Luvenia Dickens +2022,23.806767114394574,2021-04-26,ACC1471710938,Luvenia Dickens +2022,81.82886918409686,2021-08-25,ACC1471710938,Luvenia Dickens +2022,65.24116876705878,2021-06-12,ACC1661555556,Jamey Schiller +2021,76.41133894424375,2021-09-19,ACC1756466050,Hyman Bogan +2021,72.63763047952636,2021-06-27,ACC1756466050,Hyman Bogan +2022,28.04259860751223,2021-07-17,ACC1756466050,Hyman Bogan +2022,63.308413237475506,2021-05-18,ACC1126460970,Jed Kozey I +2022,87.91355150353652,2021-09-20,ACC1126460970,Jed Kozey I +2021,62.40331191376968,2021-07-17,ACC1126460970,Jed Kozey I +2021,95.77320409657763,2021-05-30,ACC1126460970,Jed Kozey I +2022,44.746673412345764,2021-10-24,ACC1126460970,Jed Kozey I +2021,85.19624508957251,2021-01-22,ACC1126460970,Jed Kozey I +2022,55.66730362951218,2021-04-04,ACC1542407923,Wilbert Medhurst +2022,75.97669406991483,2021-08-07,ACC1542407923,Wilbert Medhurst +2021,33.269865978981365,2021-04-04,ACC1542407923,Wilbert Medhurst +2022,68.48501985289082,2021-12-28,ACC1542407923,Wilbert Medhurst +2021,56.60741513771394,2021-09-24,ACC1542407923,Wilbert Medhurst +2021,49.32111627153106,2021-10-23,ACC1542407923,Wilbert Medhurst +2022,63.60452943596929,2021-12-29,ACC1542407923,Wilbert Medhurst +2022,96.95707425285134,2021-03-15,ACC1341395810,Laverne Becker +2021,68.83053924312684,2021-01-04,ACC1341395810,Laverne Becker +2021,13.935272838832736,2021-01-02,ACC1341395810,Laverne Becker +2021,76.31451835931898,2021-05-15,ACC1341395810,Laverne Becker +2022,22.01154939073978,2021-04-11,ACC1341395810,Laverne Becker +2021,81.03426258354614,2021-05-16,ACC1341395810,Laverne Becker +2021,26.192106626072853,2021-04-02,ACC1341395810,Laverne Becker +2022,50.68523293806917,2021-01-06,ACC1341395810,Laverne Becker +2022,95.3762931335874,2021-01-28,ACC1296902740,Dr. Denis Russel +2021,83.29885535293235,2021-10-21,ACC1296902740,Dr. Denis Russel +2021,35.64466044091354,2021-12-27,ACC1296902740,Dr. Denis Russel +2022,82.36701735508578,2021-11-02,ACC1296902740,Dr. 
Denis Russel +2022,27.093765595877525,2021-08-29,ACC1779410417,Lisabeth Gleason +2022,61.57753684064216,2021-10-24,ACC1779410417,Lisabeth Gleason +2022,52.465371254432654,2021-07-21,ACC1779410417,Lisabeth Gleason +2021,65.83292582877539,2021-03-05,ACC1779410417,Lisabeth Gleason +2022,45.655105900815734,2021-06-19,ACC1779410417,Lisabeth Gleason +2021,80.87837073000618,2021-09-10,ACC1779410417,Lisabeth Gleason +2021,88.34346914143984,2021-11-26,ACC1779410417,Lisabeth Gleason +2021,44.25058312304463,2021-03-05,ACC1581335168,Danae Leannon +2021,92.31865272056035,2021-01-28,ACC1581335168,Danae Leannon +2021,25.824080356226183,2021-04-01,ACC1581335168,Danae Leannon +2022,16.87896341367908,2021-10-10,ACC1581335168,Danae Leannon +2021,21.481683388582677,2021-04-10,ACC1581335168,Danae Leannon +2021,73.1897444574754,2021-03-02,ACC1581335168,Danae Leannon +2022,70.20474420848704,2021-02-14,ACC1581335168,Danae Leannon +2021,88.73898885889403,2021-02-01,ACC1684165844,Blair Beatty +2022,16.44413759653184,2021-06-28,ACC1684165844,Blair Beatty +2022,19.653969283248102,2021-01-23,ACC1684165844,Blair Beatty +2021,62.153629036237554,2021-05-15,ACC1684165844,Blair Beatty +2022,61.8230822198196,2021-05-01,ACC1684165844,Blair Beatty +2022,75.54752873059789,2021-02-24,ACC1684165844,Blair Beatty +2022,32.95381173381071,2021-07-07,ACC1344015747,Ms. Wilbur King +2021,39.23168905241252,2021-01-01,ACC1344015747,Ms. Wilbur King +2021,94.81511388835278,2021-09-23,ACC1344015747,Ms. Wilbur King +2022,66.4363877359281,2021-02-07,ACC1344015747,Ms. Wilbur King +2021,97.49267980504852,2021-11-05,ACC1344015747,Ms. Wilbur King +2021,72.73346256933742,2021-11-28,ACC1344015747,Ms. Wilbur King +2022,97.34356843405968,2021-08-06,ACC1344015747,Ms. Wilbur King +2021,54.725957767422585,2021-04-28,ACC1344015747,Ms. Wilbur King +2021,90.23196734847959,2021-05-10,ACC1708948004,Mr. Dirk Heller +2022,52.884482406135376,2021-07-30,ACC1755873865,Hwa Bayer DVM +2022,89.84901311793968,2021-03-05,ACC1755873865,Hwa Bayer DVM +2022,76.22983839298553,2021-04-12,ACC1477526249,Mrs. Kisha Spinka +2021,99.05656173774499,2021-10-25,ACC1477526249,Mrs. Kisha Spinka +2022,52.60797520943379,2021-03-26,ACC1477526249,Mrs. 
Kisha Spinka +2021,90.52316148003771,2021-10-15,ACC1934295246,Ilda Schumm +2022,35.56419666974888,2021-06-22,ACC1934295246,Ilda Schumm +2022,70.05057741512894,2021-10-22,ACC1934295246,Ilda Schumm +2021,97.72512784822297,2021-05-14,ACC1934295246,Ilda Schumm +2021,22.43019672641947,2021-05-08,ACC1934295246,Ilda Schumm +2022,31.505527701212262,2021-03-29,ACC1991272100,Millard Terry +2021,26.5236048125083,2021-08-22,ACC1991272100,Millard Terry +2021,58.87553856258007,2021-05-22,ACC1991272100,Millard Terry +2022,92.42257644391255,2021-09-19,ACC1991272100,Millard Terry +2022,27.91057342019791,2021-07-01,ACC1991272100,Millard Terry +2022,78.81080479056773,2021-05-08,ACC1616491054,Vicky Ankunding III +2022,98.77788181757691,2021-06-01,ACC1616491054,Vicky Ankunding III +2022,16.50343033502516,2021-12-28,ACC1616491054,Vicky Ankunding III +2022,84.86676276101797,2021-04-18,ACC1616491054,Vicky Ankunding III +2022,93.46270110013135,2021-01-25,ACC1616491054,Vicky Ankunding III +2021,92.84487950813725,2021-04-16,ACC1616491054,Vicky Ankunding III +2021,74.43509290959116,2021-08-11,ACC1720466055,Frederic Roberts +2021,83.2243323472753,2021-05-30,ACC1720466055,Frederic Roberts +2021,83.79570851660675,2021-12-17,ACC1720466055,Frederic Roberts +2021,92.2923284708361,2021-11-11,ACC1720466055,Frederic Roberts +2022,67.92160118666513,2021-04-05,ACC1720466055,Frederic Roberts +2022,84.87545294403012,2021-05-18,ACC1720466055,Frederic Roberts +2021,41.460964225155486,2021-09-02,ACC1819141248,Jean Lynch +2022,93.58466927368367,2021-08-19,ACC1650985469,Odell Fisher +2021,67.89512065311237,2021-02-13,ACC1650985469,Odell Fisher +2022,79.4461380721329,2021-10-21,ACC1650985469,Odell Fisher +2022,15.233405650082535,2021-05-19,ACC1650985469,Odell Fisher +2022,62.44115198769262,2021-01-21,ACC1650985469,Odell Fisher +2021,91.19719113789704,2021-05-03,ACC1650985469,Odell Fisher +2021,77.30017526710314,2021-02-10,ACC1552405026,Luciana Schimmel +2021,89.96490291606244,2021-04-02,ACC1552405026,Luciana Schimmel +2022,17.439891193480328,2021-09-27,ACC1552405026,Luciana Schimmel +2021,13.827625050429257,2021-03-12,ACC1813901573,Eveline Waelchi +2022,94.01214868390038,2021-08-16,ACC1813901573,Eveline Waelchi +2021,37.51290505445759,2021-11-23,ACC1813901573,Eveline Waelchi +2021,78.11924495621139,2021-07-15,ACC1383206335,Chet Trantow V +2021,12.639172962602602,2021-12-21,ACC1269169558,Scot Stehr +2022,76.10866754189927,2021-12-23,ACC1269169558,Scot Stehr +2021,15.57332279509927,2021-08-09,ACC1269169558,Scot Stehr +2021,77.04451244801018,2021-06-30,ACC1269169558,Scot Stehr +2022,81.90608184940083,2021-07-21,ACC1269169558,Scot Stehr +2021,54.483424477704716,2021-08-15,ACC1269169558,Scot Stehr +2021,55.900028130796265,2021-08-10,ACC1269169558,Scot Stehr +2021,91.06895693547443,2021-08-23,ACC1597621068,Alise Hintz +2021,64.47170220615348,2021-06-27,ACC1597621068,Alise Hintz +2021,91.00705189960605,2021-03-07,ACC1597621068,Alise Hintz +2022,23.65565356788506,2021-04-26,ACC1597621068,Alise Hintz +2021,34.823640440790825,2021-10-06,ACC1597621068,Alise Hintz +2022,44.67034859003486,2021-11-28,ACC1597621068,Alise Hintz +2021,46.11113195545536,2021-09-27,ACC1597621068,Alise Hintz +2021,25.50732863535722,2021-10-19,ACC1597621068,Alise Hintz +2022,23.970952344175615,2021-03-05,ACC1645829790,Miss Lauren Brown +2021,22.87804964231991,2021-12-25,ACC1645829790,Miss Lauren Brown +2022,52.201291244510486,2021-10-18,ACC1645829790,Miss Lauren Brown +2021,58.586594182737535,2021-11-24,ACC1645829790,Miss Lauren Brown 
+2021,87.13591679692514,2021-03-26,ACC1645829790,Miss Lauren Brown +2021,68.37202839098298,2021-10-02,ACC1135933062,Griselda Cartwright +2021,77.44538357447044,2021-10-06,ACC1135933062,Griselda Cartwright +2022,45.732561436007536,2021-09-06,ACC1135933062,Griselda Cartwright +2021,71.48400881284998,2021-05-26,ACC1135933062,Griselda Cartwright +2021,22.107487171317175,2021-04-24,ACC1135933062,Griselda Cartwright +2021,96.38523386533042,2021-08-15,ACC1135933062,Griselda Cartwright +2021,58.265539081861554,2021-11-16,ACC1135933062,Griselda Cartwright +2022,94.82641354035232,2021-07-13,ACC1745743056,Randall Dietrich +2021,63.78429015501521,2021-09-17,ACC1745743056,Randall Dietrich +2022,73.8677199688766,2021-06-12,ACC1745743056,Randall Dietrich +2021,28.58934532322926,2021-10-21,ACC1745743056,Randall Dietrich +2021,77.72663757049072,2021-07-01,ACC1745743056,Randall Dietrich +2022,62.123587074884625,2021-01-15,ACC1745743056,Randall Dietrich +2022,70.21348168996246,2021-07-28,ACC1745743056,Randall Dietrich +2021,63.389778208656715,2021-06-23,ACC1745743056,Randall Dietrich +2021,21.466115551522307,2021-10-24,ACC1745743056,Randall Dietrich +2022,14.631139787175828,2021-02-12,ACC1745743056,Randall Dietrich +2021,49.95439468995879,2021-12-19,ACC1466679822,Ned Tremblay DVM +2021,29.39354510893611,2021-03-19,ACC1444416188,Kimberli O'Kon +2022,89.58122775420168,2021-12-07,ACC1253308326,Miss Alfonso Weissnat +2022,54.856609488631,2021-01-02,ACC1253308326,Miss Alfonso Weissnat +2022,74.80340476169837,2021-05-24,ACC1253308326,Miss Alfonso Weissnat +2021,87.58321995582614,2021-03-17,ACC1253308326,Miss Alfonso Weissnat +2022,42.259953553886476,2021-09-21,ACC1253308326,Miss Alfonso Weissnat +2022,85.9203813915192,2021-09-23,ACC1253308326,Miss Alfonso Weissnat +2021,72.99890411213119,2021-07-26,ACC1253308326,Miss Alfonso Weissnat +2021,43.428736924532345,2021-09-19,ACC1253308326,Miss Alfonso Weissnat +2022,18.355226576546855,2021-06-23,ACC1253308326,Miss Alfonso Weissnat +2021,55.4865708627353,2021-01-06,ACC1253308326,Miss Alfonso Weissnat +2021,47.38279737300684,2021-09-23,ACC1526648972,Alphonso Runte PhD +2022,84.80412770760931,2021-11-05,ACC1526648972,Alphonso Runte PhD +2021,65.36964648959909,2021-01-03,ACC1244185066,Irish Hyatt +2022,21.140953250828254,2021-05-18,ACC1244185066,Irish Hyatt +2021,61.81498257172767,2021-06-11,ACC1244185066,Irish Hyatt +2021,29.65460621732332,2021-06-14,ACC1244185066,Irish Hyatt +2022,10.247074572877512,2021-05-12,ACC1244185066,Irish Hyatt +2021,37.60002889963897,2021-10-10,ACC1244185066,Irish Hyatt +2022,41.20367197041102,2021-07-19,ACC1244185066,Irish Hyatt +2021,92.13587284211833,2021-07-04,ACC1244185066,Irish Hyatt +2022,36.77101627011023,2021-07-17,ACC1244185066,Irish Hyatt +2022,85.38919871231052,2021-09-19,ACC1244185066,Irish Hyatt +2022,63.66704425383497,2021-01-13,ACC1670977233,Stephen McDermott +2022,89.04026270362019,2021-11-18,ACC1670977233,Stephen McDermott +2022,90.98433597610321,2021-12-19,ACC1670977233,Stephen McDermott +2022,33.16934573422746,2021-01-19,ACC1614554219,Tomi Olson +2022,74.97369556925453,2021-04-22,ACC1614554219,Tomi Olson +2022,28.366283434665892,2021-09-10,ACC1614554219,Tomi Olson +2021,66.47270774525535,2021-09-13,ACC1614554219,Tomi Olson +2021,64.93310459657008,2021-07-12,ACC1614554219,Tomi Olson +2022,96.49104701128533,2021-11-09,ACC1614554219,Tomi Olson +2021,18.33268922220066,2021-03-13,ACC1614554219,Tomi Olson +2021,72.84764090661025,2021-02-04,ACC1614554219,Tomi Olson +2022,34.675729921120016,2021-11-08,ACC1614554219,Tomi Olson 
+2021,31.27976774411167,2021-12-13,ACC1614554219,Tomi Olson +2022,54.69701678881762,2021-08-04,ACC1704685677,Truman Rowe PhD +2021,45.368519204809246,2021-11-21,ACC1704685677,Truman Rowe PhD +2022,32.640468279324764,2021-12-01,ACC1704685677,Truman Rowe PhD +2021,64.2776152647112,2021-12-25,ACC1704685677,Truman Rowe PhD +2021,10.337705893984175,2021-05-17,ACC1704685677,Truman Rowe PhD +2022,41.109156779255784,2021-04-14,ACC1704685677,Truman Rowe PhD +2021,83.33114779506927,2021-06-01,ACC1704685677,Truman Rowe PhD +2022,18.714154271480684,2021-07-22,ACC1704685677,Truman Rowe PhD +2022,14.808949525552618,2021-05-16,ACC1163369376,Miss Delfina Runolfsdottir +2021,90.43378005921585,2021-03-21,ACC1163369376,Miss Delfina Runolfsdottir +2021,50.21791475645994,2021-01-09,ACC1163369376,Miss Delfina Runolfsdottir +2022,42.6961658612498,2021-02-23,ACC1163369376,Miss Delfina Runolfsdottir +2022,36.11156740796121,2021-08-03,ACC1163369376,Miss Delfina Runolfsdottir +2022,60.39668325329659,2021-11-21,ACC1163369376,Miss Delfina Runolfsdottir +2021,17.839118521227647,2021-07-20,ACC1163369376,Miss Delfina Runolfsdottir +2021,40.83823796615155,2021-01-21,ACC1163369376,Miss Delfina Runolfsdottir +2022,30.809912100095545,2021-08-13,ACC1163369376,Miss Delfina Runolfsdottir +2021,82.91502768617801,2021-11-04,ACC1163319465,Golda Jacobson +2021,70.44235408613577,2021-01-24,ACC1163319465,Golda Jacobson +2022,29.338141920392815,2021-07-24,ACC1163319465,Golda Jacobson +2021,87.67114475788559,2021-10-05,ACC1163319465,Golda Jacobson +2022,51.503161537003926,2021-11-29,ACC1215311381,Darnell Blick +2022,97.13853712475877,2021-09-12,ACC1215311381,Darnell Blick +2021,49.46923038317969,2021-01-21,ACC1215311381,Darnell Blick +2021,93.74011536006145,2021-01-11,ACC1215311381,Darnell Blick +2022,17.02643271984445,2021-04-01,ACC1215311381,Darnell Blick +2021,56.32737197719126,2021-09-02,ACC1215311381,Darnell Blick +2022,30.42357111626764,2021-01-14,ACC1215311381,Darnell Blick +2022,44.47819823723581,2021-11-14,ACC1421443115,Miss Tyson Towne +2021,32.259622220137274,2021-01-29,ACC1421443115,Miss Tyson Towne +2021,68.88284728054748,2021-08-23,ACC1421443115,Miss Tyson Towne +2021,42.26179301222452,2021-06-03,ACC1421443115,Miss Tyson Towne +2022,43.19436154901357,2021-02-17,ACC1421443115,Miss Tyson Towne +2022,26.18507669243391,2021-02-19,ACC1421443115,Miss Tyson Towne +2022,19.466742861094524,2021-08-25,ACC1898267767,Dr. 
Stephan Koch +2022,82.73997397935517,2021-01-15,ACC1159701592,Olga Johnston III +2021,72.26959580892935,2021-03-08,ACC1159701592,Olga Johnston III +2022,55.66052861164137,2021-11-27,ACC1159701592,Olga Johnston III +2021,34.97117571405191,2021-12-01,ACC1477997825,Ernie Blick +2022,82.7860758804853,2021-02-22,ACC1477997825,Ernie Blick +2021,10.936028211315081,2021-04-17,ACC1477997825,Ernie Blick +2021,99.77192347827364,2021-04-19,ACC1477997825,Ernie Blick +2021,90.27199831216869,2021-06-04,ACC1477997825,Ernie Blick +2022,28.975595713114895,2021-07-10,ACC1477997825,Ernie Blick +2021,75.10093489602386,2021-04-29,ACC1477997825,Ernie Blick +2021,77.90133874140184,2021-02-14,ACC1477997825,Ernie Blick +2022,37.630117661346546,2021-05-31,ACC1148367437,Lillia Franecki PhD +2021,98.50401411075985,2021-05-01,ACC1564301594,Sharen Mueller +2021,62.56740793599439,2021-04-16,ACC1564301594,Sharen Mueller +2021,58.76220190675253,2021-05-08,ACC1564301594,Sharen Mueller +2022,18.038880442390845,2021-11-06,ACC1564301594,Sharen Mueller +2022,67.25509354913896,2021-04-23,ACC1525959220,Torrie Altenwerth +2021,52.45294387251813,2021-11-12,ACC1525959220,Torrie Altenwerth +2022,54.008817999958346,2021-07-18,ACC1525959220,Torrie Altenwerth +2022,43.535895441841696,2021-04-24,ACC1525959220,Torrie Altenwerth +2021,85.21867900381348,2021-04-04,ACC1525959220,Torrie Altenwerth +2021,53.82489594362923,2021-12-01,ACC1752775618,Joshua Mante +2022,58.42434309106275,2021-03-20,ACC1752775618,Joshua Mante +2022,22.59552417162754,2021-06-26,ACC1752775618,Joshua Mante +2021,56.26543845524318,2021-12-02,ACC1752775618,Joshua Mante +2021,50.696236701655344,2021-08-13,ACC1752775618,Joshua Mante +2022,95.5157278322576,2021-01-01,ACC1752775618,Joshua Mante +2021,85.86573612776279,2021-08-12,ACC1752775618,Joshua Mante +2021,86.45706706118246,2021-12-21,ACC1752775618,Joshua Mante +2021,66.36735719692729,2021-09-12,ACC1940948822,Sonny Lynch +2022,33.033647728038275,2021-06-23,ACC1940948822,Sonny Lynch +2021,84.48905234275149,2021-03-16,ACC1940948822,Sonny Lynch +2021,85.34416234293663,2021-12-25,ACC1940948822,Sonny Lynch +2021,86.41275374887509,2021-07-15,ACC1480080305,Lyndon Dicki +2021,71.15185537353557,2021-09-26,ACC1480080305,Lyndon Dicki +2022,55.27327461787713,2021-01-30,ACC1480080305,Lyndon Dicki +2021,29.013897433518476,2021-04-27,ACC1480080305,Lyndon Dicki +2021,73.54012060683107,2021-12-01,ACC1258346212,Margurite Pfannerstill +2022,93.70087272196328,2021-10-03,ACC1258346212,Margurite Pfannerstill +2022,23.222459243319882,2021-05-18,ACC1258346212,Margurite Pfannerstill +2021,91.42399879227891,2021-09-01,ACC1258346212,Margurite Pfannerstill +2021,57.94414185151042,2021-04-08,ACC1258346212,Margurite Pfannerstill +2022,85.10203423475159,2021-12-29,ACC1583668380,Art Mueller +2022,45.59970451927665,2021-11-13,ACC1583668380,Art Mueller +2022,12.736097365186453,2021-05-18,ACC1583668380,Art Mueller +2021,32.80140555009061,2021-05-26,ACC1583668380,Art Mueller +2022,90.48513647518347,2021-11-21,ACC1583668380,Art Mueller +2022,10.164741461620796,2021-09-12,ACC1583668380,Art Mueller +2022,54.39590190941502,2021-12-22,ACC1583668380,Art Mueller +2022,26.74703583613261,2021-05-05,ACC1792316522,Devon Hintz +2021,54.85347029600204,2021-05-14,ACC1792316522,Devon Hintz +2022,49.031155225038624,2021-04-21,ACC1792316522,Devon Hintz +2022,64.98614648693209,2021-08-05,ACC1792316522,Devon Hintz +2021,14.606186189477155,2021-11-05,ACC1792316522,Devon Hintz +2022,52.74783461714814,2021-10-03,ACC1792316522,Devon Hintz 
+2022,26.168427790661788,2021-11-14,ACC1792316522,Devon Hintz +2022,26.85005412370901,2021-11-05,ACC1792316522,Devon Hintz +2022,71.29105875332478,2021-10-15,ACC1792316522,Devon Hintz +2021,51.30336373773679,2021-01-23,ACC1792316522,Devon Hintz +2021,91.72968789695176,2021-06-07,ACC1234619703,Gustavo Hermiston MD +2021,47.04316659483488,2021-12-16,ACC1486302043,Marquis Feil +2022,72.0568361607184,2021-10-02,ACC1486302043,Marquis Feil +2021,68.8008832808751,2021-08-24,ACC1486302043,Marquis Feil +2022,59.25428076515018,2021-12-18,ACC1486302043,Marquis Feil +2021,97.12102939903676,2021-03-16,ACC1486302043,Marquis Feil +2021,63.86631136125543,2021-11-06,ACC1486302043,Marquis Feil +2021,78.77255828716922,2021-10-18,ACC1486302043,Marquis Feil +2021,72.05945802421897,2021-11-19,ACC1819764240,Dr. Sachiko Frami +2022,61.80040939809461,2021-01-05,ACC1819764240,Dr. Sachiko Frami +2022,22.049770466700277,2021-05-25,ACC1819764240,Dr. Sachiko Frami +2021,25.981169444744793,2021-11-17,ACC1819764240,Dr. Sachiko Frami +2022,32.30916277114691,2021-11-12,ACC1819764240,Dr. Sachiko Frami +2021,56.294676552268506,2021-04-29,ACC1819764240,Dr. Sachiko Frami +2021,14.187559070732231,2021-06-11,ACC1819764240,Dr. Sachiko Frami +2021,29.02171147132989,2021-09-24,ACC1819764240,Dr. Sachiko Frami +2022,67.03769799905297,2021-10-10,ACC1291370690,Myrtle Boehm +2021,88.41531893269072,2021-05-04,ACC1291370690,Myrtle Boehm +2022,38.334760392865675,2021-02-20,ACC1291370690,Myrtle Boehm +2021,58.72253233601273,2021-06-20,ACC1291370690,Myrtle Boehm +2022,51.66096165656589,2021-03-10,ACC1082269181,Jeromy Hayes II +2022,18.6581346592572,2021-02-16,ACC1082269181,Jeromy Hayes II +2022,48.36609067895548,2021-06-22,ACC1082269181,Jeromy Hayes II +2021,29.080293648548913,2021-03-24,ACC1082269181,Jeromy Hayes II +2021,67.00153698897549,2021-08-21,ACC1082269181,Jeromy Hayes II +2021,27.74188234819053,2021-05-28,ACC1082269181,Jeromy Hayes II +2022,67.01424486043936,2021-12-08,ACC1082269181,Jeromy Hayes II +2021,90.66320048367126,2021-11-10,ACC1082269181,Jeromy Hayes II +2022,98.76404327164363,2021-06-14,ACC1919461816,Mitchell Howell +2021,66.34033751823128,2021-10-21,ACC1919461816,Mitchell Howell +2022,91.74501743614964,2021-08-12,ACC1919461816,Mitchell Howell +2021,16.685928983649404,2021-04-21,ACC1919461816,Mitchell Howell +2021,99.93597830196182,2021-09-14,ACC1919461816,Mitchell Howell +2022,10.119544850469971,2021-02-22,ACC1919461816,Mitchell Howell +2022,19.363885286603534,2021-04-22,ACC1919461816,Mitchell Howell +2022,15.565955270145663,2021-08-15,ACC1919461816,Mitchell Howell +2021,12.97816609892323,2021-07-04,ACC1522362008,Mrs. Tisha Casper +2022,82.97831853802477,2021-07-12,ACC1522362008,Mrs. Tisha Casper +2022,79.93291631633707,2021-06-17,ACC1522362008,Mrs. Tisha Casper +2021,26.657169338470375,2021-03-07,ACC1522362008,Mrs. Tisha Casper +2021,17.650954162150736,2021-10-14,ACC1522362008,Mrs. Tisha Casper +2021,60.36847181864645,2021-07-13,ACC1522362008,Mrs. Tisha Casper +2021,49.37036247626264,2021-06-08,ACC1522362008,Mrs. Tisha Casper +2022,77.67723058238332,2021-09-11,ACC1522362008,Mrs. Tisha Casper +2022,77.37392918733478,2021-04-24,ACC1522362008,Mrs. Tisha Casper +2021,17.374984772406926,2021-10-12,ACC1522362008,Mrs. Tisha Casper +2021,63.60734144183751,2021-02-08,ACC1905834000,Dell Stamm +2022,21.41291588406942,2021-03-31,ACC1905834000,Dell Stamm +2021,96.84298847353926,2021-06-22,ACC1556409481,Mr. Lauren Sipes +2021,77.51479262940316,2021-06-15,ACC1556409481,Mr. 
Lauren Sipes +2022,20.486262507465675,2021-09-22,ACC1556409481,Mr. Lauren Sipes +2022,92.5692347702084,2021-09-29,ACC1556409481,Mr. Lauren Sipes +2022,38.75041968331496,2021-03-17,ACC1556409481,Mr. Lauren Sipes +2021,67.44165065605486,2021-05-31,ACC1861947271,Mrs. Faye Bahringer +2021,11.413327437600548,2021-10-06,ACC1861947271,Mrs. Faye Bahringer +2021,23.468923788032484,2021-11-26,ACC1861947271,Mrs. Faye Bahringer +2021,78.06304264318847,2021-05-20,ACC1861947271,Mrs. Faye Bahringer +2022,36.4475374879951,2021-01-18,ACC1861947271,Mrs. Faye Bahringer +2021,10.834371650374504,2021-11-24,ACC1861947271,Mrs. Faye Bahringer +2022,25.879474429761537,2021-07-24,ACC1750290642,Mr. Will Weissnat +2022,44.37272537990321,2021-10-16,ACC1750290642,Mr. Will Weissnat +2022,95.18116767018174,2021-08-17,ACC1840706450,Miss Annamae Quitzon +2021,11.923451931148293,2021-07-16,ACC1840706450,Miss Annamae Quitzon +2022,67.69929391816515,2021-06-02,ACC1840706450,Miss Annamae Quitzon +2021,26.150797637049095,2021-08-27,ACC1840706450,Miss Annamae Quitzon +2021,67.20188288033009,2021-07-20,ACC1840706450,Miss Annamae Quitzon +2021,42.059260327737164,2021-07-25,ACC1840706450,Miss Annamae Quitzon +2022,30.50546015283939,2021-10-12,ACC1194008644,Mr. Carmine Walsh +2021,48.55868922203008,2021-12-26,ACC1194008644,Mr. Carmine Walsh +2022,81.79863211209603,2021-07-01,ACC1194008644,Mr. Carmine Walsh +2021,68.64523970594513,2021-11-12,ACC1194008644,Mr. Carmine Walsh +2022,84.77788511025551,2021-10-17,ACC1194008644,Mr. Carmine Walsh +2021,59.37490934935689,2021-12-11,ACC1194008644,Mr. Carmine Walsh +2021,72.44140580393106,2021-03-29,ACC1194008644,Mr. Carmine Walsh +2022,73.77833914192661,2021-08-27,ACC1194008644,Mr. Carmine Walsh +2021,19.71148960693072,2021-02-03,ACC1162680657,Mui Bednar +2021,85.58432919437317,2021-04-24,ACC1714975937,Ms. Cammy Hamill +2022,78.0186256291534,2021-07-10,ACC1714975937,Ms. Cammy Hamill +2021,91.50746877274266,2021-05-14,ACC1714975937,Ms. Cammy Hamill +2021,75.95362525887501,2021-05-27,ACC1730463491,Annabelle Schumm +2021,87.46554805431276,2021-06-25,ACC1730463491,Annabelle Schumm +2022,44.12714741422513,2021-02-08,ACC1730463491,Annabelle Schumm +2022,36.15257010733571,2021-04-24,ACC1730463491,Annabelle Schumm +2021,82.66871791883258,2021-07-25,ACC1730463491,Annabelle Schumm +2022,38.530955571896854,2021-11-23,ACC1730463491,Annabelle Schumm +2022,84.54657136241957,2021-07-20,ACC1730463491,Annabelle Schumm +2022,91.1290263616424,2021-04-28,ACC1730463491,Annabelle Schumm +2021,97.59507121610983,2021-01-01,ACC1730463491,Annabelle Schumm +2022,24.421655515784963,2021-03-11,ACC1524022884,Angelica Bauch I +2021,45.32163493515351,2021-09-22,ACC1524022884,Angelica Bauch I +2021,43.35676760763234,2021-02-12,ACC1524022884,Angelica Bauch I +2021,56.64703011330379,2021-12-17,ACC1052393985,Dr. Brittani Stiedemann +2021,84.40850954434403,2021-04-05,ACC1052393985,Dr. Brittani Stiedemann +2022,50.5053218160029,2021-08-26,ACC1052393985,Dr. Brittani Stiedemann +2022,91.94246588130086,2021-07-21,ACC1052393985,Dr. Brittani Stiedemann +2021,77.29510352478982,2021-09-03,ACC1052393985,Dr. Brittani Stiedemann +2021,94.72323850069506,2021-12-15,ACC1052393985,Dr. Brittani Stiedemann +2022,18.865894166603493,2021-07-24,ACC1052393985,Dr. 
Brittani Stiedemann +2021,21.326652847579624,2021-05-05,ACC1581652235,Marita Kuvalis +2021,97.63918867440718,2021-12-17,ACC1581652235,Marita Kuvalis +2022,39.01188851662596,2021-08-23,ACC1581652235,Marita Kuvalis +2021,28.930339422408874,2021-05-04,ACC1581652235,Marita Kuvalis +2021,18.517206940876257,2021-10-12,ACC1581652235,Marita Kuvalis +2022,46.55483487215302,2021-10-04,ACC1581652235,Marita Kuvalis +2022,39.974974221688285,2021-06-07,ACC1809269465,Adan Herman +2021,51.556807563405776,2021-05-27,ACC1809269465,Adan Herman +2022,56.900421681373594,2021-11-30,ACC1809269465,Adan Herman +2021,10.752313222960728,2021-10-29,ACC1809269465,Adan Herman +2022,37.9988892670075,2021-05-08,ACC1809269465,Adan Herman +2021,19.511917260625303,2021-07-28,ACC1829971714,Ilene Kemmer I +2021,69.66280141401262,2021-01-19,ACC1829971714,Ilene Kemmer I +2021,29.931163706184623,2021-05-21,ACC1829971714,Ilene Kemmer I +2021,89.3220961599592,2021-08-03,ACC1829971714,Ilene Kemmer I +2022,18.08597143753763,2021-12-03,ACC1829971714,Ilene Kemmer I +2021,16.83166834752302,2021-06-22,ACC1829971714,Ilene Kemmer I +2022,94.07302775348539,2021-05-01,ACC1313397104,Ms. Rueben Dickinson +2022,89.37374025363897,2021-05-04,ACC1313397104,Ms. Rueben Dickinson +2022,32.25638694186311,2021-04-15,ACC1313397104,Ms. Rueben Dickinson +2021,54.28003608129703,2021-04-11,ACC1313397104,Ms. Rueben Dickinson +2022,98.79274191989049,2021-02-14,ACC1313397104,Ms. Rueben Dickinson +2021,17.782967835997137,2021-07-20,ACC1142327986,Miss Johnetta Heidenreich +2022,23.532386871518383,2021-07-24,ACC1142327986,Miss Johnetta Heidenreich +2021,61.22347788611113,2021-04-04,ACC1142327986,Miss Johnetta Heidenreich +2022,93.66310537215853,2021-03-22,ACC1142327986,Miss Johnetta Heidenreich +2021,81.33661607614735,2021-11-12,ACC1142327986,Miss Johnetta Heidenreich +2021,99.14981245784253,2021-01-19,ACC1142327986,Miss Johnetta Heidenreich +2021,53.422032906815126,2021-04-05,ACC1142327986,Miss Johnetta Heidenreich +2022,51.82353031416491,2021-10-28,ACC1142327986,Miss Johnetta Heidenreich +2021,41.556838264311125,2021-12-09,ACC1142327986,Miss Johnetta Heidenreich +2021,21.760164781306383,2021-09-06,ACC1267541686,Romana Kilback +2021,10.085709961338276,2021-12-13,ACC1267541686,Romana Kilback +2021,53.75798509229021,2021-03-26,ACC1267541686,Romana Kilback +2022,10.637479811077556,2021-04-11,ACC1267541686,Romana Kilback +2021,58.52037136399203,2021-03-18,ACC1267541686,Romana Kilback +2021,66.94389686869685,2021-04-03,ACC1267541686,Romana Kilback +2021,98.63808451545839,2021-05-12,ACC1837470074,Nickie Ryan V +2021,88.82332261090096,2021-10-09,ACC1837470074,Nickie Ryan V +2021,52.40913406975475,2021-07-14,ACC1837470074,Nickie Ryan V +2022,66.1023577600411,2021-01-31,ACC1837470074,Nickie Ryan V +2022,43.78494196272051,2021-08-28,ACC1837470074,Nickie Ryan V +2022,48.01575792361773,2021-05-09,ACC1837470074,Nickie Ryan V +2021,60.79116335496844,2021-10-05,ACC1837470074,Nickie Ryan V +2021,56.29777647580759,2021-08-28,ACC1837470074,Nickie Ryan V +2021,59.84046351425571,2021-03-12,ACC1157431494,Trudi O'Connell MD +2021,79.89540864698093,2021-03-24,ACC1157431494,Trudi O'Connell MD +2022,45.19371907609112,2021-04-27,ACC1157431494,Trudi O'Connell MD +2021,91.10464253427557,2021-12-01,ACC1157431494,Trudi O'Connell MD +2021,35.457919710263745,2021-08-20,ACC1157431494,Trudi O'Connell MD +2022,86.63070914099595,2021-08-18,ACC1157431494,Trudi O'Connell MD +2022,86.13810413761806,2021-12-18,ACC1215490132,Vanessa Lockman I +2022,68.11453580158658,2021-12-24,ACC1317381663,Kathlene 
Gusikowski +2022,57.55277330125363,2021-08-20,ACC1317381663,Kathlene Gusikowski +2022,66.8073548237396,2021-07-20,ACC1317381663,Kathlene Gusikowski +2021,96.47285304296227,2021-06-20,ACC1317381663,Kathlene Gusikowski +2022,46.79159107729104,2021-06-09,ACC1317381663,Kathlene Gusikowski +2022,12.5018390710726,2021-07-03,ACC1317381663,Kathlene Gusikowski +2021,22.390155697212805,2021-07-24,ACC1317381663,Kathlene Gusikowski +2022,59.241766411120516,2021-08-16,ACC1317381663,Kathlene Gusikowski +2021,55.79910447702732,2021-08-26,ACC1317381663,Kathlene Gusikowski +2021,92.96202398582913,2021-08-30,ACC1211735684,Ozzie Bergstrom +2021,21.562472433375916,2021-12-22,ACC1211735684,Ozzie Bergstrom +2022,84.99376295547238,2021-06-30,ACC1211735684,Ozzie Bergstrom +2021,32.79120361018911,2021-02-26,ACC1211735684,Ozzie Bergstrom +2022,74.39017723722746,2021-04-06,ACC1211735684,Ozzie Bergstrom +2021,83.03489331649959,2021-02-25,ACC1211735684,Ozzie Bergstrom +2021,64.98044815891146,2021-07-07,ACC1211735684,Ozzie Bergstrom +2021,69.56458907798869,2021-02-08,ACC1211735684,Ozzie Bergstrom +2022,14.483204695444966,2021-03-05,ACC1211735684,Ozzie Bergstrom +2022,45.95858613695785,2021-04-30,ACC1211735684,Ozzie Bergstrom +2022,99.51221561791635,2021-02-22,ACC1298848982,Miss Eduardo Muller +2022,23.733234499442545,2021-02-16,ACC1298848982,Miss Eduardo Muller +2021,29.16090248629362,2021-05-03,ACC1525570262,Mr. Emilio Hammes +2022,29.52496370149808,2021-07-02,ACC1525570262,Mr. Emilio Hammes +2022,53.07106492432793,2021-05-30,ACC1525570262,Mr. Emilio Hammes +2021,16.97053808567235,2021-12-25,ACC1525570262,Mr. Emilio Hammes +2021,59.96161655601002,2021-12-28,ACC1525570262,Mr. Emilio Hammes +2021,57.49376415198878,2021-05-21,ACC1525570262,Mr. Emilio Hammes +2021,42.04844002693216,2021-09-25,ACC1370746845,Kaycee Kulas +2021,49.916463653141996,2021-02-07,ACC1370746845,Kaycee Kulas +2021,18.381285811300327,2021-06-02,ACC1370746845,Kaycee Kulas +2022,30.5907124740561,2021-12-22,ACC1370746845,Kaycee Kulas +2021,51.793472404356756,2021-12-16,ACC1370746845,Kaycee Kulas +2022,86.48226087116186,2021-08-25,ACC1370746845,Kaycee Kulas +2022,51.41100967498528,2021-07-21,ACC1351119524,Fredrick Botsford +2021,57.21254220935729,2021-02-18,ACC1351119524,Fredrick Botsford +2021,17.798415778131563,2021-10-06,ACC1351119524,Fredrick Botsford +2021,99.20000930170967,2021-10-14,ACC1351119524,Fredrick Botsford +2022,31.1483500130506,2021-12-02,ACC1351119524,Fredrick Botsford +2021,72.762238991555,2021-05-16,ACC1351119524,Fredrick Botsford +2021,30.768466761812874,2021-06-06,ACC1351119524,Fredrick Botsford +2021,79.32487296383393,2021-12-17,ACC1351119524,Fredrick Botsford +2021,58.436662710407695,2021-09-20,ACC1351119524,Fredrick Botsford +2022,38.59640967435723,2021-02-03,ACC1351119524,Fredrick Botsford +2021,72.67728698951751,2021-08-03,ACC1941392062,Graig Aufderhar +2021,87.75210253415894,2021-03-10,ACC1997032121,Emeline Welch +2021,58.94476907554541,2021-08-10,ACC1997032121,Emeline Welch +2021,90.625513178579,2021-07-23,ACC1997032121,Emeline Welch +2021,24.14758357181468,2021-07-09,ACC1997032121,Emeline Welch +2021,68.99483089736853,2021-11-17,ACC1997032121,Emeline Welch +2021,39.19013789588694,2021-08-02,ACC1997032121,Emeline Welch +2021,91.70668446119606,2021-02-11,ACC1997032121,Emeline Welch +2022,40.763198734260705,2021-03-03,ACC1997032121,Emeline Welch +2021,95.00234974048624,2021-02-11,ACC1997032121,Emeline Welch +2022,57.16790848689211,2021-01-24,ACC1969163171,Marlon Keebler +2021,53.26138172553853,2021-07-30,ACC1969163171,Marlon 
Keebler +2021,97.749371353714,2021-02-15,ACC1969163171,Marlon Keebler +2021,76.47914233061746,2021-01-06,ACC1969163171,Marlon Keebler +2021,80.70698698078046,2021-07-11,ACC1969163171,Marlon Keebler +2022,26.500148710333388,2021-01-05,ACC1969163171,Marlon Keebler +2021,94.8036174064755,2021-09-25,ACC1969163171,Marlon Keebler +2021,90.09130959316035,2021-03-01,ACC1716439560,Earnest Boyle +2022,32.68672192854719,2021-06-07,ACC1716439560,Earnest Boyle +2021,86.0456012033555,2021-10-04,ACC1716439560,Earnest Boyle +2021,82.07060769282002,2021-04-20,ACC1716439560,Earnest Boyle +2022,91.07355185341574,2021-11-01,ACC1716439560,Earnest Boyle +2021,78.37749488283676,2021-03-26,ACC1716439560,Earnest Boyle +2022,50.9942179999409,2021-08-27,ACC1716439560,Earnest Boyle +2022,17.788841653749916,2021-01-05,ACC1716439560,Earnest Boyle +2021,98.23628337418215,2021-07-31,ACC1716439560,Earnest Boyle +2022,33.72098180202041,2021-05-28,ACC1716439560,Earnest Boyle +2021,23.86283357170127,2021-03-10,ACC1244048987,Halley Roberts MD +2021,45.571785063311,2021-01-26,ACC1244048987,Halley Roberts MD +2021,65.40621947416678,2021-09-30,ACC1244048987,Halley Roberts MD +2021,46.282358860873806,2021-10-15,ACC1244048987,Halley Roberts MD +2021,27.179899427470616,2021-09-28,ACC1244048987,Halley Roberts MD +2021,90.59284110473945,2021-05-30,ACC1244048987,Halley Roberts MD +2021,15.930008289098627,2021-01-20,ACC1244048987,Halley Roberts MD +2022,46.015136928383505,2021-09-13,ACC1748115502,Dr. Antione Stiedemann +2021,39.820040963089845,2021-02-14,ACC1871872601,Mrs. Domenic Kihn +2021,93.60238224597626,2021-12-10,ACC1871872601,Mrs. Domenic Kihn +2022,16.807067352241035,2021-04-28,ACC1871872601,Mrs. Domenic Kihn +2022,70.59417755505939,2021-11-30,ACC1871872601,Mrs. Domenic Kihn +2022,36.543124363964566,2021-06-08,ACC1871872601,Mrs. Domenic Kihn +2021,86.52709499384981,2021-07-12,ACC1871872601,Mrs. Domenic Kihn +2022,68.19458542377131,2021-09-01,ACC1871872601,Mrs. Domenic Kihn +2022,87.39908887366244,2021-04-04,ACC1871872601,Mrs. Domenic Kihn +2022,43.168033119848765,2021-07-09,ACC1202459477,Mr. Betty Abbott +2022,28.89480121329428,2021-11-27,ACC1202459477,Mr. Betty Abbott +2022,37.728011219852284,2021-04-08,ACC1202459477,Mr. Betty Abbott +2022,59.149247760854756,2021-07-23,ACC1202459477,Mr. Betty Abbott +2021,70.08354195314163,2021-07-10,ACC1202459477,Mr. Betty Abbott +2022,60.610506244609894,2021-05-23,ACC1202459477,Mr. Betty Abbott +2021,57.53261858704685,2021-06-27,ACC1202459477,Mr. Betty Abbott +2022,43.827983541466246,2021-04-10,ACC1202459477,Mr. Betty Abbott +2022,56.92125306317578,2021-10-15,ACC1202459477,Mr. Betty Abbott +2022,24.526424883260894,2021-02-17,ACC1202459477,Mr. 
Betty Abbott +2022,13.032813375414332,2021-11-28,ACC1769375341,Malik Torphy I +2021,78.45920215876426,2021-01-06,ACC1769375341,Malik Torphy I +2022,72.62802429483625,2021-11-01,ACC1769375341,Malik Torphy I +2022,46.162027699681204,2021-01-02,ACC1769375341,Malik Torphy I +2021,28.398505515562967,2021-02-26,ACC1301497681,Shaunte Schulist +2022,47.38589790203852,2021-04-06,ACC1301497681,Shaunte Schulist +2022,87.55919220005273,2021-11-11,ACC1301497681,Shaunte Schulist +2022,95.66621518225338,2021-07-15,ACC1301497681,Shaunte Schulist +2021,80.19101873953724,2021-12-28,ACC1301497681,Shaunte Schulist +2021,18.663285828102083,2021-03-01,ACC1301497681,Shaunte Schulist +2021,90.30193502429653,2021-01-09,ACC1353282646,Jason Hauck PhD +2022,88.93097497875529,2021-07-29,ACC1353282646,Jason Hauck PhD +2021,91.54508804851655,2021-09-28,ACC1353282646,Jason Hauck PhD +2021,73.11666115225579,2021-12-21,ACC1353282646,Jason Hauck PhD +2022,72.98367665866023,2021-02-21,ACC1353282646,Jason Hauck PhD +2021,27.295022024880392,2021-08-21,ACC1353282646,Jason Hauck PhD +2021,56.1297796139588,2021-09-01,ACC1353282646,Jason Hauck PhD +2022,93.05631534888492,2021-05-24,ACC1353282646,Jason Hauck PhD +2021,45.07185561129905,2021-06-16,ACC1353282646,Jason Hauck PhD +2021,37.498705478322165,2021-03-02,ACC1353282646,Jason Hauck PhD +2021,82.67529517207508,2021-01-12,ACC1576819598,Dr. Charla Mosciski +2022,10.690438279893861,2021-04-08,ACC1375020644,Alonso Nienow Sr. +2021,61.70490915122514,2021-10-18,ACC1375020644,Alonso Nienow Sr. +2022,72.619355971311,2021-10-22,ACC1866091756,Charlyn Upton DDS +2022,55.34919764235988,2021-10-03,ACC1866091756,Charlyn Upton DDS +2022,20.771341917447614,2021-06-10,ACC1866091756,Charlyn Upton DDS +2021,32.992060058405436,2021-12-18,ACC1866091756,Charlyn Upton DDS +2022,83.64826877326048,2021-01-14,ACC1866091756,Charlyn Upton DDS +2022,53.72910726549754,2021-07-29,ACC1866091756,Charlyn Upton DDS +2022,35.55212900646403,2021-07-07,ACC1370179115,Clark Waelchi +2022,14.11640352032448,2021-06-18,ACC1370179115,Clark Waelchi +2021,79.99011710902067,2021-11-06,ACC1370179115,Clark Waelchi +2021,71.74328848132666,2021-02-28,ACC1370179115,Clark Waelchi +2022,63.059086276552904,2021-03-04,ACC1370179115,Clark Waelchi +2021,14.240475064701847,2021-05-04,ACC1370179115,Clark Waelchi +2022,91.51389344579293,2021-04-22,ACC1194002499,Naomi Reichert +2021,35.83509398580409,2021-02-06,ACC1194002499,Naomi Reichert +2022,77.85842049821636,2021-01-27,ACC1194002499,Naomi Reichert +2021,14.728216446441298,2021-06-27,ACC1194002499,Naomi Reichert +2022,38.7561197706565,2021-07-16,ACC1194002499,Naomi Reichert +2021,57.20080992640434,2021-08-01,ACC1194002499,Naomi Reichert +2021,29.706195913913284,2021-04-10,ACC1194002499,Naomi Reichert diff --git a/app/src/test/resources/sample/files/json/sample.json b/app/src/test/resources/sample/files/json/sample.json new file mode 100644 index 00000000..b94ab013 --- /dev/null +++ b/app/src/test/resources/sample/files/json/sample.json @@ -0,0 +1,13 @@ +{ + "account-id": "ACC213813", + "name": "Peter", + "balance": 10.2, + "details": { + "date-of-birth": "01-01-1970", + "phone": "123456789", + "updated": { + "user": "peter", + "time": "01-01-2023 00:00:00" + } + } +} \ No newline at end of file diff --git a/app/src/test/resources/sample/files/parquet/account/country=AU/part-00000-3862dc01-3d49-4de7-9135-765936fa55b8-c000.snappy.parquet b/app/src/test/resources/sample/files/parquet/account/country=AU/part-00000-3862dc01-3d49-4de7-9135-765936fa55b8-c000.snappy.parquet new file mode 
100644 index 0000000000000000000000000000000000000000..19b185f556a7ec649152378acc137d4a9dae3396 GIT binary patch literal 11370 zcma)?cUV(P*S8Y{q!$GQLKuAwOMMDy5APF68*t^)khKdD`?bxvw zM6qG-y&_f=d%^N8Jm-1Nd9UyNxL!vbunRxs&Qlf6 zmrp(1Ve-2Rgul9Qc)*84&VP4e8@)sttV{xXXl!UIQ^M(3+V#(631ELfpU;>pgzpIh zerO^4BD`Ego6^4`Zw%`-5huwu`hmSX97?Pwi)++kwB&T@ST0Hg|N=+#f8qY zBp_#O?O*wNAbbdZ^Jl+LL2$!Z7t{JN8Z2f}i(bBFg9Uxqvm~|<>^e@hENvPH{`N@; zDQ%H(I$=}j?jZu;2rSJM9@ZWO?thuHy|78zVPX@4Ybi2&RuXIoldVZhPO zdvRXXH zWUnpG{zD1ZvlpMK-Kd1e_mu7Tbu0)S^TjaqkO%^IzN5$z#n418*VkgaP2Y8~C#MP_ z^3?R4$?uh5ckAad)#W76EXb@HeOLl9+hT^i$YFt}dQy}1*Dx^ipER@8S_;;^9^BBa zi-WS#>z&jL5^QCR+P#&?fJsmL*?c&j1SO8m_m9c~q1WZ{%il?pp;Mm3Tz!rOkG7Qe z&2S$K19uI2J=Tl|0uR`!)0*M&2hw|PJkJadvm?;Uz${Bsu21kG#1+QVVyvb5t*E-|6jd`uN-5*;*zaho=$ zh#`?V!`5YX9E|%oZ({!r7My6k{-{rj1rsQ}w=wDf%I`d*MME)H4-P=Y473H{8a5MIw^o_ z*L$3T8*EtYpS{O8PXfUOOF9B)N?_lJH%m8hlHp!dU;nf^4)EK9r|xf*!!ONL>z0{u zV4wSF)_HOgtXN$YXLBY7&VS)-GOm?@f0c!J>Fgw!N3~PB9hJiJ18Vbd5g%SUPaW4` z8U?wUy7CE!6wvnR`IFE+a_FodWk3B7Dj2rzcrkO4463c@id`TB`h;}SnO#)ao_2=M z?7;(-G1W}WmcxlT0Vgj#}co709fl+YC9;B1N0%i zmbALd!ByC#U45Mj#@JD5TgFJim+4e*YeR-b>SModmx{pSa7OhRh=9{`@)d}#lP>B&MOLV3$YH^x?K(<{rJtWng#A#?N65P3m%Y8H{i~x7Z|&4*MSWed+sN0b}p$N`~D^2655h#^*UQurmDU=|6)EK9gT=v)rnH zUy-wGXY5Y~;a;`_{jnUTuI74kZbd`Lizauk{w&D+^6XeS#6gT}!*jknAH4FC_XwIq zkUs5MeB%ibF20EO%FfT+X6GFk0@!ibt zBtp~q=vnP&2g2&I9ku6hoh)g4xcR#l&zq|I3~m}3QXbtMaJ*dzuToc6%_@$7RKLqT z{f~%1X0^22z-x&xc1+Yszp5y(yW@5?(t--6UKMSzO?=>9(#!8J7D36z_9J{_9Eh@P z=U!XGgIS9N`vfBrL6nzmxSmRZ65%Vq7qeqw=u%(eo>B2IJ@20R=Rg^_e>FaiWgdd!#Yu=L8O9?MZSFQMXiUDnIt0IP`$3a8Yb3wVk z1eR`<$;T}cfb^m46vre6%#V)Fj9bZp56=VUaQDQ*64{NPLop_{D-rEOAS%tlS1oGkdlH@Dv>;=G0Gj~xfi-V>s+V0=aiy*tns%C6l zAb1|NrUbj=`Z<$567YVX_Mv4|!zc;7pLb`TX(J83&DI|q)~*0SPzS-iodlbd?fuSV z2w~3cy6Ltlu`uyzZS=%nNpOJWUUp};5{7tt-=2Gp46#?U^}q1CSy<)Xn|)3IvYJ6B zZj~lO+NY(hku)(l8GD9BIZ0vPRpZ2igb>JPo+my(76()Fj~<-8TLA?F;wRnAm%^i8 zBkNmZMd0!|(|(R!JV={%6XM^?;S9(AZE|-hw4b03mU+d%cNT#?F_Nw-}!GcqY?A@h~BH+Bb?tIHf3hUi3bPmgtLuTel`zLLK!6Z7xvifEc z{CT4Jd*TNfTz0=`kRD*dH~HxqJDn>GL-+T>jaSnq)77-bP+XeR(|io4O91Lg0goF|%>SUIno2*E{T~j0Mh#UgtaT zetz%cyo^#M2jF|6z9UPX@g)$34O)!t1@_=RGfD-Qw}=o z-A;uaBnUma)3|iF2sZCD8vSR9V4+3JZn9Ad0)}bv%R_@9x+*+;;o)d_5l@Z$a#jEb zb%!T*h((aqbJU_coB6QC-L(JMayE>5-u=nMWIAk1H*OiTl>-EM)7;5>Wgt+JT6UyJ zz@`KAPR3y0PCi6@>3yS<8x7~yt33oIT&SBbd-o|p1P6Xz4exKQ1b=f{ zVSr{Z*e$*__Fm&IVer*p#P#31OdXT7}2i2xIAn&R3$3A`OPp*a357w#=`42szi0XueF9eBxt z3k}!zI2|6yhhv%XRnxce!0n>f<%oJIoUeQ5vuvyYHa(q|vDg9Edr0D|8SkUuUDRU2 z@);B`bCR6>YKVj5m)a#k>zQz;aBXH>L;`GGPjK*X6T=p&@8rc)F6t;gHk#grKpM)qlMg z3YYKx+7voc0?OdhjQWyrSoF-}=mtA6FlJu!UlYgx_MDfs^BMxc=KZY4f}gRF%q!Sa zpBE1nPpX|;y%jKlbpMFk{y=ylnzQ8d+bAfExi-G}qZHo#;Z1luJO=i*ZIev)l0!pw#wodUkM&&J8sGMvtSjW|3ZIzCH(4ppS$>DB8<=3pv*a; zgy!lWfBg6u54%=a#a_6O1eCc)F7a4&2-_G^d%;5nje(`{vpOW8RvdOcd`Sv*Z{PG< z@g^Aj#w}QR#xns3jw?ygDT82V$;Bc5Yv{1%s{1t}-uE+FF5SI3B_7sJ-`p{)Rtax@ zu|3B>6M?DIU_;|vC7gTI+asV1%Z-hBlGl?ABZjQmVQR^O%?a;jO^+qRH19(L659oE zton6f(`v=Zy_XcTKf)KJFgt~Uu6SFj~Gmc7m|Z-x=qLQ1CQ;>tbwrmsZo9R zsuBpB0_yzE>k2OgPsa6Tsvg#BL^J?GmBVKVLhAB4MNc(-K5 zrBy2w(7K}XDcwp0wqciz+wu9XmCBwEJ$uWI9UeP`MoY&8Nvg*k`G6w?M(vzCgQK>MgbUZRw`*J0We;&<;Quy zczCyUNK5i~32ZnX(8%xrh#Yg^f@BOCDxWUB+15`1r`3Zu$J_?kMyfb)Ps)K4Z_{#K z;{8(kzVXYiXW`JO39#*TnMq^NvWsmuQt7+e-jHib#P*>jbzVAC&T86a)4NIzyB8NT4K8 z?mKuE6>K?!JddVIp{&Og5BfnFTr+Rqv?`keFYKbz73V_XRNaWyo8nlAdp(`mXFL90 zu<4q>M1uF*5P$pVH zhRx}WmXts##N2x1a-JItVg3E)E*O&tp&y$@(AWwvemkUum3U9?*R*HQom=P z_$a}BtMk}i7vxa9=&6&+j1JENZLVFv8w-N_L!4H;lSA_8H^)v4z0}bBZXbvzMAz^v!L2z+OpT(L{RoC>d3ql8H9xtH1)#$;!;`thdnwUxD$CU 
zM!SLyPECXM+RtNxmuJ=d@81CWKeIV-*G&OGcP?(dcS;UcYc&V`DgxnSc*7rY2c-}f z*0|_FtpaWgJL=xnp9X7>PZByB@pq@;4V{&*l7Tw&;qBY43i#ODkhgbo0_e7EbR2S< z3c?+i-qu}|LBS=PQUhM+XMX}dGCEV73Nn7H`!zTDgCnUugzw{agNK)jL8w;8}rzI z7oLYF)jcnNAI$~hukh-!t$eVE>ZrGkBf;{a4HE`@$8p)UOtY?D4nrK=uN-TT;QsZZ zy6ve197YVa-~LksZ&!F(nx%2z!oYryhvEJA^MZbr{(Y!W=ev94aXuI7ugVV0Tpk0J zKNV}d590G*pT@f2U@3IWe;$=NTL3*iE%yG|84upA-^R?Skw8K2fx~TEnGhP9M@{=G zhX2yaj%$+s!UDhcUq*r!Rxo?bxe6Q>ofgu7YTPWflvm${@ zWmCi24?;LIhhkcCPzd+^zjM+y#=s-Nsx49bnILXa@mDsGAmt{vvuJZNtT=Xsken`t z*l&Zkr92hE&f3QM-opn&__oR73Bwb>CV;h^xdZQ~S7%yhJoJbBQgZ*Jwh3Ty=laV1 z;R+aEZT~cVE*pF}2MJyZ1zcO*eVfe;4!B7RU+T)(P<3W>=Heb=7~!tkH5yOjrov&i zKODm$|B{j9&?1I&p%uFr(}hqraf{34aZuE z>4y2e5jSK8rhg*w$U|fe@)qlup(Y@o(QZRsfy_Z(VR``SQ=|YHh14KKWCwB`DMdmt z_BT{7)DYC$$VX&2!XqB&NJf2(vAUzm5Cw7`vBL5~)IuZ-If7IpJ&~;l6EVRuU$E{h zv_~R+k;lZ252w4Kb2=tmv49fwI;JP1QV>%_g!Du7h&%EC^XH-dLOp<*gPcOnV)_>9 z4^%7EF9-o+9>+8r?U{%ad4}mn$Uw|HjOk{?7pXwX5J&XCK=nZ7qFzGAAvX{U^gY44 zG_>W&Rm2B??`p%(EKwh97gGxfG5Npi8iRy{`fl!f)hzsJ3_+tKZ zR6FD+)?JS}4QWJdF|Qrf85L0dk+;ZDq$m0=VfjgDo1yK1Y(P5{*@bixtfb2oWA}=wo7}W;j*rP@v=4f9<|2nkWQNvL!kUMBE zNBxStL*^ovk=yA1fjR*hg4kjGov0k74hcqF5r6cTquxOpkPBFkg?bFJ#Jszx5l9yD z5cBd-Z=v2p-XlFQ9fZ)4mq;Jv4Ej@0zoVLBj6-ODMzWEKh&}q|qb4JL&~`*!hGZb; zu+9k7r>I4!0hlj9l_1}dLC9L98S{6eu0}i%A7nEUjOACNenN&JQ;_Y*QX~ZPx+6NY z-y?63Ye;WoA(DigMJ{6OOjHf>8tu!-4de`_ov?l+>IcLd?GE%mLHjs@FT1weI3KWa zp1!<&jn%*3w=FjR;M(o)mtB@)2__bG_&sIIpSoU zs@oL1zA=?t2i7r*9=?eN#yIau)18A;_gHk8#M`)0v#xA4b~`c2BeJC9*~7=(=XkOv zI|Qb;TG;vUgaIi{Yo^Wj7p~$iraZK)4szKv^vJw8YioDLt_Ke;HMjH{z-yd%#4E#Q z9-;5a)_}ssnKP+bmvo{XYY9u~`S%*9_0JU8MHb(>v3G*MeV?d`PaV%MoH$=SxbmlI z58Bi@lUbAPy!dOfL=N$@Jfh#31YYXRn;+n|Ui18tS9B;zdueCju_=k;8O2tFlb8LY zOHwyE5Q7%16Pa_XDa9}5^^r=&>r1kIMw`h+?ca{H zn6FDaG^Uwo9;lc;V#-8Y32W1he$mH;ePU~ZO}o3Ft|8~Py>i=`+HIB5q=DsLFTK2c zeVu$(FV|e!X5x{Wyz7B^k&9m4YMXd_lHXlH;88Cxrmg;d)Kx3f&{~G0&+)IRAXv23 z=gFCR%7XFC^#r}on#k~c>wf+J+*-GPlJEK2qNy`C-B^>~XMf4-dtb+y*vCbcU#po@ z5MI@HQ_#oY8~dwYr&)~JU|w%s9n!Dg=+V;l8z-vL4qN;z+fzpA=3uU$Gp}Juhy4f` zcfYT#JJ-;>)Ki`29F%oo!lDU|V>}ZVotV`$Y0?MZ32W!9bg=*YrMlwc1*XPxAZgf3EA#p z+4IlYC++%sMe*wfp6Jmtdc$2((aPSuM0r5t4Bu^QnsWS-1)Z5y&fyF0hznXZr)!|P zjPlH3{&}m)?)+oly?CKEJDpE2*_z55_axLJaLB&acj`r_7uu_iYnqeS4$@P58SC8q zGOXt0=X}}`*gSsWQk7N7hbxnK2j(!6eAdjP5}lW+&%K)1vmkwT@v1fT4|(wu=mgwG z?D243)7$y$^2v|Pa2rAFdXHUe_u(?p4(}j_QVNfkm`Tq_&f~-`s0*Nx6EM{ipXK|9dYSZ)B#`K&VDp?#$DoVCQPq;xtr3-Vp zOzW;%rSS}UaRh@=oZc0g%gl+_CviD+F(qDIMinr7ptCTdOkS$VO=0j-Gjj^d^YnRL z-UyLEBH(2zj8sL2K08aTNcl6Rd92(7U(pV3JXLjG_nKQdc9tu5i*2& zLm@dcnU&c!Jo@xPN&+pVOq!*VQiVkHWLe?ARGE@0qKV>?o8Jn!hrt`{@ z3|-L@7SV^i-ZAzFd}H7OU1K8oid5srj_xgz!8Xv#aAt>cT`BO;Tu%Hd|kjW#ke2 z`xF^-^H@1TI@yNUt5Q~2UQVWSsVYNp*O-MFD)_M+nx3f+(@RT)bOwva^Ux|Y1)Pcq zHqpj2Szj(L$<^l(y>2jm4|$xBqmbpLYt*sCY-ORSRIknGnj2b1SwwhId@M;?Tuvbl zV@su!f=phSDm+{x)bi-`f^6n;8efqYt~Qc$D5Y_Rl2T%T24DNq%9!MEfvi{*LFMzZ zle=b2C(lq38$A<@DapwR?6L&n=zvsyY$-F5tSMKQ6MeYclyXwn&`KDEWGT;(m!&f& zMmliI6J(i0PbktC6?Lsxby0XYy_6ZQA@*Y=Q}WX_QZw{wi!;#X<=CjLLrq{5}=MV(AaEAL^3nit0$LPPUUCiMks!-B8@hepTQ(5o#>(> zv5HE($flH%RfZB-gigdMO4mxM)XrrMy9@e+%Y1!WoO#VUKR zYMP-Uo+@M|3gnCwjgCfa2q@(WlnNuupj8TpYR^&)U8PN^h!qkCGQx_)`X8QLAXIBCS?0WA3Ag zgeo#eAm^1R7zu_%;#G%;EJ+xdXz8FYF3~8*QKPa+!u^6PK`r<%tRToNO77Brq0~ zXA*mHvq%(PwoIU=r_vY&v0?fmdKhs(DU%}4W5lZ|Il0>85~3q1IU=G&6sFJ^B-A2> z*A<@xjy5q_qtQtUnKXv7f-lP=GxbHOqQvCPGOv&*z9J)$kyywv6zN#yMZydwcTzS% zPZW|kf{dKhh{D8(hzz!b_|{Zm@Dczom0QB5G3iC5xCkC6jIJZTq_Je_!ovJgg(|5u zu0lcF#T97d$O^GBlSU~M)8yefd@&vOhj%QTS9N#ZJLB{=>%aGh7S4FJ+idF7-K-Kk z9V|>u##vejow_-qAq;S`n#5WuB)Qs6Ir`9G>utmJ>0wE9>5v6DO>a&=GJ@o4W4`)g 
z7R`NDJmyDv_Hb}=G@ExyLs^{YH~-W`tQyqA+011|bNaTSp$l7{6zm;A>ET9opOvxW zrh!o_IPv&xSybPur|-Xxf;wSbG0KH)qLOimQs()&x<0L3I6D5 zwq)o!X;3x4UcBLIPDARS@{L!qBioe0E*mW!d|IAV984RCeVSm|%XCJwZA}2SqoXgq zm=m`2teJJd?e>!Wck>xtT_L8K=I=IhB+Ry5N;~oL>nMj9`@{F&bT>7bVEN8sQqx1n zdQ)XE@h;&avDxOtW5;R4PiC2})eih_co0Nel0#B|!5`S`m=))rz7L6ZvYHi-<6zln zx?W1WY<9~=yj~h)cE(e-@hbLc%k=@oG@ETo9o=@5?H$vNcWu`S^Y>yeZnsw)9*$#Z zH>0_0ApAmtJr=e&Xq_^;KP8R~T*DS@yq=3a#y=;$qk(}VW%h)Ioqt*wxeTMe{8(|& zG1X@G{kLV7J88cgdii-|%nExv0o)u-++0n&KEy-${Vkq}o6FyBi~qbHcXPG)&lg$F z|74qXJsM)It|#<=xvfe6+^;aV>*D%%u7?ZDd9n#cC%AdIoHTJhp!nNfmG9hP^SAw| zhUk2)dshN$4dvPt8dL?j!wc0}+B_AmKxY$Q6Iau6t*U_FY=x$|Dou=ch`RoFn6Zujj`KfW94u@E4-gw!gN?Ylv|%GL z1iw}J|MdUANB&nc-#lGrdtm{;iK%!LT@&Q`d$zic;-A$2auf%CH?U^K@1ww}@Nl+q z^R>VM`qwD_)x-y1m$m)+{GUTMdi1c-_{-tn^AwU-;Q9B&dXhuQlu(Li zKzebuPU9IKuBDRH3{nJ@%Ajl2VH7o0ouQ(uskHQPZ3dM}AyX+7bp$QYt?NYo?;l|R L?qP-amGS=rofA1X literal 0 HcmV?d00001 diff --git a/app/src/test/resources/sample/files/parquet/customer/country=AU/date=2023-01-01/part-00000-3862dc01-3d49-4de7-9135-765936fa55b8-c000.snappy.parquet b/app/src/test/resources/sample/files/parquet/customer/country=AU/date=2023-01-01/part-00000-3862dc01-3d49-4de7-9135-765936fa55b8-c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..19b185f556a7ec649152378acc137d4a9dae3396 GIT binary patch literal 11370 zcma)?cUV(P*S8Y{q!$GQLKuAwOMMDy5APF68*t^)khKdD`?bxvw zM6qG-y&_f=d%^N8Jm-1Nd9UyNxL!vbunRxs&Qlf6 zmrp(1Ve-2Rgul9Qc)*84&VP4e8@)sttV{xXXl!UIQ^M(3+V#(631ELfpU;>pgzpIh zerO^4BD`Ego6^4`Zw%`-5huwu`hmSX97?Pwi)++kwB&T@ST0Hg|N=+#f8qY zBp_#O?O*wNAbbdZ^Jl+LL2$!Z7t{JN8Z2f}i(bBFg9Uxqvm~|<>^e@hENvPH{`N@; zDQ%H(I$=}j?jZu;2rSJM9@ZWO?thuHy|78zVPX@4Ybi2&RuXIoldVZhPO zdvRXXH zWUnpG{zD1ZvlpMK-Kd1e_mu7Tbu0)S^TjaqkO%^IzN5$z#n418*VkgaP2Y8~C#MP_ z^3?R4$?uh5ckAad)#W76EXb@HeOLl9+hT^i$YFt}dQy}1*Dx^ipER@8S_;;^9^BBa zi-WS#>z&jL5^QCR+P#&?fJsmL*?c&j1SO8m_m9c~q1WZ{%il?pp;Mm3Tz!rOkG7Qe z&2S$K19uI2J=Tl|0uR`!)0*M&2hw|PJkJadvm?;Uz${Bsu21kG#1+QVVyvb5t*E-|6jd`uN-5*;*zaho=$ zh#`?V!`5YX9E|%oZ({!r7My6k{-{rj1rsQ}w=wDf%I`d*MME)H4-P=Y473H{8a5MIw^o_ z*L$3T8*EtYpS{O8PXfUOOF9B)N?_lJH%m8hlHp!dU;nf^4)EK9r|xf*!!ONL>z0{u zV4wSF)_HOgtXN$YXLBY7&VS)-GOm?@f0c!J>Fgw!N3~PB9hJiJ18Vbd5g%SUPaW4` z8U?wUy7CE!6wvnR`IFE+a_FodWk3B7Dj2rzcrkO4463c@id`TB`h;}SnO#)ao_2=M z?7;(-G1W}WmcxlT0Vgj#}co709fl+YC9;B1N0%i zmbALd!ByC#U45Mj#@JD5TgFJim+4e*YeR-b>SModmx{pSa7OhRh=9{`@)d}#lP>B&MOLV3$YH^x?K(<{rJtWng#A#?N65P3m%Y8H{i~x7Z|&4*MSWed+sN0b}p$N`~D^2655h#^*UQurmDU=|6)EK9gT=v)rnH zUy-wGXY5Y~;a;`_{jnUTuI74kZbd`Lizauk{w&D+^6XeS#6gT}!*jknAH4FC_XwIq zkUs5MeB%ibF20EO%FfT+X6GFk0@!ibt zBtp~q=vnP&2g2&I9ku6hoh)g4xcR#l&zq|I3~m}3QXbtMaJ*dzuToc6%_@$7RKLqT z{f~%1X0^22z-x&xc1+Yszp5y(yW@5?(t--6UKMSzO?=>9(#!8J7D36z_9J{_9Eh@P z=U!XGgIS9N`vfBrL6nzmxSmRZ65%Vq7qeqw=u%(eo>B2IJ@20R=Rg^_e>FaiWgdd!#Yu=L8O9?MZSFQMXiUDnIt0IP`$3a8Yb3wVk z1eR`<$;T}cfb^m46vre6%#V)Fj9bZp56=VUaQDQ*64{NPLop_{D-rEOAS%tlS1oGkdlH@Dv>;=G0Gj~xfi-V>s+V0=aiy*tns%C6l zAb1|NrUbj=`Z<$567YVX_Mv4|!zc;7pLb`TX(J83&DI|q)~*0SPzS-iodlbd?fuSV z2w~3cy6Ltlu`uyzZS=%nNpOJWUUp};5{7tt-=2Gp46#?U^}q1CSy<)Xn|)3IvYJ6B zZj~lO+NY(hku)(l8GD9BIZ0vPRpZ2igb>JPo+my(76()Fj~<-8TLA?F;wRnAm%^i8 zBkNmZMd0!|(|(R!JV={%6XM^?;S9(AZE|-hw4b03mU+d%cNT#?F_Nw-}!GcqY?A@h~BH+Bb?tIHf3hUi3bPmgtLuTel`zLLK!6Z7xvifEc z{CT4Jd*TNfTz0=`kRD*dH~HxqJDn>GL-+T>jaSnq)77-bP+XeR(|io4O91Lg0goF|%>SUIno2*E{T~j0Mh#UgtaT zetz%cyo^#M2jF|6z9UPX@g)$34O)!t1@_=RGfD-Qw}=o z-A;uaBnUma)3|iF2sZCD8vSR9V4+3JZn9Ad0)}bv%R_@9x+*+;;o)d_5l@Z$a#jEb zb%!T*h((aqbJU_coB6QC-L(JMayE>5-u=nMWIAk1H*OiTl>-EM)7;5>Wgt+JT6UyJ zz@`KAPR3y0PCi6@>3yS<8x7~yt33oIT&SBbd-o|p1P6Xz4exKQ1b=f{ 
zVSr{Z*e$*__Fm&IVer*p#P#31OdXT7}2i2xIAn&R3$3A`OPp*a357w#=`42szi0XueF9eBxt z3k}!zI2|6yhhv%XRnxce!0n>f<%oJIoUeQ5vuvyYHa(q|vDg9Edr0D|8SkUuUDRU2 z@);B`bCR6>YKVj5m)a#k>zQz;aBXH>L;`GGPjK*X6T=p&@8rc)F6t;gHk#grKpM)qlMg z3YYKx+7voc0?OdhjQWyrSoF-}=mtA6FlJu!UlYgx_MDfs^BMxc=KZY4f}gRF%q!Sa zpBE1nPpX|;y%jKlbpMFk{y=ylnzQ8d+bAfExi-G}qZHo#;Z1luJO=i*ZIev)l0!pw#wodUkM&&J8sGMvtSjW|3ZIzCH(4ppS$>DB8<=3pv*a; zgy!lWfBg6u54%=a#a_6O1eCc)F7a4&2-_G^d%;5nje(`{vpOW8RvdOcd`Sv*Z{PG< z@g^Aj#w}QR#xns3jw?ygDT82V$;Bc5Yv{1%s{1t}-uE+FF5SI3B_7sJ-`p{)Rtax@ zu|3B>6M?DIU_;|vC7gTI+asV1%Z-hBlGl?ABZjQmVQR^O%?a;jO^+qRH19(L659oE zton6f(`v=Zy_XcTKf)KJFgt~Uu6SFj~Gmc7m|Z-x=qLQ1CQ;>tbwrmsZo9R zsuBpB0_yzE>k2OgPsa6Tsvg#BL^J?GmBVKVLhAB4MNc(-K5 zrBy2w(7K}XDcwp0wqciz+wu9XmCBwEJ$uWI9UeP`MoY&8Nvg*k`G6w?M(vzCgQK>MgbUZRw`*J0We;&<;Quy zczCyUNK5i~32ZnX(8%xrh#Yg^f@BOCDxWUB+15`1r`3Zu$J_?kMyfb)Ps)K4Z_{#K z;{8(kzVXYiXW`JO39#*TnMq^NvWsmuQt7+e-jHib#P*>jbzVAC&T86a)4NIzyB8NT4K8 z?mKuE6>K?!JddVIp{&Og5BfnFTr+Rqv?`keFYKbz73V_XRNaWyo8nlAdp(`mXFL90 zu<4q>M1uF*5P$pVH zhRx}WmXts##N2x1a-JItVg3E)E*O&tp&y$@(AWwvemkUum3U9?*R*HQom=P z_$a}BtMk}i7vxa9=&6&+j1JENZLVFv8w-N_L!4H;lSA_8H^)v4z0}bBZXbvzMAz^v!L2z+OpT(L{RoC>d3ql8H9xtH1)#$;!;`thdnwUxD$CU zM!SLyPECXM+RtNxmuJ=d@81CWKeIV-*G&OGcP?(dcS;UcYc&V`DgxnSc*7rY2c-}f z*0|_FtpaWgJL=xnp9X7>PZByB@pq@;4V{&*l7Tw&;qBY43i#ODkhgbo0_e7EbR2S< z3c?+i-qu}|LBS=PQUhM+XMX}dGCEV73Nn7H`!zTDgCnUugzw{agNK)jL8w;8}rzI z7oLYF)jcnNAI$~hukh-!t$eVE>ZrGkBf;{a4HE`@$8p)UOtY?D4nrK=uN-TT;QsZZ zy6ve197YVa-~LksZ&!F(nx%2z!oYryhvEJA^MZbr{(Y!W=ev94aXuI7ugVV0Tpk0J zKNV}d590G*pT@f2U@3IWe;$=NTL3*iE%yG|84upA-^R?Skw8K2fx~TEnGhP9M@{=G zhX2yaj%$+s!UDhcUq*r!Rxo?bxe6Q>ofgu7YTPWflvm${@ zWmCi24?;LIhhkcCPzd+^zjM+y#=s-Nsx49bnILXa@mDsGAmt{vvuJZNtT=Xsken`t z*l&Zkr92hE&f3QM-opn&__oR73Bwb>CV;h^xdZQ~S7%yhJoJbBQgZ*Jwh3Ty=laV1 z;R+aEZT~cVE*pF}2MJyZ1zcO*eVfe;4!B7RU+T)(P<3W>=Heb=7~!tkH5yOjrov&i zKODm$|B{j9&?1I&p%uFr(}hqraf{34aZuE z>4y2e5jSK8rhg*w$U|fe@)qlup(Y@o(QZRsfy_Z(VR``SQ=|YHh14KKWCwB`DMdmt z_BT{7)DYC$$VX&2!XqB&NJf2(vAUzm5Cw7`vBL5~)IuZ-If7IpJ&~;l6EVRuU$E{h zv_~R+k;lZ252w4Kb2=tmv49fwI;JP1QV>%_g!Du7h&%EC^XH-dLOp<*gPcOnV)_>9 z4^%7EF9-o+9>+8r?U{%ad4}mn$Uw|HjOk{?7pXwX5J&XCK=nZ7qFzGAAvX{U^gY44 zG_>W&Rm2B??`p%(EKwh97gGxfG5Npi8iRy{`fl!f)hzsJ3_+tKZ zR6FD+)?JS}4QWJdF|Qrf85L0dk+;ZDq$m0=VfjgDo1yK1Y(P5{*@bixtfb2oWA}=wo7}W;j*rP@v=4f9<|2nkWQNvL!kUMBE zNBxStL*^ovk=yA1fjR*hg4kjGov0k74hcqF5r6cTquxOpkPBFkg?bFJ#Jszx5l9yD z5cBd-Z=v2p-XlFQ9fZ)4mq;Jv4Ej@0zoVLBj6-ODMzWEKh&}q|qb4JL&~`*!hGZb; zu+9k7r>I4!0hlj9l_1}dLC9L98S{6eu0}i%A7nEUjOACNenN&JQ;_Y*QX~ZPx+6NY z-y?63Ye;WoA(DigMJ{6OOjHf>8tu!-4de`_ov?l+>IcLd?GE%mLHjs@FT1weI3KWa zp1!<&jn%*3w=FjR;M(o)mtB@)2__bG_&sIIpSoU zs@oL1zA=?t2i7r*9=?eN#yIau)18A;_gHk8#M`)0v#xA4b~`c2BeJC9*~7=(=XkOv zI|Qb;TG;vUgaIi{Yo^Wj7p~$iraZK)4szKv^vJw8YioDLt_Ke;HMjH{z-yd%#4E#Q z9-;5a)_}ssnKP+bmvo{XYY9u~`S%*9_0JU8MHb(>v3G*MeV?d`PaV%MoH$=SxbmlI z58Bi@lUbAPy!dOfL=N$@Jfh#31YYXRn;+n|Ui18tS9B;zdueCju_=k;8O2tFlb8LY zOHwyE5Q7%16Pa_XDa9}5^^r=&>r1kIMw`h+?ca{H zn6FDaG^Uwo9;lc;V#-8Y32W1he$mH;ePU~ZO}o3Ft|8~Py>i=`+HIB5q=DsLFTK2c zeVu$(FV|e!X5x{Wyz7B^k&9m4YMXd_lHXlH;88Cxrmg;d)Kx3f&{~G0&+)IRAXv23 z=gFCR%7XFC^#r}on#k~c>wf+J+*-GPlJEK2qNy`C-B^>~XMf4-dtb+y*vCbcU#po@ z5MI@HQ_#oY8~dwYr&)~JU|w%s9n!Dg=+V;l8z-vL4qN;z+fzpA=3uU$Gp}Juhy4f` zcfYT#JJ-;>)Ki`29F%oo!lDU|V>}ZVotV`$Y0?MZ32W!9bg=*YrMlwc1*XPxAZgf3EA#p z+4IlYC++%sMe*wfp6Jmtdc$2((aPSuM0r5t4Bu^QnsWS-1)Z5y&fyF0hznXZr)!|P zjPlH3{&}m)?)+oly?CKEJDpE2*_z55_axLJaLB&acj`r_7uu_iYnqeS4$@P58SC8q zGOXt0=X}}`*gSsWQk7N7hbxnK2j(!6eAdjP5}lW+&%K)1vmkwT@v1fT4|(wu=mgwG z?D243)7$y$^2v|Pa2rAFdXHUe_u(?p4(}j_QVNfkm`Tq_&f~-`s0*Nx6EM{ipXK|9dYSZ)B#`K&VDp?#$DoVCQPq;xtr3-Vp 
zOzW;%rSS}UaRh@=oZc0g%gl+_CviD+F(qDIMinr7ptCTdOkS$VO=0j-Gjj^d^YnRL z-UyLEBH(2zj8sL2K08aTNcl6Rd92(7U(pV3JXLjG_nKQdc9tu5i*2& zLm@dcnU&c!Jo@xPN&+pVOq!*VQiVkHWLe?ARGE@0qKV>?o8Jn!hrt`{@ z3|-L@7SV^i-ZAzFd}H7OU1K8oid5srj_xgz!8Xv#aAt>cT`BO;Tu%Hd|kjW#ke2 z`xF^-^H@1TI@yNUt5Q~2UQVWSsVYNp*O-MFD)_M+nx3f+(@RT)bOwva^Ux|Y1)Pcq zHqpj2Szj(L$<^l(y>2jm4|$xBqmbpLYt*sCY-ORSRIknGnj2b1SwwhId@M;?Tuvbl zV@su!f=phSDm+{x)bi-`f^6n;8efqYt~Qc$D5Y_Rl2T%T24DNq%9!MEfvi{*LFMzZ zle=b2C(lq38$A<@DapwR?6L&n=zvsyY$-F5tSMKQ6MeYclyXwn&`KDEWGT;(m!&f& zMmliI6J(i0PbktC6?Lsxby0XYy_6ZQA@*Y=Q}WX_QZw{wi!;#X<=CjLLrq{5}=MV(AaEAL^3nit0$LPPUUCiMks!-B8@hepTQ(5o#>(> zv5HE($flH%RfZB-gigdMO4mxM)XrrMy9@e+%Y1!WoO#VUKR zYMP-Uo+@M|3gnCwjgCfa2q@(WlnNuupj8TpYR^&)U8PN^h!qkCGQx_)`X8QLAXIBCS?0WA3Ag zgeo#eAm^1R7zu_%;#G%;EJ+xdXz8FYF3~8*QKPa+!u^6PK`r<%tRToNO77Brq0~ zXA*mHvq%(PwoIU=r_vY&v0?fmdKhs(DU%}4W5lZ|Il0>85~3q1IU=G&6sFJ^B-A2> z*A<@xjy5q_qtQtUnKXv7f-lP=GxbHOqQvCPGOv&*z9J)$kyywv6zN#yMZydwcTzS% zPZW|kf{dKhh{D8(hzz!b_|{Zm@Dczom0QB5G3iC5xCkC6jIJZTq_Je_!ovJgg(|5u zu0lcF#T97d$O^GBlSU~M)8yefd@&vOhj%QTS9N#ZJLB{=>%aGh7S4FJ+idF7-K-Kk z9V|>u##vejow_-qAq;S`n#5WuB)Qs6Ir`9G>utmJ>0wE9>5v6DO>a&=GJ@o4W4`)g z7R`NDJmyDv_Hb}=G@ExyLs^{YH~-W`tQyqA+011|bNaTSp$l7{6zm;A>ET9opOvxW zrh!o_IPv&xSybPur|-Xxf;wSbG0KH)qLOimQs()&x<0L3I6D5 zwq)o!X;3x4UcBLIPDARS@{L!qBioe0E*mW!d|IAV984RCeVSm|%XCJwZA}2SqoXgq zm=m`2teJJd?e>!Wck>xtT_L8K=I=IhB+Ry5N;~oL>nMj9`@{F&bT>7bVEN8sQqx1n zdQ)XE@h;&avDxOtW5;R4PiC2})eih_co0Nel0#B|!5`S`m=))rz7L6ZvYHi-<6zln zx?W1WY<9~=yj~h)cE(e-@hbLc%k=@oG@ETo9o=@5?H$vNcWu`S^Y>yeZnsw)9*$#Z zH>0_0ApAmtJr=e&Xq_^;KP8R~T*DS@yq=3a#y=;$qk(}VW%h)Ioqt*wxeTMe{8(|& zG1X@G{kLV7J88cgdii-|%nExv0o)u-++0n&KEy-${Vkq}o6FyBi~qbHcXPG)&lg$F z|74qXJsM)It|#<=xvfe6+^;aV>*D%%u7?ZDd9n#cC%AdIoHTJhp!nNfmG9hP^SAw| zhUk2)dshN$4dvPt8dL?j!wc0}+B_AmKxY$Q6Iau6t*U_FY=x$|Dou=ch`RoFn6Zujj`KfW94u@E4-gw!gN?Ylv|%GL z1iw}J|MdUANB&nc-#lGrdtm{;iK%!LT@&Q`d$zic;-A$2auf%CH?U^K@1ww}@Nl+q z^R>VM`qwD_)x-y1m$m)+{GUTMdi1c-_{-tn^AwU-;Q9B&dXhuQlu(Li zKzebuPU9IKuBDRH3{nJ@%Ajl2VH7o0ouQ(uskHQPZ3dM}AyX+7bp$QYt?NYo?;l|R L?qP-amGS=rofA1X literal 0 HcmV?d00001 diff --git a/app/src/test/resources/sample/files/parquet/transactions/._SUCCESS.crc b/app/src/test/resources/sample/files/parquet/transactions/._SUCCESS.crc new file mode 100644 index 0000000000000000000000000000000000000000..3b7b044936a890cd8d651d349a752d819d71d22c GIT binary patch literal 8 PcmYc;N@ieSU}69O2$TUk literal 0 HcmV?d00001 diff --git a/app/src/test/resources/sample/files/parquet/transactions/.part-00000-3862dc01-3d49-4de7-9135-765936fa55b8-c000.snappy.parquet.crc b/app/src/test/resources/sample/files/parquet/transactions/.part-00000-3862dc01-3d49-4de7-9135-765936fa55b8-c000.snappy.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..6e15de4c4a56b5de8d01237e0e2f663edfb2a36c GIT binary patch literal 100 zcmV-q0Gt0~a$^7h00IEKA$eRyDMINDAZZvudeZL6%mhn?t-P{t#`?Z_23<)Xk-`pi zAMn*GR literal 0 HcmV?d00001 diff --git a/app/src/test/resources/sample/files/parquet/transactions/_SUCCESS b/app/src/test/resources/sample/files/parquet/transactions/_SUCCESS new file mode 100644 index 00000000..e69de29b diff --git a/app/src/test/resources/sample/files/parquet/transactions/part-00000-3862dc01-3d49-4de7-9135-765936fa55b8-c000.snappy.parquet b/app/src/test/resources/sample/files/parquet/transactions/part-00000-3862dc01-3d49-4de7-9135-765936fa55b8-c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..19b185f556a7ec649152378acc137d4a9dae3396 GIT binary patch literal 11370 zcma)?cUV(P*S8Y{q!$GQLKuAwOMMDy5APF68*t^)khKdD`?bxvw 
zM6qG-y&_f=d%^N8Jm-1Nd9UyNxL!vbunRxs&Qlf6 zmrp(1Ve-2Rgul9Qc)*84&VP4e8@)sttV{xXXl!UIQ^M(3+V#(631ELfpU;>pgzpIh zerO^4BD`Ego6^4`Zw%`-5huwu`hmSX97?Pwi)++kwB&T@ST0Hg|N=+#f8qY zBp_#O?O*wNAbbdZ^Jl+LL2$!Z7t{JN8Z2f}i(bBFg9Uxqvm~|<>^e@hENvPH{`N@; zDQ%H(I$=}j?jZu;2rSJM9@ZWO?thuHy|78zVPX@4Ybi2&RuXIoldVZhPO zdvRXXH zWUnpG{zD1ZvlpMK-Kd1e_mu7Tbu0)S^TjaqkO%^IzN5$z#n418*VkgaP2Y8~C#MP_ z^3?R4$?uh5ckAad)#W76EXb@HeOLl9+hT^i$YFt}dQy}1*Dx^ipER@8S_;;^9^BBa zi-WS#>z&jL5^QCR+P#&?fJsmL*?c&j1SO8m_m9c~q1WZ{%il?pp;Mm3Tz!rOkG7Qe z&2S$K19uI2J=Tl|0uR`!)0*M&2hw|PJkJadvm?;Uz${Bsu21kG#1+QVVyvb5t*E-|6jd`uN-5*;*zaho=$ zh#`?V!`5YX9E|%oZ({!r7My6k{-{rj1rsQ}w=wDf%I`d*MME)H4-P=Y473H{8a5MIw^o_ z*L$3T8*EtYpS{O8PXfUOOF9B)N?_lJH%m8hlHp!dU;nf^4)EK9r|xf*!!ONL>z0{u zV4wSF)_HOgtXN$YXLBY7&VS)-GOm?@f0c!J>Fgw!N3~PB9hJiJ18Vbd5g%SUPaW4` z8U?wUy7CE!6wvnR`IFE+a_FodWk3B7Dj2rzcrkO4463c@id`TB`h;}SnO#)ao_2=M z?7;(-G1W}WmcxlT0Vgj#}co709fl+YC9;B1N0%i zmbALd!ByC#U45Mj#@JD5TgFJim+4e*YeR-b>SModmx{pSa7OhRh=9{`@)d}#lP>B&MOLV3$YH^x?K(<{rJtWng#A#?N65P3m%Y8H{i~x7Z|&4*MSWed+sN0b}p$N`~D^2655h#^*UQurmDU=|6)EK9gT=v)rnH zUy-wGXY5Y~;a;`_{jnUTuI74kZbd`Lizauk{w&D+^6XeS#6gT}!*jknAH4FC_XwIq zkUs5MeB%ibF20EO%FfT+X6GFk0@!ibt zBtp~q=vnP&2g2&I9ku6hoh)g4xcR#l&zq|I3~m}3QXbtMaJ*dzuToc6%_@$7RKLqT z{f~%1X0^22z-x&xc1+Yszp5y(yW@5?(t--6UKMSzO?=>9(#!8J7D36z_9J{_9Eh@P z=U!XGgIS9N`vfBrL6nzmxSmRZ65%Vq7qeqw=u%(eo>B2IJ@20R=Rg^_e>FaiWgdd!#Yu=L8O9?MZSFQMXiUDnIt0IP`$3a8Yb3wVk z1eR`<$;T}cfb^m46vre6%#V)Fj9bZp56=VUaQDQ*64{NPLop_{D-rEOAS%tlS1oGkdlH@Dv>;=G0Gj~xfi-V>s+V0=aiy*tns%C6l zAb1|NrUbj=`Z<$567YVX_Mv4|!zc;7pLb`TX(J83&DI|q)~*0SPzS-iodlbd?fuSV z2w~3cy6Ltlu`uyzZS=%nNpOJWUUp};5{7tt-=2Gp46#?U^}q1CSy<)Xn|)3IvYJ6B zZj~lO+NY(hku)(l8GD9BIZ0vPRpZ2igb>JPo+my(76()Fj~<-8TLA?F;wRnAm%^i8 zBkNmZMd0!|(|(R!JV={%6XM^?;S9(AZE|-hw4b03mU+d%cNT#?F_Nw-}!GcqY?A@h~BH+Bb?tIHf3hUi3bPmgtLuTel`zLLK!6Z7xvifEc z{CT4Jd*TNfTz0=`kRD*dH~HxqJDn>GL-+T>jaSnq)77-bP+XeR(|io4O91Lg0goF|%>SUIno2*E{T~j0Mh#UgtaT zetz%cyo^#M2jF|6z9UPX@g)$34O)!t1@_=RGfD-Qw}=o z-A;uaBnUma)3|iF2sZCD8vSR9V4+3JZn9Ad0)}bv%R_@9x+*+;;o)d_5l@Z$a#jEb zb%!T*h((aqbJU_coB6QC-L(JMayE>5-u=nMWIAk1H*OiTl>-EM)7;5>Wgt+JT6UyJ zz@`KAPR3y0PCi6@>3yS<8x7~yt33oIT&SBbd-o|p1P6Xz4exKQ1b=f{ zVSr{Z*e$*__Fm&IVer*p#P#31OdXT7}2i2xIAn&R3$3A`OPp*a357w#=`42szi0XueF9eBxt z3k}!zI2|6yhhv%XRnxce!0n>f<%oJIoUeQ5vuvyYHa(q|vDg9Edr0D|8SkUuUDRU2 z@);B`bCR6>YKVj5m)a#k>zQz;aBXH>L;`GGPjK*X6T=p&@8rc)F6t;gHk#grKpM)qlMg z3YYKx+7voc0?OdhjQWyrSoF-}=mtA6FlJu!UlYgx_MDfs^BMxc=KZY4f}gRF%q!Sa zpBE1nPpX|;y%jKlbpMFk{y=ylnzQ8d+bAfExi-G}qZHo#;Z1luJO=i*ZIev)l0!pw#wodUkM&&J8sGMvtSjW|3ZIzCH(4ppS$>DB8<=3pv*a; zgy!lWfBg6u54%=a#a_6O1eCc)F7a4&2-_G^d%;5nje(`{vpOW8RvdOcd`Sv*Z{PG< z@g^Aj#w}QR#xns3jw?ygDT82V$;Bc5Yv{1%s{1t}-uE+FF5SI3B_7sJ-`p{)Rtax@ zu|3B>6M?DIU_;|vC7gTI+asV1%Z-hBlGl?ABZjQmVQR^O%?a;jO^+qRH19(L659oE zton6f(`v=Zy_XcTKf)KJFgt~Uu6SFj~Gmc7m|Z-x=qLQ1CQ;>tbwrmsZo9R zsuBpB0_yzE>k2OgPsa6Tsvg#BL^J?GmBVKVLhAB4MNc(-K5 zrBy2w(7K}XDcwp0wqciz+wu9XmCBwEJ$uWI9UeP`MoY&8Nvg*k`G6w?M(vzCgQK>MgbUZRw`*J0We;&<;Quy zczCyUNK5i~32ZnX(8%xrh#Yg^f@BOCDxWUB+15`1r`3Zu$J_?kMyfb)Ps)K4Z_{#K z;{8(kzVXYiXW`JO39#*TnMq^NvWsmuQt7+e-jHib#P*>jbzVAC&T86a)4NIzyB8NT4K8 z?mKuE6>K?!JddVIp{&Og5BfnFTr+Rqv?`keFYKbz73V_XRNaWyo8nlAdp(`mXFL90 zu<4q>M1uF*5P$pVH zhRx}WmXts##N2x1a-JItVg3E)E*O&tp&y$@(AWwvemkUum3U9?*R*HQom=P z_$a}BtMk}i7vxa9=&6&+j1JENZLVFv8w-N_L!4H;lSA_8H^)v4z0}bBZXbvzMAz^v!L2z+OpT(L{RoC>d3ql8H9xtH1)#$;!;`thdnwUxD$CU zM!SLyPECXM+RtNxmuJ=d@81CWKeIV-*G&OGcP?(dcS;UcYc&V`DgxnSc*7rY2c-}f z*0|_FtpaWgJL=xnp9X7>PZByB@pq@;4V{&*l7Tw&;qBY43i#ODkhgbo0_e7EbR2S< z3c?+i-qu}|LBS=PQUhM+XMX}dGCEV73Nn7H`!zTDgCnUugzw{agNK)jL8w;8}rzI 
z7oLYF)jcnNAI$~hukh-!t$eVE>ZrGkBf;{a4HE`@$8p)UOtY?D4nrK=uN-TT;QsZZ zy6ve197YVa-~LksZ&!F(nx%2z!oYryhvEJA^MZbr{(Y!W=ev94aXuI7ugVV0Tpk0J zKNV}d590G*pT@f2U@3IWe;$=NTL3*iE%yG|84upA-^R?Skw8K2fx~TEnGhP9M@{=G zhX2yaj%$+s!UDhcUq*r!Rxo?bxe6Q>ofgu7YTPWflvm${@ zWmCi24?;LIhhkcCPzd+^zjM+y#=s-Nsx49bnILXa@mDsGAmt{vvuJZNtT=Xsken`t z*l&Zkr92hE&f3QM-opn&__oR73Bwb>CV;h^xdZQ~S7%yhJoJbBQgZ*Jwh3Ty=laV1 z;R+aEZT~cVE*pF}2MJyZ1zcO*eVfe;4!B7RU+T)(P<3W>=Heb=7~!tkH5yOjrov&i zKODm$|B{j9&?1I&p%uFr(}hqraf{34aZuE z>4y2e5jSK8rhg*w$U|fe@)qlup(Y@o(QZRsfy_Z(VR``SQ=|YHh14KKWCwB`DMdmt z_BT{7)DYC$$VX&2!XqB&NJf2(vAUzm5Cw7`vBL5~)IuZ-If7IpJ&~;l6EVRuU$E{h zv_~R+k;lZ252w4Kb2=tmv49fwI;JP1QV>%_g!Du7h&%EC^XH-dLOp<*gPcOnV)_>9 z4^%7EF9-o+9>+8r?U{%ad4}mn$Uw|HjOk{?7pXwX5J&XCK=nZ7qFzGAAvX{U^gY44 zG_>W&Rm2B??`p%(EKwh97gGxfG5Npi8iRy{`fl!f)hzsJ3_+tKZ zR6FD+)?JS}4QWJdF|Qrf85L0dk+;ZDq$m0=VfjgDo1yK1Y(P5{*@bixtfb2oWA}=wo7}W;j*rP@v=4f9<|2nkWQNvL!kUMBE zNBxStL*^ovk=yA1fjR*hg4kjGov0k74hcqF5r6cTquxOpkPBFkg?bFJ#Jszx5l9yD z5cBd-Z=v2p-XlFQ9fZ)4mq;Jv4Ej@0zoVLBj6-ODMzWEKh&}q|qb4JL&~`*!hGZb; zu+9k7r>I4!0hlj9l_1}dLC9L98S{6eu0}i%A7nEUjOACNenN&JQ;_Y*QX~ZPx+6NY z-y?63Ye;WoA(DigMJ{6OOjHf>8tu!-4de`_ov?l+>IcLd?GE%mLHjs@FT1weI3KWa zp1!<&jn%*3w=FjR;M(o)mtB@)2__bG_&sIIpSoU zs@oL1zA=?t2i7r*9=?eN#yIau)18A;_gHk8#M`)0v#xA4b~`c2BeJC9*~7=(=XkOv zI|Qb;TG;vUgaIi{Yo^Wj7p~$iraZK)4szKv^vJw8YioDLt_Ke;HMjH{z-yd%#4E#Q z9-;5a)_}ssnKP+bmvo{XYY9u~`S%*9_0JU8MHb(>v3G*MeV?d`PaV%MoH$=SxbmlI z58Bi@lUbAPy!dOfL=N$@Jfh#31YYXRn;+n|Ui18tS9B;zdueCju_=k;8O2tFlb8LY zOHwyE5Q7%16Pa_XDa9}5^^r=&>r1kIMw`h+?ca{H zn6FDaG^Uwo9;lc;V#-8Y32W1he$mH;ePU~ZO}o3Ft|8~Py>i=`+HIB5q=DsLFTK2c zeVu$(FV|e!X5x{Wyz7B^k&9m4YMXd_lHXlH;88Cxrmg;d)Kx3f&{~G0&+)IRAXv23 z=gFCR%7XFC^#r}on#k~c>wf+J+*-GPlJEK2qNy`C-B^>~XMf4-dtb+y*vCbcU#po@ z5MI@HQ_#oY8~dwYr&)~JU|w%s9n!Dg=+V;l8z-vL4qN;z+fzpA=3uU$Gp}Juhy4f` zcfYT#JJ-;>)Ki`29F%oo!lDU|V>}ZVotV`$Y0?MZ32W!9bg=*YrMlwc1*XPxAZgf3EA#p z+4IlYC++%sMe*wfp6Jmtdc$2((aPSuM0r5t4Bu^QnsWS-1)Z5y&fyF0hznXZr)!|P zjPlH3{&}m)?)+oly?CKEJDpE2*_z55_axLJaLB&acj`r_7uu_iYnqeS4$@P58SC8q zGOXt0=X}}`*gSsWQk7N7hbxnK2j(!6eAdjP5}lW+&%K)1vmkwT@v1fT4|(wu=mgwG z?D243)7$y$^2v|Pa2rAFdXHUe_u(?p4(}j_QVNfkm`Tq_&f~-`s0*Nx6EM{ipXK|9dYSZ)B#`K&VDp?#$DoVCQPq;xtr3-Vp zOzW;%rSS}UaRh@=oZc0g%gl+_CviD+F(qDIMinr7ptCTdOkS$VO=0j-Gjj^d^YnRL z-UyLEBH(2zj8sL2K08aTNcl6Rd92(7U(pV3JXLjG_nKQdc9tu5i*2& zLm@dcnU&c!Jo@xPN&+pVOq!*VQiVkHWLe?ARGE@0qKV>?o8Jn!hrt`{@ z3|-L@7SV^i-ZAzFd}H7OU1K8oid5srj_xgz!8Xv#aAt>cT`BO;Tu%Hd|kjW#ke2 z`xF^-^H@1TI@yNUt5Q~2UQVWSsVYNp*O-MFD)_M+nx3f+(@RT)bOwva^Ux|Y1)Pcq zHqpj2Szj(L$<^l(y>2jm4|$xBqmbpLYt*sCY-ORSRIknGnj2b1SwwhId@M;?Tuvbl zV@su!f=phSDm+{x)bi-`f^6n;8efqYt~Qc$D5Y_Rl2T%T24DNq%9!MEfvi{*LFMzZ zle=b2C(lq38$A<@DapwR?6L&n=zvsyY$-F5tSMKQ6MeYclyXwn&`KDEWGT;(m!&f& zMmliI6J(i0PbktC6?Lsxby0XYy_6ZQA@*Y=Q}WX_QZw{wi!;#X<=CjLLrq{5}=MV(AaEAL^3nit0$LPPUUCiMks!-B8@hepTQ(5o#>(> zv5HE($flH%RfZB-gigdMO4mxM)XrrMy9@e+%Y1!WoO#VUKR zYMP-Uo+@M|3gnCwjgCfa2q@(WlnNuupj8TpYR^&)U8PN^h!qkCGQx_)`X8QLAXIBCS?0WA3Ag zgeo#eAm^1R7zu_%;#G%;EJ+xdXz8FYF3~8*QKPa+!u^6PK`r<%tRToNO77Brq0~ zXA*mHvq%(PwoIU=r_vY&v0?fmdKhs(DU%}4W5lZ|Il0>85~3q1IU=G&6sFJ^B-A2> z*A<@xjy5q_qtQtUnKXv7f-lP=GxbHOqQvCPGOv&*z9J)$kyywv6zN#yMZydwcTzS% zPZW|kf{dKhh{D8(hzz!b_|{Zm@Dczom0QB5G3iC5xCkC6jIJZTq_Je_!ovJgg(|5u zu0lcF#T97d$O^GBlSU~M)8yefd@&vOhj%QTS9N#ZJLB{=>%aGh7S4FJ+idF7-K-Kk z9V|>u##vejow_-qAq;S`n#5WuB)Qs6Ir`9G>utmJ>0wE9>5v6DO>a&=GJ@o4W4`)g z7R`NDJmyDv_Hb}=G@ExyLs^{YH~-W`tQyqA+011|bNaTSp$l7{6zm;A>ET9opOvxW zrh!o_IPv&xSybPur|-Xxf;wSbG0KH)qLOimQs()&x<0L3I6D5 zwq)o!X;3x4UcBLIPDARS@{L!qBioe0E*mW!d|IAV984RCeVSm|%XCJwZA}2SqoXgq 
zm=m`2teJJd?e>!Wck>xtT_L8K=I=IhB+Ry5N;~oL>nMj9`@{F&bT>7bVEN8sQqx1n zdQ)XE@h;&avDxOtW5;R4PiC2})eih_co0Nel0#B|!5`S`m=))rz7L6ZvYHi-<6zln zx?W1WY<9~=yj~h)cE(e-@hbLc%k=@oG@ETo9o=@5?H$vNcWu`S^Y>yeZnsw)9*$#Z zH>0_0ApAmtJr=e&Xq_^;KP8R~T*DS@yq=3a#y=;$qk(}VW%h)Ioqt*wxeTMe{8(|& zG1X@G{kLV7J88cgdii-|%nExv0o)u-++0n&KEy-${Vkq}o6FyBi~qbHcXPG)&lg$F z|74qXJsM)It|#<=xvfe6+^;aV>*D%%u7?ZDd9n#cC%AdIoHTJhp!nNfmG9hP^SAw| zhUk2)dshN$4dvPt8dL?j!wc0}+B_AmKxY$Q6Iau6t*U_FY=x$|Dou=ch`RoFn6Zujj`KfW94u@E4-gw!gN?Ylv|%GL z1iw}J|MdUANB&nc-#lGrdtm{;iK%!LT@&Q`d$zic;-A$2auf%CH?U^K@1ww}@Nl+q z^R>VM`qwD_)x-y1m$m)+{GUTMdi1c-_{-tn^AwU-;Q9B&dXhuQlu(Li zKzebuPU9IKuBDRH3{nJ@%Ajl2VH7o0ouQ(uskHQPZ3dM}AyX+7bp$QYt?NYo?;l|R L?qP-amGS=rofA1X literal 0 HcmV?d00001 diff --git a/app/src/test/resources/sample/files/protobuf/example.desc b/app/src/test/resources/sample/files/protobuf/example.desc new file mode 100644 index 0000000000000000000000000000000000000000..ad594d7c30cc0ff97e2f0757116b14e18beb10d3 GIT binary patch literal 963 zcmbVLT~FIE6eUgDv^V_-hKFc->8fd}G%Z3PA%w&O*$Sy9Qe>(zl6-jKhE+YSc zpT!T_kC>g*3EOx;d-FZH=Ukub6L_+qOCLzWW9soG6AYmrEb(p@;&wgv_|KTJyKML# zI#Y2=m*b)n?Lvd8+BF8osEvza_^S!MOQmse8ppGY(-m^ST%;mzGqE}%1a(0lgj>s* z`56V!qUkz`?n&&L67TJQ^8USr_oucO(v_gsV@br8qJJYXItDADs};GSZe0r5`-7K+ zo_oQXfIdKnaj{@2i6eFCh7>RZ1KS#%q~g&q&%CNMm^e@5DXryhoUS~U~SBe+K#+2ea=qxFxL@qXxbKLg89GTeGSh!Bm zS8#M$4h7lvnqBubb2AdXg;wDpBCaiO-Uzm^RUoI&KF-}vzT@(vk50f$2zR^r map = 13; +} \ No newline at end of file diff --git a/app/src/test/resources/sample/files/protobuf/simple.desc b/app/src/test/resources/sample/files/protobuf/simple.desc new file mode 100644 index 00000000..3fcbe3d6 --- /dev/null +++ b/app/src/test/resources/sample/files/protobuf/simple.desc @@ -0,0 +1,6 @@ + +F + simple.proto". +Simple +name ( Rname +age (Ragebproto3 \ No newline at end of file diff --git a/app/src/test/resources/sample/files/protobuf/simple/simple.proto b/app/src/test/resources/sample/files/protobuf/simple/simple.proto new file mode 100644 index 00000000..7f434b11 --- /dev/null +++ b/app/src/test/resources/sample/files/protobuf/simple/simple.proto @@ -0,0 +1,6 @@ +syntax = "proto3"; + +message Simple { + string name = 1; + int64 age = 2; +} diff --git a/app/src/test/resources/sample/http/openapi/petstore.json b/app/src/test/resources/sample/http/openapi/petstore.json new file mode 100644 index 00000000..5a6207d5 --- /dev/null +++ b/app/src/test/resources/sample/http/openapi/petstore.json @@ -0,0 +1,266 @@ +{ + "openapi": "3.0.0", + "info": { + "version": "1.0.0", + "title": "Swagger Petstore", + "description": "A sample API that uses a petstore as an example to demonstrate features in the OpenAPI 3.0 specification", + "termsOfService": "http://swagger.io/terms/", + "contact": { + "name": "Swagger API Team", + "email": "apiteam@swagger.io", + "url": "http://swagger.io" + }, + "license": { + "name": "Apache 2.0", + "url": "https://www.apache.org/licenses/LICENSE-2.0.html" + } + }, + "servers": [ + { + "url": "http://localhost:80/anything" + } + ], + "paths": { + "/pets": { + "get": { + "description": "Returns all pets from the system that the user has access to\nNam sed condimentum est. Maecenas tempor sagittis sapien, nec rhoncus sem sagittis sit amet. Aenean at gravida augue, ac iaculis sem. Curabitur odio lorem, ornare eget elementum nec, cursus id lectus. Duis mi turpis, pulvinar ac eros ac, tincidunt varius justo. In hac habitasse platea dictumst. Integer at adipiscing ante, a sagittis ligula. Aenean pharetra tempor ante molestie imperdiet. Vivamus id aliquam diam. 
Cras quis velit non tortor eleifend sagittis. Praesent at enim pharetra urna volutpat venenatis eget eget mauris. In eleifend fermentum facilisis. Praesent enim enim, gravida ac sodales sed, placerat id erat. Suspendisse lacus dolor, consectetur non augue vel, vehicula interdum libero. Morbi euismod sagittis libero sed lacinia.\n\nSed tempus felis lobortis leo pulvinar rutrum. Nam mattis velit nisl, eu condimentum ligula luctus nec. Phasellus semper velit eget aliquet faucibus. In a mattis elit. Phasellus vel urna viverra, condimentum lorem id, rhoncus nibh. Ut pellentesque posuere elementum. Sed a varius odio. Morbi rhoncus ligula libero, vel eleifend nunc tristique vitae. Fusce et sem dui. Aenean nec scelerisque tortor. Fusce malesuada accumsan magna vel tempus. Quisque mollis felis eu dolor tristique, sit amet auctor felis gravida. Sed libero lorem, molestie sed nisl in, accumsan tempor nisi. Fusce sollicitudin massa ut lacinia mattis. Sed vel eleifend lorem. Pellentesque vitae felis pretium, pulvinar elit eu, euismod sapien.\n", + "operationId": "findPets", + "parameters": [ + { + "name": "tags", + "in": "query", + "description": "tags to filter by", + "required": false, + "style": "form", + "schema": { + "type": "array", + "items": { + "type": "string" + } + } + }, + { + "name": "limit", + "in": "query", + "description": "maximum number of results to return", + "required": false, + "schema": { + "type": "integer", + "format": "int32" + } + } + ], + "responses": { + "200": { + "description": "pet response", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Pet" + } + } + } + } + }, + "default": { + "description": "unexpected error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Error" + } + } + } + } + } + }, + "post": { + "description": "Creates a new pet in the store. 
Duplicates are allowed", + "operationId": "addPet", + "parameters": [ + { + "name": "Content-Type", + "in": "header", + "description": "Content type of the request body", + "required": true, + "schema": { + "type": "string", + "enum": [ + "application/json" + ] + } + }, + { + "name": "Content-Length", + "in": "header", + "description": "Content length of the request body", + "required": true, + "schema": { + "type": "integer" + } + } + ], + "requestBody": { + "description": "Pet to add to the store", + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/NewPet" + } + } + } + }, + "responses": { + "200": { + "description": "pet response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Pet" + } + } + } + }, + "default": { + "description": "unexpected error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Error" + } + } + } + } + } + } + }, + "/pets/{id}": { + "get": { + "description": "Returns a user based on a single ID, if the user does not have access to the pet", + "operationId": "find pet by id", + "parameters": [ + { + "name": "id", + "in": "path", + "description": "ID of pet to fetch", + "required": true, + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "description": "pet response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Pet" + } + } + } + }, + "default": { + "description": "unexpected error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Error" + } + } + } + } + } + }, + "delete": { + "description": "deletes a single pet based on the ID supplied", + "operationId": "deletePet", + "parameters": [ + { + "name": "id", + "in": "path", + "description": "ID of pet to delete", + "required": true, + "schema": { + "type": "string" + } + } + ], + "responses": { + "204": { + "description": "pet deleted" + }, + "default": { + "description": "unexpected error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Error" + } + } + } + } + } + } + } + }, + "components": { + "schemas": { + "Pet": { + "allOf": [ + { + "$ref": "#/components/schemas/NewPet" + }, + { + "type": "object", + "required": [ + "id" + ], + "properties": { + "id": { + "type": "string" + } + } + } + ] + }, + "NewPet": { + "type": "object", + "required": [ + "id", + "name" + ], + "properties": { + "id": { + "type": "string" + }, + "name": { + "type": "string" + }, + "tag": { + "type": "string" + } + } + }, + "Error": { + "type": "object", + "required": [ + "code", + "message" + ], + "properties": { + "code": { + "type": "integer", + "format": "int32" + }, + "message": { + "type": "string" + } + } + } + } + } +} \ No newline at end of file diff --git a/app/src/test/resources/sample/http/openapi/uspto.json b/app/src/test/resources/sample/http/openapi/uspto.json new file mode 100644 index 00000000..a23dc042 --- /dev/null +++ b/app/src/test/resources/sample/http/openapi/uspto.json @@ -0,0 +1,253 @@ +{ + "openapi": "3.0.1", + "servers": [ + { + "url": "{scheme}://localhost:80/anything", + "variables": { + "scheme": { + "description": "The Data Set API is accessible via https and http", + "enum": [ + "https", + "http" + ], + "default": "http" + } + } + } + ], + "info": { + "description": "The Data Set API (DSAPI) allows the public users to discover and search USPTO exported data sets. 
This is a generic API that allows USPTO users to make any CSV based data files searchable through API. With the help of GET call, it returns the list of data fields that are searchable. With the help of POST call, data can be fetched based on the filters on the field names. Please note that POST call is used to search the actual data. The reason for the POST call is that it allows users to specify any complex search criteria without worry about the GET size limitations as well as encoding of the input parameters.", + "version": "1.0.0", + "title": "USPTO Data Set API", + "contact": { + "name": "Open Data Portal", + "url": "https://developer.uspto.gov", + "email": "developer@uspto.gov" + } + }, + "tags": [ + { + "name": "metadata", + "description": "Find out about the data sets" + }, + { + "name": "search", + "description": "Search a data set" + } + ], + "paths": { + "/": { + "get": { + "tags": [ + "metadata" + ], + "operationId": "list-data-sets", + "summary": "List available data sets", + "responses": { + "200": { + "description": "Returns a list of data sets", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/dataSetList" + }, + "example": { + "total": 2, + "apis": [ + { + "apiKey": "oa_citations", + "apiVersionNumber": "v1", + "apiUrl": "https://developer.uspto.gov/ds-api/oa_citations/v1/fields", + "apiDocumentationUrl": "https://developer.uspto.gov/ds-api-docs/index.html?url=https://developer.uspto.gov/ds-api/swagger/docs/oa_citations.json" + }, + { + "apiKey": "cancer_moonshot", + "apiVersionNumber": "v1", + "apiUrl": "https://developer.uspto.gov/ds-api/cancer_moonshot/v1/fields", + "apiDocumentationUrl": "https://developer.uspto.gov/ds-api-docs/index.html?url=https://developer.uspto.gov/ds-api/swagger/docs/cancer_moonshot.json" + } + ] + } + } + } + } + } + } + }, + "/{dataset}/{version}/fields": { + "get": { + "tags": [ + "metadata" + ], + "summary": "Provides the general information about the API and the list of fields that can be used to query the dataset.", + "description": "This GET API returns the list of all the searchable field names that are in the oa_citations. Please see the 'fields' attribute which returns an array of field names. Each field or a combination of fields can be searched using the syntax options shown below.", + "operationId": "list-searchable-fields", + "parameters": [ + { + "name": "dataset", + "in": "path", + "description": "Name of the dataset.", + "required": true, + "example": "oa_citations", + "schema": { + "type": "string" + } + }, + { + "name": "version", + "in": "path", + "description": "Version of the dataset.", + "required": true, + "example": "v1", + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "description": "The dataset API for the given version is found and it is accessible to consume.", + "content": { + "application/json": { + "schema": { + "type": "string" + } + } + } + }, + "404": { + "description": "The combination of dataset name and version is not found in the system or it is not published yet to be consumed by public.", + "content": { + "application/json": { + "schema": { + "type": "string" + } + } + } + } + } + } + }, + "/{dataset}/{version}/records": { + "post": { + "tags": [ + "search" + ], + "summary": "Provides search capability for the data set with the given search criteria.", + "description": "This API is based on Solr/Lucene Search. The data is indexed using SOLR. This GET API returns the list of all the searchable field names that are in the Solr Index. 
Please see the 'fields' attribute which returns an array of field names. Each field or a combination of fields can be searched using the Solr/Lucene Syntax. Please refer https://lucene.apache.org/core/3_6_2/queryparsersyntax.html#Overview for the query syntax. List of field names that are searchable can be determined using above GET api.", + "operationId": "perform-search", + "parameters": [ + { + "name": "version", + "in": "path", + "description": "Version of the dataset.", + "required": true, + "schema": { + "type": "string", + "default": "v1" + } + }, + { + "name": "dataset", + "in": "path", + "description": "Name of the dataset. In this case, the default value is oa_citations", + "required": true, + "schema": { + "type": "string", + "default": "oa_citations" + } + } + ], + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": { + "type": "object" + } + } + } + } + } + }, + "404": { + "description": "No matching record found for the given criteria." + } + }, + "requestBody": { + "content": { + "application/x-www-form-urlencoded": { + "schema": { + "type": "object", + "properties": { + "criteria": { + "description": "Uses Lucene Query Syntax in the format of propertyName:value, propertyName:[num1 TO num2] and date range format: propertyName:[yyyyMMdd TO yyyyMMdd]. In the response please see the 'docs' element which has the list of record objects. Each record structure would consist of all the fields and their corresponding values.", + "type": "string", + "default": "*:*", + "pattern": "([a-zA-Z]+|\\*):([a-zA-Z0-9 ]+|\\*|[0-9]+ TO [0-9]+|[0-9]{8} TO [0-9]{8})" + }, + "start": { + "description": "Starting record number. Default value is 0.", + "type": "integer", + "default": 0 + }, + "rows": { + "description": "Specify number of rows to be returned. 
If you run the search with default values, in the response you will see 'numFound' attribute which will tell the number of records available in the dataset.", + "type": "integer", + "default": 100 + } + }, + "required": [ + "criteria" + ] + } + } + } + } + } + } + }, + "components": { + "schemas": { + "dataSetList": { + "type": "object", + "properties": { + "total": { + "type": "integer" + }, + "apis": { + "type": "array", + "items": { + "type": "object", + "properties": { + "apiKey": { + "type": "string", + "description": "To be used as a dataset parameter value" + }, + "apiVersionNumber": { + "type": "string", + "description": "To be used as a version parameter value" + }, + "apiUrl": { + "type": "string", + "format": "uriref", + "description": "The URL describing the dataset's fields" + }, + "apiDocumentationUrl": { + "type": "string", + "format": "uriref", + "description": "A URL to the API console for each API" + } + } + } + } + } + } + } + } +} \ No newline at end of file diff --git a/app/src/test/resources/sample/jms/solace/setup_solace.sh b/app/src/test/resources/sample/jms/solace/setup_solace.sh new file mode 100644 index 00000000..d3fd80bd --- /dev/null +++ b/app/src/test/resources/sample/jms/solace/setup_solace.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash + +echo "Creating new queue in Solace" +curl http://localhost:8080/SEMP/v2/config/msgVpns/default/queues \ + -X POST \ + -u admin:admin \ + -H "Content-type:application/json" \ + -d '{ "queueName":"generated_test_queue" }' + +echo "Creating JNDI queue object" +curl http://localhost:8080/SEMP/v2/config/msgVpns/default/jndiQueues \ + -X POST \ + -u admin:admin \ + -H "Content-type:application/json" \ + -d '{ "physicalName":"generated_test_queue", "queueName":"/JNDI/Q/generated_test_queue" }' diff --git a/app/src/test/resources/sample/kafka/setup-kafka.sh b/app/src/test/resources/sample/kafka/setup-kafka.sh new file mode 100644 index 00000000..d9af4065 --- /dev/null +++ b/app/src/test/resources/sample/kafka/setup-kafka.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +kafka-topics --create --topic account-topic --bootstrap-server localhost:9092 +kafka-topics --describe --topic account-topic --bootstrap-server localhost:9092 +kafka-console-consumer --topic account-topic --from-beginning --bootstrap-server localhost:9092 +kafka-topics --delete --topic account-topic --bootstrap-server localhost:9092 + diff --git a/app/src/test/resources/sample/metadata/marquez/get_dataset_api_response.json b/app/src/test/resources/sample/metadata/marquez/get_dataset_api_response.json new file mode 100644 index 00000000..0bd44292 --- /dev/null +++ b/app/src/test/resources/sample/metadata/marquez/get_dataset_api_response.json @@ -0,0 +1,85 @@ +{ + "id": { + "namespace": "food_delivery", + "name": "public.categories" + }, + "type": "DB_TABLE", + "name": "public.categories", + "physicalName": "public.categories", + "createdAt": "2020-02-22T22:42:42Z", + "updatedAt": "2020-02-22T22:42:42Z", + "namespace": "food_delivery", + "sourceName": "food_delivery_db", + "fields": [ + { + "name": "id", + "type": "INTEGER", + "tags": [], + "description": "The unique ID of the category." + }, + { + "name": "name", + "type": "VARCHAR", + "tags": [], + "description": "The name of the category." + }, + { + "name": "menu_id", + "type": "INTEGER", + "tags": [], + "description": "The ID of the menu related to the category." + }, + { + "name": "description", + "type": "TEXT", + "tags": [], + "description": "The description of the category." 
+ } + ], + "tags": [], + "lastModifiedAt": null, + "lastLifecycleState": "", + "description": null, + "currentVersion": "5d81c700-d0ae-45dc-b3a9-e7db35f75e63", + "columnLineage": null, + "facets": { + "documentation": { + "_producer": "https://github.com/MarquezProject/marquez/blob/main/docker/metadata.json", + "_schemaURL": "https://openlineage.io/spec/facets/1-0-0/DocumentationDatasetFacet.json", + "description": "A table for categories." + }, + "schema": { + "fields": [ + { + "name": "id", + "type": "INTEGER", + "description": "The unique ID of the category." + }, + { + "name": "name", + "type": "VARCHAR", + "description": "The name of the category." + }, + { + "name": "menu_id", + "type": "INTEGER", + "description": "The ID of the menu related to the category." + }, + { + "name": "description", + "type": "TEXT", + "description": "The description of the category." + } + ], + "_producer": "https://github.com/MarquezProject/marquez/blob/main/docker/metadata.json", + "_schemaURL": "https://openlineage.io/spec/facets/1-0-0/SchemaDatasetFacet.json" + }, + "dataSource": { + "uri": "postgres://food_delivery:food_delivery@postgres:5432/food_delivery", + "name": "food_delivery_db", + "_producer": "https://github.com/MarquezProject/marquez/blob/main/docker/metadata.json", + "_schemaURL": "https://openlineage.io/spec/facets/1-0-0/DatasourceDatasetFacet.json" + } + }, + "deleted": false +} \ No newline at end of file diff --git a/app/src/test/resources/sample/metadata/marquez/list_datasets_api_response.json b/app/src/test/resources/sample/metadata/marquez/list_datasets_api_response.json new file mode 100644 index 00000000..157897d8 --- /dev/null +++ b/app/src/test/resources/sample/metadata/marquez/list_datasets_api_response.json @@ -0,0 +1,90 @@ +{ + "totalCount": 1, + "datasets": [ + { + "id": { + "namespace": "food_delivery", + "name": "public.categories" + }, + "type": "DB_TABLE", + "name": "public.categories", + "physicalName": "public.categories", + "createdAt": "2020-02-22T22:42:42Z", + "updatedAt": "2020-02-22T22:42:42Z", + "namespace": "food_delivery", + "sourceName": "food_delivery_db", + "fields": [ + { + "name": "id", + "type": "INTEGER", + "tags": [], + "description": "The unique ID of the category." + }, + { + "name": "name", + "type": "VARCHAR", + "tags": [], + "description": "The name of the category." + }, + { + "name": "menu_id", + "type": "INTEGER", + "tags": [], + "description": "The ID of the menu related to the category." + }, + { + "name": "description", + "type": "TEXT", + "tags": [], + "description": "The description of the category." + } + ], + "tags": [], + "lastModifiedAt": null, + "lastLifecycleState": "", + "description": null, + "currentVersion": "5d81c700-d0ae-45dc-b3a9-e7db35f75e63", + "columnLineage": null, + "facets": { + "schema": { + "fields": [ + { + "name": "id", + "type": "INTEGER", + "description": "The unique ID of the category." + }, + { + "name": "name", + "type": "VARCHAR", + "description": "The name of the category." + }, + { + "name": "menu_id", + "type": "INTEGER", + "description": "The ID of the menu related to the category." + }, + { + "name": "description", + "type": "TEXT", + "description": "The description of the category." 
+ } + ], + "_producer": "https://github.com/MarquezProject/marquez/blob/main/docker/metadata.json", + "_schemaURL": "https://openlineage.io/spec/facets/1-0-0/SchemaDatasetFacet.json" + }, + "documentation": { + "_producer": "https://github.com/MarquezProject/marquez/blob/main/docker/metadata.json", + "_schemaURL": "https://openlineage.io/spec/facets/1-0-0/DocumentationDatasetFacet.json", + "description": "A table for categories." + }, + "dataSource": { + "uri": "postgres://food_delivery:food_delivery@postgres:5432/food_delivery", + "name": "food_delivery_db", + "_producer": "https://github.com/MarquezProject/marquez/blob/main/docker/metadata.json", + "_schemaURL": "https://openlineage.io/spec/facets/1-0-0/DatasourceDatasetFacet.json" + } + }, + "deleted": false + } + ] +} \ No newline at end of file diff --git a/app/src/test/resources/sample/metadata/openmetadata/get_table_response.json b/app/src/test/resources/sample/metadata/openmetadata/get_table_response.json new file mode 100644 index 00000000..d250a2c9 --- /dev/null +++ b/app/src/test/resources/sample/metadata/openmetadata/get_table_response.json @@ -0,0 +1,313 @@ +{ + "data": [ + { + "id": "464fe2d0-eb37-4351-b939-3358170c941a", + "name": "dim_address", + "fullyQualifiedName": "sample_data.ecommerce_db.shopify.dim_address", + "description": "This dimension table contains the billing and shipping addresses of customers. You can join this table with the sales table to generate lists of the billing and shipping addresses. Customers can enter their addresses more than once, so the same address can appear in more than one row in this table. This table contains one row per customer address.", + "version": 0.1, + "updatedAt": 1697177637240, + "updatedBy": "admin", + "href": "http://localhost:8585/api/v1/tables/464fe2d0-eb37-4351-b939-3358170c941a", + "tableType": "Regular", + "columns": [ + { + "name": "address_id", + "dataType": "NUMERIC", + "dataTypeDisplay": "numeric", + "description": "Unique identifier for the address.", + "fullyQualifiedName": "sample_data.ecommerce_db.shopify.dim_address.address_id", + "tags": [], + "ordinalPosition": 1, + "customMetrics": [] + }, + { + "name": "shop_id", + "dataType": "NUMERIC", + "dataTypeDisplay": "numeric", + "description": "The ID of the store. This column is a foreign key reference to the shop_id column in the dim_shop table.", + "fullyQualifiedName": "sample_data.ecommerce_db.shopify.dim_address.shop_id", + "tags": [], + "ordinalPosition": 2, + "customMetrics": [] + }, + { + "name": "first_name", + "dataType": "VARCHAR", + "dataLength": 100, + "dataTypeDisplay": "varchar", + "description": "First name of the customer.", + "fullyQualifiedName": "sample_data.ecommerce_db.shopify.dim_address.first_name", + "tags": [], + "ordinalPosition": 3, + "customMetrics": [] + }, + { + "name": "last_name", + "dataType": "VARCHAR", + "dataLength": 100, + "dataTypeDisplay": "varchar", + "description": "Last name of the customer.", + "fullyQualifiedName": "sample_data.ecommerce_db.shopify.dim_address.last_name", + "tags": [], + "ordinalPosition": 4, + "customMetrics": [] + }, + { + "name": "address1", + "dataType": "VARCHAR", + "dataLength": 500, + "dataTypeDisplay": "varchar", + "description": "The first address line. 
For example, 150 Elgin St.", + "fullyQualifiedName": "sample_data.ecommerce_db.shopify.dim_address.address1", + "tags": [], + "ordinalPosition": 5, + "customMetrics": [] + }, + { + "name": "address2", + "dataType": "VARCHAR", + "dataLength": 500, + "dataTypeDisplay": "varchar", + "description": "The second address line. For example, Suite 800.", + "fullyQualifiedName": "sample_data.ecommerce_db.shopify.dim_address.address2", + "tags": [], + "ordinalPosition": 6, + "customMetrics": [] + }, + { + "name": "company", + "dataType": "VARCHAR", + "dataLength": 100, + "dataTypeDisplay": "varchar", + "description": "The name of the customer's business, if one exists.", + "fullyQualifiedName": "sample_data.ecommerce_db.shopify.dim_address.company", + "tags": [], + "ordinalPosition": 7, + "customMetrics": [] + }, + { + "name": "city", + "dataType": "VARCHAR", + "dataLength": 100, + "dataTypeDisplay": "varchar", + "description": "The name of the city. For example, Palo Alto.", + "fullyQualifiedName": "sample_data.ecommerce_db.shopify.dim_address.city", + "tags": [], + "ordinalPosition": 8, + "customMetrics": [] + }, + { + "name": "region", + "dataType": "VARCHAR", + "dataLength": 512, + "dataTypeDisplay": "varchar", + "description": "The name of the region, such as a province or state, where the customer is located. For example, Ontario or New York. This column is the same as CustomerAddress.province in the Admin API.", + "fullyQualifiedName": "sample_data.ecommerce_db.shopify.dim_address.region", + "tags": [], + "ordinalPosition": 9, + "customMetrics": [] + }, + { + "name": "zip", + "dataType": "VARCHAR", + "dataLength": 10, + "dataTypeDisplay": "varchar", + "description": "The ZIP or postal code. For example, 90210.", + "fullyQualifiedName": "sample_data.ecommerce_db.shopify.dim_address.zip", + "tags": [], + "ordinalPosition": 10, + "customMetrics": [] + }, + { + "name": "country", + "dataType": "VARCHAR", + "dataLength": 50, + "dataTypeDisplay": "varchar", + "description": "The full name of the country. 
For example, Canada.", + "fullyQualifiedName": "sample_data.ecommerce_db.shopify.dim_address.country", + "tags": [], + "ordinalPosition": 11, + "customMetrics": [] + }, + { + "name": "phone", + "dataType": "VARCHAR", + "dataLength": 15, + "dataTypeDisplay": "varchar", + "description": "The phone number of the customer.", + "fullyQualifiedName": "sample_data.ecommerce_db.shopify.dim_address.phone", + "tags": [], + "ordinalPosition": 12, + "customMetrics": [] + } + ], + "tableConstraints": [ + { + "constraintType": "PRIMARY_KEY", + "columns": [ + "address_id", + "shop_id" + ] + } + ], + "databaseSchema": { + "id": "faa1f342-6087-414b-91e9-b3f3d506dc77", + "type": "databaseSchema", + "name": "shopify", + "fullyQualifiedName": "sample_data.ecommerce_db.shopify", + "description": "This **mock** database contains schema related to shopify sales and orders with related dimension tables.", + "deleted": false, + "href": "http://localhost:8585/api/v1/databaseSchemas/faa1f342-6087-414b-91e9-b3f3d506dc77" + }, + "database": { + "id": "e89bd155-96c5-4ba5-89d5-a65134b3df91", + "type": "database", + "name": "ecommerce_db", + "fullyQualifiedName": "sample_data.ecommerce_db", + "description": "This **mock** database contains schemas related to shopify sales and orders with related dimension tables.", + "deleted": false, + "href": "http://localhost:8585/api/v1/databases/e89bd155-96c5-4ba5-89d5-a65134b3df91" + }, + "service": { + "id": "b925ebce-07ec-4598-a174-62b3c1b3e0fc", + "type": "databaseService", + "name": "sample_data", + "fullyQualifiedName": "sample_data", + "deleted": false, + "href": "http://localhost:8585/api/v1/services/databaseServices/b925ebce-07ec-4598-a174-62b3c1b3e0fc" + }, + "serviceType": "BigQuery", + "tags": [], + "usageSummary": { + "dailyStats": { + "count": 0, + "percentileRank": 0.0 + }, + "weeklyStats": { + "count": 0, + "percentileRank": 0.0 + }, + "monthlyStats": { + "count": 0, + "percentileRank": 0.0 + }, + "date": "2023-10-13" + }, + "followers": [], + "joins": { + "startDate": "2023-09-13", + "dayCount": 30, + "columnJoins": [], + "directTableJoins": [] + }, + "testSuite": { + "id": "0946125d-6cf5-41fa-894f-3ee6aab6a20f", + "name": "sample_data.ecommerce_db.shopify.dim_address.testSuite", + "fullyQualifiedName": "sample_data.ecommerce_db.shopify.dim_address.testSuite", + "description": "This is an executable test suite linked to an entity", + "tests": [ + { + "id": "26ea4fb8-1a13-42eb-a4b6-37ace2c4dd05", + "type": "testCase", + "name": "column_value_max_to_be_between", + "fullyQualifiedName": "sample_data.ecommerce_db.shopify.dim_address.shop_id.column_value_max_to_be_between", + "description": "test the value of a column is between x and y", + "deleted": false + }, + { + "id": "a30d4175-2681-4427-87ee-6a5a139e3d31", + "type": "testCase", + "name": "column_values_to_be_between", + "fullyQualifiedName": "sample_data.ecommerce_db.shopify.dim_address.zip.column_values_to_be_between", + "description": "test the number of column in table is between x and y", + "deleted": false + }, + { + "id": "d3024ad8-3bc7-4875-8369-1e2085dfab77", + "type": "testCase", + "name": "column_values_to_match_regex", + "fullyQualifiedName": "sample_data.ecommerce_db.shopify.dim_address.last_name.column_values_to_match_regex", + "description": "test value of a column match regex", + "deleted": false + }, + { + "id": "e7a485ee-af8a-4f22-b114-014e6bb74531", + "type": "testCase", + "name": "table_column_count_between", + "fullyQualifiedName": 
"sample_data.ecommerce_db.shopify.dim_address.table_column_count_between", + "description": "test the number of column in table is between x and y", + "deleted": false + }, + { + "id": "dea9535c-2f8b-4a69-8a79-990d672af4f5", + "type": "testCase", + "name": "table_column_count_equals", + "fullyQualifiedName": "sample_data.ecommerce_db.shopify.dim_address.table_column_count_equals", + "description": "test the number of column in table", + "deleted": false + } + ], + "pipelines": [ + { + "id": "9633bd61-f424-46b0-9de7-47aa755d3939", + "type": "ingestionPipeline", + "name": "dim_address_TestSuite", + "fullyQualifiedName": "sample_data.ecommerce_db.shopify.dim_address.testSuite.dim_address_TestSuite", + "deleted": false + } + ], + "serviceType": "TestSuite", + "version": 0.1, + "updatedAt": 1697177643705, + "updatedBy": "admin", + "deleted": false, + "executable": true, + "executableEntityReference": { + "id": "464fe2d0-eb37-4351-b939-3358170c941a", + "type": "table", + "name": "dim_address", + "fullyQualifiedName": "sample_data.ecommerce_db.shopify.dim_address" + }, + "summary": { + "success": 4, + "failed": 0, + "aborted": 1, + "total": 5 + }, + "testCaseResultSummary": [ + { + "testCaseName": "sample_data.ecommerce_db.shopify.dim_address.table_column_count_equals", + "status": "Success", + "timestamp": 1696313644465 + }, + { + "testCaseName": "sample_data.ecommerce_db.shopify.dim_address.table_column_count_between", + "status": "Success", + "timestamp": 1696400044815 + }, + { + "testCaseName": "sample_data.ecommerce_db.shopify.dim_address.shop_id.column_value_max_to_be_between", + "status": "Aborted", + "timestamp": 1696400045181 + }, + { + "testCaseName": "sample_data.ecommerce_db.shopify.dim_address.last_name.column_values_to_match_regex", + "status": "Success", + "timestamp": 1696400045622 + }, + { + "testCaseName": "sample_data.ecommerce_db.shopify.dim_address.zip.column_values_to_be_between", + "status": "Success", + "timestamp": 1696400045933 + } + ] + }, + "deleted": false + } + ], + "paging": { + "after": "ZGltX2FkZHJlc3M=", + "total": 20 + } +} \ No newline at end of file diff --git a/app/src/test/resources/sample/plan/account-create-plan.yaml b/app/src/test/resources/sample/plan/account-create-plan.yaml new file mode 100644 index 00000000..d90d5b4b --- /dev/null +++ b/app/src/test/resources/sample/plan/account-create-plan.yaml @@ -0,0 +1,23 @@ +name: "account_create_plan" +description: "Create account data in JSON" +tasks: + - name: "json_account_jms" + dataSourceName: "solace" + enabled: false + - name: "json_account_kafka" + dataSourceName: "kafka" + enabled: false + - name: "json_account_file" + dataSourceName: "json" + enabled: true + - name: "json_account_http" + dataSourceName: "httpbin" + enabled: false + +sinkOptions: + foreignKeys: + - - "solace.jms_account.account_id" + - - "json.file_account.account_id" + +validations: + - "account_checks" diff --git a/app/src/test/resources/sample/plan/customer-create-plan.yaml b/app/src/test/resources/sample/plan/customer-create-plan.yaml new file mode 100644 index 00000000..c0252e46 --- /dev/null +++ b/app/src/test/resources/sample/plan/customer-create-plan.yaml @@ -0,0 +1,20 @@ +name: "customer_create_plan" +description: "Create customers in JDBC and Cassandra" +tasks: + - name: "jdbc_customer_accounts_table_create" + dataSourceName: "postgres" + enabled: false + - name: "parquet_transaction_file" + dataSourceName: "parquet" + enabled: true + - name: "cassandra_customer_status_table_create" + dataSourceName: "cassandra" + enabled: 
false + - name: "cassandra_customer_transactions_table_create" + dataSourceName: "cassandra" + enabled: false + +#sinkOptions: +# foreignKeys: +# "postgres.accounts.account_number": +# - "parquet.transactions.account_id" diff --git a/app/src/test/resources/sample/plan/example-account-create-plan.yaml b/app/src/test/resources/sample/plan/example-account-create-plan.yaml new file mode 100644 index 00000000..8a9345f6 --- /dev/null +++ b/app/src/test/resources/sample/plan/example-account-create-plan.yaml @@ -0,0 +1,18 @@ +name: "account_create_plan" +description: "Create accounts and transactions in Postgres and parquet" +tasks: + # - name: "jdbc_customer_accounts_table_create" + # dataSourceName: "postgres" + # enabled: true + - name: "jdbc_customer_transactions_table_create" + dataSourceName: "postgres" + enabled: true + - name: "parquet_transaction_file" + dataSourceName: "parquet" + enabled: true + +sinkOptions: + foreignKeys: + # "postgres.accounts.account_number": + "postgres.transactions.account_number": + - "parquet.transactions.account_id" diff --git a/app/src/test/resources/sample/plan/large-plan.yaml b/app/src/test/resources/sample/plan/large-plan.yaml new file mode 100644 index 00000000..3548d054 --- /dev/null +++ b/app/src/test/resources/sample/plan/large-plan.yaml @@ -0,0 +1,14 @@ +name: "large_json_plan" +description: "Create account data in JSON" +tasks: + - name: "large_json_file" + dataSourceName: "json" + enabled: true + - name: "large_csv_file" + dataSourceName: "csv" + enabled: true + +sinkOptions: + foreignKeys: + "json.file_account.account_id": + - "csv.transactions.account_id" diff --git a/app/src/test/resources/sample/plan/simple-json-plan.yaml b/app/src/test/resources/sample/plan/simple-json-plan.yaml new file mode 100644 index 00000000..23df33bc --- /dev/null +++ b/app/src/test/resources/sample/plan/simple-json-plan.yaml @@ -0,0 +1,6 @@ +name: "account_create_plan" +description: "Create account data in JSON" +tasks: + - name: "simple_json" + dataSourceName: "account_json" + enabled: true diff --git a/app/src/test/resources/sample/plan/transaction-create-plan.yaml b/app/src/test/resources/sample/plan/transaction-create-plan.yaml new file mode 100644 index 00000000..3ebe3a07 --- /dev/null +++ b/app/src/test/resources/sample/plan/transaction-create-plan.yaml @@ -0,0 +1,6 @@ +name: "transaction_create_plan" +description: "Create CSV transaction file" +tasks: + - name: "csv_transaction_file" + dataSourceName: "csv" + enabled: true diff --git a/app/src/test/resources/sample/sql/mysql/customer.sql b/app/src/test/resources/sample/sql/mysql/customer.sql new file mode 100644 index 00000000..7f23c403 --- /dev/null +++ b/app/src/test/resources/sample/sql/mysql/customer.sql @@ -0,0 +1,47 @@ +CREATE DATABASE customer; +USE customer; +CREATE SCHEMA IF NOT EXISTS account; + +CREATE TABLE IF NOT EXISTS account.accounts +( + id SERIAL PRIMARY KEY, + account_number VARCHAR(20) NOT NULL, + account_status VARCHAR(10), + created_by TEXT, + created_by_fixed_length CHAR(10), + customer_id_int INT UNIQUE, + customer_id_smallint SMALLINT, + customer_id_bigint BIGINT, + customer_id_decimal DECIMAL, + customer_id_real REAL, + customer_id_double DOUBLE PRECISION, + open_date DATE, + open_timestamp TIMESTAMP, + last_opened_time TIME, + payload_bytes BLOB +); +-- spark converts to wrong data type when reading from postgres so fails to write back to postgres +-- open_date_interval INTERVAL, +-- ERROR: column "open_date_interval" is of type interval but expression is of type character varying +-- 
open_id UUID, +-- balance MONEY, +-- payload_json JSONB + +CREATE TABLE IF NOT EXISTS account.balances +( + id BIGINT UNSIGNED NOT NULL, + create_time TIMESTAMP, + balance DOUBLE PRECISION, + PRIMARY KEY (id, create_time), + CONSTRAINT fk_bal_account_number FOREIGN KEY (id) REFERENCES account.accounts (id) +); + +CREATE TABLE IF NOT EXISTS account.transactions +( + id BIGINT UNSIGNED NOT NULL, + create_time TIMESTAMP, + transaction_id VARCHAR(20), + amount DOUBLE PRECISION, + PRIMARY KEY (id, create_time, transaction_id), + CONSTRAINT fk_txn_account_number FOREIGN KEY (id) REFERENCES account.accounts (id) +); diff --git a/app/src/test/resources/sample/sql/postgres/customer.sql b/app/src/test/resources/sample/sql/postgres/customer.sql new file mode 100644 index 00000000..28304210 --- /dev/null +++ b/app/src/test/resources/sample/sql/postgres/customer.sql @@ -0,0 +1,46 @@ +CREATE DATABASE customer; +\c customer +CREATE SCHEMA IF NOT EXISTS account; + +CREATE TABLE IF NOT EXISTS account.accounts +( + id BIGSERIAL PRIMARY KEY, + account_number VARCHAR(20) NOT NULL, + account_status VARCHAR(10), + created_by TEXT, + created_by_fixed_length CHAR(10), + customer_id_int INT UNIQUE, + customer_id_smallint SMALLINT, + customer_id_bigint BIGINT, + customer_id_decimal DECIMAL, + customer_id_real REAL, + customer_id_double DOUBLE PRECISION, + open_date DATE, + open_timestamp TIMESTAMP, + last_opened_time TIME, + payload_bytes BYTEA +); + +CREATE TABLE IF NOT EXISTS account.balances +( + account_number VARCHAR(20) UNIQUE NOT NULL, + create_time TIMESTAMP, + balance DOUBLE PRECISION, + PRIMARY KEY (account_number, create_time) +); + +CREATE TABLE IF NOT EXISTS account.transactions +( + account_number VARCHAR(20) UNIQUE NOT NULL, + create_time TIMESTAMP, + transaction_id VARCHAR(20), + amount DOUBLE PRECISION, + PRIMARY KEY (account_number, create_time, transaction_id), + CONSTRAINT fk_txn_account_number FOREIGN KEY (account_number) REFERENCES account.balances (account_number) +); + +CREATE TABLE IF NOT EXISTS account.mapping +( + key TEXT, + value TEXT +); diff --git a/app/src/test/resources/sample/task/cassandra/cassandra-customer-task.yaml b/app/src/test/resources/sample/task/cassandra/cassandra-customer-task.yaml new file mode 100644 index 00000000..1fe36a6e --- /dev/null +++ b/app/src/test/resources/sample/task/cassandra/cassandra-customer-task.yaml @@ -0,0 +1,48 @@ +name: "cassandra_customer_status_table_create" +steps: + - name: "accounts" + type: "cassandra" + count: + records: 10 + options: + keyspace: "account" + table: "accounts" + schema: + fields: + - name: "account_id" + type: "string" + generator: + type: "regex" + options: + regex: "ACC1[0-9]{5,10}" + - name: "amount" + type: "double" + generator: + type: "random" + options: + minValue: 10.0 + maxValue: 100.0 + - name: "name" + type: "string" + generator: + type: "random" + options: + expression: "#{Name.name}" + - name: "created_by" + type: "string" + generator: + type: "random" + options: + expression: "#{Name.username}" + - name: "status" + type: "string" + generator: + type: "oneOf" + options: + oneOf: + - "open" + - "closed" + - name: "open_time" + type: "timestamp" + generator: + type: "random" diff --git a/app/src/test/resources/sample/task/file/csv-transaction-task.yaml b/app/src/test/resources/sample/task/file/csv-transaction-task.yaml new file mode 100644 index 00000000..85456d6e --- /dev/null +++ b/app/src/test/resources/sample/task/file/csv-transaction-task.yaml @@ -0,0 +1,51 @@ +name: "csv_transaction_file" +steps: + - name: 
"transactions" + type: "csv" + options: { } + count: + records: 1000 + perColumn: + columnNames: + - "account_id" + - "name" + generator: + type: "random" + options: + max: 10 + min: 1 + schema: + fields: + - name: "account_id" + type: "string" + generator: + type: "regex" + options: + regex: "ACC1[0-9]{9}" + - name: "name" + type: "string" + generator: + type: "random" + options: + expression: "#{Name.name}" + - name: "year" + type: "int" + generator: + type: "random" + options: + min: 2021 + max: 2022 + - name: "amount" + type: "double" + generator: + type: "random" + options: + min: 10.0 + max: 100.0 + - name: "txn_date" + type: "date" + generator: + type: "random" + options: + min: "2021-01-01" + max: "2021-12-31" diff --git a/app/src/test/resources/sample/task/file/json-account-task.yaml b/app/src/test/resources/sample/task/file/json-account-task.yaml new file mode 100644 index 00000000..24285558 --- /dev/null +++ b/app/src/test/resources/sample/task/file/json-account-task.yaml @@ -0,0 +1,148 @@ +name: "json_account_file" +steps: + - name: "file_account" + type: "json" + count: + records: 1000 + options: + path: "app/src/test/resources/sample/json/account-gen" + schema: + fields: + - name: "account_id" + type: "string" + generator: + type: "random" + - name: "year" + type: "int" + generator: + type: "random" + options: + min: 2021 + max: 2022 + - name: "is_early_customer" + type: "boolean" + generator: + type: "sql" + options: + sql: "CASE WHEN year == 2021 THEN true ELSE false END" + - name: "amount" + type: "double" + generator: + type: "random" + options: + min: 10.0 + max: 100.0 + - name: "is_large_amount" + type: "boolean" + generator: + type: "sql" + options: + sql: >- + CASE WHEN amount > 50 THEN + true + ELSE + false + END + - name: "updated_time" + type: "string" + generator: + type: "sql" + options: + sql: "details.updated_by.time" + - name: "first_txn_date" + type: "date" + generator: + type: "sql" + options: + sql: "element_at(sort_array(transactions.txn_date), 1)" + - name: "details" + schema: + fields: + - name: "name" + type: "string" + generator: + type: "random" + - name: "txn_date" + type: "date" + generator: + type: "random" + options: + min: "2021-01-01" + max: "2021-12-31" + - name: "updated_by" + schema: + fields: + - name: "user" + type: "string" + generator: + type: "random" + - name: "time" + type: "timestamp" + generator: + type: "random" + - name: "transactions" + type: "array" + schema: + fields: + - name: "txn_date" + type: "date" + generator: + type: "random" + - name: "amount" + type: "double" + generator: + type: "random" + - name: "tags" + type: "array" + generator: + type: "random" + - name: "transactions" + type: "json" + options: + path: "app/src/test/resources/sample/json/txn-gen" + count: + records: 100 + perColumn: + columnNames: + - "account_id" + - "name" + generator: + type: "random" + options: + max: 10 + min: 1 + schema: + fields: + - name: "account_id" + type: "string" + generator: + type: "regex" + options: + regex: "ACC1[0-9]{9}" + - name: "name" + type: "string" + generator: + type: "random" + options: + expression: "#{Name.name}" + - name: "year" + type: "int" + generator: + type: "random" + options: + min: 2021 + max: 2022 + - name: "amount" + type: "double" + generator: + type: "random" + options: + min: 10.0 + max: 100.0 + - name: "txn_date" + type: "date" + generator: + type: "random" + options: + min: "2021-01-01" + max: "2021-12-31" \ No newline at end of file diff --git 
a/app/src/test/resources/sample/task/file/large-csv-task.yaml b/app/src/test/resources/sample/task/file/large-csv-task.yaml new file mode 100644 index 00000000..e74a2e34 --- /dev/null +++ b/app/src/test/resources/sample/task/file/large-csv-task.yaml @@ -0,0 +1,50 @@ +name: "large_csv_file" +steps: + - name: "transactions" + type: "csv" + options: + path: "app/src/test/resources/sample/large/csv/transactions" + partitions: 10 + partitionBy: "year" + count: + records: 100000 + perColumn: + count: 2 + columnNames: + - "account_id" + - "name" + schema: + fields: + - name: "account_id" + type: "string" + generator: + type: "regex" + options: + regex: "ACC1[0-9]{9}" + - name: "name" + type: "string" + generator: + type: "random" + options: + expression: "#{Name.name}" + - name: "year" + type: "int" + generator: + type: "random" + options: + minValue: 2021 + maxValue: 2022 + - name: "amount" + type: "double" + generator: + type: "random" + options: + minValue: 10.0 + maxValue: 100.0 + - name: "txn_date" + type: "date" + generator: + type: "random" + options: + minValue: "2021-01-01" + maxValue: "2021-12-31" diff --git a/app/src/test/resources/sample/task/file/large-json-account-task.yaml b/app/src/test/resources/sample/task/file/large-json-account-task.yaml new file mode 100644 index 00000000..ca199d0e --- /dev/null +++ b/app/src/test/resources/sample/task/file/large-json-account-task.yaml @@ -0,0 +1,102 @@ +name: "large_json_file" +steps: + - name: "file_account" + type: "json" + count: + records: 100000 + options: + path: "app/src/test/resources/sample/large/json/account-gen" + schema: + fields: + - name: "account_id" + type: "string" + generator: + type: "regex" + options: + regex: "ACC1[0-9]{9}" + isUnique: true + - name: "year" + type: "int" + generator: + type: "random" + options: + minValue: 2021 + maxValue: 2022 + - name: "is_early_customer" + type: "boolean" + generator: + type: "sql" + options: + sql: "CASE WHEN year == 2021 THEN true ELSE false END" + - name: "amount" + type: "double" + generator: + type: "random" + options: + minValue: 10.0 + maxValue: 100.0 + isUnique: true + - name: "is_large_amount" + type: "boolean" + generator: + type: "sql" + options: + sql: >- + CASE WHEN amount > 50 THEN + true + ELSE + false + END + - name: "updated_time" + type: "string" + generator: + type: "sql" + options: + sql: "details.updated_by.time" + - name: "first_txn_date" + type: "date" + generator: + type: "sql" + options: + sql: "element_at(sort_array(transactions.txn_date), 1)" + - name: "details" + schema: + fields: + - name: "name" + type: "string" + generator: + type: "random" + - name: "txn_date" + type: "date" + generator: + type: "random" + options: + minValue: "2021-01-01" + maxValue: "2021-12-31" + - name: "updated_by" + schema: + fields: + - name: "user" + type: "string" + generator: + type: "random" + - name: "time" + type: "timestamp" + generator: + type: "random" + - name: "transactions" + type: "array" + schema: + fields: + - name: "txn_date" + type: "date" + generator: + type: "random" + - name: "amount" + type: "double" + generator: + type: "random" + - name: "tags" + type: "array" + generator: + type: "random" \ No newline at end of file diff --git a/app/src/test/resources/sample/task/file/parquet-transaction-task.yaml b/app/src/test/resources/sample/task/file/parquet-transaction-task.yaml new file mode 100644 index 00000000..f19e320a --- /dev/null +++ b/app/src/test/resources/sample/task/file/parquet-transaction-task.yaml @@ -0,0 +1,44 @@ +name: "parquet_transaction_file" 
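+# Writes 1000 generated transaction records to Parquet under app/src/test/resources/sample/parquet/transactions; perColumn repeats each account_id value between 1 and 10 times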
+steps: + - name: "transactions" + type: "parquet" + options: + path: "app/src/test/resources/sample/parquet/transactions" + count: + records: 1000 + perColumn: + columnNames: + - "account_id" + generator: + options: + max: 10 + min: 1 + schema: + fields: + - name: "account_id" + type: "string" + generator: + type: "random" + options: + isPrimaryKey: "true" + - name: "year" + type: "int" + generator: + type: "random" + options: + minValue: 2021 + maxValue: 2022 + - name: "amount" + type: "double" + generator: + type: "random" + options: + minValue: 10.0 + maxValue: 100.0 + - name: "txn_date" + type: "date" + generator: + type: "random" + options: + minValue: "2021-01-01" + maxValue: "2021-12-31" diff --git a/app/src/test/resources/sample/task/file/simple-json-task.yaml b/app/src/test/resources/sample/task/file/simple-json-task.yaml new file mode 100644 index 00000000..77cb6533 --- /dev/null +++ b/app/src/test/resources/sample/task/file/simple-json-task.yaml @@ -0,0 +1,98 @@ +name: "simple_json" +steps: + - name: "file_account" + type: "json" + count: + records: 1000 + options: + path: "src/test/resources/sample/data/generated/json/account-gen" + schema: + fields: + - name: "account_id" + type: "string" + generator: + type: "random" + - name: "year" + type: "int" + generator: + type: "random" + options: + minValue: 2021 + maxValue: 2022 + - name: "is_early_customer" + type: "boolean" + generator: + type: "sql" + options: + sql: "CASE WHEN year == 2021 THEN true ELSE false END" + - name: "amount" + type: "double" + generator: + type: "random" + options: + minValue: 10.0 + maxValue: 100.0 + - name: "is_large_amount" + type: "boolean" + generator: + type: "sql" + options: + sql: >- + CASE WHEN amount > 50 THEN + true + ELSE + false + END + - name: "updated_time" + type: "timestamp" + generator: + type: "sql" + options: + sql: "details.updated_by.time" + - name: "first_txn_date" + type: "date" + generator: + type: "sql" + options: + sql: "element_at(sort_array(transactions.txn_date), 1)" + - name: "details" + type: "struct" + schema: + fields: + - name: "name" + generator: + type: "random" + - name: "txn_date" + type: "date" + generator: + type: "random" + options: + minValue: "2021-01-01" + maxValue: "2021-12-31" + - name: "updated_by" + type: "struct" + schema: + fields: + - name: "user" + generator: + type: "random" + - name: "time" + type: "timestamp" + generator: + type: "random" + - name: "transactions" + type: "array" + schema: + fields: + - name: "txn_date" + type: "date" + generator: + type: "random" + - name: "amount" + type: "double" + generator: + type: "random" + - name: "tags" + type: "array" + generator: + type: "random" \ No newline at end of file diff --git a/app/src/test/resources/sample/task/http/http-account-task.yaml b/app/src/test/resources/sample/task/http/http-account-task.yaml new file mode 100644 index 00000000..f5478ad6 --- /dev/null +++ b/app/src/test/resources/sample/task/http/http-account-task.yaml @@ -0,0 +1,72 @@ +name: "json_account_http" +steps: + - name: "account" + count: + records: 50 + schema: + fields: + - name: "url" + generator: + type: "sql" + options: + sql: "CONCAT('http://localhost:80/anything/', content.account_id)" + - name: "method" + static: "PUT" + - name: "content_type" + static: "application/json" + - name: "headers" + type: "array<struct<key: string,value: string>>" + generator: + type: "sql" + options: + sql: >- + array( + named_struct('key', 'account-id', 'value', content.account_id), + named_struct('key', 'updated', 'value', content.details.updated_by.time) + ) + - name: 
"value" + generator: + type: "sql" + options: + sql: "to_json(content)" + - name: "content" + schema: + fields: + - name: "account_id" + - name: "year" + type: "int" + generator: + options: + minValue: 2021 + maxValue: 2022 + - name: "amount" + type: "double" + generator: + options: + minValue: 10.0 + maxValue: 100.0 + - name: "details" + schema: + fields: + - name: "name" + - name: "txn_date" + type: "date" + generator: + type: "random" + options: + minValue: "2021-01-01" + maxValue: "2021-12-31" + - name: "updated_by" + schema: + fields: + - name: "user" + - name: "time" + type: "timestamp" + - name: "transactions" + type: "array" + schema: + fields: + - name: "txn_date" + type: "date" + - name: "amount" + type: "double" \ No newline at end of file diff --git a/app/src/test/resources/sample/task/jms/jms-account-task.yaml b/app/src/test/resources/sample/task/jms/jms-account-task.yaml new file mode 100644 index 00000000..c5fde93b --- /dev/null +++ b/app/src/test/resources/sample/task/jms/jms-account-task.yaml @@ -0,0 +1,74 @@ +name: "json_account_jms" +steps: + - name: "jms_account" + type: "json" + count: + records: 100 + options: + destinationName: "/JNDI/Q/test_queue" + rowsPerSecond: "20" + schema: + fields: + - name: "value" + generator: + type: "sql" + options: + sql: "to_json(content)" + - name: "content" + schema: + fields: + - name: "account_id" + type: "string" + generator: + type: "random" + - name: "year" + type: "int" + generator: + type: "random" + options: + minValue: 2021 + maxValue: 2022 + - name: "amount" + type: "double" + generator: + type: "random" + options: + minValue: 10.0 + maxValue: 100.0 + - name: "details" + schema: + fields: + - name: "name" + type: "string" + generator: + type: "random" + - name: "txn_date" + type: "date" + generator: + type: "random" + options: + minValue: "2021-01-01" + maxValue: "2021-12-31" + - name: "updated_by" + schema: + fields: + - name: "user" + type: "string" + generator: + type: "random" + - name: "time" + type: "timestamp" + generator: + type: "random" + - name: "transactions" + type: "array" + schema: + fields: + - name: "txn_date" + type: "date" + generator: + type: "random" + - name: "amount" + type: "double" + generator: + type: "random" \ No newline at end of file diff --git a/app/src/test/resources/sample/task/kafka/kafka-account-task.yaml b/app/src/test/resources/sample/task/kafka/kafka-account-task.yaml new file mode 100644 index 00000000..3e258df9 --- /dev/null +++ b/app/src/test/resources/sample/task/kafka/kafka-account-task.yaml @@ -0,0 +1,96 @@ +name: "json_account_kafka" +steps: + - name: "kafka_account" + type: "json" + count: + records: 10 + options: + topic: "account-topic" + schema: + fields: + - name: "key" + type: "string" + generator: + type: "sql" + options: + sql: "content.account_id" + - name: "value" + type: "string" + generator: + type: "sql" + options: + sql: "to_json(content)" + - name: "headers" + type: "array>" + generator: + type: "sql" + options: + sql: >- + array( + named_struct('key', 'account-id', 'value', to_binary(content.account_id, 'utf-8')), + named_struct('key', 'updated', 'value', to_binary(content.details.updated_by.time, 'utf-8')) + ) + # - name: "partition" + # type: "int" + # generator: + # type: "sql" + # options: + # sql: "1" + - name: "content" + schema: + fields: + - name: "account_id" + type: "string" + generator: + type: "random" + - name: "year" + type: "int" + generator: + type: "random" + options: + minValue: 2021 + maxValue: 2022 + - name: "amount" + type: "double" + 
generator: + type: "random" + options: + minValue: 10.0 + maxValue: 100.0 + - name: "details" + schema: + fields: + - name: "name" + type: "string" + generator: + type: "random" + - name: "txn_date" + type: "date" + generator: + type: "random" + options: + minValue: "2021-01-01" + maxValue: "2021-12-31" + - name: "updated_by" + schema: + fields: + - name: "user" + type: "string" + generator: + type: "random" + - name: "time" + type: "timestamp" + generator: + type: "random" + - name: "transactions" + type: "array" + schema: + fields: + - name: "txn_date" + type: "date" + generator: + type: "random" + - name: "amount" + type: "double" + generator: + type: "random" \ No newline at end of file diff --git a/app/src/test/resources/sample/task/postgres/postgres-customer-task.yaml b/app/src/test/resources/sample/task/postgres/postgres-customer-task.yaml new file mode 100644 index 00000000..58859795 --- /dev/null +++ b/app/src/test/resources/sample/task/postgres/postgres-customer-task.yaml @@ -0,0 +1,39 @@ +name: "jdbc_customer_accounts_table_create" +steps: + - name: "accounts" + type: "postgres" + count: + records: 1000000 + options: + dbtable: "account.accounts" + schema: + fields: + - name: "account_number" + type: "string" + generator: + type: "regex" + options: + isPrimaryKey: "true" + regex: "ACC1[0-9]{5,10}" + - name: "account_status" + type: "string" + generator: + type: "oneOf" + options: + oneOf: + - "open" + - "closed" + - name: "open_date" + type: "date" + generator: + type: "random" + - name: "created_by" + type: "string" + generator: + type: "random" + - name: "customer_id_int" + type: "int" + generator: + type: "random" + options: + isUnique: true diff --git a/app/src/test/resources/sample/task/postgres/postgres-transaction-task.yaml b/app/src/test/resources/sample/task/postgres/postgres-transaction-task.yaml new file mode 100644 index 00000000..819b1599 --- /dev/null +++ b/app/src/test/resources/sample/task/postgres/postgres-transaction-task.yaml @@ -0,0 +1,22 @@ +name: "jdbc_customer_transactions_table_create" +steps: + - name: "transactions" + type: "postgres" + count: + records: 10000 + perColumn: + columnNames: + - "account_number" + count: 100 + options: + dbtable: "account.transactions" + schema: + fields: + - name: "account_number" + type: "string" + generator: + type: "random" + - name: "amount" + type: "double" + generator: + type: "random" diff --git a/app/src/test/resources/sample/validation/simple-validation.yaml b/app/src/test/resources/sample/validation/simple-validation.yaml new file mode 100644 index 00000000..8726a049 --- /dev/null +++ b/app/src/test/resources/sample/validation/simple-validation.yaml @@ -0,0 +1,13 @@ +--- +name: "account_checks" +description: "Check account related fields have gone through system correctly" +dataSources: + json: + options: + path: "app/src/test/resources/sample/json/txn-gen" + validations: + - expr: "amount < 100" + - expr: "year == 2021" + errorThreshold: 0.1 + - expr: "regexp_like(name, 'Peter .*')" + errorThreshold: 200 diff --git a/app/src/test/scala/com/github/pflooky/datagen/core/generator/DataGeneratorFactoryTest.scala b/app/src/test/scala/com/github/pflooky/datagen/core/generator/DataGeneratorFactoryTest.scala new file mode 100644 index 00000000..33f6760a --- /dev/null +++ b/app/src/test/scala/com/github/pflooky/datagen/core/generator/DataGeneratorFactoryTest.scala @@ -0,0 +1,95 @@ +package com.github.pflooky.datagen.core.generator + +import com.github.pflooky.datacaterer.api.model.{Count, Field, Generator, PerColumnCount, 
Schema, Step} +import com.github.pflooky.datagen.core.util.{Account, SparkSuite} +import net.datafaker.Faker +import org.apache.spark.sql.types.{DoubleType, IntegerType, StringType} +import org.apache.spark.sql.{Dataset, Encoder, Encoders} +import org.junit.runner.RunWith +import org.scalatestplus.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +class DataGeneratorFactoryTest extends SparkSuite { + + private val dataGeneratorFactory = new DataGeneratorFactory(new Faker() with Serializable) + private val schema = Schema(Some( + List( + Field("id"), + Field("amount", Some("double")), + Field("debit_credit", Some("string"), Some(Generator("oneOf", Map("oneOf" -> List("D", "C"))))), + Field("name", Some("string"), Some(Generator("regex", Map("regex" -> "[A-Z][a-z]{2,6} [A-Z][a-z]{2,8}")))), + Field("code", Some("int"), Some(Generator("sql", Map("sql" -> "CASE WHEN debit_credit == 'D' THEN 1 ELSE 0 END")))), + ) + )) + private val simpleSchema = Schema(Some(List(Field("id")))) + + test("Can generate data for basic step") { + val step = Step("transaction", "parquet", Count(records = Some(10)), Map("path" -> "sample/output/parquet/transactions"), schema) + + val df = dataGeneratorFactory.generateDataForStep(step, "parquet", 0, 10) + df.cache() + + assert(df.count() == 10L) + assert(df.columns sameElements Array("id", "amount", "debit_credit", "name", "code")) + assert(df.schema.fields.map(x => (x.name, x.dataType)) sameElements Array( + ("id", StringType), + ("amount", DoubleType), + ("debit_credit", StringType), + ("name", StringType), + ("code", IntegerType), + )) + val sampleRow = df.head() + assert(sampleRow.getString(0).nonEmpty && sampleRow.getString(0).length <= 20) + assert(sampleRow.getDouble(1) >= 0.0) + val debitCredit = sampleRow.getString(2) + assert(debitCredit == "D" || debitCredit == "C") + assert(sampleRow.getString(3).matches("[A-Z][a-z]{2,6} [A-Z][a-z]{2,8}")) + if (debitCredit == "D") assert(sampleRow.getInt(4) == 1) else assert(sampleRow.getInt(4) == 0) + } + + test("Can generate data when number of rows per column is defined") { + val step = Step("transaction", "parquet", + Count(records = Some(10), perColumn = Some(PerColumnCount(List("id"), Some(2)))), + Map("path" -> "sample/output/parquet/transactions"), simpleSchema) + + val df = dataGeneratorFactory.generateDataForStep(step, "parquet", 0, 10) + df.cache() + + assert(df.count() == 20L) + val sampleId = df.head().getAs[String]("id") + val sampleRows = df.filter(_.getAs[String]("id") == sampleId) + assert(sampleRows.count() == 2L) + } + + test("Can generate data with generated number of rows per column by a generator") { + val step = Step("transaction", "parquet", Count(Some(10), + perColumn = Some(PerColumnCount(List("id"), None, Some(Generator("random", Map("min" -> "1", "max" -> "2"))))), None), + Map("path" -> "sample/output/parquet/transactions"), simpleSchema) + + val df = dataGeneratorFactory.generateDataForStep(step, "parquet", 0, 10) + df.cache() + + assert(df.count() >= 10L) + assert(df.count() <= 20L) + val sampleId = df.head().getAs[String]("id") + val sampleRows = df.filter(_.getAs[String]("id") == sampleId) + assert(sampleRows.count() >= 1L) + assert(sampleRows.count() <= 2L) + } + + test("Can generate data with generated number of rows generated by a data generator") { + val step = Step("transaction", "parquet", Count(None, + perColumn = None, + generator = Some(Generator("random", Map("min" -> "10", "max" -> "20")))), + Map("path" -> "sample/output/parquet/transactions"), simpleSchema) + + val df = 
dataGeneratorFactory.generateDataForStep(step, "parquet", 0, 15) + df.cache() + + assert(df.count() >= 10L) + assert(df.count() <= 20L) + val sampleId = df.head().getAs[String]("id") + val sampleRows = df.filter(_.getAs[String]("id") == sampleId) + assert(sampleRows.count() == 1L) + } +} diff --git a/app/src/test/scala/com/github/pflooky/datagen/core/generator/DataGeneratorProcessorTest.scala b/app/src/test/scala/com/github/pflooky/datagen/core/generator/DataGeneratorProcessorTest.scala new file mode 100644 index 00000000..8d959a6a --- /dev/null +++ b/app/src/test/scala/com/github/pflooky/datagen/core/generator/DataGeneratorProcessorTest.scala @@ -0,0 +1,33 @@ +package com.github.pflooky.datagen.core.generator + +import com.github.pflooky.datacaterer.api.model.Constants.FORMAT +import com.github.pflooky.datacaterer.api.model.{DataCatererConfiguration, FlagsConfig, FoldersConfig} +import com.github.pflooky.datagen.core.util.SparkSuite +import org.junit.runner.RunWith +import org.scalatestplus.junit.JUnitRunner + +import java.io.File +import scala.reflect.io.Directory + +@RunWith(classOf[JUnitRunner]) +class DataGeneratorProcessorTest extends SparkSuite { + + test("Can parse plan and tasks, then execute data generation") { + val basePath = "src/test/resources/sample/data" + val config = DataCatererConfiguration( + flagsConfig = FlagsConfig(false, true, false, false, enableValidation = false), + foldersConfig = FoldersConfig("sample/plan/simple-json-plan.yaml", "sample/task", basePath, recordTrackingFolderPath = s"$basePath/recordTracking"), + connectionConfigByName = Map("account_json" -> Map(FORMAT -> "json")) + ) + val dataGeneratorProcessor = new DataGeneratorProcessor(config) + + dataGeneratorProcessor.generateData() + + val generatedData = sparkSession.read + .json(s"$basePath/generated/json/account-gen") + val generatedCount = generatedData.count() + assert(generatedCount > 0) + new Directory(new File(basePath)).deleteRecursively() + } + +} diff --git a/app/src/test/scala/com/github/pflooky/datagen/core/generator/provider/OneOfDataGeneratorTest.scala b/app/src/test/scala/com/github/pflooky/datagen/core/generator/provider/OneOfDataGeneratorTest.scala new file mode 100644 index 00000000..34468ac6 --- /dev/null +++ b/app/src/test/scala/com/github/pflooky/datagen/core/generator/provider/OneOfDataGeneratorTest.scala @@ -0,0 +1,45 @@ +package com.github.pflooky.datagen.core.generator.provider + +import com.github.pflooky.datacaterer.api.model.Constants.ONE_OF_GENERATOR +import com.github.pflooky.datagen.core.generator.provider.OneOfDataGenerator.RandomOneOfDataGenerator +import org.apache.spark.sql.types.{MetadataBuilder, StringType, StructField} +import org.junit.runner.RunWith +import org.scalatest.funsuite.AnyFunSuite +import org.scalatestplus.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +class OneOfDataGeneratorTest extends AnyFunSuite { + + private val oneOfArray = Array("started", "in-progress", "finished", "failed", "restarted", "paused") + + test("Can generate data based on one-of generator") { + val metadata = new MetadataBuilder() + .putStringArray(ONE_OF_GENERATOR, oneOfArray) + .build() + val oneOfDataGenerator = new RandomOneOfDataGenerator(StructField("random_one_of", StringType, false, metadata)) + + (1 to 20).foreach(_ => { + val data = oneOfDataGenerator.generate + assert(data.isInstanceOf[String]) + assert(oneOfArray.contains(data)) + }) + } + + test("Will default to use string type when no array type defined") { + val metadata = new MetadataBuilder() + 
.putStringArray(ONE_OF_GENERATOR, oneOfArray) + .build() + val oneOfDataGenerator = new RandomOneOfDataGenerator(StructField("random_one_of", StringType, false, metadata)) + + (1 to 20).foreach(_ => { + val data = oneOfDataGenerator.generate + assert(data.isInstanceOf[String]) + assert(oneOfArray.contains(data)) + }) + } + + test("Will throw an exception if no oneOf is defined in metadata") { + val metadata = new MetadataBuilder().build() + assertThrows[AssertionError](new RandomOneOfDataGenerator(StructField("random_one_of", StringType, false, metadata))) + } +} diff --git a/app/src/test/scala/com/github/pflooky/datagen/core/generator/provider/RandomDataGeneratorTest.scala b/app/src/test/scala/com/github/pflooky/datagen/core/generator/provider/RandomDataGeneratorTest.scala new file mode 100644 index 00000000..4da1cdf2 --- /dev/null +++ b/app/src/test/scala/com/github/pflooky/datagen/core/generator/provider/RandomDataGeneratorTest.scala @@ -0,0 +1,332 @@ +package com.github.pflooky.datagen.core.generator.provider + +import com.github.pflooky.datacaterer.api.model.Constants.{ARRAY_MINIMUM_LENGTH, DISTINCT_COUNT, ENABLED_EDGE_CASE, ENABLED_NULL, EXPRESSION, IS_UNIQUE, MAXIMUM, MEAN, MINIMUM, PROBABILITY_OF_EDGE_CASE, PROBABILITY_OF_NULL, ROW_COUNT, STANDARD_DEVIATION} +import com.github.pflooky.datagen.core.generator.provider.RandomDataGenerator._ +import com.github.pflooky.datagen.core.model.Constants.INDEX_INC_COL +import org.apache.spark.sql.types._ +import org.junit.runner.RunWith +import org.scalatest.funsuite.AnyFunSuite +import org.scalatestplus.junit.JUnitRunner + +import java.sql.{Date, Timestamp} +import java.time.temporal.ChronoUnit +import java.time.{Instant, LocalDate} + +@RunWith(classOf[JUnitRunner]) +class RandomDataGeneratorTest extends AnyFunSuite { + + test("Can get correct data generator based on StructType") { + val structType = StructType(Seq( + StructField("name", StringType), + StructField("age", IntegerType), + StructField("amount", DoubleType), + StructField("date_of_birth", DateType), + StructField("last_login_time", TimestampType) + )) + val generators = RandomDataGenerator.getGeneratorForStructType(structType) + assert(generators.length == 5) + } + + test("Can get the correct data generator based on return type") { + val stringGenerator = RandomDataGenerator.getGeneratorForStructField(StructField("field", StringType)) + val intGenerator = RandomDataGenerator.getGeneratorForStructField(StructField("field", IntegerType)) + val longGenerator = RandomDataGenerator.getGeneratorForStructField(StructField("field", LongType)) + val decimalGenerator = RandomDataGenerator.getGeneratorForStructField(StructField("field", DecimalType(20, 2))) + val shortGenerator = RandomDataGenerator.getGeneratorForStructField(StructField("field", ShortType)) + val doubleGenerator = RandomDataGenerator.getGeneratorForStructField(StructField("field", DoubleType)) + val floatGenerator = RandomDataGenerator.getGeneratorForStructField(StructField("field", FloatType)) + val dateGenerator = RandomDataGenerator.getGeneratorForStructField(StructField("field", DateType)) + val timestampGenerator = RandomDataGenerator.getGeneratorForStructField(StructField("field", TimestampType)) + val booleanGenerator = RandomDataGenerator.getGeneratorForStructField(StructField("field", BooleanType)) + val binaryGenerator = RandomDataGenerator.getGeneratorForStructField(StructField("field", BinaryType)) + val byteGenerator = RandomDataGenerator.getGeneratorForStructField(StructField("field", ByteType)) + val 
listGenerator = RandomDataGenerator.getGeneratorForStructField(StructField("field", ArrayType(StringType))) + + assert(stringGenerator.isInstanceOf[RandomStringDataGenerator]) + assert(intGenerator.isInstanceOf[RandomIntDataGenerator]) + assert(longGenerator.isInstanceOf[RandomLongDataGenerator]) + assert(decimalGenerator.isInstanceOf[RandomDecimalDataGenerator]) + assert(shortGenerator.isInstanceOf[RandomShortDataGenerator]) + assert(doubleGenerator.isInstanceOf[RandomDoubleDataGenerator]) + assert(floatGenerator.isInstanceOf[RandomFloatDataGenerator]) + assert(dateGenerator.isInstanceOf[RandomDateDataGenerator]) + assert(timestampGenerator.isInstanceOf[RandomTimestampDataGenerator]) + assert(booleanGenerator.isInstanceOf[RandomBooleanDataGenerator]) + assert(binaryGenerator.isInstanceOf[RandomBinaryDataGenerator]) + assert(byteGenerator.isInstanceOf[RandomByteDataGenerator]) + assert(listGenerator.isInstanceOf[RandomArrayDataGenerator[String]]) + } + + test("Can create random string generator") { + val stringGenerator = new RandomStringDataGenerator(StructField("random_string", StringType, false)) + val sampleData = stringGenerator.generate + + assert(stringGenerator.edgeCases.nonEmpty) + assert(sampleData.nonEmpty) + assert(sampleData.length <= 20) + } + + test("Can create random string generator with expression that ignores minimum and maximum length") { + val metadata = new MetadataBuilder().putString(MINIMUM, "0").putString(MAXIMUM, "5").putString(EXPRESSION, "#{Name.name}").build() + val stringGenerator = new RandomStringDataGenerator(StructField("random_string", StringType, false, metadata)) + val sampleData = stringGenerator.generate + + assert(stringGenerator.edgeCases.nonEmpty) + assert(sampleData.nonEmpty) + assert(sampleData.length > 5) + } + + test("Can create random int generator with custom min and max") { + val metadata = new MetadataBuilder().putString(MAXIMUM, "10").putString(MINIMUM, "5").build() + val intGenerator = new RandomIntDataGenerator(StructField("random_int", IntegerType, false, metadata)) + val sampleData = intGenerator.generate + + assert(intGenerator.edgeCases.nonEmpty) + assert(sampleData >= 5) + assert(sampleData <= 10) + assert(intGenerator.generateSqlExpression == "CAST(ROUND(RAND() * 5 + 5, 0) AS INT)") + } + + test("Can create random int generator") { + val intGenerator = new RandomIntDataGenerator(StructField("random_int", IntegerType, false)) + val sampleData = intGenerator.generate + + assert(intGenerator.edgeCases.nonEmpty) + assert(sampleData >= 0) + assert(sampleData <= Int.MaxValue) + } + + test("Can create random long generator") { + val longGenerator = new RandomLongDataGenerator(StructField("random_long", LongType, false)) + val sampleData = longGenerator.generate + + assert(longGenerator.edgeCases.nonEmpty) + assert(sampleData >= 0) + assert(sampleData <= Long.MaxValue) + assert(longGenerator.generateSqlExpression == "CAST(ROUND(RAND() * 100000 + 0, 0) AS LONG)") + } + + test("Can create random long generator with custom min and max") { + val metadata = new MetadataBuilder().putString(MAXIMUM, "10").putString(MINIMUM, "5").build() + val longGenerator = new RandomLongDataGenerator(StructField("random_long", LongType, false, metadata)) + val sampleData = longGenerator.generate + + assert(longGenerator.edgeCases.nonEmpty) + assert(sampleData >= 5) + assert(sampleData <= 10) + } + + test("Can create random decimal generator") { + val decimalGenerator = new RandomDecimalDataGenerator(StructField("random_decimal", DecimalType(22, 2), false)) + 
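+ // No MINIMUM or MAXIMUM metadata is supplied, so the generator should fall back to its default range of 0 to 100000, matching the SQL expression asserted below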
val sampleData = decimalGenerator.generate + + assert(decimalGenerator.edgeCases.nonEmpty) + assert(sampleData >= 0) + assert(sampleData <= Long.MaxValue) + assert(decimalGenerator.generateSqlExpression == "CAST(RAND() * 100000 + 0 AS DECIMAL(22, 2))") + } + + test("Can create random decimal generator with custom min and max") { + val metadata = new MetadataBuilder().putString(MAXIMUM, "10").putString(MINIMUM, "5").build() + val decimalGenerator = new RandomDecimalDataGenerator(StructField("random_decimal", DecimalType(22, 2), false, metadata)) + val sampleData = decimalGenerator.generate + + assert(decimalGenerator.edgeCases.nonEmpty) + assert(sampleData >= 5) + assert(sampleData <= 10) + assert(decimalGenerator.generateSqlExpression == "CAST(RAND() * 5 + 5 AS DECIMAL(22, 2))") + } + + test("Can create random short generator") { + val shortGenerator = new RandomShortDataGenerator(StructField("random_short", ShortType, false)) + val sampleData = shortGenerator.generate + + assert(shortGenerator.edgeCases.nonEmpty) + assert(sampleData >= 0) + assert(sampleData <= Short.MaxValue) + assert(shortGenerator.generateSqlExpression == "CAST(ROUND(RAND() * 1000 + 0, 0) AS SHORT)") + } + + test("Can create random short generator with custom min and max") { + val metadata = new MetadataBuilder().putString(MAXIMUM, "10").putString(MINIMUM, "5").build() + val shortGenerator = new RandomShortDataGenerator(StructField("random_short", ShortType, false, metadata)) + val sampleData = shortGenerator.generate + + assert(shortGenerator.edgeCases.nonEmpty) + assert(sampleData >= 5) + assert(sampleData <= 10) + assert(shortGenerator.generateSqlExpression == "CAST(ROUND(RAND() * 5 + 5, 0) AS SHORT)") + } + + test("Can create random double generator") { + val doubleGenerator = new RandomDoubleDataGenerator(StructField("random_double", DoubleType, false)) + val sampleData = doubleGenerator.generate + + assert(doubleGenerator.edgeCases.nonEmpty) + assert(sampleData >= 0.0) + assert(sampleData <= Double.MaxValue) + assert(doubleGenerator.generateSqlExpression == "CAST(RAND() * 100000.0 + 0.0 AS DOUBLE)") + } + + test("Can create random double generator with custom min and max") { + val metadata = new MetadataBuilder().putString(MAXIMUM, "10.0").putString(MINIMUM, "5.0").build() + val doubleGenerator = new RandomDoubleDataGenerator(StructField("random_double", DoubleType, false, metadata)) + val sampleData = doubleGenerator.generate + + assert(doubleGenerator.edgeCases.nonEmpty) + assert(sampleData >= 5.0) + assert(sampleData <= 10.0) + assert(doubleGenerator.generateSqlExpression == "CAST(RAND() * 5.0 + 5.0 AS DOUBLE)") + } + + test("Can create random float generator") { + val floatGenerator = new RandomFloatDataGenerator(StructField("random_float", FloatType, false)) + val sampleData = floatGenerator.generate + + assert(floatGenerator.edgeCases.nonEmpty) + assert(sampleData >= 0.0) + assert(sampleData <= Float.MaxValue) + assert(floatGenerator.generateSqlExpression == "CAST(RAND() * 100000.0 + 0.0 AS FLOAT)") + } + + test("Can create random float generator with custom min and max") { + val metadata = new MetadataBuilder().putString(MAXIMUM, "10.0").putString(MINIMUM, "5.0").build() + val floatGenerator = new RandomFloatDataGenerator(StructField("random_float", FloatType, false, metadata)) + val sampleData = floatGenerator.generate + + assert(floatGenerator.edgeCases.nonEmpty) + assert(sampleData >= 5.0) + assert(sampleData <= 10.0) + assert(floatGenerator.generateSqlExpression == "CAST(RAND() * 5.0 + 5.0 AS FLOAT)") 
+ } + + test("Can create random date generator") { + val dateGenerator = new RandomDateDataGenerator(StructField("random_date", DateType, false)) + val sampleData = dateGenerator.generate + + assert(dateGenerator.edgeCases.nonEmpty) + assert(sampleData.before(Date.valueOf(LocalDate.now().plusDays(1)))) + assert(sampleData.after(Date.valueOf(LocalDate.now().minusDays(366)))) + } + + test("Can create random timestamp generator") { + val dateGenerator = new RandomTimestampDataGenerator(StructField("random_ts", TimestampType, false)) + val sampleData = dateGenerator.generate + + assert(dateGenerator.edgeCases.nonEmpty) + assert(sampleData.before(Timestamp.from(Instant.now()))) + assert(sampleData.after(Timestamp.from(Instant.now().minus(365, ChronoUnit.DAYS)))) + } + + test("Can create random binary generator") { + val binaryGenerator = new RandomBinaryDataGenerator(StructField("random_binary", BinaryType, false)) + val sampleData = binaryGenerator.generate + + assert(binaryGenerator.edgeCases.nonEmpty) + assert(sampleData.length > 0) + assert(sampleData.length <= 20) + } + + test("Can create random byte generator") { + val byteGenerator = new RandomByteDataGenerator(StructField("random_byte", ByteType, false)) + val sampleData = byteGenerator.generate + + assert(byteGenerator.edgeCases.nonEmpty) + assert(sampleData.toString.nonEmpty) + } + + test("Can create random list of string generator") { + val metadata = new MetadataBuilder().putString(ARRAY_MINIMUM_LENGTH, "1").build() + val listGenerator = new RandomArrayDataGenerator[String](StructField("random_list", ArrayType(StringType), false, metadata), StringType) + val sampleData = listGenerator.generate + + assert(sampleData.nonEmpty) + } + + test("Can create random list of struct type generator") { + val metadata = new MetadataBuilder().putString(ARRAY_MINIMUM_LENGTH, "1").build() + val innerStruct = StructType(Seq(StructField("random_acc", StringType), StructField("random_num", IntegerType))) + val listGenerator = new RandomArrayDataGenerator[StructType](StructField("random_list", ArrayType(innerStruct), false, metadata), new StructType()) + val sampleData = listGenerator.generate + + assert(sampleData.nonEmpty) + } + + test("Can create random string generator with only nulls generated") { + val metadata = new MetadataBuilder().putString(ENABLED_NULL, "true").putString(PROBABILITY_OF_NULL, "1.0").build() + val stringGenerator = new RandomStringDataGenerator(StructField("random_string", StringType, true, metadata)) + val sampleData = (1 to 10).map(_ => stringGenerator.generateWrapper()) + + assert(stringGenerator.edgeCases.nonEmpty) + assert(sampleData.nonEmpty) + assert(sampleData.forall(_ == null)) + } + + test("Can create random string generator with only edge cases generated") { + val metadata = new MetadataBuilder().putString(ENABLED_EDGE_CASE, "true").putString(PROBABILITY_OF_EDGE_CASE, "1.0").build() + val stringGenerator = new RandomStringDataGenerator(StructField("random_string", StringType, false, metadata)) + val sampleData = (1 to 10).map(_ => stringGenerator.generateWrapper()) + + assert(stringGenerator.edgeCases.nonEmpty) + assert(sampleData.nonEmpty) + assert(sampleData.forall(stringGenerator.edgeCases.contains)) + } + + test("Can create random int generator with only edge cases generated") { + val metadata = new MetadataBuilder().putString(ENABLED_EDGE_CASE, "true").putString(PROBABILITY_OF_EDGE_CASE, "1.0").build() + val intGenerator = new RandomIntDataGenerator(StructField("random_int", IntegerType, false, metadata)) + val 
sampleData = (1 to 10).map(_ => intGenerator.generateWrapper()) + + assert(intGenerator.edgeCases.nonEmpty) + assert(sampleData.nonEmpty) + assert(sampleData.forall(intGenerator.edgeCases.contains)) + } + + test("Can create random string generator with nulls and edge cases enabled") { + val metadata = new MetadataBuilder().putString(ENABLED_NULL, "true").putString(ENABLED_EDGE_CASE, "true") + .putString(PROBABILITY_OF_EDGE_CASE, "1.0").build() + val stringGenerator = new RandomStringDataGenerator(StructField("random_string", StringType, true, metadata)) + val sampleData = (1 to 10).map(_ => stringGenerator.generateWrapper()) + + assert(stringGenerator.edgeCases.nonEmpty) + assert(sampleData.nonEmpty) + assert(sampleData.forall(s => stringGenerator.edgeCases.contains(s) || s == null)) + } + + test("Will throw exception if unable to create unique value given metadata restrictions") { + val metadata = new MetadataBuilder().putString(MINIMUM, "1").putString(MAXIMUM, "1").putString(IS_UNIQUE, "true").build() + val intGenerator = new RandomIntDataGenerator(StructField("random_int", IntegerType, false, metadata)) + intGenerator.generateWrapper() + assertThrows[RuntimeException](intGenerator.generateWrapper()) + } + + test("Can create random int generator with standard deviation and mean defined") { + val metadata = new MetadataBuilder().putString(STANDARD_DEVIATION, "2.0").putString(MEAN, "1.0").build() + val intGenerator = new RandomIntDataGenerator(StructField("random_int", IntegerType, false, metadata)) + + assert(intGenerator.edgeCases.nonEmpty) + assert(intGenerator.generateSqlExpression == "CAST(ROUND(RANDN() * 2.0 + 1.0, 0) AS INT)") + } + + test("Can create random int generator with uniform distribution if only one of standard deviation or mean is defined") { + val metadata = new MetadataBuilder().putString(STANDARD_DEVIATION, "2.0").build() + val intGenerator = new RandomIntDataGenerator(StructField("random_int", IntegerType, false, metadata)) + + assert(intGenerator.generateSqlExpression == "CAST(ROUND(RAND() * 100000 + 0, 0) AS INT)") + + val metadataMean = new MetadataBuilder().putString(MEAN, "1.0").build() + val intGeneratorMean = new RandomIntDataGenerator(StructField("random_int", IntegerType, false, metadataMean)) + val sampleDataMean = (1 to 10).map(_ => intGeneratorMean.generateWrapper()) + + assert(sampleDataMean.nonEmpty) + assert(intGeneratorMean.generateSqlExpression == "CAST(ROUND(RAND() * 100000 + 0, 0) AS INT)") + } + + test("Can create random int generator that increments if distinct count is equal to count") { + val metadata = new MetadataBuilder().putString(DISTINCT_COUNT, "100").putString(ROW_COUNT, "100").putString(MAXIMUM, "100").build() + val intGenerator = new RandomIntDataGenerator(StructField("random_int", IntegerType, false, metadata)) + + assert(intGenerator.edgeCases.nonEmpty) + assert(intGenerator.generateSqlExpression == s"CAST(100 + $INDEX_INC_COL + 1 AS INT)") + } +} diff --git a/app/src/test/scala/com/github/pflooky/datagen/core/generator/provider/RegexDataGeneratorTest.scala b/app/src/test/scala/com/github/pflooky/datagen/core/generator/provider/RegexDataGeneratorTest.scala new file mode 100644 index 00000000..7d62af0b --- /dev/null +++ b/app/src/test/scala/com/github/pflooky/datagen/core/generator/provider/RegexDataGeneratorTest.scala @@ -0,0 +1,32 @@ +package com.github.pflooky.datagen.core.generator.provider + +import com.github.pflooky.datacaterer.api.model.Constants.REGEX_GENERATOR +import 
com.github.pflooky.datagen.core.exception.InvalidDataGeneratorConfigurationException +import com.github.pflooky.datagen.core.generator.provider.RegexDataGenerator.RandomRegexDataGenerator +import org.apache.spark.sql.types.{MetadataBuilder, StringType, StructField} +import org.junit.runner.RunWith +import org.scalatest.funsuite.AnyFunSuite +import org.scalatestplus.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +class RegexDataGeneratorTest extends AnyFunSuite { + + test("Can generate data based on regex") { + val regex = "ACC100[0-9]{5}" + val metadata = new MetadataBuilder().putString(REGEX_GENERATOR, regex).build() + val regexDataGenerator = new RandomRegexDataGenerator(StructField("random_regex", StringType, false, metadata)) + + assert(regexDataGenerator.edgeCases.isEmpty) + (1 to 10).foreach(_ => { + val data = regexDataGenerator.generate + assert(data.length == 11) + assert(data.startsWith("ACC100")) + assert(data.matches(regex)) + }) + } + + test("Throws exception when no regex is defined") { + val metadata = new MetadataBuilder().build() + assertThrows[InvalidDataGeneratorConfigurationException](new RandomRegexDataGenerator(StructField("random_regex", StringType, false, metadata))) + } +} diff --git a/app/src/test/scala/com/github/pflooky/datagen/core/generator/track/RecordTrackingProcessorTest.scala b/app/src/test/scala/com/github/pflooky/datagen/core/generator/track/RecordTrackingProcessorTest.scala new file mode 100644 index 00000000..73e3b584 --- /dev/null +++ b/app/src/test/scala/com/github/pflooky/datagen/core/generator/track/RecordTrackingProcessorTest.scala @@ -0,0 +1,32 @@ +package com.github.pflooky.datagen.core.generator.track + +import com.github.pflooky.datacaterer.api.model.Constants.{IS_PRIMARY_KEY, PRIMARY_KEY_POSITION} +import com.github.pflooky.datacaterer.api.model.{Count, Field, Generator, Schema, Step} +import com.github.pflooky.datagen.core.util.PlanImplicits.StepOps +import com.github.pflooky.datagen.core.util.SparkSuite +import org.junit.runner.RunWith +import org.scalatestplus.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +class RecordTrackingProcessorTest extends SparkSuite { + + test("Can get all primary keys in order") { + val schema = Schema(Some(List( + Field("name", generator = Some( + Generator("random", Map( + IS_PRIMARY_KEY -> "true", PRIMARY_KEY_POSITION -> "2" + )))), + Field("account_id", generator = Some( + Generator("random", Map( + IS_PRIMARY_KEY -> "true", PRIMARY_KEY_POSITION -> "1" + )))), + Field("balance", generator = Some( + Generator("random", Map( + IS_PRIMARY_KEY -> "false" + )))) + ))) + val step = Step("create accounts", "jdbc", Count(), Map(), schema) + val primaryKeys = step.gatherPrimaryKeys + assert(primaryKeys == List("account_id", "name")) + } +} diff --git a/app/src/test/scala/com/github/pflooky/datagen/core/model/ForeignKeyRelationHelperTest.scala b/app/src/test/scala/com/github/pflooky/datagen/core/model/ForeignKeyRelationHelperTest.scala new file mode 100644 index 00000000..668ce900 --- /dev/null +++ b/app/src/test/scala/com/github/pflooky/datagen/core/model/ForeignKeyRelationHelperTest.scala @@ -0,0 +1,25 @@ +package com.github.pflooky.datagen.core.model + +import com.github.pflooky.datacaterer.api.model.Constants.FOREIGN_KEY_DELIMITER +import com.github.pflooky.datagen.core.util.ForeignKeyRelationHelper +import org.scalatest.funsuite.AnyFunSuite + +class ForeignKeyRelationHelperTest extends AnyFunSuite { + + test("Can parse foreign key relation from string") { + val result = 
ForeignKeyRelationHelper.fromString(s"my_postgres${FOREIGN_KEY_DELIMITER}public.categories${FOREIGN_KEY_DELIMITER}id") + + assert(result.dataSource == "my_postgres") + assert(result.step == "public.categories") + assert(result.columns == List("id")) + } + + test("Can parse foreign key relation from string with multiple columns") { + val result = ForeignKeyRelationHelper.fromString(s"my_postgres${FOREIGN_KEY_DELIMITER}public.categories${FOREIGN_KEY_DELIMITER}id,amount,description") + + assert(result.dataSource == "my_postgres") + assert(result.step == "public.categories") + assert(result.columns == List("id", "amount", "description")) + } + +} diff --git a/app/src/test/scala/com/github/pflooky/datagen/core/model/PlanImplicitsTest.scala b/app/src/test/scala/com/github/pflooky/datagen/core/model/PlanImplicitsTest.scala new file mode 100644 index 00000000..b647aa49 --- /dev/null +++ b/app/src/test/scala/com/github/pflooky/datagen/core/model/PlanImplicitsTest.scala @@ -0,0 +1,22 @@ +package com.github.pflooky.datagen.core.model + +import com.github.pflooky.datacaterer.api.model.Constants.FOREIGN_KEY_DELIMITER +import com.github.pflooky.datacaterer.api.model.SinkOptions +import com.github.pflooky.datagen.core.util.PlanImplicits.SinkOptionsOps +import org.scalatest.funsuite.AnyFunSuite + +class PlanImplicitsTest extends AnyFunSuite { + + test("Can map foreign key relations to relationships without column names") { + val sinkOptions = SinkOptions(foreignKeys = + List( + s"my_postgres${FOREIGN_KEY_DELIMITER}public.categories${FOREIGN_KEY_DELIMITER}id" -> + List(s"my_csv${FOREIGN_KEY_DELIMITER}account${FOREIGN_KEY_DELIMITER}account_id") + ) + ) + val result = sinkOptions.foreignKeysWithoutColumnNames + + assert(result == List(s"my_postgres${FOREIGN_KEY_DELIMITER}public.categories" -> List(s"my_csv${FOREIGN_KEY_DELIMITER}account"))) + } + +} diff --git a/app/src/test/scala/com/github/pflooky/datagen/core/model/ValidationOperationsTest.scala b/app/src/test/scala/com/github/pflooky/datagen/core/model/ValidationOperationsTest.scala new file mode 100644 index 00000000..f2d6d6a6 --- /dev/null +++ b/app/src/test/scala/com/github/pflooky/datagen/core/model/ValidationOperationsTest.scala @@ -0,0 +1,68 @@ +package com.github.pflooky.datagen.core.model + +import com.github.pflooky.datacaterer.api.ValidationBuilder +import com.github.pflooky.datacaterer.api.model.ExpressionValidation +import com.github.pflooky.datagen.core.util.{SparkSuite, Transaction} +import com.github.pflooky.datagen.core.validator.ExpressionValidationOps +import org.junit.runner.RunWith +import org.scalatestplus.junit.JUnitRunner + +import java.sql.Date + +@RunWith(classOf[JUnitRunner]) +class ValidationOperationsTest extends SparkSuite { + + private val sampleData = Seq( + Transaction("acc123", "peter", "txn1", Date.valueOf("2020-01-01"), 10.0), + Transaction("acc123", "peter", "txn2", Date.valueOf("2020-01-01"), 50.0), + Transaction("acc123", "peter", "txn3", Date.valueOf("2020-01-01"), 200.0), + Transaction("acc123", "peter", "txn4", Date.valueOf("2020-01-01"), 500.0) + ) + private val df = sparkSession.createDataFrame(sampleData) + + test("Can return empty sample rows when validation is successful") { + val validation = ExpressionValidation("amount < 1000") + val result = new ExpressionValidationOps(validation).validate(df, 4) + + assert(result.isSuccess) + assert(result.sampleErrorValues.isEmpty) + } + + test("Can return empty sample rows when validation is successful from error threshold") { + val validation = new 
ValidationBuilder().expr("amount < 400").errorThreshold(1).validation.asInstanceOf[ExpressionValidation] + val result = new ExpressionValidationOps(validation).validate(df, 4) + + assert(result.isSuccess) + assert(result.sampleErrorValues.isEmpty) + } + + test("Can get sample rows when validation is not successful") { + val validation = ExpressionValidation("amount < 100") + val result = new ExpressionValidationOps(validation).validate(df, 4) + + assert(!result.isSuccess) + assert(result.sampleErrorValues.isDefined) + assert(result.sampleErrorValues.get.count() == 2) + assert(result.sampleErrorValues.get.filter(r => r.getAs[Double]("amount") >= 100).count() == 2) + } + + test("Can get sample rows when validation is not successful by error threshold greater than 1") { + val validation = new ValidationBuilder().expr("amount < 20").errorThreshold(2).validation.asInstanceOf[ExpressionValidation] + val result = new ExpressionValidationOps(validation).validate(df, 4) + + assert(!result.isSuccess) + assert(result.sampleErrorValues.isDefined) + assert(result.sampleErrorValues.get.count() == 3) + assert(result.sampleErrorValues.get.filter(r => r.getAs[Double]("amount") >= 20).count() == 3) + } + + test("Can get sample rows when validation is not successful by error threshold less than 1") { + val validation = new ValidationBuilder().expr("amount < 100").errorThreshold(0.1).validation.asInstanceOf[ExpressionValidation] + val result = new ExpressionValidationOps(validation).validate(df, 4) + + assert(!result.isSuccess) + assert(result.sampleErrorValues.isDefined) + assert(result.sampleErrorValues.get.count() == 2) + assert(result.sampleErrorValues.get.filter(r => r.getAs[Double]("amount") >= 100).count() == 2) + } +} diff --git a/app/src/test/scala/com/github/pflooky/datagen/core/parser/PlanParserTest.scala b/app/src/test/scala/com/github/pflooky/datagen/core/parser/PlanParserTest.scala new file mode 100644 index 00000000..3ede538c --- /dev/null +++ b/app/src/test/scala/com/github/pflooky/datagen/core/parser/PlanParserTest.scala @@ -0,0 +1,26 @@ +package com.github.pflooky.datagen.core.parser + +import com.github.pflooky.datagen.core.util.SparkSuite + +class PlanParserTest extends SparkSuite { + + test("Can parse plan in YAML file") { + val result = PlanParser.parsePlan("app/src/test/resources/sample/plan/account-create-plan.yaml") + + assert(result.name.nonEmpty) + assert(result.description.nonEmpty) + assert(result.tasks.size == 4) + assert(result.validations.size == 1) + assert(result.sinkOptions.isDefined) + assert(result.sinkOptions.get.foreignKeys.size == 1) + assert(result.sinkOptions.get.foreignKeys.head._1 == "solace.jms_account.account_id") + assert(result.sinkOptions.get.foreignKeys.head._2 == List("json.file_account.account_id")) + } + + test("Can parse task in YAML file") { + val result = PlanParser.parseTasks("app/src/test/resources/sample/task") + + assert(result.length == 12) + } + +} diff --git a/app/src/test/scala/com/github/pflooky/datagen/core/plan/ExampleJavaPlanRun.java b/app/src/test/scala/com/github/pflooky/datagen/core/plan/ExampleJavaPlanRun.java new file mode 100644 index 00000000..1b3b37a0 --- /dev/null +++ b/app/src/test/scala/com/github/pflooky/datagen/core/plan/ExampleJavaPlanRun.java @@ -0,0 +1,79 @@ +package com.github.pflooky.datagen.core.plan; + +import com.github.pflooky.datacaterer.api.model.ArrayType; +import com.github.pflooky.datacaterer.api.model.Constants; +import com.github.pflooky.datacaterer.api.model.DateType; +import 
com.github.pflooky.datacaterer.api.model.DoubleType; +import com.github.pflooky.datacaterer.api.model.IntegerType; +import com.github.pflooky.datacaterer.api.model.TimestampType; +import com.github.pflooky.datacaterer.java.api.PlanRun; + +import java.sql.Date; +import java.sql.Timestamp; +import java.util.List; +import java.util.Map; + +public class ExampleJavaPlanRun extends PlanRun { + private String baseFolder; + + public ExampleJavaPlanRun(String baseFolder) { + this.baseFolder = baseFolder; + } + + { + String[] accountStatus = {"open", "closed", "pending", "suspended"}; + var jsonTask = json("account_info", baseFolder + "/json", Map.of(Constants.SAVE_MODE(), "overwrite")) + .schema( + field().name("account_id").regex("ACC[0-9]{8}"), + field().name("year").type(IntegerType.instance()).sql("YEAR(date)"), + field().name("balance").type(DoubleType.instance()).min(10).max(1000), + field().name("date").type(DateType.instance()).min(Date.valueOf("2022-01-01")), + field().name("status").oneOf(accountStatus), + field().name("update_history") + .type(ArrayType.instance()) + .schema( + field().name("updated_time").type(TimestampType.instance()).min(Timestamp.valueOf("2022-01-01 00:00:00")), + field().name("prev_status").oneOf(accountStatus), + field().name("new_status").oneOf(accountStatus) + ), + field().name("customer_details") + .schema( + field().name("name").sql("_join_txn_name"), + field().name("age").type(IntegerType.instance()).min(18).max(90), + field().name("city").expression("#{Address.city}") + ), + field().name("_join_txn_name").expression("#{Name.name}").omit(true) + ) + .count(count().records(100)); + + var csvTxns = csv("transactions", baseFolder + "/csv", Map.of(Constants.SAVE_MODE(), "overwrite", "header", "true")) + .schema( + field().name("account_id"), + field().name("txn_id"), + field().name("name"), + field().name("amount").type(DoubleType.instance()).min(10).max(100), + field().name("merchant").expression("#{Company.name}") + ) + .count( + count() + .recordsPerColumnGenerator(100, generator().min(1).max(2), "account_id", "name") + ) + .validationWait(waitCondition().pause(1)) + .validations( + validation().expr("amount > 0").errorThreshold(0.01), + validation().expr("LENGTH(name) > 3").errorThreshold(5), + validation().expr("LENGTH(merchant) > 0").description("Non-empty merchant name") + ); + + var foreignKeySetup = plan() + .addForeignKeyRelationship( + jsonTask, List.of("account_id", "_join_txn_name"), + List.of(Map.entry(csvTxns, List.of("account_id", "name"))) + ); + var conf = configuration() + .generatedReportsFolderPath(baseFolder + "/report") + .enableValidation(true); + + execute(foreignKeySetup, conf, jsonTask, csvTxns); + } +} diff --git a/app/src/test/scala/com/github/pflooky/datagen/core/plan/PlanProcessorTest.scala b/app/src/test/scala/com/github/pflooky/datagen/core/plan/PlanProcessorTest.scala new file mode 100644 index 00000000..73d01206 --- /dev/null +++ b/app/src/test/scala/com/github/pflooky/datagen/core/plan/PlanProcessorTest.scala @@ -0,0 +1,251 @@ +package com.github.pflooky.datagen.core.plan + +import com.github.pflooky.datacaterer.api.PlanRun +import com.github.pflooky.datacaterer.api.model.Constants.{OPEN_METADATA_AUTH_TYPE_OPEN_METADATA, OPEN_METADATA_JWT_TOKEN, OPEN_METADATA_TABLE_FQN, ROWS_PER_SECOND, SAVE_MODE} +import com.github.pflooky.datacaterer.api.model.{ArrayType, BinaryType, DateType, DoubleType, HeaderType, IntegerType, StringType, StructType, TimestampType} +import com.github.pflooky.datagen.core.util.{ObjectMapperUtil, SparkSuite} 
+import org.junit.runner.RunWith +import org.scalatestplus.junit.JUnitRunner + +import java.sql.{Date, Timestamp} + +@RunWith(classOf[JUnitRunner]) +class PlanProcessorTest extends SparkSuite { + + private val scalaBaseFolder = "src/test/resources/sample/documentation" + private val javaBaseFolder = "src/test/resources/sample/java/documentation" + + class DocumentationPlanRun extends PlanRun { + { + val accountStatus = List("open", "closed", "pending", "suspended") + val jsonTask = json("account_info", s"$scalaBaseFolder/json", Map(SAVE_MODE -> "overwrite")) + .schema( + field.name("account_id").regex("ACC[0-9]{8}"), + field.name("year").`type`(IntegerType).sql("YEAR(date)"), + field.name("balance").`type`(DoubleType).min(10).max(1000), + field.name("date").`type`(DateType).min(Date.valueOf("2022-01-01")), + field.name("status").oneOf(accountStatus: _*), + field.name("update_history") + .`type`(ArrayType) + .schema( + field.name("updated_time").`type`(TimestampType).min(Timestamp.valueOf("2022-01-01 00:00:00")), + field.name("prev_status").oneOf(accountStatus: _*), + field.name("new_status").oneOf(accountStatus: _*) + ), + field.name("customer_details") + .schema( + field.name("name").sql("_join_txn_name"), + field.name("age").`type`(IntegerType).min(18).max(90), + field.name("city").expression("#{Address.city}") + ), + field.name("_join_txn_name").expression("#{Name.name}").omit(true) + ) + .count(count.records(100)) + + val csvTxns = csv("transactions", s"$scalaBaseFolder/csv", Map(SAVE_MODE -> "overwrite", "header" -> "true")) + .schema( + field.name("account_id"), + field.name("txn_id"), + field.name("name"), + field.name("amount").`type`(DoubleType).min(10).max(100), + field.name("merchant").expression("#{Company.name}"), + field.name("time").`type`(TimestampType), + field.name("date").`type`(DateType).sql("DATE(time)"), + ) + .count( + count + .records(100) + .recordsPerColumnGenerator(generator.min(1).max(2), "account_id", "name") + ) + .validationWait(waitCondition.pause(1)) + .validations( + validation.expr("amount > 0").errorThreshold(0.01), + validation.expr("LENGTH(name) > 3").errorThreshold(5), + validation.expr("LENGTH(merchant) > 0").description("Non-empty merchant name"), + ) + + val foreignKeySetup = plan + .addForeignKeyRelationship( + jsonTask, List("account_id", "_join_txn_name"), + List((csvTxns, List("account_id", "name"))) + ) + val conf = configuration + .generatedReportsFolderPath(s"$scalaBaseFolder/report") + .enableValidation(true) + .enableSinkMetadata(true) + + execute(foreignKeySetup, conf, jsonTask, csvTxns) + } + } + + test("Can run documentation plan run") { + PlanProcessor.determineAndExecutePlan(Some(new DocumentationPlanRun())) + verifyGeneratedData(scalaBaseFolder) + } + + ignore("Can run Java plan run") { + PlanProcessor.determineAndExecutePlanJava(new ExampleJavaPlanRun(javaBaseFolder)) + verifyGeneratedData(javaBaseFolder) + } + + private def verifyGeneratedData(folder: String) = { + val jsonData = sparkSession.read.json(s"$folder/json").selectExpr("*", "customer_details.name AS name").collect() + val csvData = sparkSession.read.option("header", "true").csv(s"$folder/csv").collect() + val csvCount = csvData.length + assert(jsonData.length == 100) + assert(csvCount >= 100 && csvCount <= 200) + val jsonRecord = jsonData.head + val jsonAccountId = jsonRecord.getString(0) + val csvMatchAccount = csvData.filter(r => r.getString(0).equalsIgnoreCase(jsonAccountId)) + val csvMatchCount = csvMatchAccount.length + assert(csvMatchCount >= 1 && csvMatchCount <= 
2) + assert(csvMatchAccount.forall(r => r.getAs[String]("name").equalsIgnoreCase(jsonRecord.getAs[String]("name")))) + assert(csvData.forall(r => r.getAs[String]("time").substring(0, 10) == r.getAs[String]("date"))) + } + + ignore("Write YAML for plan") { + val docPlanRun = new DocumentationPlanRun() + val planWrite = ObjectMapperUtil.yamlObjectMapper.writeValueAsString(docPlanRun._plan) + println(planWrite) + } + + ignore("Can run Postgres plan run") { + PlanProcessor.determineAndExecutePlan(Some(new TestValidation)) + } + + class TestPostgres extends PlanRun { + val jsonTask = json("my_json", "/tmp/data/json", Map("saveMode" -> "overwrite")) + .schema( + field.name("account_id").regex("ACC[0-9]{8}"), + field.name("name").expression("#{Name.name}"), + field.name("amount").`type`(DoubleType).max(10), + ) + .count(count.recordsPerColumn(2, "account_id", "name")) + .validations( + validation.groupBy("account_id", "name").max("amount").lessThan(100), + validation.unique("account_id", "name"), + ) + val csvTask = json("my_csv", "/tmp/data/csv", Map("saveMode" -> "overwrite")) + .schema( + field.name("account_number").regex("[0-9]{8}"), + field.name("name").expression("#{Name.name}"), + field.name("amount").`type`(DoubleType).max(10), + ) + .validations( + validation.col("account_number").isNotNull.description("account_number is a primary key"), + validation.col("name").matches("[A-Z][a-z]+ [A-Z][a-z]+").errorThreshold(0.3).description("Some names follow a different pattern"), + ) + + val conf = configuration + .generatedReportsFolderPath("/Users/peter/code/spark-datagen/tmp/report") + .enableSinkMetadata(true) + + execute(conf, jsonTask, csvTask) + } + + class TestCsvPostgres extends PlanRun { + val csvTask = csv("my_csv", "/tmp/data/csv", Map("saveMode" -> "overwrite", "header" -> "true")) + .numPartitions(1) + .schema(metadataSource.marquez("http://localhost:5001", "food_delivery", "public.delivery_7_days")) + .count(count.records(10)) + + val postgresTask = postgres("my_postgres", "jdbc:postgresql://localhost:5432/food_delivery", "postgres", "password") + .schema(metadataSource.marquez("http://localhost:5001", "food_delivery")) + .count(count.records(10)) + + val foreignCols = List("order_id", "order_placed_on", "order_dispatched_on", "order_delivered_on", "customer_email", + "customer_address", "menu_id", "restaurant_id", "restaurant_address", "menu_item_id", "category_id", "discount_id", + "city_id", "driver_id") + + val myPlan = plan.addForeignKeyRelationships( + csvTask, foreignCols, + List(foreignField(postgresTask, "food_delivery_public.delivery_7_days", foreignCols)) + ) + + val conf = configuration.enableGeneratePlanAndTasks(true) + .generatedReportsFolderPath("/Users/peter/code/spark-datagen/tmp/report") + + execute(myPlan, conf, csvTask, postgresTask) + } + + class TestJson extends PlanRun { + val jsonTask = json("my_json", "/tmp/data/json", Map("saveMode" -> "overwrite")) + .schema( + field.name("account_id").regex("ACC[0-9]{8}"), + field.name("year").`type`(IntegerType).sql("YEAR(date)"), + field.name("balance").`type`(DoubleType).min(10).max(1000), + field.name("date").`type`(DateType).min(Date.valueOf("2022-01-01")), + field.name("status").sql("element_at(sort_array(update_history, false), 1).status"), + field.name("update_history") + .`type`(ArrayType) + .arrayMinLength(1) + .schema( + field.name("updated_time").`type`(TimestampType).min(Timestamp.valueOf("2022-01-01 00:00:00")), + field.name("status").oneOf("open", "closed") + ), + field.name("customer_details") + .schema( + 
field.name("name").expression("#{Name.name}"), + field.name("age").`type`(IntegerType).min(18).max(90), + field.name("city").expression("#{Address.city}") + ) + ) + + execute(jsonTask) + } + + class TestValidation extends PlanRun { + val firstJsonTask = json("my_first_json", "/tmp/data/first_json", Map("saveMode" -> "overwrite")) + .schema( + field.name("account_id").regex("ACC[0-9]{8}"), + field.name("year").`type`(IntegerType).sql("YEAR(date)"), + field.name("balance").`type`(DoubleType).min(10).max(1000), + field.name("date").`type`(DateType).min(Date.valueOf("2022-01-01")), + field.name("status").oneOf("open", "closed"), + field.name("update_history") + .`type`(ArrayType) + .schema( + field.name("updated_time").`type`(TimestampType).min(Timestamp.valueOf("2022-01-01 00:00:00")), + field.name("prev_status").oneOf("open", "closed"), + field.name("new_status").oneOf("open", "closed") + ), + field.name("customer_details") + .schema( + field.name("name").expression("#{Name.name}"), + field.name("age").`type`(IntegerType).min(18).max(90), + field.name("city").expression("#{Address.city}") + ), + ) + .count(count.records(10)) + + val secondJsonTask = json("my_json", "/tmp/data/second_json", Map("saveMode" -> "overwrite")) + .schema( + field.name("account_id"), + field.name("amount").`type`(IntegerType).min(1).max(100), + field.name("name").expression("#{Name.name}"), + ) + .count(count.records(10).recordsPerColumn(3, "account_id")) + .validations( + validation.upstreamData(firstJsonTask).joinColumns("account_id") + .withValidation(validation.col("my_first_json_customer_details.name").isEqualCol("name")), + validation.upstreamData(firstJsonTask).joinColumns("account_id") + .withValidation(validation.col("amount").isNotEqualCol("my_first_json_balance")), + validation.upstreamData(firstJsonTask).joinExpr("account_id == my_first_json_account_id") + .withValidation(validation.groupBy("account_id", "my_first_json_balance").sum("amount").betweenCol("my_first_json_balance * 0.8", "my_first_json_balance * 1.2")), + validation.upstreamData(firstJsonTask).joinColumns("account_id") + .withValidation(validation.groupBy("account_id", "my_first_json_balance").sum("amount").betweenCol("my_first_json_balance * 0.8", "my_first_json_balance * 1.2")), + validation.upstreamData(firstJsonTask).joinColumns("account_id").joinType("anti").withValidation(validation.count().isEqual(0)), + validation.upstreamData(firstJsonTask).joinColumns("account_id").withValidation(validation.count().isEqual(30)), + ) + + val config = configuration + .generatedReportsFolderPath("/Users/peter/code/spark-datagen/tmp/report") + .recordTrackingForValidationFolderPath("/tmp/record-tracking-validation") + .enableValidation(true) + + val foreignPlan = plan + .addForeignKeyRelationship(firstJsonTask, "account_id", List(secondJsonTask -> "account_id")) + + execute(foreignPlan, config, firstJsonTask, secondJsonTask) + } +} diff --git a/app/src/test/scala/com/github/pflooky/datagen/core/util/CombinationCalculatorTest.scala b/app/src/test/scala/com/github/pflooky/datagen/core/util/CombinationCalculatorTest.scala new file mode 100644 index 00000000..20fafd13 --- /dev/null +++ b/app/src/test/scala/com/github/pflooky/datagen/core/util/CombinationCalculatorTest.scala @@ -0,0 +1,26 @@ +package com.github.pflooky.datagen.core.util + +import com.github.pflooky.datacaterer.api.model.{Field, Generator, Schema} +import net.datafaker.Faker +import org.junit.runner.RunWith +import org.scalatest.funsuite.AnyFunSuite +import 
org.scalatestplus.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +class CombinationCalculatorTest extends AnyFunSuite { + + test("Can calculate number of combinations given a schema with faker expressions and one of data generators") { + val schema = Schema(Some(List( + Field("account_id", Some("string"), Some(Generator())), + Field("name", Some("string"), Some(Generator("random", Map("expression" -> "#{Name.name}")))), + Field("status", Some("string"), Some(Generator("oneOf", Map("oneOf" -> List("open", "closed"))))), + ))) + val faker = new Faker() + + val result = CombinationCalculator.totalCombinationsForSchema(schema, faker) + + assert(result.isDefined) + assert(result.get == BigInt(103908640)) + } + +} diff --git a/app/src/test/scala/com/github/pflooky/datagen/core/util/FileUtilTest.scala b/app/src/test/scala/com/github/pflooky/datagen/core/util/FileUtilTest.scala new file mode 100644 index 00000000..4d53600c --- /dev/null +++ b/app/src/test/scala/com/github/pflooky/datagen/core/util/FileUtilTest.scala @@ -0,0 +1,17 @@ +package com.github.pflooky.datagen.core.util + +import org.junit.runner.RunWith +import org.scalatest.funsuite.AnyFunSuite +import org.scalatestplus.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +class FileUtilTest extends AnyFunSuite { + + test("Can get file") { + + } + + test("Can get directory") { + + } +} diff --git a/app/src/test/scala/com/github/pflooky/datagen/core/util/ForeignKeyUtilTest.scala b/app/src/test/scala/com/github/pflooky/datagen/core/util/ForeignKeyUtilTest.scala new file mode 100644 index 00000000..3d4791af --- /dev/null +++ b/app/src/test/scala/com/github/pflooky/datagen/core/util/ForeignKeyUtilTest.scala @@ -0,0 +1,224 @@ +package com.github.pflooky.datagen.core.util + +import com.github.pflooky.datacaterer.api.PlanRun +import com.github.pflooky.datacaterer.api.model.Constants.FOREIGN_KEY_DELIMITER +import com.github.pflooky.datacaterer.api.model.{ForeignKeyRelation, Plan, SinkOptions, TaskSummary} +import com.github.pflooky.datagen.core.model.ForeignKeyRelationship +import org.apache.spark.sql.Encoders +import org.apache.spark.sql.types.{ArrayType, StringType, StructField, StructType} +import org.junit.runner.RunWith +import org.scalatestplus.junit.JUnitRunner + +import java.sql.Date +import java.time.LocalDate + +@RunWith(classOf[JUnitRunner]) +class ForeignKeyUtilTest extends SparkSuite { + + test("When no foreign keys defined, return back same dataframes") { + val sinkOptions = SinkOptions(None, None, List()) + val plan = Plan("no foreign keys", "simple plan", List(), Some(sinkOptions)) + val dfMap = Map("name" -> sparkSession.emptyDataFrame) + + val result = ForeignKeyUtil.getDataFramesWithForeignKeys(plan, dfMap) + + assert(dfMap.toList == result) + } + + test("Can get insert order") { + val foreignKeys = List( + "orders" -> List("customers"), + "order_items" -> List("orders", "products"), + "reviews" -> List("products", "customers") + ) + val result = ForeignKeyUtil.getInsertOrder(foreignKeys) + assert(result.head == "reviews") + } + + test("Can link foreign keys between data sets") { + val sinkOptions = SinkOptions(None, None, + List(s"postgres${FOREIGN_KEY_DELIMITER}account${FOREIGN_KEY_DELIMITER}account_id" -> List(s"postgres${FOREIGN_KEY_DELIMITER}transaction${FOREIGN_KEY_DELIMITER}account_id")) + ) + val plan = Plan("foreign keys", "simple plan", List(), Some(sinkOptions)) + val accountsList = List( + Account("acc1", "peter", Date.valueOf(LocalDate.now())), + Account("acc2", "john", Date.valueOf(LocalDate.now())), + 
Account("acc3", "jack", Date.valueOf(LocalDate.now())) + ) + val transactionList = List( + Transaction("some_acc9", "rand1", "id123", Date.valueOf(LocalDate.now()), 10.0), + Transaction("some_acc9", "rand2", "id124", Date.valueOf(LocalDate.now()), 23.9), + Transaction("some_acc10", "rand3", "id125", Date.valueOf(LocalDate.now()), 85.1), + ) + val dfMap = Map( + "postgres.account" -> sparkSession.createDataFrame(accountsList), + "postgres.transaction" -> sparkSession.createDataFrame(transactionList) + ) + + val result = ForeignKeyUtil.getDataFramesWithForeignKeys(plan, dfMap) + val txn = result.filter(f => f._1.equalsIgnoreCase("postgres.transaction")).head._2 + val resTxnRows = txn.collect() + resTxnRows.foreach(r => { + r.getString(0) == "acc1" || r.getString(0) == "acc2" || r.getString(0) == "acc3" + }) + } + + test("Can link foreign keys between data sets with multiple columns") { + val sinkOptions = SinkOptions(None, None, + List(s"postgres${FOREIGN_KEY_DELIMITER}account${FOREIGN_KEY_DELIMITER}account_id,name" -> List(s"postgres${FOREIGN_KEY_DELIMITER}transaction${FOREIGN_KEY_DELIMITER}account_id,name")) + ) + val plan = Plan("foreign keys", "simple plan", List(TaskSummary("my_task", "postgres")), Some(sinkOptions)) + val accountsList = List( + Account("acc1", "peter", Date.valueOf(LocalDate.now())), + Account("acc2", "john", Date.valueOf(LocalDate.now())), + Account("acc3", "jack", Date.valueOf(LocalDate.now())) + ) + val transactionList = List( + Transaction("some_acc9", "rand1", "id123", Date.valueOf(LocalDate.now()), 10.0), + Transaction("some_acc9", "rand1", "id124", Date.valueOf(LocalDate.now()), 12.0), + Transaction("some_acc9", "rand2", "id125", Date.valueOf(LocalDate.now()), 23.9), + Transaction("some_acc10", "rand3", "id126", Date.valueOf(LocalDate.now()), 85.1), + ) + val dfMap = Map( + "postgres.account" -> sparkSession.createDataFrame(accountsList), + "postgres.transaction" -> sparkSession.createDataFrame(transactionList) + ) + + val result = ForeignKeyUtil.getDataFramesWithForeignKeys(plan, dfMap) + val txn = result.filter(f => f._1.equalsIgnoreCase("postgres.transaction")).head._2 + val resTxnRows = txn.collect() + val acc1 = resTxnRows.find(_.getString(0).equalsIgnoreCase("acc1")) + assert(acc1.isDefined) + assert(acc1.get.getString(1).equalsIgnoreCase("peter")) + val acc2 = resTxnRows.find(_.getString(0).equalsIgnoreCase("acc2")) + assert(acc2.isDefined) + assert(acc2.get.getString(1).equalsIgnoreCase("john")) + val acc3 = resTxnRows.find(_.getString(0).equalsIgnoreCase("acc3")) + assert(acc3.isDefined) + assert(acc3.get.getString(1).equalsIgnoreCase("jack")) + val acc1Count = resTxnRows.count(_.getString(0).equalsIgnoreCase("acc1")) + val acc2Count = resTxnRows.count(_.getString(0).equalsIgnoreCase("acc2")) + val acc3Count = resTxnRows.count(_.getString(0).equalsIgnoreCase("acc3")) + assert(acc1Count == 2 || acc2Count == 2 || acc3Count == 2) + } + + test("Can get delete order based on foreign keys defined") { + val foreignKeys = List( + s"postgres${FOREIGN_KEY_DELIMITER}accounts${FOREIGN_KEY_DELIMITER}account_id" -> + List(s"postgres${FOREIGN_KEY_DELIMITER}balances${FOREIGN_KEY_DELIMITER}account_id", s"postgres${FOREIGN_KEY_DELIMITER}transactions${FOREIGN_KEY_DELIMITER}account_id") + ) + val deleteOrder = ForeignKeyUtil.getDeleteOrder(foreignKeys) + assert(deleteOrder == + List( + s"postgres${FOREIGN_KEY_DELIMITER}balances${FOREIGN_KEY_DELIMITER}account_id", + s"postgres${FOREIGN_KEY_DELIMITER}transactions${FOREIGN_KEY_DELIMITER}account_id", + 
s"postgres${FOREIGN_KEY_DELIMITER}accounts${FOREIGN_KEY_DELIMITER}account_id" + ) + ) + } + + test("Can get delete order based on nested foreign keys") { + val foreignKeys = List( + s"postgres${FOREIGN_KEY_DELIMITER}accounts${FOREIGN_KEY_DELIMITER}account_id" -> List(s"postgres${FOREIGN_KEY_DELIMITER}balances${FOREIGN_KEY_DELIMITER}account_id"), + s"postgres${FOREIGN_KEY_DELIMITER}balances${FOREIGN_KEY_DELIMITER}account_id" -> List(s"postgres${FOREIGN_KEY_DELIMITER}transactions${FOREIGN_KEY_DELIMITER}account_id"), + ) + val deleteOrder = ForeignKeyUtil.getDeleteOrder(foreignKeys) + val expected = List( + s"postgres${FOREIGN_KEY_DELIMITER}transactions${FOREIGN_KEY_DELIMITER}account_id", + s"postgres${FOREIGN_KEY_DELIMITER}balances${FOREIGN_KEY_DELIMITER}account_id", + s"postgres${FOREIGN_KEY_DELIMITER}accounts${FOREIGN_KEY_DELIMITER}account_id" + ) + assert(deleteOrder == expected) + + val foreignKeys1 = List( + s"postgres${FOREIGN_KEY_DELIMITER}balances${FOREIGN_KEY_DELIMITER}account_id" -> List(s"postgres${FOREIGN_KEY_DELIMITER}transactions${FOREIGN_KEY_DELIMITER}account_id"), + s"postgres${FOREIGN_KEY_DELIMITER}accounts${FOREIGN_KEY_DELIMITER}account_id" -> List(s"postgres${FOREIGN_KEY_DELIMITER}balances${FOREIGN_KEY_DELIMITER}account_id"), + ) + val deleteOrder1 = ForeignKeyUtil.getDeleteOrder(foreignKeys1) + assert(deleteOrder1 == expected) + + val foreignKeys2 = List( + s"postgres${FOREIGN_KEY_DELIMITER}accounts${FOREIGN_KEY_DELIMITER}account_id" -> List(s"postgres${FOREIGN_KEY_DELIMITER}balances${FOREIGN_KEY_DELIMITER}account_id"), + s"postgres${FOREIGN_KEY_DELIMITER}balances${FOREIGN_KEY_DELIMITER}account_id" -> List(s"postgres${FOREIGN_KEY_DELIMITER}transactions${FOREIGN_KEY_DELIMITER}account_id"), + s"postgres${FOREIGN_KEY_DELIMITER}transactions${FOREIGN_KEY_DELIMITER}account_id" -> List(s"postgres${FOREIGN_KEY_DELIMITER}customer${FOREIGN_KEY_DELIMITER}account_id"), + ) + val deleteOrder2 = ForeignKeyUtil.getDeleteOrder(foreignKeys2) + val expected2 = List(s"postgres${FOREIGN_KEY_DELIMITER}customer${FOREIGN_KEY_DELIMITER}account_id") ++ expected + assert(deleteOrder2 == expected2) + } + + test("Can generate correct values when per column count is defined over multiple columns that are also defined as foreign keys") { + val foreignKeys = List( + s"postgres${FOREIGN_KEY_DELIMITER}accounts${FOREIGN_KEY_DELIMITER}account_id" -> + List(s"postgres${FOREIGN_KEY_DELIMITER}balances${FOREIGN_KEY_DELIMITER}account_id", s"postgres${FOREIGN_KEY_DELIMITER}transactions${FOREIGN_KEY_DELIMITER}account_id") + ) + val deleteOrder = ForeignKeyUtil.getDeleteOrder(foreignKeys) + assert(deleteOrder == List( + s"postgres${FOREIGN_KEY_DELIMITER}balances${FOREIGN_KEY_DELIMITER}account_id", + s"postgres${FOREIGN_KEY_DELIMITER}transactions${FOREIGN_KEY_DELIMITER}account_id", + s"postgres${FOREIGN_KEY_DELIMITER}accounts${FOREIGN_KEY_DELIMITER}account_id") + ) + } + + test("Can generate correct values when primary keys are defined over multiple columns that are also defined as foreign keys") { + val foreignKeys = List( + s"postgres${FOREIGN_KEY_DELIMITER}accounts${FOREIGN_KEY_DELIMITER}account_id" -> + List(s"postgres${FOREIGN_KEY_DELIMITER}balances${FOREIGN_KEY_DELIMITER}account_id", s"postgres${FOREIGN_KEY_DELIMITER}transactions${FOREIGN_KEY_DELIMITER}account_id") + ) + val deleteOrder = ForeignKeyUtil.getDeleteOrder(foreignKeys) + assert(deleteOrder == List( + s"postgres${FOREIGN_KEY_DELIMITER}balances${FOREIGN_KEY_DELIMITER}account_id", + 
s"postgres${FOREIGN_KEY_DELIMITER}transactions${FOREIGN_KEY_DELIMITER}account_id", + s"postgres${FOREIGN_KEY_DELIMITER}accounts${FOREIGN_KEY_DELIMITER}account_id") + ) + } + + test("Can update foreign keys with updated names from metadata") { + implicit val encoder = Encoders.kryo[ForeignKeyRelationship] + val generatedForeignKeys = List(sparkSession.createDataset(Seq(ForeignKeyRelationship( + ForeignKeyRelation("my_postgres", "public.account", List("account_id")), + ForeignKeyRelation("my_postgres", "public.orders", List("customer_id")), + )))) + val optPlanRun = Some(new ForeignKeyPlanRun()) + val stepNameMapping = Map( + s"my_csv${FOREIGN_KEY_DELIMITER}random_step" -> s"my_csv${FOREIGN_KEY_DELIMITER}public.accounts" + ) + + val result = ForeignKeyUtil.getAllForeignKeyRelationships(generatedForeignKeys, optPlanRun, stepNameMapping) + + assert(result.size == 3) + assert(result.contains(s"my_csv${FOREIGN_KEY_DELIMITER}public.accounts${FOREIGN_KEY_DELIMITER}id" -> + List(s"my_postgres${FOREIGN_KEY_DELIMITER}public.accounts${FOREIGN_KEY_DELIMITER}account_id"))) + assert(result.contains(s"my_json${FOREIGN_KEY_DELIMITER}json_step${FOREIGN_KEY_DELIMITER}id" -> + List(s"my_postgres${FOREIGN_KEY_DELIMITER}public.orders${FOREIGN_KEY_DELIMITER}customer_id"))) + assert(result.contains(s"my_postgres${FOREIGN_KEY_DELIMITER}public.account${FOREIGN_KEY_DELIMITER}account_id" -> + List(s"my_postgres${FOREIGN_KEY_DELIMITER}public.orders${FOREIGN_KEY_DELIMITER}customer_id"))) + } + + test("Can link foreign keys with nested column names") { + val nestedStruct = StructType(Array(StructField("account_id", StringType))) + val nestedInArray = ArrayType(nestedStruct) + val fields = Array(StructField("my_json", nestedStruct), StructField("my_array", nestedInArray)) + + assert(ForeignKeyUtil.hasDfContainColumn("my_array.account_id", fields)) + assert(ForeignKeyUtil.hasDfContainColumn("my_json.account_id", fields)) + assert(!ForeignKeyUtil.hasDfContainColumn("my_json.name", fields)) + assert(!ForeignKeyUtil.hasDfContainColumn("my_array.name", fields)) + } + + class ForeignKeyPlanRun extends PlanRun { + val myPlan = plan.addForeignKeyRelationship( + foreignField("my_csv", "random_step", "id"), + foreignField("my_postgres", "public.accounts", "account_id") + ).addForeignKeyRelationship( + foreignField("my_json", "json_step", "id"), + foreignField("my_postgres", "public.orders", "customer_id") + ) + + execute(plan = myPlan) + } +} + +case class Account(account_id: String = "acc123", name: String = "peter", open_date: Date = Date.valueOf("2023-01-31"), age: Int = 10, debitCredit: String = "D") + +case class Transaction(account_id: String, name: String, transaction_id: String, created_date: Date, amount: Double) diff --git a/app/src/test/scala/com/github/pflooky/datagen/core/util/MetadataUtilTest.scala b/app/src/test/scala/com/github/pflooky/datagen/core/util/MetadataUtilTest.scala new file mode 100644 index 00000000..c487aa38 --- /dev/null +++ b/app/src/test/scala/com/github/pflooky/datagen/core/util/MetadataUtilTest.scala @@ -0,0 +1,25 @@ +package com.github.pflooky.datagen.core.util + +import org.apache.spark.sql.types.MetadataBuilder +import org.junit.runner.RunWith +import org.scalatestplus.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +class MetadataUtilTest extends SparkSuite { + + test("Can convert metadata to map") { + val metadata = new MetadataBuilder() + .putString("string_key", "value") + .putLong("long_key", 1L) + .putDouble("double_key", 0.1) + .putBoolean("boolean_key", true) + 
.putStringArray("array_key", Array("value")) + .build() + + val result = MetadataUtil.metadataToMap(metadata) + + assert(result.size == 5) + assert(List("string_key", "long_key", "double_key", "boolean_key", "array_key").forall(result.contains)) + } + +} diff --git a/app/src/test/scala/com/github/pflooky/datagen/core/util/RecordCountUtilTest.scala b/app/src/test/scala/com/github/pflooky/datagen/core/util/RecordCountUtilTest.scala new file mode 100644 index 00000000..2d1ca24c --- /dev/null +++ b/app/src/test/scala/com/github/pflooky/datagen/core/util/RecordCountUtilTest.scala @@ -0,0 +1,142 @@ +package com.github.pflooky.datagen.core.util + +import com.github.pflooky.datacaterer.api.model.{Count, GenerationConfig, Step, Task} +import com.github.pflooky.datacaterer.api.{CountBuilder, GeneratorBuilder} +import org.scalatest.funsuite.AnyFunSuite + +class RecordCountUtilTest extends AnyFunSuite { + + private val generationConfig = GenerationConfig(100, None) + + test("Set number of batches to 0 when no tasks defined") { + val result = RecordCountUtil.calculateNumBatches(List(), GenerationConfig()) + + assert(result._1 == 0) + assert(result._2.isEmpty) + } + + test("Set number of batches to 1 when records from task is less than num records per batch from config") { + val task = Task("my_task", List(Step("my_step", count = Count(Some(10))))) + val result = RecordCountUtil.calculateNumBatches(List(task), generationConfig) + + assert(result._1 == 1) + assert(result._2.size == 1) + assert(result._2.head._1 == "my_task_my_step") + assert(result._2.head._2.numTotalRecords == 10) + assert(result._2.head._2.currentNumRecords == 0) + assert(result._2.head._2.numRecordsPerBatch == 10) + } + + test("Set number of batches to 2 when records from task is more than num records per batch from config") { + val task = Task("my_task", List(Step("my_step", count = Count(Some(200))))) + val result = RecordCountUtil.calculateNumBatches(List(task), generationConfig) + + assert(result._1 == 2) + assert(result._2.size == 1) + assert(result._2.head._1 == "my_task_my_step") + assert(result._2.head._2.numTotalRecords == 200) + assert(result._2.head._2.currentNumRecords == 0) + assert(result._2.head._2.numRecordsPerBatch == 100) + } + + test("Can calculate number of batches and number of records per batch foreach task when multiple tasks defined") { + val task = Task("my_task", List( + Step("my_step", count = Count(Some(100))), + Step("my_step_2", count = Count(Some(100))), + )) + val result = RecordCountUtil.calculateNumBatches(List(task), generationConfig) + + assert(result._1 == 2) + assert(result._2.size == 2) + assert(result._2.forall(_._2.numTotalRecords == 100)) + assert(result._2.forall(_._2.currentNumRecords == 0)) + assert(result._2.forall(_._2.numRecordsPerBatch == 50)) + } + + test("Can calculate average record count if generator defined for count") { + val task = Task("my_task", List( + Step( + "my_step", + count = new CountBuilder().generator(new GeneratorBuilder().min(50).max(150)).count) + )) + val result = RecordCountUtil.calculateNumBatches(List(task), generationConfig) + + assert(result._1 == 1) + assert(result._2.size == 1) + assert(result._2.head._1 == "my_task_my_step") + assert(result._2.head._2.numTotalRecords == 100) + assert(result._2.head._2.currentNumRecords == 0) + assert(result._2.head._2.numRecordsPerBatch == 100) + } + + test("Can calculate record count based on per column count, task records per batch should be the pre-records per column count") { + val task = Task("my_task", List( + Step( + 
"my_step", + count = new CountBuilder().records(100).recordsPerColumn(10, "account_id").count + ))) + val result = RecordCountUtil.calculateNumBatches(List(task), generationConfig) + + assert(result._1 == 10) + assert(result._2.size == 1) + assert(result._2.head._1 == "my_task_my_step") + assert(result._2.head._2.numTotalRecords == 1000) + assert(result._2.head._2.currentNumRecords == 0) + assert(result._2.head._2.numRecordsPerBatch == 10) + } + + test("Can calculate average record count based on per column generator count, task records per batch should be the pre-records per column count") { + val task = Task("my_task", List( + Step( + "my_step", + count = new CountBuilder() + .recordsPerColumnGenerator( + 100, + new GeneratorBuilder().min(5).max(15), + "account_id" + ).count + ))) + val result = RecordCountUtil.calculateNumBatches(List(task), generationConfig) + + assert(result._1 == 10) + assert(result._2.size == 1) + assert(result._2.head._1 == "my_task_my_step") + assert(result._2.head._2.numTotalRecords == 1000) + assert(result._2.head._2.currentNumRecords == 0) + assert(result._2.head._2.numRecordsPerBatch == 10) + } + + test("Can override record count per step from config") { + val generationConfig = GenerationConfig(100, Some(10)) + val task = Task("my_task", List( + Step( + "my_step", + count = new CountBuilder().records(10000).count + ))) + val result = RecordCountUtil.calculateNumBatches(List(task), generationConfig) + + assert(result._1 == 1) + assert(result._2.size == 1) + assert(result._2.head._1 == "my_task_my_step") + assert(result._2.head._2.numTotalRecords == 10) + assert(result._2.head._2.currentNumRecords == 0) + assert(result._2.head._2.numRecordsPerBatch == 10) + } + + test("Can override record count per step from config but still preserve per column count") { + val generationConfig = GenerationConfig(100, Some(10)) + val task = Task("my_task", List( + Step( + "my_step", + count = new CountBuilder().records(10000).recordsPerColumn(5, "account_id").count + ))) + val result = RecordCountUtil.calculateNumBatches(List(task), generationConfig) + + assert(result._1 == 1) + assert(result._2.size == 1) + assert(result._2.head._1 == "my_task_my_step") + assert(result._2.head._2.numTotalRecords == 50) + assert(result._2.head._2.currentNumRecords == 0) + assert(result._2.head._2.numRecordsPerBatch == 10) + } +} diff --git a/app/src/test/scala/com/github/pflooky/datagen/core/util/SparkSuite.scala b/app/src/test/scala/com/github/pflooky/datagen/core/util/SparkSuite.scala new file mode 100644 index 00000000..486f915a --- /dev/null +++ b/app/src/test/scala/com/github/pflooky/datagen/core/util/SparkSuite.scala @@ -0,0 +1,32 @@ +package com.github.pflooky.datagen.core.util + +import org.apache.spark.sql.SparkSession +import org.scalatest.funsuite.AnyFunSuite +import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach} + +trait SparkSuite extends AnyFunSuite with BeforeAndAfterAll with BeforeAndAfterEach { + + implicit lazy val sparkSession: SparkSession = { + SparkSession.builder() + .master("local[*]") + .appName("spark tests") + .config("spark.sql.legacy.allowUntypedScalaUDF", "true") + .config("spark.sql.shuffle.partitions", "2") + .config("spark.ui.enabled", "false") + .getOrCreate() + } + + override protected def beforeAll(): Unit = { + sparkSession + } + + override protected def afterAll(): Unit = { + sparkSession.close() + } + + override protected def afterEach(): Unit = { + sparkSession.catalog.clearCache() + } + + def getSparkSession: SparkSession = sparkSession +} diff 
--git a/app/src/test/scala/com/github/pflooky/datagen/core/util/UniqueFieldsUtilTest.scala b/app/src/test/scala/com/github/pflooky/datagen/core/util/UniqueFieldsUtilTest.scala new file mode 100644 index 00000000..3b431f94 --- /dev/null +++ b/app/src/test/scala/com/github/pflooky/datagen/core/util/UniqueFieldsUtilTest.scala @@ -0,0 +1,65 @@ +package com.github.pflooky.datagen.core.util + +import com.github.pflooky.datacaterer.api.model.Constants.IS_UNIQUE +import com.github.pflooky.datacaterer.api.model.{Count, Field, Generator, Schema, Step, Task, TaskSummary} +import org.junit.runner.RunWith +import org.scalatestplus.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +class UniqueFieldsUtilTest extends SparkSuite { + + test("Can identify the unique columns and create a data frame with unique values for column") { + val tasks = List(( + TaskSummary("gen data", "postgresAccount"), + Task("account_postgres", List( + Step("accounts", "postgres", Count(), Map(), Schema(fields = Some(List( + Field("account_id", Some("string"), generator = Some(Generator("random", Map(IS_UNIQUE -> "true")))), + Field("name", Some("string"), generator = Some(Generator("random", Map(IS_UNIQUE -> "true")))), + Field("open_date", Some("date"), generator = Some(Generator())), + Field("age", Some("int"), generator = Some(Generator())), + )))) + )) + )) + val uniqueColumnUtil = new UniqueFieldsUtil(tasks) + + val uniqueColumns = uniqueColumnUtil.uniqueFieldsDf + assert(uniqueColumns.size == 2) + assert(uniqueColumnUtil.uniqueFieldsDf.size == 2) + assert(uniqueColumnUtil.uniqueFieldsDf.head._2.isEmpty) + val col = uniqueColumns.filter(_._1.columns == List("account_id")).head + assert(col._1.dataSource == "postgresAccount") + assert(col._1.step == "accounts") + + val generatedData = sparkSession.createDataFrame(Seq( + Account("acc1", "peter"), Account("acc1", "john"), Account("acc2", "jack"), Account("acc3", "bob") + )) + val result = uniqueColumnUtil.getUniqueFieldsValues("postgresAccount.accounts", generatedData) + + val data = result.select("account_id").collect().map(_.getString(0)) + val expectedUniqueAccounts = Array("acc1", "acc2", "acc3") + assert(data.length == 3) + data.foreach(a => assert(expectedUniqueAccounts.contains(a))) + assert(uniqueColumnUtil.uniqueFieldsDf.size == 2) + assert(uniqueColumnUtil.uniqueFieldsDf.head._2.count() == 3) + val currentUniqueAcc = uniqueColumnUtil.uniqueFieldsDf.filter(_._1.columns == List("account_id")).head._2.collect().map(_.getString(0)) + currentUniqueAcc.foreach(a => assert(expectedUniqueAccounts.contains(a))) + + val generatedData2 = sparkSession.createDataFrame(Seq( + Account("acc1", "dog"), Account("acc3", "bob"), Account("acc4", "cat"), Account("acc5", "peter") + )) + val result2 = uniqueColumnUtil.getUniqueFieldsValues("postgresAccount.accounts", generatedData2) + + val data2 = result2.select("account_id", "name").collect() + val expectedUniqueNames = Array("peter", "jack", "bob", "cat") + val expectedUniqueAccounts2 = Array("acc1", "acc2", "acc3", "acc4") + + assert(data2.length == 1) + assert(data2.head.getString(0) == "acc4") + assert(data2.head.getString(1) == "cat") + + val currentUniqueAcc2 = uniqueColumnUtil.uniqueFieldsDf.filter(_._1.columns == List("account_id")).head._2.collect().map(_.getString(0)) + currentUniqueAcc2.foreach(a => assert(expectedUniqueAccounts2.contains(a))) + val currentUniqueName = uniqueColumnUtil.uniqueFieldsDf.filter(_._1.columns == List("name")).head._2.collect().map(_.getString(0)) + currentUniqueName.foreach(a => 
assert(expectedUniqueNames.contains(a))) + } +} diff --git a/app/src/test/scala/spark/datagen/AppSuite.scala b/app/src/test/scala/spark/datagen/AppSuite.scala new file mode 100644 index 00000000..4718bffa --- /dev/null +++ b/app/src/test/scala/spark/datagen/AppSuite.scala @@ -0,0 +1,15 @@ +/* + * This Scala Testsuite was generated by the Gradle 'init' task. + */ +package spark.datagen + +import org.junit.runner.RunWith +import org.scalatest.funsuite.AnyFunSuite +import org.scalatestplus.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +class AppSuite extends AnyFunSuite { + test("App has a greeting") { + assert(true) + } +} diff --git a/build.gradle.kts b/build.gradle.kts new file mode 100644 index 00000000..1e3d57c7 --- /dev/null +++ b/build.gradle.kts @@ -0,0 +1,92 @@ +import org.jetbrains.gradle.ext.Application +import org.jetbrains.gradle.ext.runConfigurations +import org.jetbrains.gradle.ext.settings + +plugins { + idea + id("org.jetbrains.gradle.plugin.idea-ext") version "1.1.7" +} + +idea.project.settings { + runConfigurations { + create("GenerateFromManualJson", Application::class.java) { + mainClass = "com.github.pflooky.datagen.App" + moduleName = "data-caterer.app.main" + includeProvidedDependencies = true + envs = mutableMapOf( + "ENABLE_GENERATE_PLAN_AND_TASKS" to "false", + "PLAN_FILE_PATH" to "app/src/test/resources/sample/plan/account-create-plan.yaml", + "TASK_FOLDER_PATH" to "app/src/test/resources/sample/task" + ) + } + create("GenerateFromMetadata", Application::class.java) { + mainClass = "com.github.pflooky.datagen.App" + moduleName = "data-caterer.app.main" + includeProvidedDependencies = true + envs = mutableMapOf( + "ENABLE_GENERATE_PLAN_AND_TASKS" to "true", + "ENABLE_GENERATE_DATA" to "false", + "PLAN_FILE_PATH" to "app/src/test/resources/sample/plan/customer-create-plan.yaml", + "TASK_FOLDER_PATH" to "app/src/test/resources/sample/task" + ) + } + create("GenerateFromMetadataMysql", Application::class.java) { + mainClass = "com.github.pflooky.datagen.App" + moduleName = "data-caterer.app.main" + includeProvidedDependencies = true + envs = mutableMapOf( + "ENABLE_GENERATE_PLAN_AND_TASKS" to "true", + "ENABLE_GENERATE_DATA" to "true", + "TASK_FOLDER_PATH" to "app/src/test/resources/sample/task", + "APPLICATION_CONFIG_PATH" to "app/src/test/resources/sample/conf/mysql.conf" + ) + } + create("GenerateFromMetadataWithTracking", Application::class.java) { + mainClass = "com.github.pflooky.datagen.App" + moduleName = "data-caterer.app.main" + includeProvidedDependencies = true + envs = mutableMapOf( + "ENABLE_GENERATE_PLAN_AND_TASKS" to "true", + "ENABLE_GENERATE_DATA" to "true", + "ENABLE_RECORD_TRACKING" to "true", + "PLAN_FILE_PATH" to "app/src/test/resources/sample/plan/customer-create-plan.yaml", + "TASK_FOLDER_PATH" to "app/src/test/resources/sample/task" + ) + } + create("DeleteGeneratedRecords", Application::class.java) { + mainClass = "com.github.pflooky.datagen.App" + moduleName = "data-caterer.app.main" + includeProvidedDependencies = true + envs = mutableMapOf( + "ENABLE_DELETE_GENERATED_RECORDS" to "true", + "PLAN_FILE_PATH" to "app/src/test/resources/sample/plan/customer-create-plan.yaml", + "TASK_FOLDER_PATH" to "app/src/test/resources/sample/task" + ) + } + create("GenerateLargeData", Application::class.java) { + mainClass = "com.github.pflooky.datagen.App" + moduleName = "data-caterer.app.main" + includeProvidedDependencies = true + envs = mutableMapOf( + "ENABLE_GENERATE_PLAN_AND_TASKS" to "false", + "ENABLE_GENERATE_DATA" to "true", + 
"ENABLE_RECORD_TRACKING" to "true", + "PLAN_FILE_PATH" to "app/src/test/resources/sample/plan/large-plan.yaml", + "TASK_FOLDER_PATH" to "app/src/test/resources/sample/task" + ) + } + create("ExampleAccountCreatePlan", Application::class.java) { + mainClass = "com.github.pflooky.datagen.App" + moduleName = "data-caterer.app.main" + includeProvidedDependencies = true + envs = mutableMapOf( + "ENABLE_GENERATE_PLAN_AND_TASKS" to "false", + "ENABLE_GENERATE_DATA" to "true", + "ENABLE_RECORD_TRACKING" to "true", + "PLAN_FILE_PATH" to "app/src/test/resources/sample/plan/example-account-create-plan.yaml", + "TASK_FOLDER_PATH" to "app/src/test/resources/sample/task", + "LOG_LEVEL" to "debug" + ) + } + } +} \ No newline at end of file diff --git a/design/data_flow_flags.drawio b/design/data_flow_flags.drawio new file mode 100644 index 00000000..5b38635f --- /dev/null +++ b/design/data_flow_flags.drawio @@ -0,0 +1 @@ +7Vzfk6I4EP5rrLp7GIsQQHyc0dm9u9q5mVv36vYeI0RNDRIrxFX3r78Egkrir/FQFHUeBhoSSH9fOp3u1gbsjOefGZqMXmiIo4ZthfMG7DZsGzi2J/5JySKTtLx2JhgyEqqbVoIe+YmV0FLSKQlxUriRUxpxMikKAxrHOOAFGWKMzoq3DWhUfOoEDbEh6AUoMqX/kJCPlBTkw5AXfsNkOFKP9u1WdmGM8pvVSJIRCulsTQSfG7DDKOXZ0XjewZFUXq6XrN2nLVeXL8ZwzA9p8GzBWfztp/U86UTtP4PxK/nj9cHOevmBoqkacBdx1EcJVi/NF7kmxPtP5GGwiEgcYgYb8Gk2Ihz3JiiQF2aCAUI24uNInAFx2KdTcWf4pb8UoOB9yKT0dcpFN1jJkwx34IrjAYmiDo0oSx8LB678k3Ia8zV59pFtOaPveO2Kl37EFTU2zDieb1UaWEIhOIzpGHO2ELeoBr6btVjkrFRgzlZcaCnRaJ0FSoYU+4bLjlcAiQOF0QfwggZen0i0HauQBtNxOtQPQ3XJMNgHwtA6FQyOAcNnHGOGuIDCesEchWIWGaiI8fOi1otKi6mcEAXNKxGKyDCWc0/oCwv5k9QmEVbqUV0YkzCUj9kIM8uwFQJLAajsLPDzc/WSoASsgFsEyzWxAv4GsOCpwGoZYL3gJJF237Z6wQiPTaTqPn/aFc8f34Dk8e13IejiAYkJJzS+NUigXzEkbQMSAwK5ck92jV45Yaift7A+qhWYW4EPWnrg+c2WeyLVuIZq3iJ0DoaGCPuDYCMTAx/3B4cycTvgJhBV8c8zlPwNJe+/JL+aek6VkXvjtraiKhDG86HckDQHEZ0FI8R4czwVu4aHPeBcAACK+Qea6FPyPt+DabsDIekIT4cJR0SHZuVpgP3sX3dE7A26d7EfOpt079t9WJYZti1tadzgrmzyVtxTzQMADKUbasZx+Cj3ttIjjFCSkGC3ZvGc8O/K/ZPH/8rjpqvOuvO1S91FfhKL0XxXHaYna63k6apZelZo94YZEeqQrmomDMWmWr0/ZXxEhzRG0fNKqqCU9+0GUuiBTlmAD6AtR2yId01Ae68FdHcgz3CEOPlRfN9NdFBPeKNEjGSNeNpK19YYlY1TtVrf12sdAa/YEdCpmenB6Chl53LY/4OwZgzhugh7kdyEVXLTcYqUcsCR3HTaxY6gd2ZumvGSmnGzuOJWwdTWnallMNUMKdWMqVVw079zswxumhvgD3OzaKe2MRUcw1SrDkx170wtg6lmFKHWTN242aqCvl6V9LXbenjbaULfWn60Dg8lM9Rmhe1ogZZTk9lMXKxlmbq3m2FyrAvLMAEzn1G7QJnjarOh8kDZARmLcmx9/fxn/xq8EsOs+5r9vRZDnqfBboWpJ/dKDqVvpV6J42usO5a+S2+8KvqaGYl7kdj60uhqSFdeJZbbl1srE9OBqLxOzC4h/nzfjx5h+bcR5Uym36uL6S8hKn1N/K2EqnalVG1pDGsfS1Vb3x+emapm6O8J8WBk8lWg+wX1cVSk6eEBEIaFJ6Jq3CStJnJI6SDdp4bb3UDyrUvbIQVzOybm1oXQaoo1tVUKQZbR/2V5QbEHOhgk+DSImlGVVeHuX1M83eHQCP+0xylLv9yxN7xC5jj/5skB4ZYLc3GqLuW1zTBMWsurIbMHA5RMsm/vpGBcGwiVF+/Cs0UYbnidrjSZ7OoxrGNdSldL0Z27XAyWUN94p+oequ6v7T4lVfVMwbEupaeVSIIzZ5OhGUb5ilH0wMX21uRsHdxKuHvds5rA850CJg+gFM48aL26Wg+n8zKhGaLpRFh+vcP6e9K46Tyrp4UxKs+zQjMcUbs8awtsMXpV5Vnh2aq/biN7lZP4wmOY9XE4z1YTdhn8rYSqlcYwr8DhFKerX/rIbl/9Xgp8/g8= \ No newline at end of file diff --git a/design/data_flow_flags.drawio.png b/design/data_flow_flags.drawio.png new file mode 100644 index 0000000000000000000000000000000000000000..57ea0f13474ae34c35baaa3952d039e7ce90aa84 GIT binary patch literal 39094 zcmdSAc{tQx8$XW7QkEhu_Uu{5VC;;Y!PsX|7>wP_*vH78BqE}eLJLZ=B~-RjWGPdL z>?BKe8rk`s(dYR-pXd7h|Glp7)nsPgbKd8D&VBB4FR#~qk}WNaSXs`n(9qDZni%U_ z)6mdSXlQ6Nm=1$0EUe;g;Egue+DMnC>=W-V8XD%R5Ce2b0MXOOA4?+&)%*KNR6*Vw z9~>eI)fZJzzz_(s9zGbaAWT58EDjq2E`i?z@E$&%K3I>xZ4~4cpfd6>8F?jJd8jB9 z4pjkv*E$;yKcB+N1HKDZEku(v2w7koFtg<$=`JGcz~B5lAQ zH24pZcY!Fmz$C#(IDz1gMPc0#K41)nN>G?AR2f{BKp5Man~5sufzSRvepv8kgmw4B 
zQ*Y7p4#Ee3D~1q=qO3giAGm0P@x%oA{HqJJK3i1nl3d9@sD+ckn<3QK$hB=Ra}-O?>{5lsv-1 z1Mg@UWM&G}wNQ+JnVJw){&s@HfRP?Z+!6>&2J*KPm_5nUPuWD#z)~qF$cz9pG6}Y_ zGLCSU_Xto8RSi@Mun&VOhdE$Py5Xu zF;?|N5}=_FFSlS{B-Yc&G0+?x;A4VUbPVzKG&h3jI|dt(d=yNnk}-|2LD+_v5fx0m zy&?<(1KsTLCSY2+4gvP^hGsShD}SP0kdL(;kw7%}@k79gLB>cV+|USxFel*yq53Fq zJ5MCuK;O;4TN!SLHxC4BY3OIFXX&qN>ldyBM|%ca_#q7uma3r;lt+M(xe6i7-p16# zS})W;*w)I%6AVWmfkZpXqkXaAj)r#VKxK50g#ymZ9*Z_paSSKHRS7CaBwdszDTDy} zwo_5IvkkFSv?LJh??@Qw~jQW2Gc_fF{jO_a=i`MY8Th$bncKj_5lZfUstUgLib@U`AI}gN9JC1ZMmeCs zUO?f3yuep?1r#>I%s$k_F%arwDzA^n80lGt>O1I}+ro+9Sx{X?_Xu~}FfUVYU3(RG z6_USYxP>WIkHL?oz-H)Tuzt$+e&{gYkRXf?!du@+McLThPv6Sez){}NM8#UsofK{# zs)#YR#Ok`k;d(&1bgi%$WkbsdOJId`^)YV2`X*RE6I(@hQ)_t}W5-Z?f|n&U7!CC> zfWZ}%Jzxf4*M-Q#R6|rzrVfrFK9CTM2gxfafD~aLLbN4WnZxxFc5awpM=)YtJzr=@ zh>5SSp`MMOksj1B%*b5@2?k45YDY_B@K+_smk=0+3=Km=-_C84YUdhzz67?qXJMt)Wvl~6Aipc)<*K47Kp$=Jv&wKNO>c> zP#;}e{{XnYGCaguA;QE%9s$%q!ok2pj}U;x=_>1a`zt}s9Kbb2WVoXj$vf1`8|LQ` zVPKCovvqd}v~v&EvxXYmVu%jPULFdLp#fn=HUT&zQYg|C74Bh+!eRXV%`NQA5gsV> zKzsXeAVgAdI59N9JIqTTiX>qWSOs%yJNIyiO_07H5dxDB5Ajm=4=_X+qiih{6csRm zdSU*`_Nr!RH!RNA4P$5KiNYz%>wBsAo2yy}cw-@WFGojPKUIHoG=k_K;1&oCP%w-L zf`^%z!wpC{h!M=n6z7WxLVBzC={v%$1I*k5h(KtDoMh)O}KVb+fN zxIj-`H#~|!wV@se{XqLLg1xe-qC#jeQV9r0*9LeCT@^%FAjZhSAIQR2Sw7S|(8kaM zYwGCc<_`%DGEhb82AUC(Bt1oM&^n>Bc% zg1wTJn}IRX+shC1sc&Nv;BRDak2MZOL(Em6SYsLQxQst*$vLI5fz_&DwQ0ri04j?P*k>!2qGxj5fRi?Ct3NLyW6=1t6;)Ig3Zwc zdnJ3EH_$pTa5ocO-1av ztj{)${E58#J1^=}|JVL$=f26e+hr@YMUmRu4t~WiLwHqr@Ok=hrw*^jD&Wh;J{p$tqhWyWWxzv52EMF|{=C{<&=PV?WFCD}>i@TYujIj34&wv1Da@#4_SLc;jlOc^KT6vPTX zthM{>Ba<6q-g+P3t;h-0bfwd?@u`V4?QK21s}<$Q!Y%!H?~!?KZ@T%5Li0CT#m9Yj zD+=klvgjX(5Pp6*R(P_kX^+&N3&VdnFW=grRkWsY)V{kX`lP5@c53jx3yF(TnJS_; zU{h?-&V#QPirS*Qlx|}bxM=w*bWY5~#DqJ0Dq8uZ?D^|1(VGT#B3_^FK9SY;^xGZ| zA2nH<{br-D9lm&;1rxkJ9Nrnva3pi$=ov-hyr@48&ke5dtr?DM?<^ zfw2AE_0DRaud;J3si&bO3Ii|v#AO_-uau-OJjUCez?>aDmIL)l)7annUFGw&=w}V4 zOWB*Ov$eh?*ZR$=(k$hkSq$Ul#3 zb1E-yt<80U$y$PZrOsS@BYs5 z#6MYCZej22bBTEt6raK>T@4$(_U^w0g zA$~JVl8!QNmWYle#LgY>bwsO$&U`6M30WLcJ$o*!2ljwPzG{g{=lU(M4PIWznFFeJ zgHzl>Vu1#dYWdKR)uE0c#1M5(I5s#xe&h?oTM1oE;%KFNlE=q8N-tAWPRg0S44S0p zQK8$Dz3{4f3noVB&eP03(mwSu*REcDsotg{*&Cs_DxmD2b2Z&OW~%G90=9$e!m+32jhV@u;-=-!4Oxl2H)-k@jV^rVzcMq3Z@O?s z+5hp-W!G!`5VuQ51+>mj(n`^t$Baa82efAhCp`&cn7gENvn`G9MiM{C_!~Rz5597V zH>aDvYJcV4A_TCbmIw>Ip_e1=))$AdVE1>tN~`I6Vq>%dmv@|Lne?%{C(YN>)fGnf zt;FtzDB|YhAQELp=$=}rsnyia_8Wy(4-IF%a;67dWB2uz^&XcwH#oj59SUsY>j+>B zwq@+%Of~Pi(A5obZ~ah(?fjV+xp=I;sFm38)j2?S0uyD z+FxIbpI$Uf=YQ6l5jxi^X?0Ecfd%@ZWv>&yp)k69IOV#)R9aXP#3sp=&wZ%p(6c?m zl}n)$=gj_4m2)b{hA#6uHJ6H}i=hcI_db`|RBk z-%fEV95`{b z#EPuF+9cY46I$rzh~sGf(0i$Y zg0tG&^U$vgEH`6(Kkvqnx#sb2I5ev_W{XE>d;mG%`c^uM( zcaJ^6pSvdzvva>Roas9agpuIQJB^|o+E|%r`_>X~rJ)z%KHp+mE}*|Q{>pguX#GpN z34!3~8S?#(SC`dv=J(f|_U#_L_r4M5n@I>j;w*j*%6HwP? 
znryDQQzMq|tX(!Bxh6|%JSM#Sf21w+qxW~im5eyfs*auUz7JE!SBB;AJ@xEMwd6Z} zEpcSyCdYZfkxIky1sGlLsZ;E__oBv!ko`Q=rGOHZ9u4~_`+qOUD>~-duAH~p`~KqI z=Jf0d=p{{MoulZD0rww#S{GF|efSc1`drS)P`*oyKS*%px~ZbbsIBol;tWI7WJfl( zx749soQ#oiYKm?k%@C*Wg=Y%zQ)v0G_}5KbTD=c*N;;5!D6f)l*6~i)%Oq|YM_bVr zk2WC>dYPGzH+f1%JO`r&18POLRn9!)_?Qd3Rq5VVqUHm~D|e&kxF$F}gOZq=4H(#$ znLq?%TXs&Z85RBIk%^@5Ble8bGC{=ek+V;VZ8f@Vb_e*~IerY+sRExqI{Wc-=^i2R z&8q}PIt>l|%wns8G#;2m14J*@>GmJHw!=pr9=>#Bsx*vG$6-rIWZBYRB$TPwZd^I^ zp@2p&S3T{u!p4m1zVZ9-e`r`V6L0YJ#y+trzEcudPykc+8hh&(R+@VOs%xjh4NreT zyT#lW_V(0Oo(YOWd?DhNn2X9Wv@%fpvs{#-BAXfGR@y?B>9b*AXGNE`4)O`khLU*% z_t^6Z{&xqyFu-^hP)GbucJw~rM%H-#ey2ybGs)p9nvmUp$`zg<9~%>1fUpGjh5~8o zoII2zATcAvQn5LKVZh09XmhOA@pIk$=sm!8>vR+!dTc?%bKsc(Zw1Xbt(nuQ>)edF z9I09q&sDR9*vi|KzdB5F-u9eb$s^PEgEAVHbJy<_z#0?ZZX`&t#RqfMH!Li!QrWFzY&*9#|bHn2VZ)>y|E zzLKy;@3|DU9P_1RvRfd(IL+r0{HvBjORJNe=HFB-$wmrZU1@gD{R|(o6pPJ_AAj1^ z_K=m!@D@+~uY}?_wxz~x+$wF@DUFKt;Kd|?b6k}1(}iaa+5cwO-m!cSx$3^r?%TP0 zT}UtM#c3h(Kx#fF;VAc5LI7nuPdlcuN>=C;gGgx)$Kio^K3QZB{0PZC&p0WeY#{sa zep@cxM%-Lvo((5uL*;V6+Kjr-wFNdVO2q2VjQyQy?O7WB0oJsfnF<=9+9CmMUt{Ef zeHgBYpKa-ggK@_RU!sfWK9FNTQvW}4j13HKY0;2w94P08Js(>q-205bPdU8tjuR;& z;K}~N)781<0uuj;$ZlX+4mo-kI2vF7mCdYKhb-Xt9J3` z@5#WnuMGiwfK?!ktb0wuU<@vl}QKkY6jKDK{CToRc zf30eiSi%(WY5w4M_u=>u8)pz+asUiRdPf}(y6J!X=|Meol1JXeC%o6yuP@-&F> zN(Q{RT5}!~KEF|=c^)b+JoCAzITzyEa4F}n#7alJANGgwL`?<`vX%XCdhU5G=mI%K zo}QJBeCH#ySM9yZrQ1Ucbib!QB5Il+66>*SA;Wko$<)4<_~&OjI|;)KV{`IQ}b@3bu_sEt`l4}DABn^)PntXG-QBXqYQi5$nk z&(48m+GVWI`J|681)^PT$VpyY z%7>-<>zq4gI(^L^CGsiz$;<@i9@US0TmYLgrf)cqVvj+L+nN2#PRT16dhG-THstt* zuawyB&nRxKC1Rp?(FIS21}mL8R4Dopbv!jmC9ro%VeH1whU)?un~*Zzw{p=Acb?gN zr|n?M6LGEfW_T)iNkclsFyBT)-*>&XSUl0y+3tD7<>M40&F6KKBu5xwU;X?{ zDxRO?B)fvj7`}#WTzSosFQ9%-yLZcN0W#=}WYe(k2&FT;b=x)j&0zGYtFF(Cn7AmO z(*Yvx!}E8JvGC)j-=5ZvO0laT5)p^p50CgWvTV7otlciK#NOGo_CNbEiiMO?r2I}p zxwx6I^zJo`mqeYklf5anjNX%VgGdI;!}^$lbtBhN%>`t(#&KZ|t8( z_`=hFOB43L(oS11-=ryD4^?*!d^g4F#~Yu=S%+gUA6mOp9Od<~R|vh{tR(kckF9R} z{1LXNJWEw+FB*wZx0(hvc5&{LM)dM7%a#JfSl-dh=7jRk!?rw0sb!L$Q)VLWnwd(% zT%L8qKdPATh95cl9U9nd_gaLJ|LS`NPQubRTD&Zwd6bBcam@qib}~Rf>S@O0rE%G@ za=eQ%Y@snFfzV4>9v;5Dw?|H#dvqS<#+>u6aw_>yfOpGB*y!CKDVcF=x{>1w@u+={ zvl~2buBz#`H2c52KO0aptm2;pgG#Ww?3q97?wUI7ckjXYd&Nc0=GM!b>Azt)`>hF+ z_kEK-@~%Mk2%kikClo(dpX=2ae{~pk2D%nE$J=|dZhS1At+H1rGYlj%#$SxAFr_IR)uzP%+#89}2}QtOLhu z`m&JbZYvo53$cphpckA8Y-Lzft)qEMe9n5`zzBd)YB)24e?wJtd4f9drDU9|=zn1? z0C5Sb0U+vgp!>Ohbx3=BnVyz~gbX<(1^=HeJ|8P6NKfOj$@tfRug8irS&}0k@SvIh zr;96TGz?K8LpfLf1*65%YrtdUM>vO@P5+I0UDpW#lYKbRYWpu}jZS2Y84N9N=CFPI z|8(&Hh(Ul4R&@4%5H%!6LBYB{tm63pbiu(#%d$072RZRyhz&fL7*E5{ApR;Y``>7w zVB8Fthf&+LC;ua#W==3P&Cjf*^#9X^{{Mp__CHCC|E_!KZ`QPM!Pt?Nbq_h z;wl=KL$67%zNgbXu@t$~fRc5t&ygNVU=Ed%O0Tab=W9mRbe!4mJKF?LILx(Sf56x_~I{*#Ldck)yd}rYj5jL2_vF13|l9{f}weI-~!hf)8 zv7=0cr9V#TRhlB#Gh=<=>F=8*e$4eg<2fn)qONamL)0|uIsnz$42B&{{!!v9i8Skr z=oyF%^E>vqig;(a!=bZ1R?n`U6cQ*sQeD?JTJhcHdt0{-*4*R(DDL7M8wtoe;hq^Q zz;ua=LW0D)>}akJO(fNb)yBOYK@GRJ{#E%MZ6E_)7Ln^Y^zkgDx?cb96>!(=^-O&I zw+oLFcTb?s3-PIkRjWl#8U3^O(vFRdES&P?)~Cx8zOGB2_!+r0(vW>U-K+18v1Ioh zB}7fYpxF9oj5hv#G8g|p)}+ofHWIduLoo^-S*MTlfR~@V6t*z1IdDz@*hfuV>wSEK zc&q3?``b01W*xUUEOwdsp$H9VJo*$RCwab(pzAPoXd9KXbzd(@OsDU8^4}#`v51YV zWm;b(3cN3EIzgXRkd$i<(vIVfU4A*}Q(p=Z#nNj>|FhzDbVINA7fvq@JM$Hm-m;

^+q6@G2fO3b?j*Hl110cX8V0T z9p)`bn5Bg{;{7>1zO}s-19G z=Ad!YG4jE17v44u;)U-mKoElOUu2Uo#?=YR6DYQC1j#6)__A-E9dF4lMGFN)CLcn)(TSayC!Am zb>d>xigtB>yae;5or-#(5F%z z$+9Pae>+0FXQv`mK8S$lqdDWAiBi{-c$UM09K4iPJmPV|L@=I5t#VOA;G7 zqt{=ty>ccLLwbb=)PM&KcwD9R_r^OqkARF}ZEVaNxjJU?S{&D`#wNlIrL-<->B`6P z&3hU1NXGyWa^%|PuN~pax5d3LxF~!48~%J$X}p8Rq3MC*5f>wR@6pF|R@jmmrF4%3 z@50^@CGO*PBIGj!HY_=wtqpg%f_zjjIq>g74GFpy0tH(zWUn$iUU4&aA%0|FR_zuK zLql+>!|?+->v+)5b5kCgc;U0i^S#e5Po)QOGHi`R-DfBX3Fw4FhK0@@}9eo!MO z(J*K-G=v)p`{^E7LD#DX7oNnuHD=ZW(Typ5|C9QXF(3j84X>SHhN8b){x#g&%&sx; znW2}#AKrqHf)nV_f8$J+99AhC#AHMJ^Bm)579kbt_4*ycRX?oE6m>-?bmq%6$ zEzz82-)+c7Vqn>}Nv|pb&+5YQ*5tYOG@qY)w4Vij@6;H%own~bh=)JQ3|o5@?Mq0(W%4GGVGMHTV$_U5Teq9BZ+ zv-(!O#|Q!?*h;@LPjzmok&7K1HyM*}I9T==S-6!7NhxKJbGIxS=|9)iTym z>d&H%d|aezbocMzMqaJJVF{Kk32BOP(629r6@29<=Yca9mm4{9Qub(W+OfaOvZF`i z#=jlM9p}la0dT+@N`AeMXy8YpW+!xG(C2azN9ITP(D5tdh;{7a=d63DU-+BUgK&>s zcxJ7v)XxSUQ?gTKc+&B;&%SXVoZ7fVIPQRGG%H@K7tYYhnm_Aa_<=&b zaZHnr<7Ze?a$%?Wax}a6%)%*|q3(r(4h>F-ILKH^ZVzg8xgp7O>-T@DT+ z&9s8)=*6KSHQsdNNR@4s@jAQ_F?cPc50w$0^Cx z;fy@;L)`(l4Z9R|>5611=~`Ql{YM)$RkwL?b#UKrC87W^%>waEk>mVGL*yiY&~3^E zZH_cVuHW?UuWIK+Y8=BrOVe?+1#MCe(sW^tgwo=-yniE*x5t(a}GC!iqHc3)^vAkZ4$6>RjzO7ZY%m^+LqXwILGX++4`*l^cAtcx7!j;FPuTI z8T)xj_780r@4*E8=<7v*cB@0x$kU2s)s{Pj^x_;D1^6$Ik`-}7DyqfVQYcfvW=&El zvAOal=bN$@O7yq$WA-FZNZE7MAJ>T4t8X3v6Tq##=vg0$l=H5?s~XHgzx(;2(PVSn zA+Q&@sdQ-l#*Y-G-L&xk)B*%YOgbe?p%C^Q{R6kGusf`x}CNqbA7x2T`gCCOwq9sn8v$afVbx+q6D>Ti{L~3p7vz$05 zg4A5gN(C%UfPG0*Ir9Uj|R#yv)i|QZ?455;l)k4g7KGD6b0HHpYsa z`}UpjU&W@SzsnI`$wXQl?qzon-x}I-gc@ldyJA83JeZX$5PI()mp( zF2Ns_tjnD>!M3;tAVqyKi@QVzm-}hNU=CSeZ-w9e6G^>$2-H7dx2~|DHcv7*H5Ir{ ze(sRES{*`*^CXFecz(Jo3H9u#*(fw!l^|Sr?oED9Ulg0d)W{ulAu;Egf?^k z!n`qh2|UOR)!?beO{h}z{UKc0>qWIVIW3FaQR2k465OG~M=g`Qn|4+alW!9=%Ia4; zDx!?x0^G-h|E44KLSvWeQuIM|QxlV;_Mh!Q`!!x`S#~fN_sYbfQ{dt<{M#O=TW}ga zduTu65>V$SgI>0`9Gy1f%4zJS&>B)b#)}?`iueACJV2AaT>$W_DrFox)_=7{E})~43igoe%$B1 z+pJAoE?Dhs6Zwn*MHQxxy*^xrzkeg3A<`@-F!_Vj*?g#+*c|I*(Rgw5XUWqtg|A0X zB*l*?o-?FI@U+L}IS9H#w@*+!)hb63 z64Jn0e=>zH@t)pB1i3A-^|FAmMw6t4@~H8^jHWm@gDlO8MkqR*+MLOvIr+uCcrBGk z@jp0FZE;cVI5j3(|54|0k7{!Tap11keW6^GvqzN8G8Y(?+spI@hvt~lv+v6JVI;3N zlc2ERhPo<^8krOsg{=3yEdxyV0>=Fg)-w76Z<2#MRjL$nMbE#N^NZ z8`g?QhV`*Fcss`*p78Q4T~&nfLZ>{WS_`5NKhJ^>YgyW6+fT z?eu3?ESwAP&4X&f)<<-odnp@b=;AOdIU_lZS%H4U6fS(XVFz}Bw35ye(?%cv(R`ksQQypCEJ_)=(&+^UrKz7 zC9EUmOzr@~bm8qu6aW!S`4DBVS}V9f?Jt9i*w^5uv9Q@BKrq zs3Pj z$s0W@=Ugwr@h7^bx8)qWkjn#oW*>4v+{8rJFN4vQ!&N#5Dy>6@dj`fj%;S$+Q^mhb ze%2i&H*+X|LuhtaY<{}ywlLJ|ldW!J^zK&WwJQ=fuV2vUJ6sSG{%Z^1%^zP3Zxr2& ztWEw@C2`d$$^tIn#3**D0OmMXspE>d5}u-Y-jBmIJKW}jUky%%rjj+L=&vI%z7~73 zHw--oVknr!@N6*HXIm=w=^G3a6EU+rPiC9ehk}vk9p;68*5ZcG5h_>WuLBAzfKEY) zsxZXcu}(i^PjONWVAdTazYee@_awV4Ss0@HKX6);c@i}j8^0m$M7WeM=B6^#@YEA4%CurtI)<(H=#Ww;EYz>O0wpo}np`f2SPDe7&XG~mk zJ}Zy%2M`Wqe`_?(INRPEDZaD#f@suGR`-Kt@?^T7I4Gnl+Y6gz>F~bH#So>~lXIE6 z7wgTN=ls;EK|nVsrFy84_&8hv6iXTc+LB(o!5H8@RzMve=|_V^=3A2x`{a6t9)J&fEgsE`hi-iRDRMDB8te9!DPPC~n{ZPf1BrXbc%4EA&t|QZ>(Z+6Xv}C`1Lr(0& z1Jx9d*>Iaj#+5PkV)2Q>N$DRJL`yo}Djg9)<()55k0O-6S)Ke4_+|a8mUW|bglffp z?9qkoWV21u)vhf}kw<5SjED1M)Os!H!~BW;A;THlf`ag?)!m)r50-wjdHx2AbUc%$ z@6VhOM5Lgpg`)c(mU!5WAKuqQ3uKbbD_=3RPq-bwO)h`bN_o>4%roZr%{LFiqH3 zyzZr@@oZ~6WBZQD*EWHh-G|e&Xoa^Cac>!9d2+ZY$?7PX0M(OGh(HDCfytq#En5I& z0_t3)_vf<^$JvF(H8E>AyVTZ?h|zY`^Vaq#yE(-BL8*=enLXiAi#$ohlw5{nig@Sh z`_9QHMFEdSuS8on*em&52XM=Bi7^m>axZ^cN#xL-jewu8O8+b>Km-Skq| z62^Ai%4i(kGPPb2@!5Bb+`RaGT4C!(H+JT`Q0Dg5koB{WP@j=kFNeMs)TEwL(M=Q3 z7J4Y2Zq2>Fr@4jrcE@hAecG`TR8sx?<=OpVZmCHuF@-w=Hqdl`;A>G7kK~Pg?zpF) zIlDDvW%7L``>?Ol;zzO_GH4QmJ|kl-JUPJ*I-W*d^10Q{Ge@o15t{T3D^omA8KNJ> 
zN6%lWRKWd2bu1h{+c+>Be$G!>db(U|{mxFkdv|&8QekiH4=eMS-h8q<(F_C{FV%vsTdyUS6+ z2^Nseet6xssnzN7f>Ukr6_%$vtE{6^aP5G0Tgmg)@(c|Z4qe~e(~C{fGNPjz?~Qbt zzOel@pGPuZxJl?=$4XG;xK5+uvz6q0)hI*XVMnKz4|^V8xG-zqh{f$=;n6+m#DX^9Po55y3=uif z__fY$ukXu!ToKwxI*w~6rtY4%CB~vr>k`lkm zwa{5te-*g4`>&91WIaZ9lch3&_PvH}?9LEA%kK5BhV;j|{#72GXDhC@WH!#O=l2NN zT?yX!I#J>Gm>7q0e49PpfEvA#etGStZ#c>Z`{o*>tcpsOn(qgRNd>6S8~wO#^FdD} z;#J5mkNH%&ESLRUxkky9Ug@k82$Y1tp4AN_(_dAE*mniT6#hJy`?jRTr=pQqkhGDn z6k2n1^N0JS-00?{Y^IPN=@+Dek)c6T@9xXPU_SYP!2)J*$0*kC_5N0me~RGLq1~yE zMc2ffMb^oqQA-!T)I>Q=ZpGtiUikG}F#B9G8m!Lx#pz|7?Q55%eq?8S{A82s+Qo?J z>~P&yXz%HD^7`Be@-q9GjGc+Wr>Kmws8^jW=Br)yEPgEkluCvu*Xs7DSUJv4>*}-IG6|DrG<f9)fhBsvG$#<8~h%6gH?tyK4Y{W35ec)NMRh!$tX6)1B)*JEdJiy zSAxu24Emx@he8nF#^N`eS4E`CpT?YAQkX5s!A!UJM8!^Wz2;x`!XC3c|88Dq|#aI?AW`3k(vM}fn(2WB*T`5 z-}uhy=QB#(0*|VkP=}9#B>MZ64f)U{HokX@25&%5NyG2OPBZC{o5=GP3e1A!nEB)P z3#Kaw@v)qK>Qt7YGxBRPHT6h#_-S^Ui}HyO_JRxKyW-qYD4{9>3Q+2dZ5HsQ4uaVS z-FxUk$r(^Ne319=C-oKck=5(Oi*0GTB_UC7Ji zfHE6$TdSO`HFz_CRc69LuQudhw>ktUCeYlom>+hoBB!H!ZLa>56<38$1+$ax-aN>4 zYrSiP|Mn+bwCs@-MTCB)z1xujYvpFIn*RX_}|o-(YtOP!GnrJ9`3yu*6q=Z7c4 z)@Shsoo$t)(DPM+nOq-3Zw%Qz$dJ2Z3Kw|5BRLZVY2>T|E6hbt>LgTW`pz!3@Mjv%5A%Z^)g1J08rTO&dz|*p9N739mm}P*(Z9uh*T*kbY9u_%3 z4W>5S`_FPw7O%zYpP&e`A$HfDJWa~XHVgGfOu7DC#xR}CO=t`6lvBUexnL4{5qrDu z<0Al4gySc%?KKA+PA%3IV)GTOm-)jg4Y#vLNX!WRfwsc*o)`cC9Kk3&tveu;1W<==X0sv>gt#MtIjRoY51PQ%>Lq7}-Mce~LBBxK zi%cHBZXR@6d(jfoa)nyoajEmm2QJFZA0D28`UkhCU6y+4d-;G}x*hmdlz7gR#-|((byD>6TokaiGZh%1Mc$}iRka;0 zGI?1^jvA6r?WJ!_J8DaG#`G_k4%$dngP2>PK##TiTgr)50V-GlY70#BwW9o!+8PFV z-2kq2-u#r56|>5@8R~f~pPt87EWHlTJ+lE@-VRU|5tqh`Aox~0-jC~K`hIQ(c`r*L zQK9|%=NpqZS8LlrJmN7@`egN!Uw3C|VN=JOru9x6nKK_|Ws;=*((O)Lk(~@OH!leT zOfuLSV3KTo6K5?b5gXtP^I1-UEg1#QjsZA2@buDYsON{cLyV6ieosl#1MGheOD=$~ zPUpvT1k?_Tfg=@nJwDy-&)9pAIC88@`rTnV3#P3=o2DD9wZmn;2T_%!^y`p|6H$^@ zRc0)3?dllz+X{O}RAa{DdrIw)d)r}1)tJ4zijf7E6{D)JT~vOQP6ZbS8p$U7z9R#N z&+L1e!0}Sfo;bn(Gg$5Vdo#b6U$~~~i;X=l=my7qh9ai#wbz3)Sh7^g?U=Bk#I0PE zOO%oW*(H29^h*Us5Acr3K!|#+X?#-R7I&|MIjMbd;i9CudI~8643eC_L#oLtM;ln& zQ2nt;9^gq0Usf|7wEvhkpx~eQiKCCo%fRyX1K7 zI1CV=^cXn(WKygHgCJ#%uW&S7=BE@zg*Wj=cU z6O{cJ_9%%Xnv3^isv@F7117&11W#^?3s{?X-PA0$ZQ6D|4RyPI@%Ms4M1}J=kepOA zXd_mvvGY0)R)5|{50VuoXy}qfW;g?v*u-W|$~l)RvK(TrGiZju1p*GS15SDINgLl9 z>O!bpb-(`Iw0JEy53{jjbYY-mJ(u`r{&at>SMAT&_Y2g9qL-Va+wY3z3gi3f=75X# zS%O4Y)9;APJj9MoH>h7{*1EzBN$1%-W!wx-E}f*{;_erOr)YFu%Z|N8A5|JqsQ}z- zw&HD+FYq_RJOUu7XzhIy!ynJT_Z#@hHFZQ7IZ128HOYyKKvP6on}wLWigWK0Cq8Wz zSvn0^b3)n}npwCwxrF<3S-(ft6Vs>asCIH1v;N`F={q9WBlvjb`y?uW6 z(py%4)Nj0p&`zL_ip@*Td}Jo6PlfQZOW68Yz4qxlO+Y26;J82@R4twe)2aM%^LT}C05TxY zhFmN=^DJ+=;@yN8Pk=u2+U1@vaqqf}1?fPLCFH6UFko9`p36QQNBww+{UMc+OnQsdgi>h`GIvagYcJiZjLL0-rW-A$7W7W=~*=j!)5 zZ`9kPG?+I+5uEC6y~Z=m<7uz-?rMX2g=}j?`sinuk&+QNEqg$zu)}S(#~C6!j4~0o zWD^${WjdB^%l88)FCG@Kr^sW@o`OX}lb&^EONj&S$>dEE_f}6*iWZmO^j(V_+Rn>G zrh`&us9O6EL-8WG_@=iM+8w6kkFh<(r+)tubXjN;UM>Uys4hn zxGJ65S8M$Bv6b)}&2FYRpIx(tm$oUu`>fVUf^`={xrTvYBUIp5c7*kqZ&} zc?rjihp`WXCU9jt&3vNVB;(`PK}r+Xll#QJC`w|SzXTXr%D}Io6Gok2;i2?Z+8sRW zq$$+_=EICRDJ{zN!9k>9KIH1heAqF_Z;3c%;Dj^MhAaJ(0;i zo07b-RQLc5tdD4m)ipJwHspKy4C?^Ec;{K6QEzsfYeE}e|NJ0S3D}@=dtZW5T*MF7 zN)ma02c=MWigYmaoY4O>yZ0iyA%DIlUyuK@{2vA_m4>bNC;tHY0tQ;{XuDkPHi^Ii z!N1SK@2-8Y#=(Xi!OW|AfScNXnlr{~(H*Vwg-#+i$qvmk&&f{t4tUktAjbarYunCb ziM;RSfwISb^BwtiF_1Pl^4h8Rce&{NdEWaX8`#HD9>kAsnF1>nqzuRWzr?jaAC!P= z1J6q!s)~dO0SM_l$6FI|Af3_K8b*pnS;l}=sVblg%L~5)NTWkQQs@SkJ14?;_dRN` zZWfbgUg)12R7lXE!1ogj**vgsQ#|TpaG)}tNK329;`Ce;xDHkT$ukasBzgd@l||;} zvh4lV__J6Z17=_uL<92#H~*}{dQ21GK{-AZT`?N#KBt@&mv>G|dein<1@NoK=#FpQA=LRx@1cAjy~bGR1S0kvuo{Sw 
zWfpy^2e~63yq*HXT8D^@l~r-{hkAXb%T{dSISpIams^Q%C(`ZiJvwk%@ixF$Yu;|l zeA1toDSPs!QY0o&+O?H?{EEqMM<$7O1H|rDma^2G7k@ojeT+RiaJ1S@t0{ACFvvgo zP^HpEJ)4T?=g7|pgKAXaxu~M<1P-{*KO7hLNyFhZgA-rPQX|0s{Xq zKwFEsZbqYEAmsW-ISFv#JB22USU`@x&KnS8yYCZ0qpFJ$3}Vg-r2x3LHBqDo`gqBR zSa#j}QeH%r3Q1xljtle!z>h$g4+Niq?R@K4gohyjvmp}ajG6mdb|LZJsV^Cl_z><8 z&nmK-$2=vl7L!DMB=)F%=~u*6hg%oQnEUNbNwStuP9fScE;#<3rV-{o(ZEvbvUks* z#6AS22M_51=yL8>S_Ux*yGjZ zB;g2(-VK5}z0QUViPe+LTIZ@TF*=0vEloeb5an*Z8d7H9yz%Uv^SL-tS^OImllUwx zjY0hVpU-tp%k5-ldJLcE>OIP)Kk^w>GlHaS48XW`@>b1~9C(sO1UoxPeBwwr!%RtC@u+JEOA0)2N;xXzo~Dk|OD^8v5P8x>_C zwzDuTyj32)3bpHw%HZdNH{ot$4t|}bLK4xTA^!XcH_rDM@>zuUBzX+4C3zZmMDz1c z$sN>|NH^;bCCHEN~88Y;)%z3IWO2uG`sZ}}h zJ}Z$F^6Rt)W9uz!DI=@h+AY3j@)+XnI`_R;TvE0tAj_;h>-@tqa9+c!ieU|VWls|>b0ZghngIV(GP74(-N!zPC9=gU+d z{~oYR%t{F~f!GTbcglCd4Obb$y~{CmFC6?P`C?Qlzp7?A+)ao~h5T0@|7LxkP5uMc z&yk6&fuFfqz%~jov6(0=FtY!xo}7C*=k%vSHHJ$oM~ii;!Y1zRK#FmS-?axdP*rd) z2ncl;%Z)+Jf!)fg{DpQHXmNOKmvm&Bb$n4pw$wV*S3cH+z!*+<_cJ0NJ!$el+i24r zghi4SHj(5%Ul|Ad385jh=f`>9auA*tQgn$}g=M`AWAmnCSZs^8CpGdW}ugiC$8s=;R_x34{TEJE9eQP=#1~jGf24QABJ=^PdKl30~Ap8v@8My*_7H^FlWB8D62LHVt>T*Jllxl z)Mi^!@|as~ANa48g{gt>$(_0_U-GGVZ?5jteB84U2Ngt<-T514v)aEDNk^rA&GqK0 zkb!ke$iTK_m_0YGYM6_`x*R%rdSlV|@AIDe>moZ>in%chNnhT#0ldb4AsT@6%ks_n zdMa8DhPB^Gp5R)xv*j0&ns<8y^$Vl3-sJ`ELRdM=2ZO;2pS{WqnRs^GR@J65z+S-l zU~Dz6A^nmH0382$C6FK#-alVaH!MK9iS!nhEmulK$bb^xZ&HtT{4GzuG2&j;Eo zLH#Oc;@3QX;5BT+E=#42GN~1jSNa$BE2^?2j19rGcFG{hOh?Z}!&~}i`1suOS?QnC zf6un&c)(t2qu!yL*QxELImq5UPv_zA2pe}&qsb+3z5pAVG5(AGj-%7n&*bJXFGS6G z$M{D#yKCS^%#j?`u2=l&z?{XTvu+h;{F}L!R7qaM8b2eH&6{HciU06ykbM}~9wLvp za7C4xv;*n!1EoK#Pz5kzLq}T(0RE@XWM63t{WNCKLLdTVwy`*2sdQW6s?s=w6Oj?H zUMA?)XT`$CPQML+k)(EeCsWa{T)gEN`v7W_Z&A$^P!3!wRH`#5+)vs)S+ib5HjTx) zO|*q6rc$6Ll|C6KHYNwsi@+{Ruf7V_a~~!Oyvc}pEVLUpki_(?S5NEZ3wFn_W6xek z*Tp14<<2WOXnHaQax$aZ-BdayX|iq(kS;KBjno)Zs>5E<$b!C}>q^DLBNpA8W9^>Y`Qp^M38-OF? zU_`Z#J=R3TYcLl9P^&nt_2j0$6}9&2tIE(lvJFH}iOo^fj9ignKJPI(;P_32#Qw@A zkz6hGQGWZkz9QE9d~CB;EQVpdTst#^zoSrWCl>8zU+KS6CD|tpAOnl`?pYw0_vQ}C zvAXwrW_t0k#Ku#vuEIVzC$xz77=B%BoG{8|miWhg>7(X61F!cM1(DBN&`=N>-un{r ztwa2~KMcZ0isT>^sR#G?i-kzDiVFG=^s&eVjv)=v)bpmo%(N_0-_UWja&a6da3Qq9(3cw3dQ-aMuL}#O&I5T!e}0*2-tlK$NoO(5!C&*$*K{ z)_xatyk#S$0J{#fE+Z~OG9OMq z`LKGS>7{AT`+ybxKT2w$x*q)-mK|RbH?|W-YM+Hu@RCRt&%i3P+rN5MC9*`A|L~W9 zXptjPfCfQ7jsjr^ZV}qDLec#n9Sq)?xO%dGO;Z+40Vq=(4K#xd$7;;yO$2D$z z>x}9-zDp=ubfrp^S?ST9b?wc1DeQmyJxz{Z>)YsU3$yXDW=F-ih*$cS*{B0d5*R|e zh=)SBtt@EvlL(%@=*!8Qn>NvIX9g<9g?o%k;3pht{aioHe6a9o23zE2QYjhz%3a;X zmxQm0!qI0gCBN}^d`iWdWyo!u50KCgpFCsmRfh=-GA*xp7C%3!hP$YHHTLWzj3_|? 
zi2BX(?SA|KuPRE<09TMqL1UNOb6*9>`Zp>2H-*R!1zaDjiSucS-26{a5a%{>y`Q(% zPI+ zH-+l;9vdY$3)+Gy^vylY{*Bhe5l=*upGj(@Gs-(cu_bN_Y3*$NVVZ5=hFMyK_kP&-Dt|$rb_-}5$?_9 z<+g)+f8ggq&X4Ss@xSC*Ht^F-%@L)pMsc6ba^%*K>3yl$=c`q(#<8fy-bPGjc`ri?gEUxpYiwsNe;sE(pX>`&e;LF%VQaYEesX0|nS_rw%LH*uJp26}; zL+)|b(hm7*GsVp}2ljzz)}y%2=(Zo`=S!=HIwQiv4hk|kCV2^h`BNO0lFK!p`)GvK zTJA@_T|0DGZI=_do#nf`Emo6vId^E=@hEX+qS?I!Z&dcnd0Cb#d2VC_purK6bKBG& zS6366jzi3cN*5wJUeo0cEar!BW=A2X zTvRxY>NmGq2EfW#IKB-2nVm-vhXqodA|RJi2bEHY_teJmHmB*ieJ>}CLE=q*1kHJj zb6r4ak=to)%0$vU%s4$Yc&{F>6fne(nn($ zdFb*Pa#rl-vECt~jgImn`gYuyANNRGMz2tIdiC~S-<1v)^Xq{low-VfDZbM=O;d^@ z;|HYe>d}um<%co7Lp&596aV3ph$B(ohyh(z4KX-q_R&p26Q5-=Cu;rff+39p?NMixc ze}PT%{yzgYaeOj507L%dp{?WkUD$Ms+O(zP)5Ahi7xfDV^)1YtNd6Kk<0VhWvO}<5 zQ<%VdjV~lVc7@vQKr4Wjozk0h85dNfu*s)xX6C~+_Qf|>+mYX@Ccf62Pp|t$9&%z(_0Qt#h(> zW2hREEq>1TUB3UhCN zuo_YXyr7LmzG%G3B+QXZYDuLJF_v$`uOB8Ul-z@%QoIH8t90t>61ec~B^{fa(?>Ps zl{n+#Rq)yHW*w%5yP_q?T$NFE7jO$gd@- zJ~_Up8%>Lx5UJLi{G2lLPD%!Dzw>^6_iuNpht;A2q(Iy}pryNC`lN~2f*cEHatfX_ zGXHZ#DZrtADsr|zZG2HZRB3!&YIh_Ia5Z(Pxsr^IOrJc;F?kHE{4<}B({DyKE}YP)JNL^)bVA4F z7`SBWiUg5Ojuzw!qqk~}Cn%lOy==hJEP}W1tG?vJ{I4$7ra!GK634GNPqaHpne%$! z65H#1ETor5W7eM{#!W3?uG1AoFO~}tma{&<>iL`g(wmdQeR2yaPADv2y|_}xlxh@z zqAKQ#tj?_CO3obkYO*iuh(cPKTPZg+8_b$V8M;L5N(HPkX?XwKoQ-$UN_!GOW-3J3 z(QFMJ@B-UB>IPULH)U)Ox|4(}V~Dm7Wc;SENJ90{LCcdQQd9B>U<-$KkSc<^VTi*X zm4bL_$ki-nhp($9P+_J{!)7>UkG)9-sc4BSdnM|ec zfwgtNRnbXr<1uh;wn)yJcCQ{a-3MfGbPj(qPyei-x{IxX4d76B(KX_Lw>xa5JAC*H*e-BW;fj647S66{ZU)h$N%Y(SJ9 zk^Di8lm-(GLEdKp+l109Cb2Pm{~3i{T-v&g#}7K|;Z;!MT)h#Z5l z-v_A}Qn@cg8zqrgb!&p?H~k$2i=zYtIHMv>pXLDf?$f}x)_-I2<$daMkfXZ97s(|o zU^D{YhC4PXuu=5UfnK?v-z?htWVGJ&cU0LNoqkVCt?{lqdPT?aXd{>>H~QE4&G}3i zI7lF?likHO>HtbI&JQBM4~S_3093xbhRl%S`kB;w#B)QMjy||JCnYQ|pBPPUNf!5%?*T5O*DlZcvty_C9iH`!}b1Hz9Z@0Spb`4OU&}sq_ z1`m3eF(+_2(6}0g3GSr|y!VJfD45Q1?*54m;K$fOZ@g0#yB>1K@GVH{-%*{dIW6Wg z2NAT*J>bAjnsA|$cxHS}g<^C?XonmS_*JyDw0gbgx?h3F9h&a8FqTyT-f)xo)NPPWHH-`fIP#HORK=cdK!a#Stei&(>Z!7rfCl~Qt zu`n^1-JZ3_-(WF1jlh&Q(z%N*3J(_0%T<~Zi6+XhWJf(Y*y8r|Wb`a?Rufi_&NLF~ znPa_gU?Nook+Lrzy7T(VGP%g;x9ngQp?b}nC=+7cOg&Ycnf1_FU0M85oNdwrMVGk; z?o$u5mlX%KdzHnlL7;T$Bgm=^U`r)*5Dh6vQ@`%Iuy28Ql&dOpew{HSGdf>UFiwlYYp0n&_v^>C(2a$U9I`)*Rh=I(kxSBHi9AA&`1*FQ}O8b zdehJ<&kC#-*415-NV}p!QU_~hJ4l;%W-Ge6;b0A5;dlx<&7*Kj?QU*!rc^OsZM5_b z^?B)b3F#SgSEvb>*a=H;B904$19WX>?yozyzpSSjz&(35c%y&EcrSFr})xwU;czg3Y0 z!Cs%eR%eiGF!Y@Y6Ep6(*Hh0oZ$p7Hfq8a;bOY(cY469Kh31c|K-+d*>S$iy!T>)C zoDmv~(5TPHvC1vLz7fR0>KVh%rOM&#=p7`MKLk}W# zxdvo`i*^&D*8S_&Kor~EbwctMILaOWZLcI6KH_4-L7PSo%7iu5Z|U|GASsbzyiM)KxU5s$uZY%Dka2h3L%_uL@}5e9eT zcDx_5y%1&!*^%H5HV)KrldWqX_jkz(}o8y7kZf>E5XGF@%wn2 zAQP>^>kt}`R;WG`^@x&b_PqvL%G{Wyg+^kor9L2neAq%H)A>g;V+Apk!PO@)Ajtyy ztGoNi@hYH4RT9>%lN`YNe**G-=m23ijMe05SrJZU1ojmw(NRyVPJjee1fV=;U|)gM=mYuSyFkf+ z)c{a~#ZQmXYSv&y(39h2wizzIy1+Y;uTKsh!mBEKmD)mMQFqJOZkw(5QU%DK{31c3 z6MFW7#nCMVY#_~`RcUMB`T;wTUiTOzECm$vjspyE9C(~@UgYY017I5#M?e6H zv!@-1+(hqP@6Of5q;<$cc#bq=uFAXd`G7d-o?lFwMuB&kF%;?$OSY_t7NEFqojwAz z%6djLYOJ7shbt;IsLi||iMK_bg>It><1j1qVp|0=+?wqi^{?%Lp7G}kpbw0nfAEm` zMZUf!b?&PJB34AnyJketK7j1~xpD&PDVuQc61=8C;BtG-hlyKYr zNh|r``Qb`e#u9*_fsoQFC7MdS(w}SyRLg_XWYB3D$-_>T?m%Zidd>-fRG-KWE~hVb zeq-s9J_13=@AOih98iYbOW+75vME3v?vt}90U0l8Jjj^zI&b5aQ6psJDgg&-HVuYJ+tuii-mf3LB>G4#safGwU= zHCWueOoCgz(xe)7Wa2*S6vAUz`4smIgi!zZ+$Fj$(Iygty?m>3gy?X4|I9(rPAdbhGq-)%h4vP>%e`{3jr zdY%z9|I#|_o8dzS&+>YpXm1TyjpIwMEyny?rx;Fw*A3+Db9s)hE>88B8^9gbz2owK zq-|H#Z($NZiSoqhiHV8VPi7qSu#D?wU%`Q1Qj9KFxPqs4z4&({*IOB9-H z`q0^SgXNNH==Qt51R$fmD}LO4u$JgRh&uYm>*k8(H!VC2NR`Sz0y+4+(rGiy_N`~t z%d>7XcBLLi78Q)YbsMG@f#%$QuX!@$xRzSPERd%y9!!<-Gchj4IJjPoGtH$LcI=XW 
z^#PF{63vYO_w-oe0yqvi-KlY$GSWiuIZp)W9H}U7|9%curpQCxW*@Ruu+#Kl6~X58 z*JqAqU|~0VMvkAiQuWxf;_3s1RcP2pW&J-3_&5`^`h4%31`ei48;;tEH`n!_2msbN zp&*mYWdQyFz)Oyj$98%FzG#;N_SbjTtBF>C_d%^1LCOoz9+q9$8p?4rHXbO>s^`*3 zM~7wtyW-z1)t~Sgc7by3*!ybtzp7tgfE=u1u*kIk8n+S1)obB+hB{VnApEO_fwa?W zjFy&9AO!>8|80oxxow`l`LPyq`{nwT;7h>&4o#6&pgT#c;j&4)jh{K%d4~30r~*v>pB|9#U_{($pISClc^n2Jne2LZmina;4FpOTJC7 z{`&GF_uhvQ&C<6ju8Klv3u6Vcq&t|ALnCba{B(GEU-SHMb&LR_i;d@*_vCTvD`3eK z&{2ZY96r+a!Li%oJwl#H`vA%-)7V|T8v z12ETn_Z`2z3=Ju&mnd0BSs9h_J|G4PH`uV`xsBGPz=;}1f;Uej!PXi-0k%YiB(6B}~73tNa%^GH<#dQ8SD$Ucd%i$3?#>T8S_Qw!rLBGVkUaHyf zUiJM&Hc9V0Y0uN;Hia24Y4w61qa)@#2q(%*w2m8C2X}k!k0StYw!Ljc7~5xZ_w6St zaP?iI(I#e8u}|;6gDl8I@z39u;IXmAGqSG-kDg5m`PgxWC1rp2?>RPPl)6y0x;7yI zxaMron-L9}oQXGQH0c0uw25C({ZT|mb{1@;o=>=>l5l()x z*RbybdMRl>9w=~`llxDYP|oWwV8_wa&Hh>PO)JB!y!UWfrYPu9Yl&Xt;e3OSmus9K zka56fz!11Sd;gOQHd@F{7~oQf=D}QVhUb5vyKM@cy%V)n=Sam74a}u$c)x!gL4G+h zU1OGWQxV#DT4e0}t_F)q!h9s<=(oi7((XkBAMjQlxLJp?f&H-BP_^>fCNGHk^x_#279mW%@# z(&VyRLV7kR@#8*>+8dXi(#VpM#uDF1BXY)p3It(niILTR@lAbpCW=y{x%Jdz{1YvJ zm`D-7`nG)jo9|038fSR)tupGn%eS;eVNrE%2i63q$4f2RIA@u{(z%#^;*t*+j%JDn z7k^{a>t2E*dMxhUfhOT44MXAIl@9XPq8~nC6EY;qiFJ}X+;!CO5p1@{hc(0jbX&sw zfpUT+Pwh`VoOA`Vd{q0j*1S%A z+y9+cu{J}QP~{je$b9^}>4VebIKNS4WnWl}wpP84mB)4tCG9=r0!<`iRZmd2psf;; z3_$bU&svvblO3;*fT3t(>d{dkL%ajJzVU8%wq_F_Hn|1&yYZ>0a^D*7)4uBI{_0PO ztKiB(ktp4IxY!ah#`!+f+S^v%aLwt4uT|6}W3dwBsrn^D6{H?HthU+X{p8L8AvFBX zt$Op)+O|cG2%7pxn_}bKp8eblPQNkb@F{{u;At$s0pmxd2)*;$YJSG;PY*|k)X?DM z?^!n&>+&+`a_IR{=~WI*wuDF(99<`+4ppieBECuA=Z3|H_>^5A{rMQzc$mVjLGPMD zaIy`MDmg{l070ge&i^7e+8bEIfuiW`kwY4)I>@5tokOmB0@Y8hE>FTte+Os@%%2@J-;>l= zxF1n9qrkGp%o}w8dxo)$YsO5XM<8{3s8sf~L-zkYRlsIdB2z0za@b)~6lf;pz27@5 z74$`Do@%`xbf6R@L<;iJLSelqeJqczvut(4sMq_gjqVcSbD0p(8Q}@9(Ud z`FU-4|0i}n79S-WJ2Wf)R>|ALi>%a5mvqoZ;`+-8lP1kCxtd)CW(12>>Ihi0DMTVB z+s8N@3=CgdCsqzhiY)Ise=Co3QfcBxTl4P*QeG531Rg}i5{6UMgx~(aM{~MBx?RCD zoRCMdlP#;aPuG~6gl_K&itb}=iSMF>ZsWM=uP`AFnj^i>4|CcM$_Au=0I8|GKg4p= zoXbKU(K$8*eOo0h0PKXm(=% zbq@sQB)#>Nh1_HaX~=WcTz()gTeLInPZ}xXvp_}A+rkHR5Ls<1s&B!VtaY`q0v7z7 zqJSUCzEHp)h*bXi&rdIZ+Q^~n*6=+(;}O1`s({n%cDGIJ?_<2GUv1|=ESeGJi5R&M z?{5+i;W;)2q;=?BfXYy)uYvSV3ll_?wAJv1MGQXi5}<)i(`c|INpS=K6L}|=aHHid zJv>j-fr8Nc**?XQ(z7{I51CU=vPCGOi=G(N6zrM}S1;ZI)8FWJ!|7Ao$oiv^`iJ+? z)4Xic@9U<%YD(Uh-;f{H8*{mYtjB&#z;}43TvoIKO+v!T^g8>hbe>Rbu(bC@OIVNU z-Vnt$yjuLV>3*CW6WcLU9tQ|Z)MaTh$Stz-Hy_|>=durj@RfFlLdzY`k4~={iK{DH zSk?M~n-QJ&``W#F{C1i8meT}~GOZ8^t}A_PL~VaLe-`7O)OFE#(hup(-Rxavz#OHD z)FQhN6_m|}CM#rx27kWz`-`#ld)mDp35=qj6|`IFI(V5opEE+%{8=HZ>J5SzAzVL%z^t-o5 zF*sCInAQhm%VJ<<-Q!RhEHbR>52}8eBsyeXqiLI`?x}tEGT)%gh$Eu@2;f@X8R9pP zB4JndbF~$mh}F5ckf#UEwWg=m0uj<*xD^JuN0m71ql?^%U?Cxt}%O1@>ZPj)a|Nv&TAtj+BNOYg&LXOO0IV?5U`2ONUIzNdCu z+X1y(w0XV1L^fEIzx3MZWbGz)89t)aAqz0F@mg5GHG$X6f2F(yz33#`AgdB+GS;ak1MJ?l`>f-vyB!hK%WoC{{&2H&{Es&P&AMm5>1Kh) zhL}t_0>!cI#}j`z)FS`7Jm^2d^-ShqA806@v&^pV2BDWEci$#|%)SsEGBXV5T^~Lo z*e0%-0C!-+PwK^KfB#SZ(mvlg2tIi)RD?ec_PzW$VtaGTvt}^i#%BYY#s!7j@8UcB z_1^BgwwhDP;;OzB-(=XO4EM7@$w{({4ffQ@M^}wj6VDR2>b?5Ob^1TLJvIfHU+WNVt@PUY*|6?n57+xT5ZSwbaRE2IJa}T|4I=EP)vyoab}3 zW$K=2Prg-DBsVi)%i^VN+wh@X?jp{^SU=pC)6PZ16Bn`UCAAV`&Sm&Xi4&3PfpUs- zQc^T<_#)->hqv0Sko3@g#O8{b(xPnL#A{~MplYTHML)|-NnUvg(AwTA$eC~WH`}kj1d*a z4&tj1Mz)MtFDmHWI{N@7YOXmrK*+OVZA&F21MG)bJaiw{AP)MU$^Q;2<7wwFrm1i>)2EUnN(;%jssS42du1 z=Px&=Uw6=1Xh==G@^U|C?w6jpIy|{?*`lahs=cwEpj9PD#{V@b@(7xX{%%2R7$nOU z@NoR3NrJ&OmL~b)m&l#&8+tOUCzPJ^9s-o%|~vj(#$$!op! 
zbB7|}Ew|88xrm zz>f@S_74UkNsy|*Xffj;_LtoNEnvk%4S)0VcvlndOXZXHrJMOH@D}`1ecxzEm9z#E z%CM|I$Tyv^q=C=8f?v%Kd9dR`Qz`FJwmxC}Qem9)i(KpFmoz;tt7HPw%pgKyY&kz5 zNaqyB>3dR^T9fZ8j%`BkLx+(Ap-^4SAF*cbjxpE`W%=UhKwN(M44121L>*U92U)lK z;#xp%H&?>9T1V_C6WO7jPX@{`jX|B*+8HEopwhwLf3Dg95Vw#bgPdJ3xMHy`(Kmb2A8 zDdyf*3K6YjMX>%ows-|3jO;b~KyuIcw*(!|`NBt{HK6q^!02!Z^uAuf&)P+1P0B$` ze#i$aP2eqha;*M!eV8Ytjzk3m8kMGz`C8W?dx-=&-8&5+-QQd6={*rll&D{Vqf_%v zK|>r#N*g%>*s#(G&N~f|8&E`X_H-K^nX3_a^;imdIK4Mn(n7}=Z{y(q27xa&ThRS9 zY*8-l6-fC&e45+;26eIfrzsDz!&SK(L!59?&dZ38Bh#Q98){9S*zaNnJ`F@Sjg#ut z9qA-L6q6#MNmn4aY7dNCS2>fE4|H5&UMIF9IcPmX7zpm#|Jd58g73arEvjA$f40}1 zka1-FMSPRPeKK?RMYj#P$9bLGj&+7|RH66st53wCp%{VmP=;?-ngvAY7Qu+>GWud6 zpu4YIhq!Owz`*5b;$mK|i4!Gx(yd%n{L|QNstt%j_KS72b>E@)!9`WrOo%&4zw4)U zFH*Q+iIQAuMI%s*XJMN_V?I3DYqy54=vu=MrM&rx}tNRUA*Rxx&op8WJ|qq znxM5O)p>|#{_|AGJvbcX9S{l^KuT>-@(6@?Q;#=BI-*HXUF@m0_e?T0uyhNJHiU<@ z7%>8daTpQzkXZ(V?yTR@W0Lx$CBIa?jT6yBP>1_liQ01BW1TL}M``JzV#36w@ea+4 z7?_gcJLR{OZQXvzA?5K5Wuo;VqR+zhzQEB?KMPl}z*SF}xFKt7w4HkF&VP=`+iv^o zD9?hP?GTHnIxDH@*X+UZHlRs?CgnZH!|dkmhjfUa(Z0h(I1j%4YlK2wrUon62UTQ9 zsgtRmhSTI*J3Em##pHSOg2TG8min{0WC7LKFy}5Cd26wdr{qFSp?tdY7a%IXlob`q zKc*`j@R=Bi`cqg^QsVLZF{O6fMb7BxD4Ea?rw@2bCdhECrna01{%H=Jh%)wj&{Wkj zb>nQG{GAUQ+V9xjQ(>i(JjqIPu(W*WV`cW`vs-W8==gYLpth;$-t}*NX&;pZO6J{r z$n@q9{xHTI0r6&Q;zc#LaD!#?1i88J&wM03I78gz?H zjK3Bu59*!bvlL_~qdOR&EWHpIB3%%hTJL z6!Pnl4e$K_{dY2=#8?94z>marYX?QowHEKpqZ6b;emY@k{4s2&=jee~N&V@OI<611 zyC6gU>c2g;Ss|rl0)vonB3Rcz5mOtIBPR-v&0CsaY=21F1u3RYV_#LBz-Q9EuXdv5 z+#x>tv5Hb{#QVI6F+IDlb`GifkCqQS3R2o$Zzms#-JTAXrG=2NBYeWnG29q8D=@1H z3;Bk{p8WmAuz`^y>4s%RjO5lWa5tKtKP!QiZ7}RS9&UclB~Gi#dM0(jHWp9)h6Tz` zL2@R4$5-IQBdvLp?dzussVCPD0U<|6Luud2XLMj$NB-9l@u0;eD9R_ox{6&yX%9$t z@^hCjyLr?`x$m-XVn4tIMvQkgdky3}2jUhcf3vZD7ui|-lCSp5ior}+ zHvc<{8cM7U=E3xahK3CwM_K_$G#_?6hKF^mn0Q2_ETq#^43J01KW?%^WWPgv4CPE- zKjadUI(4H*o~ROwXE~SgD!$Bb@ErpPRDmD@a$gx*W5%;hx=n}xc2PQtB zQGlm44ZTB0R^zWe{fXUbNaH>YO8b7m102t=t!>BqVP|p>?*>h^QIY!N64KLvCL+mW zJUEkuGz0XS0o+anJFO!fkG9G&F&iOCX?m32VJK_zqc==6Go{r>9C@l9F?61f`WoS1?6d{_O$TBzUc7=SU^)b zH!B3BHz|Gg|K_jFZK)U{^G;qohu$KBz=ETxzmu#IFH;;&nRSt!;Gj6(CDh`xer>Ig zntYJxrpfS5bxUqNaf|UQ1dA8q1M1)fT1WGvL~5FcN{Ho5Zo zD_=0%gzn>j2Sq!fDAh{IcHKURGad@#$^KuWWkGw?%_4P}yUcz6psNZB5i$B2Q6lNlzPFz%+{ zlfV<+AeUx`#ft0cA1u%KEZkR=|kL{4%l_;q^&lgxDVg!pz(>O@B{6HwXhV z4UU#mOIjWj{o;rq1l(_-IK`F9kXUOb3`~d*iJiZqOjwy*D!OIS%KjBFN#9ulOdK)| z3?iR{Ub(uDiZX)HIuP(a#zzqS8BziA_YkZX@)*!Pr(6T1)Zc6?bc^rZdu11e!!ss_ zfn^6SDK9}n3ZJNbtNU2Mj#2<$W5H6Z0PPBt4ij$rmD*~Mu7Pf4H%rsB3BAm2Fb3o^ zyMIW-2aEK%=3fl=Rk;E&AdFwBu=01p;_<|>0&+q3PesT6cYO#D#$`lE-M z8ZOJo48C?RAnPS)7c^CY=GBnKQvM)2x|OS-mHbf*?g0AOM%pBADp@wT`)Y`YfR!AI zfkph}F6Q`2>r-sH0NF8O(1#@J|6Lzs6M^BC{GOZOb!oPjt=Y!K1_=mQ%?H`x$2D<$a7fnP`W%yy_x-GLbjxj zjMqmG4{>OmEUir)L~3An{OEp1nS_bWFCiQl-}!ovN}&Cb2nuEbYIl(bl9cUfF-Z9e-OLvliU1esPb9TKJ4oA#Bue|# z+QA#<3TroW^+VfAI9h>p16H`wztK*M2NQDv6lvZ8a`-1tiU84+IoS|d65LiU1c*&r zfkdMVtx^QlYXFS!#fSQVWbQNwljP^*bj^Ei@g|Bo{Q}myxf@8}1GL|WEs%u~2DLc# zMcepQ}insy{48lMa z1*p!8LA9UmJJVzR(igTg0Mr1gefNMu?{FT(ox4=8-X*)n`T$5!rA6QUy350!td3oiRjzKu6KuxFDLH9zR4TADjW9nk7(QA|EYW%QKGIqZ%);K|Y zR4VcWd&!7NEwc&-?r5hf50T<>N~5NEie(?0^&JnsWA$NP05tLky6_t-iT~O$0NQPj~&dZ#W__0vjHxYZ?Wm zMFn!X{tl3}4ZQw~&tDFx_tU_`*sk_InR+K|HT<9sIA%MDjHrHU`BYqDXzFd3Us33j z5F8z+?{58PTl67Ub$bmqz&EP|y1XNx;=qXH5_eJku(*@0az_CrE}M{U-V}#0VINEP?A3iQ{SFMcnxS?c*GMM)wdSfEYb(pA6ohjOe$1$Dsup1#W#p zpzL1 zb;d?V1t(C6@XL0(yT^m_L^X{Y>v!lsG)Z3}wP^$`zh14d%anky&cND0s>58JyE{D! 
zRFj{QzSuTnPjU=222iB_BQ(o@21NAidVf9wm98He!aOT@^^2R58S^Or%*)bxZI|p4 z1YBSDm|hP{-(0$!md)#I&(@5nOP{|5is?z5z9j{o_(Gw7D6x+<4~{f_9~dI^YY4$* zfdTo;O6*GOGfR&G0cEl5gp^g(dua|&r7i8#G$3QqCw7M9iCb0=k=vZ|9ebe{gkK#w z{ma*_<1x%)I0AKsC&9t8B4_^qNy+>2Es^wDnjsaa z*)dD|892cXKw_x^_&kPyCAAIIHXKZzOy|1C8M|@fXg4TA>8W7N9Ex8%7Ga#0Uyab# z6b9BeVvcvo{44XK*f3fP=klCROm}eRRS_dY=so{{Tu3)?Pqslkt2B(1ecC+E*djZ{ zGZ!Rr`^x?7LH_oeV}GF_yi6iieGmsCNz7q52qHyW!k`?n|a9(DwZ z^iAy6tkbVQimPS-p`$raUV;~L1>_apfF)S%17s(J0ibZ*6<%(oUFJA&x27v1J?^U> z)j-X9%GR_Oh-vJAl1=Y-R0h{`PyKHZa^Scah6hrBit(^Y$fBJS0jgZQ*8cs}Db*hiZoCr!t26``k@3w+I)kWV z+EM47)y=#jH<@nEkhCJB3k#rr@w_t4Z!P{^(z;Qg$>Kv#_Y?y+xQ{!f4Ll`H?}>U8 zz0|B-2dzs4XZ|{Xl=X8XSg(%@Z~kO_aXtmf-r=Bi$&*Plb#oK8^Y`0Ip} z=Lo>eMUm|!6Kz`B1gOVa4buF_5f>u6(8OupH{iXtLUQ7S=ZSj+Fp;(I>S$ykr%tcE=uzH**Fb`%0+eE}jXEb=Bgrh&F}ecdyes{El+Cbh z(WWF?FU<&wo}h#`$R5s@^#W$wU6$H}&vwy9z*iHjfRD=XTkINQ8AC4Wi8>j4YOy>{ z1E#DM&ou+_q~OQf6Gz5@*B}z>#%zK{yzbsWo8=jf?`q0G8W^7!rwzn}m^ir{eqr!f z`BwNC_CpURLbyBElD)pQYFJx!FcpOT)Ggn248FtnY-`Sg$yWP;cpIO(UzF5|Ba=`w zeDx|7=&A_;7qu$8Z8^{M%32*-<*Dq@q7BP(Z^ z`Rf)-i`-Usy?S^jkX?LPe>8DJUUmxXhnJu%LQgNBx?WvYt3vE!``v(l*Wo4$ zqeesNUn?1d!guPlOd_6@U+AT#Sra?o2XopOgQ&?fLVgfnnqp&7XA2m__IVs);9YbG zGLDfSHp17t>HgzbPnBIr232R2sF%r{Gxp-m0a&bGzUckf=bV9l=N}#gat^BzEO!56 zssAjWb8$=ocH8c=?XB6F!+laX=G>72eP9L2$PuF6sBmxJ-rDfOiWiqX?15!UZ}Ot^ z0vavAR*j2eJV(qru!9*nTpWQ7#MxTF^`qH~&zS)GNXjoRCoKOW2+rjSCD!<2VDE#FmtVasRn8%EWh3K(iT)@YpH;^ z;R|mXKPiKl>Hz6i31lVW;4(=LFUb+5EO9K~m z9TUKzx3Ds>l#5d(?gIPAkHBV6Hs4y#=FNM)3Vr1|ENPMvu-ZU|rB-6fN@j5UE&{eu z-7mN&hF_T($J8nIC9&%1?O95kzky5TwtTt+;n0E4_6G_tyqBJ=1t($wQ&7a%Wl1moS0d=KWWn?q@w>~`dY(BR z`D7)(3sA=I(-u~6YB~x|bln@p*R8E#>JS&1~(3{vXziT zcP`^)!4eiB;0(|+Yv6q$WWmI2qS zTvhX#@nGxoQ~LLTrQfFDpf*#ErQ1M5juExMjxMOH+~A-FPE6q}EJmCq8Ch9blPbPy zz5?djYp%Vy`r7eRYm-?U7ucx_!9H>} zRMA=~!F0CCF8Io8maD)t=D~jR!i5B(&xv#XNM*XO1cfXxZB8%%=b1x3lipRRU2wX< zz9>$8h1N1Vtw8zXje+xaUa|qF3I_!vu)36EuD~(<3tktpMNWO#1U#o9*u?L~y{g>0 z+?<>(z$Mc{#`C=gU%2H_^pP_$^#$(KY${biT|WE{~(9gTeV07+xCv zpcEHBkF~ITso)9i7tM^ov0#C3|Gj!1+k8hGV}t7f&x^$G6Ag4$71W5hkj@wND9)8B 
z-j!)Ta7)eE=>qcco@$38AVmXYwr7VpLb9s4EP41BrqHGiKpl4+Jne7uYYGgAbrtKaAlWiYqiS5Fv/rrS1qUJVFyrLq25SDJJeKQHA7n+4YzlDxA42TzN8eL+X8sJPEAWuFmgCYDCIENXflPSba5xHNHuSDiNNSDSsET/Um00NLSJQ1JVhsoGIsFXdSFAUtTEoiaDHPO1vVhMxbXV13giDQETwGOm9L/aSjmWgqKbaiOfwiN5nppH3p5R4KLwXon2RyHbF0RoYcBGnPGRP6UbMYkVs4r/JLP+3Sgd28YJ6noMuHL4vGnbX+e+P9y8I3acPt5Nfvo5FpWOF7qDU+wwFOcESnVS2RiW3hDkI1c634uklgKgHzMBGffyZjFjEtJylI58n5G49gQ4ZhGqWwG0loi5fcrwgWVfr7THQkNQ7XM/XpOBXla4ECtuZaskjLOlmlI1EYspZ6lQjMFuEVbG6ltVtrJ5qCnwN7/kriEJUTwrRyiJ4w0Ypqytm6uK/hbWjavQI+0DGvKRXvFJSryQQPzGyAB8BJK8I2ihJBzYzDBBkyfqNzu24THgbcGD2rAM4BurKCYyodIPQiO0wwHgrI0KzrlYvv+/QRuSrIFTmswuz+WbDcUB9+jnes/Bjmad3Icj6YfwEh6aKzOWQ/kD9Dy/8p16sktdj3LTZaW5ctewDio2LyzCWoroe20GxcsMyEx4s/pMpkSfg3rgF8YBW3tQ/eAdTiRCkRno4bDYSNgy8ABB0KrEstFjMZkJowoHkA0m81gEDRCXvaE7tR13J1NZzga6xnMb4k90BJ7/sViz26JPcPLJA3vVMGmzrcYZxnN3YS5aIor/n75VCMbKr7qker5mxo3dHRrstHTdo1t0Ujllr9WG5VZqllO27WKefmOSNgoKA3Y5K7ZkgfkBX9pd8nNR0QcPdOaPKgA7bTgXMg4ibGgq7q9beDrFR4ZVaFU0AwANLTQqPxza6wDBciFxnzbWkm1UjX1+nX6AjMn5H5pKNpRc++FP2CrfzxTcLx+vtlssbnxLPGeIl5KEX5A2lPE1Hdspzxp/rA8u7EU0aE6Kyh0+WA7hde/EXEpTkhTisOQkyy7PuFmxD1Qk3ijqaUIF8i0immq7hyTMxHwOP/8a9KvrUAx6Md42Jl7u7EVSrwGVuY2h1i0cLN/Cp6BcmZd0TfnRn0VxScWuO3FNPKOlNOy8Ug4lT4zzo+zlMqoY6ls91spGxUtMDjVtTSGVl0RLN6yXKs07o2zr/MiB7rSM79z9MVP6B65cXXlJ7L7vboVVcXV+dmVM30hbJvAWN5pCJsXh4aiSyPc/BjSb9Z0vaN5U7VOT4E3fnY0mIXsMzHr2mcHbDDrYUX0Wm/vA44HbuwDDuzwisB4Lff+Vu79rdx5bqgEhA7x2m6oI9dD+Ewfbjxw9IZ65bdyfdVTt1jv95VhXWTeHkenZViTXg1FF86wqK/a7dUhbLlnQthUdGmEmzXUlc6LfqrzmyEMQmcijKnoZMLIZvnjxnx4+RNR9PAL \ No newline at end of file diff --git a/design/foreign_keys.drawio.png b/design/foreign_keys.drawio.png new file mode 100644 index 0000000000000000000000000000000000000000..a117dbc217f21daa48dcf3017558a35cd71d7231 GIT binary patch literal 30114 zcmb??by$=^_oxU+DP1a^OG%EsTpC|zH^7*)C+9t<>u$$jI_VQDXQ|%5~qle6Ux(zQxwE0A_7CB z1rg3LM-0r(Q_$Vs3tNSSyP*)y4$k(7f3^`35)l;;k`xdUHx!cK6jc_I{QD3U6c-gW z|7UxcqrLmT5Q+;5VG#hdVQ^=6FO;VfrzjBnuI}z-kHr3BtFce00rp{v{TCLp5f-af1zScsyV_%a)a>D|sJ~rQoG>Ui zY>le0u$Z9G-#=`n0n7o0asEG0{H4d<3+DJwS`1W7(LhsGBQtRyT~jZ#w7V0`^`A8O z*ke4MQSSdHSx{6EJO1C!UjAtNf7T-GeVpOg0Yx}P!B}?wi*MK_&i~?+i>8->j;phT z9m?I+UrRhd-^mpGPbBUz>`ec`aa}C41cd*IMA8_oWhd<^2G$i524XxQYR;Nyq!_0N zP{Z9`(%Z~T%~9D7Bkc#MIkPOx*M@FQm;3G}VD#+FA%@u#g&9S>4XtN7KR8+t1Kh581#N-Al`l>-ays&~&g&8=Sm^fnKMhHnikOU;a$ygHRBrFN@gd3WhX^9w` ztE+;%oJG`yw6&F843RGC?!quhPcv5|PszXCHMF4Esf!voON%&~X-KM~w7nr(5H(V^_|^CkswzM zVYq~-m=h8WHd5AgGgHyV$`$Aupr)m!VQ=83ZR{_uE26EB20QsWsUiF%)y)mmyfu(I z(mpCc#9xd)NGQVJ53Pn3jU)!HrRnKnsN#t52!IK@BF**vyinpGl#98urh}2EDjJ3M z)fR<`xH@@xN^3w7&Twg%cLBR-BT{kC32S;sJWmRz%6+3Z#BoOFt?`UR-(FO@Q!!ceyuEJsv z9Y;}nRb^FoM^zzpUsF|MkfgS-nXV%gEbgnW;;iK=;b*AjZs(~hBxkRz}*z> z?_w;f3r9o6&F$U%jRTaSnl5GzcIsL%12;!WX}FoHgpj_vi>_0EkE%FS#STlJDgqAD zby3$8k@irP^mXx&LMo%dW)e^}UlB0UOjlP~*#M#9;Rn((G4oS1SAl730>zbeG<ZS~sbZ>Xs0#e6GuY`nXnN>F^o<>)ja;xRPZcbpqZZ&Hp@VQo!^|AiblrTw za8EA>XE9L~Ege&Bw4X3c#LiDh-`~JfRYOnB4x(jhr!54MLTO?VUPCK?XuXDlXWba3OC`Wj}Xy zH@LZwh%j2i(^y?t+5wHVi$F*901XL!Y{A38QNl=97v*Fq?dTyYg{?6XRgrSA7x(h= zauL=AdrP6koK1~TdJe9-C?jnLBZP<)QrFEz$}>RJ!$jQ)ZDj7}riyWr_Qax91xe`m zt9$#odKn=Z4-~&Oc7^%2>+dDW2 z`A7)s8=-(cLS`U8uK>va6%jpkcT-<&jFGsLik6z2xQL+?*hEv!!9>+qIKa+J-`L31 z&&ko)%fV1Z*T)d#>ZSJA1cCp`#5LfrUiqQW#`9`0Z+WC}&77`c19AyEEaSPXhT4%#j-PqeN$1R;qq^1@m@?*ObC z1Kr%Au9{{DPq3ely0@ORvlLWS&&X8Y$4SD|Ue68#b(0Vm)p3(R3)|~q{WlW! z)KW2k{#`|8Qg+xy2!vqwUqgLgf2qG4iRfRe@$VhzAKMZ3{a+irC^%vnZFT1k%N=!) 
zvZ0^#u9dH4!rtTCfFrg*MD1=zb8>t%4(k!-#^l?<@1X$Cj~jU zao$=J3_al4^cvZI*M1NpQ{Xvz@Kk=(^J$^$!o!=ZixYwpLOi^?cX2|WDJikAkdqVs z<2956$P&iFsviE1UiE+1{=1Pa9v%_J^Z#l5?`m>F4xYQu!#Q{s?1cVrbpL3POwNk? zpX1+w>xmXKS8CA7D~)9%%7~r4>FUQ#$T>;@wKpcUk>%x_mX?-6?h8`X zPplcBg$mk|elZahql7Qqd3|S79iK=UWvKV}_s1qD!#X=V2Uj`OdTq3)iwE=?z#8>F%!N8bbkyTKl<7RaMo)vooVwQ9W5JR$Di>g!Q2; zK1DC{LyuFbzTuUrpQ4x+leyuY+<-+KGoAdzXXo46DW(5 z_T6XLT9?X*B4Y-ssd20K(Q$B4{KzyiMIw=7uZ{SEt}khnm6fwa9d%Ble-c`nbFrQt z7DdH=?!_lk$1A3`atVLdQZW@)s~mPONPbe2%EH0|Isc9*>xs>VP9n2nBCUXV%!ybvd6d?vu$vHR{qam*?EBT31xM$u~GRX&Uw3eGD zCmZtUctFjKVKLXLA>EM=w!Sq#@l`?vM&5t;+@>?Q{=)x!f3d}Tht_4fYQUlXdq-fY zWh=(C6~k#-dq=#m-k#>{XW74Wd{WX zp*Keu#hfR?twymzdPy#(qM~9Qg`~(#7W0FZt#HP;nxDDmTDL&Bt{G*0id<)E{LX&T z5YzB}rc^qQ5{CDd**TLNK(+8VC{wY#4Z8ZRx37EpUPMA-V`&S_EFZU)lKfjPiPF=p z!89x#0aaCoqJ66r)r-%Eeba~4hJN!=~7mxP#_RI)8F_^A;c;Dg8b>Q@Ne@2Rz z8t+Vv75r%e@mZ_O6mPzqf2w!W3;Dp~p(8489ozkd*6~ZaR+-$qyi$XgIy`oz;j+eZ z&8&hE;9oe}v8`EIba~0!_a3#AzhXJb&d8wYR8JRJGe5)XnA3UrtlMazViJ~WwKQ^n z&0V+bwI(X}G2sn0@ar8p4^RJ9h4c!+RXXp-)Xgfpe@~CT&2$ z!SSMtJC~CftImlRsVp_FmsrFMlblmL=I;QABa1p)9MyzJ8Gt_=GTPUK{pP?ee;x}F zME{)t+Uu(e15V#6ENAo;pWET{P~ezR0;i*9%ONRj46>WMyYVqp+{dG=tgHa5HnBcj ztQsnqOa7d#XYUNJ$daue!s>`}CzUj7NpU6Ky?ZNID(J)IefS?_6uN6zaN7kM}I0`gRP;~y#u(i-~g&G&R^ z^9N!V!PO#mg=ZU0Y8Mh)p-(GfHv}jEY>&N)d3ZsN8NNQ>c;M+P3X)O0w8I2 z;140UiT(W$iF&t%7U6k0RiH|RgTFBAS{s-aZ?ICyZQv`H{8o%Q)`rUGhLbxN`|I(l zJbI%PdRLC1!7jP07D-B71wupjVJGNKVrF(5XO+!pmH0rV5)aOwjb4{h9|){5;OLJIPLX#HY3D(YL#SS2NjyF_3hqWaHKidDVJ z-OzB5(nF$@Gg{WgSbmx{rbetCbRtW$?g*$42J|dUy)uXr;5LM0rBliz^U2A-<R(O=n_aH;W`+?AdIaH(%c8XzI^`9x3E@?Q897 z#u>Am5=1ctjB!`QqxsU^H(^*28#|Lyi-CD0?MW!e8zRT9)~^LEvc4PY5rdV1`q4$P z&Xw_4OS-N?Gb65@J$|i@S>`x5h(%HH1Q^k_dztBFhT{0gg8$8p%c$;GyYme%h5~ZD*jBzBU1^`HeV=vx1P7A;6SG#hY}k z3xAD@LVMQWd_OOLdT4BeQOdWk*ptGXeGD-Xbq{3B_f(dK^3ic$+5@dvZD3hEYXfbD z{ZMATBFn{z`dUP9aWDc7l#`d27DRk&a$|YRM3ZaN=rWZ)5}GaA>N}3Fv=+9(y5p%S zNc+xRh9QOc;$f5fV(wFFPS4`7FkF|J8UYA|H_cJl>N8;#?aaGl*Ia&<(}w57JkJ_t zo?^{yiegAGtMOf&RzePHqGCTa?H6ixfp~X`V*Xn811kRr>j{l>)ww{xR0coYX^se~ z{ylx(bm2H_%SoIUr;Hr{wi#c{Tm7u?g%3|82W}WrPj)mnelR zXKE$8Quny%zP5ff7MA~fXG@!{6{W|jsMJuRJ1BwiW)Xc{ZdLlsW1u{aRH96Y!;F#* zC5>P#o_)Y$R9hlFh9fhfz6Vl@woR6$PoavXD7H1whyvu*hi>q++9fweUQw@NddNe! 
zkEo@_`Nt{9XS@t)X^m=}W1&MY?L>a3ql>=WP%c-4Hl~Ty{EY!yU;3sA# z`~m`{R!myj+Bsj7564ZDnB-R&ey-U-9NMeQ-ZnIb$07!jpJy26;8HLXCuezgv)u*G zsh9C{ATElOyxMHbfN%q$jJ4x(LgPUqC`$cR^seaxDPrS?JSlUeJ!555C|GDA(u80Rp3&R< zr>l`XLXJJn6}YvS*8HE|$zKtI#F3DYGId14lHcUN5H9=$ zEy%L;(*38mQ6`FYH`E;;3uf~D7ee>HpnXR2Rg(Yol_nlyJ)W|+59o$y{u3dGm# zoHY6f|3Ce!?fW>&z=^rYxOL+HM40**3rYux0s_$^d!e2^;gue#fQ5)6c?`z!5_>UQ)*or-*s+Lw{?*OOCIiQJ_%Rw!@t7m zdhuSTSXBFZlGm`|PBIR_yop7_5D;&-PTbQ9h~QW|lJ3lACJN!}o@!*Ny_2C`jbY$#q zt0ri129u=~zYHq$V!l2M_lW*|NS(s05_&(U$m)aayW<(^-y9To&zq55g|#+E=q09f z_h|-yjf|C)ll1sYK8-H+!{o8TWaNj3l*3vR}0IeUQ&$UxYeq?~v!#_t=o1J_{S zi{BCv#xH&Xbc1yV&DDvshj)G$MF)j2a>^E{1Pvh)nl{4OrZ13xQ1;^ za9rx9AUcyMR_XWT9Z+ykjDA8s)jON>gOLzCtn^e_mGHX zOLIqj0qkkRH&?p*4OqAbr+5o~(Pn9wP&1qoGFyiLXDEbYAonS|h;8>>x-KdDxJbZ4 zM>Tg?Lk8<uEhSdG(*HTcUZoKdKk(tZ~>9G?Rp zB}9=D-+EG%@83x+H}q9pys$hfMYgc+S=pKuJ6o!N39Xp$>)}ZRCFRAHkhM*Abn1ge z{o@6LuM5_SmL!OQu)Aj!<(2;By#s_71|c)yS3G_E$7e?21Sr*4E{ho^I-%mhH&+n{ zioq;Howsc8-VpsC_o>`9mPZ9uDN>Fdh-RE+_O`XhXcm^<;WP&P(s)T&`C)gdGh}t= z{NT5wrvqXzTkkBA$nfOo!t*z`iF^9aoNQ=-R_CHz$Dg~Qe`<@1Lm_^}TEvk3&ZoD% zm_lJVm;Dsg)p?ZU3ne}iV=*Ta_So>`$6BExv_vm1evVJ-?#lQvv^{VswjSms`jEV~ zdwAB7p_FL}7;|#R=&)go5siRpd#VQ9&IHSmzd zK+wsA8WkbF*n2|NTWlB>sh)c=_$Qh9`}>fjVtFm`g5Z*-+oKb1V?UJ202_GhRXNj$ zW$SZe+QOS!zuXMvNOt&(Ym*a zwWG2{h%kF11&U6pP@yg7Fn^sUdeJiI^Yg+5#_w|`WcV&n)zCJB)Ii=>K3Dy8eJSL} zzUQCdhAz_&Xe%O3-wHk5jY?MRv17jYMGJUr99)ptZ&oHP^JvTNpecgqQszjOmM~0E zB6|}VC*1w#$B&dd3^J6_p|n=IjhGy2`SbAXg}q5ZfbMs^`E~u5>%6Q&WABohB~dJX z1#~VsoYgd-=2~gN@9!gOv!1xLg@fE~r9~+5z4A(*idy~VH;*QO7IrNLJ~sdQhM=2v zAWeGr_m_u&nGS-%Tw*@kAj!g!Tf)PR=l9f+_}gDgCaHgYp7|MG# zq}&O1^=qx}E9GW2ZJr}Zcq|6a%e<#95;YsPdvdDQ(KMCJdes%K7)`iodpfxpCKNm^ zk_?=&?j-6oa{pdQn(6NkKdDhc=dC|`WHbn0YNj)p-SoHlyDG)OiIT( z_Gy$3yFo>~Q~AstsyZ z%x*osgDr+Bzv<1E=G1UcENHpI?dXfA=oj18$Xo1q?z81O9m*Yai6rlZT1B>hN}B6} z%{5bLWCHFRcoJB)1d-4TW_1s}-Md!l z_*3)#g722K;%KwI7-&k$*6e%f=`Z=AIuhN*Cbdl8LoUM|VX(s&ib)wyM++4E{v6@v zPQ40at*CG^e0y*D&;F;&jzHMg5_}&f`N5e9mNfUeU6(JbDH#zz@fJJcBoZFiPnBSD zM52EmgZBesom@|{S7}>T*2;$}O&W>Ynp`426vU5tR>=n?GO6eY2;3Ln!AWa!6TgdJG26;|bNbeQCZjG;vy;08rF}YOp z?IVTGxR0Ch2SVNU#4_Gn&nF$B#XiF_TQh-oHd;nL*Hft_3md3zNw`EVhq*0xc#zEuB864=myTxz6|bYD^;v662xPk&;8lVZcRUK6#!bSaWP!|b=r z?_yjVLF)EkyhD@hem!F}Zqzwz;4g-)nIX!^Jz3aH#{amDqvMHod}0l*)xPZ1In!wIa>5<$^VrZ& z=P|l3{q7xl&qpXS+qZi`ojv94RR~JdDZVkTVz7XD*lMzCQ!>1Y$}!664hMc&U#6nj z+##mmK2^7I$ljX+bCb2&l2P_F2DzZCe8n_Q#PT~~{-HtzwG!=hFjX8UIMa0 zImexR!;1L_kvao*Ld?r}lR9qX6c_k^67wC*8Xo61FcSlBs7aXc%UtPd!ziex?ata3 zwE)QKz!LYpL8M#s_uHee6!}02^*#<@10McP%-GfR>rjqy15(KeUo;gu5yn(unCDqTlH)BlUDCa z1ux&_+t7BI{yerV`a@=fX=*S-B~Mw;F)X4$F7b$j4t_RL(6ZJ+V>FbJAuD=b{ax!` zboi;T$mi$vK7iRP#hJd|Mwc%8QztReW~F){{&z{ zFdzTE@)twXIz|DyHO3E8`KE`XOvZi3LTJm!CEzInd9?YPI^bcKMpGg(Dc@Y0lYqt@ z_yR&!EAK~q>^fhmT4&9|b%Lj_u;+I#!Bj! 
z)Fb2g8qI6yBT1TqVVNRGFZ;=%aE;sX=>Mw{ky2gX&V{C*0KFycPm{QUY2e)zWt_To|FvauLwJXf}~dEV6B^G#HkAQb?4RSX~F}3ywG`ZC1{u zHonVTL$ev_bf*M6L-eW8CP1+&R1P{yJuhp+q;&Brev85BmyU&xr(52w#qe zWv5e0b5a5uX8Rl{BQ1VC>pl!VT-c3M7aS6Heg7c3pAs8aYX~0j5_@HWdyRZ)TYYXl zdny?b19J_R^G^iMU*u@LTw_~<9_{POMf)7J{VsT=bl;bMVEvWWww-CGi%hOqRcN*B z)vVokUzNNLKAvzr3;1@{+ShnL<{2Kr9&ozqmJRA5-vnIfcu#d5sr6}II}ir9Cr%Tr zra1mBO-%D8$$W}W=;)PoBg0?@PiS$e!?NK=8iph9?)5jmwu;Uk?}nx*&#Sep9i$%g zeDh_7x4;K_!!26_2&gJrVe{dv*<$W2BUbN2RWGlzr3!voFxL84ypu`;P-67*AD4X6 zj+!{mbeR>(4d0^XyI1>ga&Dz=c$DnCP!3#6y63T>LC()D9ZLAT?<-H`{2YCUjE_>s zYa`)#P7Pt1wg4JrzGJ<}&DtR=qCZ)A`3qvPo+FF<^?UbeEibo4;w<;Yu6obSxzwYb za}+%(V{h5|;+&{yoeyIgIhzJm)ji>2)}2AFEa~Xz_|HW=2IH^wRAx_0TWClBoeQuV z(~k`VT=)5UNeVq19WTXda$vfEx#?+=Ppgdgwuq%%$Jk=ggSIAj#%@MU39fLAQ5C6ErI47_tmp>-!m%15N_R3zS~mCZH!+v^@7 zi@rux!|(}XTvo^hb($AAKdDgPHUxCdRXVNqTFspXs>BGmg6UiPB{ zX>JUE`cZWEj)s2Ab*nl|kxfIgp3|67uDtK3qS^$R0e?kFrTJx*I|-PtpvhLe93Q6-R&a9L`h_VoVNT!eb`+c5Foq&h^_!$&7qH;t7x zZ+HhK_P?OoL*%5RzHmMsv^uY*`leSNLByAf5H1F;^5ohCeM)mv3%ccUI%Ghxr{ zD<|@7Eh(D6_4EQ|gVVv~J=dr+zX|<>^#J?#-!!fw`EA-69fRa^Adm(SM|hPd$jOe5 zt!1qV%z;=CBJo)+#jNbw)0@qTq?7~k>|dRX>mS1+s6rdm`hzFqaw!Vv{MPrFy;>@1^}o_W9hbFlz(H=fF)^)m#g~%^zc>+PT}> zq#(WXPpH~a5h6X`7d#2!W99(CGR&Xq?PKd~@;HUBi}V4j+$;TaN=@OjXKz2oQ|Mus z{LUUlj^;<~e7YTwP9YB&odSIOfO|8X{;YJ1yf0$FU`O>qv7#-@bLLx{ z&asK-t3{#oZm=4(Jjgg8$upWuyaBg2kqFx>!^=zPqcbMnrywfnW3ebsOP#l9&~=jN zUn*Yj^7TZ}BTw|oUstjdgs%JKjgL2Q>fFn>)$AX`kwDWka=y)aE@<0jNU-pM&Yq$a z$bsm)XCpRI8}=pV7Tg^GN(@>4yuFG&w@{vBwG_QnM}2uA^F-(UdP=g3(QEjUgrYm$ z%_U85JaY_{V`ATyT4s3i%F;VM(W=EYBJNEhQ!&SfX-V!+W!cyC(v6b&rP!n5?aqb3 zJN3J=gwv(fCF%*g&+6eoggT^EWL-GmDw=+eKk~Hq$e<5Bwh6dOWI|xCYhGC0FRfBL z5}>A|)CL0fyB(gBG}U5Y`bv1usoSl4_kJ{bleOD2&cCO2vk>djjWR>qG>54}{0k`k z&NdjW!#jFo-e%SH29oi4vTg?DWcS~zkleBz5oSYtsx+(-v*5WT^<9x1V>I7>4FdL4 zroI7sGOezBIzti_k1B>-uP6+F)ohv-5(Az%TJA^58A)|caK`r1wQu}+cV3H>Q!MG|C$i&=o`93USXo}2DZ{zMk<0n^bdD)9n)vy{LxMBTMkCf`o* zqIBez=NcU~kLjIsa2!IPtz9fI*8cMFNc2SqA$*6qM~Z@?mAQ^@q?skRS`KDTa*>u) zDnhxki%2-~(R--PPa-2D1=e(KeB4C=M0As_JDP#0kIiH{CYQ=PtcZlW;ha~-k~0E05c|F zFcT4LT-PTWy;0bo3z9m&RW2p^DuPd)~st|IDVJAc9bRia+)+oHI9UoB{>UyT&!?4nUB3L9KQ2y1|&Xu z=8dx?>w}qULy2wU9EgegY9o)p8VcCURYJKQ4+;c6+GZ>Cms^RYBMpr|M>s!mo_w=M z+O{WGH+)fJ);uZHe>8afGcYq8Y4w&W;iqJxGSFqJa#&EWw#&vNh@dBFEPv#8fm&zr z)3qAM%Q7>krAG0-k6Mh1xMa-owtVCiHRj?!0q?@eao8uuTj3`BJ~0});I&=wEJ^-( z=W_vB31OlX$x%E`R{dB5;@PGqp-*BaWKC>AErEAs&zV;xr>MUv?+a6(D@Q>3PqJ!- z`rYpl)cZ=hB)lRq7WOYTO?}h+e7qt`Jbqp*YN(8&3Xs~M{?uz{mRM81xxs=a<#8;X zUJoUZqil2Rs0u*ZD5TueP;R>;#w>R5Hc&35y~O&y{SIdUXw|83SEm{)1CfVGDl3++?cF*@mqr6uLw)6gYDonNnsOcsCmvi2(&&|xDm`XzSbxg*&1`h z`}y@XUX z)a34|T5^=Nd6+Z-#`3PT|B%%IpQI!&-rQ^nJMfbQ+f~!)Sik@5u4f34U=P_}@X(20 zC>*isB*h-!Y-daG-{m#hHgm;pG~S(JYF!$$7g2ZuGMVLx{#$K;UA9}Nc!gBJH;wekG*K3j9W^B&s_RvKy+&r zO_)K%)d@6mE1SIi=32MLihxTRp-lXMtgqtfA8Dotp%gn4Y$8_6h8asOMqVYEe0WVk zkg{Z^p2vcEOZ&s<3&yH3&CFyKHPVkR2x|4EM&~CZ)`Ygr0AA|6V*ZWLg~A%kj_`=L zS(s=38TZ!bs@2(85pyggQEw+1g8uxXncUFY_;6qu5S?K?M8kWIjZrQ8(M;CJ&ti5vKDxRdSJ|{=4ez063tu-z&^?N` zS5%BtcJlrA9-Qrn46u^uoBr0^JzX3*0))pT*}G)g-*1)S{To@z&;We0V{0?;Tfys|@e z@bR`Hn*Ujb;-JQw)O$ro_1uFG%TIjyv9x)!`U;H-d{}sZUvpq7PnR*YQ`ev&j1z28 zLeGf*h%dL$!H#*#91xmY3`imW367;m4d9sJ(P-8ri=F+W;oU#4Y&Ku%g{&Sjjt`cP zSNgR3*q70&!TUdT)XkX`V6KI&WRXY|zkt~iy$;{3|+H?lEFhOWH| zD8nEI>cKK0&3Zyx+cz5%S3-wGex-C>c*$Zh&Hr06guQW7-5t(TjWwuqiJ!SxCy|k# zzKdv##CYij;nT#{ON9t%j4(6E+>uc%H61d(Ql!AeEdCVwDr1ggBEiQ-nMhKKNL%41 zZgd)e?|Q}skP(s`$t+aq&ZGiR$S-ek={^`p|-hoa2Ivb**59DcQOd z`F1{eti`r)Z((@6Qe(s^U;k3+9o;STh3KQL!?IIWa=(ee=r)l z8pia^*6|LOpKk0u-g2Evi@~9|!zNBGq%(N^>Mi%&-cVpQS$mjXQTt^4k*VBF!sJFG 
zrtZ@xc7hgWVgR#-<4|i8Auuh12!TUdlDH&G{<#C=1 zkw~JD6Vg_U!|hJs>yxu@zH%fFR(4s_g5#A^y70e#pZ7gb zTp7wj0n9ln7UY62?}ABO**!Npp=IR~Ppm(vniZ_%3u`tdyRs}jdE0xXaCzxRO>+l| zsn*dGgL+>=cH6M0lgK&{kopYh?4}3?6`^s?*76$+@fBlYhoY@w>3Bb1>n$k;Un#w? zY+ajX&)VLE!Se4()v);bCv0C_Um4L4QYXE#`P|4vY!|}tD7FNjgv~cG`#j__;efzA zH#VsLOkmwTg^yyzGn@~Wd6{I>;A^0*nxlhmI zVLgfc`;wbVRO+9F*0rUikn77Bx+ga1N4DPUH{Y63eiXE^`kWelPLF2-0MXZTCoch( zl|q<}Xn%nT6vz2oW5aDDQ|{*ECiLi_(@ZKatIgAhWcJo1Qxmu{sTbhRo~|_PqbNZ% zUe$a$W8PnQC7+}XoG3(;jnI%_7v%>YcGjc40@AZ@-u#1)DB8ECUk%ez>8&JT^D>V5 z63u!41(x;m>s;7Cbg*P`{u?ORR<3(j99J)y{J{}brc~=4EU7t`oF=ZyKrt1CdPuax zTgvJTY(7S`NchT){Qp#thlMX(>6w{bi`@oJeQBPuE@feLQK2mjWnq1^!VQ7sbF7^f zcUl#TsmSd$O^g4|*^Q&(0KFARhV(}e4Dg+w4RLkyJ4YfWnrJydqVG8=kJpT|9T=Tw z+YmHo3msw%f>V-o9Ba205Cu%g&6R3G)$Kae#M>92NRNi+;*9cn_}I<$%{4(TlftV( zjjgNHGJ3U)R$WG{;Y>;8XMG+H$l{Qp!(RW!lXHNijd?m$F=^!LnRrBcP<`x!tai3B zEiW8`c>;+J_>=9#!Et&)Wl7dX7?zmJh$#7vS*c<4n`i#Te!04~t`=f+`u#_!=*z^F z=*^HhF+)ZRK3_p(Wk*Z=(H-L&cFrDYbWCeTIJMx95ee9w5OCSFthYVsO;q>08 zgcosn;lB^AA9Nc08Gr38ix{T7<)D9rN`FqO^FlGQp`5qKw0Pz4)TeLgP&b~LHp7jx zcyI<~GhP8@M_M-!^pIplkMDJJpTmT)u5J@P5oN0GGj6ydB@t0enjeNaEB{fpWKV;$ z_14^LEs>Js%SbFs-^!;!`orEpsrG_vV;6ROohs&l6J|RgcGkY6-M-ydzoh(rs>X(% zu11{``umZIyGmjcntFDLZw3=-3SV)G5fz6~Zp#tLTdEk{_zDB!Eb2~ z&HU-{k1O6+IbQF!b(iGPm`k{a(bc|K{gZS6Fw){y#<{xQl&o1I1@<%;-LTnV^N;{v z2fpWkfQi=Ha;-_4I`~8}g@gchR&2sH7@H(k^ztQLCyu@cpSf(tKw9a@Y_bUlC1&-i z1Ju6shV2|{t+|~-Qph{66-;+U@R=2Zl>*tR;+Yh2n^Ab5y4@5qYZ7H}jt6_*nr_xp z*X%!5j=dH!ab!4LjC_A_3{UU&O0PA=#|?P6GM7gF&Uq-o_yZK;t$`52$bMcubLxG!*N0$)ly)hs0{9$=%xJH}6x5g@$y z_+cSL8LkKm7Hu;qNg=GxVI9bCy`G>iW4~U4KAE?#vXr2UGe&Mu+waFY!f$J;0s8Mb zh-n04vH8Npn$ZE+ER05MkSgd}=NOg=Rj~nfIzQzvkWL(Zb~L{dm;AEW?`DNY#C|Y* z{Vk)=;#AmZUh(Odo>=NNs?h<;wLtK zvmSR$lH84@LS*98E^~w?C?)she%t`A-00D4c2u_#?ubQ62l;>ax^fU8d#f?Y*X5 zGlGH@`{h6{?&FP-sBv5dy++`JpxEF2#&M;Nq-?$;a`Y2{9J$l3pW=!?TcDl;rJhNLHDI`5fhd{b1=N#1J#?H-yb_WQH{;7j|1 z&#R8C*$2z)5fYqg7c|ZRY0#?2bpKeYi=7a-ugUWIqjE zpCz=v*H7RC0pK2da7rTQg>%K$gZYvB1kl}2_uDV}vt$(yuCLX6m!*)rs#;9ZI&NBZ z-lG9rAD<==e5^BCXuUmWI*G|NV5^UNZ%KZ`61hz?JF56uz5 z$7a@cW$>AN-4)+6%$n$-q&@DB1_9f$AAy$+(p2`b<_fxDmz-=RdGoQw?WvSbL6m;^ z5bPJV?0u%~XgU8ASZvsX#CzXYzeAa1qjS>sW(SpHZ|5d&=%sy#b(j_7drm6Ye$0C! 
z7k`AFy~*l&x+sYK$b_cAVg|f~BcX+aT-=@>;ZD0L-j_6SZTX^1e@Z7kVW1GI4Tyt5>eMVVxKDe9<=hbeD`TufEcZOR~XnG&DHOZF~kpk6BAE zEVZ0wjDt`mh$BxyBL}-Oft2QrI~-Ue`Bz^#DcK61xyx44eUWMIIgMcQyIrPgR~WY3 z?TIpYv?bo#0;44iyrwmVJKDZpBoDeh!X+KiR3dmh5wrC`)|>@elV8W|5L)_j1paG6 z+|Rd75%>xE4)E0t~dj}wTKSxqRI*U}}rrj-PMB+YUAUs(G^|i*FOtrdQ_!Z%M z%%dIJ-H+uvo5C)awqmFvGI$u%;X)I`E#fzU=1YH(&{n@|hN^)19}5|Gs^%N0`y>kb z(&&rtL*!3QC!}6r5)DX?uC*j3a7is(0Wm~u@4ax z@!!rq{<_YadnDo>O%-ro5;&K-KHj`@C!dV?W7BnBo&5k4mvHPxIpLU-Z&l9+>}O)4 zZ5GW@>QUB}rWGO!)9ovZQO!Ir!U^s9U3@cmuu5yzbfVZ=Ytg}aFJULbg~qic3dVf; zRo_|m+Q)uk^Xg?j7*Ge4_`%yhU(3qzSCA_3NA z9QV%>>fLTtEjy_=u-~KYBe5r6j)Sj8FXa4xtgd1&HCQOq8~nz6>j$v9+l2Z~^RyF6 z+DgX(l3z|z2HqQ8mV?|-M?g1``x_Zt#Dg_Mfg;cJLEx;oqyHbJ}I5NxcdGPH8+9bXrUK{+(Ye>PYDr5j2aNNKl9)k@~SZ(8rY% zX+{_}>$Q~*h}-cE(^Y;c($!^Cmrpr&S>`%Q`OYiss>HATVcnz}DJm?Qm zzP9KCie&G;)bw_V;rZ?nfBGbFcnooCYkXsFv|Q^kPkt5~#9qC>QP$)~2MTxkOI!o; zK&lR(MBeeGDJ5HaeRlh6t%n9{iQu)50s4wf;o_BZ^rR{;l~UutX(q)wjh9Fo4(V@o zW86AfO6od)a!iXiqjhF?{PP8#e`58ubw+Xh)g(7cqL_}(uSNmQ&mrhRSve>hd+b7e zs`)<*-*4=AkygPDYa*T=$5EmZwkTHMg1w1Us z6OwRxrg{FoC{I~Mj<}T>ZjHDmY`Mu-k3JY50vOcT<-f_|WPR|4y6e<`-|5dS`{qE2 z=lX=SGVp6!A>J#2lw>9q2ED2}30kd(!`Bl7>*n^kE7%yZ)K&;Bf@iJ;dGA*B)A9=v zuI|ruG`EVVivT>!_mSAhwXMwTa;?8W;a>D>nxP{2Qp#_?gR%*KO^!0~Y8%?DiPjf- zuqM$Ne63nR`2c&~jI}*BiH-|LqR*6Ht>}5sDMY|1NM1tKw?9ymiBcX?b*XU%8(?1s7T4Ilm-)~s51M;)cJpTI@tp!ziB2^7c2 zckZ3vxy{FZcGfkY#;3PjRlW(Svb<$VKgC_bL^ubP$KO*I3u*%2H`OZXROer94ZE%MK5AFmH?jGENyKB(f`MxV_-E|-S=bs0lr+d1(tGdrQwa?zCuylXoq1n|x z&}Qp#ro#KE8O^6#@Zv%o^KfaFfqYzPkxQk;OsaVUHP1Gl&>k8X9KEozrd(Gc;=oVF zB#8t2HSG+3|Di6ZIT?`~@Aa>bdkHAJ%$Av>o-~^+t$vGN?@_^9yrcZ8KXt>$)yK8+ zxXZMn0Jp{l0=0U*>XwV|xO1~}I5^QCO118V|LM;`9Zy8y%5D7_8MY!JhDk#4lLA(Z zrzRTEyaID}AdnP`Fa;#?Sz ziCD2MH5wZy@L>#g@RO(lv~zK^l$1huti{kMFes(cp6jdaa`%-Q^J2tefn^PQCy-)G zuJ;yy4bEQhc-BZ<+tcT-jK^49IpY3sB?M4Z0>l)Fo16E70VY{?e2Et(pDmn`VJ{Zp zoB!V2l>e3=%fh z!0iXKPY#0wmM|rkFioY={QQrt4^_h2JKFm5tNynOe6%Hc$O^A| zs`7HpHX8}}T(F|-8oGYi8??RZ4gK|QXS%gOS1v@kxU3>ehy-;y-FT2(>tbGO$M_MM zN1X(BSPmELcyr;q_^P4vlvS11Y4)Of{o`wp*1lP=FGg>imZ!?`2{V&3TZaq87&U*T z)elEKswIUa4tH$YQI`{Rs$YjGiH^4}Su76YuQ`Cnd@Rl0yfwXf<$8$&*y2G&-9)Ut z$fM`GEx*h3hyl4Ro1u~!NXGI%-h?hv4A#uzH(v&s zb|;6;oh6Kq)-bjD-1Sg?>iR}jsPuyfHB`#`F(RKU++^YL89cbMGDKXpN50xUJ0#9!lpeK z+x4@UlIyKWfzrLH+>vye`^b5ef z@UA3y1TlZg=6un%z4Pzm4zb<%4N>6M?K~X1;YxO4FRG@<>0Ih~eeZcAA|p=$@vr`u z>|}%v{*l!Gj-y+FIC_7DzN6ol_Vf4b?3qTSt)|j#YYB122#2fRY#7f94%DxY7C(Vu zgr(B{1;;-K2wY8ydx|UYSuIuw0G($K*%O6uR>0TxA7nAXrkM-ME$TuqDZ2=7ZQht@ z%D4JFpf9uwIWYf0j{aC`ruUP1CFE;hI4fvwpBz46I?&tTsU;_UQ=CLFTl|g;d1aM5 z-aq`s&V#z5DyCRU=h_aG&tVPOfFw^YsV7HDc{?WeIxU!86yUMgd3tIo{z_a@R*wLf zcgCaI8k~iCxE%OiYpy5nN({g*z8&EeGv-U3siB~Rs8W|0uE92I$+^9K)N06jPH@I9 z@9U_}yY-8|6wW&wFjF_iD$g-Kh@4=(aaUL2xm3uGZWC$5WRrT%`-GpHALbfy_S_s4 zLcA$WF&QW|s2wxrk9@JH#pjLIv-RYqrMG&9$^5I66dhRje5t1A_5U>ep!5$ zfP}O}`?_q4ob9iSj7H@KtbB7G_6)ijqibv-I|xh>I#2xajBpWegZ+a@-r6r8w&glg`J4!ksY zNy_%{=NVdhi9{;CB?m(eofkh4#1JY4`?RB%96rGP^0VhL3gBHPWR^m}r|OBM;R^DK z3JrF1`sp~?QHz5@G;rPGK2wU%Ws7XL(CdOlk29!KNeN+=h#zccvX@p_7Im&B`z!5_ z``b$#AbSx>O)DV}eJAYH5<$rEgF~@Q4xges`u^sTG|zcJ@-ryrFhsgLsV}7MmYRIN zh-eyz$v;RebEfg-r6HT5F9)_#KFP=|O@44t;QHjYbUc%q;7DBm z?#3Bvbk=us$f5ZRgGEh9AIrCD;V|6s_sffZpXG99qXuU-k6p^r`IIWHe-7AKyKB3?7@^PUX?Ukp zFIWtOq$)e8erBlBk}@~w;7&6SRGmGp3ml~_7WF02L(e$QwwbVUj-#_gD1y}$%HHV+ zOL?z7a|;ttb{&IG`P{CBF7n|I8SIp^8Nh>~a$AgjNi=@WY4P61jf{lY1Dc4AB%Mk{ z`DnQ#To7-!tCFR;2UU~0EA>g$W@o`Q#Vca1=RjJ^!MM@$+FNY|^qp*-pFz9@`=g@G z?#pueZ8#R_d#tV_i-Np{Vj1|b0(O}nV_(}JCjL39i`m)aKx9^ZSM(GD$wh8u+>Qn+ zXjEQ`v*Qn7P<#Et^2rwu&W$C*B#-T;<_0M@%7B#oF`&8|m6EoK{8V9%%`%pBU*UZ# 
zKV~T@YY#qqZ~r~m{FpB2rfTcNe+?huQC3%B2gPia4YZpMOs8o3X-E==e}Ju3t3xN^ z!P@%YXR-M;KWbr9N)W=>{50Ofe(y6qOeI#C$N(B$G$3IKps!?S7E$oR`ujGk-Xypx z5dkhI-h#`Q#VQyKCW?vq12LF*@N~f^6c@h%bJpBTBHV}Utv znh1S?*Y}BdGI4YS4S--%_Gb$o2WF1`YC- zLcGo8Lgw;Q(s!-`{~%r!C?w0V&#hsq&kNAT6C&YDSDPmBe@8Yw0*yLoX>3~uc8;1Onl`NrRFk1ii%WvVVJF{GDMjFL- za|hOy#GZC1zfwL*;V??pXyeG`BmjJYT!{wRp~IX`a~-atU#LAQSHEuX07-3l`t2bY z*vr$P7z)I6Sv>i&B$|M;xS;-r9s0F>z-ChEOxxL@q#Iy_0j))m&yo9RnuTG-K9N@t zo}G|cPTQW$#DnADx!u7@^q6^F6c9S)GVsUNk5G_I_*uywSrPOa_~Jy`VgV%>DkL)L zmHz>_=_)|Ux*L_SydgX%)AyHwht01WMYU83rI`Eno1wqACbts_(NyWkb)S}8GY~F3 zzrX(*H)@(J(2pnL`zYbG?!U8CDHFwSdKzdF@0(JL@yYT}qG<6Wr}td^=lK~% zJikjOzif+kFI!+_LBs>u%S&}ykYlj1S=qVQk@z|OpXg|Z&;LP3&8``)yQ?exX4K!( z>4O8&@w*EY$#H&zW2edW2gqcrC@Gl+akcKJ%d{h^aOq9%%vd%?%OgP?x+w>?`b{s$ zMOu6RtgdTP^OU;uCiO{QRMrGZUe~c6Odtug%m94&bOQbfY~h9J^~IER@5rI#CX)EF z3`WSs!t@(5@1wZVPR<(JnUuSn%5A=AslEU&^Z9J99=-+&#bxi4Q~l^|MeftVJN6GAVFSkoYcB|0=6l}Y5bw^^axxp#B zDFJD5o3}%>oUJNpN-_c6l+`o>cX8U*B$lnPo((5~`JvY^{7ycZs8*~q3a>AYf#x7s z>(9=Q^W+xiH_J<>+s7(SGOvJV<1sb%Eh%ZnPA9Pi<6J!<7&Payjv28q9lZYAgI2AP z#7wcW^G{4v$!~dwiuUneUV_7My%@$@p9F*%*aG<#1v))YG>V@BUk4(`CB2+53gP^)VZi72_oU!MD6KsG{LsW zt_+II3;|&NUoKaaHI_4UV$vZ8!~9Gl&GACLSv9DoEB>MW~c36UhbUmt8|tVWV4Dd$eVr8+cG{ z@#mS*RMd|q*pa#kV&wt#d6#=+3*#Gz!U|WJ1;p{TEkeL#uxd_q7ZCJ;qj~F;#lHY@+nrJzd3} z5eG;n0VETxy5r)z_oW(R1DP59Hw62_ylxEgbfDAO8qCPlJ)_Z)VTzPoZizx06lbYR z?aQ(5T+BGE=t!?|>3S-i6Cw?UuMTl~$@V2$wFFWl%bQ4hWJ0flj0UxNeNQ2-wp!tD z^lk4`(3HJ_w>w9$*z>fP>KY8HJjHS>4}sIh-8HoE|7gSosoR~>>da>GW*9!?)kj%> zpfURW$x7aUo8`abTE8#C+26gz0-o@im4NGotGqyB))${tNW8)E#0JE0tVUPt)Pxeu zg%N~{G^udptWz<*4Df8-Hd_9WL1f#We$4&)Q1mpJO$g>`N#(|Q6YQ9@vC znos|CHtonzR*N(wWgy{ItW}a6oy9HkUZ@HN(4wOL9tI9Nqt+=QMjJCCU&ckRQMnij zkD;_M28m85P1+{&q2n>5-^o-rtk@Z}eY5R;akA`{x>Q4ZGvx6o`#7vrPW_%wL5d+j z0j<>urw8Q?FKzw*2Gw#)Uh4XdPm+A83 z+ksU)5NATtc3jg#(*c2(f;Gnpz8hgzygMosx6U?!#L@2_p%CY#F^iK}s!va$RivE?Z>TA$BoP7yu_x z!MDE47Cq&r+c`#1DMp9bfCvSd2ht!yJJ=wfHgcXZ>i+smVCYs{(KGeuM{xiOZVokz z=%3B~*(z^pSzH$I2WCpSzD`jCfG|;nfOD22|Q0KmHI>Im(J&bY*k0LaZIPqdUN!_ zb?MB}fWQt6?S|({d8l7DyiHmJ=w)og@Woz@qqsP@H2b%qLj^T08HUTgg^t--0-)kk zw5ORuN{1Ft9<;%vp+rA%>oTOPaizcO&RKDV`&%-c_vG)=+&G#TG zF9QC0-wyrIvXsqjLOO!LJgAdq#Q(N=^*9oMW^?!UB@kHM3sO87vf~U(oIB+QkpYhY zkK;OzrGnFH)PV%(myV)nY|l&Lfg?4&1+FflV{uxYR`%PJy6$?rm1H)_UWpofDA*})TuE(~*~mY4Z0Hz%t?@&eqZZsIO7A)5&03PKLh zovVk*#4N;`wAuOSb;+&k+J2SiaN~(?B?4)6H&y4bPSgY(yS$+_75ohdR;2rmwt;Yk z;hN4L-WT_DVf`j1q`vAv(;xG=z(WXHd2~&e3ctxfgZEQ-wKDfPj*GR?C3)g{+`FZ_=fG}j5K)|Z4NHo$%POuQm3u;lsoR#3+h8+ zC+;kvN3ja>@hlE=Wg#f$pkRIVXp=X1?e#-l5E~=)N~>?+;h#)ieEYAx-|Jh-26?g# zSE2H-t6S9X&M9XO1BE42l=o9|uMehTaJlH;zLtM};af~o#I!wtF_?=nxz;H+_s^%y ztmOJMs9Y#{p5D{lI$L$yhJWg()z9V}&?qVbgXlCW(Hm1!s|C~_-VgKHXP}!V?pv@W zi;QNh_N6owB3^YZ=2?wb$h@j!6>j!Jt;Q?cTKVnJtCC2{P&!augx~s~4sU7xPluo1 zIV-yefJ-u~a3 z`TQlylY6@=+I7Nnj9^i;{WNqFJ>V1-a{m=U^PN^*g4n}pa1@&B_?a#@8<3P=2CRBKcfhgLgY&QhQ$^d zaWizuZZ?#Al`9ida1%qm(v}miz}Jcbs5)>Bpzj`UGxyX>m1%meIok@N#lPtlN?(u^ zo3)l+HOKcGXnVW=I8zGqQIPhGDV}{H(Y{e`(DI2dj;6W@ouvEA#y|!D*P9M5@RW-& z8&Jt5DZ76&f+4UhhH7X>i;n#19m(&~u^`ubgFdqSG!kla(#My^h-eYjP? 
[binary patch payload omitted]

literal 0
HcmV?d00001

diff --git a/docker-action.sh b/docker-action.sh
new file mode 100644
index 00000000..d1e7fa02
--- /dev/null
+++ b/docker-action.sh
@@ -0,0 +1,30 @@
+#!/usr/bin/env bash
+
+version=$(grep version gradle.properties | cut -d= -f2)
+sparkVersion=$(grep spark gradle.properties | cut -d= -f2)
+platforms="linux/amd64,linux/arm64"
+
+echo "Creating API jars and publishing"
+./gradlew clean :api:javadocJar :api:sourcesJar :api:shadowJar :api:publish
+publish_res=$?
+if [[ "$publish_res" -ne 0 ]] ; then
+  echo "Publish API jar failed, exiting"
+  exit 1
+fi
+
+echo "Creating data caterer jar, version=$version"
+./gradlew -PapplicationType=basic build basicJar -x shadowJar
+build_app=$?
+if [[ "$build_app" -ne 0 ]] ; then + echo "Failed to build app, exiting" + exit 1 +fi + +docker run --privileged --rm tonistiigi/binfmt --install all +docker buildx create --use --name builder +docker buildx inspect --bootstrap builder + +docker buildx build --platform $platforms \ + --build-arg "APP_VERSION=$version" \ + --build-arg "SPARK_VERSION=$sparkVersion" \ + -t datacatering/data-caterer-basic:$version --push . diff --git a/docker-compose.yaml b/docker-compose.yaml new file mode 100644 index 00000000..f50e9d9b --- /dev/null +++ b/docker-compose.yaml @@ -0,0 +1,39 @@ +version: "3.5" +services: + postgres: + image: "postgres:14.5" + environment: + - "POSTGRES_USER=postgres" + - "POSTGRES_PASSWORD=postgres" + - "PGDATA=/data/postgres" + volumes: + - "${HOME}/data/postgres:/data/postgres" + - "./app/src/test/resources/sample/sql/postgres/customer.sql:/docker-entrypoint-initdb.d/customer.sql" + ports: + - "5432:5432" + + mysql: + image: "mysql:8.0.33" + environment: + MYSQL_ROOT_PASSWORD: "root" + command: "--default-authentication-plugin=mysql_native_password" + volumes: + - "${HOME}/data/mysql:/var/lib/mysql" + - "./app/src/test/resources/sample/sql/mysql/customer.sql:/docker-entrypoint-initdb.d/customer.sql" + ports: + - "3306:3306" + + cassandra: + image: "datastax/dse-server:6.8.33" + environment: + - "DS_LICENSE=accept" + volumes: + - "${HOME}/data/cassandra:/var/lib/cassandra" + - "./app/src/test/resources/sample/cql/customer.cql:/docker-entrypoint-initdb.d/customer.cql" + ports: + - "9042:9042" + # Allow DSE to lock memory with mlock + cap_add: + - IPC_LOCK + ulimits: + memlock: -1 diff --git a/gradle.properties b/gradle.properties new file mode 100644 index 00000000..8f3ab308 --- /dev/null +++ b/gradle.properties @@ -0,0 +1,6 @@ +groupId=org.data-catering +version=0.5.0 + +scalaVersion=2.12 +scalaSpecificVersion=2.12.15 +sparkVersion=3.5.0 diff --git a/gradle/wrapper/gradle-wrapper.jar b/gradle/wrapper/gradle-wrapper.jar new file mode 100644 index 0000000000000000000000000000000000000000..249e5832f090a2944b7473328c07c9755baa3196 GIT binary patch literal 60756 zcmb5WV{~QRw(p$^Dz@00IL3?^hro$gg*4VI_WAaTyVM5Foj~O|-84 z$;06hMwt*rV;^8iB z1~&0XWpYJmG?Ts^K9PC62H*`G}xom%S%yq|xvG~FIfP=9*f zZoDRJBm*Y0aId=qJ?7dyb)6)JGWGwe)MHeNSzhi)Ko6J<-m@v=a%NsP537lHe0R* z`If4$aaBA#S=w!2z&m>{lpTy^Lm^mg*3?M&7HFv}7K6x*cukLIGX;bQG|QWdn{%_6 zHnwBKr84#B7Z+AnBXa16a?or^R?+>$4`}{*a_>IhbjvyTtWkHw)|ay)ahWUd-qq$~ zMbh6roVsj;_qnC-R{G+Cy6bApVOinSU-;(DxUEl!i2)1EeQ9`hrfqj(nKI7?Z>Xur zoJz-a`PxkYit1HEbv|jy%~DO^13J-ut986EEG=66S}D3!L}Efp;Bez~7tNq{QsUMm zh9~(HYg1pA*=37C0}n4g&bFbQ+?-h-W}onYeE{q;cIy%eZK9wZjSwGvT+&Cgv z?~{9p(;bY_1+k|wkt_|N!@J~aoY@|U_RGoWX<;p{Nu*D*&_phw`8jYkMNpRTWx1H* z>J-Mi_!`M468#5Aix$$u1M@rJEIOc?k^QBc?T(#=n&*5eS#u*Y)?L8Ha$9wRWdH^3D4|Ps)Y?m0q~SiKiSfEkJ!=^`lJ(%W3o|CZ zSrZL-Xxc{OrmsQD&s~zPfNJOpSZUl%V8tdG%ei}lQkM+z@-4etFPR>GOH9+Y_F<3=~SXln9Kb-o~f>2a6Xz@AS3cn^;c_>lUwlK(n>z?A>NbC z`Ud8^aQy>wy=$)w;JZzA)_*Y$Z5hU=KAG&htLw1Uh00yE!|Nu{EZkch zY9O6x7Y??>!7pUNME*d!=R#s)ghr|R#41l!c?~=3CS8&zr6*aA7n9*)*PWBV2w+&I zpW1-9fr3j{VTcls1>ua}F*bbju_Xq%^v;-W~paSqlf zolj*dt`BBjHI)H9{zrkBo=B%>8}4jeBO~kWqO!~Thi!I1H(in=n^fS%nuL=X2+s!p}HfTU#NBGiwEBF^^tKU zbhhv+0dE-sbK$>J#t-J!B$TMgN@Wh5wTtK2BG}4BGfsZOoRUS#G8Cxv|6EI*n&Xxq zt{&OxCC+BNqz$9b0WM7_PyBJEVObHFh%%`~!@MNZlo*oXDCwDcFwT~Rls!aApL<)^ zbBftGKKBRhB!{?fX@l2_y~%ygNFfF(XJzHh#?`WlSL{1lKT*gJM zs>bd^H9NCxqxn(IOky5k-wALFowQr(gw%|`0991u#9jXQh?4l|l>pd6a&rx|v=fPJ z1mutj{YzpJ_gsClbWFk(G}bSlFi-6@mwoQh-XeD*j@~huW4(8ub%^I|azA)h2t#yG z7e_V_<4jlM3D(I+qX}yEtqj)cpzN*oCdYHa!nm%0t^wHm)EmFP*|FMw!tb@&`G-u~ 
zK)=Sf6z+BiTAI}}i{*_Ac$ffr*Wrv$F7_0gJkjx;@)XjYSh`RjAgrCck`x!zP>Ifu z&%he4P|S)H*(9oB4uvH67^0}I-_ye_!w)u3v2+EY>eD3#8QR24<;7?*hj8k~rS)~7 zSXs5ww)T(0eHSp$hEIBnW|Iun<_i`}VE0Nc$|-R}wlSIs5pV{g_Dar(Zz<4X3`W?K z6&CAIl4U(Qk-tTcK{|zYF6QG5ArrEB!;5s?tW7 zrE3hcFY&k)+)e{+YOJ0X2uDE_hd2{|m_dC}kgEKqiE9Q^A-+>2UonB+L@v3$9?AYw zVQv?X*pK;X4Ovc6Ev5Gbg{{Eu*7{N3#0@9oMI~}KnObQE#Y{&3mM4`w%wN+xrKYgD zB-ay0Q}m{QI;iY`s1Z^NqIkjrTlf`B)B#MajZ#9u41oRBC1oM1vq0i|F59> z#StM@bHt|#`2)cpl_rWB($DNJ3Lap}QM-+A$3pe}NyP(@+i1>o^fe-oxX#Bt`mcQc zb?pD4W%#ep|3%CHAYnr*^M6Czg>~L4?l16H1OozM{P*en298b+`i4$|w$|4AHbzqB zHpYUsHZET$Z0ztC;U+0*+amF!@PI%^oUIZy{`L{%O^i{Xk}X0&nl)n~tVEpcAJSJ} zverw15zP1P-O8h9nd!&hj$zuwjg?DoxYIw{jWM zW5_pj+wFy8Tsa9g<7Qa21WaV&;ejoYflRKcz?#fSH_)@*QVlN2l4(QNk| z4aPnv&mrS&0|6NHq05XQw$J^RR9T{3SOcMKCXIR1iSf+xJ0E_Wv?jEc*I#ZPzyJN2 zUG0UOXHl+PikM*&g$U@g+KbG-RY>uaIl&DEtw_Q=FYq?etc!;hEC_}UX{eyh%dw2V zTTSlap&5>PY{6I#(6`j-9`D&I#|YPP8a;(sOzgeKDWsLa!i-$frD>zr-oid!Hf&yS z!i^cr&7tN}OOGmX2)`8k?Tn!!4=tz~3hCTq_9CdiV!NIblUDxHh(FJ$zs)B2(t5@u z-`^RA1ShrLCkg0)OhfoM;4Z{&oZmAec$qV@ zGQ(7(!CBk<5;Ar%DLJ0p0!ResC#U<+3i<|vib1?{5gCebG7$F7URKZXuX-2WgF>YJ^i zMhHDBsh9PDU8dlZ$yJKtc6JA#y!y$57%sE>4Nt+wF1lfNIWyA`=hF=9Gj%sRwi@vd z%2eVV3y&dvAgyuJ=eNJR+*080dbO_t@BFJO<@&#yqTK&+xc|FRR;p;KVk@J3$S{p` zGaMj6isho#%m)?pOG^G0mzOAw0z?!AEMsv=0T>WWcE>??WS=fII$t$(^PDPMU(P>o z_*0s^W#|x)%tx8jIgZY~A2yG;US0m2ZOQt6yJqW@XNY_>_R7(Nxb8Ged6BdYW6{prd!|zuX$@Q2o6Ona8zzYC1u!+2!Y$Jc9a;wy+pXt}o6~Bu1oF1c zp7Y|SBTNi@=I(K%A60PMjM#sfH$y*c{xUgeSpi#HB`?|`!Tb&-qJ3;vxS!TIzuTZs-&%#bAkAyw9m4PJgvey zM5?up*b}eDEY+#@tKec)-c(#QF0P?MRlD1+7%Yk*jW;)`f;0a-ZJ6CQA?E%>i2Dt7T9?s|9ZF|KP4;CNWvaVKZ+Qeut;Jith_y{v*Ny6Co6!8MZx;Wgo z=qAi%&S;8J{iyD&>3CLCQdTX*$+Rx1AwA*D_J^0>suTgBMBb=*hefV+Ars#mmr+YsI3#!F@Xc1t4F-gB@6aoyT+5O(qMz*zG<9Qq*f0w^V!03rpr*-WLH}; zfM{xSPJeu6D(%8HU%0GEa%waFHE$G?FH^kMS-&I3)ycx|iv{T6Wx}9$$D&6{%1N_8 z_CLw)_9+O4&u94##vI9b-HHm_95m)fa??q07`DniVjAy`t7;)4NpeyAY(aAk(+T_O z1om+b5K2g_B&b2DCTK<>SE$Ode1DopAi)xaJjU>**AJK3hZrnhEQ9E`2=|HHe<^tv z63e(bn#fMWuz>4erc47}!J>U58%<&N<6AOAewyzNTqi7hJc|X{782&cM zHZYclNbBwU6673=!ClmxMfkC$(CykGR@10F!zN1Se83LR&a~$Ht&>~43OX22mt7tcZUpa;9@q}KDX3O&Ugp6< zLZLfIMO5;pTee1vNyVC$FGxzK2f>0Z-6hM82zKg44nWo|n}$Zk6&;5ry3`(JFEX$q zK&KivAe${e^5ZGc3a9hOt|!UOE&OocpVryE$Y4sPcs4rJ>>Kbi2_subQ9($2VN(3o zb~tEzMsHaBmBtaHAyES+d3A(qURgiskSSwUc9CfJ@99&MKp2sooSYZu+-0t0+L*!I zYagjOlPgx|lep9tiU%ts&McF6b0VE57%E0Ho%2oi?=Ks+5%aj#au^OBwNwhec zta6QAeQI^V!dF1C)>RHAmB`HnxyqWx?td@4sd15zPd*Fc9hpDXP23kbBenBxGeD$k z;%0VBQEJ-C)&dTAw_yW@k0u?IUk*NrkJ)(XEeI z9Y>6Vel>#s_v@=@0<{4A{pl=9cQ&Iah0iD0H`q)7NeCIRz8zx;! z^OO;1+IqoQNak&pV`qKW+K0^Hqp!~gSohcyS)?^P`JNZXw@gc6{A3OLZ?@1Uc^I2v z+X!^R*HCm3{7JPq{8*Tn>5;B|X7n4QQ0Bs79uTU%nbqOJh`nX(BVj!#f;#J+WZxx4 z_yM&1Y`2XzhfqkIMO7tB3raJKQS+H5F%o83bM+hxbQ zeeJm=Dvix$2j|b4?mDacb67v-1^lTp${z=jc1=j~QD>7c*@+1?py>%Kj%Ejp7Y-!? 
z8iYRUlGVrQPandAaxFfks53@2EC#0)%mrnmGRn&>=$H$S8q|kE_iWko4`^vCS2aWg z#!`RHUGyOt*k?bBYu3*j3u0gB#v(3tsije zgIuNNWNtrOkx@Pzs;A9un+2LX!zw+p3_NX^Sh09HZAf>m8l@O*rXy_82aWT$Q>iyy zqO7Of)D=wcSn!0+467&!Hl))eff=$aneB?R!YykdKW@k^_uR!+Q1tR)+IJb`-6=jj zymzA>Sv4>Z&g&WWu#|~GcP7qP&m*w-S$)7Xr;(duqCTe7p8H3k5>Y-n8438+%^9~K z3r^LIT_K{i7DgEJjIocw_6d0!<;wKT`X;&vv+&msmhAAnIe!OTdybPctzcEzBy88_ zWO{6i4YT%e4^WQZB)KHCvA(0tS zHu_Bg+6Ko%a9~$EjRB90`P(2~6uI@SFibxct{H#o&y40MdiXblu@VFXbhz>Nko;7R z70Ntmm-FePqhb%9gL+7U8@(ch|JfH5Fm)5${8|`Lef>LttM_iww6LW2X61ldBmG0z zax3y)njFe>j*T{i0s8D4=L>X^j0)({R5lMGVS#7(2C9@AxL&C-lZQx~czI7Iv+{%1 z2hEG>RzX4S8x3v#9sgGAnPzptM)g&LB}@%E>fy0vGSa(&q0ch|=ncKjNrK z`jA~jObJhrJ^ri|-)J^HUyeZXz~XkBp$VhcTEcTdc#a2EUOGVX?@mYx#Vy*!qO$Jv zQ4rgOJ~M*o-_Wptam=~krnmG*p^j!JAqoQ%+YsDFW7Cc9M%YPiBOrVcD^RY>m9Pd< zu}#9M?K{+;UIO!D9qOpq9yxUquQRmQNMo0pT`@$pVt=rMvyX)ph(-CCJLvUJy71DI zBk7oc7)-%ngdj~s@76Yse3L^gV0 z2==qfp&Q~L(+%RHP0n}+xH#k(hPRx(!AdBM$JCfJ5*C=K3ts>P?@@SZ_+{U2qFZb>4kZ{Go37{# zSQc+-dq*a-Vy4?taS&{Ht|MLRiS)Sn14JOONyXqPNnpq&2y~)6wEG0oNy>qvod$FF z`9o&?&6uZjhZ4_*5qWVrEfu(>_n2Xi2{@Gz9MZ8!YmjYvIMasE9yVQL10NBrTCczq zcTY1q^PF2l!Eraguf{+PtHV3=2A?Cu&NN&a8V(y;q(^_mFc6)%Yfn&X&~Pq zU1?qCj^LF(EQB1F`8NxNjyV%fde}dEa(Hx=r7$~ts2dzDwyi6ByBAIx$NllB4%K=O z$AHz1<2bTUb>(MCVPpK(E9wlLElo(aSd(Os)^Raum`d(g9Vd_+Bf&V;l=@mM=cC>) z)9b0enb)u_7V!!E_bl>u5nf&Rl|2r=2F3rHMdb7y9E}}F82^$Rf+P8%dKnOeKh1vs zhH^P*4Ydr^$)$h@4KVzxrHyy#cKmWEa9P5DJ|- zG;!Qi35Tp7XNj60=$!S6U#!(${6hyh7d4q=pF{`0t|N^|L^d8pD{O9@tF~W;#Je*P z&ah%W!KOIN;SyAEhAeTafJ4uEL`(RtnovM+cb(O#>xQnk?dzAjG^~4$dFn^<@-Na3 z395;wBnS{t*H;Jef2eE!2}u5Ns{AHj>WYZDgQJt8v%x?9{MXqJsGP|l%OiZqQ1aB! z%E=*Ig`(!tHh>}4_z5IMpg{49UvD*Pp9!pxt_gdAW%sIf3k6CTycOT1McPl=_#0?8 zVjz8Hj*Vy9c5-krd-{BQ{6Xy|P$6LJvMuX$* zA+@I_66_ET5l2&gk9n4$1M3LN8(yEViRx&mtd#LD}AqEs?RW=xKC(OCWH;~>(X6h!uDxXIPH06xh z*`F4cVlbDP`A)-fzf>MuScYsmq&1LUMGaQ3bRm6i7OsJ|%uhTDT zlvZA1M}nz*SalJWNT|`dBm1$xlaA>CCiQ zK`xD-RuEn>-`Z?M{1%@wewf#8?F|(@1e0+T4>nmlSRrNK5f)BJ2H*$q(H>zGD0>eL zQ!tl_Wk)k*e6v^m*{~A;@6+JGeWU-q9>?+L_#UNT%G?4&BnOgvm9@o7l?ov~XL+et zbGT)|G7)KAeqb=wHSPk+J1bdg7N3$vp(ekjI1D9V$G5Cj!=R2w=3*4!z*J-r-cyeb zd(i2KmX!|Lhey!snRw z?#$Gu%S^SQEKt&kep)up#j&9}e+3=JJBS(s>MH+|=R(`8xK{mmndWo_r`-w1#SeRD&YtAJ#GiVI*TkQZ}&aq<+bU2+coU3!jCI6E+Ad_xFW*ghnZ$q zAoF*i&3n1j#?B8x;kjSJD${1jdRB;)R*)Ao!9bd|C7{;iqDo|T&>KSh6*hCD!rwv= zyK#F@2+cv3=|S1Kef(E6Niv8kyLVLX&e=U;{0x{$tDfShqkjUME>f8d(5nzSkY6@! 
z^-0>DM)wa&%m#UF1F?zR`8Y3X#tA!*7Q$P3lZJ%*KNlrk_uaPkxw~ zxZ1qlE;Zo;nb@!SMazSjM>;34ROOoygo%SF);LL>rRonWwR>bmSd1XD^~sGSu$Gg# zFZ`|yKU0%!v07dz^v(tY%;So(e`o{ZYTX`hm;@b0%8|H>VW`*cr8R%3n|ehw2`(9B+V72`>SY}9^8oh$En80mZK9T4abVG*to;E z1_S6bgDOW?!Oy1LwYy=w3q~KKdbNtyH#d24PFjX)KYMY93{3-mPP-H>@M-_>N~DDu zENh~reh?JBAK=TFN-SfDfT^=+{w4ea2KNWXq2Y<;?(gf(FgVp8Zp-oEjKzB%2Iqj;48GmY3h=bcdYJ}~&4tS`Q1sb=^emaW$IC$|R+r-8V- zf0$gGE(CS_n4s>oicVk)MfvVg#I>iDvf~Ov8bk}sSxluG!6#^Z_zhB&U^`eIi1@j( z^CK$z^stBHtaDDHxn+R;3u+>Lil^}fj?7eaGB z&5nl^STqcaBxI@v>%zG|j))G(rVa4aY=B@^2{TFkW~YP!8!9TG#(-nOf^^X-%m9{Z zCC?iC`G-^RcBSCuk=Z`(FaUUe?hf3{0C>>$?Vs z`2Uud9M+T&KB6o4o9kvdi^Q=Bw!asPdxbe#W-Oaa#_NP(qpyF@bVxv5D5))srkU#m zj_KA+#7sqDn*Ipf!F5Byco4HOSd!Ui$l94|IbW%Ny(s1>f4|Mv^#NfB31N~kya9!k zWCGL-$0ZQztBate^fd>R!hXY_N9ZjYp3V~4_V z#eB)Kjr8yW=+oG)BuNdZG?jaZlw+l_ma8aET(s+-x+=F-t#Qoiuu1i`^x8Sj>b^U} zs^z<()YMFP7CmjUC@M=&lA5W7t&cxTlzJAts*%PBDAPuqcV5o7HEnqjif_7xGt)F% zGx2b4w{@!tE)$p=l3&?Bf#`+!-RLOleeRk3 z7#pF|w@6_sBmn1nECqdunmG^}pr5(ZJQVvAt$6p3H(16~;vO>?sTE`Y+mq5YP&PBo zvq!7#W$Gewy`;%6o^!Dtjz~x)T}Bdk*BS#=EY=ODD&B=V6TD2z^hj1m5^d6s)D*wk zu$z~D7QuZ2b?5`p)E8e2_L38v3WE{V`bVk;6fl#o2`) z99JsWhh?$oVRn@$S#)uK&8DL8>An0&S<%V8hnGD7Z^;Y(%6;^9!7kDQ5bjR_V+~wp zfx4m3z6CWmmZ<8gDGUyg3>t8wgJ5NkkiEm^(sedCicP^&3D%}6LtIUq>mXCAt{9eF zNXL$kGcoUTf_Lhm`t;hD-SE)m=iBnxRU(NyL}f6~1uH)`K!hmYZjLI%H}AmEF5RZt z06$wn63GHnApHXZZJ}s^s)j9(BM6e*7IBK6Bq(!)d~zR#rbxK9NVIlgquoMq z=eGZ9NR!SEqP6=9UQg#@!rtbbSBUM#ynF);zKX+|!Zm}*{H z+j=d?aZ2!?@EL7C~%B?6ouCKLnO$uWn;Y6Xz zX8dSwj732u(o*U3F$F=7xwxm>E-B+SVZH;O-4XPuPkLSt_?S0)lb7EEg)Mglk0#eS z9@jl(OnH4juMxY+*r03VDfPx_IM!Lmc(5hOI;`?d37f>jPP$?9jQQIQU@i4vuG6MagEoJrQ=RD7xt@8E;c zeGV*+Pt+t$@pt!|McETOE$9k=_C!70uhwRS9X#b%ZK z%q(TIUXSS^F0`4Cx?Rk07C6wI4!UVPeI~-fxY6`YH$kABdOuiRtl73MqG|~AzZ@iL&^s?24iS;RK_pdlWkhcF z@Wv-Om(Aealfg)D^adlXh9Nvf~Uf@y;g3Y)i(YP zEXDnb1V}1pJT5ZWyw=1i+0fni9yINurD=EqH^ciOwLUGi)C%Da)tyt=zq2P7pV5-G zR7!oq28-Fgn5pW|nlu^b!S1Z#r7!Wtr{5J5PQ>pd+2P7RSD?>(U7-|Y z7ZQ5lhYIl_IF<9?T9^IPK<(Hp;l5bl5tF9>X-zG14_7PfsA>6<$~A338iYRT{a@r_ zuXBaT=`T5x3=s&3=RYx6NgG>No4?5KFBVjE(swfcivcIpPQFx5l+O;fiGsOrl5teR z_Cm+;PW}O0Dwe_(4Z@XZ)O0W-v2X><&L*<~*q3dg;bQW3g7)a#3KiQP>+qj|qo*Hk z?57>f2?f@`=Fj^nkDKeRkN2d$Z@2eNKpHo}ksj-$`QKb6n?*$^*%Fb3_Kbf1(*W9K>{L$mud2WHJ=j0^=g30Xhg8$#g^?36`p1fm;;1@0Lrx+8t`?vN0ZorM zSW?rhjCE8$C|@p^sXdx z|NOHHg+fL;HIlqyLp~SSdIF`TnSHehNCU9t89yr@)FY<~hu+X`tjg(aSVae$wDG*C zq$nY(Y494R)hD!i1|IIyP*&PD_c2FPgeY)&mX1qujB1VHPG9`yFQpLFVQ0>EKS@Bp zAfP5`C(sWGLI?AC{XEjLKR4FVNw(4+9b?kba95ukgR1H?w<8F7)G+6&(zUhIE5Ef% z=fFkL3QKA~M@h{nzjRq!Y_t!%U66#L8!(2-GgFxkD1=JRRqk=n%G(yHKn%^&$dW>; zSjAcjETMz1%205se$iH_)ZCpfg_LwvnsZQAUCS#^FExp8O4CrJb6>JquNV@qPq~3A zZ<6dOU#6|8+fcgiA#~MDmcpIEaUO02L5#T$HV0$EMD94HT_eXLZ2Zi&(! z&5E>%&|FZ`)CN10tM%tLSPD*~r#--K(H-CZqIOb99_;m|D5wdgJ<1iOJz@h2Zkq?} z%8_KXb&hf=2Wza(Wgc;3v3TN*;HTU*q2?#z&tLn_U0Nt!y>Oo>+2T)He6%XuP;fgn z-G!#h$Y2`9>Jtf}hbVrm6D70|ERzLAU>3zoWhJmjWfgM^))T+2u$~5>HF9jQDkrXR z=IzX36)V75PrFjkQ%TO+iqKGCQ-DDXbaE;C#}!-CoWQx&v*vHfyI>$HNRbpvm<`O( zlx9NBWD6_e&J%Ous4yp~s6)Ghni!I6)0W;9(9$y1wWu`$gs<$9Mcf$L*piP zPR0Av*2%ul`W;?-1_-5Zy0~}?`e@Y5A&0H!^ApyVTT}BiOm4GeFo$_oPlDEyeGBbh z1h3q&Dx~GmUS|3@4V36&$2uO8!Yp&^pD7J5&TN{?xphf*-js1fP?B|`>p_K>lh{ij zP(?H%e}AIP?_i^f&Li=FDSQ`2_NWxL+BB=nQr=$ zHojMlXNGauvvwPU>ZLq!`bX-5F4jBJ&So{kE5+ms9UEYD{66!|k~3vsP+mE}x!>%P za98bAU0!h0&ka4EoiDvBM#CP#dRNdXJcb*(%=<(g+M@<)DZ!@v1V>;54En?igcHR2 zhubQMq}VSOK)onqHfczM7YA@s=9*ow;k;8)&?J3@0JiGcP! 
zP#00KZ1t)GyZeRJ=f0^gc+58lc4Qh*S7RqPIC6GugG1gXe$LIQMRCo8cHf^qXgAa2 z`}t>u2Cq1CbSEpLr~E=c7~=Qkc9-vLE%(v9N*&HF`(d~(0`iukl5aQ9u4rUvc8%m) zr2GwZN4!s;{SB87lJB;veebPmqE}tSpT>+`t?<457Q9iV$th%i__Z1kOMAswFldD6 ztbOvO337S5o#ZZgN2G99_AVqPv!?Gmt3pzgD+Hp3QPQ`9qJ(g=kjvD+fUSS3upJn! zqoG7acIKEFRX~S}3|{EWT$kdz#zrDlJU(rPkxjws_iyLKU8+v|*oS_W*-guAb&Pj1 z35Z`3z<&Jb@2Mwz=KXucNYdY#SNO$tcVFr9KdKm|%^e-TXzs6M`PBper%ajkrIyUe zp$vVxVs9*>Vp4_1NC~Zg)WOCPmOxI1V34QlG4!aSFOH{QqSVq1^1)- z0P!Z?tT&E-ll(pwf0?=F=yOzik=@nh1Clxr9}Vij89z)ePDSCYAqw?lVI?v?+&*zH z)p$CScFI8rrwId~`}9YWPFu0cW1Sf@vRELs&cbntRU6QfPK-SO*mqu|u~}8AJ!Q$z znzu}50O=YbjwKCuSVBs6&CZR#0FTu)3{}qJJYX(>QPr4$RqWiwX3NT~;>cLn*_&1H zaKpIW)JVJ>b{uo2oq>oQt3y=zJjb%fU@wLqM{SyaC6x2snMx-}ivfU<1- znu1Lh;i$3Tf$Kh5Uk))G!D1UhE8pvx&nO~w^fG)BC&L!_hQk%^p`Kp@F{cz>80W&T ziOK=Sq3fdRu*V0=S53rcIfWFazI}Twj63CG(jOB;$*b`*#B9uEnBM`hDk*EwSRdwP8?5T?xGUKs=5N83XsR*)a4|ijz|c{4tIU+4j^A5C<#5 z*$c_d=5ml~%pGxw#?*q9N7aRwPux5EyqHVkdJO=5J>84!X6P>DS8PTTz>7C#FO?k#edkntG+fJk8ZMn?pmJSO@`x-QHq;7^h6GEXLXo1TCNhH z8ZDH{*NLAjo3WM`xeb=X{((uv3H(8&r8fJJg_uSs_%hOH%JDD?hu*2NvWGYD+j)&` zz#_1%O1wF^o5ryt?O0n;`lHbzp0wQ?rcbW(F1+h7_EZZ9{>rePvLAPVZ_R|n@;b$;UchU=0j<6k8G9QuQf@76oiE*4 zXOLQ&n3$NR#p4<5NJMVC*S);5x2)eRbaAM%VxWu9ohlT;pGEk7;002enCbQ>2r-us z3#bpXP9g|mE`65VrN`+3mC)M(eMj~~eOf)do<@l+fMiTR)XO}422*1SL{wyY(%oMpBgJagtiDf zz>O6(m;};>Hi=t8o{DVC@YigqS(Qh+ix3Rwa9aliH}a}IlOCW1@?%h_bRbq-W{KHF z%Vo?-j@{Xi@=~Lz5uZP27==UGE15|g^0gzD|3x)SCEXrx`*MP^FDLl%pOi~~Il;dc z^hrwp9sYeT7iZ)-ajKy@{a`kr0-5*_!XfBpXwEcFGJ;%kV$0Nx;apKrur zJN2J~CAv{Zjj%FolyurtW8RaFmpn&zKJWL>(0;;+q(%(Hx!GMW4AcfP0YJ*Vz!F4g z!ZhMyj$BdXL@MlF%KeInmPCt~9&A!;cRw)W!Hi@0DY(GD_f?jeV{=s=cJ6e}JktJw zQORnxxj3mBxfrH=x{`_^Z1ddDh}L#V7i}$njUFRVwOX?qOTKjfPMBO4y(WiU<)epb zvB9L=%jW#*SL|Nd_G?E*_h1^M-$PG6Pc_&QqF0O-FIOpa4)PAEPsyvB)GKasmBoEt z?_Q2~QCYGH+hW31x-B=@5_AN870vY#KB~3a*&{I=f);3Kv7q4Q7s)0)gVYx2#Iz9g(F2;=+Iy4 z6KI^8GJ6D@%tpS^8boU}zpi=+(5GfIR)35PzrbuXeL1Y1N%JK7PG|^2k3qIqHfX;G zQ}~JZ-UWx|60P5?d1e;AHx!_;#PG%d=^X(AR%i`l0jSpYOpXoKFW~7ip7|xvN;2^? zsYC9fanpO7rO=V7+KXqVc;Q5z%Bj})xHVrgoR04sA2 zl~DAwv=!(()DvH*=lyhIlU^hBkA0$e*7&fJpB0|oB7)rqGK#5##2T`@_I^|O2x4GO z;xh6ROcV<9>?e0)MI(y++$-ksV;G;Xe`lh76T#Htuia+(UrIXrf9?

L(tZ$0BqX1>24?V$S+&kLZ`AodQ4_)P#Q3*4xg8}lMV-FLwC*cN$< zt65Rf%7z41u^i=P*qO8>JqXPrinQFapR7qHAtp~&RZ85$>ob|Js;GS^y;S{XnGiBc zGa4IGvDl?x%gY`vNhv8wgZnP#UYI-w*^4YCZnxkF85@ldepk$&$#3EAhrJY0U)lR{F6sM3SONV^+$;Zx8BD&Eku3K zKNLZyBni3)pGzU0;n(X@1fX8wYGKYMpLmCu{N5-}epPDxClPFK#A@02WM3!myN%bkF z|GJ4GZ}3sL{3{qXemy+#Uk{4>Kf8v11;f8I&c76+B&AQ8udd<8gU7+BeWC`akUU~U zgXoxie>MS@rBoyY8O8Tc&8id!w+_ooxcr!1?#rc$-|SBBtH6S?)1e#P#S?jFZ8u-Bs&k`yLqW|{j+%c#A4AQ>+tj$Y z^CZajspu$F%73E68Lw5q7IVREED9r1Ijsg#@DzH>wKseye>hjsk^{n0g?3+gs@7`i zHx+-!sjLx^fS;fY!ERBU+Q zVJ!e0hJH%P)z!y%1^ZyG0>PN@5W~SV%f>}c?$H8r;Sy-ui>aruVTY=bHe}$e zi&Q4&XK!qT7-XjCrDaufT@>ieQ&4G(SShUob0Q>Gznep9fR783jGuUynAqc6$pYX; z7*O@@JW>O6lKIk0G00xsm|=*UVTQBB`u1f=6wGAj%nHK_;Aqmfa!eAykDmi-@u%6~ z;*c!pS1@V8r@IX9j&rW&d*}wpNs96O2Ute>%yt{yv>k!6zfT6pru{F1M3P z2WN1JDYqoTB#(`kE{H676QOoX`cnqHl1Yaru)>8Ky~VU{)r#{&s86Vz5X)v15ULHA zAZDb{99+s~qI6;-dQ5DBjHJP@GYTwn;Dv&9kE<0R!d z8tf1oq$kO`_sV(NHOSbMwr=To4r^X$`sBW4$gWUov|WY?xccQJN}1DOL|GEaD_!@& z15p?Pj+>7d`@LvNIu9*^hPN)pwcv|akvYYq)ks%`G>!+!pW{-iXPZsRp8 z35LR;DhseQKWYSD`%gO&k$Dj6_6q#vjWA}rZcWtQr=Xn*)kJ9kacA=esi*I<)1>w^ zO_+E>QvjP)qiSZg9M|GNeLtO2D7xT6vsj`88sd!94j^AqxFLi}@w9!Y*?nwWARE0P znuI_7A-saQ+%?MFA$gttMV-NAR^#tjl_e{R$N8t2NbOlX373>e7Ox=l=;y#;M7asp zRCz*CLnrm$esvSb5{T<$6CjY zmZ(i{Rs_<#pWW>(HPaaYj`%YqBra=Ey3R21O7vUbzOkJJO?V`4-D*u4$Me0Bx$K(lYo`JO}gnC zx`V}a7m-hLU9Xvb@K2ymioF)vj12<*^oAqRuG_4u%(ah?+go%$kOpfb`T96P+L$4> zQ#S+sA%VbH&mD1k5Ak7^^dZoC>`1L%i>ZXmooA!%GI)b+$D&ziKrb)a=-ds9xk#~& z7)3iem6I|r5+ZrTRe_W861x8JpD`DDIYZNm{$baw+$)X^Jtjnl0xlBgdnNY}x%5za zkQ8E6T<^$sKBPtL4(1zi_Rd(tVth*3Xs!ulflX+70?gb&jRTnI8l+*Aj9{|d%qLZ+ z>~V9Z;)`8-lds*Zgs~z1?Fg?Po7|FDl(Ce<*c^2=lFQ~ahwh6rqSjtM5+$GT>3WZW zj;u~w9xwAhOc<kF}~`CJ68 z?(S5vNJa;kriPlim33{N5`C{9?NWhzsna_~^|K2k4xz1`xcui*LXL-1#Y}Hi9`Oo!zQ>x-kgAX4LrPz63uZ+?uG*84@PKq-KgQlMNRwz=6Yes) zY}>YN+qP}nwr$(CZQFjUOI=-6J$2^XGvC~EZ+vrqWaOXB$k?%Suf5k=4>AveC1aJ! 
ziaW4IS%F$_Babi)kA8Y&u4F7E%99OPtm=vzw$$ zEz#9rvn`Iot_z-r3MtV>k)YvErZ<^Oa${`2>MYYODSr6?QZu+be-~MBjwPGdMvGd!b!elsdi4% z`37W*8+OGulab8YM?`KjJ8e+jM(tqLKSS@=jimq3)Ea2EB%88L8CaM+aG7;27b?5` z4zuUWBr)f)k2o&xg{iZ$IQkJ+SK>lpq4GEacu~eOW4yNFLU!Kgc{w4&D$4ecm0f}~ zTTzquRW@`f0}|IILl`!1P+;69g^upiPA6F{)U8)muWHzexRenBU$E^9X-uIY2%&1w z_=#5*(nmxJ9zF%styBwivi)?#KMG96-H@hD-H_&EZiRNsfk7mjBq{L%!E;Sqn!mVX*}kXhwH6eh;b42eD!*~upVG@ z#smUqz$ICm!Y8wY53gJeS|Iuard0=;k5i5Z_hSIs6tr)R4n*r*rE`>38Pw&lkv{_r!jNN=;#?WbMj|l>cU(9trCq; z%nN~r^y7!kH^GPOf3R}?dDhO=v^3BeP5hF|%4GNQYBSwz;x({21i4OQY->1G=KFyu z&6d`f2tT9Yl_Z8YACZaJ#v#-(gcyeqXMhYGXb=t>)M@fFa8tHp2x;ODX=Ap@a5I=U z0G80^$N0G4=U(>W%mrrThl0DjyQ-_I>+1Tdd_AuB3qpYAqY54upwa3}owa|x5iQ^1 zEf|iTZxKNGRpI>34EwkIQ2zHDEZ=(J@lRaOH>F|2Z%V_t56Km$PUYu^xA5#5Uj4I4RGqHD56xT%H{+P8Ag>e_3pN$4m8n>i%OyJFPNWaEnJ4McUZPa1QmOh?t8~n& z&RulPCors8wUaqMHECG=IhB(-tU2XvHP6#NrLVyKG%Ee*mQ5Ps%wW?mcnriTVRc4J`2YVM>$ixSF2Xi+Wn(RUZnV?mJ?GRdw%lhZ+t&3s7g!~g{%m&i<6 z5{ib-<==DYG93I(yhyv4jp*y3#*WNuDUf6`vTM%c&hiayf(%=x@4$kJ!W4MtYcE#1 zHM?3xw63;L%x3drtd?jot!8u3qeqctceX3m;tWetK+>~q7Be$h>n6riK(5@ujLgRS zvOym)k+VAtyV^mF)$29Y`nw&ijdg~jYpkx%*^ z8dz`C*g=I?;clyi5|!27e2AuSa$&%UyR(J3W!A=ZgHF9OuKA34I-1U~pyD!KuRkjA zbkN!?MfQOeN>DUPBxoy5IX}@vw`EEB->q!)8fRl_mqUVuRu|C@KD-;yl=yKc=ZT0% zB$fMwcC|HE*0f8+PVlWHi>M`zfsA(NQFET?LrM^pPcw`cK+Mo0%8*x8@65=CS_^$cG{GZQ#xv($7J z??R$P)nPLodI;P!IC3eEYEHh7TV@opr#*)6A-;EU2XuogHvC;;k1aI8asq7ovoP!* z?x%UoPrZjj<&&aWpsbr>J$Er-7!E(BmOyEv!-mbGQGeJm-U2J>74>o5x`1l;)+P&~ z>}f^=Rx(ZQ2bm+YE0u=ZYrAV@apyt=v1wb?R@`i_g64YyAwcOUl=C!i>=Lzb$`tjv zOO-P#A+)t-JbbotGMT}arNhJmmGl-lyUpMn=2UacVZxmiG!s!6H39@~&uVokS zG=5qWhfW-WOI9g4!R$n7!|ViL!|v3G?GN6HR0Pt_L5*>D#FEj5wM1DScz4Jv@Sxnl zB@MPPmdI{(2D?;*wd>3#tjAirmUnQoZrVv`xM3hARuJksF(Q)wd4P$88fGYOT1p6U z`AHSN!`St}}UMBT9o7i|G`r$ zrB=s$qV3d6$W9@?L!pl0lf%)xs%1ko^=QY$ty-57=55PvP(^6E7cc zGJ*>m2=;fOj?F~yBf@K@9qwX0hA803Xw+b0m}+#a(>RyR8}*Y<4b+kpp|OS+!whP( zH`v{%s>jsQI9rd$*vm)EkwOm#W_-rLTHcZRek)>AtF+~<(did)*oR1|&~1|e36d-d zgtm5cv1O0oqgWC%Et@P4Vhm}Ndl(Y#C^MD03g#PH-TFy+7!Osv1z^UWS9@%JhswEq~6kSr2DITo59+; ze=ZC}i2Q?CJ~Iyu?vn|=9iKV>4j8KbxhE4&!@SQ^dVa-gK@YfS9xT(0kpW*EDjYUkoj! 
zE49{7H&E}k%5(>sM4uGY)Q*&3>{aitqdNnRJkbOmD5Mp5rv-hxzOn80QsG=HJ_atI-EaP69cacR)Uvh{G5dTpYG7d zbtmRMq@Sexey)||UpnZ?;g_KMZq4IDCy5}@u!5&B^-=6yyY{}e4Hh3ee!ZWtL*s?G zxG(A!<9o!CL+q?u_utltPMk+hn?N2@?}xU0KlYg?Jco{Yf@|mSGC<(Zj^yHCvhmyx z?OxOYoxbptDK()tsJ42VzXdINAMWL$0Gcw?G(g8TMB)Khw_|v9`_ql#pRd2i*?CZl z7k1b!jQB=9-V@h%;Cnl7EKi;Y^&NhU0mWEcj8B|3L30Ku#-9389Q+(Yet0r$F=+3p z6AKOMAIi|OHyzlHZtOm73}|ntKtFaXF2Fy|M!gOh^L4^62kGUoWS1i{9gsds_GWBc zLw|TaLP64z3z9?=R2|T6Xh2W4_F*$cq>MtXMOy&=IPIJ`;!Tw?PqvI2b*U1)25^<2 zU_ZPoxg_V0tngA0J+mm?3;OYw{i2Zb4x}NedZug!>EoN3DC{1i)Z{Z4m*(y{ov2%- zk(w>+scOO}MN!exSc`TN)!B=NUX`zThWO~M*ohqq;J2hx9h9}|s#?@eR!=F{QTrq~ zTcY|>azkCe$|Q0XFUdpFT=lTcyW##i;-e{}ORB4D?t@SfqGo_cS z->?^rh$<&n9DL!CF+h?LMZRi)qju!meugvxX*&jfD!^1XB3?E?HnwHP8$;uX{Rvp# zh|)hM>XDv$ZGg=$1{+_bA~u-vXqlw6NH=nkpyWE0u}LQjF-3NhATL@9rRxMnpO%f7 z)EhZf{PF|mKIMFxnC?*78(}{Y)}iztV12}_OXffJ;ta!fcFIVjdchyHxH=t%ci`Xd zX2AUB?%?poD6Zv*&BA!6c5S#|xn~DK01#XvjT!w!;&`lDXSJT4_j$}!qSPrb37vc{ z9^NfC%QvPu@vlxaZ;mIbn-VHA6miwi8qJ~V;pTZkKqqOii<1Cs}0i?uUIss;hM4dKq^1O35y?Yp=l4i zf{M!@QHH~rJ&X~8uATV><23zZUbs-J^3}$IvV_ANLS08>k`Td7aU_S1sLsfi*C-m1 z-e#S%UGs4E!;CeBT@9}aaI)qR-6NU@kvS#0r`g&UWg?fC7|b^_HyCE!8}nyh^~o@< zpm7PDFs9yxp+byMS(JWm$NeL?DNrMCNE!I^ko-*csB+dsf4GAq{=6sfyf4wb>?v1v zmb`F*bN1KUx-`ra1+TJ37bXNP%`-Fd`vVQFTwWpX@;s(%nDQa#oWhgk#mYlY*!d>( zE&!|ySF!mIyfING+#%RDY3IBH_fW$}6~1%!G`suHub1kP@&DoAd5~7J55;5_noPI6eLf{t;@9Kf<{aO0`1WNKd?<)C-|?C?)3s z>wEq@8=I$Wc~Mt$o;g++5qR+(6wt9GI~pyrDJ%c?gPZe)owvy^J2S=+M^ z&WhIE`g;;J^xQLVeCtf7b%Dg#Z2gq9hp_%g)-%_`y*zb; zn9`f`mUPN-Ts&fFo(aNTsXPA|J!TJ{0hZp0^;MYHLOcD=r_~~^ymS8KLCSeU3;^QzJNqS z5{5rEAv#l(X?bvwxpU;2%pQftF`YFgrD1jt2^~Mt^~G>T*}A$yZc@(k9orlCGv&|1 zWWvVgiJsCAtamuAYT~nzs?TQFt<1LSEx!@e0~@yd6$b5!Zm(FpBl;(Cn>2vF?k zOm#TTjFwd2D-CyA!mqR^?#Uwm{NBemP>(pHmM}9;;8`c&+_o3#E5m)JzfwN?(f-a4 zyd%xZc^oQx3XT?vcCqCX&Qrk~nu;fxs@JUoyVoi5fqpi&bUhQ2y!Ok2pzsFR(M(|U zw3E+kH_zmTRQ9dUMZWRE%Zakiwc+lgv7Z%|YO9YxAy`y28`Aw;WU6HXBgU7fl@dnt z-fFBV)}H-gqP!1;V@Je$WcbYre|dRdp{xt!7sL3Eoa%IA`5CAA%;Wq8PktwPdULo! 
z8!sB}Qt8#jH9Sh}QiUtEPZ6H0b*7qEKGJ%ITZ|vH)5Q^2m<7o3#Z>AKc%z7_u`rXA zqrCy{-{8;9>dfllLu$^M5L z-hXs))h*qz%~ActwkIA(qOVBZl2v4lwbM>9l70Y`+T*elINFqt#>OaVWoja8RMsep z6Or3f=oBnA3vDbn*+HNZP?8LsH2MY)x%c13@(XfuGR}R?Nu<|07{$+Lc3$Uv^I!MQ z>6qWgd-=aG2Y^24g4{Bw9ueOR)(9h`scImD=86dD+MnSN4$6 z^U*o_mE-6Rk~Dp!ANp#5RE9n*LG(Vg`1)g6!(XtDzsov$Dvz|Gv1WU68J$CkshQhS zCrc|cdkW~UK}5NeaWj^F4MSgFM+@fJd{|LLM)}_O<{rj z+?*Lm?owq?IzC%U%9EBga~h-cJbIu=#C}XuWN>OLrc%M@Gu~kFEYUi4EC6l#PR2JS zQUkGKrrS#6H7}2l0F@S11DP`@pih0WRkRJl#F;u{c&ZC{^$Z+_*lB)r)-bPgRFE;* zl)@hK4`tEP=P=il02x7-C7p%l=B`vkYjw?YhdJU9!P!jcmY$OtC^12w?vy3<<=tlY zUwHJ_0lgWN9vf>1%WACBD{UT)1qHQSE2%z|JHvP{#INr13jM}oYv_5#xsnv9`)UAO zuwgyV4YZ;O)eSc3(mka6=aRohi!HH@I#xq7kng?Acdg7S4vDJb6cI5fw?2z%3yR+| zU5v@Hm}vy;${cBp&@D=HQ9j7NcFaOYL zj-wV=eYF{|XTkFNM2uz&T8uH~;)^Zo!=KP)EVyH6s9l1~4m}N%XzPpduPg|h-&lL` zAXspR0YMOKd2yO)eMFFJ4?sQ&!`dF&!|niH*!^*Ml##o0M(0*uK9&yzekFi$+mP9s z>W9d%Jb)PtVi&-Ha!o~Iyh@KRuKpQ@)I~L*d`{O8!kRObjO7=n+Gp36fe!66neh+7 zW*l^0tTKjLLzr`x4`_8&on?mjW-PzheTNox8Hg7Nt@*SbE-%kP2hWYmHu#Fn@Q^J(SsPUz*|EgOoZ6byg3ew88UGdZ>9B2Tq=jF72ZaR=4u%1A6Vm{O#?@dD!(#tmR;eP(Fu z{$0O%=Vmua7=Gjr8nY%>ul?w=FJ76O2js&17W_iq2*tb!i{pt#`qZB#im9Rl>?t?0c zicIC}et_4d+CpVPx)i4~$u6N-QX3H77ez z?ZdvXifFk|*F8~L(W$OWM~r`pSk5}#F?j_5u$Obu9lDWIknO^AGu+Blk7!9Sb;NjS zncZA?qtASdNtzQ>z7N871IsPAk^CC?iIL}+{K|F@BuG2>qQ;_RUYV#>hHO(HUPpk@ z(bn~4|F_jiZi}Sad;_7`#4}EmD<1EiIxa48QjUuR?rC}^HRocq`OQPM@aHVKP9E#q zy%6bmHygCpIddPjE}q_DPC`VH_2m;Eey&ZH)E6xGeStOK7H)#+9y!%-Hm|QF6w#A( zIC0Yw%9j$s-#odxG~C*^MZ?M<+&WJ+@?B_QPUyTg9DJGtQN#NIC&-XddRsf3n^AL6 zT@P|H;PvN;ZpL0iv$bRb7|J{0o!Hq+S>_NrH4@coZtBJu#g8#CbR7|#?6uxi8d+$g z87apN>EciJZ`%Zv2**_uiET9Vk{pny&My;+WfGDw4EVL#B!Wiw&M|A8f1A@ z(yFQS6jfbH{b8Z-S7D2?Ixl`j0{+ZnpT=;KzVMLW{B$`N?Gw^Fl0H6lT61%T2AU**!sX0u?|I(yoy&Xveg7XBL&+>n6jd1##6d>TxE*Vj=8lWiG$4=u{1UbAa5QD>5_ z;Te^42v7K6Mmu4IWT6Rnm>oxrl~b<~^e3vbj-GCdHLIB_>59}Ya+~OF68NiH=?}2o zP(X7EN=quQn&)fK>M&kqF|<_*H`}c zk=+x)GU>{Af#vx&s?`UKUsz})g^Pc&?Ka@t5$n$bqf6{r1>#mWx6Ep>9|A}VmWRnowVo`OyCr^fHsf# zQjQ3Ttp7y#iQY8l`zEUW)(@gGQdt(~rkxlkefskT(t%@i8=|p1Y9Dc5bc+z#n$s13 zGJk|V0+&Ekh(F};PJzQKKo+FG@KV8a<$gmNSD;7rd_nRdc%?9)p!|B-@P~kxQG}~B zi|{0}@}zKC(rlFUYp*dO1RuvPC^DQOkX4<+EwvBAC{IZQdYxoq1Za!MW7%p7gGr=j zzWnAq%)^O2$eItftC#TTSArUyL$U54-O7e|)4_7%Q^2tZ^0-d&3J1}qCzR4dWX!)4 zzIEKjgnYgMus^>6uw4Jm8ga6>GBtMjpNRJ6CP~W=37~||gMo_p@GA@#-3)+cVYnU> zE5=Y4kzl+EbEh%dhQokB{gqNDqx%5*qBusWV%!iprn$S!;oN_6E3?0+umADVs4ako z?P+t?m?};gev9JXQ#Q&KBpzkHPde_CGu-y z<{}RRAx=xlv#mVi+Ibrgx~ujW$h{?zPfhz)Kp7kmYS&_|97b&H&1;J-mzrBWAvY} zh8-I8hl_RK2+nnf&}!W0P+>5?#?7>npshe<1~&l_xqKd0_>dl_^RMRq@-Myz&|TKZBj1=Q()) zF{dBjv5)h=&Z)Aevx}+i|7=R9rG^Di!sa)sZCl&ctX4&LScQ-kMncgO(9o6W6)yd< z@Rk!vkja*X_N3H=BavGoR0@u0<}m-7|2v!0+2h~S2Q&a=lTH91OJsvms2MT~ zY=c@LO5i`mLpBd(vh|)I&^A3TQLtr>w=zoyzTd=^f@TPu&+*2MtqE$Avf>l>}V|3-8Fp2hzo3y<)hr_|NO(&oSD z!vEjTWBxbKTiShVl-U{n*B3#)3a8$`{~Pk}J@elZ=>Pqp|MQ}jrGv7KrNcjW%TN_< zZz8kG{#}XoeWf7qY?D)L)8?Q-b@Na&>i=)(@uNo zr;cH98T3$Iau8Hn*@vXi{A@YehxDE2zX~o+RY`)6-X{8~hMpc#C`|8y> zU8Mnv5A0dNCf{Ims*|l-^ z(MRp{qoGohB34|ggDI*p!Aw|MFyJ|v+<+E3brfrI)|+l3W~CQLPbnF@G0)P~Ly!1TJLp}xh8uW`Q+RB-v`MRYZ9Gam3cM%{ zb4Cb*f)0deR~wtNb*8w-LlIF>kc7DAv>T0D(a3@l`k4TFnrO+g9XH7;nYOHxjc4lq zMmaW6qpgAgy)MckYMhl?>sq;-1E)-1llUneeA!ya9KM$)DaNGu57Z5aE>=VST$#vb zFo=uRHr$0M{-ha>h(D_boS4zId;3B|Tpqo|?B?Z@I?G(?&Iei+-{9L_A9=h=Qfn-U z1wIUnQe9!z%_j$F_{rf&`ZFSott09gY~qrf@g3O=Y>vzAnXCyL!@(BqWa)Zqt!#_k zfZHuwS52|&&)aK;CHq9V-t9qt0au{$#6c*R#e5n3rje0hic7c7m{kW$p(_`wB=Gw7 z4k`1Hi;Mc@yA7dp@r~?@rfw)TkjAW++|pkfOG}0N|2guek}j8Zen(!+@7?qt_7ndX zB=BG6WJ31#F3#Vk3=aQr8T)3`{=p9nBHlKzE0I@v`{vJ}h8pd6vby&VgFhzH|q;=aonunAXL6G2y(X^CtAhWr*jI 
zGjpY@raZDQkg*aMq}Ni6cRF z{oWv}5`nhSAv>usX}m^GHt`f(t8@zHc?K|y5Zi=4G*UG1Sza{$Dpj%X8 zzEXaKT5N6F5j4J|w#qlZP!zS7BT)9b+!ZSJdToqJts1c!)fwih4d31vfb{}W)EgcA zH2pZ^8_k$9+WD2n`6q5XbOy8>3pcYH9 z07eUB+p}YD@AH!}p!iKv><2QF-Y^&xx^PAc1F13A{nUeCDg&{hnix#FiO!fe(^&%Qcux!h znu*S!s$&nnkeotYsDthh1dq(iQrE|#f_=xVgfiiL&-5eAcC-> z5L0l|DVEM$#ulf{bj+Y~7iD)j<~O8CYM8GW)dQGq)!mck)FqoL^X zwNdZb3->hFrbHFm?hLvut-*uK?zXn3q1z|UX{RZ;-WiLoOjnle!xs+W0-8D)kjU#R z+S|A^HkRg$Ij%N4v~k`jyHffKaC~=wg=9)V5h=|kLQ@;^W!o2^K+xG&2n`XCd>OY5Ydi= zgHH=lgy++erK8&+YeTl7VNyVm9-GfONlSlVb3)V9NW5tT!cJ8d7X)!b-$fb!s76{t z@d=Vg-5K_sqHA@Zx-L_}wVnc@L@GL9_K~Zl(h5@AR#FAiKad8~KeWCo@mgXIQ#~u{ zgYFwNz}2b6Vu@CP0XoqJ+dm8px(5W5-Jpis97F`+KM)TuP*X8H@zwiVKDKGVp59pI zifNHZr|B+PG|7|Y<*tqap0CvG7tbR1R>jn70t1X`XJixiMVcHf%Ez*=xm1(CrTSDt z0cle!+{8*Ja&EOZ4@$qhBuKQ$U95Q%rc7tg$VRhk?3=pE&n+T3upZg^ZJc9~c2es% zh7>+|mrmA-p&v}|OtxqmHIBgUxL~^0+cpfkSK2mhh+4b=^F1Xgd2)}U*Yp+H?ls#z zrLxWg_hm}AfK2XYWr!rzW4g;+^^&bW%LmbtRai9f3PjU${r@n`JThy-cphbcwn)rq9{A$Ht`lmYKxOacy z6v2R(?gHhD5@&kB-Eg?4!hAoD7~(h>(R!s1c1Hx#s9vGPePUR|of32bS`J5U5w{F) z>0<^ktO2UHg<0{oxkdOQ;}coZDQph8p6ruj*_?uqURCMTac;>T#v+l1Tc~%^k-Vd@ zkc5y35jVNc49vZpZx;gG$h{%yslDI%Lqga1&&;mN{Ush1c7p>7e-(zp}6E7f-XmJb4nhk zb8zS+{IVbL$QVF8pf8}~kQ|dHJAEATmmnrb_wLG}-yHe>W|A&Y|;muy-d^t^<&)g5SJfaTH@P1%euONny=mxo+C z4N&w#biWY41r8k~468tvuYVh&XN&d#%QtIf9;iVXfWY)#j=l`&B~lqDT@28+Y!0E+MkfC}}H*#(WKKdJJq=O$vNYCb(ZG@p{fJgu;h z21oHQ(14?LeT>n5)s;uD@5&ohU!@wX8w*lB6i@GEH0pM>YTG+RAIWZD;4#F1&F%Jp zXZUml2sH0!lYJT?&sA!qwez6cXzJEd(1ZC~kT5kZSp7(@=H2$Azb_*W&6aA|9iwCL zdX7Q=42;@dspHDwYE?miGX#L^3xD&%BI&fN9^;`v4OjQXPBaBmOF1;#C)8XA(WFlH zycro;DS2?(G&6wkr6rqC>rqDv3nfGw3hmN_9Al>TgvmGsL8_hXx09};l9Ow@)F5@y z#VH5WigLDwZE4nh^7&@g{1FV^UZ%_LJ-s<{HN*2R$OPg@R~Z`c-ET*2}XB@9xvAjrK&hS=f|R8Gr9 zr|0TGOsI7RD+4+2{ZiwdVD@2zmg~g@^D--YL;6UYGSM8i$NbQr4!c7T9rg!8;TM0E zT#@?&S=t>GQm)*ua|?TLT2ktj#`|R<_*FAkOu2Pz$wEc%-=Y9V*$&dg+wIei3b*O8 z2|m$!jJG!J!ZGbbIa!(Af~oSyZV+~M1qGvelMzPNE_%5?c2>;MeeG2^N?JDKjFYCy z7SbPWH-$cWF9~fX%9~v99L!G(wi!PFp>rB!9xj7=Cv|F+7CsGNwY0Q_J%FID%C^CBZQfJ9K(HK%k31j~e#&?hQ zNuD6gRkVckU)v+53-fc} z7ZCzYN-5RG4H7;>>Hg?LU9&5_aua?A0)0dpew1#MMlu)LHe(M;OHjHIUl7|%%)YPo z0cBk;AOY00%Fe6heoN*$(b<)Cd#^8Iu;-2v@>cE-OB$icUF9EEoaC&q8z9}jMTT2I z8`9;jT%z0;dy4!8U;GW{i`)3!c6&oWY`J3669C!tM<5nQFFrFRglU8f)5Op$GtR-3 zn!+SPCw|04sv?%YZ(a7#L?vsdr7ss@WKAw&A*}-1S|9~cL%uA+E~>N6QklFE>8W|% zyX-qAUGTY1hQ-+um`2|&ji0cY*(qN!zp{YpDO-r>jPk*yuVSay<)cUt`t@&FPF_&$ zcHwu1(SQ`I-l8~vYyUxm@D1UEdFJ$f5Sw^HPH7b!9 zzYT3gKMF((N(v0#4f_jPfVZ=ApN^jQJe-X$`A?X+vWjLn_%31KXE*}5_}d8 zw_B1+a#6T1?>M{ronLbHIlEsMf93muJ7AH5h%;i99<~JX^;EAgEB1uHralD*!aJ@F zV2ruuFe9i2Q1C?^^kmVy921eb=tLDD43@-AgL^rQ3IO9%+vi_&R2^dpr}x{bCVPej z7G0-0o64uyWNtr*loIvslyo0%)KSDDKjfThe0hcqs)(C-MH1>bNGBDRTW~scy_{w} zp^aq8Qb!h9Lwielq%C1b8=?Z=&U)ST&PHbS)8Xzjh2DF?d{iAv)Eh)wsUnf>UtXN( zL7=$%YrZ#|^c{MYmhn!zV#t*(jdmYdCpwqpZ{v&L8KIuKn`@IIZfp!uo}c;7J57N` zAxyZ-uA4=Gzl~Ovycz%MW9ZL7N+nRo&1cfNn9(1H5eM;V_4Z_qVann7F>5f>%{rf= zPBZFaV@_Sobl?Fy&KXyzFDV*FIdhS5`Uc~S^Gjo)aiTHgn#<0C=9o-a-}@}xDor;D zZyZ|fvf;+=3MZd>SR1F^F`RJEZo+|MdyJYQAEauKu%WDol~ayrGU3zzbHKsnHKZ*z zFiwUkL@DZ>!*x05ql&EBq@_Vqv83&?@~q5?lVmffQZ+V-=qL+!u4Xs2Z2zdCQ3U7B&QR9_Iggy} z(om{Y9eU;IPe`+p1ifLx-XWh?wI)xU9ik+m#g&pGdB5Bi<`PR*?92lE0+TkRuXI)z z5LP!N2+tTc%cB6B1F-!fj#}>S!vnpgVU~3!*U1ej^)vjUH4s-bd^%B=ItQqDCGbrEzNQi(dJ`J}-U=2{7-d zK8k^Rlq2N#0G?9&1?HSle2vlkj^KWSBYTwx`2?9TU_DX#J+f+qLiZCqY1TXHFxXZqYMuD@RU$TgcnCC{_(vwZ-*uX)~go#%PK z@}2Km_5aQ~(<3cXeJN6|F8X_1@L%@xTzs}$_*E|a^_URF_qcF;Pfhoe?FTFwvjm1o z8onf@OY@jC2tVcMaZS;|T!Ks(wOgPpRzRnFS-^RZ4E!9dsnj9sFt609a|jJbb1Dt@ z<=Gal2jDEupxUSwWu6zp<<&RnAA;d&4gKVG0iu6g(DsST(4)z6R)zDpfaQ}v{5ARt 
zyhwvMtF%b-YazR5XLz+oh=mn;y-Mf2a8>7?2v8qX;19y?b>Z5laGHvzH;Nu9S`B8} zI)qN$GbXIQ1VL3lnof^6TS~rvPVg4V?Dl2Bb*K2z4E{5vy<(@@K_cN@U>R!>aUIRnb zL*)=787*cs#zb31zBC49x$`=fkQbMAef)L2$dR{)6BAz!t5U_B#1zZG`^neKSS22oJ#5B=gl%U=WeqL9REF2g zZnfCb0?quf?Ztj$VXvDSWoK`0L=Zxem2q}!XWLoT-kYMOx)!7fcgT35uC~0pySEme z`{wGWTkGr7>+Kb^n;W?BZH6ZP(9tQX%-7zF>vc2}LuWDI(9kh1G#7B99r4x6;_-V+k&c{nPUrR zAXJGRiMe~aup{0qzmLNjS_BC4cB#sXjckx{%_c&^xy{M61xEb>KW_AG5VFXUOjAG4 z^>Qlm9A#1N{4snY=(AmWzatb!ngqiqPbBZ7>Uhb3)dTkSGcL#&SH>iMO-IJBPua`u zo)LWZ>=NZLr758j{%(|uQuZ)pXq_4c!!>s|aDM9#`~1bzK3J1^^D#<2bNCccH7~-X}Ggi!pIIF>uFx%aPARGQsnC8ZQc8lrQ5o~smqOg>Ti^GNme94*w z)JZy{_{#$jxGQ&`M z!OMvZMHR>8*^>eS%o*6hJwn!l8VOOjZQJvh)@tnHVW&*GYPuxqXw}%M!(f-SQf`=L z5;=5w2;%82VMH6Xi&-K3W)o&K^+vJCepWZ-rW%+Dc6X3(){z$@4zjYxQ|}8UIojeC zYZpQ1dU{fy=oTr<4VX?$q)LP}IUmpiez^O&N3E_qPpchGTi5ZM6-2ScWlQq%V&R2Euz zO|Q0Hx>lY1Q1cW5xHv5!0OGU~PVEqSuy#fD72d#O`N!C;o=m+YioGu-wH2k6!t<~K zSr`E=W9)!g==~x9VV~-8{4ZN9{~-A9zJpRe%NGg$+MDuI-dH|b@BD)~>pPCGUNNzY zMDg||0@XGQgw`YCt5C&A{_+J}mvV9Wg{6V%2n#YSRN{AP#PY?1FF1#|vO_%e+#`|2*~wGAJaeRX6=IzFNeWhz6gJc8+(03Ph4y6ELAm=AkN7TOgMUEw*N{= z_)EIDQx5q22oUR+_b*tazu9+pX|n1c*IB-}{DqIj z-?E|ks{o3AGRNb;+iKcHkZvYJvFsW&83RAPs1Oh@IWy%l#5x2oUP6ZCtv+b|q>jsf zZ_9XO;V!>n`UxH1LvH8)L4?8raIvasEhkpQoJ`%!5rBs!0Tu(s_D{`4opB;57)pkX z4$A^8CsD3U5*!|bHIEqsn~{q+Ddj$ME@Gq4JXtgVz&7l{Ok!@?EA{B3P~NAqb9)4? zkQo30A^EbHfQ@87G5&EQTd`frrwL)&Yw?%-W@uy^Gn23%j?Y!Iea2xw<-f;esq zf%w5WN@E1}zyXtYv}}`U^B>W`>XPmdLj%4{P298|SisrE;7HvXX;A}Ffi8B#3Lr;1 zHt6zVb`8{#+e$*k?w8|O{Uh|&AG}|DG1PFo1i?Y*cQm$ZwtGcVgMwtBUDa{~L1KT-{jET4w60>{KZ27vXrHJ;fW{6| z=|Y4!&UX020wU1>1iRgB@Q#m~1^Z^9CG1LqDhYBrnx%IEdIty z!46iOoKlKs)c}newDG)rWUikD%j`)p z_w9Ph&e40=(2eBy;T!}*1p1f1SAUDP9iWy^u^Ubdj21Kn{46;GR+hwLO=4D11@c~V zI8x&(D({K~Df2E)Nx_yQvYfh4;MbMJ@Z}=Dt3_>iim~QZ*hZIlEs0mEb z_54+&*?wMD`2#vsQRN3KvoT>hWofI_Vf(^C1ff-Ike@h@saEf7g}<9T`W;HAne-Nd z>RR+&SP35w)xKn8^U$7))PsM!jKwYZ*RzEcG-OlTrX3}9a{q%#Un5E5W{{hp>w~;` zGky+3(vJvQyGwBo`tCpmo0mo((?nM8vf9aXrrY1Ve}~TuVkB(zeds^jEfI}xGBCM2 zL1|#tycSaWCurP+0MiActG3LCas@_@tao@(R1ANlwB$4K53egNE_;!&(%@Qo$>h`^1S_!hN6 z)vZtG$8fN!|BXBJ=SI>e(LAU(y(i*PHvgQ2llulxS8>qsimv7yL}0q_E5WiAz7)(f zC(ahFvG8&HN9+6^jGyLHM~$)7auppeWh_^zKk&C_MQ~8;N??OlyH~azgz5fe^>~7F zl3HnPN3z-kN)I$4@`CLCMQx3sG~V8hPS^}XDXZrQA>}mQPw%7&!sd(Pp^P=tgp-s^ zjl}1-KRPNWXgV_K^HkP__SR`S-|OF0bR-N5>I%ODj&1JUeAQ3$9i;B~$S6}*^tK?= z**%aCiH7y?xdY?{LgVP}S0HOh%0%LI$wRx;$T|~Y8R)Vdwa}kGWv8?SJVm^>r6+%I z#lj1aR94{@MP;t-scEYQWc#xFA30^}?|BeX*W#9OL;Q9#WqaaM546j5j29((^_8Nu z4uq}ESLr~r*O7E7$D{!k9W>`!SLoyA53i9QwRB{!pHe8um|aDE`Cg0O*{jmor)^t)3`>V>SWN-2VJcFmj^1?~tT=JrP`fVh*t zXHarp=8HEcR#vFe+1a%XXuK+)oFs`GDD}#Z+TJ}Ri`FvKO@ek2ayn}yaOi%(8p%2$ zpEu)v0Jym@f}U|-;}CbR=9{#<^z28PzkkTNvyKvJDZe+^VS2bES3N@Jq!-*}{oQlz z@8bgC_KnDnT4}d#&Cpr!%Yb?E!brx0!eVOw~;lLwUoz#Np%d$o%9scc3&zPm`%G((Le|6o1 zM(VhOw)!f84zG^)tZ1?Egv)d8cdNi+T${=5kV+j;Wf%2{3g@FHp^Gf*qO0q!u$=m9 zCaY`4mRqJ;FTH5`a$affE5dJrk~k`HTP_7nGTY@B9o9vvnbytaID;^b=Tzp7Q#DmD zC(XEN)Ktn39z5|G!wsVNnHi) z%^q94!lL|hF`IijA^9NR0F$@h7k5R^ljOW(;Td9grRN0Mb)l_l7##{2nPQ@?;VjXv zaLZG}yuf$r$<79rVPpXg?6iiieX|r#&`p#Con2i%S8*8F}(E) zI5E6c3tG*<;m~6>!&H!GJ6zEuhH7mkAzovdhLy;)q z{H2*8I^Pb}xC4s^6Y}6bJvMu=8>g&I)7!N!5QG$xseeU#CC?ZM-TbjsHwHgDGrsD= z{%f;@Sod+Ch66Ko2WF~;Ty)v>&x^aovCbCbD7>qF*!?BXmOV3(s|nxsb*Lx_2lpB7 zokUnzrk;P=T-&kUHO}td+Zdj!3n&NR?K~cRU zAXU!DCp?51{J4w^`cV#ye}(`SQhGQkkMu}O3M*BWt4UsC^jCFUy;wTINYmhD$AT;4 z?Xd{HaJjP`raZ39qAm;%beDbrLpbRf(mkKbANan7XsL>_pE2oo^$TgdidjRP!5-`% zv0d!|iKN$c0(T|L0C~XD0aS8t{*&#LnhE;1Kb<9&=c2B+9JeLvJr*AyyRh%@jHej=AetOMSlz^=!kxX>>B{2B1uIrQyfd8KjJ+DBy!h)~*(!|&L4^Q_07SQ~E 
zcemVP`{9CwFvPFu7pyVGCLhH?LhEVb2{7U+Z_>o25#+3<|8%1T^5dh}*4(kfJGry} zm%r#hU+__Z;;*4fMrX=Bkc@7|v^*B;HAl0((IBPPii%X9+u3DDF6%bI&6?Eu$8&aWVqHIM7mK6?Uvq$1|(-T|)IV<>e?!(rY zqkmO1MRaLeTR=)io(0GVtQT@s6rN%C6;nS3@eu;P#ry4q;^O@1ZKCJyp_Jo)Ty^QW z+vweTx_DLm{P-XSBj~Sl<%_b^$=}odJ!S2wAcxenmzFGX1t&Qp8Vxz2VT`uQsQYtdn&_0xVivIcxZ_hnrRtwq4cZSj1c-SG9 z7vHBCA=fd0O1<4*=lu$6pn~_pVKyL@ztw1swbZi0B?spLo56ZKu5;7ZeUml1Ws1?u zqMf1p{5myAzeX$lAi{jIUqo1g4!zWLMm9cfWcnw`k6*BR^?$2(&yW?>w;G$EmTA@a z6?y#K$C~ZT8+v{87n5Dm&H6Pb_EQ@V0IWmG9cG=O;(;5aMWWrIPzz4Q`mhK;qQp~a z+BbQrEQ+w{SeiuG-~Po5f=^EvlouB@_|4xQXH@A~KgpFHrwu%dwuCR)=B&C(y6J4J zvoGk9;lLs9%iA-IJGU#RgnZZR+@{5lYl8(e1h6&>Vc_mvg0d@);X zji4T|n#lB!>pfL|8tQYkw?U2bD`W{na&;*|znjmalA&f;*U++_aBYerq;&C8Kw7mI z7tsG*?7*5j&dU)Lje;^{D_h`%(dK|pB*A*1(Jj)w^mZ9HB|vGLkF1GEFhu&rH=r=8 zMxO42e{Si6$m+Zj`_mXb&w5Q(i|Yxyg?juUrY}78uo@~3v84|8dfgbPd0iQJRdMj< zncCNGdMEcsxu#o#B5+XD{tsg*;j-eF8`mp~K8O1J!Z0+>0=7O=4M}E?)H)ENE;P*F z$Ox?ril_^p0g7xhDUf(q652l|562VFlC8^r8?lQv;TMvn+*8I}&+hIQYh2 z1}uQQaag&!-+DZ@|C+C$bN6W;S-Z@)d1|en+XGvjbOxCa-qAF*LA=6s(Jg+g;82f$ z(Vb)8I)AH@cdjGFAR5Rqd0wiNCu!xtqWbcTx&5kslzTb^7A78~Xzw1($UV6S^VWiP zFd{Rimd-0CZC_Bu(WxBFW7+k{cOW7DxBBkJdJ;VsJ4Z@lERQr%3eVv&$%)b%<~ zCl^Y4NgO}js@u{|o~KTgH}>!* z_iDNqX2(As7T0xivMH|3SC1ivm8Q}6Ffcd7owUKN5lHAtzMM4<0v+ykUT!QiowO;`@%JGv+K$bBx@*S7C8GJVqQ_K>12}M`f_Ys=S zKFh}HM9#6Izb$Y{wYzItTy+l5U2oL%boCJn?R3?jP@n$zSIwlmyGq30Cw4QBO|14` zW5c);AN*J3&eMFAk$SR~2k|&+&Bc$e>s%c{`?d~85S-UWjA>DS5+;UKZ}5oVa5O(N zqqc@>)nee)+4MUjH?FGv%hm2{IlIF-QX}ym-7ok4Z9{V+ZHVZQl$A*x!(q%<2~iVv znUa+BX35&lCb#9VE-~Y^W_f;Xhl%vgjwdjzMy$FsSIj&ok}L+X`4>J=9BkN&nu^E*gbhj3(+D>C4E z@Fwq_=N)^bKFSHTzZk?-gNU$@l}r}dwGyh_fNi=9b|n}J>&;G!lzilbWF4B}BBq4f zYIOl?b)PSh#XTPp4IS5ZR_2C!E)Z`zH0OW%4;&~z7UAyA-X|sh9@~>cQW^COA9hV4 zXcA6qUo9P{bW1_2`eo6%hgbN%(G-F1xTvq!sc?4wN6Q4`e9Hku zFwvlAcRY?6h^Fj$R8zCNEDq8`=uZB8D-xn)tA<^bFFy}4$vA}Xq0jAsv1&5!h!yRA zU()KLJya5MQ`q&LKdH#fwq&(bNFS{sKlEh_{N%{XCGO+po#(+WCLmKW6&5iOHny>g z3*VFN?mx!16V5{zyuMWDVP8U*|BGT$(%IO|)?EF|OI*sq&RovH!N%=>i_c?K*A>>k zyg1+~++zY4Q)J;VWN0axhoIKx;l&G$gvj(#go^pZskEVj8^}is3Jw26LzYYVos0HX zRPvmK$dVxM8(Tc?pHFe0Z3uq){{#OK3i-ra#@+;*=ui8)y6hsRv z4Fxx1c1+fr!VI{L3DFMwXKrfl#Q8hfP@ajgEau&QMCxd{g#!T^;ATXW)nUg&$-n25 zruy3V!!;{?OTobo|0GAxe`Acn3GV@W=&n;~&9 zQM>NWW~R@OYORkJAo+eq1!4vzmf9K%plR4(tB@TR&FSbDoRgJ8qVcH#;7lQub*nq&?Z>7WM=oeEVjkaG zT#f)=o!M2DO5hLR+op>t0CixJCIeXH*+z{-XS|%jx)y(j&}Wo|3!l7{o)HU3m7LYyhv*xF&tq z%IN7N;D4raue&&hm0xM=`qv`+TK@;_xAcGKuK(2|75~ar2Yw)geNLSmVxV@x89bQu zpViVKKnlkwjS&&c|-X6`~xdnh}Ps)Hs z4VbUL^{XNLf7_|Oi>tA%?SG5zax}esF*FH3d(JH^Gvr7Rp*n=t7frH!U;!y1gJB^i zY_M$KL_}mW&XKaDEi9K-wZR|q*L32&m+2n_8lq$xRznJ7p8}V>w+d@?uB!eS3#u<} zIaqi!b!w}a2;_BfUUhGMy#4dPx>)_>yZ`ai?Rk`}d0>~ce-PfY-b?Csd(28yX22L% zI7XI>OjIHYTk_@Xk;Gu^F52^Gn6E1&+?4MxDS2G_#PQ&yXPXP^<-p|2nLTb@AAQEY zI*UQ9Pmm{Kat}wuazpjSyXCdnrD&|C1c5DIb1TnzF}f4KIV6D)CJ!?&l&{T)e4U%3HTSYqsQ zo@zWB1o}ceQSV)<4G<)jM|@@YpL+XHuWsr5AYh^Q{K=wSV99D~4RRU52FufmMBMmd z_H}L#qe(}|I9ZyPRD6kT>Ivj&2Y?qVZq<4bG_co_DP`sE*_Xw8D;+7QR$Uq(rr+u> z8bHUWbV19i#)@@G4bCco@Xb<8u~wVDz9S`#k@ciJtlu@uP1U0X?yov8v9U3VOig2t zL9?n$P3=1U_Emi$#slR>N5wH-=J&T=EdUHA}_Z zZIl3nvMP*AZS9{cDqFanrA~S5BqxtNm9tlu;^`)3X&V4tMAkJ4gEIPl= zoV!Gyx0N{3DpD@)pv^iS*dl2FwANu;1;%EDl}JQ7MbxLMAp>)UwNwe{=V}O-5C*>F zu?Ny+F64jZn<+fKjF01}8h5H_3pey|;%bI;SFg$w8;IC<8l|3#Lz2;mNNik6sVTG3 z+Su^rIE#40C4a-587$U~%KedEEw1%r6wdvoMwpmlXH$xPnNQN#f%Z7|p)nC>WsuO= z4zyqapLS<8(UJ~Qi9d|dQijb_xhA2)v>la)<1md5s^R1N&PiuA$^k|A<+2C?OiHbj z>Bn$~t)>Y(Zb`8hW7q9xQ=s>Rv81V+UiuZJc<23HplI88isqRCId89fb`Kt|CxVIg znWcwprwXnotO>3s&Oypkte^9yJjlUVVxSe%_xlzmje|mYOVPH^vjA=?6xd0vaj0Oz 
zwJ4OJNiFdnHJX3rw&inskjryukl`*fRQ#SMod5J|KroJRsVXa5_$q7whSQ{gOi*s0 z1LeCy|JBWRsDPn7jCb4s(p|JZiZ8+*ExC@Vj)MF|*Vp{B(ziccSn`G1Br9bV(v!C2 z6#?eqpJBc9o@lJ#^p-`-=`4i&wFe>2)nlPK1p9yPFzJCzBQbpkcR>={YtamIw)3nt z(QEF;+)4`>8^_LU)_Q3 zC5_7lgi_6y>U%m)m@}Ku4C}=l^J=<<7c;99ec3p{aR+v=diuJR7uZi%aQv$oP?dn?@6Yu_+*^>T0ptf(oobdL;6)N-I!TO`zg^Xbv3#L0I~sn@WGk-^SmPh5>W+LB<+1PU}AKa?FCWF|qMNELOgdxR{ zbqE7@jVe+FklzdcD$!(A$&}}H*HQFTJ+AOrJYnhh}Yvta(B zQ_bW4Rr;R~&6PAKwgLWXS{Bnln(vUI+~g#kl{r+_zbngT`Y3`^Qf=!PxN4IYX#iW4 zucW7@LLJA9Zh3(rj~&SyN_pjO8H&)|(v%!BnMWySBJV=eSkB3YSTCyIeJ{i;(oc%_hk{$_l;v>nWSB)oVeg+blh=HB5JSlG_r7@P z3q;aFoZjD_qS@zygYqCn=;Zxjo!?NK!%J$ z52lOP`8G3feEj+HTp@Tnn9X~nG=;tS+z}u{mQX_J0kxtr)O30YD%oo)L@wy`jpQYM z@M>Me=95k1p*FW~rHiV1CIfVc{K8r|#Kt(ApkXKsDG$_>76UGNhHExFCw#Ky9*B-z zNq2ga*xax!HMf_|Vp-86r{;~YgQKqu7%szk8$hpvi_2I`OVbG1doP(`gn}=W<8%Gn z%81#&WjkH4GV;4u43EtSW>K_Ta3Zj!XF?;SO3V#q=<=>Tc^@?A`i;&`-cYj|;^ zEo#Jl5zSr~_V-4}y8pnufXLa80vZY4z2ko7fj>DR)#z=wWuS1$$W!L?(y}YC+yQ|G z@L&`2upy3f>~*IquAjkVNU>}c10(fq#HdbK$~Q3l6|=@-eBbo>B9(6xV`*)sae58*f zym~RRVx;xoCG3`JV`xo z!lFw)=t2Hy)e!IFs?0~7osWk(d%^wxq&>_XD4+U#y&-VF%4z?XH^i4w`TxpF{`XhZ z%G}iEzf!T(l>g;W9<~K+)$g!{UvhW{E0Lis(S^%I8OF&%kr!gJ&fMOpM=&=Aj@wuL zBX?*6i51Qb$uhkwkFYkaD_UDE+)rh1c;(&Y=B$3)J&iJfQSx!1NGgPtK!$c9OtJuu zX(pV$bfuJpRR|K(dp@^j}i&HeJOh@|7lWo8^$*o~Xqo z5Sb+!EtJ&e@6F+h&+_1ETbg7LfP5GZjvIUIN3ibCOldAv z)>YdO|NH$x7AC8dr=<2ekiY1%fN*r~e5h6Yaw<{XIErujKV~tiyrvV_DV0AzEknC- zR^xKM3i<1UkvqBj3C{wDvytOd+YtDSGu!gEMg+!&|8BQrT*|p)(dwQLEy+ zMtMzij3zo40)CA!BKZF~yWg?#lWhqD3@qR)gh~D{uZaJO;{OWV8XZ_)J@r3=)T|kt zUS1pXr6-`!Z}w2QR7nP%d?ecf90;K_7C3d!UZ`N(TZoWNN^Q~RjVhQG{Y<%E1PpV^4 z-m-K+$A~-+VDABs^Q@U*)YvhY4Znn2^w>732H?NRK(5QSS$V@D7yz2BVX4)f5A04~$WbxGOam22>t&uD)JB8-~yiQW6ik;FGblY_I>SvB_z2?PS z*Qm&qbKI{H1V@YGWzpx`!v)WeLT02};JJo*#f$a*FH?IIad-^(;9XC#YTWN6;Z6+S zm4O1KH=#V@FJw7Pha0!9Vb%ZIM$)a`VRMoiN&C|$YA3~ZC*8ayZRY^fyuP6$n%2IU z$#XceYZeqLTXw(m$_z|33I$B4k~NZO>pP6)H_}R{E$i%USGy{l{-jOE;%CloYPEU+ zRFxOn4;7lIOh!7abb23YKD+_-?O z0FP9otcAh+oSj;=f#$&*ExUHpd&e#bSF%#8*&ItcL2H$Sa)?pt0Xtf+t)z$_u^wZi z44oE}r4kIZGy3!Mc8q$B&6JqtnHZ>Znn!Zh@6rgIu|yU+zG8q`q9%B18|T|oN3zMq z`l&D;U!OL~%>vo&q0>Y==~zLiCZk4v%s_7!9DxQ~id1LLE93gf*gg&2$|hB#j8;?3 z5v4S;oM6rT{Y;I+#FdmNw z){d%tNM<<#GN%n9ox7B=3#;u7unZ~tLB_vRZ52a&2=IM)2VkXm=L+Iqq~uk#Dug|x z>S84e+A7EiOY5lj*!q?6HDkNh~0g;0Jy(al!ZHHDtur9T$y-~)94HelX1NHjXWIM7UAe}$?jiz z9?P4`I0JM=G5K{3_%2jPLC^_Mlw?-kYYgb7`qGa3@dn|^1fRMwiyM@Ch z;CB&o7&&?c5e>h`IM;Wnha0QKnEp=$hA8TJgR-07N~U5(>9vJzeoFsSRBkDq=x(YgEMpb=l4TDD`2 zwVJpWGTA_u7}?ecW7s6%rUs&NXD3+n;jB86`X?8(l3MBo6)PdakI6V6a}22{)8ilT zM~T*mU}__xSy|6XSrJ^%lDAR3Lft%+yxC|ZUvSO_nqMX!_ul3;R#*{~4DA=h$bP)%8Yv9X zyp><|e8=_ttI}ZAwOd#dlnSjck#6%273{E$kJuCGu=I@O)&6ID{nWF5@gLb16sj|&Sb~+du4e4O_%_o`Ix4NRrAsyr1_}MuP94s>de8cH-OUkVPk3+K z&jW)It9QiU-ti~AuJkL`XMca8Oh4$SyJ=`-5WU<{cIh+XVH#e4d&zive_UHC!pN>W z3TB;Mn5i)9Qn)#6@lo4QpI3jFYc0~+jS)4AFz8fVC;lD^+idw^S~Qhq>Tg(!3$yLD zzktzoFrU@6s4wwCMz}edpF5i5Q1IMmEJQHzp(LAt)pgN3&O!&d?3W@6U4)I^2V{;- z6A(?zd93hS*uQmnh4T)nHnE{wVhh(=MMD(h(P4+^p83Om6t<*cUW>l(qJzr%5vp@K zN27ka(L{JX=1~e2^)F^i=TYj&;<7jyUUR2Bek^A8+3Up*&Xwc{)1nRR5CT8vG>ExV zHnF3UqXJOAno_?bnhCX-&kwI~Ti8t4`n0%Up>!U`ZvK^w2+0Cs-b9%w%4`$+To|k= zKtgc&l}P`*8IS>8DOe?EB84^kx4BQp3<7P{Pq}&p%xF_81pg!l2|u=&I{AuUgmF5n zJQCTLv}%}xbFGYtKfbba{CBo)lWW%Z>i(_NvLhoQZ*5-@2l&x>e+I~0Nld3UI9tdL zRzu8}i;X!h8LHVvN?C+|M81e>Jr38%&*9LYQec9Ax>?NN+9(_>XSRv&6hlCYB`>Qm z1&ygi{Y()OU4@D_jd_-7vDILR{>o|7-k)Sjdxkjgvi{@S>6GqiF|o`*Otr;P)kLHN zZkpts;0zw_6;?f(@4S1FN=m!4^mv~W+lJA`&7RH%2$)49z0A+8@0BCHtj|yH--AEL z0tW6G%X-+J+5a{5*WKaM0QDznf;V?L5&uQw+yegDNDP`hA;0XPYc6e0;Xv6|i|^F2WB)Z$LR|HR4 
zTQsRAby9(^Z@yATyOgcfQw7cKyr^3Tz7lc7+JEwwzA7)|2x+PtEb>nD(tpxJQm)Kn zW9K_*r!L%~N*vS8<5T=iv|o!zTe9k_2jC_j*7ik^M_ zaf%k{WX{-;0*`t`G!&`eW;gChVXnJ-Rn)To8vW-?>>a%QU1v`ZC=U)f8iA@%JG0mZ zDqH;~mgBnrCP~1II<=V9;EBL)J+xzCoiRBaeH&J6rL!{4zIY8tZka?_FBeQeNO3q6 zyG_alW54Ba&wQf{&F1v-r1R6ID)PTsqjIBc+5MHkcW5Fnvi~{-FjKe)t1bl}Y;z@< z=!%zvpRua>>t_x}^}z0<7MI!H2v6|XAyR9!t50q-A)xk0nflgF4*OQlCGK==4S|wc zRMsSscNhRzHMBU8TdcHN!q^I}x0iXJ%uehac|Zs_B$p@CnF)HeXPpB_Za}F{<@6-4 zl%kml@}kHQ(ypD8FsPJ2=14xXJE|b20RUIgs!2|R3>LUMGF6X*B_I|$`Qg=;zm7C z{mEDy9dTmPbued7mlO@phdmAmJ7p@GR1bjCkMw6*G7#4+`k>fk1czdJUB!e@Q(~6# zwo%@p@V5RL0ABU2LH7Asq^quDUho@H>eTZH9f*no9fY0T zD_-9px3e}A!>>kv5wk91%C9R1J_Nh!*&Kk$J3KNxC}c_@zlgpJZ+5L)Nw|^p=2ue}CJtm;uj*Iqr)K})kA$xtNUEvX;4!Px*^&9T_`IN{D z{6~QY=Nau6EzpvufB^hflc#XIsSq0Y9(nf$d~6ZwK}fal92)fr%T3=q{0mP-EyP_G z)UR5h@IX}3Qll2b0oCAcBF>b*@Etu*aTLPU<%C>KoOrk=x?pN!#f_Og-w+;xbFgjQ zXp`et%lDBBh~OcFnMKMUoox0YwBNy`N0q~bSPh@+enQ=4RUw1) zpovN`QoV>vZ#5LvC;cl|6jPr}O5tu!Ipoyib8iXqy}TeJ;4+_7r<1kV0v5?Kv>fYp zg>9L`;XwXa&W7-jf|9~uP2iyF5`5AJ`Q~p4eBU$MCC00`rcSF>`&0fbd^_eqR+}mK z4n*PMMa&FOcc)vTUR zlDUAn-mh`ahi_`f`=39JYTNVjsTa_Y3b1GOIi)6dY)D}xeshB0T8Eov5%UhWd1)u}kjEQ|LDo{tqKKrYIfVz~@dp!! zMOnah@vp)%_-jDTUG09l+;{CkDCH|Q{NqX*uHa1YxFShy*1+;J`gywKaz|2Q{lG8x zP?KBur`}r`!WLKXY_K;C8$EWG>jY3UIh{+BLv0=2)KH%P}6xE2kg)%(-uA6lC?u8}{K(#P*c zE9C8t*u%j2r_{;Rpe1A{9nNXU;b_N0vNgyK!EZVut~}+R2rcbsHilqsOviYh-pYX= zHw@53nlmwYI5W5KP>&`dBZe0Jn?nAdC^HY1wlR6$u^PbpB#AS&5L6zqrXN&7*N2Q` z+Rae1EwS)H=aVSIkr8Ek^1jy2iS2o7mqm~Mr&g5=jjt7VxwglQ^`h#Mx+x2v|9ZAwE$i_9918MjJxTMr?n!bZ6n$}y11u8I9COTU`Z$Fi z!AeAQLMw^gp_{+0QTEJrhL424pVDp%wpku~XRlD3iv{vQ!lAf!_jyqd_h}+Tr1XG| z`*FT*NbPqvHCUsYAkFnM`@l4u_QH&bszpUK#M~XLJt{%?00GXY?u_{gj3Hvs!=N(I z(=AuWPijyoU!r?aFTsa8pLB&cx}$*%;K$e*XqF{~*rA-qn)h^!(-;e}O#B$|S~c+U zN4vyOK0vmtx$5K!?g*+J@G1NmlEI=pyZXZ69tAv=@`t%ag_Hk{LP~OH9iE)I= zaJ69b4kuCkV0V zo(M0#>phpQ_)@j;h%m{-a*LGi(72TP)ws2w*@4|C-3+;=5DmC4s7Lp95%n%@Ko zfdr3-a7m*dys9iIci$A=4NPJ`HfJ;hujLgU)ZRuJI`n;Pw|yksu!#LQnJ#dJysgNb z@@qwR^wrk(jbq4H?d!lNyy72~Dnn87KxsgQ!)|*m(DRM+eC$wh7KnS-mho3|KE)7h zK3k;qZ;K1Lj6uEXLYUYi)1FN}F@-xJ z@@3Hb84sl|j{4$3J}aTY@cbX@pzB_qM~APljrjju6P0tY{C@ zpUCOz_NFmALMv1*blCcwUD3?U6tYs+N%cmJ98D%3)%)Xu^uvzF zS5O!sc#X6?EwsYkvPo6A%O8&y8sCCQH<%f2togVwW&{M;PR!a(ZT_A+jVAbf{@5kL zB@Z(hb$3U{T_}SKA_CoQVU-;j>2J=L#lZ~aQCFg-d<9rzs$_gO&d5N6eFSc z1ml8)P*FSi+k@!^M9nDWR5e@ATD8oxtDu=36Iv2!;dZzidIS(PCtEuXAtlBb1;H%Z zwnC^Ek*D)EX4#Q>R$$WA2sxC_t(!!6Tr?C#@{3}n{<^o;9id1RA&-Pig1e-2B1XpG zliNjgmd3c&%A}s>qf{_j#!Z`fu0xIwm4L0)OF=u(OEmp;bLCIaZX$&J_^Z%4Sq4GZ zPn6sV_#+6pJmDN_lx@1;Zw6Md_p0w9h6mHtzpuIEwNn>OnuRSC2=>fP^Hqgc)xu^4 z<3!s`cORHJh#?!nKI`Et7{3C27+EuH)Gw1f)aoP|B3y?fuVfvpYYmmukx0ya-)TQX zR{ggy5cNf4X|g)nl#jC9p>7|09_S7>1D2GTRBUTW zAkQ=JMRogZqG#v;^=11O6@rPPwvJkr{bW-Qg8`q8GoD#K`&Y+S#%&B>SGRL>;ZunM@49!}Uy zN|bBCJ%sO;@3wl0>0gbl3L@1^O60ONObz8ZI7nder>(udj-jt`;yj^nTQ$L9`OU9W zX4alF#$|GiR47%x@s&LV>2Sz2R6?;2R~5k6V>)nz!o_*1Y!$p>BC5&?hJg_MiE6UBy>RkVZj`9UWbRkN-Hk!S`=BS3t3uyX6)7SF#)71*}`~Ogz z1rap5H6~dhBJ83;q-Y<5V35C2&F^JI-it(=5D#v!fAi9p#UwV~2tZQI+W(Dv?1t9? 
zfh*xpxxO{-(VGB>!Q&0%^YW_F!@aZS#ucP|YaD#>wd1Fv&Z*SR&mc;asi}1G) z_H>`!akh-Zxq9#io(7%;a$)w+{QH)Y$?UK1Dt^4)up!Szcxnu}kn$0afcfJL#IL+S z5gF_Y30j;{lNrG6m~$Ay?)*V9fZuU@3=kd40=LhazjFrau>(Y>SJNtOz>8x_X-BlA zIpl{i>OarVGj1v(4?^1`R}aQB&WCRQzS~;7R{tDZG=HhgrW@B`W|#cdyj%YBky)P= zpxuOZkW>S6%q7U{VsB#G(^FMsH5QuGXhb(sY+!-R8Bmv6Sx3WzSW<1MPPN1!&PurYky(@`bP9tz z52}LH9Q?+FF5jR6-;|+GVdRA!qtd;}*-h&iIw3Tq3qF9sDIb1FFxGbo&fbG5n8$3F zyY&PWL{ys^dTO}oZ#@sIX^BKW*bon=;te9j5k+T%wJ zNJtoN1~YVj4~YRrlZl)b&kJqp+Z`DqT!la$x&&IxgOQw#yZd-nBP3!7FijBXD|IsU8Zl^ zc6?MKpJQ+7ka|tZQLfchD$PD|;K(9FiLE|eUZX#EZxhG!S-63C$jWX1Yd!6-Yxi-u zjULIr|0-Q%D9jz}IF~S%>0(jOqZ(Ln<$9PxiySr&2Oic7vb<8q=46)Ln%Z|<*z5&> z3f~Zw@m;vR(bESB<=Jqkxn(=#hQw42l(7)h`vMQQTttz9XW6^|^8EK7qhju4r_c*b zJIi`)MB$w@9epwdIfnEBR+?~);yd6C(LeMC& zn&&N*?-g&BBJcV;8&UoZi4Lmxcj16ojlxR~zMrf=O_^i1wGb9X-0@6_rpjPYemIin zmJb+;lHe;Yp=8G)Q(L1bzH*}I>}uAqhj4;g)PlvD9_e_ScR{Ipq|$8NvAvLD8MYr}xl=bU~)f%B3E>r3Bu9_t|ThF3C5~BdOve zEbk^r&r#PT&?^V1cb{72yEWH}TXEE}w>t!cY~rA+hNOTK8FAtIEoszp!qqptS&;r$ zaYV-NX96-h$6aR@1xz6_E0^N49mU)-v#bwtGJm)ibygzJ8!7|WIrcb`$XH~^!a#s& z{Db-0IOTFq#9!^j!n_F}#Z_nX{YzBK8XLPVmc&X`fT7!@$U-@2KM9soGbmOSAmqV z{nr$L^MBo_u^Joyf0E^=eo{Rt0{{e$IFA(#*kP@SQd6lWT2-#>` zP1)7_@IO!9lk>Zt?#CU?cuhiLF&)+XEM9B)cS(gvQT!X3`wL*{fArTS;Ak`J<84du zALKPz4}3nlG8Fo^MH0L|oK2-4xIY!~Oux~1sw!+It)&D3p;+N8AgqKI`ld6v71wy8I!eP0o~=RVcFQR2Gr(eP_JbSytoQ$Yt}l*4r@A8Me94y z8cTDWhqlq^qoAhbOzGBXv^Wa4vUz$(7B!mX`T=x_ueKRRDfg&Uc-e1+z4x$jyW_Pm zp?U;-R#xt^Z8Ev~`m`iL4*c#65Nn)q#=Y0l1AuD&+{|8-Gsij3LUZXpM0Bx0u7WWm zH|%yE@-#XEph2}-$-thl+S;__ciBxSSzHveP%~v}5I%u!z_l_KoW{KRx2=eB33umE zIYFtu^5=wGU`Jab8#}cnYry@9p5UE#U|VVvx_4l49JQ;jQdp(uw=$^A$EA$LM%vmE zvdEOaIcp5qX8wX{mYf0;#51~imYYPn4=k&#DsKTxo{_Mg*;S495?OBY?#gv=edYC* z^O@-sd-qa+U24xvcbL0@C7_6o!$`)sVr-jSJE4XQUQ$?L7}2(}Eixqv;L8AdJAVqc zq}RPgpnDb@E_;?6K58r3h4-!4rT4Ab#rLHLX?eMOfluJk=3i1@Gt1i#iA=O`M0@x! z(HtJP9BMHXEzuD93m|B&woj0g6T?f#^)>J>|I4C5?Gam>n9!8CT%~aT;=oco5d6U8 zMXl(=W;$ND_8+DD*?|5bJ!;8ebESXMUKBAf7YBwNVJibGaJ*(2G`F%wx)grqVPjudiaq^Kl&g$8A2 zWMxMr@_$c}d+;_B`#kUX-t|4VKH&_f^^EP0&=DPLW)H)UzBG%%Tra*5 z%$kyZe3I&S#gfie^z5)!twG={3Cuh)FdeA!Kj<-9** zvT*5%Tb`|QbE!iW-XcOuy39>D3oe6x{>&<#E$o8Ac|j)wq#kQzz|ATd=Z0K!p2$QE zPu?jL8Lb^y3_CQE{*}sTDe!2!dtlFjq&YLY@2#4>XS`}v#PLrpvc4*@q^O{mmnr5D zmyJq~t?8>FWU5vZdE(%4cuZuao0GNjp3~Dt*SLaxI#g_u>hu@k&9Ho*#CZP~lFJHj z(e!SYlLigyc?&5-YxlE{uuk$9b&l6d`uIlpg_z15dPo*iU&|Khx2*A5Fp;8iK_bdP z?T6|^7@lcx2j0T@x>X7|kuuBSB7<^zeY~R~4McconTxA2flHC0_jFxmSTv-~?zVT| zG_|yDqa9lkF*B6_{j=T>=M8r<0s;@z#h)3BQ4NLl@`Xr__o7;~M&dL3J8fP&zLfDfy z);ckcTev{@OUlZ`bCo(-3? 
z1u1xD`PKgSg?RqeVVsF<1SLF;XYA@Bsa&cY!I48ZJn1V<3d!?s=St?TLo zC0cNr`qD*M#s6f~X>SCNVkva^9A2ZP>CoJ9bvgXe_c}WdX-)pHM5m7O zrHt#g$F0AO+nGA;7dSJ?)|Mo~cf{z2L)Rz!`fpi73Zv)H=a5K)*$5sf_IZypi($P5 zsPwUc4~P-J1@^3C6-r9{V-u0Z&Sl7vNfmuMY4yy*cL>_)BmQF!8Om9Dej%cHxbIzA zhtV0d{=%cr?;bpBPjt@4w=#<>k5ee=TiWAXM2~tUGfm z$s&!Dm0R^V$}fOR*B^kGaipi~rx~A2cS0;t&khV1a4u38*XRUP~f za!rZMtay8bsLt6yFYl@>-y^31(*P!L^^s@mslZy(SMsv9bVoX`O#yBgEcjCmGpyc* zeH$Dw6vB5P*;jor+JOX@;6K#+xc)Z9B8M=x2a@Wx-{snPGpRmOC$zpsqW*JCh@M2Y z#K+M(>=#d^>Of9C`))h<=Bsy)6zaMJ&x-t%&+UcpLjV`jo4R2025 zXaG8EA!0lQa)|dx-@{O)qP6`$rhCkoQqZ`^SW8g-kOwrwsK8 z3ms*AIcyj}-1x&A&vSq{r=QMyp3CHdWH35!sad#!Sm>^|-|afB+Q;|Iq@LFgqIp#Z zD1%H+3I?6RGnk&IFo|u+E0dCxXz4yI^1i!QTu7uvIEH>i3rR{srcST`LIRwdV1P;W z+%AN1NIf@xxvVLiSX`8ILA8MzNqE&7>%jMzGt9wm78bo9<;h*W84i29^w!>V>{N+S zd`5Zmz^G;f=icvoOZfK5#1ctx*~UwD=ab4DGQXehQ!XYnak*dee%YN$_ZPL%KZuz$ zD;$PpT;HM^$KwtQm@7uvT`i6>Hae1CoRVM2)NL<2-k2PiX=eAx+-6j#JI?M}(tuBW zkF%jjLR)O`gI2fcPBxF^HeI|DWwQWHVR!;;{BXXHskxh8F@BMDn`oEi-NHt;CLymW z=KSv5)3dyzec0T5B*`g-MQ<;gz=nIWKUi9ko<|4I(-E0k$QncH>E4l z**1w&#={&zv4Tvhgz#c29`m|;lU-jmaXFMC11 z*dlXDMEOG>VoLMc>!rApwOu2prKSi*!w%`yzGmS+k(zm*CsLK*wv{S_0WX^8A-rKy zbk^Gf_92^7iB_uUF)EE+ET4d|X|>d&mdN?x@vxKAQk`O+r4Qdu>XGy(a(19g;=jU} zFX{O*_NG>!$@jh!U369Lnc+D~qch3uT+_Amyi}*k#LAAwh}k8IPK5a-WZ81ufD>l> z$4cF}GSz>ce`3FAic}6W4Z7m9KGO?(eWqi@L|5Hq0@L|&2flN1PVl}XgQ2q*_n2s3 zt5KtowNkTYB5b;SVuoXA@i5irXO)A&%7?V`1@HGCB&)Wgk+l|^XXChq;u(nyPB}b3 zY>m5jkxpZgi)zfbgv&ec4Zqdvm+D<?Im*mXweS9H+V>)zF#Zp3)bhl$PbISY{5=_z!8&*Jv~NYtI-g!>fDs zmvL5O^U%!^VaKA9gvKw|5?-jk>~%CVGvctKmP$kpnpfN{D8@X*Aazi$txfa%vd-|E z>kYmV66W!lNekJPom29LdZ%(I+ZLZYTXzTg*to~m?7vp%{V<~>H+2}PQ?PPAq`36R z<%wR8v6UkS>Wt#hzGk#44W<%9S=nBfB);6clKwnxY}T*w21Qc3_?IJ@4gYzC7s;WP zVQNI(M=S=JT#xsZy7G`cR(BP9*je0bfeN8JN5~zY(DDs0t{LpHOIbN);?T-69Pf3R zSNe*&p2%AwXHL>__g+xd4Hlc_vu<25H?(`nafS%)3UPP7_4;gk-9ckt8SJRTv5v0M z_Hww`qPudL?ajIR&X*;$y-`<)6dxx1U~5eGS13CB!lX;3w7n&lDDiArbAhSycd}+b zya_3p@A`$kQy;|NJZ~s44Hqo7Hwt}X86NK=(ey>lgWTtGL6k@Gy;PbO!M%1~Wcn2k zUFP|*5d>t-X*RU8g%>|(wwj*~#l4z^Aatf^DWd1Wj#Q*AY0D^V@sC`M zjJc6qXu0I7Y*2;;gGu!plAFzG=J;1%eIOdn zQA>J&e05UN*7I5@yRhK|lbBSfJ+5Uq;!&HV@xfPZrgD}kE*1DSq^=%{o%|LChhl#0 zlMb<^a6ixzpd{kNZr|3jTGeEzuo}-eLT-)Q$#b{!vKx8Tg}swCni>{#%vDY$Ww$84 zew3c9BBovqb}_&BRo#^!G(1Eg((BScRZ}C)Oz?y`T5wOrv);)b^4XR8 zhJo7+<^7)qB>I;46!GySzdneZ>n_E1oWZY;kf94#)s)kWjuJN1c+wbVoNQcmnv}{> zN0pF+Sl3E}UQ$}slSZeLJrwT>Sr}#V(dVaezCQl2|4LN`7L7v&siYR|r7M(*JYfR$ zst3=YaDw$FSc{g}KHO&QiKxuhEzF{f%RJLKe3p*7=oo`WNP)M(9X1zIQPP0XHhY3c znrP{$4#Ol$A0s|4S7Gx2L23dv*Gv2o;h((XVn+9+$qvm}s%zi6nI-_s6?mG! zj{DV;qesJb&owKeEK?=J>UcAlYckA7Sl+I&IN=yasrZOkejir*kE@SN`fk<8Fgx*$ zy&fE6?}G)d_N`){P~U@1jRVA|2*69)KSe_}!~?+`Yb{Y=O~_+@!j<&oVQQMnhoIRU zA0CyF1OFfkK44n*JD~!2!SCPM;PRSk%1XL=0&rz00wxPs&-_eapJy#$h!eqY%nS0{ z!aGg58JIJPF3_ci%n)QSVpa2H`vIe$RD43;#IRfDV&Ibit z+?>HW4{2wOfC6Fw)}4x}i1maDxcE1qi@BS*qcxD2gE@h3#4cgU*D-&3z7D|tVZWt= z-Cy2+*Cm@P4GN_TPUtaVyVesbVDazF@)j8VJ4>XZv!f%}&eO1SvIgr}4`A*3#vat< z_MoByL(qW6L7SFZ#|Gc1fFN)L2PxY+{B8tJp+pxRyz*87)vXR}*=&ahXjBlQKguuf zX6x<<6fQulE^C*KH8~W%ptpaC0l?b=_{~*U4?5Vt;dgM4t_{&UZ1C2j?b>b+5}{IF_CUyvz-@QZPMlJ)r_tS$9kH%RPv#2_nMb zRLj5;chJ72*U`Z@Dqt4$@_+k$%|8m(HqLG!qT4P^DdfvGf&){gKnGCX#H0!;W=AGP zbA&Z`-__a)VTS}kKFjWGk z%|>yE?t*EJ!qeQ%dPk$;xIQ+P0;()PCBDgjJm6Buj{f^awNoVx+9<|lg3%-$G(*f) zll6oOkN|yamn1uyl2*N-lnqRI1cvs_JxLTeahEK=THV$Sz*gQhKNb*p0fNoda#-&F zB-qJgW^g}!TtM|0bS2QZekW7_tKu%GcJ!4?lObt0z_$mZ4rbQ0o=^curCs3bJK6sq z9fu-aW-l#>z~ca(B;4yv;2RZ?tGYAU)^)Kz{L|4oPj zdOf_?de|#yS)p2v8-N||+XL=O*%3+y)oI(HbM)Ds?q8~HPzIP(vs*G`iddbWq}! 
z(2!VjP&{Z1w+%eUq^ '} + case $link in #( + /*) app_path=$link ;; #( + *) app_path=$APP_HOME$link ;; + esac +done + +APP_HOME=$( cd "${APP_HOME:-./}" && pwd -P ) || exit + +APP_NAME="Gradle" +APP_BASE_NAME=${0##*/} + +# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' + +# Use the maximum available, or set MAX_FD != -1 to use that value. +MAX_FD=maximum + +warn () { + echo "$*" +} >&2 + +die () { + echo + echo "$*" + echo + exit 1 +} >&2 + +# OS specific support (must be 'true' or 'false'). +cygwin=false +msys=false +darwin=false +nonstop=false +case "$( uname )" in #( + CYGWIN* ) cygwin=true ;; #( + Darwin* ) darwin=true ;; #( + MSYS* | MINGW* ) msys=true ;; #( + NONSTOP* ) nonstop=true ;; +esac + +CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar + + +# Determine the Java command to use to start the JVM. +if [ -n "$JAVA_HOME" ] ; then + if [ -x "$JAVA_HOME/jre/sh/java" ] ; then + # IBM's JDK on AIX uses strange locations for the executables + JAVACMD=$JAVA_HOME/jre/sh/java + else + JAVACMD=$JAVA_HOME/bin/java + fi + if [ ! -x "$JAVACMD" ] ; then + die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." + fi +else + JAVACMD=java + which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." +fi + +# Increase the maximum file descriptors if we can. +if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then + case $MAX_FD in #( + max*) + MAX_FD=$( ulimit -H -n ) || + warn "Could not query maximum file descriptor limit" + esac + case $MAX_FD in #( + '' | soft) :;; #( + *) + ulimit -n "$MAX_FD" || + warn "Could not set maximum file descriptor limit to $MAX_FD" + esac +fi + +# Collect all arguments for the java command, stacking in reverse order: +# * args from the command line +# * the main class name +# * -classpath +# * -D...appname settings +# * --module-path (only if needed) +# * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables. + +# For Cygwin or MSYS, switch paths to Windows format before running java +if "$cygwin" || "$msys" ; then + APP_HOME=$( cygpath --path --mixed "$APP_HOME" ) + CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" ) + + JAVACMD=$( cygpath --unix "$JAVACMD" ) + + # Now convert the arguments - kludge to limit ourselves to /bin/sh + for arg do + if + case $arg in #( + -*) false ;; # don't mess with options #( + /?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath + [ -e "$t" ] ;; #( + *) false ;; + esac + then + arg=$( cygpath --path --ignore --mixed "$arg" ) + fi + # Roll the args list around exactly as many times as the number of + # args, so each arg winds up back in the position where it started, but + # possibly modified. + # + # NB: a `for` loop captures its iteration list before it begins, so + # changing the positional parameters here affects neither the number of + # iterations, nor the values presented in `arg`. 
+ shift # remove old arg + set -- "$@" "$arg" # push replacement arg + done +fi + +# Collect all arguments for the java command; +# * $DEFAULT_JVM_OPTS, $JAVA_OPTS, and $GRADLE_OPTS can contain fragments of +# shell script including quotes and variable substitutions, so put them in +# double quotes to make sure that they get re-expanded; and +# * put everything else in single quotes, so that it's not re-expanded. + +set -- \ + "-Dorg.gradle.appname=$APP_BASE_NAME" \ + -classpath "$CLASSPATH" \ + org.gradle.wrapper.GradleWrapperMain \ + "$@" + +# Stop when "xargs" is not available. +if ! command -v xargs >/dev/null 2>&1 +then + die "xargs is not available" +fi + +# Use "xargs" to parse quoted args. +# +# With -n1 it outputs one arg per line, with the quotes and backslashes removed. +# +# In Bash we could simply go: +# +# readarray ARGS < <( xargs -n1 <<<"$var" ) && +# set -- "${ARGS[@]}" "$@" +# +# but POSIX shell has neither arrays nor command substitution, so instead we +# post-process each arg (as a line of input to sed) to backslash-escape any +# character that might be a shell metacharacter, then use eval to reverse +# that process (while maintaining the separation between arguments), and wrap +# the whole thing up as a single "set" statement. +# +# This will of course break if any of these variables contains a newline or +# an unmatched quote. +# + +eval "set -- $( + printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" | + xargs -n1 | + sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' | + tr '\n' ' ' + )" '"$@"' + +exec "$JAVACMD" "$@" diff --git a/gradlew.bat b/gradlew.bat new file mode 100644 index 00000000..f127cfd4 --- /dev/null +++ b/gradlew.bat @@ -0,0 +1,91 @@ +@rem +@rem Copyright 2015 the original author or authors. +@rem +@rem Licensed under the Apache License, Version 2.0 (the "License"); +@rem you may not use this file except in compliance with the License. +@rem You may obtain a copy of the License at +@rem +@rem https://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, software +@rem distributed under the License is distributed on an "AS IS" BASIS, +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@rem See the License for the specific language governing permissions and +@rem limitations under the License. +@rem + +@if "%DEBUG%"=="" @echo off +@rem ########################################################################## +@rem +@rem Gradle startup script for Windows +@rem +@rem ########################################################################## + +@rem Set local scope for the variables with windows NT shell +if "%OS%"=="Windows_NT" setlocal + +set DIRNAME=%~dp0 +if "%DIRNAME%"=="" set DIRNAME=. +set APP_BASE_NAME=%~n0 +set APP_HOME=%DIRNAME% + +@rem Resolve any "." and ".." in APP_HOME to make it shorter. +for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi + +@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" + +@rem Find java.exe +if defined JAVA_HOME goto findJavaFromJavaHome + +set JAVA_EXE=java.exe +%JAVA_EXE% -version >NUL 2>&1 +if %ERRORLEVEL% equ 0 goto execute + +echo. +echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. +echo. +echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. 
+
+goto fail
+
+:findJavaFromJavaHome
+set JAVA_HOME=%JAVA_HOME:"=%
+set JAVA_EXE=%JAVA_HOME%/bin/java.exe
+
+if exist "%JAVA_EXE%" goto execute
+
+echo.
+echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
+echo.
+echo Please set the JAVA_HOME variable in your environment to match the
+echo location of your Java installation.
+
+goto fail
+
+:execute
+@rem Setup the command line
+
+set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
+
+
+@rem Execute Gradle
+"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*
+
+:end
+@rem End local scope for the variables with windows NT shell
+if %ERRORLEVEL% equ 0 goto mainEnd
+
+:fail
+rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
+rem the _cmd.exe /c_ return code!
+set EXIT_CODE=%ERRORLEVEL%
+if %EXIT_CODE% equ 0 set EXIT_CODE=1
+if not ""=="%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE%
+exit /b %EXIT_CODE%
+
+:mainEnd
+if "%OS%"=="Windows_NT" endlocal
+
+:omega
diff --git a/load-test/RESULTS.md b/load-test/RESULTS.md
new file mode 100644
index 00000000..3cccbf5f
--- /dev/null
+++ b/load-test/RESULTS.md
@@ -0,0 +1,44 @@
+# Load Test Results
+
+Load tests were run on an M1 MacBook with 16 GB of memory.
+
+## Large plan
+
+- The large plan has two data sources: CSV and JSON
+- One foreign key is defined between the account_id columns in the CSV and JSON files
+- 100,000 records are generated for the JSON file
+  - 200,000 records (2 records per account_id) are generated for the CSV file
+
+### Result
+
+- Run 1 (foreign key, no unique): 26s
+- Run 2 (foreign key, with unique): 45s
+- Run 3 (cache after unique): 45s
+- Run 4 (additional field with unique): 60s
+
+## DVD Rental
+
+- 1,000 records per table
+- Many foreign keys
+- Many primary keys (singular and composite)
+
+### Result
+
+- Run 1: 202s
+- Run 2 (with cache before zipWithIndex, shuffle partitions = 10): 147s
+- Run 3 (same as 2 with count disabled): 122s
+- Run 4 (same as 3, run in Docker): 22s
+
+## Postgres Multiple Tables
+
+- Write to the balances and transactions tables
+- 1,000,000 records in balances
+- 2,000,000 records in transactions, 5 transactions per 200,000 accounts
+- Link account_number between balances and transactions
+
+### Result
+
+- Run 1 (no primary keys defined): 166s
+- Run 2 (shuffle partitions from 10 to 3): 149s
+- Run 3 (batch size 1,000,000): 105s
+- Run 4 (shuffle partitions from 3 to 1): 109s
\ No newline at end of file
diff --git a/misc/banner/logo_landscape_banner.svg b/misc/banner/logo_landscape_banner.svg
new file mode 100644
index 00000000..2fba3f61
--- /dev/null
+++ b/misc/banner/logo_landscape_banner.svg
@@ -0,0 +1 @@
+ 
\ No newline at end of file
diff --git a/run-docker.sh b/run-docker.sh
new file mode 100644
index 00000000..689e4ea7
--- /dev/null
+++ b/run-docker.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+echo "======================"
+echo "Try to run basic image"
+echo "======================"
+docker run -e ENABLE_RECORD_TRACKING=true -v /tmp/datagen:/opt/app/data-caterer datacatering/data-caterer-basic:0.1
diff --git a/script/plan/cassandra-plan.yaml b/script/plan/cassandra-plan.yaml
new file mode 100644
index 00000000..fd481826
--- /dev/null
+++ b/script/plan/cassandra-plan.yaml
@@ -0,0 +1,6 @@
+name: "cassandra_example_plan"
+description: "Create accounts data in Cassandra"
+tasks:
+  - name: "cassandra_accounts"
+    dataSourceName: "cassandra"
+    enabled: true
diff --git a/script/plan/csv-plan.yaml b/script/plan/csv-plan.yaml
new file mode 100644
index
00000000..ae2a2c0b --- /dev/null +++ b/script/plan/csv-plan.yaml @@ -0,0 +1,6 @@ +name: "csv_example_plan" +description: "Create transaction data in CSV file" +tasks: + - name: "csv_transaction_file" + dataSourceName: "csv" + enabled: true diff --git a/script/plan/foreign-key-plan.yaml b/script/plan/foreign-key-plan.yaml new file mode 100644 index 00000000..3da0678b --- /dev/null +++ b/script/plan/foreign-key-plan.yaml @@ -0,0 +1,14 @@ +name: "foreign_key_example_plan" +description: "Create account data in JSON and Postgres" +tasks: + - name: "json_account_file" + dataSourceName: "json" + enabled: true + - name: "postgres_account" + dataSourceName: "postgresCustomer" + enabled: true + +sinkOptions: + foreignKeys: + "postgresCustomer.accounts.account_number": + - "json.account.account_id" diff --git a/script/plan/http-plan.yaml b/script/plan/http-plan.yaml new file mode 100644 index 00000000..4fbd1cdc --- /dev/null +++ b/script/plan/http-plan.yaml @@ -0,0 +1,6 @@ +name: "http_example_plan" +description: "Hit HTTP endpoint with account data" +tasks: + - name: "json_account_http" + dataSourceName: "httpbin" + enabled: true diff --git a/script/plan/kafka-plan.yaml b/script/plan/kafka-plan.yaml new file mode 100644 index 00000000..b7f31c9e --- /dev/null +++ b/script/plan/kafka-plan.yaml @@ -0,0 +1,6 @@ +name: "kafka_example_plan" +description: "Create account data in Kafka" +tasks: + - name: "json_account_kafka" + dataSourceName: "kafkaAccount" + enabled: true diff --git a/script/plan/mysql-plan.yaml b/script/plan/mysql-plan.yaml new file mode 100644 index 00000000..52047a21 --- /dev/null +++ b/script/plan/mysql-plan.yaml @@ -0,0 +1,6 @@ +name: "mysql_example_plan" +description: "Create account data in MySQL" +tasks: + - name: "mysql_account" + dataSourceName: "mysql" + enabled: true diff --git a/script/plan/parquet-plan.yaml b/script/plan/parquet-plan.yaml new file mode 100644 index 00000000..dc0a96af --- /dev/null +++ b/script/plan/parquet-plan.yaml @@ -0,0 +1,6 @@ +name: "parquet_example_plan" +description: "Create transaction data in Parquet file" +tasks: + - name: "parquet_transaction_file" + dataSourceName: "parquet" + enabled: true diff --git a/script/plan/postgres-multiple-tables-plan.yaml b/script/plan/postgres-multiple-tables-plan.yaml new file mode 100644 index 00000000..5add78e0 --- /dev/null +++ b/script/plan/postgres-multiple-tables-plan.yaml @@ -0,0 +1,11 @@ +name: "postgres_multiple_tables_plan" +description: "Create balance and transaction data in Postgres" +tasks: + - name: "postgres_balance_and_transaction" + dataSourceName: "postgresCustomer" + enabled: true + +sinkOptions: + foreignKeys: + "postgresCustomer.balances.account_number": + - "postgresCustomer.transactions.account_number" diff --git a/script/plan/postgres-plan.yaml b/script/plan/postgres-plan.yaml new file mode 100644 index 00000000..763a78e7 --- /dev/null +++ b/script/plan/postgres-plan.yaml @@ -0,0 +1,6 @@ +name: "postgres_example_plan" +description: "Create account data in Postgres" +tasks: + - name: "postgres_account" + dataSourceName: "postgresCustomer" + enabled: true diff --git a/script/plan/simple-json-plan.yaml b/script/plan/simple-json-plan.yaml new file mode 100644 index 00000000..95392b25 --- /dev/null +++ b/script/plan/simple-json-plan.yaml @@ -0,0 +1,6 @@ +name: "account_create_plan" +description: "Create account data in JSON" +tasks: + - name: "json_account_file" + dataSourceName: "json" + enabled: true diff --git a/script/plan/solace-plan.yaml b/script/plan/solace-plan.yaml new file mode 100644 
index 00000000..0a509a27 --- /dev/null +++ b/script/plan/solace-plan.yaml @@ -0,0 +1,6 @@ +name: "solace_example_plan" +description: "Create account data in Solace" +tasks: + - name: "json_account_jms" + dataSourceName: "solace" + enabled: true diff --git a/script/run-data-caterer.sh b/script/run-data-caterer.sh new file mode 100644 index 00000000..1f2d9d25 --- /dev/null +++ b/script/run-data-caterer.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash +DATA_CATERER_MASTER="${DATA_CATERER_MASTER:-local[*]}" +DEPLOY_MODE="${DEPLOY_MODE:-client}" +JAVA_OPTS="-Dlog4j.configurationFile=file:///opt/app/log4j2.properties -Djdk.module.illegalAccess=deny" +DRIVER_MEMORY="${DRIVER_MEMORY:-1g}" +EXECUTOR_MEMORY="${EXECUTOR_MEMORY:-1g}" +ALL_OPTS="$ADDITIONAL_OPTS --conf \"spark.driver.extraJavaOptions=$JAVA_OPTS\" --conf \"spark.executor.extraJavaOptions=$JAVA_OPTS\"" + +CMD=( + /opt/spark/bin/spark-submit + --class com.github.pflooky.datagen.App + --master "$DATA_CATERER_MASTER" + --deploy-mode "$DEPLOY_MODE" + --driver-memory "$DRIVER_MEMORY" + --executor-memory "$EXECUTOR_MEMORY" + "$ALL_OPTS" + file:///opt/app/job.jar +) + +eval "${CMD[@]}" \ No newline at end of file diff --git a/script/task/cassandra/cassandra-customer-task.yaml b/script/task/cassandra/cassandra-customer-task.yaml new file mode 100644 index 00000000..e17b73fe --- /dev/null +++ b/script/task/cassandra/cassandra-customer-task.yaml @@ -0,0 +1,48 @@ +name: "cassandra_accounts" +steps: + - name: "accounts" + type: "cassandra" + count: + records: 50 + options: + keyspace: "account" + table: "accounts" + schema: + fields: + - name: "account_id" + type: "string" + generator: + type: "regex" + options: + regex: "ACC1[0-9]{5,10}" + - name: "amount" + type: "double" + generator: + type: "random" + options: + minValue: 10.0 + maxValue: 100.0 + - name: "name" + type: "string" + generator: + type: "random" + options: + expression: "#{Name.name}" + - name: "created_by" + type: "string" + generator: + type: "random" + options: + expression: "#{Name.username}" + - name: "status" + type: "string" + generator: + type: "oneOf" + options: + oneOf: + - "open" + - "closed" + - name: "open_time" + type: "timestamp" + generator: + type: "random" diff --git a/script/task/file/csv/csv-transaction-task.yaml b/script/task/file/csv/csv-transaction-task.yaml new file mode 100644 index 00000000..0c0e1293 --- /dev/null +++ b/script/task/file/csv/csv-transaction-task.yaml @@ -0,0 +1,52 @@ +name: "csv_transaction_file" +steps: + - name: "transactions" + type: "csv" + options: + path: "/opt/app/data-caterer/sample/csv/transaction-gen" + count: + records: 50 + perColumn: + columnNames: + - "account_id" + - "name" + generator: + type: "random" + options: + maxValue: 10 + minValue: 1 + schema: + fields: + - name: "account_id" + type: "string" + generator: + type: "regex" + options: + regex: "ACC1[0-9]{9}" + - name: "name" + type: "string" + generator: + type: "random" + options: + expression: "#{Name.name}" + - name: "year" + type: "int" + generator: + type: "random" + options: + minValue: 2021 + maxValue: 2022 + - name: "amount" + type: "double" + generator: + type: "random" + options: + minValue: 10.0 + maxValue: 100.0 + - name: "txn_date" + type: "date" + generator: + type: "random" + options: + minValue: "2021-01-01" + maxValue: "2021-12-31" diff --git a/script/task/file/json/json-account-task.yaml b/script/task/file/json/json-account-task.yaml new file mode 100644 index 00000000..f07cd04d --- /dev/null +++ b/script/task/file/json/json-account-task.yaml @@ -0,0 +1,98 @@ 
+name: "json_account_file" +steps: + - name: "account" + type: "json" + count: + records: 50 + options: + path: "/opt/app/data-caterer/sample/json/account-gen" + schema: + fields: + - name: "account_id" + type: "string" + generator: + type: "random" + - name: "year" + type: "int" + generator: + type: "random" + options: + minValue: 2021 + maxValue: 2022 + - name: "is_early_customer" + type: "boolean" + generator: + type: "sql" + options: + sql: "CASE WHEN year == 2021 THEN true ELSE false END" + - name: "amount" + type: "double" + generator: + type: "random" + options: + minValue: 10.0 + maxValue: 100.0 + - name: "is_large_amount" + type: "boolean" + generator: + type: "sql" + options: + sql: >- + CASE WHEN amount > 50 THEN + true + ELSE + false + END + - name: "updated_time" + type: "string" + generator: + type: "sql" + options: + sql: "details.updated_by.time" + - name: "first_txn_date" + type: "date" + generator: + type: "sql" + options: + sql: "element_at(sort_array(transactions.txn_date), 1)" + - name: "details" + schema: + fields: + - name: "name" + type: "string" + generator: + type: "random" + - name: "txn_date" + type: "date" + generator: + type: "random" + options: + minValue: "2021-01-01" + maxValue: "2021-12-31" + - name: "updated_by" + schema: + fields: + - name: "user" + type: "string" + generator: + type: "random" + - name: "time" + type: "timestamp" + generator: + type: "random" + - name: "transactions" + type: "array" + schema: + fields: + - name: "txn_date" + type: "date" + generator: + type: "random" + - name: "amount" + type: "double" + generator: + type: "random" + - name: "tags" + type: "array" + generator: + type: "random" \ No newline at end of file diff --git a/script/task/file/parquet/parquet-transaction-task.yaml b/script/task/file/parquet/parquet-transaction-task.yaml new file mode 100644 index 00000000..9e7ef78e --- /dev/null +++ b/script/task/file/parquet/parquet-transaction-task.yaml @@ -0,0 +1,43 @@ +name: "parquet_transaction_file" +steps: + - name: "transactions" + type: "parquet" + options: + path: "/opt/app/data-caterer/sample/parquet/transaction-gen" + count: + records: 50 + perColumn: + columnNames: + - "account_id" + generator: + type: "random" + options: + maxValue: 10 + minValue: 1 + schema: + fields: + - name: "account_id" + type: "string" + generator: + type: "random" + - name: "year" + type: "int" + generator: + type: "random" + options: + minValue: 2021 + maxValue: 2022 + - name: "amount" + type: "double" + generator: + type: "random" + options: + minValue: 10.0 + maxValue: 100.0 + - name: "txn_date" + type: "date" + generator: + type: "random" + options: + minValue: "2021-01-01" + maxValue: "2021-12-31" diff --git a/script/task/http/http-account-task.yaml b/script/task/http/http-account-task.yaml new file mode 100644 index 00000000..b52ae4c3 --- /dev/null +++ b/script/task/http/http-account-task.yaml @@ -0,0 +1,68 @@ +name: "json_account_http" +steps: + - name: "account" + type: "json" + count: + records: 50 + options: + httpMethod: "PUT" + httpContentType: "application/json" + httpHeader.first-name: "peter" + httpHeader.amount: 100 + schema: + fields: + - name: "account_id" + type: "string" + generator: + type: "random" + - name: "year" + type: "int" + generator: + type: "random" + options: + minValue: 2021 + maxValue: 2022 + - name: "amount" + type: "double" + generator: + type: "random" + options: + minValue: 10.0 + maxValue: 100.0 + - name: "details" + schema: + fields: + - name: "name" + type: "string" + generator: + type: "random" + - 
name: "txn_date" + type: "date" + generator: + type: "random" + options: + minValue: "2021-01-01" + maxValue: "2021-12-31" + - name: "updated_by" + schema: + fields: + - name: "user" + type: "string" + generator: + type: "random" + - name: "time" + type: "timestamp" + generator: + type: "random" + - name: "transactions" + type: "array" + schema: + fields: + - name: "txn_date" + type: "date" + generator: + type: "random" + - name: "amount" + type: "double" + generator: + type: "random" \ No newline at end of file diff --git a/script/task/jdbc/mysql/mysql-account-task.yaml b/script/task/jdbc/mysql/mysql-account-task.yaml new file mode 100644 index 00000000..d06b69c8 --- /dev/null +++ b/script/task/jdbc/mysql/mysql-account-task.yaml @@ -0,0 +1,38 @@ +name: "mysql_account" +steps: + - name: "accounts" + type: "mysql" + count: + records: 50 + options: + dbtable: "account.accounts" + schema: + fields: + - name: "account_number" + type: "string" + generator: + type: "regex" + options: + regex: "ACC1[0-9]{5,10}" + - name: "account_status" + type: "string" + generator: + type: "oneOf" + options: + oneOf: + - "open" + - "closed" + - name: "open_date" + type: "date" + generator: + type: "random" + - name: "created_by" + type: "string" + generator: + type: "random" + - name: "customer_id_int" + type: "int" + generator: + type: "random" + options: + isUnique: true diff --git a/script/task/jdbc/postgres/postgres-account-task.yaml b/script/task/jdbc/postgres/postgres-account-task.yaml new file mode 100644 index 00000000..1cb9193f --- /dev/null +++ b/script/task/jdbc/postgres/postgres-account-task.yaml @@ -0,0 +1,38 @@ +name: "postgres_account" +steps: + - name: "accounts" + type: "postgres" + count: + records: 50 + options: + dbtable: "account.accounts" + schema: + fields: + - name: "account_number" + type: "string" + generator: + type: "regex" + options: + regex: "ACC1[0-9]{5,10}" + - name: "account_status" + type: "string" + generator: + type: "oneOf" + options: + oneOf: + - "open" + - "closed" + - name: "open_date" + type: "date" + generator: + type: "random" + - name: "created_by" + type: "string" + generator: + type: "random" + - name: "customer_id_int" + type: "int" + generator: + type: "random" + options: + isUnique: true diff --git a/script/task/jdbc/postgres/postgres-multi-table-task.yaml b/script/task/jdbc/postgres/postgres-multi-table-task.yaml new file mode 100644 index 00000000..5e5333b9 --- /dev/null +++ b/script/task/jdbc/postgres/postgres-multi-table-task.yaml @@ -0,0 +1,60 @@ +name: "postgres_balance_and_transaction" +steps: + - name: "balances" + type: "postgres" + count: + records: 50 + options: + dbtable: "account.balances" + schema: + fields: + - name: "account_number" + type: "string" + generator: + type: "regex" + options: + regex: "ACC1[0-9]{5,10}" + isUnique: true + - name: "account_status" + type: "string" + generator: + type: "oneOf" + options: + oneOf: + - "open" + - "closed" + - name: "open_date" + type: "date" + generator: + type: "random" + - name: "created_by" + type: "string" + generator: + type: "random" + - name: "customer_id_int" + type: "int" + generator: + type: "random" + options: + isUnique: true + - name: "transactions" + type: "postgres" + count: + perColumn: + columnNames: + - "account_number" + count: 5 + options: + dbtable: "account.transactions" + schema: + fields: + - name: "account_number" + type: "string" + generator: + type: "random" + options: + isUnique: true + - name: "amount" + type: "double" + generator: + type: "random" \ No newline at end of file 
diff --git a/script/task/jms/solace/jms-account-task.yaml b/script/task/jms/solace/jms-account-task.yaml new file mode 100644 index 00000000..51ce6304 --- /dev/null +++ b/script/task/jms/solace/jms-account-task.yaml @@ -0,0 +1,65 @@ +name: "json_account_jms" +steps: + - name: "jms_account" + type: "json" + count: + records: 50 + options: + destinationName: "/JNDI/Q/generated_test_queue" + schema: + fields: + - name: "account_id" + type: "string" + generator: + type: "random" + - name: "year" + type: "int" + generator: + type: "random" + options: + minValue: 2021 + maxValue: 2022 + - name: "amount" + type: "double" + generator: + type: "random" + options: + minValue: 10.0 + maxValue: 100.0 + - name: "details" + schema: + fields: + - name: "name" + type: "string" + generator: + type: "random" + - name: "txn_date" + type: "date" + generator: + type: "random" + options: + minValue: "2021-01-01" + maxValue: "2021-12-31" + - name: "updated_by" + schema: + fields: + - name: "user" + type: "string" + generator: + type: "random" + - name: "time" + type: "timestamp" + generator: + type: "random" + - name: "transactions" + type: "array" + schema: + fields: + - name: "txn_date" + type: "date" + generator: + type: "random" + - name: "amount" + type: "double" + generator: + type: "random" \ No newline at end of file diff --git a/script/task/kafka/kafka-account-task.yaml b/script/task/kafka/kafka-account-task.yaml new file mode 100644 index 00000000..739ce691 --- /dev/null +++ b/script/task/kafka/kafka-account-task.yaml @@ -0,0 +1,97 @@ +name: "json_account_kafka" +steps: + - name: "kafka_account" + type: "json" + count: + records: 50 + options: + topic: "account-topic" + schema: + fields: + - name: "key" + type: "string" + generator: + type: "sql" + options: + sql: "content.account_id" + - name: "value" + type: "string" + generator: + type: "sql" + options: + sql: "to_json(content)" + - name: "headers" + type: "array>" + generator: + type: "sql" + options: + sql: >- + array( + named_struct('key', 'account-id', 'value', to_binary(content.account_id, 'utf-8')), + named_struct('key', 'updated', 'value', to_binary(content.details.updated_by.time, 'utf-8')) + ) + # - name: "partition" + # type: "int" + # generator: + # type: "sql" + # options: + # sql: "1" + - name: "content" + type: "string" + schema: + fields: + - name: "account_id" + type: "string" + generator: + type: "random" + - name: "year" + type: "int" + generator: + type: "random" + options: + minValue: 2021 + maxValue: 2022 + - name: "amount" + type: "double" + generator: + type: "random" + options: + minValue: 10.0 + maxValue: 100.0 + - name: "details" + schema: + fields: + - name: "name" + type: "string" + generator: + type: "random" + - name: "txn_date" + type: "date" + generator: + type: "random" + options: + minValue: "2021-01-01" + maxValue: "2021-12-31" + - name: "updated_by" + schema: + fields: + - name: "user" + type: "string" + generator: + type: "random" + - name: "time" + type: "timestamp" + generator: + type: "random" + - name: "transactions" + type: "array" + schema: + fields: + - name: "txn_date" + type: "date" + generator: + type: "random" + - name: "amount" + type: "double" + generator: + type: "random" \ No newline at end of file diff --git a/settings.gradle.kts b/settings.gradle.kts new file mode 100644 index 00000000..292ed3fa --- /dev/null +++ b/settings.gradle.kts @@ -0,0 +1,13 @@ +/* + * This file was generated by the Gradle 'init' task. + * + * The settings file is used to specify which projects to include in your build. 
+ * + * Detailed information about configuring a multi-project build in Gradle can be found + * in the user manual at https://docs.gradle.org/7.5.1/userguide/multi_project_builds.html + * This project uses @Incubating APIs which are subject to change. + */ + +rootProject.name = "data-caterer" +include("app") +include("api") diff --git a/workspace.xml b/workspace.xml new file mode 100644 index 00000000..ad5897f2 --- /dev/null +++ b/workspace.xml @@ -0,0 +1,105 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file
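As a closing note on the plan files earlier in this patch (for example script/plan/foreign-key-plan.yaml and script/plan/postgres-multiple-tables-plan.yaml), each key under `sinkOptions.foreignKeys` is a `dataSource.step.column` string mapped to a list of target columns in the same format. The sketch below only illustrates how such strings decompose into their parts; it is not the project's implementation, which lives in ForeignKeyUtil.scala.

```scala
object ForeignKeySketch {
  final case class ColumnRef(dataSource: String, step: String, column: String)

  // Split a "dataSource.step.column" reference into its three parts
  def parse(ref: String): ColumnRef = ref.split("\\.", 3) match {
    case Array(dataSource, step, column) => ColumnRef(dataSource, step, column)
    case _ => throw new IllegalArgumentException(s"Expected 'dataSource.step.column' but got: $ref")
  }

  def main(args: Array[String]): Unit = {
    // Mapping copied from script/plan/postgres-multiple-tables-plan.yaml
    val foreignKeys = Map(
      "postgresCustomer.balances.account_number" -> List("postgresCustomer.transactions.account_number")
    )
    foreignKeys.foreach { case (source, targets) =>
      val src = parse(source)
      targets.map(parse).foreach { tgt =>
        println(s"${src.step}.${src.column} -> ${tgt.step}.${tgt.column} (data source: ${src.dataSource} -> ${tgt.dataSource})")
      }
    }
  }
}
```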