diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index b00c64f1df..4c132cde56 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -18,155 +18,14 @@ on: value: ${{ jobs.build.outputs.artifact-prefix }} jobs: - lint: - name: Lint - uses: canonical/data-platform-workflows/.github/workflows/lint.yaml@v29.0.0 - - unit-test: - name: Unit test charm - runs-on: ubuntu-22.04 - timeout-minutes: 10 - steps: - - name: Checkout - uses: actions/checkout@v4 - - name: Install tox & poetry - run: | - pipx install tox - pipx install poetry - - name: Run tests - run: tox run -e unit - - promtool: - runs-on: ubuntu-22.04 - steps: - - name: Checkout repo - uses: actions/checkout@v4 - - # prometheus snap includes promtool - - name: Install prometheus snap - run: sudo snap install prometheus - - - name: Check validity of prometheus alert rules - run: | - promtool check rules src/alert_rules/prometheus/*.yaml - - - name: Run unit tests for prometheus alert rules - run: | - promtool test rules tests/unit/test_alert_rules/*.yaml - - terraform-test: - name: Terraform - Validation and Simple Deployment product - runs-on: ubuntu-22.04 - timeout-minutes: 120 - steps: - - name: Checkout repo - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: (GitHub hosted) Free up disk space - run: | - printf '\nDisk usage before cleanup\n' - df --human-readable - # Based on https://github.com/actions/runner-images/issues/2840#issuecomment-790492173 - rm -r /opt/hostedtoolcache/ - printf '\nDisk usage after cleanup\n' - df --human-readable - - - name: (self hosted) Disk usage - run: df --human-readable - - - name: Install terraform snap - run: | - sudo snap install terraform --channel=latest/stable --classic - - - name: Lint / format / validate TF modules - run: | - pushd ./terraform - for dir in charm/simple_deployment charm/large_deployment product/simple_deployment product/large_deployment; do - (cd "${dir}" && terraform init && terraform fmt && terraform validate) - done - popd - - - name: run checks - prepare - run: | - sudo snap install juju --channel=3.6 --classic - - - name: LXD setup - run: | - sudo snap refresh lxd --channel=latest/stable - sudo adduser "$USER" 'lxd' - # `newgrp` does not work in GitHub Actions; use `sg` instead - sg 'lxd' -c "lxd waitready" - sg 'lxd' -c "lxd init --auto" - sg 'lxd' -c "lxc network set lxdbr0 ipv6.address none" - sudo iptables -F FORWARD - sudo iptables -P FORWARD ACCEPT - - - name: Juju setup - run: | - mkdir -p ~/.local/share/juju - sg 'lxd' -c "juju bootstrap 'localhost' --config model-logs-size=10G" - juju model-defaults logging-config='=INFO; unit=DEBUG' - juju add-model test - - sudo tee -a /etc/sysctl.conf > /dev/null < continuous-writes index document count -# We use this global variable then to restore each backup on full DR scenario. -cwrites_backup_doc_count = {} - - -# Keeps track of the current continuous_writes object that we are using. -# This is relevant for the case where we have a test failure and we need to clean -# the cluster -global_cwrites = None - - -@pytest.fixture(scope="function") -async def force_clear_cwrites_index(): - """Force clear the global cwrites_backup_doc_count.""" - global global_cwrites - try: - if global_cwrites: - await global_cwrites.clear() - except Exception: - pass - - -@pytest.fixture(scope="session") -def cloud_configs( - github_secrets: Dict[str, str], microceph: Dict[str, str] -) -> Dict[str, Dict[str, str]]: - # Figure out the address of the LXD host itself, where tests are executed - # this is where microceph will be installed. - ip = subprocess.check_output(["hostname", "-I"]).decode().split()[0] - results = { - "microceph": { - "endpoint": f"http://{ip}", - "bucket": microceph.bucket, - "path": BackupsPath, - "region": "default", - }, - } - if "AWS_ACCESS_KEY" in github_secrets: - results["aws"] = { - "endpoint": "https://s3.amazonaws.com", - "bucket": "data-charms-testing", - "path": BackupsPath, - "region": "us-east-1", - } - if "AZURE_SECRET_KEY" in github_secrets: - results["azure"] = { - "connection-protocol": "abfss", - "container": "data-charms-testing", - "path": BackupsPath, - } - return results - - -@pytest.fixture(scope="session") -def cloud_credentials( - github_secrets: Dict[str, str], microceph: Dict[str, str] -) -> Dict[str, Dict[str, str]]: - """Read cloud credentials.""" - results = { - "microceph": { - "access-key": microceph.access_key_id, - "secret-key": microceph.secret_access_key, - }, - } - if "AWS_ACCESS_KEY" in github_secrets: - results["aws"] = { - "access-key": github_secrets["AWS_ACCESS_KEY"], - "secret-key": github_secrets["AWS_SECRET_KEY"], - } - if "AZURE_SECRET_KEY" in github_secrets: - results["azure"] = { - "secret-key": github_secrets["AZURE_SECRET_KEY"], - "storage-account": github_secrets["AZURE_STORAGE_ACCOUNT"], - } - return results - - -@pytest.fixture(scope="session", autouse=True) -def remove_backups( # noqa C901 - # ops_test: OpsTest, - cloud_configs: Dict[str, Dict[str, str]], - cloud_credentials: Dict[str, Dict[str, str]], -): - """Remove previously created backups from the cloud-corresponding bucket.""" - yield - - logger.info("Cleaning backups from cloud buckets") - for cloud_name, config in cloud_configs.items(): - if cloud_name not in cloud_credentials: - continue - - if cloud_name == "aws" or cloud_name == "microceph": - if ( - "access-key" not in cloud_credentials[cloud_name] - or "secret-key" not in cloud_credentials[cloud_name] - ): - # This cloud has not been used in this test run - continue - - session = boto3.session.Session( - aws_access_key_id=cloud_credentials[cloud_name]["access-key"], - aws_secret_access_key=cloud_credentials[cloud_name]["secret-key"], - region_name=config["region"], - ) - s3 = session.resource("s3", endpoint_url=config["endpoint"]) - bucket = s3.Bucket(config["bucket"]) - - # Some of our runs target only a single cloud, therefore, they will - # raise errors on the other cloud's bucket. We catch and log them. - try: - bucket.objects.filter(Prefix=f"{BackupsPath}/").delete() - except Exception as e: - logger.warning(f"Failed to clean up backups: {e}") - - if cloud_name == "azure": - if ( - "secret-key" not in cloud_credentials[cloud_name] - or "storage-account" not in cloud_credentials[cloud_name] - ): - # This cloud has not been used in this test run - continue - - storage_account = cloud_credentials[cloud_name]["storage-account"] - secret_key = cloud_credentials[cloud_name]["secret-key"] - connection_string = f"DefaultEndpointsProtocol=https;AccountName={storage_account};AccountKey={secret_key};EndpointSuffix=core.windows.net" - blob_service_client = BlobServiceClient.from_connection_string(connection_string) - container_client = blob_service_client.get_container_client(config["container"]) - - # List and delete blobs with the specified prefix - blobs_to_delete = container_client.list_blobs(name_starts_with=BackupsPath) - - try: - for blob in blobs_to_delete: - container_client.delete_blob(blob.name) - except Exception as e: - logger.warning(f"Failed to clean up backups: {e}") - - -async def _configure_s3( - ops_test: OpsTest, config: Dict[str, str], credentials: Dict[str, str], app_name: str = None -) -> None: - await ops_test.model.applications[S3_INTEGRATOR].set_config(config) - s3_integrator_id = (await get_application_units(ops_test, S3_INTEGRATOR))[ - 0 - ].id # We redeploy s3-integrator once, so we may have anything >=0 as id - await run_action( - ops_test, - s3_integrator_id, - "sync-s3-credentials", - params=credentials, - app=S3_INTEGRATOR, - ) - - apps = [S3_INTEGRATOR] if app_name is None else [S3_INTEGRATOR, app_name] - await ops_test.model.wait_for_idle( - apps=apps, - status="active", - timeout=TIMEOUT, - ) - - -async def _configure_azure( - ops_test: OpsTest, - config: Dict[str, str], - credentials: Dict[str, str], - app_name: str = None, -) -> None: - await ops_test.model.applications[AZURE_INTEGRATOR].set_config(config) - logger.info("Adding Juju secret for secret-key config option for azure-storage-integrator") - - # Creates a new secret for each test - local_label = "".join(random.choice(string.ascii_letters) for _ in range(10)) - credentials_secret_uri = await add_juju_secret( - ops_test, - AZURE_INTEGRATOR, - local_label, - {"secret-key": credentials["secret-key"]}, - ) - logger.info( - f"Juju secret for secret-key config option for azure-storage-integrator added. Secret URI: {credentials_secret_uri}" - ) - - configuration_parameters = { - "storage-account": credentials["storage-account"], - "credentials": credentials_secret_uri, - } - # apply new configuration options - logger.info("Setting up configuration for azure-storage-integrator charm...") - await ops_test.model.applications[AZURE_INTEGRATOR].set_config(configuration_parameters) - - apps = [AZURE_INTEGRATOR] if app_name is None else [AZURE_INTEGRATOR, app_name] - await ops_test.model.wait_for_idle( - apps=apps, - status="active", - timeout=TIMEOUT, - ) - - -@pytest.mark.parametrize("cloud_name,deploy_type", SMALL_DEPLOYMENTS_ALL_CLOUDS) -@pytest.mark.abort_on_fail -@pytest.mark.skip_if_deployed -async def test_small_deployment_build_and_deploy( - ops_test: OpsTest, cloud_name: str, deploy_type: str -) -> None: - """Build and deploy an HA cluster of OpenSearch and corresponding S3 integration.""" - if await app_name(ops_test): - return - - my_charm = await ops_test.build_charm(".") - await ops_test.model.set_config(MODEL_CONFIG) - # Deploy TLS Certificates operator. - config = {"ca-common-name": "CN_CA"} - - backup_integrator = AZURE_INTEGRATOR if cloud_name == "azure" else S3_INTEGRATOR - backup_integrator_channel = ( - AZURE_INTEGRATOR_CHANNEL if cloud_name == "azure" else S3_INTEGRATOR_CHANNEL - ) - - await asyncio.gather( - ops_test.model.deploy(TLS_CERTIFICATES_APP_NAME, channel="stable", config=config), - ops_test.model.deploy(backup_integrator, channel=backup_integrator_channel), - ops_test.model.deploy(my_charm, num_units=3, series=SERIES, config=CONFIG_OPTS), - ) - - # Relate it to OpenSearch to set up TLS. - await ops_test.model.integrate(APP_NAME, TLS_CERTIFICATES_APP_NAME) - await ops_test.model.wait_for_idle( - apps=[TLS_CERTIFICATES_APP_NAME, APP_NAME], - status="active", - timeout=1400, - idle_period=IDLE_PERIOD, - ) - # Credentials not set yet, this will move the opensearch to blocked state - # Credentials are set per test scenario - await ops_test.model.integrate(APP_NAME, backup_integrator) - - -@pytest.mark.parametrize("cloud_name,deploy_type", LARGE_DEPLOYMENTS_ALL_CLOUDS) -@pytest.mark.abort_on_fail -@pytest.mark.skip_if_deployed -async def test_large_deployment_build_and_deploy( - ops_test: OpsTest, cloud_name: str, deploy_type: str -) -> None: - """Build and deploy a large deployment for OpenSearch. - - The following apps will be deployed: - * main: the main orchestrator - * failover: the failover orchestrator - * opensearch (or APP_NAME): the data.hot node - - The data node is selected to adopt the "APP_NAME" value because it is the node which - ContinuousWrites will later target its writes to. - """ - await ops_test.model.set_config(MODEL_CONFIG) - # Deploy TLS Certificates operator. - tls_config = {"ca-common-name": "CN_CA"} - - my_charm = await ops_test.build_charm(".") - - main_orchestrator_conf = { - "cluster_name": "backup-test", - "init_hold": False, - "roles": "cluster_manager,data", - } - failover_orchestrator_conf = { - "cluster_name": "backup-test", - "init_hold": True, - "roles": "cluster_manager", - } - data_hot_conf = {"cluster_name": "backup-test", "init_hold": True, "roles": "data.hot"} - - backup_integrator = AZURE_INTEGRATOR if cloud_name == "azure" else S3_INTEGRATOR - backup_integrator_channel = ( - AZURE_INTEGRATOR_CHANNEL if cloud_name == "azure" else S3_INTEGRATOR_CHANNEL - ) - - await asyncio.gather( - ops_test.model.deploy(TLS_CERTIFICATES_APP_NAME, channel="stable", config=tls_config), - ops_test.model.deploy(backup_integrator, channel=backup_integrator_channel), - ops_test.model.deploy( - my_charm, - application_name="main", - num_units=1, - series=SERIES, - config=main_orchestrator_conf | CONFIG_OPTS, - ), - ops_test.model.deploy( - my_charm, - application_name="failover", - num_units=2, - series=SERIES, - config=failover_orchestrator_conf | CONFIG_OPTS, - ), - ops_test.model.deploy( - my_charm, - application_name=APP_NAME, - num_units=1, - series=SERIES, - config=data_hot_conf | CONFIG_OPTS, - ), - ) - - # Large deployment setup - await ops_test.model.integrate("main:peer-cluster-orchestrator", "failover:peer-cluster") - await ops_test.model.integrate("main:peer-cluster-orchestrator", f"{APP_NAME}:peer-cluster") - await ops_test.model.integrate( - "failover:peer-cluster-orchestrator", f"{APP_NAME}:peer-cluster" - ) - - # TLS setup - await ops_test.model.integrate("main", TLS_CERTIFICATES_APP_NAME) - await ops_test.model.integrate("failover", TLS_CERTIFICATES_APP_NAME) - await ops_test.model.integrate(APP_NAME, TLS_CERTIFICATES_APP_NAME) - - # Charms except s3-integrator should be active - await wait_until( - ops_test, - apps=[TLS_CERTIFICATES_APP_NAME, "main", "failover", APP_NAME], - apps_statuses=["active"], - units_statuses=["active"], - wait_for_exact_units={ - TLS_CERTIFICATES_APP_NAME: 1, - "main": 1, - "failover": 2, - APP_NAME: 1, - }, - idle_period=IDLE_PERIOD, - timeout=3600, - ) - - # Credentials not set yet, this will move the opensearch to blocked state - # Credentials are set per test scenario - await ops_test.model.integrate("main", backup_integrator) - - -@pytest.mark.parametrize("cloud_name,deploy_type", LARGE_DEPLOYMENTS_ALL_CLOUDS) -@pytest.mark.abort_on_fail -async def test_large_setups_relations_with_misconfiguration( - ops_test: OpsTest, - cloud_name: str, - deploy_type: str, -) -> None: - """Tests the different blocked messages expected in large deployments.""" - if cloud_name == "azure": - config = { - "connection-protocol": "abfss", - "container": "error", - "path": "/", - } - credentials = { - "storage-account": "error", - "secret-key": "error", - } - await _configure_azure(ops_test=ops_test, config=config, credentials=credentials) - else: - config = { - "endpoint": "http://localhost", - "bucket": "error", - "path": "/", - "region": "default", - } - credentials = { - "access-key": "error", - "secret-key": "error", - } - await _configure_s3(ops_test=ops_test, config=config, credentials=credentials) - - await wait_until( - ops_test, - apps=["main"], - apps_statuses=["blocked"], - apps_full_statuses={"main": {"blocked": [BackupSetupFailed]}}, - idle_period=IDLE_PERIOD, - ) - - backup_integrator = AZURE_INTEGRATOR if cloud_name == "azure" else S3_INTEGRATOR - backup_relation = AZURE_RELATION if cloud_name == "azure" else S3_RELATION - # Now, relate failover cluster to backup-integrator and review the status - - await ops_test.model.integrate(f"failover:{backup_relation}", backup_integrator) - await ops_test.model.integrate(f"{APP_NAME}:{backup_relation}", backup_integrator) - await wait_until( - ops_test, - apps=["main", "failover", APP_NAME], - apps_statuses=["blocked"], - apps_full_statuses={ - "main": {"blocked": [BackupSetupFailed]}, - "failover": {"blocked": [BackupRelShouldNotExist]}, - APP_NAME: {"blocked": [BackupRelShouldNotExist]}, - }, - idle_period=IDLE_PERIOD, - ) - - # Reverting should return it to normal - await ops_test.model.applications[APP_NAME].destroy_relation( - f"{APP_NAME}:{backup_relation}", backup_integrator - ) - await ops_test.model.applications["failover"].destroy_relation( - f"failover:{backup_relation}", backup_integrator - ) - - await wait_until( - ops_test, - apps=["main"], - apps_statuses=["blocked"], - apps_full_statuses={"main": {"blocked": [BackupSetupFailed]}}, - idle_period=IDLE_PERIOD, - ) - await wait_until( - ops_test, - apps=["failover", APP_NAME], - apps_statuses=["active"], - idle_period=IDLE_PERIOD, - ) - - -@pytest.mark.parametrize("cloud_name,deploy_type", ALL_DEPLOYMENTS_ALL_CLOUDS) -@pytest.mark.abort_on_fail -async def test_create_backup_and_restore( - ops_test: OpsTest, - c_writes: ContinuousWrites, - c_writes_runner, - cloud_configs: Dict[str, Dict[str, str]], - cloud_credentials: Dict[str, Dict[str, str]], - cloud_name: str, - deploy_type: str, -) -> None: - """Runs the backup process whilst writing to the cluster into 'noisy-index'.""" - app = (await app_name(ops_test) or APP_NAME) if deploy_type == "small" else "main" - apps = [app] if deploy_type == "small" else [app, APP_NAME] - leader_id = await get_leader_unit_id(ops_test, app=app) - unit_ip = await get_leader_unit_ip(ops_test, app=app) - config = cloud_configs[cloud_name] - - logger.info(f"Syncing credentials for {cloud_name}") - if cloud_name == "azure": - await _configure_azure(ops_test, config, cloud_credentials[cloud_name], app) - else: - await _configure_s3(ops_test, config, cloud_credentials[cloud_name], app) - - date_before_backup = datetime.utcnow() - assert ( - datetime.strptime( - backup_id := await create_backup( - ops_test, - leader_id, - unit_ip=unit_ip, - app=app, - ), - OPENSEARCH_BACKUP_ID_FORMAT, - ) - > date_before_backup - ) - # continuous writes checks - await assert_continuous_writes_increasing(c_writes) - await assert_continuous_writes_consistency(ops_test, c_writes, apps) - await assert_restore_indices_and_compare_consistency( - ops_test, app, leader_id, unit_ip, backup_id - ) - global cwrites_backup_doc_count - cwrites_backup_doc_count[backup_id] = await index_docs_count( - ops_test, - app, - unit_ip, - ContinuousWrites.INDEX_NAME, - ) - - -@pytest.mark.parametrize("cloud_name,deploy_type", ALL_DEPLOYMENTS_ALL_CLOUDS) -@pytest.mark.abort_on_fail -async def test_remove_and_readd_backup_relation( - ops_test: OpsTest, - c_writes: ContinuousWrites, - c_writes_runner, - cloud_configs: Dict[str, Dict[str, str]], - cloud_credentials: Dict[str, Dict[str, str]], - cloud_name: str, - deploy_type: str, -) -> None: - """Removes and re-adds the backup relation to test backup and restore.""" - app = (await app_name(ops_test) or APP_NAME) if deploy_type == "small" else "main" - apps = [app] if deploy_type == "small" else [app, APP_NAME] - - leader_id: int = await get_leader_unit_id(ops_test, app=app) - unit_ip: str = await get_leader_unit_ip(ops_test, app=app) - config: Dict[str, str] = cloud_configs[cloud_name] - - backup_integrator = AZURE_INTEGRATOR if cloud_name == "azure" else S3_INTEGRATOR - backup_relation = AZURE_RELATION if cloud_name == "azure" else S3_RELATION - - logger.info("Remove backup relation") - # Remove relation - await ops_test.model.applications[app].destroy_relation( - backup_relation, f"{backup_integrator}:{backup_relation}" - ) - await ops_test.model.wait_for_idle( - apps=[app], - status="active", - timeout=1400, - idle_period=IDLE_PERIOD, - ) - - logger.info("Re-add backup credentials relation") - await ops_test.model.integrate(app, backup_integrator) - await ops_test.model.wait_for_idle( - apps=[app], - status="active", - timeout=1400, - idle_period=IDLE_PERIOD, - ) - - logger.info(f"Syncing credentials for {cloud_name}") - if cloud_name == "azure": - await _configure_azure(ops_test, config, cloud_credentials[cloud_name], app) - else: - await _configure_s3(ops_test, config, cloud_credentials[cloud_name], app) - - date_before_backup = datetime.utcnow() - assert ( - datetime.strptime( - backup_id := await create_backup( - ops_test, - leader_id, - unit_ip=unit_ip, - app=app, - ), - OPENSEARCH_BACKUP_ID_FORMAT, - ) - > date_before_backup - ) - - # continuous writes checks - await assert_continuous_writes_increasing(c_writes) - await assert_continuous_writes_consistency(ops_test, c_writes, apps) - await assert_restore_indices_and_compare_consistency( - ops_test, app, leader_id, unit_ip, backup_id - ) - global cwrites_backup_doc_count - cwrites_backup_doc_count[backup_id] = await index_docs_count( - ops_test, - app, - unit_ip, - ContinuousWrites.INDEX_NAME, - ) - - -@pytest.mark.parametrize("cloud_name,deploy_type", SMALL_DEPLOYMENTS_ALL_CLOUDS) -@pytest.mark.abort_on_fail -async def test_restore_to_new_cluster( - ops_test: OpsTest, - cloud_configs: Dict[str, Dict[str, str]], - cloud_credentials: Dict[str, Dict[str, str]], - cloud_name: str, - deploy_type: str, - force_clear_cwrites_index, -) -> None: - """Deletes the entire OpenSearch cluster and redeploys from scratch. - - Restores each of the previous backups we created and compare with their doc count. - The cluster is considered healthy if: - 1) At each backup restored, check our track of doc count vs. current index count - 2) Try to write to that new index. - """ - app = (await app_name(ops_test) or APP_NAME) if deploy_type == "small" else "main" - backup_integrator = AZURE_INTEGRATOR if cloud_name == "azure" else S3_INTEGRATOR - backup_integrator_channel = ( - AZURE_INTEGRATOR_CHANNEL if cloud_name == "azure" else S3_INTEGRATOR_CHANNEL - ) - - logging.info("Destroying the application") - await asyncio.gather( - ops_test.model.remove_application(backup_integrator, block_until_done=True), - ops_test.model.remove_application(app, block_until_done=True), - ops_test.model.remove_application(TLS_CERTIFICATES_APP_NAME, block_until_done=True), - ) - - logging.info("Deploying a new cluster") - my_charm = await ops_test.build_charm(".") - await ops_test.model.set_config(MODEL_CONFIG) - # Deploy TLS Certificates operator. - config = {"ca-common-name": "CN_CA"} - - await asyncio.gather( - ops_test.model.deploy(TLS_CERTIFICATES_APP_NAME, channel="stable", config=config), - ops_test.model.deploy(backup_integrator, channel=backup_integrator_channel), - ops_test.model.deploy(my_charm, num_units=3, series=SERIES, config=CONFIG_OPTS), - ) - - # Relate it to OpenSearch to set up TLS. - await ops_test.model.integrate(app, TLS_CERTIFICATES_APP_NAME) - await ops_test.model.wait_for_idle( - apps=[TLS_CERTIFICATES_APP_NAME, app], - status="active", - timeout=1400, - idle_period=IDLE_PERIOD, - ) - # Credentials not set yet, this will move the opensearch to blocked state - # Credentials are set per test scenario - await ops_test.model.integrate(app, backup_integrator) - - leader_id = await get_leader_unit_id(ops_test, app=app) - unit_ip = await get_leader_unit_ip(ops_test, app=app) - config: Dict[str, str] = cloud_configs[cloud_name] - - logger.info(f"Syncing credentials for {cloud_name}") - if cloud_name == "azure": - await _configure_azure(ops_test, config, cloud_credentials[cloud_name], app) - else: - await _configure_s3(ops_test, config, cloud_credentials[cloud_name], app) - backups = await list_backups(ops_test, leader_id, app=app) - - global cwrites_backup_doc_count - # We are expecting 2x backups available - assert len(backups) == 2 - assert len(cwrites_backup_doc_count) == 2 - count = 0 - for backup_id in backups.keys(): - assert await restore(ops_test, backup_id, unit_ip, leader_id, app=app) - count = await index_docs_count(ops_test, app, unit_ip, ContinuousWrites.INDEX_NAME) - - # Ensure we have the same doc count as we had on the original cluster - assert count == cwrites_backup_doc_count[backup_id] - - # restart the continuous writes and check the cluster is still accessible post restore - await assert_start_and_check_continuous_writes(ops_test, unit_ip, app) - - # Now, try a backup & restore with continuous writes - logger.info("Final stage of DR test: try a backup & restore with continuous writes") - writer: ContinuousWrites = ContinuousWrites(ops_test, app) - - # store the global cwrites object - global global_cwrites - global_cwrites = writer - - await writer.start() - time.sleep(10) - date_before_backup = datetime.utcnow() - assert ( - datetime.strptime( - backup_id := await create_backup( - ops_test, - leader_id, - unit_ip=unit_ip, - app=app, - ), - OPENSEARCH_BACKUP_ID_FORMAT, - ) - > date_before_backup - ) - - # continuous writes checks - await assert_continuous_writes_increasing(writer) - await assert_continuous_writes_consistency(ops_test, writer, [app]) - # This assert assures we have taken a new backup, after the last restore from the original - # cluster. That means the index is writable. - await assert_restore_indices_and_compare_consistency( - ops_test, app, leader_id, unit_ip, backup_id - ) - # Clear the writer manually, as we are not using the conftest c_writes_runner to do so - await writer.clear() - - -# ------------------------------------------------------------------------------------------- -# Tests for the "all" group -# -# This group will iterate over each cloud, update its credentials via config and rerun -# the backup and restore tests. -# ------------------------------------------------------------------------------------------- - - -@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "large"]) -@pytest.mark.group("all") -@pytest.mark.abort_on_fail -@pytest.mark.skip_if_deployed -async def test_build_deploy_and_test_status(ops_test: OpsTest) -> None: - """Build, deploy and test status of an HA cluster of OpenSearch and corresponding backups. - - This test group will iterate over each cloud, update its credentials via config and rerun - the backup and restore tests. - """ - if await app_name(ops_test): - return - - my_charm = await ops_test.build_charm(".") - await ops_test.model.set_config(MODEL_CONFIG) - # Deploy TLS Certificates operator. - config = {"ca-common-name": "CN_CA"} - await asyncio.gather( - ops_test.model.deploy(TLS_CERTIFICATES_APP_NAME, channel="stable", config=config), - ops_test.model.deploy(S3_INTEGRATOR, channel=S3_INTEGRATOR_CHANNEL), - ops_test.model.deploy(my_charm, num_units=3, series=SERIES, config=CONFIG_OPTS), - ) - - # Relate it to OpenSearch to set up TLS. - await ops_test.model.integrate(APP_NAME, TLS_CERTIFICATES_APP_NAME) - await ops_test.model.wait_for_idle( - apps=[TLS_CERTIFICATES_APP_NAME, APP_NAME], - status="active", - timeout=1400, - idle_period=IDLE_PERIOD, - ) - # Credentials not set yet, this will move the opensearch to blocked state - # Credentials are set per test scenario - await ops_test.model.integrate(APP_NAME, S3_INTEGRATOR) - - -@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "large"]) -@pytest.mark.group("all") -@pytest.mark.abort_on_fail -async def test_repo_missing_message(ops_test: OpsTest) -> None: - """Check the repo is missing error returned by OpenSearch. - - We use the message format to monitor the cluster status. We need to know if this - message pattern changed between releases of OpenSearch. - """ - app: str = (await app_name(ops_test)) or APP_NAME - unit_ip = await get_leader_unit_ip(ops_test, app=app) - resp = await http_request( - ops_test, "GET", f"https://{unit_ip}:9200/_snapshot/{S3_REPOSITORY}", json_resp=True - ) - logger.debug(f"Response: {resp}") - assert resp["status"] == 404 - assert "repository_missing_exception" in resp["error"]["type"] - - -@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "large"]) -@pytest.mark.group("all") -@pytest.mark.abort_on_fail -async def test_wrong_s3_credentials(ops_test: OpsTest) -> None: - """Check the repo is misconfigured.""" - app = (await app_name(ops_test)) or APP_NAME - unit_ip = await get_leader_unit_ip(ops_test, app=app) - - config = { - "endpoint": "http://localhost", - "bucket": "error", - "path": "/", - "region": "default", - } - credentials = { - "access-key": "error", - "secret-key": "error", - } - - # Not using _configure_s3 as this method will cause opensearch to block - await ops_test.model.applications[S3_INTEGRATOR].set_config(config) - await run_action( - ops_test, - 0, - "sync-s3-credentials", - params=credentials, - app=S3_INTEGRATOR, - ) - await ops_test.model.wait_for_idle( - apps=[S3_INTEGRATOR], - status="active", - timeout=TIMEOUT, - ) - await wait_until( - ops_test, - apps=[app], - apps_statuses=["blocked"], - units_statuses=["active", "blocked"], - wait_for_exact_units=3, - idle_period=30, - ) - - resp = await http_request( - ops_test, "GET", f"https://{unit_ip}:9200/_snapshot/{S3_REPOSITORY}/_all", json_resp=True - ) - logger.debug(f"Response: {resp}") - assert resp["status"] == 500 - assert "repository_exception" in resp["error"]["type"] - assert "Could not determine repository generation from root blobs" in resp["error"]["reason"] - - -@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "large"]) -@pytest.mark.group("all") -@pytest.mark.abort_on_fail -async def test_change_config_and_backup_restore( - ops_test: OpsTest, - cloud_configs: Dict[str, Dict[str, str]], - cloud_credentials: Dict[str, Dict[str, str]], - force_clear_cwrites_index, -) -> None: - """Run for each cloud and update the cluster config.""" - app: str = (await app_name(ops_test)) or APP_NAME - unit_ip: str = await get_leader_unit_ip(ops_test, app=app) - leader_id: int = await get_leader_unit_id(ops_test, app=app) - - initial_count: int = 0 - for cloud_name in cloud_configs.keys(): - # Azure has no different config setups at this point - if cloud_name == "azure": - continue - logger.debug( - f"Index {ContinuousWrites.INDEX_NAME} has {initial_count} documents, starting there" - ) - # Start the ContinuousWrites here instead of bringing as a fixture because we want to do - # it for every cloud config we have and we have to stop it before restore, right down. - writer: ContinuousWrites = ContinuousWrites(ops_test, app, initial_count=initial_count) - - # store the global cwrites object - global global_cwrites - global_cwrites = writer - - await writer.start() - time.sleep(10) - - logger.info(f"Syncing credentials for {cloud_name}") - config: Dict[str, str] = cloud_configs[cloud_name] - await _configure_s3(ops_test, config, cloud_credentials[cloud_name], app) - - date_before_backup = datetime.utcnow() - assert ( - datetime.strptime( - backup_id := await create_backup( - ops_test, - leader_id, - unit_ip=unit_ip, - ), - OPENSEARCH_BACKUP_ID_FORMAT, - ) - > date_before_backup - ) - - # continuous writes checks - await assert_continuous_writes_increasing(writer) - await assert_continuous_writes_consistency(ops_test, writer, [app]) - await assert_restore_indices_and_compare_consistency( - ops_test, app, leader_id, unit_ip, backup_id - ) - # Clear the writer manually, as we are not using the conftest c_writes_runner to do so - await writer.clear() diff --git a/tests/integration/ha/test_ha.py b/tests/integration/ha/test_ha.py deleted file mode 100644 index 7df76531e9..0000000000 --- a/tests/integration/ha/test_ha.py +++ /dev/null @@ -1,620 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2024 Canonical Ltd. -# See LICENSE file for licensing details. - -import asyncio -import logging -import time - -import pytest -from pytest_operator.plugin import OpsTest - -from ..helpers import ( - APP_NAME, - CONFIG_OPTS, - MODEL_CONFIG, - SERIES, - check_cluster_formation_successful, - cluster_health, - get_application_unit_ids, - get_application_unit_ids_ips, - get_application_unit_names, - get_leader_unit_ip, - get_reachable_unit_ips, - is_up, -) -from ..helpers_deployments import wait_until -from ..tls.test_tls import TLS_CERTIFICATES_APP_NAME -from .continuous_writes import ContinuousWrites -from .helpers import ( - ORIGINAL_RESTART_DELAY, - RESTART_DELAY, - all_processes_down, - app_name, - assert_continuous_writes_consistency, - assert_continuous_writes_increasing, - get_elected_cm_unit_id, - get_shards_by_index, - send_kill_signal_to_process, - update_restart_delay, -) -from .helpers_data import create_index, default_doc, delete_index, index_doc, search -from .test_horizontal_scaling import IDLE_PERIOD - -logger = logging.getLogger(__name__) - - -NUM_HA_UNITS = 3 - - -@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "large"]) -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -@pytest.mark.skip_if_deployed -async def test_build_and_deploy(ops_test: OpsTest) -> None: - """Build and deploy one unit of OpenSearch.""" - # it is possible for users to provide their own cluster for HA testing. - # Hence, check if there is a pre-existing cluster. - if await app_name(ops_test): - return - - my_charm = await ops_test.build_charm(".") - await ops_test.model.set_config(MODEL_CONFIG) - # Deploy TLS Certificates operator. - config = {"ca-common-name": "CN_CA"} - await asyncio.gather( - ops_test.model.deploy(TLS_CERTIFICATES_APP_NAME, channel="stable", config=config), - ops_test.model.deploy(my_charm, num_units=NUM_HA_UNITS, series=SERIES, config=CONFIG_OPTS), - ) - - # Relate it to OpenSearch to set up TLS. - await ops_test.model.integrate(APP_NAME, TLS_CERTIFICATES_APP_NAME) - await ops_test.model.wait_for_idle( - apps=[TLS_CERTIFICATES_APP_NAME, APP_NAME], - status="active", - timeout=1400, - idle_period=IDLE_PERIOD, - ) - assert len(ops_test.model.applications[APP_NAME].units) == NUM_HA_UNITS - - -@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "large"]) -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_replication_across_members( - ops_test: OpsTest, c_writes: ContinuousWrites, c_writes_runner -) -> None: - """Check consistency, ie write to node, read data from remaining nodes. - - 1. Create index with replica shards equal to number of nodes - 1. - 2. Index data. - 3. Query data from all the nodes (all the nodes should contain a copy of the data). - """ - app = (await app_name(ops_test)) or APP_NAME - - units = await get_application_unit_ids_ips(ops_test, app=app) - leader_unit_ip = await get_leader_unit_ip(ops_test, app=app) - - # create index with r_shards = nodes - 1 - index_name = "test_index" - await create_index(ops_test, app, leader_unit_ip, index_name, r_shards=len(units) - 1) - - # index document - doc_id = 12 - await index_doc(ops_test, app, leader_unit_ip, index_name, doc_id) - - # check that the doc can be retrieved from any node - for u_ip in units.values(): - docs = await search( - ops_test, - app, - u_ip, - index_name, - query={"query": {"term": {"_id": doc_id}}}, - preference="_only_local", - ) - assert len(docs) == 1 - assert docs[0]["_source"] == default_doc(index_name, doc_id) - - await delete_index(ops_test, app, leader_unit_ip, index_name) - - # continuous writes checks - await assert_continuous_writes_consistency(ops_test, c_writes, [app]) - - -@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "large"]) -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_kill_db_process_node_with_primary_shard( - ops_test: OpsTest, c_writes: ContinuousWrites, c_balanced_writes_runner -) -> None: - """Check cluster can self-heal + data indexed/read when process dies on node with P_shard.""" - app = (await app_name(ops_test)) or APP_NAME - - units_ips = await get_application_unit_ids_ips(ops_test, app) - leader_unit_ip = await get_leader_unit_ip(ops_test, app=app) - - # find unit hosting the primary shard of the index "series-index" - shards = await get_shards_by_index(ops_test, leader_unit_ip, ContinuousWrites.INDEX_NAME) - first_unit_with_primary_shard = [shard.unit_id for shard in shards if shard.is_prim][0] - - # Killing the only instance can be disastrous. - if len(ops_test.model.applications[app].units) < 2: - old_units_count = len(ops_test.model.applications[app].units) - await ops_test.model.applications[app].add_unit(count=1) - await wait_until( - ops_test, - apps=[app], - apps_statuses=["active"], - units_statuses=["active"], - wait_for_exact_units=old_units_count + 1, - idle_period=IDLE_PERIOD, - ) - - # Kill the opensearch process - await send_kill_signal_to_process( - ops_test, app, first_unit_with_primary_shard, signal="SIGKILL" - ) - - await assert_continuous_writes_increasing(c_writes) - - # verify that the opensearch service is back running on the old primary unit - assert await is_up( - ops_test, units_ips[first_unit_with_primary_shard] - ), "OpenSearch service hasn't restarted." - - # fetch unit hosting the new primary shard of the previous index - shards = await get_shards_by_index(ops_test, leader_unit_ip, ContinuousWrites.INDEX_NAME) - units_with_p_shards = [shard.unit_id for shard in shards if shard.is_prim] - assert len(units_with_p_shards) == 2 - for unit_id in units_with_p_shards: - assert ( - unit_id != first_unit_with_primary_shard - ), "Primary shard still assigned to the unit where the service was killed." - - # check that the unit previously hosting the primary shard now hosts a replica - units_with_r_shards = [shard.unit_id for shard in shards if not shard.is_prim] - assert first_unit_with_primary_shard in units_with_r_shards - - # verify the node with the old primary successfully joined the rest of the fleet - assert await check_cluster_formation_successful( - ops_test, leader_unit_ip, get_application_unit_names(ops_test, app=app) - ) - - # continuous writes checks - await assert_continuous_writes_consistency(ops_test, c_writes, [app]) - - -@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "large"]) -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_kill_db_process_node_with_elected_cm( - ops_test: OpsTest, c_writes: ContinuousWrites, c_balanced_writes_runner -) -> None: - """Check cluster can self-heal, data indexed/read when process dies on node with elected CM.""" - app = (await app_name(ops_test)) or APP_NAME - - units_ips = await get_application_unit_ids_ips(ops_test, app) - leader_unit_ip = await get_leader_unit_ip(ops_test, app=app) - - # find unit currently elected cluster_manager - first_elected_cm_unit_id = await get_elected_cm_unit_id(ops_test, leader_unit_ip) - - # Killing the only instance can be disastrous. - if len(ops_test.model.applications[app].units) < 2: - old_units_count = len(ops_test.model.applications[app].units) - await ops_test.model.applications[app].add_unit(count=1) - await wait_until( - ops_test, - apps=[app], - apps_statuses=["active"], - units_statuses=["active"], - wait_for_exact_units=old_units_count + 1, - idle_period=IDLE_PERIOD, - ) - - # Kill the opensearch process - await send_kill_signal_to_process(ops_test, app, first_elected_cm_unit_id, signal="SIGKILL") - - await assert_continuous_writes_increasing(c_writes) - - # verify that the opensearch service is back running on the old elected cm unit - assert await is_up( - ops_test, units_ips[first_elected_cm_unit_id] - ), "OpenSearch service hasn't restarted." - - # fetch the current elected cluster manager - current_elected_cm_unit_id = await get_elected_cm_unit_id(ops_test, leader_unit_ip) - assert ( - current_elected_cm_unit_id != first_elected_cm_unit_id - ), "Cluster manager election did not happen." - - # verify the node with the old elected cm successfully joined back the rest of the fleet - assert await check_cluster_formation_successful( - ops_test, leader_unit_ip, get_application_unit_names(ops_test, app=app) - ) - - # continuous writes checks - await assert_continuous_writes_consistency(ops_test, c_writes, [app]) - - -@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "large"]) -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_freeze_db_process_node_with_primary_shard( - ops_test: OpsTest, c_writes: ContinuousWrites, c_balanced_writes_runner -) -> None: - """Check cluster can self-heal + data indexed/read on process freeze on node with P_shard.""" - app = (await app_name(ops_test)) or APP_NAME - - units_ips = await get_application_unit_ids_ips(ops_test, app) - leader_unit_ip = await get_leader_unit_ip(ops_test, app=app) - - # find unit hosting the primary shard of the index "series-index" - shards = await get_shards_by_index(ops_test, leader_unit_ip, ContinuousWrites.INDEX_NAME) - first_unit_with_primary_shard = [shard.unit_id for shard in shards if shard.is_prim][0] - - # Killing the only instance can be disastrous. - if len(ops_test.model.applications[app].units) < 2: - old_units_count = len(ops_test.model.applications[app].units) - await ops_test.model.applications[app].add_unit(count=1) - await wait_until( - ops_test, - apps=[app], - apps_statuses=["active"], - units_statuses=["active"], - wait_for_exact_units=old_units_count + 1, - idle_period=IDLE_PERIOD, - ) - - # Freeze the opensearch process - opensearch_pid = await send_kill_signal_to_process( - ops_test, app, first_unit_with_primary_shard, signal="SIGSTOP" - ) - - # wait until the SIGSTOP fully takes effect - time.sleep(10) - - # verify the unit is not reachable - is_node_up = await is_up(ops_test, units_ips[first_unit_with_primary_shard], retries=3) - assert not is_node_up - - await assert_continuous_writes_increasing(c_writes) - - # get reachable unit to perform requests against, in case the previously stopped unit - # is leader unit, so its address is not reachable - reachable_ip = (await get_reachable_unit_ips(ops_test))[0] - - # fetch unit hosting the new primary shard of the previous index - shards = await get_shards_by_index(ops_test, reachable_ip, ContinuousWrites.INDEX_NAME) - units_with_p_shards = [shard.unit_id for shard in shards if shard.is_prim] - assert len(units_with_p_shards) == 2 - for unit_id in units_with_p_shards: - assert ( - unit_id != first_unit_with_primary_shard - ), "Primary shard still assigned to the unit where the service was stopped." - - # Un-Freeze the opensearch process in the node previously hosting the primary shard - await send_kill_signal_to_process( - ops_test, - app, - first_unit_with_primary_shard, - signal="SIGCONT", - opensearch_pid=opensearch_pid, - ) - - # verify that the opensearch service is back running on the unit previously hosting the p_shard - assert await is_up( - ops_test, units_ips[first_unit_with_primary_shard], retries=3 - ), "OpenSearch service hasn't restarted." - - # fetch unit hosting the new primary shard of the previous index - shards = await get_shards_by_index(ops_test, leader_unit_ip, ContinuousWrites.INDEX_NAME) - - # check that the unit previously hosting the primary shard now hosts a replica - units_with_r_shards = [shard.unit_id for shard in shards if not shard.is_prim] - assert first_unit_with_primary_shard in units_with_r_shards - - # verify the node with the old primary successfully joined back the rest of the fleet - assert await check_cluster_formation_successful( - ops_test, leader_unit_ip, get_application_unit_names(ops_test, app=app) - ) - - # continuous writes checks - await assert_continuous_writes_consistency(ops_test, c_writes, [app]) - - -@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "large"]) -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_freeze_db_process_node_with_elected_cm( - ops_test: OpsTest, c_writes: ContinuousWrites, c_balanced_writes_runner -) -> None: - """Check cluster can self-heal, data indexed/read on process freeze on node with elected CM.""" - app = (await app_name(ops_test)) or APP_NAME - - units_ips = await get_application_unit_ids_ips(ops_test, app) - leader_unit_ip = await get_leader_unit_ip(ops_test, app=app) - - # find unit currently elected cluster_manager - first_elected_cm_unit_id = await get_elected_cm_unit_id(ops_test, leader_unit_ip) - - # Killing the only instance can be disastrous. - if len(ops_test.model.applications[app].units) < 2: - old_units_count = len(ops_test.model.applications[app].units) - await ops_test.model.applications[app].add_unit(count=1) - await wait_until( - ops_test, - apps=[app], - apps_statuses=["active"], - units_statuses=["active"], - wait_for_exact_units=old_units_count + 1, - idle_period=IDLE_PERIOD, - ) - - # Freeze the opensearch process - opensearch_pid = await send_kill_signal_to_process( - ops_test, app, first_elected_cm_unit_id, signal="SIGSTOP" - ) - - # wait until the SIGSTOP fully takes effect - time.sleep(10) - - # verify the unit is not reachable - is_node_up = await is_up(ops_test, units_ips[first_elected_cm_unit_id], retries=3) - assert not is_node_up - - await assert_continuous_writes_increasing(c_writes) - - # get reachable unit to perform requests against, in case the previously stopped unit - # is leader unit, so its address is not reachable - reachable_ip = (await get_reachable_unit_ips(ops_test))[0] - - # fetch the current elected cluster_manager - current_elected_cm_unit_id = await get_elected_cm_unit_id(ops_test, reachable_ip) - assert ( - current_elected_cm_unit_id != first_elected_cm_unit_id - ), "Cluster manager still assigned to the unit where the service was stopped." - - # Un-Freeze the opensearch process in the node previously elected CM - await send_kill_signal_to_process( - ops_test, - app, - first_elected_cm_unit_id, - signal="SIGCONT", - opensearch_pid=opensearch_pid, - ) - - # verify that the opensearch service is back running on the unit previously elected CM unit - assert await is_up( - ops_test, units_ips[first_elected_cm_unit_id], retries=3 - ), "OpenSearch service hasn't restarted." - - # verify the previously elected CM node successfully joined back the rest of the fleet - assert await check_cluster_formation_successful( - ops_test, leader_unit_ip, get_application_unit_names(ops_test, app=app) - ) - - # continuous writes checks - await assert_continuous_writes_consistency(ops_test, c_writes, [app]) - - -@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "large"]) -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_restart_db_process_node_with_elected_cm( - ops_test: OpsTest, c_writes: ContinuousWrites, c_balanced_writes_runner -) -> None: - """Check cluster self-healing & data indexed/read on process restart on CM node.""" - app = (await app_name(ops_test)) or APP_NAME - - units_ips = await get_application_unit_ids_ips(ops_test, app) - leader_unit_ip = await get_leader_unit_ip(ops_test, app=app) - - # find unit currently elected cluster manager - first_elected_cm_unit_id = await get_elected_cm_unit_id(ops_test, leader_unit_ip) - - # Killing the only instance can be disastrous. - if len(ops_test.model.applications[app].units) < 2: - old_units_count = len(ops_test.model.applications[app].units) - await ops_test.model.applications[app].add_unit(count=1) - await wait_until( - ops_test, - apps=[app], - apps_statuses=["active"], - units_statuses=["active"], - wait_for_exact_units=old_units_count + 1, - idle_period=IDLE_PERIOD, - ) - - # restart the opensearch process - await send_kill_signal_to_process(ops_test, app, first_elected_cm_unit_id, signal="SIGTERM") - - await assert_continuous_writes_increasing(c_writes) - - # verify that the opensearch service is back running on the unit previously elected CM unit - assert await is_up( - ops_test, units_ips[first_elected_cm_unit_id] - ), "OpenSearch service hasn't restarted." - - # fetch the current elected cluster manager - current_elected_cm_unit_id = await get_elected_cm_unit_id(ops_test, leader_unit_ip) - assert ( - current_elected_cm_unit_id != first_elected_cm_unit_id - ), "Cluster manager election did not happen." - - # verify the previously elected CM node successfully joined back the rest of the fleet - assert await check_cluster_formation_successful( - ops_test, leader_unit_ip, get_application_unit_names(ops_test, app=app) - ) - - await assert_continuous_writes_consistency(ops_test, c_writes, [app]) - - -@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "large"]) -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_restart_db_process_node_with_primary_shard( - ops_test: OpsTest, c_writes: ContinuousWrites, c_balanced_writes_runner -) -> None: - """Check cluster can self-heal, data indexed/read on process restart on primary shard node.""" - app = (await app_name(ops_test)) or APP_NAME - - units_ips = await get_application_unit_ids_ips(ops_test, app) - leader_unit_ip = await get_leader_unit_ip(ops_test, app=app) - - # find unit hosting the primary shard of the index "series-index" - shards = await get_shards_by_index(ops_test, leader_unit_ip, ContinuousWrites.INDEX_NAME) - first_unit_with_primary_shard = [shard.unit_id for shard in shards if shard.is_prim][0] - - # Killing the only instance can be disastrous. - if len(ops_test.model.applications[app].units) < 2: - old_units_count = len(ops_test.model.applications[app].units) - await ops_test.model.applications[app].add_unit(count=1) - await wait_until( - ops_test, - apps=[app], - apps_statuses=["active"], - units_statuses=["active"], - wait_for_exact_units=old_units_count + 1, - idle_period=IDLE_PERIOD, - ) - - # restart the opensearch process - await send_kill_signal_to_process( - ops_test, app, first_unit_with_primary_shard, signal="SIGTERM" - ) - - await assert_continuous_writes_increasing(c_writes) - - # verify that the opensearch service is back running on the previous primary shard unit - assert await is_up( - ops_test, units_ips[first_unit_with_primary_shard] - ), "OpenSearch service hasn't restarted." - - # fetch unit hosting the new primary shard of the previous index - shards = await get_shards_by_index(ops_test, leader_unit_ip, ContinuousWrites.INDEX_NAME) - units_with_p_shards = [shard.unit_id for shard in shards if shard.is_prim] - assert len(units_with_p_shards) == 2 - for unit_id in units_with_p_shards: - assert ( - unit_id != first_unit_with_primary_shard - ), "Primary shard still assigned to the unit where the service was killed." - - # check that the unit previously hosting the primary shard now hosts a replica - units_with_r_shards = [shard.unit_id for shard in shards if not shard.is_prim] - assert first_unit_with_primary_shard in units_with_r_shards - - # verify the node with the old primary successfully joined the rest of the fleet - assert await check_cluster_formation_successful( - ops_test, leader_unit_ip, get_application_unit_names(ops_test, app=app) - ) - - await assert_continuous_writes_consistency(ops_test, c_writes, [app]) - - -@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "large"]) -@pytest.mark.group(1) -async def test_full_cluster_crash( - ops_test: OpsTest, c_writes: ContinuousWrites, c_balanced_writes_runner, reset_restart_delay -) -> None: - """Check cluster can operate normally after all nodes SIGKILL at same time and come back up.""" - app = (await app_name(ops_test)) or APP_NAME - - leader_ip = await get_leader_unit_ip(ops_test, app) - - # update all units to have a new RESTART_DELAY. Modifying the Restart delay to 3 minutes - # should ensure enough time for all replicas to be down at the same time. - for unit_id in get_application_unit_ids(ops_test, app): - await update_restart_delay(ops_test, app, unit_id, RESTART_DELAY) - - # kill all units simultaneously - await asyncio.gather( - *[ - send_kill_signal_to_process(ops_test, app, unit_id, signal="SIGKILL") - for unit_id in get_application_unit_ids(ops_test, app) - ] - ) - - # check that all units being down at the same time. - assert await all_processes_down(ops_test, app), "Not all units down at the same time." - - # Reset restart delay - for unit_id in get_application_unit_ids(ops_test, app): - await update_restart_delay(ops_test, app, unit_id, ORIGINAL_RESTART_DELAY) - - # sleep for restart delay + 45 secs max for the election time + node start + cluster formation - # around 10 sec enough in a good machine - 45 secs for CI - time.sleep(ORIGINAL_RESTART_DELAY + 45) - - # verify all units are up and running - for unit_id, unit_ip in (await get_application_unit_ids_ips(ops_test, app)).items(): - assert await is_up(ops_test, unit_ip), f"Unit {unit_id} not restarted after cluster crash." - - # check all nodes successfully joined the same cluster - assert await check_cluster_formation_successful( - ops_test, leader_ip, get_application_unit_names(ops_test, app=app) - ) - - await assert_continuous_writes_increasing(c_writes) - - # check that cluster health is green (all primary and replica shards allocated) - health_resp = await cluster_health(ops_test, leader_ip) - assert health_resp["status"] == "green", f"Cluster {health_resp['status']} - expected green." - - # continuous writes checks - await assert_continuous_writes_consistency(ops_test, c_writes, [app]) - - -@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "large"]) -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_full_cluster_restart( - ops_test: OpsTest, c_writes: ContinuousWrites, c_balanced_writes_runner, reset_restart_delay -) -> None: - """Check cluster can operate normally after all nodes SIGTERM at same time and come back up.""" - app = (await app_name(ops_test)) or APP_NAME - - leader_ip = await get_leader_unit_ip(ops_test, app) - - # update all units to have a new RESTART_DELAY. Modifying the Restart delay to 3 minutes - # should ensure enough time for all replicas to be down at the same time. - for unit_id in get_application_unit_ids(ops_test, app): - await update_restart_delay(ops_test, app, unit_id, RESTART_DELAY) - - # kill all units simultaneously - await asyncio.gather( - *[ - send_kill_signal_to_process(ops_test, app, unit_id, signal="SIGTERM") - for unit_id in get_application_unit_ids(ops_test, app) - ] - ) - - # check that all units being down at the same time. - assert await all_processes_down(ops_test, app), "Not all units down at the same time." - - # Reset restart delay - for unit_id in get_application_unit_ids(ops_test, app): - await update_restart_delay(ops_test, app, unit_id, ORIGINAL_RESTART_DELAY) - - # sleep for restart delay + 45 secs max for the election time + node start + cluster formation - # around 10 sec enough in a good machine - 45 secs for CI - time.sleep(ORIGINAL_RESTART_DELAY + 45) - - # verify all units are up and running - for unit_id, unit_ip in (await get_application_unit_ids_ips(ops_test, app)).items(): - assert await is_up(ops_test, unit_ip), f"Unit {unit_id} not restarted after cluster crash." - - # check all nodes successfully joined the same cluster - assert await check_cluster_formation_successful( - ops_test, leader_ip, get_application_unit_names(ops_test, app=app) - ) - - await assert_continuous_writes_increasing(c_writes) - - # check that cluster health is green (all primary and replica shards allocated) - health_resp = await cluster_health(ops_test, leader_ip) - assert health_resp["status"] == "green", f"Cluster {health_resp['status']} - expected green." - - # continuous writes checks - await assert_continuous_writes_consistency(ops_test, c_writes, [app]) diff --git a/tests/integration/ha/test_ha_multi_clusters.py b/tests/integration/ha/test_ha_multi_clusters.py deleted file mode 100644 index c95ac5a357..0000000000 --- a/tests/integration/ha/test_ha_multi_clusters.py +++ /dev/null @@ -1,117 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2024 Canonical Ltd. -# See LICENSE file for licensing details. - -import asyncio -import logging - -import pytest -from pytest_operator.plugin import OpsTest - -from ..helpers import ( - APP_NAME, - CONFIG_OPTS, - MODEL_CONFIG, - SERIES, - app_name, - get_application_unit_ids, - get_leader_unit_ip, -) -from ..helpers_deployments import wait_until -from ..tls.test_tls import TLS_CERTIFICATES_APP_NAME -from .continuous_writes import ContinuousWrites -from .helpers import SECOND_APP_NAME, assert_continuous_writes_consistency -from .helpers_data import delete_index, index_doc, search -from .test_horizontal_scaling import IDLE_PERIOD - -logger = logging.getLogger(__name__) - - -@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "large"]) -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -@pytest.mark.skip_if_deployed -async def test_build_and_deploy(ops_test: OpsTest) -> None: - """Build and deploy one unit of OpenSearch.""" - # it is possible for users to provide their own cluster for HA testing. - # Hence, check if there is a pre-existing cluster. - if await app_name(ops_test): - return - - my_charm = await ops_test.build_charm(".") - await ops_test.model.set_config(MODEL_CONFIG) - # Deploy TLS Certificates operator. - config = {"ca-common-name": "CN_CA"} - await asyncio.gather( - ops_test.model.deploy(TLS_CERTIFICATES_APP_NAME, channel="stable", config=config), - ops_test.model.deploy(my_charm, num_units=2, series=SERIES, config=CONFIG_OPTS), - ) - - # Relate it to OpenSearch to set up TLS. - await ops_test.model.integrate(APP_NAME, TLS_CERTIFICATES_APP_NAME) - await ops_test.model.wait_for_idle( - apps=[TLS_CERTIFICATES_APP_NAME, APP_NAME], - status="active", - timeout=1400, - idle_period=IDLE_PERIOD, - ) - assert len(ops_test.model.applications[APP_NAME].units) == 2 - - -# put this test at the end of the list of tests, as we delete an app during cleanup -# and the safeguards we have on the charm prevent us from doing so, so we'll keep -# using a unit without need - when other tests may need the unit on the CI -@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "large"]) -@pytest.mark.group(1) -async def test_multi_clusters_db_isolation( - ops_test: OpsTest, c_writes: ContinuousWrites, c_writes_runner -) -> None: - """Check that writes in cluster not replicated to another cluster.""" - app = (await app_name(ops_test)) or APP_NAME - - # remove 1 unit (for CI) - unit_ids = get_application_unit_ids(ops_test, app=app) - - # deploy new cluster - my_charm = await ops_test.build_charm(".") - await ops_test.model.deploy( - my_charm, num_units=1, application_name=SECOND_APP_NAME, config=CONFIG_OPTS - ) - await ops_test.model.integrate(SECOND_APP_NAME, TLS_CERTIFICATES_APP_NAME) - - # wait - await wait_until( - ops_test, - apps=[app, SECOND_APP_NAME], - apps_statuses=["active"], - units_statuses=["active"], - wait_for_exact_units={app: len(unit_ids), SECOND_APP_NAME: 1}, - idle_period=IDLE_PERIOD, - timeout=1600, - ) - - index_name = "test_index_unique_cluster_dbs" - - # index document in the current cluster - main_app_leader_unit_ip = await get_leader_unit_ip(ops_test, app=app) - await index_doc(ops_test, app, main_app_leader_unit_ip, index_name, doc_id=1) - - # index document in second cluster - second_app_leader_ip = await get_leader_unit_ip(ops_test, app=SECOND_APP_NAME) - await index_doc(ops_test, SECOND_APP_NAME, second_app_leader_ip, index_name, doc_id=2) - - # fetch all documents in each cluster - current_app_docs = await search(ops_test, app, main_app_leader_unit_ip, index_name) - second_app_docs = await search(ops_test, SECOND_APP_NAME, second_app_leader_ip, index_name) - - # check that the only doc indexed in each cluster is different - assert len(current_app_docs) == 1 - assert len(second_app_docs) == 1 - assert current_app_docs[0] != second_app_docs[0] - - # cleanup - await delete_index(ops_test, app, main_app_leader_unit_ip, index_name) - await ops_test.model.remove_application(SECOND_APP_NAME) - - # continuous writes checks - await assert_continuous_writes_consistency(ops_test, c_writes, [app]) diff --git a/tests/integration/ha/test_ha_networking.py b/tests/integration/ha/test_ha_networking.py index 789b2fdc84..6d5310b8a1 100644 --- a/tests/integration/ha/test_ha_networking.py +++ b/tests/integration/ha/test_ha_networking.py @@ -37,7 +37,8 @@ from ..helpers_deployments import wait_until from ..tls.test_tls import TLS_CERTIFICATES_APP_NAME from .continuous_writes import ContinuousWrites -from .test_horizontal_scaling import IDLE_PERIOD + +IDLE_PERIOD = 75 logger = logging.getLogger(__name__) diff --git a/tests/integration/ha/test_horizontal_scaling.py b/tests/integration/ha/test_horizontal_scaling.py deleted file mode 100644 index 4dc99d258d..0000000000 --- a/tests/integration/ha/test_horizontal_scaling.py +++ /dev/null @@ -1,339 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2024 Canonical Ltd. -# See LICENSE file for licensing details. - -import asyncio -import logging -import time - -import pytest -from charms.opensearch.v0.constants_charm import ClusterHealthYellow -from charms.opensearch.v0.helper_cluster import ClusterTopology -from pytest_operator.plugin import OpsTest - -from ..ha.helpers import ( - all_nodes, - assert_continuous_writes_consistency, - get_elected_cm_unit_id, - get_number_of_shards_by_node, - get_shards_by_index, - get_shards_by_state, -) -from ..helpers import ( - APP_NAME, - CONFIG_OPTS, - IDLE_PERIOD, - MODEL_CONFIG, - SERIES, - app_name, - check_cluster_formation_successful, - cluster_health, - get_application_unit_ids, - get_application_unit_names, - get_leader_unit_id, - get_leader_unit_ip, -) -from ..helpers_deployments import wait_until -from ..tls.test_tls import TLS_CERTIFICATES_APP_NAME -from .continuous_writes import ContinuousWrites -from .helpers_data import create_dummy_docs, create_dummy_indexes, delete_dummy_indexes - -logger = logging.getLogger(__name__) - - -@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "xlarge"]) -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -@pytest.mark.skip_if_deployed -async def test_build_and_deploy(ops_test: OpsTest) -> None: - """Build and deploy one unit of OpenSearch.""" - # it is possible for users to provide their own cluster for HA testing. - # Hence, check if there is a pre-existing cluster. - if await app_name(ops_test): - return - - my_charm = await ops_test.build_charm(".") - await ops_test.model.set_config(MODEL_CONFIG) - # Deploy TLS Certificates operator. - config = {"ca-common-name": "CN_CA"} - await asyncio.gather( - ops_test.model.deploy(TLS_CERTIFICATES_APP_NAME, channel="stable", config=config), - ops_test.model.deploy(my_charm, num_units=1, series=SERIES, config=CONFIG_OPTS), - ) - - # Relate it to OpenSearch to set up TLS. - await ops_test.model.integrate(APP_NAME, TLS_CERTIFICATES_APP_NAME) - await ops_test.model.wait_for_idle( - apps=[TLS_CERTIFICATES_APP_NAME, APP_NAME], status="active", timeout=1600 - ) - assert len(ops_test.model.applications[APP_NAME].units) == 1 - - -@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "xlarge"]) -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_horizontal_scale_up( - ops_test: OpsTest, c_writes: ContinuousWrites, c_writes_runner -) -> None: - """Tests that new added units to the cluster are discoverable.""" - app = (await app_name(ops_test)) or APP_NAME - init_units_count = len(ops_test.model.applications[app].units) - - # scale up - await ops_test.model.applications[app].add_unit(count=2) - await wait_until( - ops_test, - apps=[app], - apps_statuses=["active"], - units_statuses=["active"], - wait_for_exact_units=init_units_count + 2, - idle_period=IDLE_PERIOD, - ) - num_units = len(ops_test.model.applications[app].units) - assert num_units == init_units_count + 2 - - unit_names = get_application_unit_names(ops_test, app=app) - leader_unit_ip = await get_leader_unit_ip(ops_test, app=app) - - assert await check_cluster_formation_successful(ops_test, leader_unit_ip, unit_names) - - cluster_health_resp = await cluster_health(ops_test, leader_unit_ip) - assert cluster_health_resp["status"] == "green" - - shards_by_status = await get_shards_by_state(ops_test, leader_unit_ip) - assert not shards_by_status.get("INITIALIZING") - assert not shards_by_status.get("RELOCATING") - assert not shards_by_status.get("UNASSIGNED") - - # check roles, expecting all nodes to be cm_eligible - nodes = await all_nodes(ops_test, leader_unit_ip) - num_units = len(ops_test.model.applications[app].units) - assert ( - ClusterTopology.nodes_count_by_role(nodes)["cluster_manager"] == num_units - if num_units % 2 != 0 - else num_units - 1 - ) - - # continuous writes checks - await assert_continuous_writes_consistency(ops_test, c_writes, [app]) - - -@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "xlarge"]) -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_safe_scale_down_shards_realloc( - ops_test: OpsTest, c_writes: ContinuousWrites, c_writes_runner -) -> None: - """Tests the shutdown of a node, and re-allocation of shards to a newly joined unit. - - The goal of this test is to make sure that shards are automatically relocated after - a Yellow status on the cluster caused by a scale-down event. - """ - app = (await app_name(ops_test)) or APP_NAME - init_units_count = len(ops_test.model.applications[app].units) - - # scale up - await ops_test.model.applications[app].add_unit(count=1) - await wait_until( - ops_test, - apps=[app], - apps_statuses=["active"], - units_statuses=["active"], - wait_for_exact_units=init_units_count + 1, - idle_period=IDLE_PERIOD, - ) - - leader_unit_ip = await get_leader_unit_ip(ops_test, app=app) - leader_unit_id = await get_leader_unit_id(ops_test, app=app) - - # fetch all nodes - unit_ids = get_application_unit_ids(ops_test, app=app) - unit_id_to_stop = [unit_id for unit_id in unit_ids if unit_id != leader_unit_id][0] - unit_ids_to_keep = [unit_id for unit_id in unit_ids if unit_id != unit_id_to_stop] - - # create indices with right num of primary and replica shards, and populate with data - await create_dummy_indexes(ops_test, app, leader_unit_ip, max_r_shards=init_units_count) - await create_dummy_docs(ops_test, app, leader_unit_ip) - - # get initial cluster health - expected to be all good: green - cluster_health_resp = await cluster_health(ops_test, leader_unit_ip, wait_for_green_first=True) - assert cluster_health_resp["status"] == "green" - assert cluster_health_resp["unassigned_shards"] == 0 - - # get initial cluster allocation (nodes and their corresponding shards) - init_shards_per_node = await get_number_of_shards_by_node(ops_test, leader_unit_ip) - assert init_shards_per_node.get(-1, 0) == 0 # unallocated shards - - # remove the service in the chosen unit - await ops_test.model.applications[app].destroy_unit(f"{app}/{unit_id_to_stop}") - await wait_until( - ops_test, - apps=[app], - apps_full_statuses={app: {"blocked": [ClusterHealthYellow]}}, - units_statuses=["active"], - wait_for_exact_units=init_units_count, - idle_period=IDLE_PERIOD, - ) - - # check if at least partial shard re-allocation happened - new_shards_per_node = await get_number_of_shards_by_node(ops_test, leader_unit_ip) - - # some shards should have been reallocated, NOT ALL due to already existing replicas elsewhere - assert new_shards_per_node.get(-1, 0) > 0 # some shards not reallocated - - are_some_shards_reallocated = False - for unit_id in unit_ids_to_keep: - are_some_shards_reallocated = ( - are_some_shards_reallocated - or new_shards_per_node[unit_id] > init_shards_per_node[unit_id] - ) - assert are_some_shards_reallocated - - # get new cluster health - cluster_health_resp = await cluster_health(ops_test, leader_unit_ip) - - # not all replica shards should have been reallocated - assert cluster_health_resp["status"] == "yellow" - - # scale up by 1 unit - await ops_test.model.applications[app].add_unit(count=1) - await wait_until( - ops_test, - apps=[app], - apps_statuses=["active"], - units_statuses=["active"], - wait_for_exact_units=init_units_count + 1, - idle_period=IDLE_PERIOD, - ) - - new_unit_id = [ - int(unit.name.split("/")[1]) - for unit in ops_test.model.applications[app].units - if int(unit.name.split("/")[1]) not in unit_ids - ][0] - - # check if the previously unallocated shards have successfully moved to the newest unit - new_shards_per_node = await get_number_of_shards_by_node(ops_test, leader_unit_ip) - assert new_shards_per_node[new_unit_id] > 0 - - # get new cluster health - cluster_health_resp = await cluster_health(ops_test, leader_unit_ip) - assert cluster_health_resp["status"] == "green" - assert cluster_health_resp["unassigned_shards"] == 0 - assert new_shards_per_node.get(-1, 0) == 0 - - # delete the dummy indexes - await delete_dummy_indexes(ops_test, app, leader_unit_ip) - - # continuous writes checks - await assert_continuous_writes_consistency(ops_test, c_writes, [app]) - - -@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "xlarge"]) -@pytest.mark.group(1) -async def test_safe_scale_down_remove_leaders( - ops_test: OpsTest, c_writes: ContinuousWrites, c_writes_runner -) -> None: - """Tests the removal of specific units (elected cm, juju leader, node with prim shard). - - The goal of this test is to make sure that: - - the CM reelection happens successfully. - - the leader-elected event gets triggered successfully and - leadership related events on the charm work correctly, i.e: roles reassigning. - - the primary shards reelection happens successfully. - It is worth noting that we're going into this test with an odd number of units. - """ - app = (await app_name(ops_test)) or APP_NAME - init_units_count = len(ops_test.model.applications[app].units) - - if init_units_count < 5: - # scale up by 5 - init units - added_units = 5 - init_units_count - await ops_test.model.applications[app].add_unit(count=added_units) - - await wait_until( - ops_test, - apps=[app], - apps_statuses=["active"], - units_statuses=["active"], - wait_for_exact_units=init_units_count + added_units, - idle_period=IDLE_PERIOD, - timeout=1800, - ) - - init_units_count += added_units - - # scale down: remove the juju leader - leader_unit_id = await get_leader_unit_id(ops_test, app=app) - - await ops_test.model.applications[app].destroy_unit(f"{app}/{leader_unit_id}") - await wait_until( - ops_test, - apps=[app], - apps_statuses=["active"], - units_statuses=["active"], - wait_for_exact_units=init_units_count - 1, - idle_period=IDLE_PERIOD, - timeout=1800, - ) - - leader_unit_ip = await get_leader_unit_ip(ops_test, app=app) - - # scale-down: remove the current elected CM - first_elected_cm_unit_id = await get_elected_cm_unit_id(ops_test, leader_unit_ip) - assert first_elected_cm_unit_id != -1 - await ops_test.model.applications[app].destroy_unit(f"{app}/{first_elected_cm_unit_id}") - await wait_until( - ops_test, - apps=[app], - apps_statuses=["active"], - units_statuses=["active"], - wait_for_exact_units=init_units_count - 2, - idle_period=IDLE_PERIOD, - timeout=1800, - ) - - # check if CM re-election happened - leader_unit_ip = await get_leader_unit_ip(ops_test, app=app) - second_elected_cm_unit_id = await get_elected_cm_unit_id(ops_test, leader_unit_ip) - assert second_elected_cm_unit_id != -1 - assert second_elected_cm_unit_id != first_elected_cm_unit_id - - # check health of cluster - cluster_health_resp = await cluster_health(ops_test, leader_unit_ip, wait_for_green_first=True) - assert cluster_health_resp["status"] == "green" - - # remove node containing primary shard of index "series_index" - shards = await get_shards_by_index(ops_test, leader_unit_ip, ContinuousWrites.INDEX_NAME) - unit_with_primary_shard = [shard.unit_id for shard in shards if shard.is_prim][0] - await ops_test.model.applications[app].destroy_unit(f"{app}/{unit_with_primary_shard}") - await wait_until( - ops_test, - apps=[app], - apps_statuses=["active"], - units_statuses=["active"], - wait_for_exact_units=init_units_count - 3, - idle_period=IDLE_PERIOD, - timeout=1800, - ) - - writes = await c_writes.count() - - # check that the primary shard reelection happened - leader_unit_ip = await get_leader_unit_ip(ops_test, app=app) - shards = await get_shards_by_index(ops_test, leader_unit_ip, ContinuousWrites.INDEX_NAME) - units_with_p_shards = [shard.unit_id for shard in shards if shard.is_prim] - assert len(units_with_p_shards) == 1 - - for unit_id in units_with_p_shards: - assert ( - unit_id != unit_with_primary_shard - ), "Primary shard still assigned to destroyed unit." - - # check that writes are still going after the removal / p_shard reelection - time.sleep(3) - new_writes = await c_writes.count() - assert new_writes > writes - - # continuous writes checks - await assert_continuous_writes_consistency(ops_test, c_writes, [app]) diff --git a/tests/integration/ha/test_large_deployments_cluster_manager_only_nodes.py b/tests/integration/ha/test_large_deployments_cluster_manager_only_nodes.py deleted file mode 100644 index f6cb647067..0000000000 --- a/tests/integration/ha/test_large_deployments_cluster_manager_only_nodes.py +++ /dev/null @@ -1,162 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2024 Canonical Ltd. -# See LICENSE file for licensing details. - -import asyncio -import logging -import time - -import pytest -from charms.opensearch.v0.constants_charm import PClusterNoDataNode, PClusterNoRelation -from pytest_operator.plugin import OpsTest - -from ..helpers import CONFIG_OPTS, MODEL_CONFIG, SERIES, get_leader_unit_ip -from ..helpers_deployments import wait_until -from ..tls.test_tls import TLS_CERTIFICATES_APP_NAME -from .continuous_writes import ContinuousWrites -from .helpers import all_nodes -from .test_horizontal_scaling import IDLE_PERIOD - -logger = logging.getLogger(__name__) - -REL_ORCHESTRATOR = "peer-cluster-orchestrator" -REL_PEER = "peer-cluster" - -MAIN_APP = "opensearch-main" -FAILOVER_APP = "opensearch-failover" -DATA_APP = "opensearch-data" - -CLUSTER_NAME = "log-app" - -APP_UNITS = {MAIN_APP: 1, FAILOVER_APP: 1, DATA_APP: 2} - - -@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "xlarge"]) -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -@pytest.mark.skip_if_deployed -async def test_build_and_deploy(ops_test: OpsTest) -> None: - """Build and deploy one unit of OpenSearch.""" - # it is possible for users to provide their own cluster for HA testing. - # Hence, check if there is a pre-existing cluster. - my_charm = await ops_test.build_charm(".") - await ops_test.model.set_config(MODEL_CONFIG) - - # Deploy TLS Certificates operator. - config = {"ca-common-name": "CN_CA"} - await asyncio.gather( - ops_test.model.deploy(TLS_CERTIFICATES_APP_NAME, channel="stable", config=config), - ops_test.model.deploy( - my_charm, - application_name=MAIN_APP, - num_units=1, - series=SERIES, - config={"cluster_name": CLUSTER_NAME, "roles": "cluster_manager"} | CONFIG_OPTS, - ), - ops_test.model.deploy( - my_charm, - application_name=FAILOVER_APP, - num_units=1, - series=SERIES, - config={"cluster_name": CLUSTER_NAME, "init_hold": True, "roles": "cluster_manager"} - | CONFIG_OPTS, - ), - ops_test.model.deploy( - my_charm, - application_name=DATA_APP, - num_units=2, - series=SERIES, - config={"cluster_name": CLUSTER_NAME, "init_hold": True, "roles": "data"} - | CONFIG_OPTS, - ), - ) - - # wait until the TLS operator is ready - await wait_until( - ops_test, - apps=[TLS_CERTIFICATES_APP_NAME], - apps_statuses=["active"], - units_statuses=["active"], - wait_for_exact_units={TLS_CERTIFICATES_APP_NAME: 1}, - idle_period=IDLE_PERIOD, - ) - - # integrate TLS to all applications - for app in [MAIN_APP, FAILOVER_APP, DATA_APP]: - await ops_test.model.integrate(app, TLS_CERTIFICATES_APP_NAME) - - # confirm all apps are blocked because NO TLS relation established - await wait_until( - ops_test, - apps=list(APP_UNITS.keys()), - apps_full_statuses={ - MAIN_APP: {"blocked": [PClusterNoDataNode]}, - FAILOVER_APP: {"blocked": [PClusterNoRelation]}, - DATA_APP: {"blocked": [PClusterNoRelation]}, - }, - units_full_statuses={ - MAIN_APP: {"units": {"blocked": [PClusterNoDataNode]}}, - FAILOVER_APP: {"units": {"active": []}}, - DATA_APP: {"units": {"active": []}}, - }, - wait_for_exact_units={app: units for app, units in APP_UNITS.items()}, - idle_period=IDLE_PERIOD, - timeout=1800, - ) - - -@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "xlarge"]) -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_correct_startup_after_integration(ops_test: OpsTest) -> None: - """After integrating the cluster manager with the data application, both should start up.""" - await ops_test.model.integrate(f"{DATA_APP}:{REL_PEER}", f"{MAIN_APP}:{REL_ORCHESTRATOR}") - - await wait_until( - ops_test, - apps=[MAIN_APP, DATA_APP], - apps_full_statuses={ - MAIN_APP: {"active": []}, - DATA_APP: {"active": []}, - }, - units_statuses=["active"], - wait_for_exact_units={app: units for app, units in APP_UNITS.items()}, - idle_period=IDLE_PERIOD, - ) - - # make sure data can be written - c_writes = ContinuousWrites(ops_test, app=DATA_APP) - await c_writes.start() - time.sleep(30) - await c_writes.stop() - assert (await c_writes.count()) > 0, "Continuous writes did not increase" - - leader_unit_ip = await get_leader_unit_ip(ops_test, app=MAIN_APP) - nodes = await all_nodes(ops_test, leader_unit_ip, app=MAIN_APP) - assert len(nodes) == 3, f"Wrong node count. Expecting 3 online nodes, found: {len(nodes)}." - - -@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "xlarge"]) -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_integrate_failover(ops_test: OpsTest) -> None: - """After integrating the failover app to the others, all should be started and fine.""" - await ops_test.model.integrate(f"{FAILOVER_APP}:{REL_PEER}", f"{MAIN_APP}:{REL_ORCHESTRATOR}") - await ops_test.model.integrate(f"{DATA_APP}:{REL_PEER}", f"{FAILOVER_APP}:{REL_ORCHESTRATOR}") - - await wait_until( - ops_test, - apps=[MAIN_APP, DATA_APP, FAILOVER_APP], - apps_full_statuses={ - MAIN_APP: {"active": []}, - DATA_APP: {"active": []}, - FAILOVER_APP: {"active": []}, - }, - units_statuses=["active"], - wait_for_exact_units={app: units for app, units in APP_UNITS.items()}, - idle_period=IDLE_PERIOD, - ) - - leader_unit_ip = await get_leader_unit_ip(ops_test, app=MAIN_APP) - nodes = await all_nodes(ops_test, leader_unit_ip, app=MAIN_APP) - assert len(nodes) == 4, f"Wrong node count. Expecting 4 online nodes, found: {len(nodes)}." diff --git a/tests/integration/ha/test_large_deployments_relations.py b/tests/integration/ha/test_large_deployments_relations.py deleted file mode 100644 index eace6035cb..0000000000 --- a/tests/integration/ha/test_large_deployments_relations.py +++ /dev/null @@ -1,246 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2024 Canonical Ltd. -# See LICENSE file for licensing details. - -import asyncio -import logging -import time - -import pytest -from charms.opensearch.v0.constants_charm import PClusterNoRelation, TLSRelationMissing -from pytest_operator.plugin import OpsTest - -from ..helpers import CONFIG_OPTS, MODEL_CONFIG, SERIES, get_leader_unit_ip -from ..helpers_deployments import wait_until -from ..tls.test_tls import TLS_CERTIFICATES_APP_NAME -from .continuous_writes import ContinuousWrites -from .helpers import all_nodes -from .test_horizontal_scaling import IDLE_PERIOD - -logger = logging.getLogger(__name__) - -REL_ORCHESTRATOR = "peer-cluster-orchestrator" -REL_PEER = "peer-cluster" - -MAIN_APP = "opensearch-main" -FAILOVER_APP = "opensearch-failover" -DATA_APP = "opensearch-data" -INVALID_APP = "opensearch-invalid" - -CLUSTER_NAME = "log-app" -INVALID_CLUSTER_NAME = "timeseries" - -APP_UNITS = {MAIN_APP: 3, FAILOVER_APP: 3, DATA_APP: 2, INVALID_APP: 1} - - -@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "xlarge"]) -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -@pytest.mark.skip_if_deployed -async def test_build_and_deploy(ops_test: OpsTest) -> None: - """Build and deploy one unit of OpenSearch.""" - # it is possible for users to provide their own cluster for HA testing. - # Hence, check if there is a pre-existing cluster. - my_charm = await ops_test.build_charm(".") - await ops_test.model.set_config(MODEL_CONFIG) - - # Deploy TLS Certificates operator. - config = {"ca-common-name": "CN_CA"} - await asyncio.gather( - ops_test.model.deploy(TLS_CERTIFICATES_APP_NAME, channel="stable", config=config), - ops_test.model.deploy( - my_charm, - application_name=MAIN_APP, - num_units=3, - series=SERIES, - config={"cluster_name": CLUSTER_NAME} | CONFIG_OPTS, - ), - ops_test.model.deploy( - my_charm, - application_name=FAILOVER_APP, - num_units=3, - series=SERIES, - config={"cluster_name": CLUSTER_NAME, "init_hold": True} | CONFIG_OPTS, - ), - ops_test.model.deploy( - my_charm, - application_name=DATA_APP, - num_units=2, - series=SERIES, - config={"cluster_name": CLUSTER_NAME, "init_hold": True, "roles": "data.hot,ml"} - | CONFIG_OPTS, - ), - ops_test.model.deploy( - my_charm, - application_name=INVALID_APP, - num_units=1, - series=SERIES, - config={"cluster_name": INVALID_CLUSTER_NAME, "init_hold": True, "roles": "data.cold"} - | CONFIG_OPTS, - ), - ) - - # wait until the TLS operator is ready - await wait_until( - ops_test, - apps=[TLS_CERTIFICATES_APP_NAME], - apps_statuses=["active"], - units_statuses=["active"], - wait_for_exact_units={TLS_CERTIFICATES_APP_NAME: 1}, - idle_period=IDLE_PERIOD, - ) - - # confirm all apps are blocked because NO TLS relation established - await wait_until( - ops_test, - apps=list(APP_UNITS.keys()), - apps_full_statuses={ - MAIN_APP: {"blocked": [TLSRelationMissing]}, - FAILOVER_APP: {"blocked": [PClusterNoRelation]}, - DATA_APP: {"blocked": [PClusterNoRelation]}, - INVALID_APP: {"blocked": [PClusterNoRelation]}, - }, - units_full_statuses={ - MAIN_APP: {"units": {"blocked": [TLSRelationMissing]}}, - FAILOVER_APP: {"units": {"active": []}}, - DATA_APP: {"units": {"active": []}}, - INVALID_APP: {"units": {"active": []}}, - }, - wait_for_exact_units={app: units for app, units in APP_UNITS.items()}, - idle_period=IDLE_PERIOD, - timeout=1800, - ) - - -@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "xlarge"]) -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_invalid_conditions(ops_test: OpsTest) -> None: - """Check invalid conditions under different states.""" - # integrate an app with the main-orchestrator when TLS is not related to the provider - await ops_test.model.integrate(f"{FAILOVER_APP}:{REL_PEER}", f"{MAIN_APP}:{REL_ORCHESTRATOR}") - await wait_until( - ops_test, - apps=[MAIN_APP, FAILOVER_APP], - apps_full_statuses={ - MAIN_APP: {"blocked": [TLSRelationMissing]}, - FAILOVER_APP: { - "waiting": ["TLS not fully configured in related 'main-orchestrator'."] - }, - }, - units_full_statuses={ - MAIN_APP: {"units": {"blocked": [TLSRelationMissing]}}, - FAILOVER_APP: {"units": {"active": []}}, - }, - wait_for_exact_units={ - MAIN_APP: APP_UNITS[MAIN_APP], - FAILOVER_APP: APP_UNITS[FAILOVER_APP], - }, - idle_period=IDLE_PERIOD, - timeout=1800, - ) - - # integrate TLS to all applications - for app in [MAIN_APP, FAILOVER_APP, DATA_APP, INVALID_APP]: - await ops_test.model.integrate(app, TLS_CERTIFICATES_APP_NAME) - - await wait_until( - ops_test, - apps=[MAIN_APP, FAILOVER_APP, DATA_APP, INVALID_APP], - apps_full_statuses={ - MAIN_APP: {"active": []}, - FAILOVER_APP: {"active": []}, - DATA_APP: {"blocked": [PClusterNoRelation]}, - INVALID_APP: {"blocked": [PClusterNoRelation]}, - }, - units_statuses=["active"], - wait_for_exact_units={app: units for app, units in APP_UNITS.items()}, - idle_period=IDLE_PERIOD, - timeout=1800, - ) - - c_writes = ContinuousWrites(ops_test, app=MAIN_APP) - await c_writes.start() - time.sleep(120) - await c_writes.stop() - - # fetch nodes, we should have 6 nodes (main + failover)-orchestrators - leader_unit_ip = await get_leader_unit_ip(ops_test, app=MAIN_APP) - nodes = await all_nodes(ops_test, leader_unit_ip, app=MAIN_APP) - assert len(nodes) == 6, f"Wrong node count. Expecting 6 online nodes, found: {len(nodes)}." - - # integrate cluster with different name - await ops_test.model.integrate(f"{INVALID_APP}:{REL_PEER}", f"{MAIN_APP}:{REL_ORCHESTRATOR}") - await wait_until( - ops_test, - apps=[MAIN_APP, INVALID_APP], - apps_full_statuses={ - MAIN_APP: {"active": []}, - INVALID_APP: { - "blocked": ["Cannot relate 2 clusters with different 'cluster_name' values."] - }, - }, - units_statuses=["active"], - wait_for_exact_units={MAIN_APP: APP_UNITS[MAIN_APP], INVALID_APP: APP_UNITS[INVALID_APP]}, - idle_period=IDLE_PERIOD, - timeout=1800, - ) - - # delete the invalid app name - await ops_test.model.remove_application( - INVALID_APP, block_until_done=True, force=True, destroy_storage=True, no_wait=True - ) - - -@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "xlarge"]) -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_large_deployment_fully_formed( - ops_test: OpsTest, c_writes: ContinuousWrites, c_writes_runner -) -> None: - """Test that under optimal conditions all the nodes form the same big cluster.""" - await ops_test.model.integrate(f"{DATA_APP}:{REL_PEER}", f"{MAIN_APP}:{REL_ORCHESTRATOR}") - await ops_test.model.integrate(f"{DATA_APP}:{REL_PEER}", f"{FAILOVER_APP}:{REL_ORCHESTRATOR}") - - await wait_until( - ops_test, - apps=[MAIN_APP, FAILOVER_APP, DATA_APP], - apps_statuses=["active"], - units_statuses=["active"], - wait_for_exact_units={ - app: units for app, units in APP_UNITS.items() if app != INVALID_APP - }, - idle_period=IDLE_PERIOD, - timeout=1800, - ) - - # fetch nodes, we should have 6 nodes (main + failover)-orchestrators - leader_unit_ip = await get_leader_unit_ip(ops_test, app=MAIN_APP) - nodes = await all_nodes(ops_test, leader_unit_ip, app=MAIN_APP) - assert len(nodes) == 8, f"Wrong node count. Expecting 8 online nodes, found: {len(nodes)}." - - # check the roles - auto_gen_roles = ["cluster_manager", "data", "ingest", "ml"] - data_roles = ["data", "ml"] - for app, node_count in [(MAIN_APP, 3), (FAILOVER_APP, 3), (DATA_APP, 2)]: - current_app_nodes = [ - node for node in nodes if node.app.id == f"{ops_test.model.uuid}/{app}" - ] - assert ( - len(current_app_nodes) == node_count - ), f"Wrong count for {app}:{len(current_app_nodes)} - expected:{node_count}" - - roles = current_app_nodes[0].roles - temperature = current_app_nodes[0].temperature - if app in [MAIN_APP, FAILOVER_APP]: - assert sorted(roles) == sorted( - auto_gen_roles - ), f"Wrong roles for {app}:{roles} - expected:{auto_gen_roles}" - assert temperature is None, f"Wrong temperature for {app}:{roles} - expected:None" - else: - assert sorted(roles) == sorted( - data_roles - ), f"Wrong roles for {app}:{roles} - expected:{data_roles}" - assert ( - temperature == "hot" - ), f"Wrong temperature for {app}:{temperature} - expected:hot" diff --git a/tests/integration/ha/test_roles_managements.py b/tests/integration/ha/test_roles_managements.py deleted file mode 100644 index d62049832f..0000000000 --- a/tests/integration/ha/test_roles_managements.py +++ /dev/null @@ -1,170 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2024 Canonical Ltd. -# See LICENSE file for licensing details. - -import asyncio -import logging - -import pytest -from charms.opensearch.v0.constants_charm import PClusterWrongNodesCountForQuorum -from pytest_operator.plugin import OpsTest - -from ..helpers import ( - APP_NAME, - CONFIG_OPTS, - MODEL_CONFIG, - SERIES, - check_cluster_formation_successful, - cluster_health, - get_application_unit_ids, - get_application_unit_names, - get_leader_unit_ip, -) -from ..helpers_deployments import wait_until -from ..tls.test_tls import TLS_CERTIFICATES_APP_NAME -from .continuous_writes import ContinuousWrites -from .helpers import all_nodes, app_name -from .test_horizontal_scaling import IDLE_PERIOD - -logger = logging.getLogger(__name__) - - -@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "large"]) -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -@pytest.mark.skip_if_deployed -async def test_build_and_deploy(ops_test: OpsTest) -> None: - """Build and deploy one unit of OpenSearch.""" - # it is possible for users to provide their own cluster for HA testing. - # Hence, check if there is a pre-existing cluster. - if await app_name(ops_test): - return - - my_charm = await ops_test.build_charm(".") - await ops_test.model.set_config(MODEL_CONFIG) - # Deploy TLS Certificates operator. - config = {"ca-common-name": "CN_CA"} - await asyncio.gather( - ops_test.model.deploy(TLS_CERTIFICATES_APP_NAME, channel="stable", config=config), - ops_test.model.deploy(my_charm, num_units=3, series=SERIES, config=CONFIG_OPTS), - ) - - # Relate it to OpenSearch to set up TLS. - await ops_test.model.integrate(APP_NAME, TLS_CERTIFICATES_APP_NAME) - await wait_until( - ops_test, - apps=[TLS_CERTIFICATES_APP_NAME, APP_NAME], - apps_statuses=["active"], - units_statuses=["active"], - wait_for_exact_units={TLS_CERTIFICATES_APP_NAME: 1, APP_NAME: 3}, - idle_period=IDLE_PERIOD, - ) - assert len(ops_test.model.applications[APP_NAME].units) == 3 - - -@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "large"]) -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_set_roles_manually( - ops_test: OpsTest, c_writes: ContinuousWrites, c_writes_runner -) -> None: - """Check roles changes in all nodes.""" - app = (await app_name(ops_test)) or APP_NAME - - leader_unit_ip = await get_leader_unit_ip(ops_test, app=app) - - cluster_name = (await cluster_health(ops_test, leader_unit_ip))["cluster_name"] - nodes = await all_nodes(ops_test, leader_unit_ip) - for node in nodes: - assert sorted(node.roles) == [ - "cluster_manager", - "data", - "ingest", - "ml", - ] - assert node.temperature is None, "Node temperature was erroneously set." - - # change cluster name and roles + temperature, should trigger a rolling restart - - logger.info("Changing cluster name and roles + temperature.") - await ops_test.model.applications[app].set_config( - {"cluster_name": "new_cluster_name", "roles": "cluster_manager, data.cold"} - ) - await wait_until( - ops_test, - apps=[app], - apps_statuses=["active"], - units_statuses=["active"], - wait_for_exact_units=len(nodes), - idle_period=IDLE_PERIOD, - ) - - logger.info("Checking if the cluster name and roles + temperature were changed.") - assert await check_cluster_formation_successful( - ops_test, leader_unit_ip, get_application_unit_names(ops_test, app=app) - ) - new_cluster_name = (await cluster_health(ops_test, leader_unit_ip))["cluster_name"] - assert new_cluster_name == cluster_name, "Oops - cluster name changed." - - nodes = await all_nodes(ops_test, leader_unit_ip) - for node in nodes: - assert sorted(node.roles) == ["cluster_manager", "data"], "roles unchanged" - assert node.temperature == "cold", "Temperature unchanged." - - # scale up cluster by 1 unit, this should give the new node the same roles - await ops_test.model.applications[app].add_unit(count=1) - # TODO: this should have to go once we full trust that quorum is automatically established - await wait_until( - ops_test, - apps=[app], - units_full_statuses={ - app: { - "units": { - "blocked": [PClusterWrongNodesCountForQuorum], - "active": [], - }, - }, - }, - wait_for_exact_units=len(nodes) + 1, - idle_period=IDLE_PERIOD, - ) - new_nodes = await all_nodes(ops_test, leader_unit_ip) - assert len(new_nodes) == len(nodes) - - # remove new unit - last_unit_id = sorted(get_application_unit_ids(ops_test, app))[-1] - await ops_test.model.applications[app].destroy_unit(f"{app}/{last_unit_id}") - - -@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "large"]) -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_switch_back_to_auto_generated_roles( - ops_test: OpsTest, c_writes: ContinuousWrites, c_writes_runner -) -> None: - """Check roles changes in all nodes.""" - app = (await app_name(ops_test)) or APP_NAME - - leader_unit_ip = await get_leader_unit_ip(ops_test, app=app) - nodes = await all_nodes(ops_test, leader_unit_ip) - - await ops_test.model.applications[app].set_config({"roles": ""}) - await wait_until( - ops_test, - apps=[app], - apps_statuses=["active"], - units_statuses=["active"], - wait_for_exact_units=len(nodes), - idle_period=IDLE_PERIOD, - ) - - # check that nodes' roles have indeed changed - nodes = await all_nodes(ops_test, leader_unit_ip) - for node in nodes: - assert sorted(node.roles) == [ - "cluster_manager", - "data", - "ingest", - "ml", - ] - assert node.temperature is None, "Node temperature was erroneously set." diff --git a/tests/integration/ha/test_scale_to_one_and_back.py b/tests/integration/ha/test_scale_to_one_and_back.py deleted file mode 100644 index 74f86d27dc..0000000000 --- a/tests/integration/ha/test_scale_to_one_and_back.py +++ /dev/null @@ -1,196 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2024 Canonical Ltd. -# See LICENSE file for licensing details. - -import asyncio -import logging - -import pytest -from pytest_operator.plugin import OpsTest - -from ..ha.helpers import get_elected_cm_unit_id -from ..helpers import ( - APP_NAME, - CONFIG_OPTS, - MODEL_CONFIG, - SERIES, - cluster_health, - cluster_voting_config_exclusions, - execute_update_status_manually, - get_leader_unit_ip, - set_watermark, -) -from ..helpers_deployments import wait_until -from ..tls.test_tls import TLS_CERTIFICATES_APP_NAME -from .continuous_writes import ContinuousWrites -from .helpers import ( - app_name, - assert_continuous_writes_consistency, - assert_continuous_writes_increasing, -) -from .test_horizontal_scaling import IDLE_PERIOD - -logger = logging.getLogger(__name__) - - -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -@pytest.mark.skip_if_deployed -async def test_build_and_deploy(ops_test: OpsTest) -> None: - """Build and deploy one unit of OpenSearch.""" - # it is possible for users to provide their own cluster for HA testing. - # Hence, check if there is a pre-existing cluster. - if await app_name(ops_test): - return - - my_charm = await ops_test.build_charm(".") - # This test will manually issue update-status hooks, as we want to see the change in behavior - # when applying `settle_voting` during start/stop and during update-status. - MODEL_CONFIG["update-status-hook-interval"] = "360m" - - await ops_test.model.set_config(MODEL_CONFIG) - - # Deploy TLS Certificates operator. - config = {"ca-common-name": "CN_CA"} - await asyncio.gather( - ops_test.model.deploy(TLS_CERTIFICATES_APP_NAME, channel="stable", config=config), - ops_test.model.deploy(my_charm, num_units=3, series=SERIES, config=CONFIG_OPTS), - ) - - # Relate it to OpenSearch to set up TLS. - await ops_test.model.integrate(APP_NAME, TLS_CERTIFICATES_APP_NAME) - await ops_test.model.wait_for_idle( - apps=[TLS_CERTIFICATES_APP_NAME, APP_NAME], - status="active", - timeout=1400, - idle_period=IDLE_PERIOD, - ) - assert len(ops_test.model.applications[APP_NAME].units) == 3 - - # Set watermark - await set_watermark(ops_test, app=APP_NAME) - - -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_scale_down( - ops_test: OpsTest, c_writes: ContinuousWrites, c_0_repl_writes_runner -) -> None: - """Tests the shutdown of a node, and see the voting exclusions to be applied. - - This test will remove the elected cluster manager. - """ - app = (await app_name(ops_test)) or APP_NAME - - leader_unit_ip = await get_leader_unit_ip(ops_test, app=app) - voting_exclusions = await cluster_voting_config_exclusions(ops_test, unit_ip=leader_unit_ip) - assert len(voting_exclusions) == 0 - - init_count = len(ops_test.model.applications[app].units) - while init_count > 1: - # find unit currently elected cluster_manager - elected_cm_unit_id = await get_elected_cm_unit_id(ops_test, leader_unit_ip) - - # remove the service in the chosen unit - await ops_test.model.applications[app].destroy_unit(f"{app}/{elected_cm_unit_id}") - - await wait_until( - ops_test, - apps=[app], - units_statuses=["active"], - wait_for_exact_units=init_count - 1, - idle_period=IDLE_PERIOD, - ) - - # Check voting exclusions - leader_unit_ip = await get_leader_unit_ip(ops_test, app=app) - voting_exclusions = await cluster_voting_config_exclusions( - ops_test, unit_ip=leader_unit_ip - ) - assert len(voting_exclusions) == 0 - # Test the cleanup() method - await execute_update_status_manually(ops_test, app=app) - voting_exclusions = await cluster_voting_config_exclusions( - ops_test, unit_ip=leader_unit_ip - ) - assert len(voting_exclusions) == 0 - - # get initial cluster health - expected to be all good: green - leader_unit_ip = await get_leader_unit_ip(ops_test, app=app) - cluster_health_resp = await cluster_health( - ops_test, leader_unit_ip, wait_for_green_first=True - ) - assert cluster_health_resp["status"] == "green" - assert cluster_health_resp["unassigned_shards"] == 0 - - # Make sure we continue to be writable - await assert_continuous_writes_increasing(c_writes) - - init_count = len(ops_test.model.applications[app].units) - - # continuous writes checks - await assert_continuous_writes_consistency(ops_test, c_writes, [app]) - - -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_scale_back_up( - ops_test: OpsTest, c_writes: ContinuousWrites, c_0_repl_writes_runner -) -> None: - """Tests the scaling back to 3x node-cluster and see the voting exclusions to be applied.""" - app = (await app_name(ops_test)) or APP_NAME - - init_count = len(ops_test.model.applications[app].units) - while init_count < 3: - # find unit currently elected cluster_manager - leader_unit_ip = await get_leader_unit_ip(ops_test, app=app) - - # remove the service in the chosen unit - await ops_test.model.applications[app].add_unit(count=1) - await wait_until( - ops_test, - apps=[app], - units_statuses=["active"], - wait_for_exact_units=init_count + 1, - idle_period=IDLE_PERIOD, - ) - - # get initial cluster health - expected to be all good: green - leader_unit_ip = await get_leader_unit_ip(ops_test, app=app) - cluster_health_resp = await cluster_health( - ops_test, leader_unit_ip, wait_for_green_first=True - ) - assert cluster_health_resp["status"] == "green" - assert cluster_health_resp["unassigned_shards"] == 0 - - # Adding new units should not trigger a new voting exclusion - voting_exclusions = await cluster_voting_config_exclusions( - ops_test, unit_ip=leader_unit_ip - ) - assert len(voting_exclusions) == 0 - - # Make sure we continue to be writable - await assert_continuous_writes_increasing(c_writes) - - init_count = len(ops_test.model.applications[app].units) - - # Make sure update status is executed and fixes the voting exclusions - await execute_update_status_manually(ops_test, app=app) - voting_exclusions = await cluster_voting_config_exclusions(ops_test, unit_ip=leader_unit_ip) - assert len(voting_exclusions) == 0 - - # continuous writes checks - await assert_continuous_writes_consistency(ops_test, c_writes, [app]) - - -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_gracefully_cluster_remove(ops_test: OpsTest) -> None: - """Tests removing the entire application at once.""" - app = (await app_name(ops_test)) or APP_NAME - - # This removal must not leave units in error. - # We will block until it is finished. - await asyncio.gather( - ops_test.model.remove_application(app, block_until_done=True), - ) diff --git a/tests/integration/ha/test_storage.py b/tests/integration/ha/test_storage.py index 87585c3f64..f98c704c9d 100644 --- a/tests/integration/ha/test_storage.py +++ b/tests/integration/ha/test_storage.py @@ -16,7 +16,6 @@ storage_id, storage_type, ) -from ..ha.test_horizontal_scaling import IDLE_PERIOD from ..helpers import ( APP_NAME, CONFIG_OPTS, @@ -28,6 +27,10 @@ from ..tls.test_tls import TLS_CERTIFICATES_APP_NAME from .continuous_writes import ContinuousWrites + +IDLE_PERIOD = 75 + + logger = logging.getLogger(__name__) diff --git a/tests/integration/plugins/__init__.py b/tests/integration/plugins/__init__.py deleted file mode 100644 index e3979c0f63..0000000000 --- a/tests/integration/plugins/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -# Copyright 2024 Canonical Ltd. -# See LICENSE file for licensing details. diff --git a/tests/integration/plugins/helpers.py b/tests/integration/plugins/helpers.py deleted file mode 100644 index 51130e92b2..0000000000 --- a/tests/integration/plugins/helpers.py +++ /dev/null @@ -1,131 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2024 Canonical Ltd. -# See LICENSE file for licensing details. - -"""Helper functions related to testing the different plugins.""" -import json -import logging -import random -from typing import Any, Dict, List, Optional, Tuple - -from pytest_operator.plugin import OpsTest -from tenacity import ( - RetryError, - Retrying, - retry, - stop_after_attempt, - wait_fixed, - wait_random, -) - -from ..ha.helpers_data import bulk_insert, create_index -from ..helpers import http_request - -logger = logging.getLogger(__name__) - - -def generate_bulk_training_data( - index_name: str, - vector_name: str, - docs_count: int = 100, - dimensions: int = 4, - has_result: bool = False, -) -> Tuple[str, List[str]]: - random.seed("seed") - print("The seed for randomness is: 'seed'") - - data = random.randbytes(docs_count * dimensions) - if has_result: - responses = random.randbytes(docs_count) - result = "" - result_list = [] - for i in range(docs_count): - result += json.dumps({"index": {"_index": index_name, "_id": i}}) + "\n" - result_list.append([float(data[j]) for j in range(i * dimensions, (i + 1) * dimensions)]) - inter = {vector_name: result_list[i]} - if has_result: - inter["price"] = float(responses[i]) - result += json.dumps(inter) + "\n" - return result, result_list - - -@retry( - wait=wait_fixed(wait=5) + wait_random(0, 5), - stop=stop_after_attempt(15), -) -async def run_knn_training( - ops_test: OpsTest, - app: str, - unit_ip: str, - model_name: str, - payload: Dict[str, Any], -) -> Optional[List[Dict[str, Any]]]: - """Sets models.""" - endpoint = f"https://{unit_ip}:9200/_plugins/_knn/models/{model_name}/_train" - return await http_request(ops_test, "POST", endpoint, payload=payload, app=app) - - -async def is_knn_training_complete( - ops_test: OpsTest, - app: str, - unit_ip: str, - model_name: str, -) -> bool: - """Waits training models.""" - endpoint = f"https://{unit_ip}:9200/_plugins/_knn/models/{model_name}" - try: - for attempt in Retrying(stop=stop_after_attempt(15), wait=wait_fixed(wait=5)): - with attempt: - resp = await http_request(ops_test, "GET", endpoint, app=app) - if "created" not in resp.get("state", ""): - raise Exception - return True - except RetryError: - return False - - -async def create_index_and_bulk_insert( - ops_test: OpsTest, - app: str, - endpoint: str, - index_name: str, - shards: int, - vector_name: str, - model_name: str = None, -) -> List[float]: - if model_name: - extra_mappings = { - "properties": { - vector_name: { - "type": "knn_vector", - "model_id": model_name, - } - } - } - extra_index_settings = {"knn": "true"} - else: - extra_mappings = { - "properties": { - vector_name: { - "type": "knn_vector", - "dimension": 4, - } - } - } - extra_index_settings = {} - - await create_index( - ops_test, - app, - endpoint, - index_name, - r_shards=shards, - extra_index_settings=extra_index_settings, - extra_mappings=extra_mappings, - ) - payload, payload_list = generate_bulk_training_data( - index_name, vector_name, docs_count=100, dimensions=4, has_result=True - ) - # Insert data in bulk - await bulk_insert(ops_test, app, endpoint, payload) - return payload_list diff --git a/tests/integration/plugins/test_plugins.py b/tests/integration/plugins/test_plugins.py deleted file mode 100644 index d62ed03381..0000000000 --- a/tests/integration/plugins/test_plugins.py +++ /dev/null @@ -1,637 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2024 Canonical Ltd. -# See LICENSE file for licensing details. - -import asyncio -import json -import logging -import subprocess - -import pytest -from pytest_operator.plugin import OpsTest -from tenacity import RetryError - -from ..ha.helpers import app_name -from ..ha.helpers_data import bulk_insert, create_index, search -from ..ha.test_horizontal_scaling import IDLE_PERIOD -from ..helpers import ( - APP_NAME, - CONFIG_OPTS, - MODEL_CONFIG, - SERIES, - check_cluster_formation_successful, - get_application_unit_ids_ips, - get_application_unit_ids_start_time, - get_application_unit_names, - get_leader_unit_id, - get_leader_unit_ip, - get_secret_by_label, - http_request, - is_each_unit_restarted, - run_action, - set_watermark, -) -from ..helpers_deployments import wait_until -from ..plugins.helpers import ( - create_index_and_bulk_insert, - generate_bulk_training_data, - is_knn_training_complete, - run_knn_training, -) -from ..relations.helpers import get_unit_relation_data -from ..tls.test_tls import TLS_CERTIFICATES_APP_NAME - -logger = logging.getLogger(__name__) - - -COS_APP_NAME = "grafana-agent" -COS_RELATION_NAME = "cos-agent" -MAIN_ORCHESTRATOR_NAME = "main" -FAILOVER_ORCHESTRATOR_NAME = "failover" - - -ALL_GROUPS = { - deploy_type: pytest.param( - deploy_type, - id=deploy_type, - marks=[ - pytest.mark.group(deploy_type), - pytest.mark.runner( - [ - "self-hosted", - "linux", - "X64", - "jammy", - "xlarge" if deploy_type == "large" else "large", - ] - ), - ], - ) - for deploy_type in ["large_deployment", "small_deployment"] -} - -ALL_DEPLOYMENTS = list(ALL_GROUPS.values()) -SMALL_DEPLOYMENTS = [ALL_GROUPS["small_deployment"]] -LARGE_DEPLOYMENTS = [ALL_GROUPS["large_deployment"]] - - -async def assert_knn_config_updated( - ops_test: OpsTest, knn_enabled: bool, check_api: bool = True -) -> None: - """Check if the KNN plugin is enabled or disabled.""" - leader_unit_ip = await get_leader_unit_ip(ops_test, app=APP_NAME) - cmd = ( - f"juju ssh -m {ops_test.model.name} opensearch/0 -- " - "sudo grep -r 'knn.plugin.enabled' " - "/var/snap/opensearch/current/etc/opensearch/opensearch.yml" - ).split() - assert (knn_enabled and "true" in subprocess.check_output(cmd).decode()) or ( - not knn_enabled and "false" in subprocess.check_output(cmd).decode() - ) - if not check_api: - # We're finished - return - - endpoint = f"https://{leader_unit_ip}:9200/_cluster/settings?flat_settings=true" - settings = await http_request(ops_test, "GET", endpoint, app=APP_NAME, json_resp=True) - assert settings.get("persistent").get("knn.plugin.enabled") == str(knn_enabled).lower() - - -async def _set_config(ops_test: OpsTest, deploy_type: str, conf: dict[str, str]) -> None: - if deploy_type == "small_deployment": - await ops_test.model.applications[APP_NAME].set_config(conf) - return - await ops_test.model.applications[MAIN_ORCHESTRATOR_NAME].set_config(conf | CONFIG_OPTS) - await ops_test.model.applications[FAILOVER_ORCHESTRATOR_NAME].set_config(conf | CONFIG_OPTS) - await ops_test.model.applications[APP_NAME].set_config(conf | CONFIG_OPTS) - - -async def _wait_for_units( - ops_test: OpsTest, - deployment_type: str, - wait_for_cos: bool = False, -) -> None: - """Wait for all units to be active. - - This wait will behavior accordingly to small/large. - """ - if deployment_type == "small_deployment": - await wait_until( - ops_test, - apps=[APP_NAME], - apps_statuses=["active"], - units_statuses=["active"], - timeout=1800, - wait_for_exact_units={APP_NAME: 3}, - idle_period=IDLE_PERIOD, - ) - if wait_for_cos: - await wait_until( - ops_test, - apps=[COS_APP_NAME], - units_statuses=["blocked"], - timeout=1800, - idle_period=IDLE_PERIOD, - ) - return - await wait_until( - ops_test, - apps=[ - TLS_CERTIFICATES_APP_NAME, - MAIN_ORCHESTRATOR_NAME, - FAILOVER_ORCHESTRATOR_NAME, - APP_NAME, - ], - wait_for_exact_units={ - TLS_CERTIFICATES_APP_NAME: 1, - MAIN_ORCHESTRATOR_NAME: 1, - FAILOVER_ORCHESTRATOR_NAME: 2, - APP_NAME: 1, - }, - apps_statuses=["active"], - units_statuses=["active"], - timeout=1800, - idle_period=IDLE_PERIOD, - ) - if wait_for_cos: - await wait_until( - ops_test, - apps=[COS_APP_NAME], - units_statuses=["blocked"], - timeout=1800, - idle_period=IDLE_PERIOD, - ) - - -@pytest.mark.parametrize("deploy_type", SMALL_DEPLOYMENTS) -@pytest.mark.abort_on_fail -@pytest.mark.skip_if_deployed -async def test_build_and_deploy_small_deployment(ops_test: OpsTest, deploy_type: str) -> None: - """Build and deploy an OpenSearch cluster.""" - if await app_name(ops_test): - return - - my_charm = await ops_test.build_charm(".") - - model_conf = MODEL_CONFIG.copy() - # Make it more regular as COS relation-broken really happens on the - # next hook call in each opensearch unit. - # If this value is changed, then update the sleep accordingly at: - # test_prometheus_exporter_disabled_by_cos_relation_gone - model_conf["update-status-hook-interval"] = "1m" - await ops_test.model.set_config(model_conf) - - # Deploy TLS Certificates operator. - config = {"ca-common-name": "CN_CA"} - await asyncio.gather( - ops_test.model.deploy( - my_charm, - num_units=3, - series=SERIES, - config={"plugin_opensearch_knn": False} | CONFIG_OPTS, - ), - ops_test.model.deploy(TLS_CERTIFICATES_APP_NAME, channel="stable", config=config), - ) - - await wait_until( - ops_test, - apps=[APP_NAME], - units_statuses=["blocked"], - wait_for_exact_units={APP_NAME: 3}, - timeout=3400, - idle_period=IDLE_PERIOD, - ) - assert len(ops_test.model.applications[APP_NAME].units) == 3 - - -@pytest.mark.parametrize("deploy_type", SMALL_DEPLOYMENTS) -@pytest.mark.abort_on_fail -async def test_config_switch_before_cluster_ready(ops_test: OpsTest, deploy_type) -> None: - """Configuration change before cluster is ready. - - We hold the cluster without starting its unit services by not relating to tls-operator. - """ - await ops_test.model.applications[APP_NAME].set_config({"plugin_opensearch_knn": "true"}) - await wait_until( - ops_test, - apps=[APP_NAME], - units_statuses=["blocked"], - wait_for_exact_units={APP_NAME: 3}, - timeout=3400, - idle_period=IDLE_PERIOD, - ) - await assert_knn_config_updated(ops_test, True, check_api=False) - - # Relate it to OpenSearch to set up TLS. - await ops_test.model.integrate(APP_NAME, TLS_CERTIFICATES_APP_NAME) - await _wait_for_units(ops_test, deploy_type) - assert len(ops_test.model.applications[APP_NAME].units) == 3 - - -@pytest.mark.parametrize("deploy_type", SMALL_DEPLOYMENTS) -@pytest.mark.abort_on_fail -async def test_prometheus_exporter_enabled_by_default(ops_test, deploy_type: str): - """Test that Prometheus Exporter is running before the relation is there. - - Test only on small deployments scenario, as this is a more functional check to the plugin. - """ - leader_unit_ip = await get_leader_unit_ip(ops_test, app=APP_NAME) - endpoint = f"https://{leader_unit_ip}:9200/_prometheus/metrics" - response = await http_request(ops_test, "get", endpoint, app=APP_NAME, json_resp=False) - - response_str = response.content.decode("utf-8") - assert response_str.count("opensearch_") > 500 - assert len(response_str.split("\n")) > 500 - - -@pytest.mark.parametrize("deploy_type", SMALL_DEPLOYMENTS) -@pytest.mark.abort_on_fail -async def test_small_deployments_prometheus_exporter_cos_relation(ops_test, deploy_type: str): - await ops_test.model.deploy(COS_APP_NAME, channel="edge", series=SERIES), - await ops_test.model.integrate(APP_NAME, COS_APP_NAME) - await _wait_for_units(ops_test, deploy_type, wait_for_cos=True) - - # Check that the correct settings were successfully communicated to grafana-agent - cos_leader_id = await get_leader_unit_id(ops_test, COS_APP_NAME) - cos_leader_name = f"{COS_APP_NAME}/{cos_leader_id}" - leader_id = await get_leader_unit_id(ops_test, APP_NAME) - leader_name = f"{APP_NAME}/{leader_id}" - relation_data = await get_unit_relation_data( - ops_test, cos_leader_name, leader_name, COS_RELATION_NAME, "config" - ) - if not isinstance(relation_data, dict): - relation_data = json.loads(relation_data) - relation_data = relation_data["metrics_scrape_jobs"][0] - secret = await get_secret_by_label(ops_test, "opensearch:app:monitor-password") - - assert relation_data["basic_auth"]["username"] == "monitor" - assert relation_data["basic_auth"]["password"] == secret["monitor-password"] - - admin_secret = await get_secret_by_label(ops_test, "opensearch:app:app-admin") - assert relation_data["tls_config"]["ca"] == admin_secret["ca-cert"] - assert relation_data["scheme"] == "https" - - -@pytest.mark.parametrize("deploy_type", LARGE_DEPLOYMENTS) -@pytest.mark.abort_on_fail -@pytest.mark.skip_if_deployed -async def test_large_deployment_build_and_deploy(ops_test: OpsTest, deploy_type: str) -> None: - """Build and deploy a large deployment for OpenSearch.""" - await ops_test.model.set_config(MODEL_CONFIG) - # Deploy TLS Certificates operator. - tls_config = {"ca-common-name": "CN_CA"} - - my_charm = await ops_test.build_charm(".") - - main_orchestrator_conf = { - "cluster_name": "plugins-test", - "init_hold": False, - "roles": "cluster_manager,data", - } - failover_orchestrator_conf = { - "cluster_name": "plugins-test", - "init_hold": True, - "roles": "cluster_manager,data", - } - data_hot_conf = {"cluster_name": "plugins-test", "init_hold": True, "roles": "data.hot,ml"} - - await asyncio.gather( - ops_test.model.deploy(TLS_CERTIFICATES_APP_NAME, channel="stable", config=tls_config), - ops_test.model.deploy( - my_charm, - application_name=MAIN_ORCHESTRATOR_NAME, - num_units=1, - series=SERIES, - config=main_orchestrator_conf | CONFIG_OPTS, - ), - ops_test.model.deploy( - my_charm, - application_name=FAILOVER_ORCHESTRATOR_NAME, - num_units=2, - series=SERIES, - config=failover_orchestrator_conf | CONFIG_OPTS, - ), - ops_test.model.deploy( - my_charm, - application_name=APP_NAME, - num_units=1, - series=SERIES, - config=data_hot_conf | CONFIG_OPTS, - ), - ) - - # Large deployment setup - await ops_test.model.integrate("main:peer-cluster-orchestrator", "failover:peer-cluster") - await ops_test.model.integrate("main:peer-cluster-orchestrator", f"{APP_NAME}:peer-cluster") - await ops_test.model.integrate( - "failover:peer-cluster-orchestrator", f"{APP_NAME}:peer-cluster" - ) - - # TLS setup - await ops_test.model.integrate(MAIN_ORCHESTRATOR_NAME, TLS_CERTIFICATES_APP_NAME) - await ops_test.model.integrate(FAILOVER_ORCHESTRATOR_NAME, TLS_CERTIFICATES_APP_NAME) - await ops_test.model.integrate(APP_NAME, TLS_CERTIFICATES_APP_NAME) - - await _wait_for_units(ops_test, deploy_type) - await set_watermark(ops_test, APP_NAME) - - -@pytest.mark.parametrize("deploy_type", LARGE_DEPLOYMENTS) -@pytest.mark.abort_on_fail -async def test_large_deployment_prometheus_exporter_cos_relation(ops_test, deploy_type: str): - # Check that the correct settings were successfully communicated to grafana-agent - await ops_test.model.deploy(COS_APP_NAME, channel="edge", series=SERIES), - await ops_test.model.integrate(FAILOVER_ORCHESTRATOR_NAME, COS_APP_NAME) - await ops_test.model.integrate(MAIN_ORCHESTRATOR_NAME, COS_APP_NAME) - await ops_test.model.integrate(APP_NAME, COS_APP_NAME) - - await _wait_for_units(ops_test, deploy_type, wait_for_cos=True) - - leader_id = await get_leader_unit_id(ops_test, APP_NAME) - leader_name = f"{APP_NAME}/{leader_id}" - - cos_leader_id = await get_leader_unit_id(ops_test, COS_APP_NAME) - relation_data = await get_unit_relation_data( - ops_test, f"{COS_APP_NAME}/{cos_leader_id}", leader_name, COS_RELATION_NAME, "config" - ) - if not isinstance(relation_data, dict): - relation_data = json.loads(relation_data) - relation_data = relation_data["metrics_scrape_jobs"][0] - secret = await get_secret_by_label(ops_test, "opensearch:app:monitor-password") - - assert relation_data["basic_auth"]["username"] == "monitor" - assert relation_data["basic_auth"]["password"] == secret["monitor-password"] - - admin_secret = await get_secret_by_label(ops_test, "opensearch:app:app-admin") - assert relation_data["tls_config"]["ca"] == admin_secret["ca-cert"] - assert relation_data["scheme"] == "https" - - -@pytest.mark.parametrize("deploy_type", ALL_DEPLOYMENTS) -@pytest.mark.abort_on_fail -async def test_monitoring_user_fetch_prometheus_data(ops_test, deploy_type: str): - leader_unit_ip = await get_leader_unit_ip(ops_test, app=APP_NAME) - endpoint = f"https://{leader_unit_ip}:9200/_prometheus/metrics" - - secret = await get_secret_by_label(ops_test, "opensearch:app:monitor-password") - response = await http_request( - ops_test, - "get", - endpoint, - app=APP_NAME, - json_resp=False, - user="monitor", - user_password=secret["monitor-password"], - ) - response_str = response.content.decode("utf-8") - - assert response_str.count("opensearch_") > 500 - assert len(response_str.split("\n")) > 500 - - -@pytest.mark.parametrize("deploy_type", ALL_DEPLOYMENTS) -@pytest.mark.abort_on_fail -async def test_prometheus_monitor_user_password_change(ops_test, deploy_type: str): - # Password change applied as expected - app = APP_NAME if deploy_type == "small_deployment" else MAIN_ORCHESTRATOR_NAME - - leader_id = await get_leader_unit_id(ops_test, app) - result1 = await run_action( - ops_test, leader_id, "set-password", {"username": "monitor"}, app=app - ) - await _wait_for_units(ops_test, deploy_type, wait_for_cos=True) - - new_password = result1.response.get("monitor-password") - # Now, we compare the change in the action above with the opensearch's nodes. - # In large deployments, that will mean checking if the change on main orchestrator - # was sent down to the opensearch (data node) cluster. - result2 = await run_action( - ops_test, leader_id, "get-password", {"username": "monitor"}, app=app - ) - assert result2.response.get("password") == new_password - - # Relation data is updated - # In both large and small deployments, we want to check if the relation data is updated - # on the data node: "opensearch" - leader_id = await get_leader_unit_id(ops_test, APP_NAME) - leader_name = f"{APP_NAME}/{leader_id}" - - # We're not sure which grafana-agent is sitting with APP_NAME in large deployments - cos_leader_id = await get_leader_unit_id(ops_test, COS_APP_NAME) - relation_data = await get_unit_relation_data( - ops_test, f"{COS_APP_NAME}/{cos_leader_id}", leader_name, COS_RELATION_NAME, "config" - ) - if not isinstance(relation_data, dict): - relation_data = json.loads(relation_data) - relation_data = relation_data["metrics_scrape_jobs"][0]["basic_auth"] - - assert relation_data["username"] == "monitor" - assert relation_data["password"] == new_password - - -@pytest.mark.parametrize("deploy_type", SMALL_DEPLOYMENTS) -@pytest.mark.abort_on_fail -async def test_knn_enabled_disabled(ops_test, deploy_type: str): - config = await ops_test.model.applications[APP_NAME].get_config() - assert config["plugin_opensearch_knn"]["default"] is True - assert config["plugin_opensearch_knn"]["value"] is True - - async with ops_test.fast_forward(): - await _set_config(ops_test, deploy_type, {"plugin_opensearch_knn": "False"}) - await _wait_for_units(ops_test, deploy_type) - - config = await ops_test.model.applications[APP_NAME].get_config() - assert config["plugin_opensearch_knn"]["value"] is False - - await _set_config(ops_test, deploy_type, {"plugin_opensearch_knn": "True"}) - await _wait_for_units(ops_test, deploy_type) - - config = await ops_test.model.applications[APP_NAME].get_config() - assert config["plugin_opensearch_knn"]["value"] is True - - await _wait_for_units(ops_test, deploy_type) - - -@pytest.mark.parametrize("deploy_type", SMALL_DEPLOYMENTS) -@pytest.mark.abort_on_fail -async def test_knn_search_with_hnsw_faiss(ops_test: OpsTest, deploy_type: str) -> None: - """Uploads data and runs a query search against the FAISS KNNEngine.""" - app = (await app_name(ops_test)) or APP_NAME - - units = await get_application_unit_ids_ips(ops_test, app=app) - leader_unit_ip = await get_leader_unit_ip(ops_test, app=app) - - # create index with r_shards = nodes - 1 - index_name = "test_search_with_hnsw_faiss" - vector_name = "test_search_with_hnsw_faiss_vector" - await create_index( - ops_test, - app, - leader_unit_ip, - index_name, - r_shards=len(units) - 1, - extra_index_settings={"knn": "true", "knn.algo_param.ef_search": 100}, - extra_mappings={ - "properties": { - vector_name: { - "type": "knn_vector", - "dimension": 4, - "method": { - "name": "hnsw", - "space_type": "innerproduct", - "engine": "faiss", - "parameters": {"ef_construction": 256, "m": 48}, - }, - } - } - }, - ) - payload, payload_list = generate_bulk_training_data( - index_name, vector_name, docs_count=100, dimensions=4, has_result=True - ) - # Insert data in bulk - await bulk_insert(ops_test, app, leader_unit_ip, payload) - query = {"size": 2, "query": {"knn": {vector_name: {"vector": payload_list[0], "k": 2}}}} - docs = await search(ops_test, app, leader_unit_ip, index_name, query, retries=30) - assert len(docs) == 2 - - -@pytest.mark.parametrize("deploy_type", SMALL_DEPLOYMENTS) -@pytest.mark.abort_on_fail -async def test_knn_search_with_hnsw_nmslib(ops_test: OpsTest, deploy_type: str) -> None: - """Uploads data and runs a query search against the NMSLIB KNNEngine.""" - app = (await app_name(ops_test)) or APP_NAME - - units = await get_application_unit_ids_ips(ops_test, app=app) - leader_unit_ip = await get_leader_unit_ip(ops_test, app=app) - - # create index with r_shards = nodes - 1 - index_name = "test_search_with_hnsw_nmslib" - vector_name = "test_search_with_hnsw_nmslib_vector" - await create_index( - ops_test, - app, - leader_unit_ip, - index_name, - r_shards=len(units) - 1, - extra_index_settings={"knn": "true", "knn.algo_param.ef_search": 100}, - extra_mappings={ - "properties": { - vector_name: { - "type": "knn_vector", - "dimension": 4, - "method": { - "name": "hnsw", - "space_type": "l2", - "engine": "nmslib", - "parameters": {"ef_construction": 256, "m": 48}, - }, - } - } - }, - ) - payload, payload_list = generate_bulk_training_data( - index_name, vector_name, docs_count=100, dimensions=4, has_result=True - ) - # Insert data in bulk - await bulk_insert(ops_test, app, leader_unit_ip, payload) - query = {"size": 2, "query": {"knn": {vector_name: {"vector": payload_list[0], "k": 2}}}} - docs = await search(ops_test, app, leader_unit_ip, index_name, query, retries=30) - assert len(docs) == 2 - - -@pytest.mark.parametrize("deploy_type", SMALL_DEPLOYMENTS) -@pytest.mark.abort_on_fail -async def test_knn_training_search(ops_test: OpsTest, deploy_type: str) -> None: - """Tests the entire cycle of KNN plugin. - - 1) Enters data and trains a model in "test_end_to_end_with_ivf_faiss_training" - 2) Trains model: "test_end_to_end_with_ivf_faiss_model" - 3) Once training is complete, creates a target index and connects with the model - 4) Disables KNN plugin: the search must fail - 5) Re-enables the plugin: search must succeed and return two vectors. - """ - app = (await app_name(ops_test)) or APP_NAME - - units = await get_application_unit_ids_ips(ops_test, app=app) - leader_unit_ip = await get_leader_unit_ip(ops_test, app=app) - # Get since when each unit has been active - - # create index with r_shards = nodes - 1 - index_name = "test_end_to_end_with_ivf_faiss_training" - vector_name = "test_end_to_end_with_ivf_faiss_vector" - model_name = "test_end_to_end_with_ivf_faiss_model" - await create_index_and_bulk_insert( - ops_test, app, leader_unit_ip, index_name, len(units) - 1, vector_name - ) - await run_knn_training( - ops_test, - app, - leader_unit_ip, - model_name, - { - "training_index": index_name, - "training_field": vector_name, - "dimension": 4, - "method": { - "name": "ivf", - "engine": "faiss", - "space_type": "l2", - "parameters": {"nlist": 4, "nprobes": 2}, - }, - }, - ) - # wait for training to finish -> fails with an exception otherwise - assert await is_knn_training_complete( - ops_test, app, leader_unit_ip, model_name - ), "KNN training did not complete." - - # Creates the target index, to use the model - payload_list = await create_index_and_bulk_insert( - ops_test, - app, - leader_unit_ip, - "test_end_to_end_with_ivf_faiss_target", - len(units) - 1, - vector_name="target-field", - model_name=model_name, - ) - - # Set the config to false, then to true - for knn_enabled in [False, True]: - logger.info(f"KNN test starting with {knn_enabled}") - - # get current timestamp, to compare with restarts later - ts = await get_application_unit_ids_start_time(ops_test, APP_NAME) - await _set_config(ops_test, deploy_type, {"plugin_opensearch_knn": str(knn_enabled)}) - - await _wait_for_units(ops_test, deploy_type) - - # Now use it to compare with the restart - assert not await is_each_unit_restarted(ops_test, APP_NAME, ts) - await assert_knn_config_updated(ops_test, knn_enabled, check_api=True) - assert await check_cluster_formation_successful( - ops_test, leader_unit_ip, get_application_unit_names(ops_test, app=APP_NAME) - ), "Restart happened but cluster did not start correctly" - logger.info("Config updated and was successful") - - query = { - "size": 2, - "query": {"knn": {"target-field": {"vector": payload_list[0], "k": 2}}}, - } - # If search eventually fails, then an exception is raised and the test fails as well - try: - docs = await search( - ops_test, - app, - leader_unit_ip, - "test_end_to_end_with_ivf_faiss_target", - query, - retries=3, - ) - assert ( - knn_enabled and len(docs) == 2 - ), f"KNN enabled: {knn_enabled} and search results: {len(docs)}." - except RetryError: - # The search should fail if knn_enabled is false - assert not knn_enabled diff --git a/tests/integration/relations/__init__.py b/tests/integration/relations/__init__.py deleted file mode 100644 index e3979c0f63..0000000000 --- a/tests/integration/relations/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -# Copyright 2024 Canonical Ltd. -# See LICENSE file for licensing details. diff --git a/tests/integration/relations/conftest.py b/tests/integration/relations/conftest.py deleted file mode 100644 index 2dec298af5..0000000000 --- a/tests/integration/relations/conftest.py +++ /dev/null @@ -1,25 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2024 Canonical Ltd. -# See LICENSE file for licensing details. - -import shutil - -import pytest -from pytest_operator.plugin import OpsTest - - -@pytest.fixture(scope="module") -async def application_charm(ops_test: OpsTest): - """Build the application charm.""" - shutil.copyfile( - "./lib/charms/data_platform_libs/v0/data_interfaces.py", - "./tests/integration/relations/opensearch_provider/application-charm/lib/charms/data_platform_libs/v0/data_interfaces.py", - ) - test_charm_path = "./tests/integration/relations/opensearch_provider/application-charm" - return await ops_test.build_charm(test_charm_path) - - -@pytest.fixture(scope="module") -async def opensearch_charm(ops_test: OpsTest): - """Build the opensearch charm.""" - return await ops_test.build_charm(".") diff --git a/tests/integration/relations/helpers.py b/tests/integration/relations/helpers.py deleted file mode 100644 index 2b22efcba7..0000000000 --- a/tests/integration/relations/helpers.py +++ /dev/null @@ -1,183 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2024 Canonical Ltd. -# See LICENSE file for licensing details. -import logging -import socket -from typing import Optional - -import yaml -from pytest_operator.plugin import OpsTest -from tenacity import ( - RetryError, - Retrying, - retry, - stop_after_attempt, - stop_after_delay, - wait_fixed, -) - -from ..helpers import run_action - - -async def get_application_relation_data( - ops_test: OpsTest, - unit_name: str, - relation_name: str, - key: str, - relation_id: str = None, -) -> Optional[str]: - """Get relation data for an application. - - Args: - ops_test: The ops test framework instance - unit_name: The name of the unit - relation_name: name of the relation to get connection data from - key: key of data to be retrieved - relation_id: id of the relation to get connection data from - - Returns: - the data that was requested or None - if no data in the relation - - Raises: - ValueError if it's not possible to get application unit data - or if there is no data for the particular relation endpoint - and/or alias. - """ - raw_data = (await ops_test.juju("show-unit", unit_name))[1] - if not raw_data: - raise ValueError(f"no unit info could be grabbed for {unit_name}") - data = yaml.safe_load(raw_data) - # Filter the data based on the relation name. - relation_data = [v for v in data[unit_name]["relation-info"] if v["endpoint"] == relation_name] - if relation_id: - # Filter the data based on the relation id. - relation_data = [v for v in relation_data if v["relation-id"] == relation_id] - if not relation_data: - raise ValueError( - f"no relation data could be grabbed on relation with endpoint {relation_name}" - ) - return relation_data[0]["application-data"].get(key) - - -async def get_unit_relation_data( - ops_test: OpsTest, - unit_name: str, - target_unit_name: str, - relation_name: str, - key: str, - relation_id: str = None, -) -> Optional[str]: - """Get relation data for an application. - - Args: - ops_test: The ops test framework instance - unit_name: The name of the unit - relation_name: name of the relation to get connection data from - key: key of data to be retrieved - relation_id: id of the relation to get connection data from - - Returns: - the data that was requested or None - if no data in the relation - - Raises: - ValueError if it's not possible to get application unit data - or if there is no data for the particular relation endpoint - and/or alias. - """ - raw_data = (await ops_test.juju("show-unit", unit_name))[1] - if not raw_data: - raise ValueError(f"no unit info could be grabbed for {unit_name}") - data = yaml.safe_load(raw_data) - # Filter the data based on the relation name. - relation_data = [v for v in data[unit_name]["relation-info"] if v["endpoint"] == relation_name] - if relation_id: - # Filter the data based on the relation id. - relation_data = [v for v in relation_data if v["relation-id"] == relation_id] - if not relation_data: - raise ValueError( - f"no relation data could be grabbed on relation with endpoint {relation_name}" - ) - # Consider the case we are dealing with subordinate charms, e.g. grafana-agent - # The field "relation-units" is structured slightly different. - for idx in range(len(relation_data)): - if target_unit_name in relation_data[idx]["related-units"]: - break - else: - return {} - return ( - relation_data[idx]["related-units"].get(target_unit_name, {}).get("data", {}).get(key, {}) - ) - - -def wait_for_relation_joined_between( - ops_test: OpsTest, endpoint_one: str, endpoint_two: str -) -> None: - """Wait for relation to be created before checking if it's waiting or idle. - - Args: - ops_test: running OpsTest instance - endpoint_one: one endpoint of the relation. Doesn't matter if it's provider or requirer. - endpoint_two: the other endpoint of the relation. - """ - try: - for attempt in Retrying(stop=stop_after_delay(3 * 60), wait=wait_fixed(3)): - with attempt: - if new_relation_joined(ops_test, endpoint_one, endpoint_two): - break - except RetryError: - assert False, "New relation failed to join after 3 minutes." - - -def new_relation_joined(ops_test: OpsTest, endpoint_one: str, endpoint_two: str) -> bool: - for rel in ops_test.model.relations: - endpoints = [endpoint.name for endpoint in rel.endpoints] - if endpoint_one in endpoints and endpoint_two in endpoints: - return True - return False - - -@retry(wait=wait_fixed(wait=15), stop=stop_after_attempt(15)) -async def run_request( - ops_test, - unit_name: str, - relation_name: str, - relation_id: int, - method: str, - endpoint: str, - payload: str = None, -): - # python can't have variable names with a hyphen, and Juju can't have action variables with an - # underscore, so this is a compromise. - params = { - "relation-id": relation_id, - "relation-name": relation_name, - "method": method, - "endpoint": endpoint, - } - if payload: - params["payload"] = payload - result = await run_action( - ops_test, - unit_id=int(unit_name.split("/")[-1]), - action_name="run-request", - params=params, - app="/".join(unit_name.split("/")[:-1]), - ) - logging.info(f"request results: {result}") - - if result.status != "completed": - raise Exception(result.response) - - return result.response - - -def ip_to_url(ip_str: str) -> str: - """Return a version of an IPV4 or IPV6 address that's fit for a URL.""" - try: - # Check if it's an IPV4 address - socket.inet_aton(ip_str) - return ip_str - except socket.error: - return f"[{ip_str}]" diff --git a/tests/integration/relations/opensearch_provider/application-charm/actions.yaml b/tests/integration/relations/opensearch_provider/application-charm/actions.yaml deleted file mode 100644 index b981595c07..0000000000 --- a/tests/integration/relations/opensearch_provider/application-charm/actions.yaml +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright 2024 Canonical Ltd. -# See LICENSE file for licensing details. - -run-request: - description: runs request on given relation (defined by name and id) - params: - relation-name: - description: name of the relation under test - type: string - relation-id: - description: id of the relation under test - type: integer - method: - description: HTTP method to run - type: string - endpoint: - description: endpoint on which to run the http method - type: string - payload: - description: fully escaped payload to be sent in bulk - type: string - required: - - relation-name - - relation-id - - method - - endpoint diff --git a/tests/integration/relations/opensearch_provider/application-charm/charmcraft.yaml b/tests/integration/relations/opensearch_provider/application-charm/charmcraft.yaml deleted file mode 100644 index e7ff5f1968..0000000000 --- a/tests/integration/relations/opensearch_provider/application-charm/charmcraft.yaml +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright 2022 Canonical Ltd. -# See LICENSE file for licensing details. - -type: charm -platforms: - ubuntu@22.04:amd64: -parts: - charm: - plugin: charm diff --git a/tests/integration/relations/opensearch_provider/application-charm/lib/charms/data_platform_libs/v0/data_interfaces.py b/tests/integration/relations/opensearch_provider/application-charm/lib/charms/data_platform_libs/v0/data_interfaces.py deleted file mode 100644 index fda5187a4f..0000000000 --- a/tests/integration/relations/opensearch_provider/application-charm/lib/charms/data_platform_libs/v0/data_interfaces.py +++ /dev/null @@ -1,3495 +0,0 @@ -# Copyright 2023 Canonical Ltd. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -r"""Library to manage the relation for the data-platform products. - -This library contains the Requires and Provides classes for handling the relation -between an application and multiple managed application supported by the data-team: -MySQL, Postgresql, MongoDB, Redis, and Kafka. - -### Database (MySQL, Postgresql, MongoDB, and Redis) - -#### Requires Charm -This library is a uniform interface to a selection of common database -metadata, with added custom events that add convenience to database management, -and methods to consume the application related data. - - -Following an example of using the DatabaseCreatedEvent, in the context of the -application charm code: - -```python - -from charms.data_platform_libs.v0.data_interfaces import ( - DatabaseCreatedEvent, - DatabaseRequires, -) - -class ApplicationCharm(CharmBase): - # Application charm that connects to database charms. - - def __init__(self, *args): - super().__init__(*args) - - # Charm events defined in the database requires charm library. - self.database = DatabaseRequires(self, relation_name="database", database_name="database") - self.framework.observe(self.database.on.database_created, self._on_database_created) - - def _on_database_created(self, event: DatabaseCreatedEvent) -> None: - # Handle the created database - - # Create configuration file for app - config_file = self._render_app_config_file( - event.username, - event.password, - event.endpoints, - ) - - # Start application with rendered configuration - self._start_application(config_file) - - # Set active status - self.unit.status = ActiveStatus("received database credentials") -``` - -As shown above, the library provides some custom events to handle specific situations, -which are listed below: - -- database_created: event emitted when the requested database is created. -- endpoints_changed: event emitted when the read/write endpoints of the database have changed. -- read_only_endpoints_changed: event emitted when the read-only endpoints of the database - have changed. Event is not triggered if read/write endpoints changed too. - -If it is needed to connect multiple database clusters to the same relation endpoint -the application charm can implement the same code as if it would connect to only -one database cluster (like the above code example). - -To differentiate multiple clusters connected to the same relation endpoint -the application charm can use the name of the remote application: - -```python - -def _on_database_created(self, event: DatabaseCreatedEvent) -> None: - # Get the remote app name of the cluster that triggered this event - cluster = event.relation.app.name -``` - -It is also possible to provide an alias for each different database cluster/relation. - -So, it is possible to differentiate the clusters in two ways. -The first is to use the remote application name, i.e., `event.relation.app.name`, as above. - -The second way is to use different event handlers to handle each cluster events. -The implementation would be something like the following code: - -```python - -from charms.data_platform_libs.v0.data_interfaces import ( - DatabaseCreatedEvent, - DatabaseRequires, -) - -class ApplicationCharm(CharmBase): - # Application charm that connects to database charms. - - def __init__(self, *args): - super().__init__(*args) - - # Define the cluster aliases and one handler for each cluster database created event. - self.database = DatabaseRequires( - self, - relation_name="database", - database_name="database", - relations_aliases = ["cluster1", "cluster2"], - ) - self.framework.observe( - self.database.on.cluster1_database_created, self._on_cluster1_database_created - ) - self.framework.observe( - self.database.on.cluster2_database_created, self._on_cluster2_database_created - ) - - def _on_cluster1_database_created(self, event: DatabaseCreatedEvent) -> None: - # Handle the created database on the cluster named cluster1 - - # Create configuration file for app - config_file = self._render_app_config_file( - event.username, - event.password, - event.endpoints, - ) - ... - - def _on_cluster2_database_created(self, event: DatabaseCreatedEvent) -> None: - # Handle the created database on the cluster named cluster2 - - # Create configuration file for app - config_file = self._render_app_config_file( - event.username, - event.password, - event.endpoints, - ) - ... - -``` - -When it's needed to check whether a plugin (extension) is enabled on the PostgreSQL -charm, you can use the is_postgresql_plugin_enabled method. To use that, you need to -add the following dependency to your charmcraft.yaml file: - -```yaml - -parts: - charm: - charm-binary-python-packages: - - psycopg[binary] - -``` - -### Provider Charm - -Following an example of using the DatabaseRequestedEvent, in the context of the -database charm code: - -```python -from charms.data_platform_libs.v0.data_interfaces import DatabaseProvides - -class SampleCharm(CharmBase): - - def __init__(self, *args): - super().__init__(*args) - # Charm events defined in the database provides charm library. - self.provided_database = DatabaseProvides(self, relation_name="database") - self.framework.observe(self.provided_database.on.database_requested, - self._on_database_requested) - # Database generic helper - self.database = DatabaseHelper() - - def _on_database_requested(self, event: DatabaseRequestedEvent) -> None: - # Handle the event triggered by a new database requested in the relation - # Retrieve the database name using the charm library. - db_name = event.database - # generate a new user credential - username = self.database.generate_user() - password = self.database.generate_password() - # set the credentials for the relation - self.provided_database.set_credentials(event.relation.id, username, password) - # set other variables for the relation event.set_tls("False") -``` -As shown above, the library provides a custom event (database_requested) to handle -the situation when an application charm requests a new database to be created. -It's preferred to subscribe to this event instead of relation changed event to avoid -creating a new database when other information other than a database name is -exchanged in the relation databag. - -### Kafka - -This library is the interface to use and interact with the Kafka charm. This library contains -custom events that add convenience to manage Kafka, and provides methods to consume the -application related data. - -#### Requirer Charm - -```python - -from charms.data_platform_libs.v0.data_interfaces import ( - BootstrapServerChangedEvent, - KafkaRequires, - TopicCreatedEvent, -) - -class ApplicationCharm(CharmBase): - - def __init__(self, *args): - super().__init__(*args) - self.kafka = KafkaRequires(self, "kafka_client", "test-topic") - self.framework.observe( - self.kafka.on.bootstrap_server_changed, self._on_kafka_bootstrap_server_changed - ) - self.framework.observe( - self.kafka.on.topic_created, self._on_kafka_topic_created - ) - - def _on_kafka_bootstrap_server_changed(self, event: BootstrapServerChangedEvent): - # Event triggered when a bootstrap server was changed for this application - - new_bootstrap_server = event.bootstrap_server - ... - - def _on_kafka_topic_created(self, event: TopicCreatedEvent): - # Event triggered when a topic was created for this application - username = event.username - password = event.password - tls = event.tls - tls_ca= event.tls_ca - bootstrap_server event.bootstrap_server - consumer_group_prefic = event.consumer_group_prefix - zookeeper_uris = event.zookeeper_uris - ... - -``` - -As shown above, the library provides some custom events to handle specific situations, -which are listed below: - -- topic_created: event emitted when the requested topic is created. -- bootstrap_server_changed: event emitted when the bootstrap server have changed. -- credential_changed: event emitted when the credentials of Kafka changed. - -### Provider Charm - -Following the previous example, this is an example of the provider charm. - -```python -class SampleCharm(CharmBase): - -from charms.data_platform_libs.v0.data_interfaces import ( - KafkaProvides, - TopicRequestedEvent, -) - - def __init__(self, *args): - super().__init__(*args) - - # Default charm events. - self.framework.observe(self.on.start, self._on_start) - - # Charm events defined in the Kafka Provides charm library. - self.kafka_provider = KafkaProvides(self, relation_name="kafka_client") - self.framework.observe(self.kafka_provider.on.topic_requested, self._on_topic_requested) - # Kafka generic helper - self.kafka = KafkaHelper() - - def _on_topic_requested(self, event: TopicRequestedEvent): - # Handle the on_topic_requested event. - - topic = event.topic - relation_id = event.relation.id - # set connection info in the databag relation - self.kafka_provider.set_bootstrap_server(relation_id, self.kafka.get_bootstrap_server()) - self.kafka_provider.set_credentials(relation_id, username=username, password=password) - self.kafka_provider.set_consumer_group_prefix(relation_id, ...) - self.kafka_provider.set_tls(relation_id, "False") - self.kafka_provider.set_zookeeper_uris(relation_id, ...) - -``` -As shown above, the library provides a custom event (topic_requested) to handle -the situation when an application charm requests a new topic to be created. -It is preferred to subscribe to this event instead of relation changed event to avoid -creating a new topic when other information other than a topic name is -exchanged in the relation databag. -""" - -import copy -import json -import logging -from abc import ABC, abstractmethod -from collections import UserDict, namedtuple -from datetime import datetime -from enum import Enum -from typing import ( - Callable, - Dict, - ItemsView, - KeysView, - List, - Optional, - Set, - Tuple, - Union, - ValuesView, -) - -from ops import JujuVersion, Model, Secret, SecretInfo, SecretNotFoundError -from ops.charm import ( - CharmBase, - CharmEvents, - RelationChangedEvent, - RelationCreatedEvent, - RelationEvent, - SecretChangedEvent, -) -from ops.framework import EventSource, Object -from ops.model import Application, ModelError, Relation, Unit - -# The unique Charmhub library identifier, never change it -LIBID = "6c3e6b6680d64e9c89e611d1a15f65be" - -# Increment this major API version when introducing breaking changes -LIBAPI = 0 - -# Increment this PATCH version before using `charmcraft publish-lib` or reset -# to 0 if you are raising the major API version -LIBPATCH = 38 - -PYDEPS = ["ops>=2.0.0"] - -logger = logging.getLogger(__name__) - -Diff = namedtuple("Diff", "added changed deleted") -Diff.__doc__ = """ -A tuple for storing the diff between two data mappings. - -added - keys that were added -changed - keys that still exist but have new values -deleted - key that were deleted""" - - -PROV_SECRET_PREFIX = "secret-" -REQ_SECRET_FIELDS = "requested-secrets" -GROUP_MAPPING_FIELD = "secret_group_mapping" -GROUP_SEPARATOR = "@" - - -class SecretGroup(str): - """Secret groups specific type.""" - - -class SecretGroupsAggregate(str): - """Secret groups with option to extend with additional constants.""" - - def __init__(self): - self.USER = SecretGroup("user") - self.TLS = SecretGroup("tls") - self.EXTRA = SecretGroup("extra") - - def __setattr__(self, name, value): - """Setting internal constants.""" - if name in self.__dict__: - raise RuntimeError("Can't set constant!") - else: - super().__setattr__(name, SecretGroup(value)) - - def groups(self) -> list: - """Return the list of stored SecretGroups.""" - return list(self.__dict__.values()) - - def get_group(self, group: str) -> Optional[SecretGroup]: - """If the input str translates to a group name, return that.""" - return SecretGroup(group) if group in self.groups() else None - - -SECRET_GROUPS = SecretGroupsAggregate() - - -class DataInterfacesError(Exception): - """Common ancestor for DataInterfaces related exceptions.""" - - -class SecretError(DataInterfacesError): - """Common ancestor for Secrets related exceptions.""" - - -class SecretAlreadyExistsError(SecretError): - """A secret that was to be added already exists.""" - - -class SecretsUnavailableError(SecretError): - """Secrets aren't yet available for Juju version used.""" - - -class SecretsIllegalUpdateError(SecretError): - """Secrets aren't yet available for Juju version used.""" - - -class IllegalOperationError(DataInterfacesError): - """To be used when an operation is not allowed to be performed.""" - - -def get_encoded_dict( - relation: Relation, member: Union[Unit, Application], field: str -) -> Optional[Dict[str, str]]: - """Retrieve and decode an encoded field from relation data.""" - data = json.loads(relation.data[member].get(field, "{}")) - if isinstance(data, dict): - return data - logger.error("Unexpected datatype for %s instead of dict.", str(data)) - - -def get_encoded_list( - relation: Relation, member: Union[Unit, Application], field: str -) -> Optional[List[str]]: - """Retrieve and decode an encoded field from relation data.""" - data = json.loads(relation.data[member].get(field, "[]")) - if isinstance(data, list): - return data - logger.error("Unexpected datatype for %s instead of list.", str(data)) - - -def set_encoded_field( - relation: Relation, - member: Union[Unit, Application], - field: str, - value: Union[str, list, Dict[str, str]], -) -> None: - """Set an encoded field from relation data.""" - relation.data[member].update({field: json.dumps(value)}) - - -def diff(event: RelationChangedEvent, bucket: Optional[Union[Unit, Application]]) -> Diff: - """Retrieves the diff of the data in the relation changed databag. - - Args: - event: relation changed event. - bucket: bucket of the databag (app or unit) - - Returns: - a Diff instance containing the added, deleted and changed - keys from the event relation databag. - """ - # Retrieve the old data from the data key in the application relation databag. - if not bucket: - return Diff([], [], []) - - old_data = get_encoded_dict(event.relation, bucket, "data") - - if not old_data: - old_data = {} - - # Retrieve the new data from the event relation databag. - new_data = ( - {key: value for key, value in event.relation.data[event.app].items() if key != "data"} - if event.app - else {} - ) - - # These are the keys that were added to the databag and triggered this event. - added = new_data.keys() - old_data.keys() # pyright: ignore [reportAssignmentType] - # These are the keys that were removed from the databag and triggered this event. - deleted = old_data.keys() - new_data.keys() # pyright: ignore [reportAssignmentType] - # These are the keys that already existed in the databag, - # but had their values changed. - changed = { - key - for key in old_data.keys() & new_data.keys() # pyright: ignore [reportAssignmentType] - if old_data[key] != new_data[key] # pyright: ignore [reportAssignmentType] - } - # Convert the new_data to a serializable format and save it for a next diff check. - set_encoded_field(event.relation, bucket, "data", new_data) - - # Return the diff with all possible changes. - return Diff(added, changed, deleted) - - -def leader_only(f): - """Decorator to ensure that only leader can perform given operation.""" - - def wrapper(self, *args, **kwargs): - if self.component == self.local_app and not self.local_unit.is_leader(): - logger.error( - "This operation (%s()) can only be performed by the leader unit", f.__name__ - ) - return - return f(self, *args, **kwargs) - - wrapper.leader_only = True - return wrapper - - -def juju_secrets_only(f): - """Decorator to ensure that certain operations would be only executed on Juju3.""" - - def wrapper(self, *args, **kwargs): - if not self.secrets_enabled: - raise SecretsUnavailableError("Secrets unavailable on current Juju version") - return f(self, *args, **kwargs) - - return wrapper - - -def dynamic_secrets_only(f): - """Decorator to ensure that certain operations would be only executed when NO static secrets are defined.""" - - def wrapper(self, *args, **kwargs): - if self.static_secret_fields: - raise IllegalOperationError( - "Unsafe usage of statically and dynamically defined secrets, aborting." - ) - return f(self, *args, **kwargs) - - return wrapper - - -def either_static_or_dynamic_secrets(f): - """Decorator to ensure that static and dynamic secrets won't be used in parallel.""" - - def wrapper(self, *args, **kwargs): - if self.static_secret_fields and set(self.current_secret_fields) - set( - self.static_secret_fields - ): - raise IllegalOperationError( - "Unsafe usage of statically and dynamically defined secrets, aborting." - ) - return f(self, *args, **kwargs) - - return wrapper - - -class Scope(Enum): - """Peer relations scope.""" - - APP = "app" - UNIT = "unit" - - -################################################################################ -# Secrets internal caching -################################################################################ - - -class CachedSecret: - """Locally cache a secret. - - The data structure is precisely reusing/simulating as in the actual Secret Storage - """ - - def __init__( - self, - model: Model, - component: Union[Application, Unit], - label: str, - secret_uri: Optional[str] = None, - legacy_labels: List[str] = [], - ): - self._secret_meta = None - self._secret_content = {} - self._secret_uri = secret_uri - self.label = label - self._model = model - self.component = component - self.legacy_labels = legacy_labels - self.current_label = None - - def add_secret( - self, - content: Dict[str, str], - relation: Optional[Relation] = None, - label: Optional[str] = None, - ) -> Secret: - """Create a new secret.""" - if self._secret_uri: - raise SecretAlreadyExistsError( - "Secret is already defined with uri %s", self._secret_uri - ) - - label = self.label if not label else label - - secret = self.component.add_secret(content, label=label) - if relation and relation.app != self._model.app: - # If it's not a peer relation, grant is to be applied - secret.grant(relation) - self._secret_uri = secret.id - self._secret_meta = secret - return self._secret_meta - - @property - def meta(self) -> Optional[Secret]: - """Getting cached secret meta-information.""" - if not self._secret_meta: - if not (self._secret_uri or self.label): - return - - for label in [self.label] + self.legacy_labels: - try: - self._secret_meta = self._model.get_secret(label=label) - except SecretNotFoundError: - pass - else: - if label != self.label: - self.current_label = label - break - - # If still not found, to be checked by URI, to be labelled with the proposed label - if not self._secret_meta and self._secret_uri: - self._secret_meta = self._model.get_secret(id=self._secret_uri, label=self.label) - return self._secret_meta - - def get_content(self) -> Dict[str, str]: - """Getting cached secret content.""" - if not self._secret_content: - if self.meta: - try: - self._secret_content = self.meta.get_content(refresh=True) - except (ValueError, ModelError) as err: - # https://bugs.launchpad.net/juju/+bug/2042596 - # Only triggered when 'refresh' is set - known_model_errors = [ - "ERROR either URI or label should be used for getting an owned secret but not both", - "ERROR secret owner cannot use --refresh", - ] - if isinstance(err, ModelError) and not any( - msg in str(err) for msg in known_model_errors - ): - raise - # Due to: ValueError: Secret owner cannot use refresh=True - self._secret_content = self.meta.get_content() - return self._secret_content - - def _move_to_new_label_if_needed(self): - """Helper function to re-create the secret with a different label.""" - if not self.current_label or not (self.meta and self._secret_meta): - return - - # Create a new secret with the new label - content = self._secret_meta.get_content() - self._secret_uri = None - - # I wish we could just check if we are the owners of the secret... - try: - self._secret_meta = self.add_secret(content, label=self.label) - except ModelError as err: - if "this unit is not the leader" not in str(err): - raise - self.current_label = None - - def set_content(self, content: Dict[str, str]) -> None: - """Setting cached secret content.""" - if not self.meta: - return - - # DPE-4182: do not create new revision if the content stay the same - if content == self.get_content(): - return - - if content: - self._move_to_new_label_if_needed() - self.meta.set_content(content) - self._secret_content = content - else: - self.meta.remove_all_revisions() - - def get_info(self) -> Optional[SecretInfo]: - """Wrapper function to apply the corresponding call on the Secret object within CachedSecret if any.""" - if self.meta: - return self.meta.get_info() - - def remove(self) -> None: - """Remove secret.""" - if not self.meta: - raise SecretsUnavailableError("Non-existent secret was attempted to be removed.") - try: - self.meta.remove_all_revisions() - except SecretNotFoundError: - pass - self._secret_content = {} - self._secret_meta = None - self._secret_uri = None - - -class SecretCache: - """A data structure storing CachedSecret objects.""" - - def __init__(self, model: Model, component: Union[Application, Unit]): - self._model = model - self.component = component - self._secrets: Dict[str, CachedSecret] = {} - - def get( - self, label: str, uri: Optional[str] = None, legacy_labels: List[str] = [] - ) -> Optional[CachedSecret]: - """Getting a secret from Juju Secret store or cache.""" - if not self._secrets.get(label): - secret = CachedSecret( - self._model, self.component, label, uri, legacy_labels=legacy_labels - ) - if secret.meta: - self._secrets[label] = secret - return self._secrets.get(label) - - def add(self, label: str, content: Dict[str, str], relation: Relation) -> CachedSecret: - """Adding a secret to Juju Secret.""" - if self._secrets.get(label): - raise SecretAlreadyExistsError(f"Secret {label} already exists") - - secret = CachedSecret(self._model, self.component, label) - secret.add_secret(content, relation) - self._secrets[label] = secret - return self._secrets[label] - - def remove(self, label: str) -> None: - """Remove a secret from the cache.""" - if secret := self.get(label): - try: - secret.remove() - self._secrets.pop(label) - except (SecretsUnavailableError, KeyError): - pass - else: - return - logging.debug("Non-existing Juju Secret was attempted to be removed %s", label) - - -################################################################################ -# Relation Data base/abstract ancestors (i.e. parent classes) -################################################################################ - - -# Base Data - - -class DataDict(UserDict): - """Python Standard Library 'dict' - like representation of Relation Data.""" - - def __init__(self, relation_data: "Data", relation_id: int): - self.relation_data = relation_data - self.relation_id = relation_id - - @property - def data(self) -> Dict[str, str]: - """Return the full content of the Abstract Relation Data dictionary.""" - result = self.relation_data.fetch_my_relation_data([self.relation_id]) - try: - result_remote = self.relation_data.fetch_relation_data([self.relation_id]) - except NotImplementedError: - result_remote = {self.relation_id: {}} - if result: - result_remote[self.relation_id].update(result[self.relation_id]) - return result_remote.get(self.relation_id, {}) - - def __setitem__(self, key: str, item: str) -> None: - """Set an item of the Abstract Relation Data dictionary.""" - self.relation_data.update_relation_data(self.relation_id, {key: item}) - - def __getitem__(self, key: str) -> str: - """Get an item of the Abstract Relation Data dictionary.""" - result = None - - # Avoiding "leader_only" error when cross-charm non-leader unit, not to report useless error - if ( - not hasattr(self.relation_data.fetch_my_relation_field, "leader_only") - or self.relation_data.component != self.relation_data.local_app - or self.relation_data.local_unit.is_leader() - ): - result = self.relation_data.fetch_my_relation_field(self.relation_id, key) - - if not result: - try: - result = self.relation_data.fetch_relation_field(self.relation_id, key) - except NotImplementedError: - pass - - if not result: - raise KeyError - return result - - def __eq__(self, d: dict) -> bool: - """Equality.""" - return self.data == d - - def __repr__(self) -> str: - """String representation Abstract Relation Data dictionary.""" - return repr(self.data) - - def __len__(self) -> int: - """Length of the Abstract Relation Data dictionary.""" - return len(self.data) - - def __delitem__(self, key: str) -> None: - """Delete an item of the Abstract Relation Data dictionary.""" - self.relation_data.delete_relation_data(self.relation_id, [key]) - - def has_key(self, key: str) -> bool: - """Does the key exist in the Abstract Relation Data dictionary?""" - return key in self.data - - def update(self, items: Dict[str, str]): - """Update the Abstract Relation Data dictionary.""" - self.relation_data.update_relation_data(self.relation_id, items) - - def keys(self) -> KeysView[str]: - """Keys of the Abstract Relation Data dictionary.""" - return self.data.keys() - - def values(self) -> ValuesView[str]: - """Values of the Abstract Relation Data dictionary.""" - return self.data.values() - - def items(self) -> ItemsView[str, str]: - """Items of the Abstract Relation Data dictionary.""" - return self.data.items() - - def pop(self, item: str) -> str: - """Pop an item of the Abstract Relation Data dictionary.""" - result = self.relation_data.fetch_my_relation_field(self.relation_id, item) - if not result: - raise KeyError(f"Item {item} doesn't exist.") - self.relation_data.delete_relation_data(self.relation_id, [item]) - return result - - def __contains__(self, item: str) -> bool: - """Does the Abstract Relation Data dictionary contain item?""" - return item in self.data.values() - - def __iter__(self): - """Iterate through the Abstract Relation Data dictionary.""" - return iter(self.data) - - def get(self, key: str, default: Optional[str] = None) -> Optional[str]: - """Safely get an item of the Abstract Relation Data dictionary.""" - try: - if result := self[key]: - return result - except KeyError: - return default - - -class Data(ABC): - """Base relation data mainpulation (abstract) class.""" - - SCOPE = Scope.APP - - # Local map to associate mappings with secrets potentially as a group - SECRET_LABEL_MAP = { - "username": SECRET_GROUPS.USER, - "password": SECRET_GROUPS.USER, - "uris": SECRET_GROUPS.USER, - "tls": SECRET_GROUPS.TLS, - "tls-ca": SECRET_GROUPS.TLS, - } - - def __init__( - self, - model: Model, - relation_name: str, - ) -> None: - self._model = model - self.local_app = self._model.app - self.local_unit = self._model.unit - self.relation_name = relation_name - self._jujuversion = None - self.component = self.local_app if self.SCOPE == Scope.APP else self.local_unit - self.secrets = SecretCache(self._model, self.component) - self.data_component = None - - @property - def relations(self) -> List[Relation]: - """The list of Relation instances associated with this relation_name.""" - return [ - relation - for relation in self._model.relations[self.relation_name] - if self._is_relation_active(relation) - ] - - @property - def secrets_enabled(self): - """Is this Juju version allowing for Secrets usage?""" - if not self._jujuversion: - self._jujuversion = JujuVersion.from_environ() - return self._jujuversion.has_secrets - - @property - def secret_label_map(self): - """Exposing secret-label map via a property -- could be overridden in descendants!""" - return self.SECRET_LABEL_MAP - - # Mandatory overrides for internal/helper methods - - @abstractmethod - def _get_relation_secret( - self, relation_id: int, group_mapping: SecretGroup, relation_name: Optional[str] = None - ) -> Optional[CachedSecret]: - """Retrieve a Juju Secret that's been stored in the relation databag.""" - raise NotImplementedError - - @abstractmethod - def _fetch_specific_relation_data( - self, relation: Relation, fields: Optional[List[str]] - ) -> Dict[str, str]: - """Fetch data available (directily or indirectly -- i.e. secrets) from the relation.""" - raise NotImplementedError - - @abstractmethod - def _fetch_my_specific_relation_data( - self, relation: Relation, fields: Optional[List[str]] - ) -> Dict[str, str]: - """Fetch data available (directily or indirectly -- i.e. secrets) from the relation for owner/this_app.""" - raise NotImplementedError - - @abstractmethod - def _update_relation_data(self, relation: Relation, data: Dict[str, str]) -> None: - """Update data available (directily or indirectly -- i.e. secrets) from the relation for owner/this_app.""" - raise NotImplementedError - - @abstractmethod - def _delete_relation_data(self, relation: Relation, fields: List[str]) -> None: - """Delete data available (directily or indirectly -- i.e. secrets) from the relation for owner/this_app.""" - raise NotImplementedError - - # Internal helper methods - - @staticmethod - def _is_relation_active(relation: Relation): - """Whether the relation is active based on contained data.""" - try: - _ = repr(relation.data) - return True - except (RuntimeError, ModelError): - return False - - @staticmethod - def _is_secret_field(field: str) -> bool: - """Is the field in question a secret reference (URI) field or not?""" - return field.startswith(PROV_SECRET_PREFIX) - - @staticmethod - def _generate_secret_label( - relation_name: str, relation_id: int, group_mapping: SecretGroup - ) -> str: - """Generate unique group_mappings for secrets within a relation context.""" - return f"{relation_name}.{relation_id}.{group_mapping}.secret" - - def _generate_secret_field_name(self, group_mapping: SecretGroup) -> str: - """Generate unique group_mappings for secrets within a relation context.""" - return f"{PROV_SECRET_PREFIX}{group_mapping}" - - def _relation_from_secret_label(self, secret_label: str) -> Optional[Relation]: - """Retrieve the relation that belongs to a secret label.""" - contents = secret_label.split(".") - - if not (contents and len(contents) >= 3): - return - - contents.pop() # ".secret" at the end - contents.pop() # Group mapping - relation_id = contents.pop() - try: - relation_id = int(relation_id) - except ValueError: - return - - # In case '.' character appeared in relation name - relation_name = ".".join(contents) - - try: - return self.get_relation(relation_name, relation_id) - except ModelError: - return - - def _group_secret_fields(self, secret_fields: List[str]) -> Dict[SecretGroup, List[str]]: - """Helper function to arrange secret mappings under their group. - - NOTE: All unrecognized items end up in the 'extra' secret bucket. - Make sure only secret fields are passed! - """ - secret_fieldnames_grouped = {} - for key in secret_fields: - if group := self.secret_label_map.get(key): - secret_fieldnames_grouped.setdefault(group, []).append(key) - else: - secret_fieldnames_grouped.setdefault(SECRET_GROUPS.EXTRA, []).append(key) - return secret_fieldnames_grouped - - def _get_group_secret_contents( - self, - relation: Relation, - group: SecretGroup, - secret_fields: Union[Set[str], List[str]] = [], - ) -> Dict[str, str]: - """Helper function to retrieve collective, requested contents of a secret.""" - if (secret := self._get_relation_secret(relation.id, group)) and ( - secret_data := secret.get_content() - ): - return { - k: v for k, v in secret_data.items() if not secret_fields or k in secret_fields - } - return {} - - def _content_for_secret_group( - self, content: Dict[str, str], secret_fields: Set[str], group_mapping: SecretGroup - ) -> Dict[str, str]: - """Select : pairs from input, that belong to this particular Secret group.""" - if group_mapping == SECRET_GROUPS.EXTRA: - return { - k: v - for k, v in content.items() - if k in secret_fields and k not in self.secret_label_map.keys() - } - - return { - k: v - for k, v in content.items() - if k in secret_fields and self.secret_label_map.get(k) == group_mapping - } - - @juju_secrets_only - def _get_relation_secret_data( - self, relation_id: int, group_mapping: SecretGroup, relation_name: Optional[str] = None - ) -> Optional[Dict[str, str]]: - """Retrieve contents of a Juju Secret that's been stored in the relation databag.""" - secret = self._get_relation_secret(relation_id, group_mapping, relation_name) - if secret: - return secret.get_content() - - # Core operations on Relation Fields manipulations (regardless whether the field is in the databag or in a secret) - # Internal functions to be called directly from transparent public interface functions (+closely related helpers) - - def _process_secret_fields( - self, - relation: Relation, - req_secret_fields: Optional[List[str]], - impacted_rel_fields: List[str], - operation: Callable, - *args, - **kwargs, - ) -> Tuple[Dict[str, str], Set[str]]: - """Isolate target secret fields of manipulation, and execute requested operation by Secret Group.""" - result = {} - - # If the relation started on a databag, we just stay on the databag - # (Rolling upgrades may result in a relation starting on databag, getting secrets enabled on-the-fly) - # self.local_app is sufficient to check (ignored if Requires, never has secrets -- works if Provider) - fallback_to_databag = ( - req_secret_fields - and (self.local_unit == self._model.unit and self.local_unit.is_leader()) - and set(req_secret_fields) & set(relation.data[self.component]) - ) - - normal_fields = set(impacted_rel_fields) - if req_secret_fields and self.secrets_enabled and not fallback_to_databag: - normal_fields = normal_fields - set(req_secret_fields) - secret_fields = set(impacted_rel_fields) - set(normal_fields) - - secret_fieldnames_grouped = self._group_secret_fields(list(secret_fields)) - - for group in secret_fieldnames_grouped: - # operation() should return nothing when all goes well - if group_result := operation(relation, group, secret_fields, *args, **kwargs): - # If "meaningful" data was returned, we take it. (Some 'operation'-s only return success/failure.) - if isinstance(group_result, dict): - result.update(group_result) - else: - # If it wasn't found as a secret, let's give it a 2nd chance as "normal" field - # Needed when Juju3 Requires meets Juju2 Provider - normal_fields |= set(secret_fieldnames_grouped[group]) - return (result, normal_fields) - - def _fetch_relation_data_without_secrets( - self, component: Union[Application, Unit], relation: Relation, fields: Optional[List[str]] - ) -> Dict[str, str]: - """Fetching databag contents when no secrets are involved. - - Since the Provider's databag is the only one holding secrest, we can apply - a simplified workflow to read the Require's side's databag. - This is used typically when the Provider side wants to read the Requires side's data, - or when the Requires side may want to read its own data. - """ - if component not in relation.data or not relation.data[component]: - return {} - - if fields: - return { - k: relation.data[component][k] for k in fields if k in relation.data[component] - } - else: - return dict(relation.data[component]) - - def _fetch_relation_data_with_secrets( - self, - component: Union[Application, Unit], - req_secret_fields: Optional[List[str]], - relation: Relation, - fields: Optional[List[str]] = None, - ) -> Dict[str, str]: - """Fetching databag contents when secrets may be involved. - - This function has internal logic to resolve if a requested field may be "hidden" - within a Relation Secret, or directly available as a databag field. Typically - used to read the Provider side's databag (eigher by the Requires side, or by - Provider side itself). - """ - result = {} - normal_fields = [] - - if not fields: - if component not in relation.data: - return {} - - all_fields = list(relation.data[component].keys()) - normal_fields = [field for field in all_fields if not self._is_secret_field(field)] - fields = normal_fields + req_secret_fields if req_secret_fields else normal_fields - - if fields: - result, normal_fields = self._process_secret_fields( - relation, req_secret_fields, fields, self._get_group_secret_contents - ) - - # Processing "normal" fields. May include leftover from what we couldn't retrieve as a secret. - # (Typically when Juju3 Requires meets Juju2 Provider) - if normal_fields: - result.update( - self._fetch_relation_data_without_secrets(component, relation, list(normal_fields)) - ) - return result - - def _update_relation_data_without_secrets( - self, component: Union[Application, Unit], relation: Relation, data: Dict[str, str] - ) -> None: - """Updating databag contents when no secrets are involved.""" - if component not in relation.data or relation.data[component] is None: - return - - if relation: - relation.data[component].update(data) - - def _delete_relation_data_without_secrets( - self, component: Union[Application, Unit], relation: Relation, fields: List[str] - ) -> None: - """Remove databag fields 'fields' from Relation.""" - if component not in relation.data or relation.data[component] is None: - return - - for field in fields: - try: - relation.data[component].pop(field) - except KeyError: - logger.debug( - "Non-existing field '%s' was attempted to be removed from the databag (relation ID: %s)", - str(field), - str(relation.id), - ) - pass - - # Public interface methods - # Handling Relation Fields seamlessly, regardless if in databag or a Juju Secret - - def as_dict(self, relation_id: int) -> UserDict: - """Dict behavior representation of the Abstract Data.""" - return DataDict(self, relation_id) - - def get_relation(self, relation_name, relation_id) -> Relation: - """Safe way of retrieving a relation.""" - relation = self._model.get_relation(relation_name, relation_id) - - if not relation: - raise DataInterfacesError( - "Relation %s %s couldn't be retrieved", relation_name, relation_id - ) - - return relation - - def fetch_relation_data( - self, - relation_ids: Optional[List[int]] = None, - fields: Optional[List[str]] = None, - relation_name: Optional[str] = None, - ) -> Dict[int, Dict[str, str]]: - """Retrieves data from relation. - - This function can be used to retrieve data from a relation - in the charm code when outside an event callback. - Function cannot be used in `*-relation-broken` events and will raise an exception. - - Returns: - a dict of the values stored in the relation data bag - for all relation instances (indexed by the relation ID). - """ - if not relation_name: - relation_name = self.relation_name - - relations = [] - if relation_ids: - relations = [ - self.get_relation(relation_name, relation_id) for relation_id in relation_ids - ] - else: - relations = self.relations - - data = {} - for relation in relations: - if not relation_ids or (relation_ids and relation.id in relation_ids): - data[relation.id] = self._fetch_specific_relation_data(relation, fields) - return data - - def fetch_relation_field( - self, relation_id: int, field: str, relation_name: Optional[str] = None - ) -> Optional[str]: - """Get a single field from the relation data.""" - return ( - self.fetch_relation_data([relation_id], [field], relation_name) - .get(relation_id, {}) - .get(field) - ) - - def fetch_my_relation_data( - self, - relation_ids: Optional[List[int]] = None, - fields: Optional[List[str]] = None, - relation_name: Optional[str] = None, - ) -> Optional[Dict[int, Dict[str, str]]]: - """Fetch data of the 'owner' (or 'this app') side of the relation. - - NOTE: Since only the leader can read the relation's 'this_app'-side - Application databag, the functionality is limited to leaders - """ - if not relation_name: - relation_name = self.relation_name - - relations = [] - if relation_ids: - relations = [ - self.get_relation(relation_name, relation_id) for relation_id in relation_ids - ] - else: - relations = self.relations - - data = {} - for relation in relations: - if not relation_ids or relation.id in relation_ids: - data[relation.id] = self._fetch_my_specific_relation_data(relation, fields) - return data - - def fetch_my_relation_field( - self, relation_id: int, field: str, relation_name: Optional[str] = None - ) -> Optional[str]: - """Get a single field from the relation data -- owner side. - - NOTE: Since only the leader can read the relation's 'this_app'-side - Application databag, the functionality is limited to leaders - """ - if relation_data := self.fetch_my_relation_data([relation_id], [field], relation_name): - return relation_data.get(relation_id, {}).get(field) - - @leader_only - def update_relation_data(self, relation_id: int, data: dict) -> None: - """Update the data within the relation.""" - relation_name = self.relation_name - relation = self.get_relation(relation_name, relation_id) - return self._update_relation_data(relation, data) - - @leader_only - def delete_relation_data(self, relation_id: int, fields: List[str]) -> None: - """Remove field from the relation.""" - relation_name = self.relation_name - relation = self.get_relation(relation_name, relation_id) - return self._delete_relation_data(relation, fields) - - -class EventHandlers(Object): - """Requires-side of the relation.""" - - def __init__(self, charm: CharmBase, relation_data: Data, unique_key: str = ""): - """Manager of base client relations.""" - if not unique_key: - unique_key = relation_data.relation_name - super().__init__(charm, unique_key) - - self.charm = charm - self.relation_data = relation_data - - self.framework.observe( - charm.on[self.relation_data.relation_name].relation_changed, - self._on_relation_changed_event, - ) - - def _diff(self, event: RelationChangedEvent) -> Diff: - """Retrieves the diff of the data in the relation changed databag. - - Args: - event: relation changed event. - - Returns: - a Diff instance containing the added, deleted and changed - keys from the event relation databag. - """ - return diff(event, self.relation_data.data_component) - - @abstractmethod - def _on_relation_changed_event(self, event: RelationChangedEvent) -> None: - """Event emitted when the relation data has changed.""" - raise NotImplementedError - - -# Base ProviderData and RequiresData - - -class ProviderData(Data): - """Base provides-side of the data products relation.""" - - def __init__( - self, - model: Model, - relation_name: str, - ) -> None: - super().__init__(model, relation_name) - self.data_component = self.local_app - - # Private methods handling secrets - - @juju_secrets_only - def _add_relation_secret( - self, - relation: Relation, - group_mapping: SecretGroup, - secret_fields: Set[str], - data: Dict[str, str], - uri_to_databag=True, - ) -> bool: - """Add a new Juju Secret that will be registered in the relation databag.""" - secret_field = self._generate_secret_field_name(group_mapping) - if uri_to_databag and relation.data[self.component].get(secret_field): - logging.error("Secret for relation %s already exists, not adding again", relation.id) - return False - - content = self._content_for_secret_group(data, secret_fields, group_mapping) - - label = self._generate_secret_label(self.relation_name, relation.id, group_mapping) - secret = self.secrets.add(label, content, relation) - - # According to lint we may not have a Secret ID - if uri_to_databag and secret.meta and secret.meta.id: - relation.data[self.component][secret_field] = secret.meta.id - - # Return the content that was added - return True - - @juju_secrets_only - def _update_relation_secret( - self, - relation: Relation, - group_mapping: SecretGroup, - secret_fields: Set[str], - data: Dict[str, str], - ) -> bool: - """Update the contents of an existing Juju Secret, referred in the relation databag.""" - secret = self._get_relation_secret(relation.id, group_mapping) - - if not secret: - logging.error("Can't update secret for relation %s", relation.id) - return False - - content = self._content_for_secret_group(data, secret_fields, group_mapping) - - old_content = secret.get_content() - full_content = copy.deepcopy(old_content) - full_content.update(content) - secret.set_content(full_content) - - # Return True on success - return True - - def _add_or_update_relation_secrets( - self, - relation: Relation, - group: SecretGroup, - secret_fields: Set[str], - data: Dict[str, str], - uri_to_databag=True, - ) -> bool: - """Update contents for Secret group. If the Secret doesn't exist, create it.""" - if self._get_relation_secret(relation.id, group): - return self._update_relation_secret(relation, group, secret_fields, data) - else: - return self._add_relation_secret(relation, group, secret_fields, data, uri_to_databag) - - @juju_secrets_only - def _delete_relation_secret( - self, relation: Relation, group: SecretGroup, secret_fields: List[str], fields: List[str] - ) -> bool: - """Update the contents of an existing Juju Secret, referred in the relation databag.""" - secret = self._get_relation_secret(relation.id, group) - - if not secret: - logging.error("Can't delete secret for relation %s", str(relation.id)) - return False - - old_content = secret.get_content() - new_content = copy.deepcopy(old_content) - for field in fields: - try: - new_content.pop(field) - except KeyError: - logging.debug( - "Non-existing secret was attempted to be removed %s, %s", - str(relation.id), - str(field), - ) - return False - - # Remove secret from the relation if it's fully gone - if not new_content: - field = self._generate_secret_field_name(group) - try: - relation.data[self.component].pop(field) - except KeyError: - pass - label = self._generate_secret_label(self.relation_name, relation.id, group) - self.secrets.remove(label) - else: - secret.set_content(new_content) - - # Return the content that was removed - return True - - # Mandatory internal overrides - - @juju_secrets_only - def _get_relation_secret( - self, relation_id: int, group_mapping: SecretGroup, relation_name: Optional[str] = None - ) -> Optional[CachedSecret]: - """Retrieve a Juju Secret that's been stored in the relation databag.""" - if not relation_name: - relation_name = self.relation_name - - label = self._generate_secret_label(relation_name, relation_id, group_mapping) - if secret := self.secrets.get(label): - return secret - - relation = self._model.get_relation(relation_name, relation_id) - if not relation: - return - - secret_field = self._generate_secret_field_name(group_mapping) - if secret_uri := relation.data[self.local_app].get(secret_field): - return self.secrets.get(label, secret_uri) - - def _fetch_specific_relation_data( - self, relation: Relation, fields: Optional[List[str]] - ) -> Dict[str, str]: - """Fetching relation data for Provider. - - NOTE: Since all secret fields are in the Provider side of the databag, we don't need to worry about that - """ - if not relation.app: - return {} - - return self._fetch_relation_data_without_secrets(relation.app, relation, fields) - - def _fetch_my_specific_relation_data( - self, relation: Relation, fields: Optional[List[str]] - ) -> dict: - """Fetching our own relation data.""" - secret_fields = None - if relation.app: - secret_fields = get_encoded_list(relation, relation.app, REQ_SECRET_FIELDS) - - return self._fetch_relation_data_with_secrets( - self.local_app, - secret_fields, - relation, - fields, - ) - - def _update_relation_data(self, relation: Relation, data: Dict[str, str]) -> None: - """Set values for fields not caring whether it's a secret or not.""" - req_secret_fields = [] - if relation.app: - req_secret_fields = get_encoded_list(relation, relation.app, REQ_SECRET_FIELDS) - - _, normal_fields = self._process_secret_fields( - relation, - req_secret_fields, - list(data), - self._add_or_update_relation_secrets, - data=data, - ) - - normal_content = {k: v for k, v in data.items() if k in normal_fields} - self._update_relation_data_without_secrets(self.local_app, relation, normal_content) - - def _delete_relation_data(self, relation: Relation, fields: List[str]) -> None: - """Delete fields from the Relation not caring whether it's a secret or not.""" - req_secret_fields = [] - if relation.app: - req_secret_fields = get_encoded_list(relation, relation.app, REQ_SECRET_FIELDS) - - _, normal_fields = self._process_secret_fields( - relation, req_secret_fields, fields, self._delete_relation_secret, fields=fields - ) - self._delete_relation_data_without_secrets(self.local_app, relation, list(normal_fields)) - - # Public methods - "native" - - def set_credentials(self, relation_id: int, username: str, password: str) -> None: - """Set credentials. - - This function writes in the application data bag, therefore, - only the leader unit can call it. - - Args: - relation_id: the identifier for a particular relation. - username: user that was created. - password: password of the created user. - """ - self.update_relation_data(relation_id, {"username": username, "password": password}) - - def set_tls(self, relation_id: int, tls: str) -> None: - """Set whether TLS is enabled. - - Args: - relation_id: the identifier for a particular relation. - tls: whether tls is enabled (True or False). - """ - self.update_relation_data(relation_id, {"tls": tls}) - - def set_tls_ca(self, relation_id: int, tls_ca: str) -> None: - """Set the TLS CA in the application relation databag. - - Args: - relation_id: the identifier for a particular relation. - tls_ca: TLS certification authority. - """ - self.update_relation_data(relation_id, {"tls-ca": tls_ca}) - - # Public functions -- inherited - - fetch_my_relation_data = leader_only(Data.fetch_my_relation_data) - fetch_my_relation_field = leader_only(Data.fetch_my_relation_field) - - -class RequirerData(Data): - """Requirer-side of the relation.""" - - SECRET_FIELDS = ["username", "password", "tls", "tls-ca", "uris"] - - def __init__( - self, - model, - relation_name: str, - extra_user_roles: Optional[str] = None, - additional_secret_fields: Optional[List[str]] = [], - ): - """Manager of base client relations.""" - super().__init__(model, relation_name) - self.extra_user_roles = extra_user_roles - self._secret_fields = list(self.SECRET_FIELDS) - if additional_secret_fields: - self._secret_fields += additional_secret_fields - self.data_component = self.local_unit - - @property - def secret_fields(self) -> Optional[List[str]]: - """Local access to secrets field, in case they are being used.""" - if self.secrets_enabled: - return self._secret_fields - - # Internal helper functions - - def _register_secret_to_relation( - self, relation_name: str, relation_id: int, secret_id: str, group: SecretGroup - ): - """Fetch secrets and apply local label on them. - - [MAGIC HERE] - If we fetch a secret using get_secret(id=, label=), - then will be "stuck" on the Secret object, whenever it may - appear (i.e. as an event attribute, or fetched manually) on future occasions. - - This will allow us to uniquely identify the secret on Provider side (typically on - 'secret-changed' events), and map it to the corresponding relation. - """ - label = self._generate_secret_label(relation_name, relation_id, group) - - # Fetching the Secret's meta information ensuring that it's locally getting registered with - CachedSecret(self._model, self.component, label, secret_id).meta - - def _register_secrets_to_relation(self, relation: Relation, params_name_list: List[str]): - """Make sure that secrets of the provided list are locally 'registered' from the databag. - - More on 'locally registered' magic is described in _register_secret_to_relation() method - """ - if not relation.app: - return - - for group in SECRET_GROUPS.groups(): - secret_field = self._generate_secret_field_name(group) - if secret_field in params_name_list: - if secret_uri := relation.data[relation.app].get(secret_field): - self._register_secret_to_relation( - relation.name, relation.id, secret_uri, group - ) - - def _is_resource_created_for_relation(self, relation: Relation) -> bool: - if not relation.app: - return False - - data = self.fetch_relation_data([relation.id], ["username", "password"]).get( - relation.id, {} - ) - return bool(data.get("username")) and bool(data.get("password")) - - def is_resource_created(self, relation_id: Optional[int] = None) -> bool: - """Check if the resource has been created. - - This function can be used to check if the Provider answered with data in the charm code - when outside an event callback. - - Args: - relation_id (int, optional): When provided the check is done only for the relation id - provided, otherwise the check is done for all relations - - Returns: - True or False - - Raises: - IndexError: If relation_id is provided but that relation does not exist - """ - if relation_id is not None: - try: - relation = [relation for relation in self.relations if relation.id == relation_id][ - 0 - ] - return self._is_resource_created_for_relation(relation) - except IndexError: - raise IndexError(f"relation id {relation_id} cannot be accessed") - else: - return ( - all( - self._is_resource_created_for_relation(relation) for relation in self.relations - ) - if self.relations - else False - ) - - # Mandatory internal overrides - - @juju_secrets_only - def _get_relation_secret( - self, relation_id: int, group: SecretGroup, relation_name: Optional[str] = None - ) -> Optional[CachedSecret]: - """Retrieve a Juju Secret that's been stored in the relation databag.""" - if not relation_name: - relation_name = self.relation_name - - label = self._generate_secret_label(relation_name, relation_id, group) - return self.secrets.get(label) - - def _fetch_specific_relation_data( - self, relation, fields: Optional[List[str]] = None - ) -> Dict[str, str]: - """Fetching Requirer data -- that may include secrets.""" - if not relation.app: - return {} - return self._fetch_relation_data_with_secrets( - relation.app, self.secret_fields, relation, fields - ) - - def _fetch_my_specific_relation_data(self, relation, fields: Optional[List[str]]) -> dict: - """Fetching our own relation data.""" - return self._fetch_relation_data_without_secrets(self.local_app, relation, fields) - - def _update_relation_data(self, relation: Relation, data: dict) -> None: - """Updates a set of key-value pairs in the relation. - - This function writes in the application data bag, therefore, - only the leader unit can call it. - - Args: - relation: the particular relation. - data: dict containing the key-value pairs - that should be updated in the relation. - """ - return self._update_relation_data_without_secrets(self.local_app, relation, data) - - def _delete_relation_data(self, relation: Relation, fields: List[str]) -> None: - """Deletes a set of fields from the relation. - - This function writes in the application data bag, therefore, - only the leader unit can call it. - - Args: - relation: the particular relation. - fields: list containing the field names that should be removed from the relation. - """ - return self._delete_relation_data_without_secrets(self.local_app, relation, fields) - - # Public functions -- inherited - - fetch_my_relation_data = leader_only(Data.fetch_my_relation_data) - fetch_my_relation_field = leader_only(Data.fetch_my_relation_field) - - -class RequirerEventHandlers(EventHandlers): - """Requires-side of the relation.""" - - def __init__(self, charm: CharmBase, relation_data: RequirerData, unique_key: str = ""): - """Manager of base client relations.""" - super().__init__(charm, relation_data, unique_key) - - self.framework.observe( - self.charm.on[relation_data.relation_name].relation_created, - self._on_relation_created_event, - ) - self.framework.observe( - charm.on.secret_changed, - self._on_secret_changed_event, - ) - - # Event handlers - - def _on_relation_created_event(self, event: RelationCreatedEvent) -> None: - """Event emitted when the relation is created.""" - if not self.relation_data.local_unit.is_leader(): - return - - if self.relation_data.secret_fields: # pyright: ignore [reportAttributeAccessIssue] - set_encoded_field( - event.relation, - self.relation_data.component, - REQ_SECRET_FIELDS, - self.relation_data.secret_fields, # pyright: ignore [reportAttributeAccessIssue] - ) - - @abstractmethod - def _on_secret_changed_event(self, event: RelationChangedEvent) -> None: - """Event emitted when the relation data has changed.""" - raise NotImplementedError - - -################################################################################ -# Peer Relation Data -################################################################################ - - -class DataPeerData(RequirerData, ProviderData): - """Represents peer relations data.""" - - SECRET_FIELDS = [] - SECRET_FIELD_NAME = "internal_secret" - SECRET_LABEL_MAP = {} - - def __init__( - self, - model, - relation_name: str, - extra_user_roles: Optional[str] = None, - additional_secret_fields: Optional[List[str]] = [], - additional_secret_group_mapping: Dict[str, str] = {}, - secret_field_name: Optional[str] = None, - deleted_label: Optional[str] = None, - ): - """Manager of base client relations.""" - RequirerData.__init__( - self, - model, - relation_name, - extra_user_roles, - additional_secret_fields, - ) - self.secret_field_name = secret_field_name if secret_field_name else self.SECRET_FIELD_NAME - self.deleted_label = deleted_label - self._secret_label_map = {} - # Secrets that are being dynamically added within the scope of this event handler run - self._new_secrets = [] - self._additional_secret_group_mapping = additional_secret_group_mapping - - for group, fields in additional_secret_group_mapping.items(): - if group not in SECRET_GROUPS.groups(): - setattr(SECRET_GROUPS, group, group) - for field in fields: - secret_group = SECRET_GROUPS.get_group(group) - internal_field = self._field_to_internal_name(field, secret_group) - self._secret_label_map.setdefault(group, []).append(internal_field) - self._secret_fields.append(internal_field) - - @property - def scope(self) -> Optional[Scope]: - """Turn component information into Scope.""" - if isinstance(self.component, Application): - return Scope.APP - if isinstance(self.component, Unit): - return Scope.UNIT - - @property - def secret_label_map(self) -> Dict[str, str]: - """Property storing secret mappings.""" - return self._secret_label_map - - @property - def static_secret_fields(self) -> List[str]: - """Re-definition of the property in a way that dynamically extended list is retrieved.""" - return self._secret_fields - - @property - def secret_fields(self) -> List[str]: - """Re-definition of the property in a way that dynamically extended list is retrieved.""" - return ( - self.static_secret_fields if self.static_secret_fields else self.current_secret_fields - ) - - @property - def current_secret_fields(self) -> List[str]: - """Helper method to get all currently existing secret fields (added statically or dynamically).""" - if not self.secrets_enabled: - return [] - - if len(self._model.relations[self.relation_name]) > 1: - raise ValueError(f"More than one peer relation on {self.relation_name}") - - relation = self._model.relations[self.relation_name][0] - fields = [] - - ignores = [SECRET_GROUPS.get_group("user"), SECRET_GROUPS.get_group("tls")] - for group in SECRET_GROUPS.groups(): - if group in ignores: - continue - if content := self._get_group_secret_contents(relation, group): - fields += list(content.keys()) - return list(set(fields) | set(self._new_secrets)) - - @dynamic_secrets_only - def set_secret( - self, - relation_id: int, - field: str, - value: str, - group_mapping: Optional[SecretGroup] = None, - ) -> None: - """Public interface method to add a Relation Data field specifically as a Juju Secret. - - Args: - relation_id: ID of the relation - field: The secret field that is to be added - value: The string value of the secret - group_mapping: The name of the "secret group", in case the field is to be added to an existing secret - """ - full_field = self._field_to_internal_name(field, group_mapping) - if self.secrets_enabled and full_field not in self.current_secret_fields: - self._new_secrets.append(full_field) - if self._no_group_with_databag(field, full_field): - self.update_relation_data(relation_id, {full_field: value}) - - # Unlike for set_secret(), there's no harm using this operation with static secrets - # The restricion is only added to keep the concept clear - @dynamic_secrets_only - def get_secret( - self, - relation_id: int, - field: str, - group_mapping: Optional[SecretGroup] = None, - ) -> Optional[str]: - """Public interface method to fetch secrets only.""" - full_field = self._field_to_internal_name(field, group_mapping) - if ( - self.secrets_enabled - and full_field not in self.current_secret_fields - and field not in self.current_secret_fields - ): - return - if self._no_group_with_databag(field, full_field): - return self.fetch_my_relation_field(relation_id, full_field) - - @dynamic_secrets_only - def delete_secret( - self, - relation_id: int, - field: str, - group_mapping: Optional[SecretGroup] = None, - ) -> Optional[str]: - """Public interface method to delete secrets only.""" - full_field = self._field_to_internal_name(field, group_mapping) - if self.secrets_enabled and full_field not in self.current_secret_fields: - logger.warning(f"Secret {field} from group {group_mapping} was not found") - return - if self._no_group_with_databag(field, full_field): - self.delete_relation_data(relation_id, [full_field]) - - # Helpers - - @staticmethod - def _field_to_internal_name(field: str, group: Optional[SecretGroup]) -> str: - if not group or group == SECRET_GROUPS.EXTRA: - return field - return f"{field}{GROUP_SEPARATOR}{group}" - - @staticmethod - def _internal_name_to_field(name: str) -> Tuple[str, SecretGroup]: - parts = name.split(GROUP_SEPARATOR) - if not len(parts) > 1: - return (parts[0], SECRET_GROUPS.EXTRA) - secret_group = SECRET_GROUPS.get_group(parts[1]) - if not secret_group: - raise ValueError(f"Invalid secret field {name}") - return (parts[0], secret_group) - - def _group_secret_fields(self, secret_fields: List[str]) -> Dict[SecretGroup, List[str]]: - """Helper function to arrange secret mappings under their group. - - NOTE: All unrecognized items end up in the 'extra' secret bucket. - Make sure only secret fields are passed! - """ - secret_fieldnames_grouped = {} - for key in secret_fields: - field, group = self._internal_name_to_field(key) - secret_fieldnames_grouped.setdefault(group, []).append(field) - return secret_fieldnames_grouped - - def _content_for_secret_group( - self, content: Dict[str, str], secret_fields: Set[str], group_mapping: SecretGroup - ) -> Dict[str, str]: - """Select : pairs from input, that belong to this particular Secret group.""" - if group_mapping == SECRET_GROUPS.EXTRA: - return {k: v for k, v in content.items() if k in self.secret_fields} - return { - self._internal_name_to_field(k)[0]: v - for k, v in content.items() - if k in self.secret_fields - } - - # Backwards compatibility - - def _check_deleted_label(self, relation, fields) -> None: - """Helper function for legacy behavior.""" - current_data = self.fetch_my_relation_data([relation.id], fields) - if current_data is not None: - # Check if the secret we wanna delete actually exists - # Given the "deleted label", here we can't rely on the default mechanism (i.e. 'key not found') - if non_existent := (set(fields) & set(self.secret_fields)) - set( - current_data.get(relation.id, []) - ): - logger.debug( - "Non-existing secret %s was attempted to be removed.", - ", ".join(non_existent), - ) - - def _remove_secret_from_databag(self, relation, fields: List[str]) -> None: - """For Rolling Upgrades -- when moving from databag to secrets usage. - - Practically what happens here is to remove stuff from the databag that is - to be stored in secrets. - """ - if not self.secret_fields: - return - - secret_fields_passed = set(self.secret_fields) & set(fields) - for field in secret_fields_passed: - if self._fetch_relation_data_without_secrets(self.component, relation, [field]): - self._delete_relation_data_without_secrets(self.component, relation, [field]) - - def _remove_secret_field_name_from_databag(self, relation) -> None: - """Making sure that the old databag URI is gone. - - This action should not be executed more than once. - """ - # Nothing to do if 'internal-secret' is not in the databag - if not (relation.data[self.component].get(self._generate_secret_field_name())): - return - - # Making sure that the secret receives its label - # (This should have happened by the time we get here, rather an extra security measure.) - secret = self._get_relation_secret(relation.id) - - # Either app scope secret with leader executing, or unit scope secret - leader_or_unit_scope = self.component != self.local_app or self.local_unit.is_leader() - if secret and leader_or_unit_scope: - # Databag reference to the secret URI can be removed, now that it's labelled - relation.data[self.component].pop(self._generate_secret_field_name(), None) - - def _previous_labels(self) -> List[str]: - """Generator for legacy secret label names, for backwards compatibility.""" - result = [] - members = [self._model.app.name] - if self.scope: - members.append(self.scope.value) - result.append(f"{'.'.join(members)}") - return result - - def _no_group_with_databag(self, field: str, full_field: str) -> bool: - """Check that no secret group is attempted to be used together with databag.""" - if not self.secrets_enabled and full_field != field: - logger.error( - f"Can't access {full_field}: no secrets available (i.e. no secret groups either)." - ) - return False - return True - - # Event handlers - - def _on_relation_changed_event(self, event: RelationChangedEvent) -> None: - """Event emitted when the relation has changed.""" - pass - - def _on_secret_changed_event(self, event: SecretChangedEvent) -> None: - """Event emitted when the secret has changed.""" - pass - - # Overrides of Relation Data handling functions - - def _generate_secret_label( - self, relation_name: str, relation_id: int, group_mapping: SecretGroup - ) -> str: - members = [relation_name, self._model.app.name] - if self.scope: - members.append(self.scope.value) - if group_mapping != SECRET_GROUPS.EXTRA: - members.append(group_mapping) - return f"{'.'.join(members)}" - - def _generate_secret_field_name(self, group_mapping: SecretGroup = SECRET_GROUPS.EXTRA) -> str: - """Generate unique group_mappings for secrets within a relation context.""" - return f"{self.secret_field_name}" - - @juju_secrets_only - def _get_relation_secret( - self, - relation_id: int, - group_mapping: SecretGroup = SECRET_GROUPS.EXTRA, - relation_name: Optional[str] = None, - ) -> Optional[CachedSecret]: - """Retrieve a Juju Secret specifically for peer relations. - - In case this code may be executed within a rolling upgrade, and we may need to - migrate secrets from the databag to labels, we make sure to stick the correct - label on the secret, and clean up the local databag. - """ - if not relation_name: - relation_name = self.relation_name - - relation = self._model.get_relation(relation_name, relation_id) - if not relation: - return - - label = self._generate_secret_label(relation_name, relation_id, group_mapping) - secret_uri = relation.data[self.component].get(self._generate_secret_field_name(), None) - - # URI or legacy label is only to applied when moving single legacy secret to a (new) label - if group_mapping == SECRET_GROUPS.EXTRA: - # Fetching the secret with fallback to URI (in case label is not yet known) - # Label would we "stuck" on the secret in case it is found - return self.secrets.get(label, secret_uri, legacy_labels=self._previous_labels()) - return self.secrets.get(label) - - def _get_group_secret_contents( - self, - relation: Relation, - group: SecretGroup, - secret_fields: Union[Set[str], List[str]] = [], - ) -> Dict[str, str]: - """Helper function to retrieve collective, requested contents of a secret.""" - secret_fields = [self._internal_name_to_field(k)[0] for k in secret_fields] - result = super()._get_group_secret_contents(relation, group, secret_fields) - if self.deleted_label: - result = {key: result[key] for key in result if result[key] != self.deleted_label} - if self._additional_secret_group_mapping: - return {self._field_to_internal_name(key, group): result[key] for key in result} - return result - - @either_static_or_dynamic_secrets - def _fetch_my_specific_relation_data( - self, relation: Relation, fields: Optional[List[str]] - ) -> Dict[str, str]: - """Fetch data available (directily or indirectly -- i.e. secrets) from the relation for owner/this_app.""" - return self._fetch_relation_data_with_secrets( - self.component, self.secret_fields, relation, fields - ) - - @either_static_or_dynamic_secrets - def _update_relation_data(self, relation: Relation, data: Dict[str, str]) -> None: - """Update data available (directily or indirectly -- i.e. secrets) from the relation for owner/this_app.""" - self._remove_secret_from_databag(relation, list(data.keys())) - _, normal_fields = self._process_secret_fields( - relation, - self.secret_fields, - list(data), - self._add_or_update_relation_secrets, - data=data, - uri_to_databag=False, - ) - self._remove_secret_field_name_from_databag(relation) - - normal_content = {k: v for k, v in data.items() if k in normal_fields} - self._update_relation_data_without_secrets(self.component, relation, normal_content) - - @either_static_or_dynamic_secrets - def _delete_relation_data(self, relation: Relation, fields: List[str]) -> None: - """Delete data available (directily or indirectly -- i.e. secrets) from the relation for owner/this_app.""" - if self.secret_fields and self.deleted_label: - # Legacy, backwards compatibility - self._check_deleted_label(relation, fields) - - _, normal_fields = self._process_secret_fields( - relation, - self.secret_fields, - fields, - self._update_relation_secret, - data={field: self.deleted_label for field in fields}, - ) - else: - _, normal_fields = self._process_secret_fields( - relation, self.secret_fields, fields, self._delete_relation_secret, fields=fields - ) - self._delete_relation_data_without_secrets(self.component, relation, list(normal_fields)) - - def fetch_relation_data( - self, - relation_ids: Optional[List[int]] = None, - fields: Optional[List[str]] = None, - relation_name: Optional[str] = None, - ) -> Dict[int, Dict[str, str]]: - """This method makes no sense for a Peer Relation.""" - raise NotImplementedError( - "Peer Relation only supports 'self-side' fetch methods: " - "fetch_my_relation_data() and fetch_my_relation_field()" - ) - - def fetch_relation_field( - self, relation_id: int, field: str, relation_name: Optional[str] = None - ) -> Optional[str]: - """This method makes no sense for a Peer Relation.""" - raise NotImplementedError( - "Peer Relation only supports 'self-side' fetch methods: " - "fetch_my_relation_data() and fetch_my_relation_field()" - ) - - # Public functions -- inherited - - fetch_my_relation_data = Data.fetch_my_relation_data - fetch_my_relation_field = Data.fetch_my_relation_field - - -class DataPeerEventHandlers(RequirerEventHandlers): - """Requires-side of the relation.""" - - def __init__(self, charm: CharmBase, relation_data: RequirerData, unique_key: str = ""): - """Manager of base client relations.""" - super().__init__(charm, relation_data, unique_key) - - def _on_relation_changed_event(self, event: RelationChangedEvent) -> None: - """Event emitted when the relation has changed.""" - pass - - def _on_secret_changed_event(self, event: SecretChangedEvent) -> None: - """Event emitted when the secret has changed.""" - pass - - -class DataPeer(DataPeerData, DataPeerEventHandlers): - """Represents peer relations.""" - - def __init__( - self, - charm, - relation_name: str, - extra_user_roles: Optional[str] = None, - additional_secret_fields: Optional[List[str]] = [], - additional_secret_group_mapping: Dict[str, str] = {}, - secret_field_name: Optional[str] = None, - deleted_label: Optional[str] = None, - unique_key: str = "", - ): - DataPeerData.__init__( - self, - charm.model, - relation_name, - extra_user_roles, - additional_secret_fields, - additional_secret_group_mapping, - secret_field_name, - deleted_label, - ) - DataPeerEventHandlers.__init__(self, charm, self, unique_key) - - -class DataPeerUnitData(DataPeerData): - """Unit data abstraction representation.""" - - SCOPE = Scope.UNIT - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - -class DataPeerUnit(DataPeerUnitData, DataPeerEventHandlers): - """Unit databag representation.""" - - def __init__( - self, - charm, - relation_name: str, - extra_user_roles: Optional[str] = None, - additional_secret_fields: Optional[List[str]] = [], - additional_secret_group_mapping: Dict[str, str] = {}, - secret_field_name: Optional[str] = None, - deleted_label: Optional[str] = None, - unique_key: str = "", - ): - DataPeerData.__init__( - self, - charm.model, - relation_name, - extra_user_roles, - additional_secret_fields, - additional_secret_group_mapping, - secret_field_name, - deleted_label, - ) - DataPeerEventHandlers.__init__(self, charm, self, unique_key) - - -class DataPeerOtherUnitData(DataPeerUnitData): - """Unit data abstraction representation.""" - - def __init__(self, unit: Unit, *args, **kwargs): - super().__init__(*args, **kwargs) - self.local_unit = unit - self.component = unit - - def update_relation_data(self, relation_id: int, data: dict) -> None: - """This method makes no sense for a Other Peer Relation.""" - raise NotImplementedError("It's not possible to update data of another unit.") - - def delete_relation_data(self, relation_id: int, fields: List[str]) -> None: - """This method makes no sense for a Other Peer Relation.""" - raise NotImplementedError("It's not possible to delete data of another unit.") - - -class DataPeerOtherUnitEventHandlers(DataPeerEventHandlers): - """Requires-side of the relation.""" - - def __init__(self, charm: CharmBase, relation_data: DataPeerUnitData): - """Manager of base client relations.""" - unique_key = f"{relation_data.relation_name}-{relation_data.local_unit.name}" - super().__init__(charm, relation_data, unique_key=unique_key) - - -class DataPeerOtherUnit(DataPeerOtherUnitData, DataPeerOtherUnitEventHandlers): - """Unit databag representation for another unit than the executor.""" - - def __init__( - self, - unit: Unit, - charm: CharmBase, - relation_name: str, - extra_user_roles: Optional[str] = None, - additional_secret_fields: Optional[List[str]] = [], - additional_secret_group_mapping: Dict[str, str] = {}, - secret_field_name: Optional[str] = None, - deleted_label: Optional[str] = None, - ): - DataPeerOtherUnitData.__init__( - self, - unit, - charm.model, - relation_name, - extra_user_roles, - additional_secret_fields, - additional_secret_group_mapping, - secret_field_name, - deleted_label, - ) - DataPeerOtherUnitEventHandlers.__init__(self, charm, self) - - -################################################################################ -# Cross-charm Relatoins Data Handling and Evenets -################################################################################ - -# Generic events - - -class ExtraRoleEvent(RelationEvent): - """Base class for data events.""" - - @property - def extra_user_roles(self) -> Optional[str]: - """Returns the extra user roles that were requested.""" - if not self.relation.app: - return None - - return self.relation.data[self.relation.app].get("extra-user-roles") - - -class RelationEventWithSecret(RelationEvent): - """Base class for Relation Events that need to handle secrets.""" - - @property - def _secrets(self) -> dict: - """Caching secrets to avoid fetching them each time a field is referrd. - - DON'T USE the encapsulated helper variable outside of this function - """ - if not hasattr(self, "_cached_secrets"): - self._cached_secrets = {} - return self._cached_secrets - - def _get_secret(self, group) -> Optional[Dict[str, str]]: - """Retrieving secrets.""" - if not self.app: - return - if not self._secrets.get(group): - self._secrets[group] = None - secret_field = f"{PROV_SECRET_PREFIX}{group}" - if secret_uri := self.relation.data[self.app].get(secret_field): - secret = self.framework.model.get_secret(id=secret_uri) - self._secrets[group] = secret.get_content() - return self._secrets[group] - - @property - def secrets_enabled(self): - """Is this Juju version allowing for Secrets usage?""" - return JujuVersion.from_environ().has_secrets - - -class AuthenticationEvent(RelationEventWithSecret): - """Base class for authentication fields for events. - - The amount of logic added here is not ideal -- but this was the only way to preserve - the interface when moving to Juju Secrets - """ - - @property - def username(self) -> Optional[str]: - """Returns the created username.""" - if not self.relation.app: - return None - - if self.secrets_enabled: - secret = self._get_secret("user") - if secret: - return secret.get("username") - - return self.relation.data[self.relation.app].get("username") - - @property - def password(self) -> Optional[str]: - """Returns the password for the created user.""" - if not self.relation.app: - return None - - if self.secrets_enabled: - secret = self._get_secret("user") - if secret: - return secret.get("password") - - return self.relation.data[self.relation.app].get("password") - - @property - def tls(self) -> Optional[str]: - """Returns whether TLS is configured.""" - if not self.relation.app: - return None - - if self.secrets_enabled: - secret = self._get_secret("tls") - if secret: - return secret.get("tls") - - return self.relation.data[self.relation.app].get("tls") - - @property - def tls_ca(self) -> Optional[str]: - """Returns TLS CA.""" - if not self.relation.app: - return None - - if self.secrets_enabled: - secret = self._get_secret("tls") - if secret: - return secret.get("tls-ca") - - return self.relation.data[self.relation.app].get("tls-ca") - - -# Database related events and fields - - -class DatabaseProvidesEvent(RelationEvent): - """Base class for database events.""" - - @property - def database(self) -> Optional[str]: - """Returns the database that was requested.""" - if not self.relation.app: - return None - - return self.relation.data[self.relation.app].get("database") - - -class DatabaseRequestedEvent(DatabaseProvidesEvent, ExtraRoleEvent): - """Event emitted when a new database is requested for use on this relation.""" - - @property - def external_node_connectivity(self) -> bool: - """Returns the requested external_node_connectivity field.""" - if not self.relation.app: - return False - - return ( - self.relation.data[self.relation.app].get("external-node-connectivity", "false") - == "true" - ) - - -class DatabaseProvidesEvents(CharmEvents): - """Database events. - - This class defines the events that the database can emit. - """ - - database_requested = EventSource(DatabaseRequestedEvent) - - -class DatabaseRequiresEvent(RelationEventWithSecret): - """Base class for database events.""" - - @property - def database(self) -> Optional[str]: - """Returns the database name.""" - if not self.relation.app: - return None - - return self.relation.data[self.relation.app].get("database") - - @property - def endpoints(self) -> Optional[str]: - """Returns a comma separated list of read/write endpoints. - - In VM charms, this is the primary's address. - In kubernetes charms, this is the service to the primary pod. - """ - if not self.relation.app: - return None - - return self.relation.data[self.relation.app].get("endpoints") - - @property - def read_only_endpoints(self) -> Optional[str]: - """Returns a comma separated list of read only endpoints. - - In VM charms, this is the address of all the secondary instances. - In kubernetes charms, this is the service to all replica pod instances. - """ - if not self.relation.app: - return None - - return self.relation.data[self.relation.app].get("read-only-endpoints") - - @property - def replset(self) -> Optional[str]: - """Returns the replicaset name. - - MongoDB only. - """ - if not self.relation.app: - return None - - return self.relation.data[self.relation.app].get("replset") - - @property - def uris(self) -> Optional[str]: - """Returns the connection URIs. - - MongoDB, Redis, OpenSearch. - """ - if not self.relation.app: - return None - - if self.secrets_enabled: - secret = self._get_secret("user") - if secret: - return secret.get("uris") - - return self.relation.data[self.relation.app].get("uris") - - @property - def version(self) -> Optional[str]: - """Returns the version of the database. - - Version as informed by the database daemon. - """ - if not self.relation.app: - return None - - return self.relation.data[self.relation.app].get("version") - - -class DatabaseCreatedEvent(AuthenticationEvent, DatabaseRequiresEvent): - """Event emitted when a new database is created for use on this relation.""" - - -class DatabaseEndpointsChangedEvent(AuthenticationEvent, DatabaseRequiresEvent): - """Event emitted when the read/write endpoints are changed.""" - - -class DatabaseReadOnlyEndpointsChangedEvent(AuthenticationEvent, DatabaseRequiresEvent): - """Event emitted when the read only endpoints are changed.""" - - -class DatabaseRequiresEvents(CharmEvents): - """Database events. - - This class defines the events that the database can emit. - """ - - database_created = EventSource(DatabaseCreatedEvent) - endpoints_changed = EventSource(DatabaseEndpointsChangedEvent) - read_only_endpoints_changed = EventSource(DatabaseReadOnlyEndpointsChangedEvent) - - -# Database Provider and Requires - - -class DatabaseProviderData(ProviderData): - """Provider-side data of the database relations.""" - - def __init__(self, model: Model, relation_name: str) -> None: - super().__init__(model, relation_name) - - def set_database(self, relation_id: int, database_name: str) -> None: - """Set database name. - - This function writes in the application data bag, therefore, - only the leader unit can call it. - - Args: - relation_id: the identifier for a particular relation. - database_name: database name. - """ - self.update_relation_data(relation_id, {"database": database_name}) - - def set_endpoints(self, relation_id: int, connection_strings: str) -> None: - """Set database primary connections. - - This function writes in the application data bag, therefore, - only the leader unit can call it. - - In VM charms, only the primary's address should be passed as an endpoint. - In kubernetes charms, the service endpoint to the primary pod should be - passed as an endpoint. - - Args: - relation_id: the identifier for a particular relation. - connection_strings: database hosts and ports comma separated list. - """ - self.update_relation_data(relation_id, {"endpoints": connection_strings}) - - def set_read_only_endpoints(self, relation_id: int, connection_strings: str) -> None: - """Set database replicas connection strings. - - This function writes in the application data bag, therefore, - only the leader unit can call it. - - Args: - relation_id: the identifier for a particular relation. - connection_strings: database hosts and ports comma separated list. - """ - self.update_relation_data(relation_id, {"read-only-endpoints": connection_strings}) - - def set_replset(self, relation_id: int, replset: str) -> None: - """Set replica set name in the application relation databag. - - MongoDB only. - - Args: - relation_id: the identifier for a particular relation. - replset: replica set name. - """ - self.update_relation_data(relation_id, {"replset": replset}) - - def set_uris(self, relation_id: int, uris: str) -> None: - """Set the database connection URIs in the application relation databag. - - MongoDB, Redis, and OpenSearch only. - - Args: - relation_id: the identifier for a particular relation. - uris: connection URIs. - """ - self.update_relation_data(relation_id, {"uris": uris}) - - def set_version(self, relation_id: int, version: str) -> None: - """Set the database version in the application relation databag. - - Args: - relation_id: the identifier for a particular relation. - version: database version. - """ - self.update_relation_data(relation_id, {"version": version}) - - def set_subordinated(self, relation_id: int) -> None: - """Raises the subordinated flag in the application relation databag. - - Args: - relation_id: the identifier for a particular relation. - """ - self.update_relation_data(relation_id, {"subordinated": "true"}) - - -class DatabaseProviderEventHandlers(EventHandlers): - """Provider-side of the database relation handlers.""" - - on = DatabaseProvidesEvents() # pyright: ignore [reportAssignmentType] - - def __init__( - self, charm: CharmBase, relation_data: DatabaseProviderData, unique_key: str = "" - ): - """Manager of base client relations.""" - super().__init__(charm, relation_data, unique_key) - # Just to calm down pyright, it can't parse that the same type is being used in the super() call above - self.relation_data = relation_data - - def _on_relation_changed_event(self, event: RelationChangedEvent) -> None: - """Event emitted when the relation has changed.""" - # Leader only - if not self.relation_data.local_unit.is_leader(): - return - # Check which data has changed to emit customs events. - diff = self._diff(event) - - # Emit a database requested event if the setup key (database name and optional - # extra user roles) was added to the relation databag by the application. - if "database" in diff.added: - getattr(self.on, "database_requested").emit( - event.relation, app=event.app, unit=event.unit - ) - - -class DatabaseProvides(DatabaseProviderData, DatabaseProviderEventHandlers): - """Provider-side of the database relations.""" - - def __init__(self, charm: CharmBase, relation_name: str) -> None: - DatabaseProviderData.__init__(self, charm.model, relation_name) - DatabaseProviderEventHandlers.__init__(self, charm, self) - - -class DatabaseRequirerData(RequirerData): - """Requirer-side of the database relation.""" - - def __init__( - self, - model: Model, - relation_name: str, - database_name: str, - extra_user_roles: Optional[str] = None, - relations_aliases: Optional[List[str]] = None, - additional_secret_fields: Optional[List[str]] = [], - external_node_connectivity: bool = False, - ): - """Manager of database client relations.""" - super().__init__(model, relation_name, extra_user_roles, additional_secret_fields) - self.database = database_name - self.relations_aliases = relations_aliases - self.external_node_connectivity = external_node_connectivity - - def is_postgresql_plugin_enabled(self, plugin: str, relation_index: int = 0) -> bool: - """Returns whether a plugin is enabled in the database. - - Args: - plugin: name of the plugin to check. - relation_index: optional relation index to check the database - (default: 0 - first relation). - - PostgreSQL only. - """ - # Psycopg 3 is imported locally to avoid the need of its package installation - # when relating to a database charm other than PostgreSQL. - import psycopg - - # Return False if no relation is established. - if len(self.relations) == 0: - return False - - relation_id = self.relations[relation_index].id - host = self.fetch_relation_field(relation_id, "endpoints") - - # Return False if there is no endpoint available. - if host is None: - return False - - host = host.split(":")[0] - - content = self.fetch_relation_data([relation_id], ["username", "password"]).get( - relation_id, {} - ) - user = content.get("username") - password = content.get("password") - - connection_string = ( - f"host='{host}' dbname='{self.database}' user='{user}' password='{password}'" - ) - try: - with psycopg.connect(connection_string) as connection: - with connection.cursor() as cursor: - cursor.execute( - "SELECT TRUE FROM pg_extension WHERE extname=%s::text;", (plugin,) - ) - return cursor.fetchone() is not None - except psycopg.Error as e: - logger.exception( - f"failed to check whether {plugin} plugin is enabled in the database: %s", str(e) - ) - return False - - -class DatabaseRequirerEventHandlers(RequirerEventHandlers): - """Requires-side of the relation.""" - - on = DatabaseRequiresEvents() # pyright: ignore [reportAssignmentType] - - def __init__( - self, charm: CharmBase, relation_data: DatabaseRequirerData, unique_key: str = "" - ): - """Manager of base client relations.""" - super().__init__(charm, relation_data, unique_key) - # Just to keep lint quiet, can't resolve inheritance. The same happened in super().__init__() above - self.relation_data = relation_data - - # Define custom event names for each alias. - if self.relation_data.relations_aliases: - # Ensure the number of aliases does not exceed the maximum - # of connections allowed in the specific relation. - relation_connection_limit = self.charm.meta.requires[ - self.relation_data.relation_name - ].limit - if len(self.relation_data.relations_aliases) != relation_connection_limit: - raise ValueError( - f"The number of aliases must match the maximum number of connections allowed in the relation. " - f"Expected {relation_connection_limit}, got {len(self.relation_data.relations_aliases)}" - ) - - if self.relation_data.relations_aliases: - for relation_alias in self.relation_data.relations_aliases: - self.on.define_event(f"{relation_alias}_database_created", DatabaseCreatedEvent) - self.on.define_event( - f"{relation_alias}_endpoints_changed", DatabaseEndpointsChangedEvent - ) - self.on.define_event( - f"{relation_alias}_read_only_endpoints_changed", - DatabaseReadOnlyEndpointsChangedEvent, - ) - - def _on_secret_changed_event(self, event: SecretChangedEvent): - """Event notifying about a new value of a secret.""" - pass - - def _assign_relation_alias(self, relation_id: int) -> None: - """Assigns an alias to a relation. - - This function writes in the unit data bag. - - Args: - relation_id: the identifier for a particular relation. - """ - # If no aliases were provided, return immediately. - if not self.relation_data.relations_aliases: - return - - # Return if an alias was already assigned to this relation - # (like when there are more than one unit joining the relation). - relation = self.charm.model.get_relation(self.relation_data.relation_name, relation_id) - if relation and relation.data[self.relation_data.local_unit].get("alias"): - return - - # Retrieve the available aliases (the ones that weren't assigned to any relation). - available_aliases = self.relation_data.relations_aliases[:] - for relation in self.charm.model.relations[self.relation_data.relation_name]: - alias = relation.data[self.relation_data.local_unit].get("alias") - if alias: - logger.debug("Alias %s was already assigned to relation %d", alias, relation.id) - available_aliases.remove(alias) - - # Set the alias in the unit relation databag of the specific relation. - relation = self.charm.model.get_relation(self.relation_data.relation_name, relation_id) - if relation: - relation.data[self.relation_data.local_unit].update({"alias": available_aliases[0]}) - - # We need to set relation alias also on the application level so, - # it will be accessible in show-unit juju command, executed for a consumer application unit - if self.relation_data.local_unit.is_leader(): - self.relation_data.update_relation_data(relation_id, {"alias": available_aliases[0]}) - - def _emit_aliased_event(self, event: RelationChangedEvent, event_name: str) -> None: - """Emit an aliased event to a particular relation if it has an alias. - - Args: - event: the relation changed event that was received. - event_name: the name of the event to emit. - """ - alias = self._get_relation_alias(event.relation.id) - if alias: - getattr(self.on, f"{alias}_{event_name}").emit( - event.relation, app=event.app, unit=event.unit - ) - - def _get_relation_alias(self, relation_id: int) -> Optional[str]: - """Returns the relation alias. - - Args: - relation_id: the identifier for a particular relation. - - Returns: - the relation alias or None if the relation was not found. - """ - for relation in self.charm.model.relations[self.relation_data.relation_name]: - if relation.id == relation_id: - return relation.data[self.relation_data.local_unit].get("alias") - return None - - def _on_relation_created_event(self, event: RelationCreatedEvent) -> None: - """Event emitted when the database relation is created.""" - super()._on_relation_created_event(event) - - # If relations aliases were provided, assign one to the relation. - self._assign_relation_alias(event.relation.id) - - # Sets both database and extra user roles in the relation - # if the roles are provided. Otherwise, sets only the database. - if not self.relation_data.local_unit.is_leader(): - return - - event_data = {"database": self.relation_data.database} - - if self.relation_data.extra_user_roles: - event_data["extra-user-roles"] = self.relation_data.extra_user_roles - - # set external-node-connectivity field - if self.relation_data.external_node_connectivity: - event_data["external-node-connectivity"] = "true" - - self.relation_data.update_relation_data(event.relation.id, event_data) - - def _on_relation_changed_event(self, event: RelationChangedEvent) -> None: - """Event emitted when the database relation has changed.""" - is_subordinate = False - remote_unit_data = None - for key in event.relation.data.keys(): - if isinstance(key, Unit) and not key.name.startswith(self.charm.app.name): - remote_unit_data = event.relation.data[key] - elif isinstance(key, Application) and key.name != self.charm.app.name: - is_subordinate = event.relation.data[key].get("subordinated") == "true" - - if is_subordinate: - if not remote_unit_data: - return - - if remote_unit_data.get("state") != "ready": - return - - # Check which data has changed to emit customs events. - diff = self._diff(event) - - # Register all new secrets with their labels - if any(newval for newval in diff.added if self.relation_data._is_secret_field(newval)): - self.relation_data._register_secrets_to_relation(event.relation, diff.added) - - # Check if the database is created - # (the database charm shared the credentials). - secret_field_user = self.relation_data._generate_secret_field_name(SECRET_GROUPS.USER) - if ( - "username" in diff.added and "password" in diff.added - ) or secret_field_user in diff.added: - # Emit the default event (the one without an alias). - logger.info("database created at %s", datetime.now()) - getattr(self.on, "database_created").emit( - event.relation, app=event.app, unit=event.unit - ) - - # Emit the aliased event (if any). - self._emit_aliased_event(event, "database_created") - - # To avoid unnecessary application restarts do not trigger - # “endpoints_changed“ event if “database_created“ is triggered. - return - - # Emit an endpoints changed event if the database - # added or changed this info in the relation databag. - if "endpoints" in diff.added or "endpoints" in diff.changed: - # Emit the default event (the one without an alias). - logger.info("endpoints changed on %s", datetime.now()) - getattr(self.on, "endpoints_changed").emit( - event.relation, app=event.app, unit=event.unit - ) - - # Emit the aliased event (if any). - self._emit_aliased_event(event, "endpoints_changed") - - # To avoid unnecessary application restarts do not trigger - # “read_only_endpoints_changed“ event if “endpoints_changed“ is triggered. - return - - # Emit a read only endpoints changed event if the database - # added or changed this info in the relation databag. - if "read-only-endpoints" in diff.added or "read-only-endpoints" in diff.changed: - # Emit the default event (the one without an alias). - logger.info("read-only-endpoints changed on %s", datetime.now()) - getattr(self.on, "read_only_endpoints_changed").emit( - event.relation, app=event.app, unit=event.unit - ) - - # Emit the aliased event (if any). - self._emit_aliased_event(event, "read_only_endpoints_changed") - - -class DatabaseRequires(DatabaseRequirerData, DatabaseRequirerEventHandlers): - """Provider-side of the database relations.""" - - def __init__( - self, - charm: CharmBase, - relation_name: str, - database_name: str, - extra_user_roles: Optional[str] = None, - relations_aliases: Optional[List[str]] = None, - additional_secret_fields: Optional[List[str]] = [], - external_node_connectivity: bool = False, - ): - DatabaseRequirerData.__init__( - self, - charm.model, - relation_name, - database_name, - extra_user_roles, - relations_aliases, - additional_secret_fields, - external_node_connectivity, - ) - DatabaseRequirerEventHandlers.__init__(self, charm, self) - - -################################################################################ -# Charm-specific Relations Data and Events -################################################################################ - -# Kafka Events - - -class KafkaProvidesEvent(RelationEvent): - """Base class for Kafka events.""" - - @property - def topic(self) -> Optional[str]: - """Returns the topic that was requested.""" - if not self.relation.app: - return None - - return self.relation.data[self.relation.app].get("topic") - - @property - def consumer_group_prefix(self) -> Optional[str]: - """Returns the consumer-group-prefix that was requested.""" - if not self.relation.app: - return None - - return self.relation.data[self.relation.app].get("consumer-group-prefix") - - -class TopicRequestedEvent(KafkaProvidesEvent, ExtraRoleEvent): - """Event emitted when a new topic is requested for use on this relation.""" - - -class KafkaProvidesEvents(CharmEvents): - """Kafka events. - - This class defines the events that the Kafka can emit. - """ - - topic_requested = EventSource(TopicRequestedEvent) - - -class KafkaRequiresEvent(RelationEvent): - """Base class for Kafka events.""" - - @property - def topic(self) -> Optional[str]: - """Returns the topic.""" - if not self.relation.app: - return None - - return self.relation.data[self.relation.app].get("topic") - - @property - def bootstrap_server(self) -> Optional[str]: - """Returns a comma-separated list of broker uris.""" - if not self.relation.app: - return None - - return self.relation.data[self.relation.app].get("endpoints") - - @property - def consumer_group_prefix(self) -> Optional[str]: - """Returns the consumer-group-prefix.""" - if not self.relation.app: - return None - - return self.relation.data[self.relation.app].get("consumer-group-prefix") - - @property - def zookeeper_uris(self) -> Optional[str]: - """Returns a comma separated list of Zookeeper uris.""" - if not self.relation.app: - return None - - return self.relation.data[self.relation.app].get("zookeeper-uris") - - -class TopicCreatedEvent(AuthenticationEvent, KafkaRequiresEvent): - """Event emitted when a new topic is created for use on this relation.""" - - -class BootstrapServerChangedEvent(AuthenticationEvent, KafkaRequiresEvent): - """Event emitted when the bootstrap server is changed.""" - - -class KafkaRequiresEvents(CharmEvents): - """Kafka events. - - This class defines the events that the Kafka can emit. - """ - - topic_created = EventSource(TopicCreatedEvent) - bootstrap_server_changed = EventSource(BootstrapServerChangedEvent) - - -# Kafka Provides and Requires - - -class KafkaProviderData(ProviderData): - """Provider-side of the Kafka relation.""" - - def __init__(self, model: Model, relation_name: str) -> None: - super().__init__(model, relation_name) - - def set_topic(self, relation_id: int, topic: str) -> None: - """Set topic name in the application relation databag. - - Args: - relation_id: the identifier for a particular relation. - topic: the topic name. - """ - self.update_relation_data(relation_id, {"topic": topic}) - - def set_bootstrap_server(self, relation_id: int, bootstrap_server: str) -> None: - """Set the bootstrap server in the application relation databag. - - Args: - relation_id: the identifier for a particular relation. - bootstrap_server: the bootstrap server address. - """ - self.update_relation_data(relation_id, {"endpoints": bootstrap_server}) - - def set_consumer_group_prefix(self, relation_id: int, consumer_group_prefix: str) -> None: - """Set the consumer group prefix in the application relation databag. - - Args: - relation_id: the identifier for a particular relation. - consumer_group_prefix: the consumer group prefix string. - """ - self.update_relation_data(relation_id, {"consumer-group-prefix": consumer_group_prefix}) - - def set_zookeeper_uris(self, relation_id: int, zookeeper_uris: str) -> None: - """Set the zookeeper uris in the application relation databag. - - Args: - relation_id: the identifier for a particular relation. - zookeeper_uris: comma-separated list of ZooKeeper server uris. - """ - self.update_relation_data(relation_id, {"zookeeper-uris": zookeeper_uris}) - - -class KafkaProviderEventHandlers(EventHandlers): - """Provider-side of the Kafka relation.""" - - on = KafkaProvidesEvents() # pyright: ignore [reportAssignmentType] - - def __init__(self, charm: CharmBase, relation_data: KafkaProviderData) -> None: - super().__init__(charm, relation_data) - # Just to keep lint quiet, can't resolve inheritance. The same happened in super().__init__() above - self.relation_data = relation_data - - def _on_relation_changed_event(self, event: RelationChangedEvent) -> None: - """Event emitted when the relation has changed.""" - # Leader only - if not self.relation_data.local_unit.is_leader(): - return - - # Check which data has changed to emit customs events. - diff = self._diff(event) - - # Emit a topic requested event if the setup key (topic name and optional - # extra user roles) was added to the relation databag by the application. - if "topic" in diff.added: - getattr(self.on, "topic_requested").emit( - event.relation, app=event.app, unit=event.unit - ) - - -class KafkaProvides(KafkaProviderData, KafkaProviderEventHandlers): - """Provider-side of the Kafka relation.""" - - def __init__(self, charm: CharmBase, relation_name: str) -> None: - KafkaProviderData.__init__(self, charm.model, relation_name) - KafkaProviderEventHandlers.__init__(self, charm, self) - - -class KafkaRequirerData(RequirerData): - """Requirer-side of the Kafka relation.""" - - def __init__( - self, - model: Model, - relation_name: str, - topic: str, - extra_user_roles: Optional[str] = None, - consumer_group_prefix: Optional[str] = None, - additional_secret_fields: Optional[List[str]] = [], - ): - """Manager of Kafka client relations.""" - super().__init__(model, relation_name, extra_user_roles, additional_secret_fields) - self.topic = topic - self.consumer_group_prefix = consumer_group_prefix or "" - - @property - def topic(self): - """Topic to use in Kafka.""" - return self._topic - - @topic.setter - def topic(self, value): - # Avoid wildcards - if value == "*": - raise ValueError(f"Error on topic '{value}', cannot be a wildcard.") - self._topic = value - - -class KafkaRequirerEventHandlers(RequirerEventHandlers): - """Requires-side of the Kafka relation.""" - - on = KafkaRequiresEvents() # pyright: ignore [reportAssignmentType] - - def __init__(self, charm: CharmBase, relation_data: KafkaRequirerData) -> None: - super().__init__(charm, relation_data) - # Just to keep lint quiet, can't resolve inheritance. The same happened in super().__init__() above - self.relation_data = relation_data - - def _on_relation_created_event(self, event: RelationCreatedEvent) -> None: - """Event emitted when the Kafka relation is created.""" - super()._on_relation_created_event(event) - - if not self.relation_data.local_unit.is_leader(): - return - - # Sets topic, extra user roles, and "consumer-group-prefix" in the relation - relation_data = {"topic": self.relation_data.topic} - - if self.relation_data.extra_user_roles: - relation_data["extra-user-roles"] = self.relation_data.extra_user_roles - - if self.relation_data.consumer_group_prefix: - relation_data["consumer-group-prefix"] = self.relation_data.consumer_group_prefix - - self.relation_data.update_relation_data(event.relation.id, relation_data) - - def _on_secret_changed_event(self, event: SecretChangedEvent): - """Event notifying about a new value of a secret.""" - pass - - def _on_relation_changed_event(self, event: RelationChangedEvent) -> None: - """Event emitted when the Kafka relation has changed.""" - # Check which data has changed to emit customs events. - diff = self._diff(event) - - # Check if the topic is created - # (the Kafka charm shared the credentials). - - # Register all new secrets with their labels - if any(newval for newval in diff.added if self.relation_data._is_secret_field(newval)): - self.relation_data._register_secrets_to_relation(event.relation, diff.added) - - secret_field_user = self.relation_data._generate_secret_field_name(SECRET_GROUPS.USER) - if ( - "username" in diff.added and "password" in diff.added - ) or secret_field_user in diff.added: - # Emit the default event (the one without an alias). - logger.info("topic created at %s", datetime.now()) - getattr(self.on, "topic_created").emit(event.relation, app=event.app, unit=event.unit) - - # To avoid unnecessary application restarts do not trigger - # “endpoints_changed“ event if “topic_created“ is triggered. - return - - # Emit an endpoints (bootstrap-server) changed event if the Kafka endpoints - # added or changed this info in the relation databag. - if "endpoints" in diff.added or "endpoints" in diff.changed: - # Emit the default event (the one without an alias). - logger.info("endpoints changed on %s", datetime.now()) - getattr(self.on, "bootstrap_server_changed").emit( - event.relation, app=event.app, unit=event.unit - ) # here check if this is the right design - return - - -class KafkaRequires(KafkaRequirerData, KafkaRequirerEventHandlers): - """Provider-side of the Kafka relation.""" - - def __init__( - self, - charm: CharmBase, - relation_name: str, - topic: str, - extra_user_roles: Optional[str] = None, - consumer_group_prefix: Optional[str] = None, - additional_secret_fields: Optional[List[str]] = [], - ) -> None: - KafkaRequirerData.__init__( - self, - charm.model, - relation_name, - topic, - extra_user_roles, - consumer_group_prefix, - additional_secret_fields, - ) - KafkaRequirerEventHandlers.__init__(self, charm, self) - - -# Opensearch related events - - -class OpenSearchProvidesEvent(RelationEvent): - """Base class for OpenSearch events.""" - - @property - def index(self) -> Optional[str]: - """Returns the index that was requested.""" - if not self.relation.app: - return None - - return self.relation.data[self.relation.app].get("index") - - -class IndexRequestedEvent(OpenSearchProvidesEvent, ExtraRoleEvent): - """Event emitted when a new index is requested for use on this relation.""" - - -class OpenSearchProvidesEvents(CharmEvents): - """OpenSearch events. - - This class defines the events that OpenSearch can emit. - """ - - index_requested = EventSource(IndexRequestedEvent) - - -class OpenSearchRequiresEvent(DatabaseRequiresEvent): - """Base class for OpenSearch requirer events.""" - - -class IndexCreatedEvent(AuthenticationEvent, OpenSearchRequiresEvent): - """Event emitted when a new index is created for use on this relation.""" - - -class OpenSearchRequiresEvents(CharmEvents): - """OpenSearch events. - - This class defines the events that the opensearch requirer can emit. - """ - - index_created = EventSource(IndexCreatedEvent) - endpoints_changed = EventSource(DatabaseEndpointsChangedEvent) - authentication_updated = EventSource(AuthenticationEvent) - - -# OpenSearch Provides and Requires Objects - - -class OpenSearchProvidesData(ProviderData): - """Provider-side of the OpenSearch relation.""" - - def __init__(self, model: Model, relation_name: str) -> None: - super().__init__(model, relation_name) - - def set_index(self, relation_id: int, index: str) -> None: - """Set the index in the application relation databag. - - Args: - relation_id: the identifier for a particular relation. - index: the index as it is _created_ on the provider charm. This needn't match the - requested index, and can be used to present a different index name if, for example, - the requested index is invalid. - """ - self.update_relation_data(relation_id, {"index": index}) - - def set_endpoints(self, relation_id: int, endpoints: str) -> None: - """Set the endpoints in the application relation databag. - - Args: - relation_id: the identifier for a particular relation. - endpoints: the endpoint addresses for opensearch nodes. - """ - self.update_relation_data(relation_id, {"endpoints": endpoints}) - - def set_version(self, relation_id: int, version: str) -> None: - """Set the opensearch version in the application relation databag. - - Args: - relation_id: the identifier for a particular relation. - version: database version. - """ - self.update_relation_data(relation_id, {"version": version}) - - -class OpenSearchProvidesEventHandlers(EventHandlers): - """Provider-side of the OpenSearch relation.""" - - on = OpenSearchProvidesEvents() # pyright: ignore[reportAssignmentType] - - def __init__(self, charm: CharmBase, relation_data: OpenSearchProvidesData) -> None: - super().__init__(charm, relation_data) - # Just to keep lint quiet, can't resolve inheritance. The same happened in super().__init__() above - self.relation_data = relation_data - - def _on_relation_changed_event(self, event: RelationChangedEvent) -> None: - """Event emitted when the relation has changed.""" - # Leader only - if not self.relation_data.local_unit.is_leader(): - return - # Check which data has changed to emit customs events. - diff = self._diff(event) - - # Emit an index requested event if the setup key (index name and optional extra user roles) - # have been added to the relation databag by the application. - if "index" in diff.added: - getattr(self.on, "index_requested").emit( - event.relation, app=event.app, unit=event.unit - ) - - -class OpenSearchProvides(OpenSearchProvidesData, OpenSearchProvidesEventHandlers): - """Provider-side of the OpenSearch relation.""" - - def __init__(self, charm: CharmBase, relation_name: str) -> None: - OpenSearchProvidesData.__init__(self, charm.model, relation_name) - OpenSearchProvidesEventHandlers.__init__(self, charm, self) - - -class OpenSearchRequiresData(RequirerData): - """Requires data side of the OpenSearch relation.""" - - def __init__( - self, - model: Model, - relation_name: str, - index: str, - extra_user_roles: Optional[str] = None, - additional_secret_fields: Optional[List[str]] = [], - ): - """Manager of OpenSearch client relations.""" - super().__init__(model, relation_name, extra_user_roles, additional_secret_fields) - self.index = index - - -class OpenSearchRequiresEventHandlers(RequirerEventHandlers): - """Requires events side of the OpenSearch relation.""" - - on = OpenSearchRequiresEvents() # pyright: ignore[reportAssignmentType] - - def __init__(self, charm: CharmBase, relation_data: OpenSearchRequiresData) -> None: - super().__init__(charm, relation_data) - # Just to keep lint quiet, can't resolve inheritance. The same happened in super().__init__() above - self.relation_data = relation_data - - def _on_relation_created_event(self, event: RelationCreatedEvent) -> None: - """Event emitted when the OpenSearch relation is created.""" - super()._on_relation_created_event(event) - - if not self.relation_data.local_unit.is_leader(): - return - - # Sets both index and extra user roles in the relation if the roles are provided. - # Otherwise, sets only the index. - data = {"index": self.relation_data.index} - if self.relation_data.extra_user_roles: - data["extra-user-roles"] = self.relation_data.extra_user_roles - - self.relation_data.update_relation_data(event.relation.id, data) - - def _on_secret_changed_event(self, event: SecretChangedEvent): - """Event notifying about a new value of a secret.""" - if not event.secret.label: - return - - relation = self.relation_data._relation_from_secret_label(event.secret.label) - if not relation: - logging.info( - f"Received secret {event.secret.label} but couldn't parse, seems irrelevant" - ) - return - - if relation.app == self.charm.app: - logging.info("Secret changed event ignored for Secret Owner") - - remote_unit = None - for unit in relation.units: - if unit.app != self.charm.app: - remote_unit = unit - - logger.info("authentication updated") - getattr(self.on, "authentication_updated").emit( - relation, app=relation.app, unit=remote_unit - ) - - def _on_relation_changed_event(self, event: RelationChangedEvent) -> None: - """Event emitted when the OpenSearch relation has changed. - - This event triggers individual custom events depending on the changing relation. - """ - # Check which data has changed to emit customs events. - diff = self._diff(event) - - # Register all new secrets with their labels - if any(newval for newval in diff.added if self.relation_data._is_secret_field(newval)): - self.relation_data._register_secrets_to_relation(event.relation, diff.added) - - secret_field_user = self.relation_data._generate_secret_field_name(SECRET_GROUPS.USER) - secret_field_tls = self.relation_data._generate_secret_field_name(SECRET_GROUPS.TLS) - updates = {"username", "password", "tls", "tls-ca", secret_field_user, secret_field_tls} - if len(set(diff._asdict().keys()) - updates) < len(diff): - logger.info("authentication updated at: %s", datetime.now()) - getattr(self.on, "authentication_updated").emit( - event.relation, app=event.app, unit=event.unit - ) - - # Check if the index is created - # (the OpenSearch charm shares the credentials). - if ( - "username" in diff.added and "password" in diff.added - ) or secret_field_user in diff.added: - # Emit the default event (the one without an alias). - logger.info("index created at: %s", datetime.now()) - getattr(self.on, "index_created").emit(event.relation, app=event.app, unit=event.unit) - - # To avoid unnecessary application restarts do not trigger - # “endpoints_changed“ event if “index_created“ is triggered. - return - - # Emit a endpoints changed event if the OpenSearch application added or changed this info - # in the relation databag. - if "endpoints" in diff.added or "endpoints" in diff.changed: - # Emit the default event (the one without an alias). - logger.info("endpoints changed on %s", datetime.now()) - getattr(self.on, "endpoints_changed").emit( - event.relation, app=event.app, unit=event.unit - ) # here check if this is the right design - return - - -class OpenSearchRequires(OpenSearchRequiresData, OpenSearchRequiresEventHandlers): - """Requires-side of the OpenSearch relation.""" - - def __init__( - self, - charm: CharmBase, - relation_name: str, - index: str, - extra_user_roles: Optional[str] = None, - additional_secret_fields: Optional[List[str]] = [], - ) -> None: - OpenSearchRequiresData.__init__( - self, - charm.model, - relation_name, - index, - extra_user_roles, - additional_secret_fields, - ) - OpenSearchRequiresEventHandlers.__init__(self, charm, self) diff --git a/tests/integration/relations/opensearch_provider/application-charm/metadata.yaml b/tests/integration/relations/opensearch_provider/application-charm/metadata.yaml deleted file mode 100644 index 36e01879e1..0000000000 --- a/tests/integration/relations/opensearch_provider/application-charm/metadata.yaml +++ /dev/null @@ -1,19 +0,0 @@ -# Copyright 2024 Canonical Ltd. -# See LICENSE file for licensing details. -name: application -description: | - Data platform libs application charm used in integration tests. -summary: | - Data platform libs application meant to be used only for testing of the libs in this repository. -series: - - jammy - -requires: - first-index: - interface: opensearch_client - second-index: - interface: opensearch_client - admin: - interface: opensearch_client - opensearch-dashboards: - interface: opensearch_client diff --git a/tests/integration/relations/opensearch_provider/application-charm/requirements.txt b/tests/integration/relations/opensearch_provider/application-charm/requirements.txt deleted file mode 100644 index e285b104db..0000000000 --- a/tests/integration/relations/opensearch_provider/application-charm/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -ops==2.14.1 diff --git a/tests/integration/relations/opensearch_provider/application-charm/src/charm.py b/tests/integration/relations/opensearch_provider/application-charm/src/charm.py deleted file mode 100755 index 943228b8e3..0000000000 --- a/tests/integration/relations/opensearch_provider/application-charm/src/charm.py +++ /dev/null @@ -1,242 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2024 Canonical Ltd. -# See LICENSE file for licensing details. - -"""Application charm that connects to opensearch using the opensearch-client relation.""" - -import json -import logging -from typing import Dict, List, Optional, Union - -import requests -from charms.data_platform_libs.v0.data_interfaces import ( - AuthenticationEvent, - OpenSearchRequires, -) -from ops.charm import ActionEvent, CharmBase -from ops.main import main -from ops.model import ActiveStatus, BlockedStatus - -logger = logging.getLogger(__name__) - - -CERT_PATH = "/tmp/test_cert.ca" - - -class ApplicationCharm(CharmBase): - """Application charm that connects to database charms. - - Enters BlockedStatus if it cannot constantly reach the database. - """ - - def __init__(self, *args): - super().__init__(*args) - # Default charm events. - self.framework.observe(self.on.update_status, self._on_update_status) - - # `albums` index is used in integration test - self.first_opensearch = OpenSearchRequires(self, "first-index", "albums", "") - - index_name = f'{self.app.name.replace("-", "_")}_second_opensearch' - # set invalid permissions to guarantee we still get default permissions. - self.second_opensearch = OpenSearchRequires(self, "second-index", index_name, "hackerman") - - # Checking comma-separated permissions. These should still basically have admin - # permissions. - self.admin_opensearch = OpenSearchRequires(self, "admin", "admin-index", "admin,default") - - self.relations = { - "first-index": self.first_opensearch, - "second-index": self.second_opensearch, - "admin": self.admin_opensearch, - } - - for relation_handler in self.relations.values(): - self.framework.observe( - relation_handler.on.index_created, self._on_authentication_updated - ) - self.framework.observe( - relation_handler.on.authentication_updated, self._on_authentication_updated - ) - - self.framework.observe(self.on.run_request_action, self._on_run_request_action) - - def _on_update_status(self, _) -> None: - """Health check for index connection.""" - if self.connection_check(): - self.unit.status = ActiveStatus() - else: - logger.error("connection check to opensearch charm failed") - self.unit.status = BlockedStatus("No connection to opensearch charm") - - def connection_check(self) -> bool: - """Simple connection check to see if backend exists and we can connect to it.""" - relations = [] - for relation in self.relations.keys(): - relations += self.model.relations.get(relation, []) - if not relations: - return False - - connected = True - for relation in relations: - try: - self.relation_request(relation.name, relation.id, "GET", "/") - except Exception as e: - logger.error(e) - logger.error(f"relation {relation} didn't connect") - connected = False - - return connected - - def _get_requires(self, relation_name): - for requires in self.relations.values(): - if requires.relation_name == relation_name: - return requires - - def _on_authentication_updated(self, event: AuthenticationEvent): - if not hasattr(event, "relation"): - return - - requires = self.relations.get(event.relation.name) - tls_ca = requires.fetch_relation_field(event.relation.id, "tls-ca") - - if not tls_ca: - event.defer() # We're waiting until we get a CA. - return - - logger.error(f"writing cert to {CERT_PATH}.") - with open(CERT_PATH, "w") as f: - f.write(tls_ca) - - # ============== - # Action hooks - # ============== - - def _on_run_request_action(self, event: ActionEvent): - logger.info(event.params) - relation_id = event.params["relation-id"] - method = event.params["method"] - endpoint = event.params["endpoint"] - payload = event.params.get("payload", None) - if payload: - payload = payload.replace("\\", "") - - requires = self._get_requires(event.params["relation-name"]) - username = requires.fetch_relation_field(relation_id, "username") - password = requires.fetch_relation_field(relation_id, "password") - hosts = requires.fetch_relation_field(relation_id, "endpoints") - - if not username or not password: - event.fail("Secrets not accessible yet.") - return - - host = None - if not hosts: - return - - host = hosts.split(",")[0] - host_addr, port = host.split(":") - - logger.info(f"sending {method} request to {endpoint}") - try: - response = self.request( - method, endpoint, int(port), username, password, host_addr, payload - ) - except OpenSearchHttpError as e: - response = [str(e)] - logger.info(response) - - event.set_results({"results": json.dumps(response)}) - - # ================================= - # Opensearch connection functions - # ================================= - - def relation_request( - self, - relation_name: str, - relation_id: int, - method: str, - endpoint: str, - payload: Optional[Dict[str, any]] = None, - ) -> Union[Dict[str, any], List[any]]: - """Make an HTTP request to a specific relation.""" - requires = self._get_requires(relation_name) - username = requires.fetch_relation_field(relation_id, "username") - password = requires.fetch_relation_field(relation_id, "password") - hosts = requires.fetch_relation_field(relation_id, "endpoints") - - if None in [username, password] or not hosts: - raise OpenSearchHttpError - - host, port = hosts.split(",")[0].split(":") - - return self.request( - method, - endpoint, - int(port), - username, - password, - host, - payload=payload, - ) - - def request( - self, - method: str, - endpoint: str, - port: int, - username: str, - password: str, - host: str, - payload: Optional[Dict[str, any]] = None, - ) -> Union[Dict[str, any], List[any]]: - """Make an HTTP request. - - TODO swap this over to a more normal opensearch client - Args: - method: matching the known http methods. - endpoint: relative to the base uri. - payload: JSON / map body payload. - host: host of the node we wish to make a request on. - port: the port for the server. - username: the username to use for authentication - password: the password for {username} - """ - if None in [endpoint, method]: - raise ValueError("endpoint or method missing") - - if endpoint.startswith("/"): - endpoint = endpoint[1:] - - full_url = f"https://{host}:{port}/{endpoint}" - - request_kwargs = { - "verify": CERT_PATH, - "method": method.upper(), - "url": full_url, - "headers": {"Content-Type": "application/json", "Accept": "application/json"}, - } - - if isinstance(payload, str): - request_kwargs["data"] = payload - elif isinstance(payload, dict): - request_kwargs["data"] = json.dumps(payload) - try: - with requests.Session() as s: - s.auth = (username, password) - resp = s.request(**request_kwargs) - resp.raise_for_status() - except requests.exceptions.RequestException as e: - logger.error(f"Request {method} to {full_url} with payload: {payload} failed. \n{e}") - raise OpenSearchHttpError(str(e)) - - return resp.json() - - -class OpenSearchHttpError(Exception): - """Exception thrown when an OpenSearch REST call fails.""" - - -if __name__ == "__main__": - main(ApplicationCharm) diff --git a/tests/integration/relations/test_opensearch_provider.py b/tests/integration/relations/test_opensearch_provider.py deleted file mode 100644 index 8ba7d83d6d..0000000000 --- a/tests/integration/relations/test_opensearch_provider.py +++ /dev/null @@ -1,714 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2024 Canonical Ltd. -# See LICENSE file for licensing details. -import asyncio -import json -import logging -import re -import time - -import pytest -from charms.opensearch.v0.constants_charm import ClientRelationName -from pytest_operator.plugin import OpsTest - -from ..helpers import APP_NAME as OPENSEARCH_APP_NAME -from ..helpers import ( - CONFIG_OPTS, - MODEL_CONFIG, - SERIES, - get_application_unit_ids, - get_leader_unit_id, - get_leader_unit_ip, - http_request, - run_action, -) -from ..helpers_deployments import wait_until -from ..tls.test_tls import TLS_CERTIFICATES_APP_NAME -from .helpers import ( - get_application_relation_data, - ip_to_url, - run_request, - wait_for_relation_joined_between, -) - -logger = logging.getLogger(__name__) - -CLIENT_APP_NAME = "application" -SECONDARY_CLIENT_APP_NAME = "secondary-application" -DASHBOARDS_APP_NAME = "opensearch-dashboards" -ALL_APPS = [OPENSEARCH_APP_NAME, TLS_CERTIFICATES_APP_NAME, CLIENT_APP_NAME, DASHBOARDS_APP_NAME] - -NUM_UNITS = 3 - -FIRST_RELATION_NAME = "first-index" -SECOND_RELATION_NAME = "second-index" -DASHBOARDS_RELATION_NAME = "opensearch-client" -ADMIN_RELATION_NAME = "admin" -PROTECTED_INDICES = [ - ".opendistro_security", - ".opendistro-alerting-config", - ".opendistro-alerting-alert", - ".opendistro-anomaly-results", - ".opendistro-anomaly-detector", - ".opendistro-anomaly-checkpoints", - ".opendistro-anomaly-detection-state", -] - - -@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "large"]) -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_create_relation(ops_test: OpsTest, application_charm, opensearch_charm): - """Test basic functionality of relation interface.""" - # Deploy both charms (multiple units for each application to test that later they correctly - # set data in the relation application databag using only the leader unit). - new_model_conf = MODEL_CONFIG.copy() - new_model_conf["update-status-hook-interval"] = "1m" - - config = {"ca-common-name": "CN_CA"} - await ops_test.model.deploy(TLS_CERTIFICATES_APP_NAME, channel="stable", config=config) - - await ops_test.model.set_config(new_model_conf) - await asyncio.gather( - ops_test.model.deploy( - application_charm, - application_name=CLIENT_APP_NAME, - ), - ops_test.model.deploy( - DASHBOARDS_APP_NAME, - application_name=DASHBOARDS_APP_NAME, - channel="2/edge", - series=SERIES, - ), - ops_test.model.deploy( - opensearch_charm, - application_name=OPENSEARCH_APP_NAME, - num_units=NUM_UNITS, - series=SERIES, - config=CONFIG_OPTS, - ), - ) - await ops_test.model.integrate(OPENSEARCH_APP_NAME, TLS_CERTIFICATES_APP_NAME) - await ops_test.model.wait_for_idle( - apps=[TLS_CERTIFICATES_APP_NAME, OPENSEARCH_APP_NAME], status="active", timeout=1600 - ) - - global client_relation - client_relation = await ops_test.model.integrate( - f"{OPENSEARCH_APP_NAME}:{ClientRelationName}", f"{CLIENT_APP_NAME}:{FIRST_RELATION_NAME}" - ) - - # This test shouldn't take so long - await ops_test.model.wait_for_idle( - apps=[OPENSEARCH_APP_NAME, CLIENT_APP_NAME], - timeout=1600, - status="active", - ) - await ops_test.model.wait_for_idle( - apps=[DASHBOARDS_APP_NAME], - timeout=1600, - ) - - -@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "large"]) -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_index_usage(ops_test: OpsTest): - """Check we can update and delete things. - - The client application authenticates using the cert provided in the index; if this is - invalid for any reason, the test will fail, so this test implicitly verifies that TLS works. - """ - await run_request( - ops_test, - unit_name=ops_test.model.applications[CLIENT_APP_NAME].units[0].name, - relation_name=FIRST_RELATION_NAME, - relation_id=client_relation.id, - method="PUT", - endpoint="/albums/_doc/1?refresh=true", - payload=re.escape( - '{"artist": "Vulfpeck", "genre": ["Funk", "Jazz"], "title": "Thrill of the Arts"}' - ), - ) - - read_index_endpoint = "/albums/_search?q=Jazz" - run_read_index = await run_request( - ops_test, - unit_name=ops_test.model.applications[CLIENT_APP_NAME].units[0].name, - endpoint=read_index_endpoint, - method="GET", - relation_id=client_relation.id, - relation_name=FIRST_RELATION_NAME, - ) - results = json.loads(run_read_index["results"]) - logging.info(results) - assert results.get("timed_out") is False - assert results.get("hits", {}).get("total", {}).get("value") == 1 - assert ( - results.get("hits", {}).get("hits", [{}])[0].get("_source", {}).get("artist") == "Vulfpeck" - ) - - -@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "large"]) -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_bulk_index_usage(ops_test: OpsTest): - """Check we can update and delete things using bulk api.""" - bulk_payload = """{ "index" : { "_index": "albums", "_id" : "2" } } -{"artist": "Herbie Hancock", "genre": ["Jazz"], "title": "Head Hunters"} -{ "index" : { "_index": "albums", "_id" : "3" } } -{"artist": "Lydian Collective", "genre": ["Jazz"], "title": "Adventure"} -{ "index" : { "_index": "albums", "_id" : "4" } } -{"artist": "Liquid Tension Experiment", "genre": ["Prog", "Metal"], "title": "Liquid Tension Experiment 2"} -""" - await run_request( - ops_test, - unit_name=ops_test.model.applications[CLIENT_APP_NAME].units[0].name, - relation_name=FIRST_RELATION_NAME, - relation_id=client_relation.id, - method="POST", - endpoint="/_bulk?refresh=true", - payload=re.escape(bulk_payload), - ) - - read_index_endpoint = "/albums/_search?q=Jazz" - run_bulk_read_index = await run_request( - ops_test, - unit_name=ops_test.model.applications[CLIENT_APP_NAME].units[0].name, - endpoint=read_index_endpoint, - method="GET", - relation_id=client_relation.id, - relation_name=FIRST_RELATION_NAME, - ) - # TODO assert we're getting the correct value - results = json.loads(run_bulk_read_index["results"]) - logging.info(results) - assert results.get("timed_out") is False - assert results.get("hits", {}).get("total", {}).get("value") == 3 - artists = [ - hit.get("_source", {}).get("artist") for hit in results.get("hits", {}).get("hits", [{}]) - ] - assert set(artists) == {"Herbie Hancock", "Lydian Collective", "Vulfpeck"} - - -@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "large"]) -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_version(ops_test: OpsTest): - """Check version reported in the databag is consistent with the version on the charm.""" - run_version_request = await run_request( - ops_test, - unit_name=ops_test.model.applications[CLIENT_APP_NAME].units[0].name, - method="GET", - endpoint="/", - relation_id=client_relation.id, - relation_name=FIRST_RELATION_NAME, - ) - version = await get_application_relation_data( - ops_test, f"{CLIENT_APP_NAME}/0", FIRST_RELATION_NAME, "version" - ) - logging.info(run_version_request) - logging.info(version) - results = json.loads(run_version_request["results"]) - assert version == results.get("version", {}).get("number"), results - - -async def get_secret_data(ops_test, secret_uri): - secret_unique_id = secret_uri.split("/")[-1] - complete_command = f"show-secret {secret_uri} --reveal --format=json" - _, stdout, _ = await ops_test.juju(*complete_command.split()) - return json.loads(stdout)[secret_unique_id]["content"]["Data"] - - -@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "large"]) -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_dashboard_relation(ops_test: OpsTest): - """Test we can create relations with admin permissions.""" - # Add a dashboard relation and wait for them to exchange data - global dashboards_relation - dashboards_relation = await ops_test.model.integrate(OPENSEARCH_APP_NAME, DASHBOARDS_APP_NAME) - wait_for_relation_joined_between(ops_test, OPENSEARCH_APP_NAME, DASHBOARDS_APP_NAME) - - await wait_until( - ops_test, - apps=ALL_APPS, - apps_statuses=["active"], - units_statuses=["active"], - idle_period=70, - ) - - # On this request, kibanaserver user with its own password should be exposed - secret_uri = await get_application_relation_data( - ops_test, f"{DASHBOARDS_APP_NAME}/0", DASHBOARDS_RELATION_NAME, "secret-user" - ) - relation_user_data = await get_secret_data(ops_test, secret_uri) - relation_user_name = relation_user_data.get("username") - relation_user_pwd = relation_user_data.get("password") - - assert relation_user_name == "kibanaserver" - - leader_id = await get_leader_unit_id(ops_test) - result = await run_action(ops_test, leader_id, "get-password", {"username": "kibanaserver"}) - assert relation_user_pwd == result.response.get("password") - - -@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "large"]) -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_dashboard_relation_password_change(ops_test: OpsTest): - """Test we can create relations with admin permissions.""" - # Changing Opensearch kibanaserver password - leader_id = await get_leader_unit_id(ops_test) - result = await run_action(ops_test, leader_id, "get-password", {"username": "kibanaserver"}) - orig_pwd = result.response.get("password") - result = await run_action(ops_test, leader_id, "set-password", {"username": "kibanaserver"}) - result = await run_action(ops_test, leader_id, "get-password", {"username": "kibanaserver"}) - new_pwd = result.response.get("password") - assert orig_pwd != new_pwd - - # Checking if password also changed for the relation - secret_uri = await get_application_relation_data( - ops_test, f"{DASHBOARDS_APP_NAME}/0", DASHBOARDS_RELATION_NAME, "secret-user" - ) - relation_user_data = await get_secret_data(ops_test, secret_uri) - relation_user_name = relation_user_data.get("username") - relation_user_pwd = relation_user_data.get("password") - - assert relation_user_name == "kibanaserver" - assert relation_user_pwd == new_pwd - - # Double-checking - result = await run_action(ops_test, leader_id, "get-password", {"username": "kibanaserver"}) - assert relation_user_pwd == result.response.get("password") - - -@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "large"]) -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_scaling(ops_test: OpsTest): - """Test that scaling correctly updates endpoints in databag. - - scale_application also contains a wait_for_idle check, including checking for active status. - Idle_period checks must be greater than 1 minute to guarantee update_status fires correctly. - """ - - async def rel_endpoints(app_name: str, rel_name: str) -> str: - return await get_application_relation_data( - ops_test, f"{app_name}/0", rel_name, "endpoints" - ) - - async def _is_number_of_endpoints_valid(client_app: str, rel: str) -> bool: - units = get_application_unit_ids(ops_test, OPENSEARCH_APP_NAME) - endpoints = await rel_endpoints(client_app, rel) - return len(units) == len(endpoints.split(",")) - - # Test things are already working fine - assert await _is_number_of_endpoints_valid( - CLIENT_APP_NAME, FIRST_RELATION_NAME - ), await rel_endpoints(CLIENT_APP_NAME, FIRST_RELATION_NAME) - await wait_until( - ops_test, - apps=ALL_APPS, - apps_statuses=["active"], - idle_period=70, - ) - - # Test scale down - opensearch_unit_ids = get_application_unit_ids(ops_test, OPENSEARCH_APP_NAME) - await ops_test.model.applications[OPENSEARCH_APP_NAME].destroy_unit( - f"{OPENSEARCH_APP_NAME}/{max(opensearch_unit_ids)}" - ) - await wait_until( - ops_test, - apps=ALL_APPS, - apps_statuses=["active"], - units_statuses=["active"], - wait_for_exact_units={OPENSEARCH_APP_NAME: len(opensearch_unit_ids) - 1}, - idle_period=70, - ) - assert await _is_number_of_endpoints_valid( - CLIENT_APP_NAME, FIRST_RELATION_NAME - ), await rel_endpoints(CLIENT_APP_NAME, FIRST_RELATION_NAME) - - # test scale back up again - await ops_test.model.applications[OPENSEARCH_APP_NAME].add_unit(count=1) - await wait_until( - ops_test, - apps=ALL_APPS, - apps_statuses=["active"], - units_statuses=["active"], - wait_for_exact_units={OPENSEARCH_APP_NAME: len(opensearch_unit_ids)}, - idle_period=50, # slightly less than update-status-interval period - ) - # Now, we want to sleep until an update-status happens - time.sleep(30) - assert await _is_number_of_endpoints_valid( - CLIENT_APP_NAME, FIRST_RELATION_NAME - ), await rel_endpoints(CLIENT_APP_NAME, FIRST_RELATION_NAME) - - -@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "large"]) -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_multiple_relations(ops_test: OpsTest, application_charm): - """Test that two different applications can connect to the database.""" - # scale-down for CI - logger.info("Removing 1 unit for CI and sleep a minute..") - opensearch_unit_ids = get_application_unit_ids(ops_test, app=OPENSEARCH_APP_NAME) - await ops_test.model.applications[OPENSEARCH_APP_NAME].destroy_unit( - f"{OPENSEARCH_APP_NAME}/{max(opensearch_unit_ids)}" - ) - - # sleep a minute to ease the load on machine - time.sleep(60) - - # Deploy secondary application. - logger.info(f"Deploying 1 unit of {SECONDARY_CLIENT_APP_NAME}") - await ops_test.model.deploy( - application_charm, - num_units=1, - application_name=SECONDARY_CLIENT_APP_NAME, - ) - - # Relate the new application and wait for them to exchange connection data. - logger.info( - f"Adding relation {SECONDARY_CLIENT_APP_NAME}:{SECOND_RELATION_NAME} with {OPENSEARCH_APP_NAME}" - ) - second_client_relation = await ops_test.model.integrate( - f"{SECONDARY_CLIENT_APP_NAME}:{SECOND_RELATION_NAME}", OPENSEARCH_APP_NAME - ) - wait_for_relation_joined_between(ops_test, OPENSEARCH_APP_NAME, SECONDARY_CLIENT_APP_NAME) - - await wait_until( - ops_test, - apps=ALL_APPS + [SECONDARY_CLIENT_APP_NAME], - apps_statuses=["active"], - units_statuses=["active"], - wait_for_exact_units={ - OPENSEARCH_APP_NAME: len(opensearch_unit_ids) - 1, - CLIENT_APP_NAME: 1, - SECONDARY_CLIENT_APP_NAME: 1, - TLS_CERTIFICATES_APP_NAME: 1, - }, - idle_period=70, - timeout=2000, - ) - - # Test that the permissions are respected between relations by running the same request as - # before, but expecting it to fail. SECOND_RELATION_NAME doesn't contain permissions for the - # `albums` index, so we are expecting a 403 forbidden error. - unit = ops_test.model.applications[SECONDARY_CLIENT_APP_NAME].units[0] - read_index_endpoint = "/albums/_search?q=Jazz" - run_read_index = await run_request( - ops_test, - unit_name=unit.name, - endpoint=read_index_endpoint, - method="GET", - relation_id=second_client_relation.id, - relation_name=SECOND_RELATION_NAME, - ) - - results = json.loads(run_read_index["results"]) - logging.info(results) - assert "403 Client Error: Forbidden for url:" in results[0], results - - -@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "large"]) -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_multiple_relations_accessing_same_index(ops_test: OpsTest): - """Test that two different applications can connect to the database.""" - # Relate the new application and wait for them to exchange connection data. - second_app_first_client_relation = await ops_test.model.integrate( - f"{SECONDARY_CLIENT_APP_NAME}:{FIRST_RELATION_NAME}", OPENSEARCH_APP_NAME - ) - await wait_until( - ops_test, - apps=ALL_APPS + [SECONDARY_CLIENT_APP_NAME], - apps_statuses=["active"], - units_statuses=["active"], - idle_period=70, - ) - - # Test that different applications can access the same index if they present it in their - # relation databag. FIRST_RELATION_NAME contains `albums` in its databag, so we should be able - # to query that index if we want. - unit = ops_test.model.applications[SECONDARY_CLIENT_APP_NAME].units[0] - read_index_endpoint = "/albums/_search?q=Jazz" - run_bulk_read_index = await run_request( - ops_test, - unit_name=unit.name, - endpoint=read_index_endpoint, - method="GET", - relation_id=second_app_first_client_relation.id, - relation_name=FIRST_RELATION_NAME, - ) - results = json.loads(run_bulk_read_index["results"]) - logging.info(results) - artists = [ - hit.get("_source", {}).get("artist") for hit in results.get("hits", {}).get("hits", [{}]) - ] - assert set(artists) == {"Herbie Hancock", "Lydian Collective", "Vulfpeck"} - - -@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "large"]) -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_admin_relation(ops_test: OpsTest): - """Test we can create relations with admin permissions.""" - # Add an admin relation and wait for them to exchange data - global admin_relation - admin_relation = await ops_test.model.integrate( - f"{CLIENT_APP_NAME}:{ADMIN_RELATION_NAME}", OPENSEARCH_APP_NAME - ) - wait_for_relation_joined_between(ops_test, OPENSEARCH_APP_NAME, CLIENT_APP_NAME) - await wait_until( - ops_test, - apps=ALL_APPS + [SECONDARY_CLIENT_APP_NAME], - apps_statuses=["active"], - units_statuses=["active"], - idle_period=70, - ) - - # Verify we can access whatever data we like as admin - read_index_endpoint = "/albums/_search?q=Jazz" - run_bulk_read_index = await run_request( - ops_test, - unit_name=ops_test.model.applications[CLIENT_APP_NAME].units[0].name, - endpoint=read_index_endpoint, - method="GET", - relation_id=admin_relation.id, - relation_name=ADMIN_RELATION_NAME, - ) - logging.info(f"{run_bulk_read_index=}") - results = json.loads(run_bulk_read_index["results"]) - logging.info(results) - artists = [ - hit.get("_source", {}).get("artist") for hit in results.get("hits", {}).get("hits", [{}]) - ] - assert set(artists) == {"Herbie Hancock", "Lydian Collective", "Vulfpeck"} - - -@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "large"]) -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_admin_permissions(ops_test: OpsTest): - """Test admin permissions behave the way we want. - - admin-only actions include: - - creating multiple indices - - removing indices they've created - - set cluster roles. - - verify that: - - we can't remove .opendistro_security index - - otherwise create client-admin-role - - verify neither admin nor default users can access user api - - otherwise create client-default-role - """ - test_unit = ops_test.model.applications[CLIENT_APP_NAME].units[0] - # Verify admin can't access security API - security_api_endpoint = "/_plugins/_security/api/internalusers" - run_dump_users = await run_request( - ops_test, - unit_name=test_unit.name, - endpoint=security_api_endpoint, - method="GET", - relation_id=admin_relation.id, - relation_name=ADMIN_RELATION_NAME, - ) - results = json.loads(run_dump_users["results"]) - logging.info(results) - assert "403 Client Error: Forbidden for url:" in results[0], results - - # verify admin can't delete users - secret_uri = await get_application_relation_data( - ops_test, f"{CLIENT_APP_NAME}/0", FIRST_RELATION_NAME, "secret-user" - ) - - first_relation_user_data = await get_secret_data(ops_test, secret_uri) - first_relation_user = first_relation_user_data.get("username") - - first_relation_user_endpoint = f"/_plugins/_security/api/internalusers/{first_relation_user}" - run_delete_users = await run_request( - ops_test, - unit_name=test_unit.name, - endpoint=first_relation_user_endpoint, - method="DELETE", - relation_id=admin_relation.id, - relation_name=ADMIN_RELATION_NAME, - ) - results = json.loads(run_delete_users["results"]) - logging.info(results) - assert "403 Client Error: Forbidden for url:" in results[0], results - - # verify admin can't modify protected indices - for protected_index in PROTECTED_INDICES: - protected_index_endpoint = f"/{protected_index}" - run_remove_distro = await run_request( - ops_test, - unit_name=test_unit.name, - endpoint=protected_index_endpoint, - method="DELETE", - relation_id=admin_relation.id, - relation_name=ADMIN_RELATION_NAME, - ) - results = json.loads(run_remove_distro["results"]) - logging.info(results) - assert "Error:" in results[0], results - - -@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "large"]) -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_normal_user_permissions(ops_test: OpsTest): - """Test normal user permissions behave the way we want. - - verify that: - - we can't remove .opendistro_security index - - verify neither admin nor default users can access user api - """ - test_unit = ops_test.model.applications[CLIENT_APP_NAME].units[0] - - # Verify normal users can't access security API - security_api_endpoint = "/_plugins/_security/api/internalusers" - run_dump_users = await run_request( - ops_test, - unit_name=test_unit.name, - endpoint=security_api_endpoint, - method="GET", - relation_id=client_relation.id, - relation_name=FIRST_RELATION_NAME, - ) - results = json.loads(run_dump_users["results"]) - logging.info(results) - assert "403 Client Error: Forbidden for url:" in results[0], results - - # verify normal users can't delete users - secret_uri = await get_application_relation_data( - ops_test, f"{CLIENT_APP_NAME}/0", FIRST_RELATION_NAME, "secret-user" - ) - first_relation_user_data = await get_secret_data(ops_test, secret_uri) - first_relation_user = first_relation_user_data.get("username") - - first_relation_user_endpoint = f"/_plugins/_security/api/internalusers/{first_relation_user}" - run_delete_users = await run_request( - ops_test, - unit_name=test_unit.name, - endpoint=first_relation_user_endpoint, - method="DELETE", - relation_id=client_relation.id, - relation_name=FIRST_RELATION_NAME, - ) - results = json.loads(run_delete_users["results"]) - logging.info(results) - assert "403 Client Error: Forbidden for url:" in results[0], results - - # verify user can't modify protected indices - for protected_index in PROTECTED_INDICES: - protected_index_endpoint = f"/{protected_index}" - run_remove_index = await run_request( - ops_test, - unit_name=test_unit.name, - endpoint=protected_index_endpoint, - method="DELETE", - relation_id=client_relation.id, - relation_name=FIRST_RELATION_NAME, - ) - results = json.loads(run_remove_index["results"]) - logging.info(results) - assert "Error:" in results[0], results - - -@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "large"]) -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_relation_broken(ops_test: OpsTest): - """Test that the user is removed when the relation is broken.""" - # Retrieve the relation user. - secret_uri = await get_application_relation_data( - ops_test, f"{CLIENT_APP_NAME}/0", FIRST_RELATION_NAME, "secret-user" - ) - - client_app_user_data = await get_secret_data(ops_test, secret_uri) - relation_user = client_app_user_data.get("username") - - await wait_until( - ops_test, - apps=ALL_APPS + [SECONDARY_CLIENT_APP_NAME], - apps_statuses=["active"], - units_statuses=["active"], - idle_period=70, - ) - - # Break the relation. - await asyncio.gather( - ops_test.model.applications[OPENSEARCH_APP_NAME].remove_relation( - f"{OPENSEARCH_APP_NAME}:{ClientRelationName}", - f"{CLIENT_APP_NAME}:{FIRST_RELATION_NAME}", - ), - ops_test.model.applications[OPENSEARCH_APP_NAME].remove_relation( - f"{OPENSEARCH_APP_NAME}:{ClientRelationName}", - f"{CLIENT_APP_NAME}:{ADMIN_RELATION_NAME}", - ), - ) - - await asyncio.gather( - wait_until(ops_test, apps=[CLIENT_APP_NAME], apps_statuses=["blocked"], idle_period=70), - wait_until( - ops_test, - apps=[OPENSEARCH_APP_NAME, TLS_CERTIFICATES_APP_NAME, SECONDARY_CLIENT_APP_NAME], - apps_statuses=["active"], - units_statuses=["active"], - idle_period=70, - ), - ) - - leader_ip = await get_leader_unit_ip(ops_test) - users = await http_request( - ops_test, - "GET", - f"https://{ip_to_url(leader_ip)}:9200/_plugins/_security/api/internalusers/", - verify=False, - ) - logger.info(relation_user) - logger.info(users) - assert relation_user not in users.keys() - - -@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "large"]) -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_data_persists_on_relation_rejoin(ops_test: OpsTest): - """Verify that if we recreate a relation, we can access the same index.""" - client_relation = await ops_test.model.integrate( - f"{OPENSEARCH_APP_NAME}:{ClientRelationName}", f"{CLIENT_APP_NAME}:{FIRST_RELATION_NAME}" - ) - wait_for_relation_joined_between(ops_test, OPENSEARCH_APP_NAME, CLIENT_APP_NAME) - - await wait_until( - ops_test, - apps=ALL_APPS + [SECONDARY_CLIENT_APP_NAME], - apps_statuses=["active"], - units_statuses=["active"], - idle_period=70, - ), - - read_index_endpoint = "/albums/_search?q=Jazz" - run_bulk_read_index = await run_request( - ops_test, - unit_name=ops_test.model.applications[CLIENT_APP_NAME].units[0].name, - endpoint=read_index_endpoint, - method="GET", - relation_id=client_relation.id, - relation_name=FIRST_RELATION_NAME, - ) - results = json.loads(run_bulk_read_index["results"]) - logging.info(results) - assert results.get("timed_out") is False - assert results.get("hits", {}).get("total", {}).get("value") == 3 - artists = [ - hit.get("_source", {}).get("artist") for hit in results.get("hits", {}).get("hits", [{}]) - ] - assert set(artists) == {"Herbie Hancock", "Lydian Collective", "Vulfpeck"} diff --git a/tests/integration/spaces/__init__.py b/tests/integration/spaces/__init__.py deleted file mode 100644 index e3979c0f63..0000000000 --- a/tests/integration/spaces/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -# Copyright 2024 Canonical Ltd. -# See LICENSE file for licensing details. diff --git a/tests/integration/spaces/conftest.py b/tests/integration/spaces/conftest.py deleted file mode 100644 index 52128c7125..0000000000 --- a/tests/integration/spaces/conftest.py +++ /dev/null @@ -1,122 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2024 Canonical Ltd. -# See LICENSE file for licensing details. - -import logging -import os -import subprocess - -import pytest -from pytest_operator.plugin import OpsTest - -logger = logging.getLogger(__name__) - - -DEFAULT_LXD_NETWORK = "lxdbr0" -RAW_DNSMASQ = """dhcp-option=3 -dhcp-option=6""" - - -def _lxd_network(name: str, subnet: str, external: bool = True): - try: - output = subprocess.run( - [ - "sudo", - "lxc", - "network", - "create", - name, - "--type=bridge", - f"ipv4.address={subnet}", - f"ipv4.nat={external}".lower(), - "ipv6.address=none", - "dns.mode=none", - ], - capture_output=True, - check=True, - encoding="utf-8", - ).stdout - logger.info(f"LXD network created: {output}") - output = subprocess.run( - ["sudo", "lxc", "network", "show", name], - capture_output=True, - check=True, - encoding="utf-8", - ).stdout - logger.debug(f"LXD network status: {output}") - - if not external: - subprocess.check_output( - ["sudo", "lxc", "network", "set", name, "raw.dnsmasq", RAW_DNSMASQ] - ) - - subprocess.check_output( - f"sudo ip link set up dev {name}".split(), - ) - except subprocess.CalledProcessError as e: - logger.error(f"Error creating LXD network {name} with: {e.returncode} {e.stderr}") - raise - - -@pytest.fixture(scope="session", autouse=True) -def lxd(): - try: - # Set all networks' dns.mode=none - # We want to avoid check: - # https://github.com/canonical/lxd/blob/ - # 762f7dc5c3dc4dbd0863a796898212d8fbe3f7c3/lxd/device/nic_bridged.go#L403 - # As described on: - # https://discuss.linuxcontainers.org/t/ - # error-failed-start-validation-for-device-enp3s0f0-instance - # -dns-name-net17-nicole-munoz-marketing-already-used-on-network/15586/22?page=2 - subprocess.run( - [ - "sudo", - "lxc", - "network", - "set", - DEFAULT_LXD_NETWORK, - "dns.mode=none", - ], - check=True, - ) - except subprocess.CalledProcessError as e: - logger.error( - f"Error creating LXD network {DEFAULT_LXD_NETWORK} with: {e.returncode} {e.stderr}" - ) - raise - _lxd_network("client", "10.0.0.1/24", True) - _lxd_network("cluster", "10.10.10.1/24", False) - _lxd_network("backup", "10.20.20.1/24", False) - - -@pytest.fixture(scope="module") -async def lxd_spaces(ops_test: OpsTest): - subprocess.run( - [ - "juju", - "reload-spaces", - ], - ) - await ops_test.model.add_space("client", cidrs=["10.0.0.0/24"]) - await ops_test.model.add_space("cluster", cidrs=["10.10.10.0/24"]) - await ops_test.model.add_space("backup", cidrs=["10.20.20.0/24"]) - - -@pytest.hookimpl() -def pytest_sessionfinish(session, exitstatus): - if os.environ.get("CI", "true").lower() == "true": - # Nothing to do, as this is a temp runner only - return - - def __exec(cmd): - try: - subprocess.check_output(cmd.split()) - except subprocess.CalledProcessError as e: - # Log and try to delete the next network - logger.warning(f"Error deleting LXD network with: {e.returncode} {e.stderr}") - - for network in ["client", "cluster", "backup"]: - __exec(f"sudo lxc network delete {network}") - - __exec(f"sudo lxc network unset {DEFAULT_LXD_NETWORK} dns.mode") diff --git a/tests/integration/spaces/test_wrong_space.py b/tests/integration/spaces/test_wrong_space.py deleted file mode 100644 index 4dca5b473e..0000000000 --- a/tests/integration/spaces/test_wrong_space.py +++ /dev/null @@ -1,109 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2024 Canonical Ltd. -# See LICENSE file for licensing details. - -import logging -import socket -import subprocess - -import pytest -import yaml -from pytest_operator.plugin import OpsTest - -from ..helpers import ( - APP_NAME, - CONFIG_OPTS, - IDLE_PERIOD, - MODEL_CONFIG, - SERIES, - get_application_unit_ids, -) -from ..helpers_deployments import wait_until -from ..tls.test_tls import TLS_CERTIFICATES_APP_NAME - -logger = logging.getLogger(__name__) - - -DEFAULT_NUM_UNITS = 3 - - -@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "large"]) -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -@pytest.mark.skip_if_deployed -async def test_build_and_deploy(ops_test: OpsTest, lxd_spaces) -> None: - """Build and deploy OpenSearch. - - For this test, we will misconfigure space bindings and see if the charm still - respects the setup. - - More information: gh:canonical/opensearch-operator#334 - """ - my_charm = await ops_test.build_charm(".") - await ops_test.model.set_config(MODEL_CONFIG) - - # Create a deployment that binds to the wrong space. - # That should trigger #334. - await ops_test.model.deploy( - my_charm, - num_units=DEFAULT_NUM_UNITS, - series=SERIES, - constraints="spaces=alpha,client,cluster,backup", - bind={"": "cluster"}, - config=CONFIG_OPTS, - ) - config = {"ca-common-name": "CN_CA"} - await ops_test.model.deploy( - TLS_CERTIFICATES_APP_NAME, - channel="stable", - constraints="spaces=alpha,client,cluster,backup", - bind={"": "cluster"}, - config=config, - ) - # Relate it to OpenSearch to set up TLS. - await ops_test.model.integrate(APP_NAME, TLS_CERTIFICATES_APP_NAME) - await wait_until( - ops_test, - apps=[APP_NAME], - apps_statuses=["active"], - units_statuses=["active"], - wait_for_exact_units=DEFAULT_NUM_UNITS, - timeout=1400, - idle_period=IDLE_PERIOD, - ) - assert len(ops_test.model.applications[APP_NAME].units) == DEFAULT_NUM_UNITS - - -@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "large"]) -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_check_opensearch_transport(ops_test: OpsTest) -> None: - """Test which IP will be assigned to transport bind in the end.""" - ids = get_application_unit_ids(ops_test, APP_NAME) - # Build the dict containing each id - opensearch-peers' ingress IP - ids_to_addr = {} - for id in ids: - ids_to_addr[id] = yaml.safe_load( - subprocess.check_output( - f"juju exec --unit opensearch/{id} -- network-get opensearch-peers".split() - ).decode() - )["bind-addresses"][0]["addresses"][0]["address"] - - logger.info(f"IPs assigned to opensearch-peers: {ids_to_addr}") - - # Now, for each unit, we must ensure all opensearch-peers' ingress IPs are present - for id in ids_to_addr.keys(): - hosts = ( - subprocess.check_output( - f"juju ssh opensearch/{id} -- sudo cat /var/snap/opensearch/current/etc/opensearch/unicast_hosts.txt".split() - ) - .decode() - .rsplit() - ) - addrs = list(ids_to_addr.values()) - assert sorted(addrs) == sorted(hosts), f"Expected {sorted(addrs)}, got {sorted(hosts)}" - - # Now, ensure we only have IPs - for host in hosts: - # It will throw a socket.error exception otherwise - assert socket.inet_aton(host) diff --git a/tests/integration/test_charm.py b/tests/integration/test_charm.py deleted file mode 100644 index 748f021a41..0000000000 --- a/tests/integration/test_charm.py +++ /dev/null @@ -1,381 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2024 Canonical Ltd. -# See LICENSE file for licensing details. - -import asyncio -import logging -import shlex -import subprocess - -import pytest -import yaml -from charms.opensearch.v0.constants_charm import ( - OPENSEARCH_SNAP_REVISION, - OpenSearchSystemUsers, - TLSRelationMissing, -) -from pytest_operator.plugin import OpsTest - -from .ha.continuous_writes import ContinuousWrites -from .ha.helpers import ( - assert_continuous_writes_consistency, - assert_continuous_writes_increasing, -) -from .helpers import ( - APP_NAME, - CONFIG_OPTS, - MODEL_CONFIG, - SERIES, - get_application_unit_ids, - get_conf_as_dict, - get_leader_unit_id, - get_leader_unit_ip, - get_secrets, - http_request, - run_action, -) -from .helpers_deployments import wait_until -from .tls.test_tls import TLS_CERTIFICATES_APP_NAME - -logger = logging.getLogger(__name__) - - -DEFAULT_NUM_UNITS = 2 - - -@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "large"]) -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_deploy_and_remove_single_unit(ops_test: OpsTest) -> None: - """Build and deploy OpenSearch with a single unit and remove it.""" - my_charm = await ops_test.build_charm(".") - await ops_test.model.set_config(MODEL_CONFIG) - - await ops_test.model.deploy( - my_charm, - num_units=1, - series=SERIES, - config=CONFIG_OPTS, - ) - # Deploy TLS Certificates operator. - config = {"ca-common-name": "CN_CA"} - await ops_test.model.deploy(TLS_CERTIFICATES_APP_NAME, channel="stable", config=config) - # Relate it to OpenSearch to set up TLS. - await ops_test.model.integrate(APP_NAME, TLS_CERTIFICATES_APP_NAME) - await wait_until( - ops_test, - apps=[APP_NAME], - apps_statuses=["active"], - units_statuses=["active"], - wait_for_exact_units=1, - ) - assert len(ops_test.model.applications[APP_NAME].units) == 1 - - c_writes = ContinuousWrites(ops_test, APP_NAME) - await c_writes.start() - await assert_continuous_writes_increasing(c_writes) - await assert_continuous_writes_consistency(ops_test, c_writes, [APP_NAME]) - - # Now, clean up - await ops_test.model.remove_application(APP_NAME, block_until_done=True) - await ops_test.model.remove_application(TLS_CERTIFICATES_APP_NAME, block_until_done=True) - - -@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "large"]) -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_build_and_deploy(ops_test: OpsTest) -> None: - """Build and deploy a couple of OpenSearch units.""" - my_charm = await ops_test.build_charm(".") - model_config = MODEL_CONFIG - model_config["update-status-hook-interval"] = "1m" - - await ops_test.model.set_config(MODEL_CONFIG) - - await ops_test.model.deploy( - my_charm, - num_units=DEFAULT_NUM_UNITS, - series=SERIES, - config=CONFIG_OPTS, - ) - await wait_until( - ops_test, - apps=[APP_NAME], - wait_for_exact_units=DEFAULT_NUM_UNITS, - apps_full_statuses={APP_NAME: {"blocked": [TLSRelationMissing]}}, - ) - assert len(ops_test.model.applications[APP_NAME].units) == DEFAULT_NUM_UNITS - - -@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "large"]) -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_actions_get_admin_password(ops_test: OpsTest) -> None: - """Test the retrieval of admin secrets.""" - leader_id = await get_leader_unit_id(ops_test) - - # 1. run the action prior to finishing the config of TLS - result = await run_action(ops_test, leader_id, "get-password") - assert result.status == "failed" - - # Deploy TLS Certificates operator. - config = {"ca-common-name": "CN_CA"} - await ops_test.model.deploy(TLS_CERTIFICATES_APP_NAME, channel="stable", config=config) - # Relate it to OpenSearch to set up TLS. - await ops_test.model.integrate(APP_NAME, TLS_CERTIFICATES_APP_NAME) - await wait_until( - ops_test, - apps=[APP_NAME], - apps_statuses=["active"], - units_statuses=["active"], - wait_for_exact_units=DEFAULT_NUM_UNITS, - ) - - leader_ip = await get_leader_unit_ip(ops_test) - test_url = f"https://{leader_ip}:9200/" - - # 2. run the action after finishing the config of TLS - result = await get_secrets(ops_test) - assert result.get("username") == "admin" - assert result.get("password") - assert result.get("ca-chain") - - # parse_output fields non-null + make http request success - http_resp_code = await http_request(ops_test, "GET", test_url, resp_status_code=True) - assert http_resp_code == 200 - - # 3. test retrieving password from non-supported user - result = await run_action(ops_test, leader_id, "get-password", {"username": "non-existent"}) - assert result.status == "failed" - - -@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "large"]) -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_actions_rotate_admin_password(ops_test: OpsTest) -> None: - """Test the rotation and change of admin password.""" - leader_ip = await get_leader_unit_ip(ops_test) - test_url = f"https://{leader_ip}:9200/" - - leader_id = await get_leader_unit_id(ops_test) - non_leader_id = [ - unit_id for unit_id in get_application_unit_ids(ops_test) if unit_id != leader_id - ][0] - - # 1. run the action on a non_leader unit. - result = await run_action(ops_test, non_leader_id, "set-password") - assert result.status == "failed" - - # 2. run the action with the wrong username - result = await run_action(ops_test, leader_id, "set-password", {"username": "wrong-user"}) - assert result.status == "failed" - - # 3. change password and verify the new password works and old password not - password0 = (await get_secrets(ops_test, leader_id))["password"] - result = await run_action(ops_test, leader_id, "set-password", {"password": "new_pwd"}) - password1 = result.response.get("admin-password") - assert password1 - assert password1 == (await get_secrets(ops_test, leader_id))["password"] - - http_resp_code = await http_request(ops_test, "GET", test_url, resp_status_code=True) - assert http_resp_code == 200 - - http_resp_code = await http_request( - ops_test, "GET", test_url, resp_status_code=True, user_password=password0 - ) - assert http_resp_code == 401 - - # 4. change password with auto-generated one - result = await run_action(ops_test, leader_id, "set-password") - password2 = result.response.get("admin-password") - assert password2 - - http_resp_code = await http_request(ops_test, "GET", test_url, resp_status_code=True) - assert http_resp_code == 200 - - http_resp_code = await http_request( - ops_test, "GET", test_url, resp_status_code=True, user_password=password1 - ) - assert http_resp_code == 401 - - -@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "large"]) -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -@pytest.mark.parametrize("user", [("monitor"), ("kibanaserver")]) -async def test_actions_rotate_system_user_password(ops_test: OpsTest, user) -> None: - """Test the rotation and change of admin password.""" - leader_ip = await get_leader_unit_ip(ops_test) - test_url = f"https://{leader_ip}:9200/" - - leader_id = await get_leader_unit_id(ops_test) - - # run the action w/o password parameter - password0 = (await get_secrets(ops_test, leader_id, user))["password"] - result = await run_action(ops_test, leader_id, "set-password", {"username": user}) - password1 = result.response.get(f"{user}-password") - assert password1 != password0 - - # 1. change password with auto-generated one - http_resp_code = await http_request( - ops_test, "GET", test_url, resp_status_code=True, user=user, user_password=password1 - ) - assert http_resp_code == 200 - - http_resp_code = await http_request( - ops_test, "GET", test_url, resp_status_code=True, user=user, user_password=password0 - ) - assert http_resp_code == 401 - - # 2. change password and verify the new password works and old password not - password0 = (await get_secrets(ops_test, leader_id, user))["password"] - result = await run_action( - ops_test, leader_id, "set-password", {"username": user, "password": "new_pwd"} - ) - password1 = result.response.get(f"{user}-password") - assert password1 - assert password1 == (await get_secrets(ops_test, leader_id, user))["password"] - - http_resp_code = await http_request( - ops_test, "GET", test_url, resp_status_code=True, user=user, user_password=password1 - ) - assert http_resp_code == 200 - - http_resp_code = await http_request( - ops_test, "GET", test_url, resp_status_code=True, user=user, user_password=password0 - ) - assert http_resp_code == 401 - - -@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "large"]) -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_check_pinned_revision(ops_test: OpsTest) -> None: - """Test check the pinned revision.""" - leader_id = await get_leader_unit_id(ops_test) - - installed_info = yaml.safe_load( - subprocess.check_output( - [ - "juju", - "ssh", - f"opensearch/{leader_id}", - "--", - "sudo", - "snap", - "info", - "opensearch", - "--color=never", - "--unicode=always", - ], - text=True, - ).replace("\r\n", "\n") - )["installed"].split() - logger.info(f"Installed snap: {installed_info}") - assert installed_info[1] == f"({OPENSEARCH_SNAP_REVISION})" - assert installed_info[3] == "held" - - -@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "large"]) -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_check_workload_version(ops_test: OpsTest) -> None: - """Test to check if the workload_version file is updated.""" - leader_id = await get_leader_unit_id(ops_test) - - installed_info = yaml.safe_load( - subprocess.check_output( - [ - "juju", - "ssh", - "-m", - ops_test.model.info.name, - f"opensearch/{leader_id}", - "--", - "sudo", - "snap", - "info", - "opensearch", - "--color=never", - "--unicode=always", - ], - text=True, - ).replace("\r\n", "\n") - )["installed"].split() - logger.info(f"Installed snap: {installed_info}") - - workload_version = None - with open("./workload_version") as f: - workload_version = f.read().rstrip("\n") - assert installed_info[0] == workload_version - - -@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "large"]) -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_all_units_have_all_local_users(ops_test: OpsTest) -> None: - """Compare the internal_users.yaml of all units.""" - # Get the leader's version of internal_users.yml - leader_id = await get_leader_unit_id(ops_test) - leader_name = f"{APP_NAME}/{leader_id}" - filename = "/var/snap/opensearch/current/etc/opensearch/opensearch-security/internal_users.yml" - leader_conf = get_conf_as_dict(ops_test, leader_name, filename) - - # Check on all units if they have the same - for unit in ops_test.model.applications[APP_NAME].units: - unit_conf = get_conf_as_dict(ops_test, unit.name, filename) - for user in OpenSearchSystemUsers: - assert leader_conf[user]["hash"] == unit_conf[user]["hash"] - - -@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "large"]) -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_all_units_have_internal_users_synced(ops_test: OpsTest) -> None: - """Compare the internal_users.yaml of all units.""" - # Get the leader's version of internal_users.yml - leader_id = await get_leader_unit_id(ops_test) - leader_name = f"{APP_NAME}/{leader_id}" - filename = "/var/snap/opensearch/current/etc/opensearch/opensearch-security/internal_users.yml" - leader_conf = get_conf_as_dict(ops_test, leader_name, filename) - - # Check on all units if they have the same - for unit in ops_test.model.applications[APP_NAME].units: - unit_conf = get_conf_as_dict(ops_test, unit.name, filename) - assert leader_conf == unit_conf - - -@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "large"]) -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_add_users_and_calling_update_status(ops_test: OpsTest) -> None: - """Add users and call update status.""" - leader_id = await get_leader_unit_id(ops_test) - leader_ip = await get_leader_unit_ip(ops_test) - test_url = f"https://{leader_ip}:9200/_plugins/_security/api/internalusers/my_user" - - http_resp_code = await http_request( - ops_test, - "PUT", - test_url, - resp_status_code=True, - payload={"hash": "1234"}, - ) - assert http_resp_code >= 200 and http_resp_code < 300 - - cmd = '"export JUJU_DISPATCH_PATH=hooks/update-status; ./dispatch"' - exec_cmd = f"juju exec -u opensearch/{leader_id} -m {ops_test.model.name} -- {cmd}" - try: - # The "normal" subprocess.run with "export ...; ..." cmd was failing - # Noticed that, for this case, canonical/jhack uses shlex instead to split. - # Adding it fixed the issue. - subprocess.run(shlex.split(exec_cmd)) - except Exception as e: - logger.error( - f"Failed to apply state: process exited with {e.returncode}; " - f"stdout = {e.stdout}; " - f"stderr = {e.stderr}.", - ) - await asyncio.sleep(300) - http_resp_code = await http_request(ops_test, "GET", test_url, resp_status_code=True) - assert http_resp_code >= 200 and http_resp_code < 300 diff --git a/tests/integration/upgrades/__init__.py b/tests/integration/upgrades/__init__.py deleted file mode 100644 index e3979c0f63..0000000000 --- a/tests/integration/upgrades/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -# Copyright 2024 Canonical Ltd. -# See LICENSE file for licensing details. diff --git a/tests/integration/upgrades/conftest.py b/tests/integration/upgrades/conftest.py deleted file mode 100644 index 198f54306f..0000000000 --- a/tests/integration/upgrades/conftest.py +++ /dev/null @@ -1,39 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2024 Canonical Ltd. -# See LICENSE file for licensing details. - -import logging - -import pytest -from pytest_operator.plugin import OpsTest - -from ..ha.continuous_writes import ContinuousWrites, ReplicationMode -from ..helpers import APP_NAME -from .helpers import app_name - -logger = logging.getLogger(__name__) - - -@pytest.fixture(scope="function") -async def c_writes(ops_test: OpsTest): - """Creates instance of the ContinuousWrites.""" - app = (await app_name(ops_test)) or APP_NAME - return ContinuousWrites(ops_test, app) - - -@pytest.fixture(scope="function") -async def c_writes_runner(ops_test: OpsTest, c_writes: ContinuousWrites): - """Starts continuous write operations and clears writes at the end of the test.""" - await c_writes.start() - yield - await c_writes.clear() - logger.info("\n\n\n\nThe writes have been cleared.\n\n\n\n") - - -@pytest.fixture(scope="function") -async def c_balanced_writes_runner(ops_test: OpsTest, c_writes: ContinuousWrites): - """Same as previous runner, but starts continuous writes on cluster wide replicated index.""" - await c_writes.start(repl_on_all_nodes=ReplicationMode.WITH_AT_LEAST_1_REPL) - yield - await c_writes.clear() - logger.info("\n\n\n\nThe writes have been cleared.\n\n\n\n") diff --git a/tests/integration/upgrades/helpers.py b/tests/integration/upgrades/helpers.py deleted file mode 100644 index f37bc370f8..0000000000 --- a/tests/integration/upgrades/helpers.py +++ /dev/null @@ -1,127 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2024 Canonical Ltd. -# See LICENSE file for licensing details. - -import logging -import subprocess -from typing import Optional - -from pytest_operator.plugin import OpsTest -from tenacity import Retrying, stop_after_attempt, wait_fixed - -from ..ha.continuous_writes import ContinuousWrites -from ..ha.helpers import ( - assert_continuous_writes_consistency, - assert_continuous_writes_increasing, -) -from ..helpers import APP_NAME, IDLE_PERIOD, app_name, run_action -from ..helpers_deployments import get_application_units, wait_until - -OPENSEARCH_SERVICE_PATH = "/etc/systemd/system/snap.opensearch.daemon.service" -ORIGINAL_RESTART_DELAY = 20 -SECOND_APP_NAME = "second-opensearch" -RESTART_DELAY = 360 - - -logger = logging.getLogger(__name__) - - -async def refresh( - ops_test: OpsTest, - app_name: str, - *, - revision: Optional[int] = None, - switch: Optional[str] = None, - channel: Optional[str] = None, - path: Optional[str] = None, - config: Optional[dict[str, str]] = None, -) -> None: - # due to: https://github.com/juju/python-libjuju/issues/1057 - # the following call does not work: - # application = ops_test.model.applications[APP_NAME] - # await application.refresh( - # revision=rev, - # ) - - # Point to the right model, as we are calling the juju cli directly - args = [f"--model={ops_test.model.info.name}"] - if revision: - args.append(f"--revision={revision}") - if switch: - args.append(f"--switch={switch}") - if channel: - args.append(f"--channel={channel}") - if path: - args.append(f"--path={path}") - if config: - for key, val in config.items(): - args.extend(["--config", f"{key}={val}"]) - - for attempt in Retrying(stop=stop_after_attempt(6), wait=wait_fixed(wait=30)): - with attempt: - cmd = ["juju", "refresh"] - cmd.append(app_name) - cmd.extend(args) - subprocess.check_output(cmd) - - -async def assert_upgrade_to_local( - ops_test: OpsTest, cwrites: ContinuousWrites, local_charm: str -) -> None: - """Does the upgrade to local and asserts continuous writes.""" - app = (await app_name(ops_test)) or APP_NAME - units = await get_application_units(ops_test, app) - leader_id = [u.id for u in units if u.is_leader][0] - - action = await run_action( - ops_test, - leader_id, - "pre-upgrade-check", - app=app, - ) - assert action.status == "completed" - - async with ops_test.fast_forward(): - logger.info("Refresh the charm") - - await refresh(ops_test, app, path=local_charm, config={"profile": "testing"}) - - await wait_until( - ops_test, - apps=[app], - apps_statuses=["blocked"], - units_statuses=["active"], - wait_for_exact_units={ - APP_NAME: 3, - }, - timeout=2800, - idle_period=IDLE_PERIOD, - ) - - logger.info("Upgrade finished") - # Resume the upgrade - action = await run_action( - ops_test, - leader_id, - "resume-upgrade", - app=app, - ) - logger.info(action) - assert action.status == "completed" - - logger.info("Refresh is over, waiting for the charm to settle") - await wait_until( - ops_test, - apps=[app], - apps_statuses=["active"], - units_statuses=["active"], - wait_for_exact_units={ - APP_NAME: 3, - }, - timeout=2800, - idle_period=IDLE_PERIOD, - ) - - # continuous writes checks - await assert_continuous_writes_increasing(cwrites) - await assert_continuous_writes_consistency(ops_test, cwrites, [app]) diff --git a/tests/integration/upgrades/test_manual_large_deployment_upgrades.py b/tests/integration/upgrades/test_manual_large_deployment_upgrades.py deleted file mode 100644 index cf7fc42774..0000000000 --- a/tests/integration/upgrades/test_manual_large_deployment_upgrades.py +++ /dev/null @@ -1,212 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2024 Canonical Ltd. -# See LICENSE file for licensing details. - -import asyncio -import logging - -import pytest -from pytest_operator.plugin import OpsTest - -from ..ha.continuous_writes import ContinuousWrites -from ..ha.helpers import assert_continuous_writes_consistency -from ..helpers import ( - APP_NAME, - CONFIG_OPTS, - IDLE_PERIOD, - MODEL_CONFIG, - SERIES, - run_action, -) -from ..helpers_deployments import get_application_units, wait_until -from ..tls.test_tls import TLS_CERTIFICATES_APP_NAME - -logger = logging.getLogger(__name__) - - -OPENSEARCH_ORIGINAL_CHARM_NAME = "opensearch" -OPENSEARCH_INITIAL_CHANNEL = "2/edge" -OPENSEARCH_MAIN_APP_NAME = "main" -OPENSEARCH_FAILOVER_APP_NAME = "failover" - - -charm = None - - -WORKLOAD = { - APP_NAME: 3, - OPENSEARCH_FAILOVER_APP_NAME: 2, - OPENSEARCH_MAIN_APP_NAME: 1, -} - - -@pytest.mark.skip(reason="Fix with DPE-4528") -@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "xlarge"]) -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -@pytest.mark.skip_if_deployed -async def test_large_deployment_deploy_original_charm(ops_test: OpsTest) -> None: - """Build and deploy the charm for large deployment tests.""" - await ops_test.model.set_config(MODEL_CONFIG) - # Deploy TLS Certificates operator. - tls_config = {"ca-common-name": "CN_CA"} - - main_orchestrator_conf = { - "cluster_name": "backup-test", - "init_hold": False, - "roles": "cluster_manager", - } - failover_orchestrator_conf = { - "cluster_name": "backup-test", - "init_hold": True, - "roles": "cluster_manager", - } - data_hot_conf = {"cluster_name": "backup-test", "init_hold": True, "roles": "data.hot"} - - await asyncio.gather( - ops_test.model.deploy(TLS_CERTIFICATES_APP_NAME, channel="stable", config=tls_config), - ops_test.model.deploy( - OPENSEARCH_ORIGINAL_CHARM_NAME, - application_name=OPENSEARCH_MAIN_APP_NAME, - num_units=WORKLOAD[OPENSEARCH_MAIN_APP_NAME], - series=SERIES, - channel=OPENSEARCH_INITIAL_CHANNEL, - config=main_orchestrator_conf | CONFIG_OPTS, - ), - ops_test.model.deploy( - OPENSEARCH_ORIGINAL_CHARM_NAME, - application_name=OPENSEARCH_FAILOVER_APP_NAME, - num_units=WORKLOAD[OPENSEARCH_FAILOVER_APP_NAME], - series=SERIES, - channel=OPENSEARCH_INITIAL_CHANNEL, - config=failover_orchestrator_conf | CONFIG_OPTS, - ), - ops_test.model.deploy( - OPENSEARCH_ORIGINAL_CHARM_NAME, - application_name=APP_NAME, - num_units=WORKLOAD[APP_NAME], - series=SERIES, - channel=OPENSEARCH_INITIAL_CHANNEL, - config=data_hot_conf | CONFIG_OPTS, - ), - ) - - # Large deployment setup - await ops_test.model.integrate("main:peer-cluster-orchestrator", "failover:peer-cluster") - await ops_test.model.integrate("main:peer-cluster-orchestrator", f"{APP_NAME}:peer-cluster") - await ops_test.model.integrate( - "failover:peer-cluster-orchestrator", f"{APP_NAME}:peer-cluster" - ) - - # TLS setup - await ops_test.model.integrate("main", TLS_CERTIFICATES_APP_NAME) - await ops_test.model.integrate("failover", TLS_CERTIFICATES_APP_NAME) - await ops_test.model.integrate(APP_NAME, TLS_CERTIFICATES_APP_NAME) - - # Charms except s3-integrator should be active - await wait_until( - ops_test, - apps=[ - TLS_CERTIFICATES_APP_NAME, - OPENSEARCH_MAIN_APP_NAME, - OPENSEARCH_FAILOVER_APP_NAME, - APP_NAME, - ], - apps_statuses=["active"], - units_statuses=["active"], - wait_for_exact_units={ - TLS_CERTIFICATES_APP_NAME: 1, - OPENSEARCH_MAIN_APP_NAME: WORKLOAD[OPENSEARCH_MAIN_APP_NAME], - OPENSEARCH_FAILOVER_APP_NAME: WORKLOAD[OPENSEARCH_FAILOVER_APP_NAME], - APP_NAME: WORKLOAD[APP_NAME], - }, - idle_period=IDLE_PERIOD, - timeout=3600, - ) - - -@pytest.mark.skip(reason="Fix with DPE-4528") -@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "xlarge"]) -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_manually_upgrade_to_local( - ops_test: OpsTest, c_writes: ContinuousWrites, c_writes_runner -) -> None: - """Test upgrade from usptream to currently locally built version.""" - units = await get_application_units(ops_test, OPENSEARCH_MAIN_APP_NAME) - leader_id = [u.id for u in units if u.is_leader][0] - - action = await run_action( - ops_test, - leader_id, - "pre-upgrade-check", - app=OPENSEARCH_MAIN_APP_NAME, - ) - assert action.status == "completed" - - logger.info("Build charm locally") - global charm - if not charm: - charm = await ops_test.build_charm(".") - - async with ops_test.fast_forward(): - for app, unit_count in WORKLOAD.items(): - application = ops_test.model.applications[app] - units = await get_application_units(ops_test, app) - leader_id = [u.id for u in units if u.is_leader][0] - - logger.info(f"Refresh app {app}, leader {leader_id}") - - await application.refresh(path=charm) - logger.info("Refresh is over, waiting for the charm to settle") - - if unit_count == 1: - # Upgrade already happened for this unit, wait for idle and continue - await wait_until( - ops_test, - apps=[app], - apps_statuses=["active"], - units_statuses=["active"], - idle_period=IDLE_PERIOD, - timeout=3600, - ) - logger.info(f"Upgrade of app {app} finished") - continue - - await wait_until( - ops_test, - apps=[app], - apps_statuses=["blocked"], - units_statuses=["active"], - wait_for_exact_units={ - app: unit_count, - }, - idle_period=120, - timeout=3600, - ) - # Resume the upgrade - action = await run_action( - ops_test, - leader_id, - "resume-upgrade", - app=app, - ) - assert action.status == "completed" - logger.info(f"resume-upgrade: {action}") - - await wait_until( - ops_test, - apps=[app], - apps_statuses=["active"], - units_statuses=["active"], - idle_period=IDLE_PERIOD, - timeout=3600, - ) - logger.info(f"Upgrade of app {app} finished") - - # continuous writes checks - await assert_continuous_writes_consistency( - ops_test, - c_writes, - [APP_NAME, OPENSEARCH_MAIN_APP_NAME], - ) diff --git a/tests/integration/upgrades/test_small_deployment_upgrades.py b/tests/integration/upgrades/test_small_deployment_upgrades.py deleted file mode 100644 index 8d8ad959e7..0000000000 --- a/tests/integration/upgrades/test_small_deployment_upgrades.py +++ /dev/null @@ -1,298 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2024 Canonical Ltd. -# See LICENSE file for licensing details. - -import logging - -import pytest -from pytest_operator.plugin import OpsTest - -from ..ha.continuous_writes import ContinuousWrites -from ..ha.helpers import app_name -from ..helpers import ( - APP_NAME, - IDLE_PERIOD, - MODEL_CONFIG, - SERIES, - run_action, - set_watermark, -) -from ..helpers_deployments import get_application_units, wait_until -from ..tls.test_tls import TLS_CERTIFICATES_APP_NAME -from .helpers import assert_upgrade_to_local, refresh - -logger = logging.getLogger(__name__) - - -OPENSEARCH_ORIGINAL_CHARM_NAME = "opensearch" -OPENSEARCH_CHANNEL = "2/edge" - - -STARTING_VERSION = "2.15.0" - - -VERSION_TO_REVISION = { - STARTING_VERSION: 144, - "2.16.0": 160, -} - - -FROM_VERSION_PREFIX = "from_v{}_to_local" - - -UPGRADE_INITIAL_VERSION = [ - ( - pytest.param( - version, - id=FROM_VERSION_PREFIX.format(version), - marks=pytest.mark.group(FROM_VERSION_PREFIX.format(version)), - ) - ) - for version in VERSION_TO_REVISION.keys() -] - - -charm = None - - -####################################################################### -# -# Auxiliary functions -# -####################################################################### -@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "large"]) -async def _build_env(ops_test: OpsTest, version: str) -> None: - """Deploy OpenSearch cluster from a given revision.""" - await ops_test.model.set_config(MODEL_CONFIG) - - await ops_test.model.deploy( - OPENSEARCH_ORIGINAL_CHARM_NAME, - application_name=APP_NAME, - num_units=3, - channel=OPENSEARCH_CHANNEL, - revision=VERSION_TO_REVISION[version], - series=SERIES, - ) - - # Deploy TLS Certificates operator. - config = {"ca-common-name": "CN_CA"} - await ops_test.model.deploy(TLS_CERTIFICATES_APP_NAME, channel="stable", config=config) - - # Relate it to OpenSearch to set up TLS. - await ops_test.model.integrate(APP_NAME, TLS_CERTIFICATES_APP_NAME) - await ops_test.model.wait_for_idle( - apps=[TLS_CERTIFICATES_APP_NAME, APP_NAME], - status="active", - timeout=1400, - idle_period=50, - ) - assert len(ops_test.model.applications[APP_NAME].units) == 3 - - await set_watermark(ops_test, APP_NAME) - - -####################################################################### -# -# Tests -# -####################################################################### - - -@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "large"]) -@pytest.mark.group("happy_path_upgrade") -@pytest.mark.abort_on_fail -@pytest.mark.skip_if_deployed -async def test_deploy_latest_from_channel(ops_test: OpsTest) -> None: - """Deploy OpenSearch.""" - await _build_env(ops_test, STARTING_VERSION) - - -@pytest.mark.group("happy_path_upgrade") -@pytest.mark.abort_on_fail -async def test_upgrade_between_versions( - ops_test: OpsTest, c_writes: ContinuousWrites, c_writes_runner -) -> None: - """Test upgrade from upstream to currently locally built version.""" - app = (await app_name(ops_test)) or APP_NAME - units = await get_application_units(ops_test, app) - leader_id = [u.id for u in units if u.is_leader][0] - - for version, rev in VERSION_TO_REVISION.items(): - if version == STARTING_VERSION: - # We're starting in this version - continue - - logger.info(f"Upgrading to version {version}") - - action = await run_action( - ops_test, - leader_id, - "pre-upgrade-check", - app=app, - ) - assert action.status == "completed" - - async with ops_test.fast_forward(): - logger.info("Refresh the charm") - await refresh(ops_test, app, revision=rev) - - await wait_until( - ops_test, - apps=[app], - apps_statuses=["blocked"], - units_statuses=["active"], - wait_for_exact_units={ - APP_NAME: 3, - }, - timeout=1400, - idle_period=IDLE_PERIOD, - ) - - logger.info("Upgrade finished") - # Resume the upgrade - action = await run_action( - ops_test, - leader_id, - "resume-upgrade", - app=app, - ) - logger.info(action) - assert action.status == "completed" - - logger.info("Refresh is over, waiting for the charm to settle") - await wait_until( - ops_test, - apps=[app], - apps_statuses=["active"], - units_statuses=["active"], - wait_for_exact_units={ - APP_NAME: 3, - }, - timeout=1400, - idle_period=IDLE_PERIOD, - ) - - -@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "large"]) -@pytest.mark.group("happy_path_upgrade") -@pytest.mark.abort_on_fail -async def test_upgrade_to_local( - ops_test: OpsTest, c_writes: ContinuousWrites, c_writes_runner -) -> None: - """Test upgrade from usptream to currently locally built version.""" - logger.info("Build charm locally") - charm = await ops_test.build_charm(".") - await assert_upgrade_to_local(ops_test, c_writes, charm) - - -################################################################################## -# -# test scenarios from each version: -# Start with each version, moving to local and then rolling back mid-upgrade -# Once this test passes, the 2nd test will rerun the upgrade, this time to -# its end. -# -################################################################################## - - -@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "large"]) -@pytest.mark.parametrize("version", UPGRADE_INITIAL_VERSION) -@pytest.mark.abort_on_fail -@pytest.mark.skip_if_deployed -async def test_deploy_from_version(ops_test: OpsTest, version) -> None: - """Deploy OpenSearch.""" - await _build_env(ops_test, version) - - -@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "large"]) -@pytest.mark.parametrize("version", UPGRADE_INITIAL_VERSION) -@pytest.mark.abort_on_fail -async def test_upgrade_rollback_from_local( - ops_test: OpsTest, c_writes: ContinuousWrites, c_writes_runner, version -) -> None: - """Test upgrade and rollback to each version available.""" - app = (await app_name(ops_test)) or APP_NAME - units = await get_application_units(ops_test, app) - leader_id = [u.id for u in units if u.is_leader][0] - - action = await run_action( - ops_test, - leader_id, - "pre-upgrade-check", - app=app, - ) - assert action.status == "completed" - - logger.info("Build charm locally") - global charm - if not charm: - charm = await ops_test.build_charm(".") - - async with ops_test.fast_forward(): - logger.info("Refresh the charm") - await refresh(ops_test, app, path=charm, config={"profile": "testing"}) - - await wait_until( - ops_test, - apps=[app], - apps_statuses=["blocked"], - units_statuses=["active"], - wait_for_exact_units={ - APP_NAME: 3, - }, - timeout=1400, - idle_period=IDLE_PERIOD, - ) - - logger.info(f"Rolling back to {version}") - await refresh( - ops_test, - app, - switch=OPENSEARCH_ORIGINAL_CHARM_NAME, - channel=OPENSEARCH_CHANNEL, - ) - # Wait until we are set in an idle state and can rollback the revision. - # app status blocked: that will happen if we are jumping N-2 versions in our test - # app status active: that will happen if we are jumping N-1 in our test - await wait_until( - ops_test, - apps=[app], - apps_statuses=["active", "blocked"], - units_statuses=["active"], - wait_for_exact_units={ - APP_NAME: 3, - }, - timeout=1400, - idle_period=IDLE_PERIOD, - ) - await refresh( - ops_test, - app, - revision=VERSION_TO_REVISION[version], - ) - - await wait_until( - ops_test, - apps=[app], - apps_statuses=["active"], - units_statuses=["active"], - wait_for_exact_units={ - APP_NAME: 3, - }, - timeout=1400, - idle_period=IDLE_PERIOD, - ) - - -@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "large"]) -@pytest.mark.parametrize("version", UPGRADE_INITIAL_VERSION) -@pytest.mark.abort_on_fail -async def test_upgrade_from_version_to_local( - ops_test: OpsTest, c_writes: ContinuousWrites, c_writes_runner, version -) -> None: - """Test upgrade from usptream to currently locally built version.""" - logger.info("Build charm locally") - global charm - if not charm: - charm = await ops_test.build_charm(".") - await assert_upgrade_to_local(ops_test, c_writes, charm)