Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions src/events/tls.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import ops
from charmlibs.interfaces.tls_certificates import (
CertificateAvailableEvent,
CertificateDeniedEvent,
CertificateRequestAttributes,
TLSCertificatesRequiresV4,
)
Expand Down Expand Up @@ -72,6 +73,9 @@ def __init__(self, charm: "ValkeyCharm"):
self.framework.observe(
self.client_certificate.on.certificate_available, self._on_certificate_available
)
self.framework.observe(
self.client_certificate.on.certificate_denied, self._on_certificate_denied
)
self.framework.observe(
self.charm.on[PEER_RELATION].relation_created, self._on_peer_relation_created
)
Expand Down Expand Up @@ -212,11 +216,30 @@ def _on_certificate_available(self, event: CertificateAvailableEvent) -> None:
event.defer()
return

def _on_certificate_denied(self, event: CertificateDeniedEvent) -> None:
"""Handle the `certificate-denied` event from TLS provider."""
if event.certificate_signing_request in [
csr.certificate_signing_request
for csr in self.client_certificate.get_csrs_from_requirer_relation_data()
]:
logger.error("Certificate request was denied: %s", event.error.message)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have 2 questions here:

  1. Do we ever expect this hook to deliver a CSR that we didn't create ourselves? If yes, how?
  2. If yes, we should also log when a CSR is not ours.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In theory we should not, but since we have the same safeguard in place for the certificate-available event, I've added some error logging for this case.

return

logger.warning(
"Certificate denied event received for unknown signing request: %s",
event.certificate_signing_request,
)

def _on_tls_relation_broken(self, event: ops.RelationBrokenEvent) -> None:
"""Handle the `relation-broken` event."""
if self.charm.app.planned_units() == 0 or self.charm.state.unit_server.is_being_removed:
return

if not self.charm.state.unit_server.model.client_cert_ready:
logger.info("Client TLS relation removed, no certificate was stored yet")
self.charm.tls_manager.set_tls_state(TLSState.NO_TLS)
return

if not self.charm.state.cluster.internal_ca_certificate:
if self.charm.unit.is_leader():
self.charm.tls_manager.generate_ca_certificate()
Expand Down
5 changes: 5 additions & 0 deletions src/managers/tls.py
Original file line number Diff line number Diff line change
Expand Up @@ -420,6 +420,11 @@ def get_statuses(self, scope: Scope, recompute: bool = False) -> list[StatusObje
if not self.state.cluster.model or not self.state.unit_server.model:
return status_list or [CharmStatuses.ACTIVE_IDLE.value]

if (relation := self.state.client_tls_relation) and relation.data[relation.app].get(
"request_errors"
):
status_list.append(TLSStatuses.CERTIFICATE_DENIED.value)

if self.state.unit_server.tls_client_state == TLSState.TO_TLS:
status_list.append(TLSStatuses.ENABLING_CLIENT_TLS.value)

Expand Down
3 changes: 3 additions & 0 deletions src/statuses.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,3 +128,6 @@ class TLSStatuses(Enum):
message="Invalid value for config option 'certificate-extra-sans'",
short_message="Invalid value `certificate-extra-sans`",
)
CERTIFICATE_DENIED = StatusObject(
status="blocked", message="Certificate request was denied, check logs for details"
)
2 changes: 1 addition & 1 deletion tests/integration/test_charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,9 +220,9 @@ async def test_user_secret_permissions(juju: jubilant.Juju) -> None:
)

logger.info("Secret access will be granted now - wait for updated password")
juju.grant_secret(identifier=secret_name, app=APP_NAME)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this to fix the flakiness we've been noticing on CI?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. I was investigating these tests locally and found that the update-status triggered by fast_forward can happen before the secret gets granted, and then the purpose of fast_forward gets defeated. So I moved it.

# deferred `config_changed` event will be retried before `update_status`
with fast_forward(juju):
juju.grant_secret(identifier=secret_name, app=APP_NAME)
juju.wait(
lambda status: are_apps_active_and_agents_idle(status, APP_NAME, idle_period=10),
timeout=1200,
Expand Down
181 changes: 180 additions & 1 deletion tests/integration/tls/test_certificate_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,14 @@
# Copyright 2026 Canonical Ltd.
# See LICENSE file for licensing details.
import logging
import os
import re
import subprocess
from pathlib import Path

import jubilant

from literals import Substrate
from literals import CharmUsers, Substrate
from statuses import TLSStatuses
from tests.integration.helpers import (
APP_NAME,
Expand All @@ -18,27 +21,49 @@
are_apps_active_and_agents_idle,
does_status_match,
download_client_certificate_from_unit,
get_cluster_hostnames,
get_password,
set_key,
)

# module-level logger named after this test module
logger = logging.getLogger(__name__)

# number of units of the charm under test that get deployed and waited for
NUM_UNITS = 3
# key/value pair written to the database when checking client access
TEST_KEY = "test_key"
TEST_VALUE = "test_value"
# Juju application name under which the Vault charm is deployed
VAULT_NAME = "vault"


def test_build_and_deploy(charm: str, juju: jubilant.Juju, substrate: Substrate) -> None:
    """Install the Vault CLI locally and deploy the charm, a TLS provider and Vault.

    The charm under test is integrated with the TLS provider right away; Vault
    is only deployed here and is expected to settle in blocked state.
    """
    logger.info("Installing vault cli client")
    snap_install_cmd = ["sudo", "snap", "install", "vault"]
    subprocess.run(snap_install_cmd, check=True, text=True, capture_output=True)

    on_k8s = substrate == Substrate.K8S

    # charm under test
    juju.deploy(
        charm,
        resources=IMAGE_RESOURCE if on_k8s else None,
        num_units=NUM_UNITS,
        trust=True,
    )

    # TLS provider
    juju.deploy(TLS_NAME, channel=TLS_CHANNEL)

    # Vault, configured with a restrictive PKI policy (no arbitrary names, no IP SANs)
    vault_charm = "vault-k8s" if on_k8s else "vault"
    vault_config = {
        "pki_ca_common_name": "mydomain.com",
        "pki_allow_any_name": False,
        "pki_allow_ip_sans": False,
    }
    juju.deploy(vault_charm, app=VAULT_NAME, channel="1.18/edge", config=vault_config)

    juju.integrate(f"{APP_NAME}:client-certificates", TLS_NAME)

    def charm_agents_idle(status) -> bool:
        return are_agents_idle(status, APP_NAME, idle_period=30, unit_count=NUM_UNITS)

    def vault_blocked(status) -> bool:
        return jubilant.all_blocked(status, VAULT_NAME)

    juju.wait(charm_agents_idle, timeout=600)
    juju.wait(vault_blocked)


def test_extra_sans_config_option(juju: jubilant.Juju) -> None:
Expand Down Expand Up @@ -98,3 +123,157 @@ def test_extra_sans_config_option(juju: jubilant.Juju) -> None:
assert expected_sans not in client_cert_sans, (
f"sans value {expected_sans} found in certificate sans {client_cert_sans}"
)

logger.info("Remove relation with %s", TLS_NAME)
juju.remove_relation(f"{APP_NAME}:client-certificates", f"{TLS_NAME}:certificates")
juju.wait(
lambda status: are_agents_idle(status, APP_NAME, idle_period=30, unit_count=NUM_UNITS),
timeout=600,
)


def test_initialize_vault(juju: jubilant.Juju, substrate: Substrate) -> None:
    """Initialize, unseal and authorize the deployed Vault, then wait for it to be ready.

    Drives the deployed Vault with the locally installed ``vault`` CLI, trusting
    the CA certificate found in the model's secrets, and finally runs the
    ``authorize-charm`` action with a freshly created token.

    Args:
        juju: Juju client for the model under test.
        substrate: Substrate the test runs on; selects which Vault address is used.
    """
    # follows the procedure for initializing and unsealing Vault as described in
    # https://canonical-vault-charms.readthedocs-hosted.com/en/latest/tutorial/getting_started_k8s/#deploy-vault
    logger.info("Initializing Vault")

    logger.info("Getting the Vault address")
    # query the status once so that units and address come from the same snapshot
    status = juju.status()
    vault_units = status.get_units(VAULT_NAME)
    vault_unit = next(iter(vault_units.values()))
    vault_ip = (
        status.apps[VAULT_NAME].address
        if substrate == Substrate.K8S
        else vault_unit.public_address
    )
    secrets = juju.secrets()

    logger.info("Extracting Vault's CA certificate")
    vault_ca = None
    for secret in secrets:
        if secret.label == "self-signed-vault-ca-certificate":
            vault_ca = juju.show_secret(identifier=secret.uri, reveal=True).content.get(
                "certificate"
            )
    assert vault_ca, "Vault CA certificate not found in secrets"
    Path("./vault_ca.pem").write_text(vault_ca)

    # point the locally installed Vault client to the Vault deployment
    vault_env = os.environ.copy()
    vault_env["VAULT_CACERT"] = "./vault_ca.pem"
    vault_env["VAULT_ADDR"] = f"https://{vault_ip}:8200"

    # initialize the deployed Vault
    logger.info("Running vault operator init")
    init_cmd = [
        "vault",
        "operator",
        "init",
        "-key-shares=1",
        "-key-threshold=1",
    ]
    init_result = subprocess.run(
        init_cmd, check=True, text=True, capture_output=True, env=vault_env
    )
    # NOTE(review): this output contains the unseal key and the root token
    logger.info("Vault operator init output: %s", init_result.stdout)

    # on init, Vault returns the root token and a key that are required for unsealing Vault;
    # look them up by label instead of by line position so that a changed output
    # layout fails loudly rather than yielding a wrong credential
    unseal_key_match = re.search(r"Unseal Key \d+:\s*(\S+)", init_result.stdout)
    root_token_match = re.search(r"Initial Root Token:\s*(\S+)", init_result.stdout)
    assert unseal_key_match, "Failed to extract unseal key from Vault operator init output"
    assert root_token_match, "Failed to extract root token from Vault operator init output"
    unseal_key = unseal_key_match.group(1)
    root_token = root_token_match.group(1)
    vault_env["VAULT_TOKEN"] = root_token

    # unseal the deployed Vault
    logger.info("Running vault operator unseal")
    unseal_cmd = [
        "vault",
        "operator",
        "unseal",
        unseal_key,
    ]
    unseal_result = subprocess.run(
        unseal_cmd, check=True, text=True, capture_output=True, env=vault_env
    )
    logger.info("Vault operator unseal output: %s", unseal_result.stdout)

    # authorize Vault charm
    # create a one-time token and store it as a secret
    logger.info("Creating Vault token for the vault charm")
    create_token_cmd = [
        "vault",
        "token",
        "create",
        "-ttl=60m",
    ]
    create_token_result = subprocess.run(
        create_token_cmd, check=True, text=True, capture_output=True, env=vault_env
    )
    logger.info("Vault token create output: %s", create_token_result.stdout)
    token_regex = r"token\s+([\w\.]+)"

    # extract token using regex
    match = re.search(token_regex, create_token_result.stdout)
    assert match, "Failed to extract token from Vault token create output"
    charm_vault_token = match.group(1)
    secret_id = juju.add_secret(
        "vault-token",
        {
            "token": charm_vault_token,
        },
    )

    assert secret_id, "Failed to create vault-token secret"
    juju.grant_secret("vault-token", VAULT_NAME)

    # authorize the charm to interact with Vault using the token value from the secret
    vault_unit_name = next(iter(vault_units))
    action = juju.run(
        unit=vault_unit_name,
        action="authorize-charm",
        params={
            "secret-id": str(secret_id),
        },
    )

    assert action.status == "completed", "Action should succeed"
    juju.wait(lambda status: are_apps_active_and_agents_idle(status, VAULT_NAME))


async def test_certificate_denied(juju: jubilant.Juju) -> None:
"""Process denied certificate request."""
logger.info("Integrate %s with %s for Intermediate CA", VAULT_NAME, TLS_NAME)
juju.integrate(f"{VAULT_NAME}:tls-certificates-pki", TLS_NAME)
juju.wait(lambda status: are_agents_idle(status, VAULT_NAME, idle_period=30), timeout=600)

logger.info("Integrate Valkey with Vault for client TLS")
logger.info("Certificate requests should be denied because Vault does not allow IP SANs")
juju.integrate(f"{APP_NAME}:client-certificates", VAULT_NAME)
juju.wait(
lambda status: does_status_match(
status,
expected_unit_statuses={APP_NAME: [TLSStatuses.CERTIFICATE_DENIED.value]},
num_units={APP_NAME: NUM_UNITS},
),
timeout=600,
)

logger.info("Ensure access without TLS is still possible")
hostnames = get_cluster_hostnames(juju, APP_NAME)
result = await set_key(
hostnames=hostnames,
username=CharmUsers.VALKEY_ADMIN.value,
password=get_password(juju, user=CharmUsers.VALKEY_ADMIN),
tls_enabled=False,
key=TEST_KEY,
value=TEST_VALUE,
)
assert result == "OK", "Failed to write data without TLS"

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add an additional test where you remove the relation with Vault and the status goes away?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good idea - while checking that, I found a nice improvement for the tls-relation-broken workflow.

logger.info("Removing TLS relation again")
juju.remove_relation(f"{APP_NAME}:client-certificates", VAULT_NAME)
juju.wait(
lambda status: are_apps_active_and_agents_idle(
status, APP_NAME, idle_period=30, unit_count=NUM_UNITS
),
timeout=100,
)
4 changes: 4 additions & 0 deletions tests/integration/tls/test_certificate_rotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,10 @@ async def test_ca_rotation_by_expiration(juju: jubilant.Juju) -> None:

logger.info("Waiting for CA certificate to expire")
sleep(CA_EXPIRY_TIME)
juju.wait(
lambda status: are_agents_idle(status, APP_NAME, idle_period=10, unit_count=NUM_UNITS),
timeout=600,
)

logger.info("Check access with previous certificate fails after expiration")
with pytest.raises(Exception) as exc_info:
Expand Down
Loading
Loading