diff --git a/deployments/upgrades/6_2_to_6_3_upgrade.md b/deployments/upgrades/6_2_to_6_3_upgrade.md index ffe3422ba..04a2c01d6 100644 --- a/deployments/upgrades/6_2_to_6_3_upgrade.md +++ b/deployments/upgrades/6_2_to_6_3_upgrade.md @@ -49,10 +49,33 @@ python3 deployments/upgrades/export_configs_to_helm.py \ The script: - Exports all config sections (service, workflow, dataset, backends, pools, templates, validations, roles) - Strips runtime/computed fields (`parsed_pod_template`, `parsed_resource_validations`, etc.) — the service resolves these at load time from template name references -- Replaces masked credentials with `secretName` placeholders +- Drops `None`-valued keys and empty containers — Pydantic defaults don't need to be written out +- Strips pinned tags from `workflow.backend_images.{init,client}` so workflow pods track `global.osmoImageTag` after the upgrade instead of staying on the version that was running at export time +- Replaces masked secret values (`**********`) with `{secretName: TODO-REPLACE-ME, secretKey: <field>}` placeholders and lists each path on stderr so you know which K8s Secrets to create +- Diffs the output against the chart's `services.configs.*` defaults so only fields you've genuinely customized appear in the file (pass `--no-strip-defaults` for a full dump) - Outputs YAML ready to paste into your Helm values under `services.configs` -Review the output and check the `secretRefs` list printed to stderr — you'll need to create matching K8s Secrets. +Review the stderr output carefully — it lists the TODO placeholders you need to fill in plus any existing `secretRefs` that need matching K8s Secrets in the target namespace. + +### Resolving the TODO placeholders + +For each `{secretName: TODO-REPLACE-ME, secretKey: <field>}` block in the output, pick one of these patterns and replace `TODO-REPLACE-ME` with a real Secret name: + +1. 
**Per-field Secret (matches the placeholder layout as-is).** Create a Secret with `--from-literal` keys that match each `secretKey` referenced in the placeholders. The loader reads files from `/etc/osmo/secrets/<secretName>/<secretKey>` so each masked field resolves independently. + ```bash + kubectl create secret generic osmo-workflow-creds \ + --from-literal=access_key=<access-key-value> \ + --from-literal=auth=<auth-value> + ``` + +2. **Whole-credential Secret (collapses the entire credential dict).** Replace the parent dict (e.g. the whole `credential:` block under `workflow_data`) with a single `{secretName: <name>}` ref pointing at a Secret whose `cred.yaml` key contains the full YAML mapping. The loader detects `cred.yaml` and merges all its keys into the parent dict — useful when you want to keep `endpoint` / `region` / `access_key_id` / `access_key` together. + ```yaml + workflow_data: + credential: + secretName: osmo-workflow-data-cred # provides cred.yaml + ``` + +Either way, every `secretName` you settle on must also be listed under `services.configs.secretRefs` so the chart actually mounts it into the service pods. ### Dependencies diff --git a/deployments/upgrades/export_configs_to_helm.py b/deployments/upgrades/export_configs_to_helm.py index 172c7418c..aaf4d1a04 100755 --- a/deployments/upgrades/export_configs_to_helm.py +++ b/deployments/upgrades/export_configs_to_helm.py @@ -23,6 +23,10 @@ services.configs section. Users can paste the output into their Helm values file to migrate from DB-based to ConfigMap-based config. +The output is a minimal diff from the chart's built-in defaults — fields +whose values are identical to what the chart ships out-of-the-box are +stripped. Use --no-strip-defaults to get a full dump. 
+ Usage: export OSMO_URL=https://osmo.example.com export OSMO_TOKEN= @@ -40,6 +44,7 @@ import argparse import json import os +import pathlib import sys import urllib.error import urllib.parse @@ -48,6 +53,15 @@ import yaml +# Minimum all-asterisk run length to treat as a server-side redaction mark. +# The service currently emits exactly `'**********'` (10 asterisks), but a +# length-based check is more forgiving if the mask ever changes. +MASKED_MIN_ASTERISKS = 8 + +# Placeholder emitted in the output wherever the exporter found a masked value. +# The operator replaces this with a real K8s Secret name. +TODO_PLACEHOLDER = 'TODO-REPLACE-ME' + # Runtime-generated fields that should NOT be in the ConfigMap. # These are auto-generated by configure_app() on startup. SERVICE_RUNTIME_FIELDS = { @@ -91,31 +105,60 @@ 'sync_mode', } +# Endpoints that may legitimately 400 when their DB table hasn't been created +# yet on the current schema version (e.g. group_template on pre-6.3 DBs). +# The script logs a short note and continues instead of dumping the full +# DATABASE error body. +OPTIONAL_CONFIG_ENDPOINTS = { + 'group_template', +} + + +class ExportError(Exception): + """Raised when a required API call fails. Aborts the export — we don't + want a partial export silently dropping a config section.""" + + +def fetch(base_url, path, headers, optional=False): + """Fetch JSON from the OSMO API. -def fetch(base_url, path, headers): - """Fetch JSON from the OSMO API.""" + When optional=True and the API responds with HTTP 400 "relation … does + not exist" (table absent on this schema version), logs a short message + and returns None quietly instead of dumping the raw SQL error. + + For all other failures, raises ExportError so the caller fails fast + instead of producing a partial export. 
A migration export that + silently omits a section because of a 401 / 500 / timeout is much + worse than an aborted run — partial output is still syntactically + valid YAML and the operator may apply it without realizing. + """ url = f'{base_url}{path}' parsed = urllib.parse.urlsplit(url) if parsed.scheme not in ('http', 'https'): - print(f'Error: URL must use http or https scheme: {url}', - file=sys.stderr) - return None + raise ExportError(f'URL must use http or https scheme: {url}') req = urllib.request.Request(url, headers=headers) try: with urllib.request.urlopen(req, timeout=30) as resp: return json.loads(resp.read()) except urllib.error.HTTPError as error: - print(f'Error fetching {path}: HTTP {error.code}', file=sys.stderr) + body = '' try: body = error.read().decode() - print(f' {body[:200]}', file=sys.stderr) - except Exception as read_error: - print(f' (Could not read error body: {read_error})', + except OSError: + pass + if (optional and error.code == 400 + and 'does not exist' in body + and '"DATABASE"' in body): + print(f' (skipped {path}: table not present on this schema)', file=sys.stderr) - return None + return None + message = f'Error fetching {path}: HTTP {error.code}' + if body: + message = f'{message}\n {body[:200]}' + raise ExportError(message) from error except urllib.error.URLError as error: - print(f'Error connecting to {url}: {error.reason}', file=sys.stderr) - return None + raise ExportError( + f'Error connecting to {url}: {error.reason}') from error def strip_fields(data, fields): @@ -190,7 +233,8 @@ def export_pools(base_url, headers): def export_named_configs(base_url, headers, path): """Export named configs (pod_templates, resource_validations, etc.).""" - data = fetch(base_url, f'/api/configs/{path}', headers) + optional = path in OPTIONAL_CONFIG_ENDPOINTS + data = fetch(base_url, f'/api/configs/{path}', headers, optional=optional) if data is None: return None return data @@ -210,13 +254,214 @@ def export_roles(base_url, headers): 
return items +def drop_empty(obj): + """Recursively drop None, empty dicts, and empty lists. + + A field set to None (Pydantic-optional default) or an empty container + is equivalent to omitting the field — stripping keeps exported files + as minimal overrides rather than full dumps. Applied symmetrically + inside dicts and lists: an item that cleans down to None or `{}` / `[]` + is removed from its parent. + """ + if isinstance(obj, dict): + cleaned = {} + for key, value in obj.items(): + new_value = drop_empty(value) + if _is_empty(new_value): + continue + cleaned[key] = new_value + return cleaned + if isinstance(obj, list): + cleaned = [] + for item in obj: + new_item = drop_empty(item) + if _is_empty(new_item): + continue + cleaned.append(new_item) + return cleaned + return obj + + +def _is_empty(value): + return value is None or (isinstance(value, (dict, list)) and not value) + + +def strip_backend_image_pins(configs): + """Remove pinned image tags from workflow.backend_images. + + The `init` / `client` fields pin specific image tags at export time. + Keeping them silently locks workflow pods to the version that was + running when the export happened — easy to miss during an upgrade. + Strip them so the service falls back to defaults derived from + global.osmoImageTag. + """ + workflow = configs.get('workflow') + if not isinstance(workflow, dict): + return [] + backend_images = workflow.get('backend_images') + if not isinstance(backend_images, dict): + return [] + removed = [] + for key in ('init', 'client'): + if key in backend_images: + pinned = backend_images.pop(key) + removed.append(f'workflow.backend_images.{key}={pinned}') + return removed + + +def _is_masked(value): + """True when value is a server-side redaction sentinel (run of asterisks). + + Tolerates mask-length changes in the service — any string that's pure + asterisks and at least MASKED_MIN_ASTERISKS long counts. 
+ """ + return (isinstance(value, str) + and len(value) >= MASKED_MIN_ASTERISKS + and set(value) == {'*'}) + + +def rewrite_masked_secrets(obj, path='', collected=None): + """Replace masked values with TODO secretName placeholders in-place. + + Rewrites each masked scalar as `{secretName: TODO_PLACEHOLDER, + secretKey: }` — the operator replaces the placeholder with a + real K8s Secret name and wires it into services.configs.secretRefs. + Plaintext siblings (endpoint, region, access_key_id, registry, + username, …) are left untouched so the operator sees what the + credential was pointing at. + + Returns the accumulated list of (dotted_path, field_name) tuples so + the caller can print a stderr summary of every TODO that needs + wiring up. + """ + if collected is None: + collected = [] + if isinstance(obj, dict): + for key in list(obj.keys()): + full_path = f'{path}.{key}' if path else key + value = obj[key] + if _is_masked(value): + obj[key] = {'secretName': TODO_PLACEHOLDER, 'secretKey': key} + collected.append((full_path, key)) + else: + rewrite_masked_secrets(value, full_path, collected) + elif isinstance(obj, list): + for i in range(len(obj)): + item_path = f'{path}[{i}]' + if _is_masked(obj[i]): + # Bare masked scalar in a list — no field name to use as + # secretKey, so flag with the index so the stderr summary + # points at the exact slot. + obj[i] = {'secretName': TODO_PLACEHOLDER, + 'secretKey': f'item_{i}'} + collected.append((item_path, f'item_{i}')) + else: + rewrite_masked_secrets(obj[i], item_path, collected) + return collected + + +def load_chart_defaults(values_path): + """Load services.configs.* from the chart's values.yaml to use as baseline. + + Returns an empty dict if the file can't be read or parsed — caller + treats that as "no defaults available, skip diffing." 
+ """ + try: + with open(values_path, encoding='utf-8') as f: + values = yaml.safe_load(f) or {} + except (OSError, yaml.YAMLError) as error: + print(f'Warning: could not load chart defaults from {values_path}: ' + f'{error}', file=sys.stderr) + return {} + + # Walk the expected services.configs path, falling back to "no defaults + # available" if any node isn't a mapping. --chart-values can point at + # any YAML file, so don't trust the shape. + configs = values + for key in ('services', 'configs'): + if not isinstance(configs, dict): + print(f'Warning: unexpected structure at {values_path} ' + f'(no services.configs mapping); skipping default-diff', + file=sys.stderr) + return {} + configs = configs.get(key, {}) + if not isinstance(configs, dict): + return {} + # Drop the enabled flag — we always set that explicitly in the output. + configs = {k: v for k, v in configs.items() if k != 'enabled'} + # Normalize: the same drop_empty pass we run on the export, so sides + # aren't asymmetric in how they represent "absent" fields. drop_empty + # already returns a new tree, so no copy needed. + return drop_empty(configs) + + +def diff_against_defaults(exported, defaults): + """Deep-diff: drop keys in `exported` that equal `defaults`. + + - Scalars and lists: if equal, drop. + - Dicts: recurse; if the recursed dict is empty, drop the key. + - Named-dict sections (podTemplates, resourceValidations, roles, backends, + pools): an entry is dropped if deep-equal to the chart's corresponding + entry; otherwise kept as-is (no partial diffing inside a single named + entry, since policies/platforms are intentionally all-or-nothing). 
+ """ + if not isinstance(exported, dict) or not isinstance(defaults, dict): + return exported + result = {} + for key, exp_value in exported.items(): + def_value = defaults.get(key) + if def_value is None: + result[key] = exp_value + continue + + if _is_named_dict_section(key, exp_value, def_value): + kept = { + name: entry + for name, entry in exp_value.items() + if entry != def_value.get(name) + } + if kept: + result[key] = kept + continue + + if isinstance(exp_value, dict) and isinstance(def_value, dict): + nested = diff_against_defaults(exp_value, def_value) + if nested: + result[key] = nested + continue + + if exp_value != def_value: + result[key] = exp_value + return result + + +def _is_named_dict_section(key, exp_value, def_value): + """Sections keyed by user-chosen names (podTemplates, roles, …). + + These are dict-of-dicts where each child represents a single named + entity. We compare entries whole rather than diffing inside them. + """ + named_sections = { + 'podTemplates', 'resourceValidations', 'roles', 'backends', 'pools', + 'backendTests', 'groupTemplates', + } + return (key in named_sections + and isinstance(exp_value, dict) + and isinstance(def_value, dict)) + + def collect_secret_names(configs): - """Walk the config dict and collect all secretName references.""" + """Walk the config dict and collect all secretName references. + + Skips the TODO_PLACEHOLDER emitted by rewrite_masked_secrets so the + summary only lists real secretNames the operator has set. 
+ """ secrets = set() if not isinstance(configs, dict): return secrets for key, value in configs.items(): - if key == 'secretName' and isinstance(value, str): + if (key == 'secretName' and isinstance(value, str) + and value != TODO_PLACEHOLDER): secrets.add(value) elif isinstance(value, dict): secrets.update(collect_secret_names(value)) @@ -227,6 +472,12 @@ def collect_secret_names(configs): return secrets +def _default_chart_values_path(): + """Resolve the chart values.yaml that ships next to this script.""" + here = pathlib.Path(__file__).resolve().parent + return str(here.parent / 'charts' / 'service' / 'values.yaml') + + def main(): parser = argparse.ArgumentParser( description='Export OSMO configs to Helm values YAML format.', @@ -241,6 +492,15 @@ def main(): parser.add_argument( '--header', action='append', default=[], help='Custom header (e.g., "x-osmo-user: admin"). Can be repeated.') + parser.add_argument( + '--chart-values', default=_default_chart_values_path(), + help='Path to chart values.yaml used as the "default" baseline for ' + 'diffing. Defaults to ../charts/service/values.yaml relative ' + 'to this script.') + parser.add_argument( + '--no-strip-defaults', action='store_true', + help='Emit the full config tree instead of stripping fields that ' + 'match the chart defaults.') args = parser.parse_args() if not args.url: @@ -320,6 +580,30 @@ def main(): if role_items: configs['roles'] = role_items + # 1. Drop None / empty-container keys (Pydantic defaults clutter). + configs = drop_empty(configs) + + # 2. Strip pinned backend image tags — they silently lock workflow pods + # to whatever version was running at export time. Re-run drop_empty so + # workflow.backend_images doesn't survive as `{}` if it only held the + # pinned init/client tags. + stripped_image_pins = strip_backend_image_pins(configs) + if stripped_image_pins: + configs = drop_empty(configs) + + # 3. Diff against the chart's shipped defaults. 
Only fields that differ + # from the chart's out-of-the-box configs remain. + defaults_used = False + if not args.no_strip_defaults: + defaults = load_chart_defaults(args.chart_values) + if defaults: + configs = diff_against_defaults(configs, defaults) + defaults_used = True + + # 4. Rewrite masked secrets into secretName placeholders the operator + # can fill in, and collect a summary of what they need to create. + masked_fields = rewrite_masked_secrets(configs) + # Collect secret references for secretRefs all_secrets = collect_secret_names(configs) secret_refs = [{'secretName': name} for name in sorted(all_secrets)] @@ -337,13 +621,38 @@ def main(): yaml.dump(output, sys.stdout, default_flow_style=False, sort_keys=False) + # Operator guidance on stderr print(f'\nExported {len(configs)} config sections.', file=sys.stderr) + if defaults_used: + print('(Fields matching chart defaults were stripped. ' + 'Pass --no-strip-defaults for the full tree.)', + file=sys.stderr) + if stripped_image_pins: + print('\nStripped pinned backend image tags:', file=sys.stderr) + for entry in stripped_image_pins: + print(f' - {entry}', file=sys.stderr) + print(' (Falls back to global.osmoImageTag. Re-add under ' + 'services.configs.workflow.backend_images if you want ' + 'to pin explicitly.)', file=sys.stderr) + if masked_fields: + print('\nReplaced masked secret values with TODO placeholders at:', + file=sys.stderr) + for dotted_path, _ in masked_fields: + print(f' - services.configs.{dotted_path}', file=sys.stderr) + print(f' Create a K8s Secret for each and replace {TODO_PLACEHOLDER} ' + 'with the real secretName. 
Add every secretName to ' + 'services.configs.secretRefs so the chart mounts it.', + file=sys.stderr) if secret_refs: - print(f'Found {len(secret_refs)} secret references: ' + print(f'\nFound {len(secret_refs)} existing secret references: ' f'{[s["secretName"] for s in secret_refs]}', file=sys.stderr) print('Ensure these K8s Secrets exist in your namespace.', file=sys.stderr) if __name__ == '__main__': - main() + try: + main() + except ExportError as error: + print(f'\nExport aborted: {error}', file=sys.stderr) + sys.exit(1)