From 565eddd55f10e484b52ce1648448d1565b3d3aec Mon Sep 17 00:00:00 2001 From: ch4r10t33r Date: Fri, 6 Feb 2026 14:56:05 +0000 Subject: [PATCH 01/25] feat: add isAggregator flag to validator configuration Add support for configuring nodes as aggregators through validator-config.yaml. This allows selective designation of nodes to perform aggregation duties by setting isAggregator: true in the validator configuration. Changes: - Add isAggregator field (default: false) to all validators in both local and ansible configs - Update parse-vc.sh to extract and export isAggregator flag - Modify all client command scripts to pass --is-aggregator flag when enabled - Add isAggregator status to node information output --- ansible-devnet/genesis/validator-config.yaml | 7 +++++++ client-cmds/ethlambda-cmd.sh | 12 ++++++++++-- client-cmds/grandine-cmd.sh | 12 ++++++++++-- client-cmds/lantern-cmd.sh | 12 ++++++++++-- client-cmds/lighthouse-cmd.sh | 12 ++++++++++-- client-cmds/qlean-cmd.sh | 9 +++++++++ client-cmds/ream-cmd.sh | 12 ++++++++++-- client-cmds/zeam-cmd.sh | 12 ++++++++++-- local-devnet/genesis/validator-config.yaml | 7 +++++++ parse-vc.sh | 8 ++++++++ 10 files changed, 91 insertions(+), 12 deletions(-) diff --git a/ansible-devnet/genesis/validator-config.yaml b/ansible-devnet/genesis/validator-config.yaml index 81e80f5..091275a 100644 --- a/ansible-devnet/genesis/validator-config.yaml +++ b/ansible-devnet/genesis/validator-config.yaml @@ -14,6 +14,7 @@ validators: ip: "46.224.123.223" quic: 9001 metricsPort: 9095 + isAggregator: false count: 1 # number of indices for this node - name: "ream_0" @@ -25,6 +26,7 @@ validators: ip: "77.42.27.219" quic: 9001 metricsPort: 9095 + isAggregator: false devnet: 1 count: 1 @@ -36,6 +38,7 @@ validators: ip: "46.224.123.220" quic: 9001 metricsPort: 9095 + isAggregator: false count: 1 - name: "lantern_0" @@ -47,6 +50,7 @@ validators: ip: "46.224.135.177" quic: 9001 metricsPort: 9095 + isAggregator: false count: 1 - name: "lighthouse_0" 
@@ -58,6 +62,7 @@ validators: ip: "46.224.135.169" quic: 9001 metricsPort: 9095 + isAggregator: false count: 1 - name: "grandine_0" @@ -66,6 +71,7 @@ validators: ip: "37.27.250.20" quic: 9001 metricsPort: 9095 + isAggregator: false count: 1 - name: "ethlambda_0" @@ -74,4 +80,5 @@ validators: ip: "78.47.44.215" quic: 9001 metricsPort: 9095 + isAggregator: false count: 1 \ No newline at end of file diff --git a/client-cmds/ethlambda-cmd.sh b/client-cmds/ethlambda-cmd.sh index 50bd63c..a7967f2 100644 --- a/client-cmds/ethlambda-cmd.sh +++ b/client-cmds/ethlambda-cmd.sh @@ -4,6 +4,12 @@ binary_path="$scriptDir/../ethlambda/target/release/ethlambda" +# Set aggregator flag based on isAggregator value +aggregator_flag="" +if [ "$isAggregator" == "true" ]; then + aggregator_flag="--is-aggregator" +fi + # Command when running as binary node_binary="$binary_path \ --custom-network-config-dir $configDir \ @@ -11,7 +17,8 @@ node_binary="$binary_path \ --node-id $item \ --node-key $configDir/$item.key \ --metrics-address 0.0.0.0 \ - --metrics-port $metricsPort" + --metrics-port $metricsPort \ + $aggregator_flag" # Command when running as docker container node_docker="ghcr.io/lambdaclass/ethlambda:devnet2 \ @@ -20,6 +27,7 @@ node_docker="ghcr.io/lambdaclass/ethlambda:devnet2 \ --node-id $item \ --node-key /config/$item.key \ --metrics-address 0.0.0.0 \ - --metrics-port $metricsPort" + --metrics-port $metricsPort \ + $aggregator_flag" node_setup="docker" diff --git a/client-cmds/grandine-cmd.sh b/client-cmds/grandine-cmd.sh index cd92472..63c9de3 100644 --- a/client-cmds/grandine-cmd.sh +++ b/client-cmds/grandine-cmd.sh @@ -1,5 +1,11 @@ #!/bin/bash +# Set aggregator flag based on isAggregator value +aggregator_flag="" +if [ "$isAggregator" == "true" ]; then + aggregator_flag="--is-aggregator" +fi + node_binary="$grandine_bin \ --genesis $configDir/config.yaml \ --validator-registry-path $configDir/validators.yaml \ @@ -11,7 +17,8 @@ node_binary="$grandine_bin \ --metrics \ 
--http-address 0.0.0.0 \ --http-port $metricsPort \ - --hash-sig-key-dir $configDir/hash-sig-keys" + --hash-sig-key-dir $configDir/hash-sig-keys \ + $aggregator_flag" node_docker="sifrai/lean:devnet-2 \ --genesis /config/config.yaml \ @@ -24,7 +31,8 @@ node_docker="sifrai/lean:devnet-2 \ --metrics \ --http-address 0.0.0.0 \ --http-port $metricsPort \ - --hash-sig-key-dir /config/hash-sig-keys" + --hash-sig-key-dir /config/hash-sig-keys \ + $aggregator_flag" # choose either binary or docker node_setup="docker" diff --git a/client-cmds/lantern-cmd.sh b/client-cmds/lantern-cmd.sh index b918355..2a2940f 100755 --- a/client-cmds/lantern-cmd.sh +++ b/client-cmds/lantern-cmd.sh @@ -8,6 +8,12 @@ if [ -n "$devnet" ]; then devnet_flag="--devnet $devnet" fi +# Set aggregator flag based on isAggregator value +aggregator_flag="" +if [ "$isAggregator" == "true" ]; then + aggregator_flag="--is-aggregator" +fi + # Lantern's repo: https://github.com/Pier-Two/lantern node_binary="$scriptDir/lantern/build/lantern_cli \ --data-dir $dataDir/$item \ @@ -22,7 +28,8 @@ node_binary="$scriptDir/lantern/build/lantern_cli \ --metrics-port $metricsPort \ --http-port 5055 \ --log-level debug \ - --hash-sig-key-dir $configDir/hash-sig-keys" + --hash-sig-key-dir $configDir/hash-sig-keys \ + $aggregator_flag" node_docker="$LANTERN_IMAGE --data-dir /data \ --genesis-config /config/config.yaml \ @@ -36,7 +43,8 @@ node_docker="$LANTERN_IMAGE --data-dir /data \ --metrics-port $metricsPort \ --http-port 5055 \ --log-level debug \ - --hash-sig-key-dir /config/hash-sig-keys" + --hash-sig-key-dir /config/hash-sig-keys \ + $aggregator_flag" # choose either binary or docker node_setup="docker" diff --git a/client-cmds/lighthouse-cmd.sh b/client-cmds/lighthouse-cmd.sh index 1e129c2..219b0e1 100644 --- a/client-cmds/lighthouse-cmd.sh +++ b/client-cmds/lighthouse-cmd.sh @@ -3,6 +3,12 @@ # Metrics enabled by default metrics_flag="--metrics" +# Set aggregator flag based on isAggregator value +aggregator_flag="" 
+if [ "$isAggregator" == "true" ]; then + aggregator_flag="--is-aggregator" +fi + node_binary="$lighthouse_bin lean_node \ --datadir \"$dataDir/$item\" \ --config \"$configDir/config.yaml\" \ @@ -14,7 +20,8 @@ node_binary="$lighthouse_bin lean_node \ --socket-port $quicPort\ $metrics_flag \ --metrics-address 0.0.0.0 \ - --metrics-port $metricsPort" + --metrics-port $metricsPort \ + $aggregator_flag" node_docker="hopinheimer/lighthouse:latest lighthouse lean_node \ --datadir /data \ @@ -27,6 +34,7 @@ node_docker="hopinheimer/lighthouse:latest lighthouse lean_node \ --socket-port $quicPort\ $metrics_flag \ --metrics-address 0.0.0.0 \ - --metrics-port $metricsPort" + --metrics-port $metricsPort \ + $aggregator_flag" node_setup="docker" diff --git a/client-cmds/qlean-cmd.sh b/client-cmds/qlean-cmd.sh index 28de40b..3dd34c3 100644 --- a/client-cmds/qlean-cmd.sh +++ b/client-cmds/qlean-cmd.sh @@ -3,6 +3,13 @@ #-----------------------qlean setup---------------------- # expects "qlean" submodule or symlink inside "lean-quickstart" root directory # https://github.com/qdrvm/qlean-mini + +# Set aggregator flag based on isAggregator value +aggregator_flag="" +if [ "$isAggregator" == "true" ]; then + aggregator_flag="--is-aggregator" +fi + node_binary="$scriptDir/qlean/build/src/executable/qlean \ --modules-dir $scriptDir/qlean/build/src/modules \ --genesis $configDir/config.yaml \ @@ -15,6 +22,7 @@ node_binary="$scriptDir/qlean/build/src/executable/qlean \ --node-id $item --node-key $configDir/$privKeyPath \ --listen-addr /ip4/0.0.0.0/udp/$quicPort/quic-v1 \ --prometheus-port $metricsPort \ + $aggregator_flag \ -ldebug \ -ltrace" @@ -29,6 +37,7 @@ node_docker="qdrvm/qlean-mini:devnet-2 \ --node-id $item --node-key /config/$privKeyPath \ --listen-addr /ip4/0.0.0.0/udp/$quicPort/quic-v1 \ --prometheus-port $metricsPort \ + $aggregator_flag \ -ldebug \ -ltrace" diff --git a/client-cmds/ream-cmd.sh b/client-cmds/ream-cmd.sh index 9985c92..04bd8ec 100755 --- 
a/client-cmds/ream-cmd.sh +++ b/client-cmds/ream-cmd.sh @@ -4,6 +4,12 @@ # Metrics enabled by default metrics_flag="--metrics" +# Set aggregator flag based on isAggregator value +aggregator_flag="" +if [ "$isAggregator" == "true" ]; then + aggregator_flag="--is-aggregator" +fi + # modify the path to the ream binary as per your system node_binary="$scriptDir/../ream/target/release/ream --data-dir $dataDir/$item \ lean_node \ @@ -15,7 +21,8 @@ node_binary="$scriptDir/../ream/target/release/ream --data-dir $dataDir/$item \ $metrics_flag \ --metrics-address 0.0.0.0 \ --metrics-port $metricsPort \ - --http-address 0.0.0.0" + --http-address 0.0.0.0 \ + $aggregator_flag" node_docker="ghcr.io/reamlabs/ream:latest-devnet2 --data-dir /data \ lean_node \ @@ -27,7 +34,8 @@ node_docker="ghcr.io/reamlabs/ream:latest-devnet2 --data-dir /data \ $metrics_flag \ --metrics-address 0.0.0.0 \ --metrics-port $metricsPort \ - --http-address 0.0.0.0" + --http-address 0.0.0.0 \ + $aggregator_flag" # choose either binary or docker node_setup="docker" diff --git a/client-cmds/zeam-cmd.sh b/client-cmds/zeam-cmd.sh index 4b56d7a..e9fd36d 100644 --- a/client-cmds/zeam-cmd.sh +++ b/client-cmds/zeam-cmd.sh @@ -6,13 +6,20 @@ # Metrics enabled by default metrics_flag="--metrics_enable" +# Set aggregator flag based on isAggregator value +aggregator_flag="" +if [ "$isAggregator" == "true" ]; then + aggregator_flag="--is-aggregator" +fi + node_binary="$scriptDir/../zig-out/bin/zeam node \ --custom_genesis $configDir \ --validator_config $validatorConfig \ --data-dir $dataDir/$item \ --node-id $item --node-key $configDir/$item.key \ $metrics_flag \ - --api-port $metricsPort" + --api-port $metricsPort \ + $aggregator_flag" node_docker="--security-opt seccomp=unconfined blockblaz/zeam:devnet2 node \ --custom_genesis /config \ @@ -20,7 +27,8 @@ node_docker="--security-opt seccomp=unconfined blockblaz/zeam:devnet2 node \ --data-dir /data \ --node-id $item --node-key /config/$item.key \ $metrics_flag \ - 
--api-port $metricsPort" + --api-port $metricsPort \ + $aggregator_flag" # choose either binary or docker node_setup="docker" \ No newline at end of file diff --git a/local-devnet/genesis/validator-config.yaml b/local-devnet/genesis/validator-config.yaml index 7f99d48..72d3ad3 100644 --- a/local-devnet/genesis/validator-config.yaml +++ b/local-devnet/genesis/validator-config.yaml @@ -14,6 +14,7 @@ validators: ip: "127.0.0.1" quic: 9001 metricsPort: 8081 + isAggregator: false count: 1 # number of indices for this node - name: "ream_0" @@ -25,6 +26,7 @@ validators: ip: "127.0.0.1" quic: 9002 metricsPort: 8082 + isAggregator: false devnet: 1 count: 1 @@ -36,6 +38,7 @@ validators: ip: "127.0.0.1" quic: 9003 metricsPort: 8083 + isAggregator: false count: 1 - name: "lantern_0" @@ -47,6 +50,7 @@ validators: ip: "127.0.0.1" quic: 9004 metricsPort: 8084 + isAggregator: false count: 1 - name: "lighthouse_0" @@ -58,6 +62,7 @@ validators: ip: "127.0.0.1" quic: 9005 metricsPort: 8085 + isAggregator: false count: 1 - name: "grandine_0" @@ -66,6 +71,7 @@ validators: ip: "127.0.0.1" quic: 9006 metricsPort: 8086 + isAggregator: false count: 1 - name: "ethlambda_0" @@ -77,4 +83,5 @@ validators: ip: "127.0.0.1" quic: 9007 metricsPort: 8087 + isAggregator: false count: 1 diff --git a/parse-vc.sh b/parse-vc.sh index 8b50e82..629f4e1 100644 --- a/parse-vc.sh +++ b/parse-vc.sh @@ -51,6 +51,12 @@ if [ -z "$devnet" ] || [ "$devnet" == "null" ]; then devnet="" fi +# Automatically extract isAggregator flag using yq (defaults to false if not set) +isAggregator=$(yq eval ".validators[] | select(.name == \"$item\") | .isAggregator // false" "$validator_config_file") +if [ -z "$isAggregator" ] || [ "$isAggregator" == "null" ]; then + isAggregator="false" +fi + # Automatically extract private key using yq privKey=$(yq eval ".validators[] | select(.name == \"$item\") | .privkey" "$validator_config_file") @@ -99,10 +105,12 @@ if [ "$keyType" == "hash-sig" ] && [ "$hashSigKeyIndex" != "null" ] && [ 
-n "$ha echo "Hash-Sig Key Index: $hashSigKeyIndex" echo "Hash-Sig Public Key: $hashSigPkPath" echo "Hash-Sig Secret Key: $hashSigSkPath" + echo "Is Aggregator: $isAggregator" else echo "Node: $item" echo "QUIC Port: $quicPort" echo "Metrics Port: $metricsPort" echo "Devnet: ${devnet:-}" echo "Private Key File: $privKeyPath" + echo "Is Aggregator: $isAggregator" fi From 414d49cf49957360a84b11b18d8322a326292e1b Mon Sep 17 00:00:00 2001 From: ch4r10t33r Date: Wed, 18 Mar 2026 12:04:52 +0000 Subject: [PATCH 02/25] spin-node: add --subnets flag to deploy multiple nodes per client MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds --subnets N (1–5) to deploy N nodes of each client on their associated servers, each on a distinct attestation subnet. New files: - generate-subnet-config.py: expands validator-config.yaml into validator-config-subnets-N.yaml with unique node names, incremented ports (quic/metrics/api), fresh P2P private keys, and explicit subnet membership per entry. Also sets config.attestation_committee_count = N so each client correctly partitions validators across N committees. 
Changes: - parse-env.sh: add --subnets N and --dry-run flags - spin-node.sh: - expand validator-config before genesis setup when --subnets N given - select one aggregator per subnet randomly; print prominent summary - --dry-run: simulate full deployment without applying any changes (Ansible runs with --check --diff, local execs are echoed only) - run-ansible.sh: pass validator_config_basename extra var so playbooks use the active (possibly expanded) config; add --check --diff in dry-run - ansible/playbooks/deploy-nodes.yml: use validator_config_basename to sync the correct config file to remote hosts - ansible/playbooks/prepare.yml: open port ranges for all subnet nodes on a host by matching entries via IP, not just hostname - convert-validator-config.py: fall back to httpPort for Lantern nodes when generating Leanpoint upstreams - README.md: document --subnets and --dry-run; update --prepare firewall table to reflect port ranges when --subnets N is active Rules enforced by generate-subnet-config.py: - No two nodes on the same server may share a subnet (template validated) - Each subnet has exactly one node per client - N=1 is a no-op expansion (single-subnet baseline) - N capped at 5 --- README.md | 60 ++++++++-- ansible/playbooks/deploy-nodes.yml | 5 +- ansible/playbooks/prepare.yml | 74 +++++++----- convert-validator-config.py | 4 +- generate-subnet-config.py | 186 +++++++++++++++++++++++++++++ parse-env.sh | 10 ++ run-ansible.sh | 19 ++- spin-node.sh | 147 ++++++++++++++++++----- 8 files changed, 432 insertions(+), 73 deletions(-) create mode 100644 generate-subnet-config.py diff --git a/README.md b/README.md index 75dd609..4018410 100644 --- a/README.md +++ b/README.md @@ -212,10 +212,17 @@ Every Ansible deployment automatically deploys an observability stack alongside 15. `--prepare` verify and install the software required to run lean nodes on every remote server, and open + persist the necessary firewall ports. 
- **Ansible mode only** — fails with an error if `deployment_mode` is not `ansible` - Installs: `python3` (Ansible requirement), Docker CE + Compose plugin (all clients run as containers), `yq` (required by the `common` role at every deploy) - - Opens per-node ports (`quicPort`/UDP, `metricsPort`/TCP, `apiPort`/TCP) read from `validator-config.yaml`, plus fixed observability ports (9090, 9080, 9098, 9100). Enables `ufw` with default deny incoming (persisted across reboots). + - Opens per-node ports (`quicPort`/UDP, `metricsPort`/TCP, `apiPort`/TCP) read from the active validator config, plus fixed observability ports (9090, 9080, 9098, 9100). With `--subnets N`, all N nodes' port ranges are opened per host. Enables `ufw` with default deny incoming (persisted across reboots). - Prints a per-tool, per-host status summary (`✅ ok` / `❌ missing`) and `ufw status verbose` - - `--node` is not required and is ignored; all other flags are also ignored except `--sshKey` and `--useRoot` + - `--node` is not required; passing unsupported flags alongside `--prepare` produces a prominent error — only `--sshKey` and `--useRoot` are accepted - Example: `NETWORK_DIR=ansible-devnet ./spin-node.sh --prepare --sshKey ~/.ssh/id_ed25519 --useRoot` +16. `--subnets N` expand the validator config to deploy N nodes of each client on the same server, where N is 1–5. 
+ - Generates `validator-config-subnets-N.yaml` from the template (without modifying the original) + - Each subnet node gets a unique name (`{client}_0`, `{client}_1`, …), ports incremented by the subnet index, and a fresh P2P identity key for subnets > 0 + - Subnet assignment rule: each server contributes **exactly one node per subnet** — nodes on the same server are never in the same subnet + - Every subnet contains the same set of client types + - `N=1` renames nodes to `{client}_0` with no port changes (useful for canonical naming) + - Example: `NETWORK_DIR=ansible-devnet ./spin-node.sh --node all --subnets 3 --sshKey ~/.ssh/id_ed25519 --useRoot` ### Preparing remote servers @@ -237,7 +244,7 @@ NETWORK_DIR=ansible-devnet ./spin-node.sh --prepare --sshKey ~/.ssh/id_ed25519 - **Constraints:** - Only works in ansible mode (`deployment_mode: ansible` in your config, or `--deploymentMode ansible`) -- Any other flags (e.g., `--node`, `--generateGenesis`) are silently ignored — only `--sshKey` and `--useRoot` are used +- Passing unsupported flags (e.g. `--node`, `--generateGenesis`) alongside `--prepare` produces a prominent error — only `--sshKey` and `--useRoot` are accepted - `--node` is not required; the playbook runs on all remote hosts in the inventory Once preparation succeeds, proceed with the normal deploy command: @@ -246,6 +253,43 @@ Once preparation succeeds, proceed with the normal deploy command: NETWORK_DIR=ansible-devnet ./spin-node.sh --node all --generateGenesis --sshKey ~/.ssh/id_ed25519 --useRoot ``` +### Deploying multiple subnets + +Use `--subnets N` to run N independent copies of each client on the same server. This is useful for testing multi-subnet P2P scenarios without provisioning additional machines. 
+ +```sh +# Deploy 3 subnets of every client (ansible) +NETWORK_DIR=ansible-devnet ./spin-node.sh --node all --subnets 3 \ + --generateGenesis --sshKey ~/.ssh/id_ed25519 --useRoot +``` + +**How it works:** + +`--subnets N` generates `validator-config-subnets-N.yaml` from the template (the original file is never modified). For each client in the template it creates N entries: + +| Subnet index | Name | quicPort | metricsPort | apiPort | +|---|---|---|---|---| +| 0 | `zeam_0` | base | base | base | +| 1 | `zeam_1` | base+1 | base+1 | base+1 | +| … | … | … | … | … | +| N-1 | `zeam_N-1` | base+N-1 | base+N-1 | base+N-1 | + +**Rules enforced:** +- `N` must be between 1 and 5 +- Each server contributes exactly one node per subnet (nodes on the same server are never in the same subnet) +- Every subnet contains the same set of client types +- Each node beyond subnet 0 gets a fresh P2P identity key + +**Running `--prepare` with subnets:** + +Always run `--prepare` with the same `--subnets N` value before deploying, so the firewall opens all N port ranges per host: + +```sh +# Prepare firewall for 3 subnets +NETWORK_DIR=ansible-devnet ./spin-node.sh --prepare --subnets 3 \ + --sshKey ~/.ssh/id_ed25519 --useRoot +``` + ### Checkpoint sync Checkpoint sync lets you restart clients by syncing from a remote checkpoint instead of from genesis. This is useful for joining an existing network (e.g., leanpoint mainnet) without replaying the full chain. @@ -804,7 +848,7 @@ ansible/ │ └── all.yml # Global variables ├── playbooks/ │ ├── site.yml # Main playbook (clean + copy genesis + deploy) -│ ├── prepare.yml # Bootstrap: install Docker, build-essential, yq, etc. 
+│ ├── prepare.yml # Bootstrap: install Docker CE, yq; open firewall ports │ ├── clean-node-data.yml # Clean node data directories │ ├── generate-genesis.yml # Generate genesis files │ ├── copy-genesis.yml # Copy genesis files to remote hosts @@ -844,13 +888,13 @@ The command runs `ansible/playbooks/prepare.yml` against all remote hosts in the **Firewall rules opened (via `ufw`):** -Each host's ports are read directly from `validator-config.yaml`, so only the ports actually configured for that node are opened: +Ports are read from the active validator config (the `--subnets`-expanded file when `--subnets N` is used, or `validator-config.yaml` otherwise). Entries are matched by IP address, so all N subnet nodes on a server are found and all their ports are opened: | Port | Protocol | Source | |---|---|---| -| `quicPort` | UDP | Per-node — QUIC/P2P transport (e.g. 9001) | -| `metricsPort` | TCP | Per-node — Prometheus scrape endpoint (e.g. 9095) | -| `apiPort` / `httpPort` | TCP | Per-node — REST API (e.g. 5055) | +| `quicPort` … `quicPort+N-1` | UDP | Per-node — QUIC/P2P transport (e.g. 9001–9003 for N=3) | +| `metricsPort` … `metricsPort+N-1` | TCP | Per-node — Prometheus scrape endpoint | +| `apiPort`/`httpPort` … `+N-1` | TCP | Per-node — REST API | | 9090 | TCP | Observability — Prometheus | | 9080 | TCP | Observability — Promtail | | 9098 | TCP | Observability — cAdvisor | diff --git a/ansible/playbooks/deploy-nodes.yml b/ansible/playbooks/deploy-nodes.yml index 398952c..523b858 100644 --- a/ansible/playbooks/deploy-nodes.yml +++ b/ansible/playbooks/deploy-nodes.yml @@ -122,7 +122,10 @@ - name: Sync validator-config.yaml to remote host copy: - src: "{{ local_genesis_dir }}/validator-config.yaml" + # Use the expanded subnet config when --subnets was specified; fall back + # to the standard validator-config.yaml otherwise. The destination is + # always validator-config.yaml so client roles don't need to change. 
+ src: "{{ local_genesis_dir }}/{{ validator_config_basename | default('validator-config.yaml') }}" dest: "{{ genesis_dir }}/validator-config.yaml" mode: '0644' force: yes diff --git a/ansible/playbooks/prepare.yml b/ansible/playbooks/prepare.yml index 9e09628..de7bb3c 100644 --- a/ansible/playbooks/prepare.yml +++ b/ansible/playbooks/prepare.yml @@ -13,12 +13,17 @@ # 3. yq — the common role (which runs at every deploy) hard-fails if # `yq --version` is not available on the remote host. # -# Firewall rules opened per host (read from validator-config.yaml): +# Firewall rules opened per host (read from the active validator config, +# which is the --subnets-expanded file when --subnets N was given): # # - quicPort (UDP) — QUIC/P2P transport for inter-node communication # - metricsPort (TCP) — Prometheus metrics scrape endpoint # - apiPort / httpPort (TCP) — REST API # +# With --subnets N, a server runs N nodes with ports base, base+1, …, base+N-1. +# All N port ranges are opened because the entries are matched by IP address, +# collecting every node entry that lives on the current host. +# # Observability ports opened on every host (fixed, from observability role defaults): # # - 9090/tcp — prometheus @@ -164,12 +169,17 @@ # frontend that writes iptables rules and persists them across reboots. # Both are checked and installed if absent before any rules are applied. # - # Ports are read from the local validator-config.yaml on the Ansible - # controller. Each host's inventory_hostname matches its node name - # (e.g. zeam_0, ream_0), so we select the right validator entry by name. + # Port lookup uses the active validator config file, which is the + # --subnets-expanded file (e.g. validator-config-subnets-3.yaml) when + # --subnets N was passed, or validator-config.yaml otherwise. + # validator_config_basename is injected by run-ansible.sh. 
+ # + # Entries are matched by IP (ansible_host) rather than by hostname so + # that all subnet nodes sharing a server are found in one pass. + # With N subnets, a server has N entries and all N × 3 ports are opened. # # Lantern uses httpPort instead of apiPort — both are handled via the - # default() chain. + # per-entry default() chain. # ────────────────────────────────────────────────────────────────────────── - name: Check if iptables is already installed @@ -194,22 +204,21 @@ become: yes when: ansible_os_family == "Debian" - - name: Read per-node port configuration from validator-config.yaml + - name: Read all node entries for this host from the active validator config vars: - _vc: "{{ lookup('file', _genesis_dir + '/validator-config.yaml') | from_yaml }}" - _entry: "{{ _vc.validators | selectattr('name', 'equalto', inventory_hostname) | list | first | default({}) }}" + _vc_file: "{{ _genesis_dir + '/' + (validator_config_basename | default('validator-config.yaml')) }}" + _vc: "{{ lookup('file', _vc_file) | from_yaml }}" + _entries: "{{ _vc.validators | selectattr('enrFields.ip', 'equalto', ansible_host) | list }}" set_fact: - fw_quic_port: "{{ _entry.enrFields.quic | default(9001) | string }}" - fw_metrics_port: "{{ _entry.metricsPort | default(9095) | string }}" - fw_api_port: "{{ _entry.apiPort | default(_entry.httpPort | default(5055)) | string }}" - fw_node_found: "{{ (_entry | length) > 0 }}" + fw_node_entries: "{{ _entries }}" + fw_nodes_found: "{{ (_entries | length) > 0 }}" - - name: Warn if node not found in validator-config.yaml + - name: Warn if no entries found for this host in the validator config debug: msg: >- - Warning: {{ inventory_hostname }} not found in validator-config.yaml. - Node-specific firewall rules will be skipped. - when: not fw_node_found | bool + Warning: no validator entries found for {{ ansible_host }} ({{ inventory_hostname }}) + in the active config. Node-specific firewall rules will be skipped. 
+ when: not fw_nodes_found | bool # SSH must be allowed before enabling ufw, or we lock ourselves out. - name: Allow SSH (22/tcp) @@ -220,32 +229,41 @@ comment: "SSH" become: yes - - name: Open QUIC P2P port (UDP) + - name: Open QUIC P2P ports (UDP) for all subnet nodes on this host ufw: rule: allow - port: "{{ fw_quic_port }}" + port: "{{ item.enrFields.quic | string }}" proto: udp - comment: "lean-quickstart QUIC P2P ({{ inventory_hostname }})" + comment: "lean-quickstart QUIC ({{ item.name }})" become: yes - when: fw_node_found | bool + loop: "{{ fw_node_entries }}" + loop_control: + label: "{{ item.name }}" + when: fw_nodes_found | bool - - name: Open metrics port (TCP) + - name: Open metrics ports (TCP) for all subnet nodes on this host ufw: rule: allow - port: "{{ fw_metrics_port }}" + port: "{{ item.metricsPort | string }}" proto: tcp - comment: "lean-quickstart metrics ({{ inventory_hostname }})" + comment: "lean-quickstart metrics ({{ item.name }})" become: yes - when: fw_node_found | bool + loop: "{{ fw_node_entries }}" + loop_control: + label: "{{ item.name }}" + when: fw_nodes_found | bool - - name: Open API port (TCP) + - name: Open API ports (TCP) for all subnet nodes on this host ufw: rule: allow - port: "{{ fw_api_port }}" + port: "{{ (item.apiPort | default(item.httpPort | default(5055))) | string }}" proto: tcp - comment: "lean-quickstart API ({{ inventory_hostname }})" + comment: "lean-quickstart API ({{ item.name }})" become: yes - when: fw_node_found | bool + loop: "{{ fw_node_entries }}" + loop_control: + label: "{{ item.name }}" + when: fw_nodes_found | bool - name: Open observability stack ports (TCP) ufw: diff --git a/convert-validator-config.py b/convert-validator-config.py index 91a6e18..14bc96b 100644 --- a/convert-validator-config.py +++ b/convert-validator-config.py @@ -67,8 +67,8 @@ def convert_validator_config( if docker_host: ip = "host.docker.internal" - # Use apiPort from config - http_port = validator.get('apiPort', base_port + 
idx) + # Use apiPort, falling back to httpPort (used by Lantern), then a derived default. + http_port = validator.get('apiPort') or validator.get('httpPort') or (base_port + idx) upstream = { "name": name, diff --git a/generate-subnet-config.py b/generate-subnet-config.py new file mode 100644 index 0000000..fe0283d --- /dev/null +++ b/generate-subnet-config.py @@ -0,0 +1,186 @@ +#!/usr/bin/env python3 +""" +Generate an expanded validator-config.yaml from a template by distributing +each client across N subnets, one node per subnet per server. + +Subnet assignment rules +----------------------- + - Each server (IP) contributes exactly ONE node to each subnet. + - No two nodes on the same server share a subnet. + - Every subnet contains exactly the same number of clients. + - Every subnet contains at least one unique client (i.e. no two subnets + share a node identity). + +These rules are automatically satisfied by the expansion algorithm: the +template is expected to have one entry per client, each on its own server. +The script validates this assumption and errors out if it is violated. + +Port assignment +--------------- + For subnet i, all ports are incremented by i relative to the template entry: + quicPort += i + metricsPort += i + apiPort += i (or httpPort for Lantern) + + This keeps nodes on the same host from binding conflicting ports. + +Limits +------ + N must be between 1 and 5 (inclusive). + N=1 produces a single subnet (nodes renamed to {client}_0) with no port changes. + +Usage +----- + python3 generate-subnet-config.py + +Example +------- + python3 generate-subnet-config.py \\ + ansible-devnet/genesis/validator-config.yaml 2 \\ + ansible-devnet/genesis/validator-config-subnets-2.yaml +""" + +from __future__ import annotations + +import copy +import secrets +import sys +from collections import Counter + +import yaml + +MAX_SUBNETS = 5 + + +def _client_name(node_name: str) -> str: + """Extract the client type prefix (e.g. 
'zeam' from 'zeam_0').""" + return node_name.split("_")[0] + + +def _validate_template(validators: list[dict]) -> None: + """ + Enforce that the template satisfies the one-server-one-node requirement: + - No two entries share the same IP address. + - No two entries share the same client type (name prefix). + Either violation would break the subnet isolation guarantee. + """ + ips = [v["enrFields"]["ip"] for v in validators] + clients = [_client_name(v["name"]) for v in validators] + + duplicate_ips = [ip for ip, n in Counter(ips).items() if n > 1] + if duplicate_ips: + raise ValueError( + "Template validator-config.yaml has multiple entries sharing the " + f"same IP address: {duplicate_ips}. Each server must have exactly " + "one entry in the template. Use --subnets to add more nodes per server." + ) + + duplicate_clients = [c for c, n in Counter(clients).items() if n > 1] + if duplicate_clients: + raise ValueError( + "Template validator-config.yaml has multiple entries for the same " + f"client type: {duplicate_clients}. Each client type must appear " + "exactly once in the template." + ) + + +def expand(template: dict, n_subnets: int) -> dict: + """ + Return a new config dict with every validator entry replicated across + n_subnets subnets. + + Output ordering: all subnet-0 nodes first, then all subnet-1 nodes, … + This makes the subnet grouping visually obvious in the generated file. + """ + validators = template["validators"] + _validate_template(validators) + + result = copy.deepcopy(template) + + # attestation_committee_count must equal the number of subnets so that + # each client correctly partitions itself into N separate committees. 
+ if "config" not in result: + result["config"] = {} + result["config"]["attestation_committee_count"] = n_subnets + + expanded: list[dict] = [] + + for i in range(n_subnets): + for validator in validators: + client = _client_name(validator["name"]) + entry = copy.deepcopy(validator) + + # Canonical name: {client}_{subnet_index} + entry["name"] = f"{client}_{i}" + entry["subnet"] = i # explicit membership for human readability + + # Every node beyond subnet 0 gets a fresh P2P identity key so + # nodes on the same server have different identities. + if i > 0: + entry["privkey"] = secrets.token_hex(32) + + # Increment all network ports by the subnet index so nodes that + # share a host do not bind the same port. + entry["enrFields"]["quic"] = validator["enrFields"]["quic"] + i + entry["metricsPort"] = validator["metricsPort"] + i + if "apiPort" in entry: + entry["apiPort"] = validator["apiPort"] + i + if "httpPort" in entry: + entry["httpPort"] = validator["httpPort"] + i + + # spin-node.sh re-assigns the aggregator before deploying. 
+ entry["isAggregator"] = False + + expanded.append(entry) + + result["validators"] = expanded + return result + + +def main() -> None: + if len(sys.argv) != 4: + print(f"Usage: {sys.argv[0]} ") + sys.exit(1) + + template_path = sys.argv[1] + output_path = sys.argv[3] + + try: + n_subnets = int(sys.argv[2]) + if not (1 <= n_subnets <= MAX_SUBNETS): + raise ValueError + except ValueError: + print( + f"Error: N must be an integer between 1 and {MAX_SUBNETS}, " + f"got: {sys.argv[2]!r}" + ) + sys.exit(1) + + with open(template_path) as fh: + template = yaml.safe_load(fh) + + if "validators" not in template or not template["validators"]: + print(f"Error: no validators found in {template_path}") + sys.exit(1) + + try: + expanded = expand(template, n_subnets) + except ValueError as exc: + print(f"Error: {exc}") + sys.exit(1) + + with open(output_path, "w") as fh: + yaml.dump(expanded, fh, default_flow_style=False, sort_keys=False) + + n_clients = len(template["validators"]) + n_nodes = len(expanded["validators"]) + print( + f"Generated {output_path}:\n" + f" {n_clients} client(s) × {n_subnets} subnet(s) = {n_nodes} nodes\n" + f" config.attestation_committee_count = {n_subnets}\n" + f" Each server contributes exactly 1 node per subnet (no intra-server subnet sharing)" + ) + + +if __name__ == "__main__": + main() diff --git a/parse-env.sh b/parse-env.sh index 506b421..7b5b4b6 100755 --- a/parse-env.sh +++ b/parse-env.sh @@ -108,6 +108,15 @@ while [[ $# -gt 0 ]]; do prepareMode=true shift ;; + --subnets) + subnets="$2" + shift # past argument + shift # past value + ;; + --dry-run) + dryRun=true + shift + ;; *) # unknown option shift # past argument ;; @@ -148,3 +157,4 @@ echo "coreDumps = ${coreDumps:-disabled}" echo "checkpointSyncUrl = ${checkpointSyncUrl:-}" echo "restartClient = ${restartClient:-}" echo "skipLeanpoint = ${skipLeanpoint:-false}" +echo "dryRun = ${dryRun:-false}" diff --git a/run-ansible.sh b/run-ansible.sh index 64c8942..e0fe7c0 100755 --- 
a/run-ansible.sh +++ b/run-ansible.sh @@ -29,6 +29,7 @@ action="$8" # Action: "stop" to stop nodes, otherwise deploy coreDumps="$9" # Core dump configuration: "all", node names, or client types skipGenesis="${10}" # Set to "true" to skip genesis generation (e.g. when restarting with checkpoint sync) checkpointSyncUrl="${11}" # URL for checkpoint sync (when restarting with --restart-client) +dryRun="${12}" # Set to "true" to run Ansible with --check --diff (no changes applied) # Determine SSH user: use root if --useRoot flag is set, otherwise use current user if [ "$useRoot" == "true" ]; then @@ -110,6 +111,11 @@ if [ -n "$validatorConfig" ] && [ "$validatorConfig" != "genesis_bootnode" ]; th EXTRA_VARS="$EXTRA_VARS validator_config=$validatorConfig" fi +# Pass the basename of the active validator config file so deploy-nodes.yml +# can sync the correct file (e.g. validator-config-subnets-2.yaml) to remotes. +validator_config_basename=$(basename "$validator_config_file") +EXTRA_VARS="$EXTRA_VARS validator_config_basename=$validator_config_basename" + if [ -n "$coreDumps" ]; then EXTRA_VARS="$EXTRA_VARS enable_core_dumps=$coreDumps" fi @@ -152,6 +158,11 @@ ANSIBLE_CMD="$ANSIBLE_CMD -i $INVENTORY_FILE" ANSIBLE_CMD="$ANSIBLE_CMD $PLAYBOOK" ANSIBLE_CMD="$ANSIBLE_CMD -e \"$EXTRA_VARS\"" +# Dry-run: show what Ansible would change without applying anything. +if [ "$dryRun" == "true" ]; then + ANSIBLE_CMD="$ANSIBLE_CMD --check --diff" +fi + echo "Running Ansible playbook for $ACTION_MSG..." echo "Command: $ANSIBLE_CMD" echo "" @@ -161,14 +172,16 @@ cd "$ANSIBLE_DIR" eval $ANSIBLE_CMD EXIT_CODE=$? +_dry_tag="" +[ "$dryRun" == "true" ] && _dry_tag=" (dry-run — no changes applied)" if [ $EXIT_CODE -eq 0 ]; then echo "" if [ "$action" == "stop" ]; then - echo "✅ Ansible stop operation completed successfully!" + echo "✅ Ansible stop operation completed successfully!${_dry_tag}" elif [ "$action" == "prepare" ]; then - echo "✅ Server preparation completed successfully!" 
+ echo "✅ Server preparation completed successfully!${_dry_tag}" else - echo "✅ Ansible deployment completed successfully!" + echo "✅ Ansible deployment completed successfully!${_dry_tag}" fi else echo "" diff --git a/spin-node.sh b/spin-node.sh index 39963e0..0459bea 100755 --- a/spin-node.sh +++ b/spin-node.sh @@ -68,6 +68,35 @@ if [ "$deployment_mode" == "ansible" ] && ([ "$validatorConfig" == "genesis_boot echo "Using Ansible deployment: configDir=$configDir, validator config=$validator_config_file" fi +# If --subnets N is specified, expand the validator config template into a new +# file with N nodes per client (same IP, unique incremented ports and keys). +# This must run after configDir/validator_config_file are resolved so the +# generated file lands in the correct genesis directory. +if [ -n "$subnets" ] && [ "$subnets" -ge 1 ] 2>/dev/null; then + if ! [[ "$subnets" =~ ^[0-9]+$ ]] || [ "$subnets" -lt 1 ] || [ "$subnets" -gt 5 ]; then + echo "Error: --subnets requires an integer between 1 and 5, got: $subnets" + exit 1 + fi + + if ! command -v python3 &> /dev/null; then + echo "Error: python3 is required to generate the subnet config." + exit 1 + fi + + expanded_config="${configDir}/validator-config-subnets-${subnets}.yaml" + [ "$dryRun" == "true" ] && echo "[DRY RUN] Generating subnet config preview (no deployment will occur)" + echo "Generating subnet config ($subnets subnet(s) per client) → $expanded_config" + + if ! python3 "$scriptDir/generate-subnet-config.py" \ + "$validator_config_file" "$subnets" "$expanded_config"; then + echo "❌ Failed to generate subnet config." + exit 1 + fi + + validator_config_file="$expanded_config" + echo "Using expanded config: $validator_config_file" +fi + # Handle --prepare mode: verify and install required software on all remote servers. # Must run after deployment_mode is resolved but before genesis setup. 
if [ -n "$prepareMode" ] && [ "$prepareMode" == "true" ]; then @@ -121,21 +150,30 @@ if [ -n "$prepareMode" ] && [ "$prepareMode" == "true" ]; then exit 1 fi - echo "Preparing remote servers (verifying and installing required software)..." + if [ "$dryRun" == "true" ]; then + echo "[DRY RUN] Would prepare remote servers — running Ansible with --check --diff" + else + echo "Preparing remote servers (verifying and installing required software)..." + fi - if ! "$scriptDir/run-ansible.sh" "$configDir" "" "" "" "$validator_config_file" "$sshKeyFile" "$useRoot" "prepare" "" "" ""; then + if ! "$scriptDir/run-ansible.sh" "$configDir" "" "" "" "$validator_config_file" "$sshKeyFile" "$useRoot" "prepare" "" "" "" "$dryRun"; then echo "❌ Server preparation failed." exit 1 fi - echo "✅ All remote servers are prepared." + [ "$dryRun" == "true" ] && echo "✅ Dry-run complete — no changes were made." || echo "✅ All remote servers are prepared." exit 0 fi #1. setup genesis params and run genesis generator -source "$(dirname $0)/set-up.sh" -# ✅ Genesis generator implemented using PK's eth-beacon-genesis tool -# Generates: validators.yaml, nodes.yaml, genesis.json, genesis.ssz, and .key files +if [ "$dryRun" == "true" ]; then + echo "[DRY RUN] Skipping genesis generation (set-up.sh would run here)" + node_setup="${node_setup:-docker}" # ensure local-loop variable has a default +else + source "$(dirname $0)/set-up.sh" + # ✅ Genesis generator implemented using PK's eth-beacon-genesis tool + # Generates: validators.yaml, nodes.yaml, genesis.json, genesis.ssz, and .key files +fi # 2. collect the nodes that the user has asked us to spin and perform setup @@ -163,42 +201,81 @@ echo "Detected nodes: ${nodes[@]}" spin_nodes=() restart_with_checkpoint_sync=false -# Aggregator selection logic (1 aggregator per subnet) -# If user specified --aggregator, use that; otherwise randomly select one +# Aggregator selection — one randomly chosen aggregator per subnet. 
+# +# Subnet membership is derived from the numeric suffix of each node name: +# zeam_0 → subnet 0 +# zeam_1 → subnet 1 +# Without --subnets all nodes carry suffix _0, so a single aggregator is +# selected as before. +# +# When --aggregator is specified, that node is used as the aggregator for +# its own subnet; all other subnets still get a random selection. + +# If --aggregator was given, validate it exists before doing anything else. if [ -n "$aggregatorNode" ]; then - # Validate that the specified aggregator exists in the validator list aggregator_found=false for available_node in "${nodes[@]}"; do if [[ "$aggregatorNode" == "$available_node" ]]; then - selected_aggregator="$aggregatorNode" aggregator_found=true - echo "Using user-specified aggregator: $selected_aggregator" break fi done - if [[ "$aggregator_found" == false ]]; then echo "Error: Specified aggregator '$aggregatorNode' not found in validator config" echo "Available nodes: ${nodes[@]}" exit 1 fi -else - # Randomly select one node as aggregator - # Get the number of nodes - num_nodes=${#nodes[@]} - # Generate random index (0 to num_nodes-1) - random_index=$((RANDOM % num_nodes)) - selected_aggregator="${nodes[$random_index]}" - echo "Randomly selected aggregator: $selected_aggregator (index $random_index out of $num_nodes nodes)" fi -# Update the validator-config.yaml to set isAggregator flag -# First, reset all nodes to isAggregator: false -yq eval -i '.validators[].isAggregator = false' "$validator_config_file" +# Collect unique subnet indices (the part after the last '_' in each name). +_subnet_indices=() +for _node in "${nodes[@]}"; do + _subnet_indices+=("${_node##*_}") +done +_unique_subnets=($(printf '%s\n' "${_subnet_indices[@]}" | sort -un)) + +echo "Detected ${#_unique_subnets[@]} subnet(s): ${_unique_subnets[*]}" + +# Reset every node's isAggregator flag first (skipped in dry-run). 
+if [ "$dryRun" != "true" ]; then + yq eval -i '.validators[].isAggregator = false' "$validator_config_file" +fi + +# Select one aggregator per subnet and set the flag. +_aggregator_summary=() +for _subnet_idx in "${_unique_subnets[@]}"; do + _subnet_nodes=() + for _node in "${nodes[@]}"; do + [[ "${_node##*_}" == "$_subnet_idx" ]] && _subnet_nodes+=("$_node") + done + + _selected_agg="" -# Then set the selected aggregator to isAggregator: true -yq eval -i "(.validators[] | select(.name == \"$selected_aggregator\") | .isAggregator) = true" "$validator_config_file" -echo "Set $selected_aggregator as aggregator in $validator_config_file" + # Use the user-specified aggregator if it belongs to this subnet. + if [ -n "$aggregatorNode" ] && [[ "${aggregatorNode##*_}" == "$_subnet_idx" ]]; then + _selected_agg="$aggregatorNode" + else + _n=${#_subnet_nodes[@]} + _selected_agg="${_subnet_nodes[$((RANDOM % _n))]}" + fi + + if [ "$dryRun" != "true" ]; then + yq eval -i "(.validators[] | select(.name == \"$_selected_agg\") | .isAggregator) = true" "$validator_config_file" + fi + _aggregator_summary+=("subnet $_subnet_idx → $_selected_agg") +done + +# Print a prominent aggregator summary banner. +echo "" +echo "╔══════════════════════════════════════════════════════════════╗" +echo "║ 🗳 Aggregator Selection ║" +echo "╠══════════════════════════════════════════════════════════════╣" +for _line in "${_aggregator_summary[@]}"; do + printf "║ %-60s║\n" "$_line" +done +echo "╚══════════════════════════════════════════════════════════════╝" +echo "" # When --restart-client is specified, use it as the node list and enable checkpoint sync mode if [[ -n "$restartClient" ]]; then @@ -306,7 +383,7 @@ if [ "$deployment_mode" == "ansible" ]; then # Handle stop action if [ -n "$stopNodes" ] && [ "$stopNodes" == "true" ]; then echo "Stopping nodes via Ansible..." - if ! 
"$scriptDir/run-ansible.sh" "$configDir" "$ansible_node_arg" "$cleanData" "$validatorConfig" "$validator_config_file" "$sshKeyFile" "$useRoot" "stop" "$coreDumps" "$ansible_skip_genesis" ""; then + if ! "$scriptDir/run-ansible.sh" "$configDir" "$ansible_node_arg" "$cleanData" "$validatorConfig" "$validator_config_file" "$sshKeyFile" "$useRoot" "stop" "$coreDumps" "$ansible_skip_genesis" "" "$dryRun"; then echo "❌ Ansible stop operation failed. Exiting." exit 1 fi @@ -315,7 +392,10 @@ if [ "$deployment_mode" == "ansible" ]; then # Call separate Ansible execution script # If Ansible deployment fails, exit immediately (don't fall through to local deployment) - if ! "$scriptDir/run-ansible.sh" "$configDir" "$ansible_node_arg" "$cleanData" "$validatorConfig" "$validator_config_file" "$sshKeyFile" "$useRoot" "" "$coreDumps" "$ansible_skip_genesis" "$ansible_checkpoint_url"; then + if [ "$dryRun" == "true" ]; then + echo "[DRY RUN] Would deploy via Ansible — running playbook with --check --diff" + fi + if ! "$scriptDir/run-ansible.sh" "$configDir" "$ansible_node_arg" "$cleanData" "$validatorConfig" "$validator_config_file" "$sshKeyFile" "$useRoot" "" "$coreDumps" "$ansible_skip_genesis" "$ansible_checkpoint_url" "$dryRun"; then echo "❌ Ansible deployment failed. Exiting." exit 1 fi @@ -511,9 +591,14 @@ for item in "${spin_nodes[@]}"; do execCmd="$popupTerminalCmd $execCmd" fi; - echo "$execCmd" - eval "$execCmd" & - pid=$! + if [ "$dryRun" == "true" ]; then + echo "[DRY RUN] Would execute: $execCmd" + pid=0 + else + echo "$execCmd" + eval "$execCmd" & + pid=$! 
+ fi spinned_pids+=($pid) done; From 32f6a28fc00426be92fd8383c090023cbcde2f56 Mon Sep 17 00:00:00 2001 From: ch4r10t33r Date: Wed, 18 Mar 2026 13:42:59 +0000 Subject: [PATCH 03/25] ansible: copy only the node's own hash-sig keys to each server MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously both deploy-nodes.yml and copy-genesis.yml synced the entire hash-sig-keys/ directory to every remote host, meaning every server received every validator's sk/pk pair. Now each playbook: 1. Reads annotated_validators.yaml on the controller to look up the privkey_file entries for the node being deployed (inventory_hostname). 2. Derives the pk filename by replacing _sk.ssz → _pk.ssz. 3. Copies only those specific files to the target host. A server running zeam_0 (validator_0_sk.ssz / validator_0_pk.ssz) no longer receives validator_1_sk.ssz, validator_2_sk.ssz, etc. --- ansible/playbooks/copy-genesis.yml | 23 +++++++++++++++++------ ansible/playbooks/deploy-nodes.yml | 28 +++++++++++++++++++++------- 2 files changed, 38 insertions(+), 13 deletions(-) diff --git a/ansible/playbooks/copy-genesis.yml b/ansible/playbooks/copy-genesis.yml index 76a25f8..6fc709f 100644 --- a/ansible/playbooks/copy-genesis.yml +++ b/ansible/playbooks/copy-genesis.yml @@ -153,20 +153,31 @@ loop_control: label: "{{ item }}.key" + - name: Resolve hash-sig key files for this node + vars: + _av: "{{ lookup('file', genesis_dir + '/annotated_validators.yaml') | from_yaml }}" + _assignments: "{{ _av[inventory_hostname] | default([]) }}" + _sk_files: "{{ _assignments | map(attribute='privkey_file') | list }}" + _pk_files: "{{ _sk_files | map('regex_replace', '_sk\\.ssz$', '_pk.ssz') | list }}" + set_fact: + node_hash_sig_files: "{{ _sk_files + _pk_files }}" + when: hash_sig_keys_stat.stat.exists + - name: Create hash-sig-keys directory on remote file: path: "{{ actual_remote_genesis_dir }}/hash-sig-keys" state: directory mode: '0755' - when: 
hash_sig_keys_stat.stat.exists + when: hash_sig_keys_stat.stat.exists and (node_hash_sig_files | default([]) | length > 0) - - name: Copy hash-sig-keys directory to remote host + - name: Copy hash-sig key files for this node only copy: - src: "{{ genesis_dir }}/hash-sig-keys/" - dest: "{{ actual_remote_genesis_dir }}/hash-sig-keys/" - mode: '0644' + src: "{{ genesis_dir }}/hash-sig-keys/{{ item }}" + dest: "{{ actual_remote_genesis_dir }}/hash-sig-keys/{{ item }}" + mode: '0600' force: yes - when: hash_sig_keys_stat.stat.exists + loop: "{{ node_hash_sig_files | default([]) }}" + when: hash_sig_keys_stat.stat.exists and (node_hash_sig_files | default([]) | length > 0) - name: List files on remote genesis directory find: diff --git a/ansible/playbooks/deploy-nodes.yml b/ansible/playbooks/deploy-nodes.yml index 523b858..41f408f 100644 --- a/ansible/playbooks/deploy-nodes.yml +++ b/ansible/playbooks/deploy-nodes.yml @@ -8,7 +8,7 @@ # - node key files (*.key) # - config.yaml, validators.yaml, nodes.yaml # - genesis.ssz, genesis.json -# - hash-sig-keys/ directory (if exists, for qlean nodes) +# - hash-sig-keys/ directory (if exists): only the sk/pk files for this node's validators - name: Parse and validate node names hosts: localhost @@ -168,23 +168,37 @@ - deploy - sync + - name: Resolve hash-sig key files for this node + vars: + _av: "{{ lookup('file', local_genesis_dir + '/annotated_validators.yaml') | from_yaml }}" + _assignments: "{{ _av[node_name] | default([]) }}" + _sk_files: "{{ _assignments | map(attribute='privkey_file') | list }}" + _pk_files: "{{ _sk_files | map('regex_replace', '_sk\\.ssz$', '_pk.ssz') | list }}" + set_fact: + node_hash_sig_files: "{{ _sk_files + _pk_files }}" + when: hash_sig_keys_local.stat.exists + tags: + - deploy + - sync + - name: Create hash-sig-keys directory on remote file: path: "{{ genesis_dir }}/hash-sig-keys" state: directory mode: '0755' - when: hash_sig_keys_local.stat.exists + when: hash_sig_keys_local.stat.exists and 
(node_hash_sig_files | default([]) | length > 0) tags: - deploy - sync - - name: Sync hash-sig-keys directory (for qlean nodes) + - name: Copy hash-sig key files for this node only copy: - src: "{{ local_genesis_dir }}/hash-sig-keys/" - dest: "{{ genesis_dir }}/hash-sig-keys/" - mode: '0644' + src: "{{ local_genesis_dir }}/hash-sig-keys/{{ item }}" + dest: "{{ genesis_dir }}/hash-sig-keys/{{ item }}" + mode: '0600' force: yes - when: hash_sig_keys_local.stat.exists + loop: "{{ node_hash_sig_files | default([]) }}" + when: hash_sig_keys_local.stat.exists and (node_hash_sig_files | default([]) | length > 0) tags: - deploy - sync From 1db5cd143c33e1466c5d3612fe76b196b30ccd7c Mon Sep 17 00:00:00 2001 From: ch4r10t33r Date: Wed, 18 Mar 2026 13:48:19 +0000 Subject: [PATCH 04/25] spin-node: assert exactly 1 aggregator per subnet after selection --- spin-node.sh | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/spin-node.sh b/spin-node.sh index 0459bea..c3e5007 100755 --- a/spin-node.sh +++ b/spin-node.sh @@ -266,6 +266,28 @@ for _subnet_idx in "${_unique_subnets[@]}"; do _aggregator_summary+=("subnet $_subnet_idx → $_selected_agg") done +# Verify the invariant: exactly 1 aggregator per subnet (skipped in dry-run). +if [ "$dryRun" != "true" ]; then + _verify_failed=false + for _subnet_idx in "${_unique_subnets[@]}"; do + _agg_count=0 + for _node in "${nodes[@]}"; do + if [[ "${_node##*_}" == "$_subnet_idx" ]]; then + _is_agg=$(yq eval ".validators[] | select(.name == \"$_node\") | .isAggregator" "$validator_config_file") + [[ "$_is_agg" == "true" ]] && _agg_count=$((_agg_count + 1)) + fi + done + if [ "$_agg_count" -ne 1 ]; then + echo "Error: subnet $_subnet_idx has $_agg_count aggregator(s) — expected exactly 1" >&2 + _verify_failed=true + fi + done + if [ "$_verify_failed" == "true" ]; then + echo "Aggregator invariant check failed. Aborting." >&2 + exit 1 + fi +fi + # Print a prominent aggregator summary banner. 
echo "" echo "╔══════════════════════════════════════════════════════════════╗" From a305f3061d2a598667ef1b7b5e416eb1f6486761 Mon Sep 17 00:00:00 2001 From: ch4r10t33r Date: Wed, 18 Mar 2026 13:59:45 +0000 Subject: [PATCH 05/25] validator-config: add privkey for commented-out gean_0, lean_node_0, peam_0 --- ansible-devnet/genesis/validator-config.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ansible-devnet/genesis/validator-config.yaml b/ansible-devnet/genesis/validator-config.yaml index 815b437..818d95b 100644 --- a/ansible-devnet/genesis/validator-config.yaml +++ b/ansible-devnet/genesis/validator-config.yaml @@ -98,6 +98,7 @@ validators: count: 1 # - name: "gean_0" + # privkey: "df008e968231c25c3938d80fee9bcc93b4b9711312cf471c1b6f77e67ad68d08" # enrFields: # ip: "204.168.134.201" # quic: 9001 @@ -107,6 +108,7 @@ validators: # count: 1 # - name: "lean_node_0" + # privkey: "d94e3dc35e320440c891b66bd82d1aaf2079364162815b32c2633ecae009c84c" # enrFields: # ip: "95.217.19.42" # quic: 9001 @@ -116,6 +118,7 @@ validators: # count: 1 # - name: "peam_0" + # privkey: "6ccb18b68c94188d9da6635dd1540cf62c0d418eb09eca25e4ad62d712f69b92" # enrFields: # ip: "95.216.173.151" # quic: 9001 From f035af4ee030bc6b3c65bda7966a197b5db32403 Mon Sep 17 00:00:00 2001 From: ch4r10t33r Date: Wed, 18 Mar 2026 14:22:27 +0000 Subject: [PATCH 06/25] spin-node: derive subnet from config 'subnet' field, not node name suffix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The old suffix-based detection (ethlambda_1 → subnet 1) broke when a config contained multiple nodes for the same client without --subnets (e.g. ethlambda_0..4 for redundancy), incorrectly creating 5 subnets and forcing ethlambda nodes as the sole aggregator on subnets 1-4. Subnet membership is now read from the explicit 'subnet:' field that generate-subnet-config.py writes for each entry. 
Nodes without this field (all standard configs) default to subnet 0, so a single-subnet deployment always selects exactly one aggregator from all active nodes regardless of numeric suffixes in their names. --- spin-node.sh | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/spin-node.sh b/spin-node.sh index c3e5007..01cda91 100755 --- a/spin-node.sh +++ b/spin-node.sh @@ -203,15 +203,19 @@ restart_with_checkpoint_sync=false # Aggregator selection — one randomly chosen aggregator per subnet. # -# Subnet membership is derived from the numeric suffix of each node name: -# zeam_0 → subnet 0 -# zeam_1 → subnet 1 -# Without --subnets all nodes carry suffix _0, so a single aggregator is -# selected as before. +# Subnet membership is read from the explicit 'subnet:' field in the config, +# which generate-subnet-config.py writes when --subnets N is used. +# Nodes without a 'subnet' field (standard single-subnet configs) all +# default to subnet 0 regardless of their name suffix. # # When --aggregator is specified, that node is used as the aggregator for # its own subnet; all other subnets still get a random selection. +# Helper: get the subnet index for a node from the config (defaults to 0). +_node_subnet() { + yq eval ".validators[] | select(.name == \"$1\") | .subnet // 0" "$validator_config_file" +} + # If --aggregator was given, validate it exists before doing anything else. if [ -n "$aggregatorNode" ]; then aggregator_found=false @@ -228,10 +232,10 @@ if [ -n "$aggregatorNode" ]; then fi fi -# Collect unique subnet indices (the part after the last '_' in each name). +# Collect unique subnet indices from the 'subnet' field (0 when absent). 
_subnet_indices=() for _node in "${nodes[@]}"; do - _subnet_indices+=("${_node##*_}") + _subnet_indices+=("$(_node_subnet "$_node")") done _unique_subnets=($(printf '%s\n' "${_subnet_indices[@]}" | sort -un)) @@ -247,13 +251,13 @@ _aggregator_summary=() for _subnet_idx in "${_unique_subnets[@]}"; do _subnet_nodes=() for _node in "${nodes[@]}"; do - [[ "${_node##*_}" == "$_subnet_idx" ]] && _subnet_nodes+=("$_node") + [[ "$(_node_subnet "$_node")" == "$_subnet_idx" ]] && _subnet_nodes+=("$_node") done _selected_agg="" # Use the user-specified aggregator if it belongs to this subnet. - if [ -n "$aggregatorNode" ] && [[ "${aggregatorNode##*_}" == "$_subnet_idx" ]]; then + if [ -n "$aggregatorNode" ] && [[ "$(_node_subnet "$aggregatorNode")" == "$_subnet_idx" ]]; then _selected_agg="$aggregatorNode" else _n=${#_subnet_nodes[@]} @@ -272,7 +276,7 @@ if [ "$dryRun" != "true" ]; then for _subnet_idx in "${_unique_subnets[@]}"; do _agg_count=0 for _node in "${nodes[@]}"; do - if [[ "${_node##*_}" == "$_subnet_idx" ]]; then + if [[ "$(_node_subnet "$_node")" == "$_subnet_idx" ]]; then _is_agg=$(yq eval ".validators[] | select(.name == \"$_node\") | .isAggregator" "$validator_config_file") [[ "$_is_agg" == "true" ]] && _agg_count=$((_agg_count + 1)) fi From 4133a3174bc36d7985b7efc5c8592b078c1acecb Mon Sep 17 00:00:00 2001 From: ch4r10t33r Date: Wed, 18 Mar 2026 14:39:31 +0000 Subject: [PATCH 07/25] docs: add client integration guide with link from README --- README.md | 5 +- docs/adding-a-new-client.md | 432 ++++++++++++++++++++++++++++++++++++ 2 files changed, 436 insertions(+), 1 deletion(-) create mode 100644 docs/adding-a-new-client.md diff --git a/README.md b/README.md index 4018410..9264afe 100644 --- a/README.md +++ b/README.md @@ -336,8 +336,11 @@ Current following clients are supported: 4. Lantern 5. Lighthouse 6. Grandine +7. Ethlambda -However adding a lean client to this setup is very easy. Feel free to do the PR or reach out to the maintainers. 
+Adding a new client requires 6 small, well-defined steps. See the full integration guide: + +📖 **[Adding a New Client](docs/adding-a-new-client.md)** ## How It Works diff --git a/docs/adding-a-new-client.md b/docs/adding-a-new-client.md new file mode 100644 index 0000000..6210aa9 --- /dev/null +++ b/docs/adding-a-new-client.md @@ -0,0 +1,432 @@ +# Adding a New Client to lean-quickstart + +This guide walks through every file you need to create or modify to integrate a new Lean +Ethereum client into lean-quickstart. The integration has **6 touch points**. All other +infrastructure (genesis generation, key management, Ansible inventory, subnet expansion, +leanpoint upstreams, aggregator selection) is generic and requires no changes. + +--- + +## Naming convention + +Every client follows the pattern `{client}_{index}`: + +- `myclient_0` — first (and usually only) node for `myclient` +- `myclient_1`, `myclient_2` — additional nodes when `--subnets N` is used + +The prefix before the first `_` is the **client type**. lean-quickstart derives it +automatically (`node_name.split('_')[0]`). All file and role names must use this prefix +consistently. + +--- + +## Touch point 1 — `validator-config.yaml` + +Add an entry for your node in whichever config file you are targeting: + +- **Local devnet**: `local-devnet/genesis/validator-config.yaml` +- **Ansible devnet**: `ansible-devnet/genesis/validator-config.yaml` + +```yaml +validators: + # ... existing entries ... + + - name: "myclient_0" + # A unique 32-byte hex P2P identity key. 
+ # Generate one: python3 -c "import secrets; print(secrets.token_hex(32))" + privkey: "a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2" + enrFields: + ip: "127.0.0.1" # Use real server IP for Ansible deployments + quic: 9009 # Must be unique per server; UDP port for QUIC/P2P + metricsPort: 9104 # TCP port Prometheus scrapes + apiPort: 5064 # TCP port for the REST API (use httpPort instead if your client uses that key) + isAggregator: false # Managed automatically by spin-node.sh — do not set manually + count: 1 # Number of validator indices to assign to this node +``` + +> **`apiPort` vs `httpPort`**: use `apiPort` if your client serves its REST API on that key. +> If your client uses `httpPort` (as Lantern does), use `httpPort` instead — both are +> understood everywhere in lean-quickstart. + +**Port uniqueness rules:** +- `quic`, `metricsPort`, and `apiPort`/`httpPort` must not clash with any other node on the + same server. +- When `--subnets N` is used, `generate-subnet-config.py` increments each port by the subnet + index, so base ports only need to be unique among subnet-0 nodes. + +--- + +## Touch point 2 — `client-cmds/myclient-cmd.sh` + +This file defines how lean-quickstart starts your client. It must set exactly two variables: + +- **`node_binary`** — command line for running the client as a local binary +- **`node_docker`** — docker arguments (everything after `docker run … -v … -v …`) for + running the client as a container +- **`node_setup`** — either `"docker"` or `"binary"` to select which of the above is used + +The following shell variables are available when this script is sourced: + +| Variable | Content | +|---|---| +| `$item` | Node name (e.g. `myclient_0`) — use as `--node-id` | +| `$configDir` | Absolute path to the genesis directory (e.g. 
`local-devnet/genesis`) — mounted as `/config` in Docker | +| `$dataDir` | Absolute path to the data root — mounted as `/data` in Docker | +| `$quicPort` | QUIC/P2P UDP port read from `validator-config.yaml` | +| `$metricsPort` | Prometheus metrics TCP port | +| `$apiPort` | REST API TCP port (`httpPort` is also available if you used that key) | +| `$privKeyPath` | Relative path to the P2P key file inside `$configDir` (e.g. `myclient_0.key`) | +| `$validatorConfig` | Either `"genesis_bootnode"` or a path to a node-specific `validator-config.yaml` | +| `$isAggregator` | `"true"` or `"false"` — set by aggregator selection before startup | +| `$attestationCommitteeCount` | Number of subnets (set when `--subnets N` is used) | +| `$checkpoint_sync_url` | Checkpoint sync URL (set when `--restart-client` is used) | +| `$scriptDir` | Directory of `spin-node.sh` (the lean-quickstart root) | + +```bash +#!/bin/bash + +#-----------------------myclient setup---------------------- + +# Build optional flags from environment variables injected by spin-node.sh. +aggregator_flag="" +if [ "$isAggregator" == "true" ]; then + aggregator_flag="--aggregator" +fi + +attestation_committee_flag="" +if [ -n "$attestationCommitteeCount" ]; then + attestation_committee_flag="--attestation-committee-count $attestationCommitteeCount" +fi + +checkpoint_sync_flag="" +if [ -n "${checkpoint_sync_url:-}" ]; then + checkpoint_sync_flag="--checkpoint-sync-url $checkpoint_sync_url" +fi + +# Binary mode: path relative to the lean-quickstart root. 
+node_binary="$scriptDir/../myclient/target/release/myclient \ + --data-dir $dataDir/$item \ + --genesis $configDir/config.yaml \ + --validators $configDir/validators.yaml \ + --bootnodes $configDir/nodes.yaml \ + --node-id $item \ + --node-key $configDir/$privKeyPath \ + --listen-port $quicPort \ + --metrics-port $metricsPort \ + --api-port $apiPort \ + $attestation_committee_flag \ + $aggregator_flag \ + $checkpoint_sync_flag" + +# Docker mode: everything after 'docker run -v ... -v ...'. +# The genesis dir is always at /config and data dir at /data inside the container. +node_docker="ghcr.io/yourorg/myclient:latest \ + --data-dir /data \ + --genesis /config/config.yaml \ + --validators /config/validators.yaml \ + --bootnodes /config/nodes.yaml \ + --node-id $item \ + --node-key /config/$privKeyPath \ + --listen-port $quicPort \ + --metrics-port $metricsPort \ + --api-port $apiPort \ + $attestation_committee_flag \ + $aggregator_flag \ + $checkpoint_sync_flag" + +# Set to "binary" to use the binary path during development. 
+node_setup="docker"
+```
+
+### Required CLI flags your client must support
+
+| Flag | Purpose |
+|---|---|
+| `--node-id <name>` | Identifies the node in logs and config lookups |
+| `--node-key <path>` | Path to the P2P libp2p private key file |
+| `--genesis` / `--custom_genesis` / `--network` | Path to `config.yaml` (or directory containing it) |
+| `--validators` / `--validator-registry-path` | Path to `validators.yaml` (index assignments) |
+| `--bootnodes` | Path to `nodes.yaml` (ENRs for peer discovery) |
+| `--metrics-port <port>` | Prometheus metrics endpoint |
+| `--api-port <port>` (or `--http-port`) | REST API endpoint (used by leanpoint health checks) |
+| `--is-aggregator` / `--aggregator` | Enable aggregator mode for this node |
+| `--attestation-committee-count <N>` | Number of subnets; controls which attestation gossip topics the node subscribes to |
+| `--checkpoint-sync-url <url>` | URL to fetch finalized checkpoint state from |
+
+> **`GET /v0/health`** — your client's REST API must respond to this endpoint. leanpoint
+> uses it to monitor node health. Return HTTP 200 when the node is healthy.
+ +### Files provided in the genesis directory + +Your client will find these files at `$configDir` (or `/config` in Docker): + +| File | Contents | +|---|---| +| `config.yaml` | Chain config — genesis time, ACTIVE\_EPOCH, VALIDATOR\_COUNT, GENESIS\_VALIDATORS pubkeys | +| `validators.yaml` | Validator index → node name assignments | +| `annotated_validators.yaml` | Validator index + pubkey\_hex + privkey\_file per node name (preferred over validators.yaml) | +| `nodes.yaml` | ENR list for all nodes — use as static bootnode list | +| `genesis.json` | Genesis state (JSON) | +| `genesis.ssz` | Genesis state (SSZ) | +| `hash-sig-keys/validator_N_sk.ssz` | Post-quantum secret key for validator N | +| `hash-sig-keys/validator_N_pk.ssz` | Post-quantum public key for validator N | +| `myclient_0.key` | P2P libp2p private key for this node | + +> Clients should derive their genesis state from `config.yaml` directly (using +> `GENESIS_VALIDATORS` pubkeys and `GENESIS_TIME`). The `genesis.json` / `genesis.ssz` +> files are provided for compatibility but their format may not be up to date. + +--- + +## Touch point 3 — Ansible role: `ansible/roles/myclient/defaults/main.yml` + +```yaml +--- +# Default variables for myclient role. +# Actual values are extracted from client-cmds/myclient-cmd.sh at deploy time. + +myclient_docker_image: "ghcr.io/yourorg/myclient:latest" +myclient_binary_path: "{{ playbook_dir }}/../myclient/target/release/myclient" +deployment_mode: docker # docker or binary +``` + +--- + +## Touch point 4 — Ansible role: `ansible/roles/myclient/tasks/main.yml` + +This is the most substantial file. Copy the pattern from an existing role (e.g. `ream`) and +adjust the variable names and docker command arguments to match your client. + +```yaml +--- +# myclient role: Deploy and manage myclient nodes + +- name: Extract docker image from client-cmd.sh + shell: | + project_root="$(cd '{{ playbook_dir }}/../..' 
&& pwd)" + grep -E '^node_docker=' "$project_root/client-cmds/myclient-cmd.sh" | head -1 \ + | grep -oE '[a-zA-Z0-9._/-]+:[a-zA-Z0-9._-]+' | head -1 + register: myclient_docker_image_raw + changed_when: false + delegate_to: localhost + run_once: true + +- name: Extract deployment mode from client-cmd.sh + shell: | + project_root="$(cd '{{ playbook_dir }}/../..' && pwd)" + grep -E '^node_setup=' "$project_root/client-cmds/myclient-cmd.sh" | head -1 \ + | sed -E 's/.*node_setup="([^"]+)".*/\1/' + register: myclient_deployment_mode_raw + changed_when: false + delegate_to: localhost + run_once: true + +- name: Set docker image and deployment mode + set_fact: + myclient_docker_image: "{{ myclient_docker_image_raw.stdout | trim | default('ghcr.io/yourorg/myclient:latest') }}" + deployment_mode: "{{ myclient_deployment_mode_raw.stdout | trim | default('docker') }}" + delegate_to: localhost + run_once: true + +- name: Extract node configuration from validator-config.yaml + shell: | + yq eval ".validators[] | select(.name == \"{{ node_name }}\") | .{{ item }}" \ + "{{ hostvars['localhost']['local_genesis_dir_path'] }}/validator-config.yaml" + register: myclient_node_config + changed_when: false + delegate_to: localhost + loop: + - enrFields.quic + - metricsPort + - apiPort # change to httpPort if your config uses that key + - privkey + - isAggregator + when: node_name is defined + +- name: Set node ports and aggregator flag + set_fact: + myclient_quic_port: "{{ myclient_node_config.results[0].stdout }}" + myclient_metrics_port: "{{ myclient_node_config.results[1].stdout }}" + myclient_api_port: "{{ myclient_node_config.results[2].stdout }}" + myclient_is_aggregator: "{{ 'true' if (myclient_node_config.results[4].stdout | default('') | trim) == 'true' else 'false' }}" + when: myclient_node_config is defined + +- name: Ensure node key file exists + stat: + path: "{{ genesis_dir }}/{{ node_name }}.key" + register: node_key_stat + +- name: Fail if node key file is missing + 
fail: + msg: "Node key file {{ node_name }}.key not found in {{ genesis_dir }}" + when: not (node_key_stat.stat.exists | default(false)) + +- name: Create node data directory + file: + path: "{{ data_dir }}/{{ node_name }}" + state: directory + mode: '0755' + +- name: Deploy myclient node using Docker + block: + - name: Stop existing container (if any) + command: docker rm -f {{ node_name }} + register: myclient_stop + failed_when: false + changed_when: myclient_stop.rc == 0 + + - name: Start myclient container + command: >- + docker run -d + --pull=always + --name {{ node_name }} + --restart unless-stopped + --network host + {{ '--init --ulimit core=-1 --workdir /data' + if (enable_core_dumps | default('') == 'all') + or (node_name in (enable_core_dumps | default('')).split(',')) + or (node_name.split('_')[0] in (enable_core_dumps | default('')).split(',')) + else '' }} + -v {{ genesis_dir }}:/config:ro + -v {{ data_dir }}/{{ node_name }}:/data + {{ myclient_docker_image }} + --data-dir /data + --genesis /config/config.yaml + --validators /config/validators.yaml + --bootnodes /config/nodes.yaml + --node-id {{ node_name }} + --node-key /config/{{ node_name }}.key + --listen-port {{ myclient_quic_port }} + --metrics-port {{ myclient_metrics_port }} + --api-port {{ myclient_api_port }} + {{ '--aggregator' if (myclient_is_aggregator | default('false')) == 'true' else '' }} + {{ ('--checkpoint-sync-url ' + checkpoint_sync_url) + if (checkpoint_sync_url is defined and checkpoint_sync_url | length > 0) + else '' }} + register: myclient_container + changed_when: myclient_container.rc == 0 + when: deployment_mode == 'docker' +``` + +> **Core dumps** — the `enable_core_dumps` logic is boilerplate. Keep it as-is; it allows +> the operator to enable core dumps for specific nodes or client types at deploy time without +> changing the role. + +--- + +## Touch point 5 — Register in `ansible/playbooks/helpers/deploy-single-node.yml` + +Add a block for your client type. 
The client type is the prefix before the first `_` in the +node name (`myclient_0` → `myclient`). + +```yaml +# ... existing entries for zeam, ream, qlean, lantern, lighthouse, grandine, ethlambda ... + +- name: Deploy myclient node + include_role: + name: myclient + when: client_type == "myclient" + tags: + - myclient + - deploy +``` + +Also update the final guard at the bottom of the file to include your client type in the +known list: + +```yaml +- name: Fail if unknown client type + fail: + msg: "Unknown client type '{{ client_type }}' for node '{{ node_name }}'. Expected: zeam, ream, qlean, lantern, lighthouse, grandine, ethlambda or myclient" + when: client_type not in ["zeam", "ream", "qlean", "lantern", "lighthouse", "grandine", "ethlambda", "myclient"] +``` + +--- + +## Touch point 6 — Update `README.md` + +In the **Clients supported** section, add your client to the numbered list: + +```markdown +### Clients supported + +Current following clients are supported: + +1. Zeam +2. Ream +3. Qlean +4. Lantern +5. Lighthouse +6. Grandine +7. Ethlambda +8. 
Myclient ← add here +``` + +--- + +## No changes needed elsewhere + +The following components are fully generic and require zero modifications: + +| Component | Why no changes needed | +|---|---| +| `spin-node.sh` | Discovers clients from `validator-config.yaml`; routes to `client-cmds/{client}-cmd.sh` by name | +| `generate-genesis.sh` | Uses `validator-config.yaml` as source of truth; assigns validator indices round-robin regardless of client type | +| `generate-subnet-config.py` | Derives client name from node name prefix; works for any `{client}_{index}` naming | +| `convert-validator-config.py` | Reads `apiPort` / `httpPort` from any entry; generates leanpoint upstreams for all nodes | +| `ansible/playbooks/deploy-nodes.yml` | Calls `deploy-single-node.yml` per node; no client-specific logic | +| `ansible/playbooks/prepare.yml` | Reads ports from config by IP; opens firewall rules for any node | +| `ansible/roles/observability/` | Scrapes `metricsPort` from config; works for any client | +| Aggregator selection | Derives subnet from `subnet:` field or defaults to 0; works for any node name | + +--- + +## Checklist + +``` +[ ] 1. validator-config.yaml — add entry with unique privkey, IP, ports +[ ] 2. client-cmds/myclient-cmd.sh — define node_binary, node_docker, node_setup +[ ] 3. ansible/roles/myclient/defaults/main.yml — fallback image and deployment mode +[ ] 4. ansible/roles/myclient/tasks/main.yml — extract config, start Docker container +[ ] 5. ansible/playbooks/helpers/deploy-single-node.yml — add include_role block + update guard +[ ] 6. 
README.md — add to Clients supported list +``` + +--- + +## Local test + +```sh +# Generate genesis and spin up only your new node locally +NETWORK_DIR=local-devnet ./spin-node.sh --node myclient_0 --generateGenesis + +# Verify it is running +docker ps | grep myclient_0 + +# Check health endpoint +curl http://127.0.0.1:5064/v0/health +``` + +## Ansible test + +```sh +# Prepare the remote server (first time only) +NETWORK_DIR=ansible-devnet ./spin-node.sh --prepare --sshKey ~/.ssh/id_ed25519 --useRoot + +# Deploy your node alongside the existing nodes +NETWORK_DIR=ansible-devnet ./spin-node.sh --node all --generateGenesis \ + --sshKey ~/.ssh/id_ed25519 --useRoot + +# Dry run first to verify without deploying +NETWORK_DIR=ansible-devnet ./spin-node.sh --node myclient_0 --dry-run \ + --sshKey ~/.ssh/id_ed25519 --useRoot +``` + +## Subnet test + +```sh +# Verify your client works correctly with 2 subnets +NETWORK_DIR=ansible-devnet ./spin-node.sh --node all --subnets 2 --generateGenesis \ + --sshKey ~/.ssh/id_ed25519 --useRoot +# Expected: myclient_0 (subnet 0) and myclient_1 (subnet 1) are both running +``` From 37c2c96eeb4e9622a2e3cf81dd3b5708b1f8e4a4 Mon Sep 17 00:00:00 2001 From: ch4r10t33r Date: Wed, 18 Mar 2026 14:51:50 +0000 Subject: [PATCH 08/25] spin-node: honour pre-existing isAggregator: true when no --aggregator flag is passed Previously the script always reset all flags and randomly re-selected an aggregator, ignoring any manual isAggregator: true already set in the YAML. This caused ethlambda_0 (user's choice) to be silently replaced by ethlambda_1 (random pick). Aggregator selection now follows a three-level priority: 1. --aggregator CLI flag 2. Pre-existing isAggregator: true in the config (manual YAML edit) 3. Random selection (fallback when neither is set) The preset node is validated against the active node list. If it no longer exists a warning is printed and random selection takes over. 
--- spin-node.sh | 36 ++++++++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/spin-node.sh b/spin-node.sh index 01cda91..7fd5067 100755 --- a/spin-node.sh +++ b/spin-node.sh @@ -241,12 +241,25 @@ _unique_subnets=($(printf '%s\n' "${_subnet_indices[@]}" | sort -un)) echo "Detected ${#_unique_subnets[@]} subnet(s): ${_unique_subnets[*]}" -# Reset every node's isAggregator flag first (skipped in dry-run). +# Snapshot which nodes already have isAggregator: true before we reset anything. +# This lets us honour manual edits in the YAML when no --aggregator flag was passed. +declare -A _preset_agg # subnet_idx -> node_name +for _node in "${nodes[@]}"; do + _is_agg=$(yq eval ".validators[] | select(.name == \"$_node\") | .isAggregator" "$validator_config_file") + if [[ "$_is_agg" == "true" ]]; then + _sn="$(_node_subnet "$_node")" + # Keep the first preset aggregator found per subnet. + [[ -z "${_preset_agg[$_sn]:-}" ]] && _preset_agg["$_sn"]="$_node" + fi +done + +# Reset every node's isAggregator flag (skipped in dry-run). if [ "$dryRun" != "true" ]; then yq eval -i '.validators[].isAggregator = false' "$validator_config_file" fi # Select one aggregator per subnet and set the flag. +# Priority: 1) --aggregator CLI flag 2) pre-existing isAggregator: true 3) random _aggregator_summary=() for _subnet_idx in "${_unique_subnets[@]}"; do _subnet_nodes=() @@ -256,12 +269,27 @@ for _subnet_idx in "${_unique_subnets[@]}"; do _selected_agg="" - # Use the user-specified aggregator if it belongs to this subnet. if [ -n "$aggregatorNode" ] && [[ "$(_node_subnet "$aggregatorNode")" == "$_subnet_idx" ]]; then + # 1. Explicit --aggregator flag. _selected_agg="$aggregatorNode" + elif [ -n "${_preset_agg[$_subnet_idx]:-}" ]; then + # 2. A node had isAggregator: true in the config — respect the manual choice. + _preset="${_preset_agg[$_subnet_idx]}" + # Validate the preset node is still in the active nodes list. 
+ _preset_valid=false + for _n in "${_subnet_nodes[@]}"; do + [[ "$_n" == "$_preset" ]] && _preset_valid=true && break + done + if [[ "$_preset_valid" == "true" ]]; then + _selected_agg="$_preset" + else + # Preset node no longer exists — fall back to random and warn. + echo "Warning: preset aggregator '$_preset' for subnet $_subnet_idx is not in the active node list; selecting randomly." >&2 + _selected_agg="${_subnet_nodes[$((RANDOM % ${#_subnet_nodes[@]}))]}" + fi else - _n=${#_subnet_nodes[@]} - _selected_agg="${_subnet_nodes[$((RANDOM % _n))]}" + # 3. No preference set — pick randomly. + _selected_agg="${_subnet_nodes[$((RANDOM % ${#_subnet_nodes[@]}))]}" fi if [ "$dryRun" != "true" ]; then From 92279ea3eeee3d26395c6747c80268ce1ba9b5df Mon Sep 17 00:00:00 2001 From: ch4r10t33r Date: Wed, 18 Mar 2026 14:58:51 +0000 Subject: [PATCH 09/25] =?UTF-8?q?docs:=20clarify=20touch=20point=201=20?= =?UTF-8?q?=E2=80=94=20both=20configs=20required,=20separate=20local/ansib?= =?UTF-8?q?le=20examples?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/adding-a-new-client.md | 75 +++++++++++++++++++++++++++++-------- 1 file changed, 60 insertions(+), 15 deletions(-) diff --git a/docs/adding-a-new-client.md b/docs/adding-a-new-client.md index 6210aa9..8869eaf 100644 --- a/docs/adding-a-new-client.md +++ b/docs/adding-a-new-client.md @@ -22,12 +22,21 @@ consistently. ## Touch point 1 — `validator-config.yaml` -Add an entry for your node in whichever config file you are targeting: +You must add your entry to **both** config files. 
They serve different purposes and are kept +intentionally separate: -- **Local devnet**: `local-devnet/genesis/validator-config.yaml` -- **Ansible devnet**: `ansible-devnet/genesis/validator-config.yaml` +| File | Purpose | +|---|---| +| `local-devnet/genesis/validator-config.yaml` | Local development on your own machine | +| `ansible-devnet/genesis/validator-config.yaml` | Remote deployment to production servers | + +### Local devnet entry + +For local use all nodes run on the same machine, so every node gets `127.0.0.1` and a unique +port. ```yaml +# local-devnet/genesis/validator-config.yaml validators: # ... existing entries ... @@ -36,23 +45,59 @@ validators: # Generate one: python3 -c "import secrets; print(secrets.token_hex(32))" privkey: "a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2" enrFields: - ip: "127.0.0.1" # Use real server IP for Ansible deployments - quic: 9009 # Must be unique per server; UDP port for QUIC/P2P - metricsPort: 9104 # TCP port Prometheus scrapes - apiPort: 5064 # TCP port for the REST API (use httpPort instead if your client uses that key) + ip: "127.0.0.1" + quic: 9009 # Must be unique among all local nodes + metricsPort: 9104 # Must be unique among all local nodes + apiPort: 5064 # Must be unique among all local nodes isAggregator: false # Managed automatically by spin-node.sh — do not set manually count: 1 # Number of validator indices to assign to this node ``` -> **`apiPort` vs `httpPort`**: use `apiPort` if your client serves its REST API on that key. -> If your client uses `httpPort` (as Lantern does), use `httpPort` instead — both are -> understood everywhere in lean-quickstart. +### Ansible devnet entry + +For remote deployment each node gets the IP of the server it will run on. Ports must be +unique per server (not globally, since nodes on different servers don't share a network +namespace). + +```yaml +# ansible-devnet/genesis/validator-config.yaml +validators: + # ... existing entries ... 
+ + - name: "myclient_0" + privkey: "a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2" + enrFields: + ip: "203.0.113.42" # Public IP of the server this node will run on + quic: 9001 # Can reuse port 9001 if no other node is on this server + metricsPort: 9095 + apiPort: 5055 + isAggregator: false + count: 1 +``` + +> **Note — server assignment:** The `enrFields.ip` field is currently how lean-quickstart +> ties a node to a specific server. The Ansible inventory is generated from this IP, and +> `--prepare` opens firewall ports by matching this IP against each host. This coupling of +> server IP to node name is expected to be decoupled in a future release. + +### Ports and `--subnets N` + +When `--subnets N` is used, `generate-subnet-config.py` generates `myclient_0` … `myclient_{N-1}` +from your single template entry, incrementing every port by the subnet index: + +| Subnet | Node | quic | metricsPort | apiPort | +|---|---|---|---|---| +| 0 | `myclient_0` | base | base | base | +| 1 | `myclient_1` | base+1 | base+1 | base+1 | +| … | … | … | … | … | + +Your base ports therefore only need to be unique among subnet-0 entries. The generated nodes +`myclient_1` … `myclient_{N-1}` also receive fresh P2P keys automatically — you do not need +to provide them. -**Port uniqueness rules:** -- `quic`, `metricsPort`, and `apiPort`/`httpPort` must not clash with any other node on the - same server. -- When `--subnets N` is used, `generate-subnet-config.py` increments each port by the subnet - index, so base ports only need to be unique among subnet-0 nodes. +> **`apiPort` vs `httpPort`**: use `apiPort` if your client serves its REST API under that +> config key. If your client uses `httpPort` (as Lantern does), use `httpPort` instead — both +> are understood everywhere in lean-quickstart. 
--- From f153b388bd799e0866ffa4394eb9cba552405b4e Mon Sep 17 00:00:00 2001 From: ch4r10t33r Date: Wed, 18 Mar 2026 15:00:43 +0000 Subject: [PATCH 10/25] docs: add note to contact zeam team for server IP assignment --- docs/adding-a-new-client.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/adding-a-new-client.md b/docs/adding-a-new-client.md index 8869eaf..03d5fc8 100644 --- a/docs/adding-a-new-client.md +++ b/docs/adding-a-new-client.md @@ -67,7 +67,8 @@ validators: - name: "myclient_0" privkey: "a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2" enrFields: - ip: "203.0.113.42" # Public IP of the server this node will run on + ip: "203.0.113.42" # Public IP of the server this node will run on. + # Contact the zeam team to get a server assigned for your client. quic: 9001 # Can reuse port 9001 if no other node is on this server metricsPort: 9095 apiPort: 5055 From fbce72901ce83ea127798ce677f90c4a54ed6343 Mon Sep 17 00:00:00 2001 From: ch4r10t33r Date: Wed, 18 Mar 2026 15:44:37 +0000 Subject: [PATCH 11/25] spin-node: fix associative array for bash 3.2 compatibility --- spin-node.sh | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/spin-node.sh b/spin-node.sh index 7fd5067..ea38675 100755 --- a/spin-node.sh +++ b/spin-node.sh @@ -243,13 +243,15 @@ echo "Detected ${#_unique_subnets[@]} subnet(s): ${_unique_subnets[*]}" # Snapshot which nodes already have isAggregator: true before we reset anything. # This lets us honour manual edits in the YAML when no --aggregator flag was passed. -declare -A _preset_agg # subnet_idx -> node_name +# Uses dynamic variable names (_preset_agg_) for bash 3.2 compatibility +# (bash 3.2 ships with macOS and does not support declare -A). 
for _node in "${nodes[@]}"; do _is_agg=$(yq eval ".validators[] | select(.name == \"$_node\") | .isAggregator" "$validator_config_file") if [[ "$_is_agg" == "true" ]]; then _sn="$(_node_subnet "$_node")" + _varname="_preset_agg_${_sn}" # Keep the first preset aggregator found per subnet. - [[ -z "${_preset_agg[$_sn]:-}" ]] && _preset_agg["$_sn"]="$_node" + [[ -z "${!_varname:-}" ]] && printf -v "$_varname" '%s' "$_node" fi done @@ -272,9 +274,9 @@ for _subnet_idx in "${_unique_subnets[@]}"; do if [ -n "$aggregatorNode" ] && [[ "$(_node_subnet "$aggregatorNode")" == "$_subnet_idx" ]]; then # 1. Explicit --aggregator flag. _selected_agg="$aggregatorNode" - elif [ -n "${_preset_agg[$_subnet_idx]:-}" ]; then + elif _pv="_preset_agg_${_subnet_idx}"; [ -n "${!_pv:-}" ]; then # 2. A node had isAggregator: true in the config — respect the manual choice. - _preset="${_preset_agg[$_subnet_idx]}" + _preset="${!_pv}" # Validate the preset node is still in the active nodes list. _preset_valid=false for _n in "${_subnet_nodes[@]}"; do From fe4b5273caa88f995110694ed35372ff57ba2141 Mon Sep 17 00:00:00 2001 From: ch4r10t33r Date: Wed, 18 Mar 2026 15:56:51 +0000 Subject: [PATCH 12/25] validator-config: use apiPort for lantern instead of httpPort --- ansible-devnet/genesis/validator-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible-devnet/genesis/validator-config.yaml b/ansible-devnet/genesis/validator-config.yaml index 818d95b..87bb120 100644 --- a/ansible-devnet/genesis/validator-config.yaml +++ b/ansible-devnet/genesis/validator-config.yaml @@ -54,7 +54,7 @@ validators: ip: "65.109.131.177" quic: 9001 metricsPort: 9095 - httpPort: 5055 + apiPort: 5055 isAggregator: false count: 1 From 6dcccf1311de7fe6fa336887328ff5135b54c85a Mon Sep 17 00:00:00 2001 From: Katya Ryazantseva Date: Wed, 18 Mar 2026 18:19:22 +0100 Subject: [PATCH 13/25] fix: cadvisor deploy --- ansible/roles/observability/tasks/main.yml | 51 ++++++++-------------- 1 file changed, 17 
insertions(+), 34 deletions(-) diff --git a/ansible/roles/observability/tasks/main.yml b/ansible/roles/observability/tasks/main.yml index f7ae6b2..fcead33 100644 --- a/ansible/roles/observability/tasks/main.yml +++ b/ansible/roles/observability/tasks/main.yml @@ -31,56 +31,38 @@ dest: "{{ observability_dir }}/promtail.yml" mode: '0644' -# --- cadvisor (no config, only start if not running) --- +# --- cadvisor (always recreate to ensure correct flags) --- -- name: Check if cadvisor exists - command: docker ps -a --filter name=^cadvisor$ -q - register: cadvisor_exists - changed_when: false +- name: Remove existing cadvisor container + command: docker rm -f cadvisor failed_when: false -- name: Check if cadvisor is running - command: docker ps --filter name=^cadvisor$ -q - register: cadvisor_running - changed_when: false - failed_when: false - -- name: Start stopped cadvisor container - command: docker start cadvisor - when: cadvisor_exists.stdout != "" and cadvisor_running.stdout == "" - -- name: Create cadvisor container +- name: Start cadvisor container command: >- docker run -d --name cadvisor --restart unless-stopped --network host + --privileged + --device=/dev/kmsg + --health-cmd="wget --quiet --tries=1 --spider http://localhost:{{ cadvisor_port }}/healthz || exit 1" + --health-interval=30s + --health-timeout=5s + --health-retries=3 -v /:/rootfs:ro -v /var/run:/var/run:ro -v /sys:/sys:ro -v /var/lib/docker/:/var/lib/docker:ro {{ cadvisor_image }} - when: cadvisor_exists.stdout == "" + --port={{ cadvisor_port }} -# --- node_exporter (no config, only start if not running) --- +# --- node_exporter (always recreate to ensure correct flags) --- -- name: Check if node_exporter exists - command: docker ps -a --filter name=^node_exporter$ -q - register: node_exporter_exists - changed_when: false +- name: Remove existing node_exporter container + command: docker rm -f node_exporter failed_when: false -- name: Check if node_exporter is running - command: docker ps 
--filter name=^node_exporter$ -q - register: node_exporter_running - changed_when: false - failed_when: false - -- name: Start stopped node_exporter container - command: docker start node_exporter - when: node_exporter_exists.stdout != "" and node_exporter_running.stdout == "" - -- name: Create node_exporter container +- name: Start node_exporter container command: >- docker run -d --name node_exporter @@ -94,7 +76,7 @@ --path.procfs=/host/proc --path.sysfs=/host/sys --path.rootfs=/rootfs - when: node_exporter_exists.stdout == "" + --web.listen-address=0.0.0.0:{{ node_exporter_port }} # --- prometheus (always recreate to pick up config/mount changes, data persists on host) --- @@ -139,3 +121,4 @@ -v /var/lib/docker/containers:/var/lib/docker/containers:ro {{ promtail_image }} -config.file=/etc/promtail/config.yml + -server.http-listen-port={{ promtail_port }} From b785bd892c5cebb8950367fd2181f985e1195e65 Mon Sep 17 00:00:00 2001 From: ch4r10t33r Date: Wed, 18 Mar 2026 18:07:26 +0000 Subject: [PATCH 14/25] prepare: install jq alongside yq and docker --- ansible/playbooks/prepare.yml | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/ansible/playbooks/prepare.yml b/ansible/playbooks/prepare.yml index de7bb3c..03ee3ce 100644 --- a/ansible/playbooks/prepare.yml +++ b/ansible/playbooks/prepare.yml @@ -163,7 +163,25 @@ become: yes # ────────────────────────────────────────────────────────────────────────── - # 4. Firewall rules (iptables + ufw) + # 4. jq — used by several client-cmd scripts to parse JSON responses + # ────────────────────────────────────────────────────────────────────────── + + - name: Check if jq is already installed + command: which jq + register: jq_pre + changed_when: false + failed_when: false + + - name: Install jq + when: jq_pre.rc != 0 + apt: + name: jq + state: present + update_cache: yes + become: yes + + # ────────────────────────────────────────────────────────────────────────── + # 5. 
Firewall rules (iptables + ufw) # # iptables is the kernel-level packet filter; ufw is the management # frontend that writes iptables rules and persists them across reboots. From 8dabd90f4fced7fe0278339300fffaff17ca5c3f Mon Sep 17 00:00:00 2001 From: Katya Ryazantseva Date: Wed, 18 Mar 2026 19:14:33 +0100 Subject: [PATCH 15/25] fix: grandine address flag --- client-cmds/grandine-cmd.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/client-cmds/grandine-cmd.sh b/client-cmds/grandine-cmd.sh index 52a1432..378a827 100644 --- a/client-cmds/grandine-cmd.sh +++ b/client-cmds/grandine-cmd.sh @@ -43,6 +43,7 @@ node_docker="sifrai/lean:devnet-3 \ --node-id $item \ --node-key /config/$privKeyPath \ --port $quicPort \ + --address 0.0.0.0 \ --http-address 0.0.0.0 \ --http-port $apiPort \ --metrics \ From 0b5051a472bb6c3e79000352922bb82fafeaccd7 Mon Sep 17 00:00:00 2001 From: Katya Ryazantseva Date: Wed, 18 Mar 2026 19:28:17 +0100 Subject: [PATCH 16/25] fix: grandine address flag ansible --- ansible/roles/grandine/tasks/main.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/ansible/roles/grandine/tasks/main.yml b/ansible/roles/grandine/tasks/main.yml index 5e0f520..cf9cf72 100644 --- a/ansible/roles/grandine/tasks/main.yml +++ b/ansible/roles/grandine/tasks/main.yml @@ -103,6 +103,7 @@ --node-id {{ node_name }} --node-key /config/{{ node_name }}.key --port {{ grandine_quic_port }} + --address 0.0.0.0 --hash-sig-key-dir /config/hash-sig-keys --http-address 0.0.0.0 --http-port {{ grandine_api_port }} From f71c81839ffd56691cdffebd7950b8c8ae50ecf6 Mon Sep 17 00:00:00 2001 From: ch4r10t33r Date: Wed, 18 Mar 2026 18:59:07 +0000 Subject: [PATCH 17/25] spin-node: skip aggregator selection when using --restart-client --- spin-node.sh | 206 +++++++++++++++++++++++++++------------------------ 1 file changed, 109 insertions(+), 97 deletions(-) diff --git a/spin-node.sh b/spin-node.sh index ea38675..1bc1b3c 100755 --- a/spin-node.sh +++ b/spin-node.sh @@ -203,6 +203,9 @@ 
restart_with_checkpoint_sync=false # Aggregator selection — one randomly chosen aggregator per subnet. # +# Skipped entirely for --restart-client: restarting a single node must not +# disturb the existing isAggregator assignments for the rest of the network. +# # Subnet membership is read from the explicit 'subnet:' field in the config, # which generate-subnet-config.py writes when --subnets N is used. # Nodes without a 'subnet' field (standard single-subnet configs) all @@ -216,122 +219,131 @@ _node_subnet() { yq eval ".validators[] | select(.name == \"$1\") | .subnet // 0" "$validator_config_file" } -# If --aggregator was given, validate it exists before doing anything else. -if [ -n "$aggregatorNode" ]; then - aggregator_found=false - for available_node in "${nodes[@]}"; do - if [[ "$aggregatorNode" == "$available_node" ]]; then - aggregator_found=true - break +if [ -n "$restartClient" ]; then + echo "Note: skipping aggregator selection — --restart-client retains existing isAggregator assignments." + _aggregator_summary=() +else + + # If --aggregator was given, validate it exists before doing anything else. + if [ -n "$aggregatorNode" ]; then + aggregator_found=false + for available_node in "${nodes[@]}"; do + if [[ "$aggregatorNode" == "$available_node" ]]; then + aggregator_found=true + break + fi + done + if [[ "$aggregator_found" == false ]]; then + echo "Error: Specified aggregator '$aggregatorNode' not found in validator config" + echo "Available nodes: ${nodes[@]}" + exit 1 fi - done - if [[ "$aggregator_found" == false ]]; then - echo "Error: Specified aggregator '$aggregatorNode' not found in validator config" - echo "Available nodes: ${nodes[@]}" - exit 1 fi -fi -# Collect unique subnet indices from the 'subnet' field (0 when absent). 
-_subnet_indices=() -for _node in "${nodes[@]}"; do - _subnet_indices+=("$(_node_subnet "$_node")") -done -_unique_subnets=($(printf '%s\n' "${_subnet_indices[@]}" | sort -un)) - -echo "Detected ${#_unique_subnets[@]} subnet(s): ${_unique_subnets[*]}" - -# Snapshot which nodes already have isAggregator: true before we reset anything. -# This lets us honour manual edits in the YAML when no --aggregator flag was passed. -# Uses dynamic variable names (_preset_agg_) for bash 3.2 compatibility -# (bash 3.2 ships with macOS and does not support declare -A). -for _node in "${nodes[@]}"; do - _is_agg=$(yq eval ".validators[] | select(.name == \"$_node\") | .isAggregator" "$validator_config_file") - if [[ "$_is_agg" == "true" ]]; then - _sn="$(_node_subnet "$_node")" - _varname="_preset_agg_${_sn}" - # Keep the first preset aggregator found per subnet. - [[ -z "${!_varname:-}" ]] && printf -v "$_varname" '%s' "$_node" - fi -done + # Collect unique subnet indices from the 'subnet' field (0 when absent). + _subnet_indices=() + for _node in "${nodes[@]}"; do + _subnet_indices+=("$(_node_subnet "$_node")") + done + _unique_subnets=($(printf '%s\n' "${_subnet_indices[@]}" | sort -un)) -# Reset every node's isAggregator flag (skipped in dry-run). -if [ "$dryRun" != "true" ]; then - yq eval -i '.validators[].isAggregator = false' "$validator_config_file" -fi + echo "Detected ${#_unique_subnets[@]} subnet(s): ${_unique_subnets[*]}" -# Select one aggregator per subnet and set the flag. -# Priority: 1) --aggregator CLI flag 2) pre-existing isAggregator: true 3) random -_aggregator_summary=() -for _subnet_idx in "${_unique_subnets[@]}"; do - _subnet_nodes=() + # Snapshot which nodes already have isAggregator: true before we reset anything. + # This lets us honour manual edits in the YAML when no --aggregator flag was passed. + # Uses dynamic variable names (_preset_agg_) for bash 3.2 compatibility + # (bash 3.2 ships with macOS and does not support declare -A). 
for _node in "${nodes[@]}"; do - [[ "$(_node_subnet "$_node")" == "$_subnet_idx" ]] && _subnet_nodes+=("$_node") + _is_agg=$(yq eval ".validators[] | select(.name == \"$_node\") | .isAggregator" "$validator_config_file") + if [[ "$_is_agg" == "true" ]]; then + _sn="$(_node_subnet "$_node")" + _varname="_preset_agg_${_sn}" + # Keep the first preset aggregator found per subnet. + [[ -z "${!_varname:-}" ]] && printf -v "$_varname" '%s' "$_node" + fi done - _selected_agg="" - - if [ -n "$aggregatorNode" ] && [[ "$(_node_subnet "$aggregatorNode")" == "$_subnet_idx" ]]; then - # 1. Explicit --aggregator flag. - _selected_agg="$aggregatorNode" - elif _pv="_preset_agg_${_subnet_idx}"; [ -n "${!_pv:-}" ]; then - # 2. A node had isAggregator: true in the config — respect the manual choice. - _preset="${!_pv}" - # Validate the preset node is still in the active nodes list. - _preset_valid=false - for _n in "${_subnet_nodes[@]}"; do - [[ "$_n" == "$_preset" ]] && _preset_valid=true && break + # Reset every node's isAggregator flag (skipped in dry-run). + if [ "$dryRun" != "true" ]; then + yq eval -i '.validators[].isAggregator = false' "$validator_config_file" + fi + + # Select one aggregator per subnet and set the flag. + # Priority: 1) --aggregator CLI flag 2) pre-existing isAggregator: true 3) random + _aggregator_summary=() + for _subnet_idx in "${_unique_subnets[@]}"; do + _subnet_nodes=() + for _node in "${nodes[@]}"; do + [[ "$(_node_subnet "$_node")" == "$_subnet_idx" ]] && _subnet_nodes+=("$_node") done - if [[ "$_preset_valid" == "true" ]]; then - _selected_agg="$_preset" + + _selected_agg="" + + if [ -n "$aggregatorNode" ] && [[ "$(_node_subnet "$aggregatorNode")" == "$_subnet_idx" ]]; then + # 1. Explicit --aggregator flag. + _selected_agg="$aggregatorNode" + elif _pv="_preset_agg_${_subnet_idx}"; [ -n "${!_pv:-}" ]; then + # 2. A node had isAggregator: true in the config — respect the manual choice. 
+ _preset="${!_pv}" + # Validate the preset node is still in the active nodes list. + _preset_valid=false + for _n in "${_subnet_nodes[@]}"; do + [[ "$_n" == "$_preset" ]] && _preset_valid=true && break + done + if [[ "$_preset_valid" == "true" ]]; then + _selected_agg="$_preset" + else + # Preset node no longer exists — fall back to random and warn. + echo "Warning: preset aggregator '$_preset' for subnet $_subnet_idx is not in the active node list; selecting randomly." >&2 + _selected_agg="${_subnet_nodes[$((RANDOM % ${#_subnet_nodes[@]}))]}" + fi else - # Preset node no longer exists — fall back to random and warn. - echo "Warning: preset aggregator '$_preset' for subnet $_subnet_idx is not in the active node list; selecting randomly." >&2 + # 3. No preference set — pick randomly. _selected_agg="${_subnet_nodes[$((RANDOM % ${#_subnet_nodes[@]}))]}" fi - else - # 3. No preference set — pick randomly. - _selected_agg="${_subnet_nodes[$((RANDOM % ${#_subnet_nodes[@]}))]}" - fi - if [ "$dryRun" != "true" ]; then - yq eval -i "(.validators[] | select(.name == \"$_selected_agg\") | .isAggregator) = true" "$validator_config_file" - fi - _aggregator_summary+=("subnet $_subnet_idx → $_selected_agg") -done + if [ "$dryRun" != "true" ]; then + yq eval -i "(.validators[] | select(.name == \"$_selected_agg\") | .isAggregator) = true" "$validator_config_file" + fi + _aggregator_summary+=("subnet $_subnet_idx → $_selected_agg") + done -# Verify the invariant: exactly 1 aggregator per subnet (skipped in dry-run). -if [ "$dryRun" != "true" ]; then - _verify_failed=false - for _subnet_idx in "${_unique_subnets[@]}"; do - _agg_count=0 - for _node in "${nodes[@]}"; do - if [[ "$(_node_subnet "$_node")" == "$_subnet_idx" ]]; then - _is_agg=$(yq eval ".validators[] | select(.name == \"$_node\") | .isAggregator" "$validator_config_file") - [[ "$_is_agg" == "true" ]] && _agg_count=$((_agg_count + 1)) + # Verify the invariant: exactly 1 aggregator per subnet (skipped in dry-run). 
+ if [ "$dryRun" != "true" ]; then + _verify_failed=false + for _subnet_idx in "${_unique_subnets[@]}"; do + _agg_count=0 + for _node in "${nodes[@]}"; do + if [[ "$(_node_subnet "$_node")" == "$_subnet_idx" ]]; then + _is_agg=$(yq eval ".validators[] | select(.name == \"$_node\") | .isAggregator" "$validator_config_file") + [[ "$_is_agg" == "true" ]] && _agg_count=$((_agg_count + 1)) + fi + done + if [ "$_agg_count" -ne 1 ]; then + echo "Error: subnet $_subnet_idx has $_agg_count aggregator(s) — expected exactly 1" >&2 + _verify_failed=true fi done - if [ "$_agg_count" -ne 1 ]; then - echo "Error: subnet $_subnet_idx has $_agg_count aggregator(s) — expected exactly 1" >&2 - _verify_failed=true + if [ "$_verify_failed" == "true" ]; then + echo "Aggregator invariant check failed. Aborting." >&2 + exit 1 fi - done - if [ "$_verify_failed" == "true" ]; then - echo "Aggregator invariant check failed. Aborting." >&2 - exit 1 fi -fi -# Print a prominent aggregator summary banner. -echo "" -echo "╔══════════════════════════════════════════════════════════════╗" -echo "║ 🗳 Aggregator Selection ║" -echo "╠══════════════════════════════════════════════════════════════╣" -for _line in "${_aggregator_summary[@]}"; do - printf "║ %-60s║\n" "$_line" -done -echo "╚══════════════════════════════════════════════════════════════╝" -echo "" +fi # end: aggregator selection (skipped for --restart-client) + +# Print a prominent aggregator summary banner (only when aggregator selection ran). 
+if [ ${#_aggregator_summary[@]} -gt 0 ]; then + echo "" + echo "╔══════════════════════════════════════════════════════════════╗" + echo "║ 🗳 Aggregator Selection ║" + echo "╠══════════════════════════════════════════════════════════════╣" + for _line in "${_aggregator_summary[@]}"; do + printf "║ %-60s║\n" "$_line" + done + echo "╚══════════════════════════════════════════════════════════════╝" + echo "" +fi # When --restart-client is specified, use it as the node list and enable checkpoint sync mode if [[ -n "$restartClient" ]]; then From 09d4fc0d942e4db5defd46311f378340c2a21a38 Mon Sep 17 00:00:00 2001 From: ch4r10t33r Date: Wed, 18 Mar 2026 20:03:43 +0000 Subject: [PATCH 18/25] validator-config: enable gean_0 node --- ansible-devnet/genesis/validator-config.yaml | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/ansible-devnet/genesis/validator-config.yaml b/ansible-devnet/genesis/validator-config.yaml index 87bb120..3daa535 100644 --- a/ansible-devnet/genesis/validator-config.yaml +++ b/ansible-devnet/genesis/validator-config.yaml @@ -97,15 +97,15 @@ validators: isAggregator: false count: 1 - # - name: "gean_0" - # privkey: "df008e968231c25c3938d80fee9bcc93b4b9711312cf471c1b6f77e67ad68d08" - # enrFields: - # ip: "204.168.134.201" - # quic: 9001 - # metricsPort: 9095 - # apiPort: 5055 - # isAggregator: false - # count: 1 + - name: "gean_0" + privkey: "df008e968231c25c3938d80fee9bcc93b4b9711312cf471c1b6f77e67ad68d08" + enrFields: + ip: "204.168.134.201" + quic: 9001 + metricsPort: 9095 + apiPort: 5055 + isAggregator: false + count: 1 # - name: "lean_node_0" # privkey: "d94e3dc35e320440c891b66bd82d1aaf2079364162815b32c2633ecae009c84c" From 023eaf87533f1ba373cc8ec8ecdf21ee3bb6ed6f Mon Sep 17 00:00:00 2001 From: ch4r10t33r Date: Thu, 19 Mar 2026 11:14:04 +0000 Subject: [PATCH 19/25] run-ansible: derive inventory groups dynamically instead of hardcoding The hardcoded group list (zeam_nodes, ream_nodes, ...) 
caused newly added client types (e.g. gean_nodes) to never have their ansible_user updated. This meant --useRoot was silently ignored for those nodes, causing Ansible to SSH as the current local user (partha) instead of root, and fail. --- run-ansible.sh | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/run-ansible.sh b/run-ansible.sh index e0fe7c0..502b2b1 100755 --- a/run-ansible.sh +++ b/run-ansible.sh @@ -61,8 +61,11 @@ fi # Update inventory with SSH key file and user if provided if command -v yq &> /dev/null; then - # Get all remote host groups (zeam_nodes, ream_nodes, qlean_nodes, lantern_nodes, lighthouse_nodes) - for group in zeam_nodes ream_nodes qlean_nodes lantern_nodes lighthouse_nodes grandine_nodes ethlambda_nodes; do + # Derive the group list dynamically from the inventory so newly added clients + # (e.g. gean_nodes, lean_nodes) are automatically included without needing to + # update this hardcoded list every time a new client type is added. + all_groups=$(yq eval '.all.children | keys | .[]' "$INVENTORY_FILE" 2>/dev/null || echo "") + for group in $all_groups; do # Get all hosts in this group hosts=$(yq eval ".all.children.$group.hosts | keys | .[]" "$INVENTORY_FILE" 2>/dev/null || echo "") for host in $hosts; do From 9577f4a81818bc6ad303b4c295d6e6826c5322f7 Mon Sep 17 00:00:00 2001 From: ch4r10t33r Date: Thu, 19 Mar 2026 14:13:54 +0000 Subject: [PATCH 20/25] validator-config: add nlean_0 node --- ansible-devnet/genesis/validator-config.yaml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/ansible-devnet/genesis/validator-config.yaml b/ansible-devnet/genesis/validator-config.yaml index 3daa535..e462b4c 100644 --- a/ansible-devnet/genesis/validator-config.yaml +++ b/ansible-devnet/genesis/validator-config.yaml @@ -107,6 +107,16 @@ validators: isAggregator: false count: 1 + - name: "nlean_0" + privkey: "d94e3dc35e320440c891b66bd82d1aaf2079364162815b32c2633ecae009c84c" + enrFields: + ip: "95.216.164.165" + quic: 
9001 + metricsPort: 9095 + apiPort: 5055 + isAggregator: false + count: 1 + # - name: "lean_node_0" # privkey: "d94e3dc35e320440c891b66bd82d1aaf2079364162815b32c2633ecae009c84c" # enrFields: From 827f9e782760103d48911b68aa73a821dc93030c Mon Sep 17 00:00:00 2001 From: ch4r10t33r Date: Thu, 19 Mar 2026 14:24:05 +0000 Subject: [PATCH 21/25] ansible: add gean and nlean roles and wire into deploy --- .../playbooks/helpers/deploy-single-node.yml | 20 +++- ansible/roles/gean/defaults/main.yml | 8 ++ ansible/roles/gean/tasks/main.yml | 110 ++++++++++++++++++ ansible/roles/nlean/defaults/main.yml | 8 ++ ansible/roles/nlean/tasks/main.yml | 109 +++++++++++++++++ 5 files changed, 253 insertions(+), 2 deletions(-) create mode 100644 ansible/roles/gean/defaults/main.yml create mode 100644 ansible/roles/gean/tasks/main.yml create mode 100644 ansible/roles/nlean/defaults/main.yml create mode 100644 ansible/roles/nlean/tasks/main.yml diff --git a/ansible/playbooks/helpers/deploy-single-node.yml b/ansible/playbooks/helpers/deploy-single-node.yml index 34f17f7..edcd1b6 100644 --- a/ansible/playbooks/helpers/deploy-single-node.yml +++ b/ansible/playbooks/helpers/deploy-single-node.yml @@ -86,7 +86,23 @@ - ethlambda - deploy +- name: Deploy Gean node + include_role: + name: gean + when: client_type == "gean" + tags: + - gean + - deploy + +- name: Deploy Nlean node + include_role: + name: nlean + when: client_type == "nlean" + tags: + - nlean + - deploy + - name: Fail if unknown client type fail: - msg: "Unknown client type '{{ client_type }}' for node '{{ node_name }}'. Expected: zeam, ream, qlean, lantern, lighthouse, grandine or ethlambda" - when: client_type not in ["zeam", "ream", "qlean", "lantern", "lighthouse", "grandine", "ethlambda"] + msg: "Unknown client type '{{ client_type }}' for node '{{ node_name }}'. 
Expected: zeam, ream, qlean, lantern, lighthouse, grandine, ethlambda, gean or nlean" + when: client_type not in ["zeam", "ream", "qlean", "lantern", "lighthouse", "grandine", "ethlambda", "gean", "nlean"] diff --git a/ansible/roles/gean/defaults/main.yml b/ansible/roles/gean/defaults/main.yml new file mode 100644 index 0000000..3e9959f --- /dev/null +++ b/ansible/roles/gean/defaults/main.yml @@ -0,0 +1,8 @@ +--- +# Default variables for gean role +# Note: These are fallback defaults. Actual values are extracted from client-cmds/gean-cmd.sh +# in the tasks/main.yml file. These defaults are used if extraction fails. + +gean_docker_image: "ghcr.io/geanlabs/gean:devnet3" +deployment_mode: docker # docker or binary +gean_devnet_id: "devnet0" diff --git a/ansible/roles/gean/tasks/main.yml b/ansible/roles/gean/tasks/main.yml new file mode 100644 index 0000000..c89bfad --- /dev/null +++ b/ansible/roles/gean/tasks/main.yml @@ -0,0 +1,110 @@ +--- +# Gean role: Deploy and manage Gean nodes +# Converts client-cmds/gean-cmd.sh logic to Ansible tasks + +- name: Extract docker image from client-cmd.sh + shell: | + project_root="$(cd '{{ playbook_dir }}/../..' && pwd)" + grep -E '^node_docker=' "$project_root/client-cmds/gean-cmd.sh" | head -1 | sed -E 's/.*node_docker="([^ "]+).*/\1/' + register: gean_docker_image_raw + changed_when: false + delegate_to: localhost + run_once: true + +- name: Extract deployment mode from client-cmd.sh + shell: | + project_root="$(cd '{{ playbook_dir }}/../..' 
&& pwd)" + grep -E '^node_setup=' "$project_root/client-cmds/gean-cmd.sh" | head -1 | sed -E 's/.*node_setup="([^"]+)".*/\1/' + register: gean_deployment_mode_raw + changed_when: false + delegate_to: localhost + run_once: true + +- name: Set docker image and deployment mode from client-cmd.sh + set_fact: + gean_docker_image: "{{ gean_docker_image_raw.stdout | trim | default('ghcr.io/geanlabs/gean:devnet3') }}" + deployment_mode: "{{ gean_deployment_mode_raw.stdout | trim | default('docker') }}" + +- name: Extract node configuration from validator-config.yaml + shell: | + yq eval ".validators[] | select(.name == \"{{ node_name }}\") | .{{ item }}" "{{ hostvars['localhost']['local_genesis_dir_path'] }}/validator-config.yaml" + register: gean_node_config + changed_when: false + delegate_to: localhost + loop: + - enrFields.quic + - metricsPort + - apiPort + - isAggregator + when: node_name is defined + +- name: Set node ports and aggregator flag + set_fact: + gean_quic_port: "{{ gean_node_config.results[0].stdout }}" + gean_metrics_port: "{{ gean_node_config.results[1].stdout }}" + gean_api_port: "{{ gean_node_config.results[2].stdout }}" + gean_is_aggregator: "{{ 'true' if (gean_node_config.results[3].stdout | default('') | trim) == 'true' else 'false' }}" + when: gean_node_config is defined + +- name: Ensure node key file exists + stat: + path: "{{ genesis_dir }}/{{ node_name }}.key" + register: node_key_stat + +- name: Debug node key file check + debug: + msg: "Checking for key file at {{ genesis_dir }}/{{ node_name }}.key - exists: {{ node_key_stat.stat.exists | default('undefined') }}" + +- name: Fail if node key file is missing + fail: + msg: "Node key file {{ node_name }}.key not found in {{ genesis_dir }}" + when: not (node_key_stat.stat.exists | default(false)) + +- name: Clean node data directory + file: + path: "{{ data_dir }}/{{ node_name }}" + state: absent + when: clean_data | default(false) | bool + +- name: Create node data directory + file: + path: "{{ 
data_dir }}/{{ node_name }}" + state: directory + mode: "0755" + +- name: Deploy Gean node using Docker + block: + - name: Stop existing Gean container (if any) + command: docker rm -f {{ node_name }} + register: gean_stop + failed_when: false + changed_when: gean_stop.rc == 0 + + - name: Start Gean container + command: >- + docker run -d + --pull=always + --name {{ node_name }} + --restart unless-stopped + --network host + {{ '--init --ulimit core=-1 --workdir /data' if (enable_core_dumps | default('') == 'all') or (node_name in (enable_core_dumps | default('')).split(',')) or (node_name.split('_')[0] in (enable_core_dumps | default('')).split(',')) else '' }} + -v {{ genesis_dir }}:/config:ro + -v {{ data_dir }}/{{ node_name }}:/data + {{ gean_docker_image }} + --data-dir /data + --genesis /config/config.yaml + --bootnodes /config/nodes.yaml + --validator-registry-path /config/validators.yaml + --node-id {{ node_name }} + --node-key /config/{{ node_name }}.key + --validator-keys /config/hash-sig-keys + --listen-addr /ip4/0.0.0.0/udp/{{ gean_quic_port }}/quic-v1 + --discovery-port {{ gean_quic_port }} + --devnet-id {{ gean_devnet_id }} + --api-port {{ gean_api_port }} + --metrics-port {{ gean_metrics_port }} + {{ '--is-aggregator' if (gean_is_aggregator | default('false')) == 'true' else '' }} + {{ ('--checkpoint-sync-url ' + checkpoint_sync_url) if (checkpoint_sync_url is defined and checkpoint_sync_url | length > 0) else '' }} + register: gean_container + changed_when: gean_container.rc == 0 + when: deployment_mode == 'docker' diff --git a/ansible/roles/nlean/defaults/main.yml b/ansible/roles/nlean/defaults/main.yml new file mode 100644 index 0000000..15db503 --- /dev/null +++ b/ansible/roles/nlean/defaults/main.yml @@ -0,0 +1,8 @@ +--- +# Default variables for nlean role +# Note: These are fallback defaults. Actual values are extracted from client-cmds/nlean-cmd.sh +# in the tasks/main.yml file. These defaults are used if extraction fails. 
+ +nlean_docker_image: "nlean-local:devnet3" +deployment_mode: docker # docker or binary +nlean_network_name: "devnet0" diff --git a/ansible/roles/nlean/tasks/main.yml b/ansible/roles/nlean/tasks/main.yml new file mode 100644 index 0000000..a7e89a0 --- /dev/null +++ b/ansible/roles/nlean/tasks/main.yml @@ -0,0 +1,109 @@ +--- +# Nlean role: Deploy and manage Nlean nodes +# Converts client-cmds/nlean-cmd.sh logic to Ansible tasks + +- name: Extract docker image from client-cmd.sh + shell: | + project_root="$(cd '{{ playbook_dir }}/../..' && pwd)" + grep -E '^nlean_docker_image=' "$project_root/client-cmds/nlean-cmd.sh" | head -1 | sed -E 's/.*\$\{[^}]+:-([^}]+)\}.*/\1/' + register: nlean_docker_image_raw + changed_when: false + delegate_to: localhost + run_once: true + +- name: Extract deployment mode from client-cmd.sh + shell: | + project_root="$(cd '{{ playbook_dir }}/../..' && pwd)" + grep -E '^node_setup=' "$project_root/client-cmds/nlean-cmd.sh" | head -1 | sed -E 's/.*\$\{[^}]+:-([^}]+)\}.*/\1/' + register: nlean_deployment_mode_raw + changed_when: false + delegate_to: localhost + run_once: true + +- name: Set docker image and deployment mode from client-cmd.sh + set_fact: + nlean_docker_image: "{{ nlean_docker_image_raw.stdout | trim | default('nlean-local:devnet3') }}" + deployment_mode: "{{ nlean_deployment_mode_raw.stdout | trim | default('docker') }}" + +- name: Extract node configuration from validator-config.yaml + shell: | + yq eval ".validators[] | select(.name == \"{{ node_name }}\") | .{{ item }}" "{{ hostvars['localhost']['local_genesis_dir_path'] }}/validator-config.yaml" + register: nlean_node_config + changed_when: false + delegate_to: localhost + loop: + - enrFields.quic + - metricsPort + - apiPort + - isAggregator + when: node_name is defined + +- name: Set node ports and aggregator flag + set_fact: + nlean_quic_port: "{{ nlean_node_config.results[0].stdout }}" + nlean_metrics_port: "{{ nlean_node_config.results[1].stdout }}" + nlean_api_port: 
"{{ nlean_node_config.results[2].stdout }}" + nlean_is_aggregator: "{{ 'true' if (nlean_node_config.results[3].stdout | default('') | trim) == 'true' else 'false' }}" + when: nlean_node_config is defined + +- name: Ensure node key file exists + stat: + path: "{{ genesis_dir }}/{{ node_name }}.key" + register: node_key_stat + +- name: Debug node key file check + debug: + msg: "Checking for key file at {{ genesis_dir }}/{{ node_name }}.key - exists: {{ node_key_stat.stat.exists | default('undefined') }}" + +- name: Fail if node key file is missing + fail: + msg: "Node key file {{ node_name }}.key not found in {{ genesis_dir }}" + when: not (node_key_stat.stat.exists | default(false)) + +- name: Clean node data directory + file: + path: "{{ data_dir }}/{{ node_name }}" + state: absent + when: clean_data | default(false) | bool + +- name: Create node data directory + file: + path: "{{ data_dir }}/{{ node_name }}" + state: directory + mode: "0755" + +- name: Deploy Nlean node using Docker + block: + - name: Stop existing Nlean container (if any) + command: docker rm -f {{ node_name }} + register: nlean_stop + failed_when: false + changed_when: nlean_stop.rc == 0 + + - name: Start Nlean container + command: >- + docker run -d + --pull=always + --name {{ node_name }} + --restart unless-stopped + --network host + {{ '--init --ulimit core=-1 --workdir /data' if (enable_core_dumps | default('') == 'all') or (node_name in (enable_core_dumps | default('')).split(',')) or (node_name.split('_')[0] in (enable_core_dumps | default('')).split(',')) else '' }} + -v {{ genesis_dir }}:/config:ro + -v {{ data_dir }}/{{ node_name }}:/data + {{ nlean_docker_image }} + --validator-config /config/validator-config.yaml + --node {{ node_name }} + --data-dir /data + --network {{ nlean_network_name }} + --node-key /config/{{ node_name }}.key + --socket-port {{ nlean_quic_port }} + --metrics false + --metrics-port {{ nlean_metrics_port }} + --metrics-address 0.0.0.0 + --hash-sig-key-dir 
/config/hash-sig-keys + --api-port {{ nlean_api_port }} + {{ '--is-aggregator' if (nlean_is_aggregator | default('false')) == 'true' else '' }} + {{ ('--checkpoint-sync-url ' + checkpoint_sync_url) if (checkpoint_sync_url is defined and checkpoint_sync_url | length > 0) else '' }} + register: nlean_container + changed_when: nlean_container.rc == 0 + when: deployment_mode == 'docker' From ff50c26af23f15541b452404c09985af23a8c2d7 Mon Sep 17 00:00:00 2001 From: ch4r10t33r Date: Thu, 19 Mar 2026 14:24:48 +0000 Subject: [PATCH 22/25] docs: update adding-a-new-client guide with gean and nlean --- docs/adding-a-new-client.md | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/docs/adding-a-new-client.md b/docs/adding-a-new-client.md index 03d5fc8..52203c8 100644 --- a/docs/adding-a-new-client.md +++ b/docs/adding-a-new-client.md @@ -366,7 +366,7 @@ Add a block for your client type. The client type is the prefix before the first node name (`myclient_0` → `myclient`). ```yaml -# ... existing entries for zeam, ream, qlean, lantern, lighthouse, grandine, ethlambda ... +# ... existing entries for zeam, ream, qlean, lantern, lighthouse, grandine, ethlambda, gean, nlean ... - name: Deploy myclient node include_role: @@ -383,8 +383,8 @@ known list: ```yaml - name: Fail if unknown client type fail: - msg: "Unknown client type '{{ client_type }}' for node '{{ node_name }}'. Expected: zeam, ream, qlean, lantern, lighthouse, grandine, ethlambda or myclient" - when: client_type not in ["zeam", "ream", "qlean", "lantern", "lighthouse", "grandine", "ethlambda", "myclient"] + msg: "Unknown client type '{{ client_type }}' for node '{{ node_name }}'. Expected: zeam, ream, qlean, lantern, lighthouse, grandine, ethlambda, gean, nlean or myclient" + when: client_type not in ["zeam", "ream", "qlean", "lantern", "lighthouse", "grandine", "ethlambda", "gean", "nlean", "myclient"] ``` --- @@ -405,7 +405,9 @@ Current following clients are supported: 5. Lighthouse 6. 
Grandine 7. Ethlambda -8. Myclient ← add here +8. Gean +9. Nlean +10. Myclient ← add here ``` --- From f2f16bc1b8af8c840c242bca9c59cf2220419dd3 Mon Sep 17 00:00:00 2001 From: ch4r10t33r Date: Thu, 19 Mar 2026 15:04:26 +0000 Subject: [PATCH 23/25] nlean: remove --pull=always for locally-built image --- ansible/roles/nlean/tasks/main.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/ansible/roles/nlean/tasks/main.yml b/ansible/roles/nlean/tasks/main.yml index a7e89a0..61e4acd 100644 --- a/ansible/roles/nlean/tasks/main.yml +++ b/ansible/roles/nlean/tasks/main.yml @@ -83,7 +83,6 @@ - name: Start Nlean container command: >- docker run -d - --pull=always --name {{ node_name }} --restart unless-stopped --network host From 020cb6ffa208faed66de943ed9bada416b0833a4 Mon Sep 17 00:00:00 2001 From: ch4r10t33r Date: Thu, 19 Mar 2026 15:08:29 +0000 Subject: [PATCH 24/25] nlean: use ghcr.io/nleaneth/nlean:latest as docker image --- ansible/roles/nlean/defaults/main.yml | 2 +- ansible/roles/nlean/tasks/main.yml | 3 ++- client-cmds/nlean-cmd.sh | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/ansible/roles/nlean/defaults/main.yml b/ansible/roles/nlean/defaults/main.yml index 15db503..9f1c324 100644 --- a/ansible/roles/nlean/defaults/main.yml +++ b/ansible/roles/nlean/defaults/main.yml @@ -3,6 +3,6 @@ # Note: These are fallback defaults. Actual values are extracted from client-cmds/nlean-cmd.sh # in the tasks/main.yml file. These defaults are used if extraction fails. 
-nlean_docker_image: "nlean-local:devnet3" +nlean_docker_image: "ghcr.io/nleaneth/nlean:latest" deployment_mode: docker # docker or binary nlean_network_name: "devnet0" diff --git a/ansible/roles/nlean/tasks/main.yml b/ansible/roles/nlean/tasks/main.yml index 61e4acd..96f6fb1 100644 --- a/ansible/roles/nlean/tasks/main.yml +++ b/ansible/roles/nlean/tasks/main.yml @@ -22,7 +22,7 @@ - name: Set docker image and deployment mode from client-cmd.sh set_fact: - nlean_docker_image: "{{ nlean_docker_image_raw.stdout | trim | default('nlean-local:devnet3') }}" + nlean_docker_image: "{{ nlean_docker_image_raw.stdout | trim | default('ghcr.io/nleaneth/nlean:latest') }}" deployment_mode: "{{ nlean_deployment_mode_raw.stdout | trim | default('docker') }}" - name: Extract node configuration from validator-config.yaml @@ -83,6 +83,7 @@ - name: Start Nlean container command: >- docker run -d + --pull=always --name {{ node_name }} --restart unless-stopped --network host diff --git a/client-cmds/nlean-cmd.sh b/client-cmds/nlean-cmd.sh index 2f7fe0a..4f96865 100755 --- a/client-cmds/nlean-cmd.sh +++ b/client-cmds/nlean-cmd.sh @@ -6,7 +6,7 @@ # NLEAN_REPO should point to this repository when lean-quickstart is outside this workspace. # Default assumes sibling checkouts: /nlean and /lean-quickstart. 
nlean_repo="${NLEAN_REPO:-$scriptDir/../nlean}" -nlean_docker_image="${NLEAN_DOCKER_IMAGE:-nlean-local:devnet3}" +nlean_docker_image="${NLEAN_DOCKER_IMAGE:-ghcr.io/nleaneth/nlean:latest}" nlean_network_name="${NLEAN_NETWORK_NAME:-devnet0}" log_level="${NLEAN_LOG_LEVEL:-}" enable_metrics="${enableMetrics:-false}" From a20ec2792fa52c64d0b0372215eab06684d55efe Mon Sep 17 00:00:00 2001 From: Katya Ryazantseva Date: Thu, 19 Mar 2026 17:07:57 +0100 Subject: [PATCH 25/25] fix: enable metrics flag for nlean --- ansible/roles/nlean/tasks/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/nlean/tasks/main.yml b/ansible/roles/nlean/tasks/main.yml index 96f6fb1..339b757 100644 --- a/ansible/roles/nlean/tasks/main.yml +++ b/ansible/roles/nlean/tasks/main.yml @@ -97,7 +97,7 @@ --network {{ nlean_network_name }} --node-key /config/{{ node_name }}.key --socket-port {{ nlean_quic_port }} - --metrics false + --metrics true --metrics-port {{ nlean_metrics_port }} --metrics-address 0.0.0.0 --hash-sig-key-dir /config/hash-sig-keys