Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
d683eeb
feat(op-supernode): rework interop activity startup
ajsutton May 17, 2026
dadbbd5
refactor(op-supernode/interop): clarify startup loop and tolerate tra…
ajsutton May 18, 2026
88dc071
docs(op-supernode/interop): refresh stale comments after startup rework
ajsutton May 18, 2026
e00bca9
test(op-acceptance-tests): interop startup-rework resync tests (#20824)
ajsutton May 18, 2026
0bcdc40
fix(op-supernode/interop): drop unsynchronized waitingForSync read
ajsutton May 18, 2026
a940459
test(op-supernode/interop): restore cold-start backfill edge-case cov…
ajsutton May 18, 2026
aeee61d
test(op-acceptance-tests): add interop startup-rework resync tests
ajsutton May 18, 2026
1c08da8
review: trim docs, drop local testutil, reuse existing DSL primitives
ajsutton May 18, 2026
40a1ece
test: drop activation delay from post-activation resync test
ajsutton May 18, 2026
fc5b6c1
test: simplify resync tests to reuse existing DSL primitives
ajsutton May 18, 2026
6e44756
test: collapse startup-resync tests into a single package
ajsutton May 18, 2026
d55a589
test: assert backfill coverage in post-activation resync
ajsutton May 18, 2026
7c0acac
feat(op-devstack): supernode restart paired with external EL wipe
ajsutton May 18, 2026
0223ea2
feat(op-devstack): TwoL2SupernodeInteropPeerEL preset with sibling se…
ajsutton May 18, 2026
9e5f749
test(op-acceptance-tests): supernode startup-rework resync tests with…
ajsutton May 18, 2026
c6974c7
test(op-acceptance-tests): peer-EL preset uses real execution-layer sync
ajsutton May 18, 2026
3dc7596
test(op-acceptance-tests): dedup resync tests, support op-reth EL wipe
ajsutton May 18, 2026
3537368
test(op-acceptance-tests): inline single-use preActivationDelay
ajsutton May 19, 2026
e58dc61
test(op-acceptance-tests): inline newResyncSystem, tighten CrossSafe …
ajsutton May 19, 2026
8aa4bcc
test(op-acceptance-tests): remove trailing blank line (goimports)
ajsutton May 19, 2026
3b23e1b
refactor(op-devstack/sysgo): dedupe op-reth storage init into initSto…
ajsutton May 19, 2026
47e016f
refactor(op-devstack/sysgo): own peer tracking at sysgo, replay on re…
ajsutton May 19, 2026
7baa809
refactor(op-devstack): collapse supernode restart into Stop+StartWith…
ajsutton May 19, 2026
73b5973
refactor(op-devstack): drop sibling-sequencer framing, simplify EL-wi…
ajsutton May 19, 2026
3f744bf
refactor(op-devstack): TwoL2SupernodeInteropPeerEL adds a verifier su…
ajsutton May 19, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions justfile
Original file line number Diff line number Diff line change
Expand Up @@ -346,8 +346,8 @@ update-op-geth:
# Build all Rust binaries (release) for sysgo tests.
build-rust-release:
cd rust && cargo build --release --bin kona-node --bin kona-host --bin op-reth
cd op-rbuilder && cargo build --release -p op-rbuilder --bin op-rbuilder
cd rollup-boost && cargo build --release -p rollup-boost --bin rollup-boost
cd rust/op-rbuilder && cargo build --release -p op-rbuilder --bin op-rbuilder
cd rust/rollup-boost && cargo build --release -p rollup-boost --bin rollup-boost

# Checks that locked NUT bundles have not been modified.
check-nut-locks:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
// Package startup_resync contains acceptance tests for the op-supernode
// interop startup rework's cold-start resync path: stopping the supernode,
// deleting its on-disk data dir, and starting a fresh supernode against the
// same chain containers and virtual nodes.
// interop startup rework's cold-start resync path.
package startup_resync

import (
Expand All @@ -20,72 +18,92 @@ const (
preRestartFinalized = uint64(5)
)

// TestSupernodeResyncResumesAtActivation_PostActivation drives a full
// supernode data-dir wipe after the chain has crossed activation, and
// asserts that cross-safe keeps advancing post-restart and that the
// cold-start backfill restored history into the logs DB.
// TestSupernodeResyncResumesAtActivation_PostActivation runs the
// post-activation resync scenario (runPostActivationResync) twice as
// subtests: once with no restart options ("EL data intact") and once with
// dsl.WithELWiped ("EL data wiped"), which asks the restart to also wipe the
// ELs attached to the restarted supernode.
func TestSupernodeResyncResumesAtActivation_PostActivation(gt *testing.T) {
	type scenario struct {
		name        string
		restartOpts []func(*dsl.RestartOpts)
	}
	scenarios := []scenario{
		{name: "EL data intact"},
		{name: "EL data wiped", restartOpts: []func(*dsl.RestartOpts){dsl.WithELWiped}},
	}
	for _, sc := range scenarios {
		gt.Run(sc.name, func(gt *testing.T) {
			runPostActivationResync(gt, sc.restartOpts)
		})
	}
}

// TestSupernodeResyncSchedulesAtActivation_PreActivation runs the
// pre-activation resync scenario (runPreActivationResync) twice as subtests:
// once with no restart options ("EL data intact") and once with
// dsl.WithELWiped ("EL data wiped"), which asks the restart to also wipe the
// ELs attached to the restarted supernode.
func TestSupernodeResyncSchedulesAtActivation_PreActivation(gt *testing.T) {
	type scenario struct {
		name        string
		restartOpts []func(*dsl.RestartOpts)
	}
	scenarios := []scenario{
		{name: "EL data intact"},
		{name: "EL data wiped", restartOpts: []func(*dsl.RestartOpts){dsl.WithELWiped}},
	}
	for _, sc := range scenarios {
		gt.Run(sc.name, func(gt *testing.T) {
			runPreActivationResync(gt, sc.restartOpts)
		})
	}
}

func runPostActivationResync(gt *testing.T, restartOpts []func(*dsl.RestartOpts)) {
t := devtest.SerialT(gt)
sys := presets.NewTwoL2SupernodeInterop(t, 0,
sys := presets.NewTwoL2SupernodeInteropPeerEL(t, 0,
presets.WithUniformL2BlockTimes(l2BlockTime),
presets.WithInteropLogBackfillDepth(backfillDepth),
)
sys.VerifierSupernode.AwaitBackfillCompleted()

sys.Supernode.AwaitBackfillCompleted()

// Setup: let L2 finalized advance several blocks on both chains. On
// restart, op-node may drop back as part of its safe start process,
// but won't go past the finalized head. With finalized well past
// genesis the post-restart cold-start backfill has a real window to
// populate, instead of collapsing to empty against a re-recorded
// genesis SafeDB entry.
dsl.CheckAll(t,
sys.L2ACL.AdvancedFn(types.Finalized, preRestartFinalized, 180),
sys.L2BCL.AdvancedFn(types.Finalized, preRestartFinalized, 180),
sys.VerifierL2ACL.AdvancedFn(types.Finalized, preRestartFinalized, 180),
sys.VerifierL2BCL.AdvancedFn(types.Finalized, preRestartFinalized, 180),
)

sys.Supernode.RestartWithFreshDataDir()
sys.Supernode.AwaitBackfillCompleted()
activation := sys.VerifierSupernode.ActivationTimestamp()
sys.VerifierSupernode.RestartWithFreshDataDir(restartOpts...)
sys.VerifierSupernode.AwaitVerificationStartsAtOrAfter(activation)
sys.VerifierSupernode.AwaitBackfillCompleted()

dsl.CheckAll(t,
sys.L2ACL.AdvancedFn(types.CrossSafe, 1, 60),
sys.L2BCL.AdvancedFn(types.CrossSafe, 1, 60),
sys.VerifierL2ACL.AdvancedFn(types.CrossSafe, 1, 60),
sys.VerifierL2BCL.AdvancedFn(types.CrossSafe, 1, 60),
)

// Verify the cold-start backfill repopulated the logs DB.
sys.Supernode.AssertBackfillCovers(backfillDepth, l2BlockTime,
sys.VerifierSupernode.AssertBackfillCovers(backfillDepth, l2BlockTime,
sys.L2A.ChainID(), sys.L2B.ChainID())
}

// TestSupernodeResyncSchedulesAtActivation_PreActivation drives a full
// supernode data-dir wipe while interop is scheduled but not yet active,
// and asserts that cold-start init parks the verifier at the (future)
// activation timestamp while cross-safe keeps advancing on both chains.
func TestSupernodeResyncSchedulesAtActivation_PreActivation(gt *testing.T) {
func runPreActivationResync(gt *testing.T, restartOpts []func(*dsl.RestartOpts)) {
t := devtest.SerialT(gt)
// 60-minute delay: ensures the chain never approaches activation during
// the test, so we always exercise the genuine pre-activation cold-start
// path regardless of CI scheduling variance.
sys := presets.NewTwoL2SupernodeInterop(t, 60*60,
// Delay activation by an hour so the chain stays well below it throughout
// the test, and cold-start always parks at the future activation timestamp
// regardless of CI scheduling variance.
sys := presets.NewTwoL2SupernodeInteropPeerEL(t, uint64(60*60),
presets.WithUniformL2BlockTimes(l2BlockTime),
presets.WithInteropLogBackfillDepth(backfillDepth),
)
sys.VerifierSupernode.AwaitBackfillCompleted()
activation := sys.VerifierSupernode.ActivationTimestamp()

sys.Supernode.AwaitBackfillCompleted()
activation := sys.Supernode.ActivationTimestamp()

// Setup: let local-safe accumulate enough that op-node's SafeDB has
// entries to serve to the post-restart cold-start init.
dsl.CheckAll(t,
sys.L2ACL.AdvancedFn(types.LocalSafe, 2, 30),
sys.L2BCL.AdvancedFn(types.LocalSafe, 2, 30),
sys.VerifierL2ACL.AdvancedFn(types.LocalSafe, 2, 60),
sys.VerifierL2BCL.AdvancedFn(types.LocalSafe, 2, 60),
)

sys.Supernode.RestartWithFreshDataDir()
sys.Supernode.AwaitVerificationStartsAt(activation)
sys.VerifierSupernode.RestartWithFreshDataDir(restartOpts...)
sys.VerifierSupernode.AwaitVerificationStartsAt(activation)

dsl.CheckAll(t,
sys.L2ACL.AdvancedFn(types.CrossSafe, 1, 60),
sys.L2BCL.AdvancedFn(types.CrossSafe, 1, 60),
sys.VerifierL2ACL.AdvancedFn(types.CrossSafe, 1, 60),
sys.VerifierL2BCL.AdvancedFn(types.CrossSafe, 1, 60),
)
}
38 changes: 7 additions & 31 deletions op-devstack/dsl/l2_cl.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,16 +22,14 @@ import (
// L2CLNode wraps a stack.L2CLNode interface for DSL operations
type L2CLNode struct {
commonImpl
inner stack.L2CLNode
managedPeers map[string]*L2CLNode
inner stack.L2CLNode
}

// NewL2CLNode creates a new L2CLNode DSL wrapper
func NewL2CLNode(inner stack.L2CLNode) *L2CLNode {
return &L2CLNode{
commonImpl: commonFromT(inner.T()),
inner: inner,
managedPeers: make(map[string]*L2CLNode),
commonImpl: commonFromT(inner.T()),
inner: inner,
}
}

Expand All @@ -56,7 +54,6 @@ func (cl *L2CLNode) Start() {
lifecycle, ok := cl.inner.(stack.Lifecycle)
cl.require.Truef(ok, "L2CL node %s is not lifecycle-controllable", cl.inner.Name())
lifecycle.Start()
cl.restoreManagedPeers()
}

func (cl *L2CLNode) Stop() {
Expand All @@ -65,18 +62,6 @@ func (cl *L2CLNode) Stop() {
lifecycle.Stop()
}

func (cl *L2CLNode) ManagePeer(peer *L2CLNode) {
cl.managedPeers[peer.Name()] = peer
peer.managedPeers[cl.Name()] = cl
}

func (cl *L2CLNode) restoreManagedPeers() {
for _, peer := range cl.managedPeers {
cl.connectPeerRaw(peer)
peer.connectPeerRaw(cl)
}
}

func (cl *L2CLNode) StartSequencer() {
// The op-node Sequencer.Start RPC requires the caller to pass the hash of op-node's
// current unsafe head. Reading the head and issuing the start call are two separate
Expand Down Expand Up @@ -449,30 +434,21 @@ func (cl *L2CLNode) Peers() *apis.PeerDump {
return peerDump
}

// DisconnectPeer one-shot disconnects from peer. Restart-survivability is
// handled in sysgo via the peer registry.
func (cl *L2CLNode) DisconnectPeer(peer *L2CLNode) {
delete(cl.managedPeers, peer.Name())
delete(peer.managedPeers, cl.Name())
cl.disconnectPeerRaw(peer)
}

func (cl *L2CLNode) disconnectPeerRaw(peer *L2CLNode) {
peerInfo := peer.PeerInfo()
err := retry.Do0(cl.ctx, 3, retry.Exponential(), func() error {
return cl.inner.P2PAPI().DisconnectPeer(cl.ctx, peerInfo.PeerID)
})
cl.require.NoError(err, "failed to disconnect peer")
}

// ConnectPeer one-shot dials peer. Restart-survivability is handled in sysgo
// via the peer registry.
func (cl *L2CLNode) ConnectPeer(peer *L2CLNode) {
cl.managedPeers[peer.Name()] = peer
peer.managedPeers[cl.Name()] = cl
cl.connectPeerRaw(peer)
}

func (cl *L2CLNode) connectPeerRaw(peer *L2CLNode) {
peerInfo := peer.PeerInfo()
cl.require.NotZero(len(peerInfo.Addresses), "failed to get peer address")
// graceful backoff for p2p connection, to avoid dial backoff or connection refused error
strategy := &retry.ExponentialStrategy{Min: 10 * time.Second, Max: 30 * time.Second, MaxJitter: 250 * time.Millisecond}
err := retry.Do0(cl.ctx, 5, strategy, func() error {
return cl.inner.P2PAPI().ConnectPeer(cl.ctx, peerInfo.Addresses[0])
Expand Down
14 changes: 14 additions & 0 deletions op-devstack/dsl/l2_el.go
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,20 @@ func (el *L2ELNode) Start() {
lifecycle.Start()
}

// WipeOnDiskState erases the EL's persistent state and is meant to be called
// between Stop and Start. It only acts when the wrapped node implements
// stack.OnDiskStateWiper; any other EL (e.g. an in-memory one) is left
// untouched. A wipe error is reported through el.require.
func (el *L2ELNode) WipeOnDiskState() {
	if w, canWipe := el.inner.(stack.OnDiskStateWiper); canWipe {
		el.log.Info("Wiping on-disk state", "name", el.inner.Name())
		// Arguments are evaluated left to right: the wipe runs before the
		// name is fetched for the failure message.
		el.require.NoErrorf(w.WipeOnDiskState(), "failed to wipe on-disk state for %s", el.inner.Name())
	}
}

// PeerWith dials peer once by handing both nodes' L2 eth RPC clients to
// sysgo.ConnectP2P. It does not track the connection itself; keeping a static
// topology alive across restarts is handled in sysgo via the peer registry.
func (el *L2ELNode) PeerWith(peer *L2ELNode) {
	localRPC := el.inner.L2EthClient().RPC()
	remoteRPC := peer.inner.L2EthClient().RPC()
	// NOTE(review): the meaning of the trailing false flag is defined by
	// sysgo.ConnectP2P and is not visible from here.
	sysgo.ConnectP2P(el.ctx, el.require, localRPC, remoteRPC, false)
}
Expand Down
70 changes: 64 additions & 6 deletions op-devstack/dsl/supernode.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,15 @@ type Supernode struct {
commonImpl
inner stack.Supernode
testControl stack.SupernodeTestControl
frontedELs []*L2ELNode
}

// AttachELs records the L2 ELs this supernode drives. The set is consulted by
// RestartWithFreshDataDir(WithELWiped) to wipe their on-disk state alongside
// the supernode's, and is available to future operations that need to address
// the supernode's ELs as a group.
//
// The slice is copied, so later changes the caller makes to els do not alter
// the recorded set. Calling AttachELs again replaces the previous set.
func (s *Supernode) AttachELs(els []*L2ELNode) {
	// Detach from the caller's backing array; the elements themselves are
	// still shared pointers.
	s.frontedELs = append([]*L2ELNode(nil), els...)
}

// NewSupernode creates a new Supernode DSL wrapper
Expand Down Expand Up @@ -129,16 +138,48 @@ func (s *Supernode) ResumeInterop() {
s.interopActivity().Resume()
}

// RestartOpts collects the optional knobs accepted by
// Supernode.RestartWithFreshDataDir. The zero value requests a plain
// supernode-only restart.
type RestartOpts struct {
	// WipeELs requests that every supernode-fronted EL is stopped and wiped
	// together with the supernode data dir, so that after the restart the
	// ELs have to execution-layer-sync from their peers.
	WipeELs bool
}

// WithELWiped is a functional option for RestartWithFreshDataDir that turns
// on RestartOpts.WipeELs. The supernode must have had its ELs registered via
// AttachELs for the option to have anything to wipe.
func WithELWiped(o *RestartOpts) {
	o.WipeELs = true
}

// RestartWithFreshDataDir stops the supernode, deletes its on-disk data
// directory in full, and starts a fresh supernode against the same chain
// containers, virtual nodes, and externally-visible RPC address.
// Requires NewSupernodeWithTestControl.
func (s *Supernode) RestartWithFreshDataDir() {
// containers, virtual nodes, and externally-visible RPC address. With
// WithELWiped, every fronted EL is stopped, wiped, and restarted between
// the supernode stop and start so the post-restart VN must
// execution-layer-sync from peer ELs. Each EL's Start re-dials its
// registered static peers; fronted CL static peers are re-dialed after the
// supernode comes back up. Requires NewSupernodeWithTestControl, plus
// AttachELs when WipeELs is set.
func (s *Supernode) RestartWithFreshDataDir(opts ...func(*RestartOpts)) {
s.require.NotNil(s.testControl,
"RestartWithFreshDataDir requires test control; use NewSupernodeWithTestControl")
s.log.Info("restarting supernode with fresh data dir")
err := s.testControl.RestartWithFreshDataDir()
s.require.NoError(err, "failed to restart supernode with fresh data dir")

o := RestartOpts{}
for _, fn := range opts {
fn(&o)
}

s.log.Info("restarting supernode with fresh data dir", "wipe_els", o.WipeELs)
s.testControl.Stop()
if o.WipeELs {
for _, el := range s.frontedELs {
el.Stop()
el.WipeOnDiskState()
}
for _, el := range s.frontedELs {
el.Start()
}
}
s.require.NoError(s.testControl.StartWithFreshDataDir(), "start supernode fresh")
}

// BackfillAttempts returns the number of log-backfill attempts since the
Expand Down Expand Up @@ -188,6 +229,23 @@ func (s *Supernode) VerificationStartTimestamp() uint64 {
return s.interopActivity().VerificationStartTimestamp()
}

// AwaitVerificationStartsAtOrAfter polls every 500ms, for at most
// 3*DefaultTimeout, until the interop activity reports BackfillCompleted, and
// then asserts that VerificationStartTimestamp is >= minExpected. It is the
// lower-bound counterpart of AwaitVerificationStartsAt, for cases where the
// exact handoff timestamp depends on which safeDB entry is recorded first
// after a cold start.
func (s *Supernode) AwaitVerificationStartsAtOrAfter(minExpected uint64) {
	activity := s.interopActivity()

	waitCtx, stop := context.WithTimeout(s.ctx, 3*DefaultTimeout)
	defer stop()

	backfillDone := func() (bool, error) {
		return activity.BackfillCompleted(), nil
	}
	s.require.NoError(
		wait.For(waitCtx, 500*time.Millisecond, backfillDone),
		"cold-start initialization did not complete in time",
	)

	// Read the timestamp only after cold-start init has finished.
	got := activity.VerificationStartTimestamp()
	s.require.GreaterOrEqualf(got, minExpected,
		"verificationStartTimestamp must be >= %d after cold-start init, got %d",
		minExpected, got)
}

// AwaitVerificationStartsAt blocks until cold-start init completes, then
// asserts VerificationStartTimestamp equals expected.
// Requires NewSupernodeWithTestControl.
Expand Down
6 changes: 0 additions & 6 deletions op-devstack/presets/singlechain_from_runtime.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,9 +84,6 @@ func singleChainMultiNodeFromRuntime(t devtest.T, runtime *sysgo.SingleChainRunt
L2ELB: dsl.NewL2ELNode(l2ELB),
L2CLB: dsl.NewL2CLNode(l2CLB),
}
if runtime.P2PEnabled {
preset.L2CLB.ManagePeer(preset.L2CL)
}
if runSyncChecks {
// Ensure the follower node is in sync with the sequencer before starting tests.
// CrossSafe requires derivation to run, which under ELSync can only begin
Expand Down Expand Up @@ -210,8 +207,6 @@ func singleChainTwoVerifiersFromRuntime(t devtest.T, runtime *sysgo.SingleChainR
L2CLC: dsl.NewL2CLNode(l2CLC),
TestSequencer: dsl.NewTestSequencer(testSequencer),
}
preset.L2CLC.ManagePeer(preset.L2CL)
preset.L2CLC.ManagePeer(preset.L2CLB)
return preset
}

Expand Down Expand Up @@ -254,7 +249,6 @@ func simpleWithSyncTesterFromRuntime(t devtest.T, runtime *sysgo.SingleChainRunt
SyncTesterL2EL: dsl.NewL2ELNode(syncTesterL2EL),
L2CL2: dsl.NewL2CLNode(l2CL2),
}
preset.L2CL2.ManagePeer(preset.L2CL)
return preset
}

Expand Down
1 change: 1 addition & 0 deletions op-devstack/presets/twol2_from_runtime.go
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,7 @@ func twoL2SupernodeInteropFromRuntime(t devtest.T, runtime *sysgo.MultiChainRunt
}
preset.FunderA = dsl.NewFunder(preset.Wallet, preset.FaucetA, preset.L2ELA)
preset.FunderB = dsl.NewFunder(preset.Wallet, preset.FaucetB, preset.L2ELB)
preset.Supernode.AttachELs([]*dsl.L2ELNode{preset.L2ELA, preset.L2ELB})
return preset
}

Expand Down
Loading