Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(batcher): altda->ethda failover #24

Open
wants to merge 10 commits into
base: feat--multiframe-altda-channel
Choose a base branch
from
27 changes: 18 additions & 9 deletions .github/workflows/kurtosis-devnet.yml
Original file line number Diff line number Diff line change
@@ -5,20 +5,20 @@ on:
branches: [eigenda-develop]
pull_request:

env:
MISE_VERSION: 2024.12.14

jobs:
# This is an optimism devnet which talks to the eigenda holesky testnet via an eigenda-proxy.
# TODO: we should connect this to an eigenda kurtosis devnet instead of using our holesky testnet.
run_op_eigenda_holesky_devnet:
runs-on: ubuntu-latest
steps:
- name: Checkout Repository
uses: actions/checkout@v4
- uses: actions/checkout@v4
- uses: jdx/mise-action@v2
with:
version: 2024.12.14 # [default: latest] mise version to install
install: true # [default: true] run `mise install`
cache: true # [default: true] cache mise using GitHub's cache
experimental: true # [default: false] enable experimental features
version: ${{ env.MISE_VERSION }}
experimental: true
# Needed by the just eigenda-holesky-devnet command below
# These secrets get injected into the eigenda-holesky.yaml kurtosis config file
- name: Create EigenDA secrets file
@@ -31,7 +31,16 @@ jobs:
}
}
EOF
- name: Run Starlark
- run: just eigenda-holesky-devnet
working-directory: kurtosis-devnet

run_op_eigenda_memstore_devnet:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: jdx/mise-action@v2
with:
version: ${{ env.MISE_VERSION }}
experimental: true
- run: just eigenda-memstore-devnet
working-directory: kurtosis-devnet
run: |
just eigenda-holesky-devnet
101 changes: 101 additions & 0 deletions kurtosis-devnet/eigenda-memstore.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
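# This devnet uses an eigenda-proxy running in memstore mode, which simulates an eigenda network
# (generating random certs) instead of talking to the eigenda holesky testnet.
# Unlike the eigenda-holesky devnet, the `just eigenda-memstore-devnet` recipe does not pass an
# eigenda-secrets.json file, so the following secrets setup is only needed for the holesky devnet:
# 1. cp eigenda-secrets.example.json eigenda-secrets.json
# 2. Populate the file with the required values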
# TODO: Connect this with an eigenda v1 kurtosis devnet instead of using our holesky testnet.
# See https://github.com/Layr-Labs/avs-devnet/blob/main/examples/eigenda.yaml
{{- $context := or . (dict)}}
---
optimism_package:
altda_deploy_config:
use_altda: true
# We use the generic commitment which means that the dachallenge contract won't get deployed.
# We align with l2beat's analysis of the da_challenge contract not being economically viable,
# so even if a rollup fails over to keccak commitments, not using the da_challenge contract is fine
# (has same security as using it).
# See https://l2beat.com/scaling/projects/redstone#da-layer-risk-analysis and
# https://discord.com/channels/1244729134312198194/1260612364865245224/1290294353688002562 for
# an economic analysis of the da challenge contract.
da_commitment_type: GenericCommitment
da_challenge_window: 16
da_resolve_window: 16
da_bond_size: 0
da_resolver_refund_percentage: 0
chains:
- participants:
- el_type: op-geth
# latest tag is currently broken until the next stable release, see https://github.com/ethereum-optimism/op-geth/pull/515
# Also see discussion in https://discord.com/channels/1244729134312198194/1260624141497798706/1342556343495692320
el_image: "us-docker.pkg.dev/oplabs-tools-artifacts/images/op-geth:optimism"
el_log_level: ""
el_extra_env_vars: {}
el_extra_labels: {}
el_extra_params: []
cl_type: op-node
cl_image: {{ localDockerImage "op-node" }}
cl_log_level: "debug"
cl_extra_env_vars: {}
cl_extra_labels: {}
cl_extra_params: []
count: 1
network_params:
network: "kurtosis"
network_id: "2151908"
seconds_per_slot: 2
name: "op-kurtosis"
fjord_time_offset: 0
granite_time_offset: 0
holocene_time_offset: 0
fund_dev_accounts: true
batcher_params:
image: {{ localDockerImage "op-batcher" }}
extra_params:
- --altda.max-concurrent-da-requests=1
- --max-channel-duration=25
- --target-num-frames=1
- --max-l1-tx-size-bytes=1000
- --batch-type=1
proposer_params:
image: {{ localDockerImage "op-proposer" }}
extra_params: []
game_type: 1
proposal_interval: 10m
challenger_params:
# TODO: reenable once we start testing secure integrations
enabled: false
image: {{ localDockerImage "op-challenger" }}
cannon_prestate_path: ""
cannon_prestates_url: "http://fileserver/proofs/op-program/cannon"
extra_params: []
da_server_params:
image: ghcr.io/layr-labs/eigenda-proxy:v1.6.4
cmd:
- --addr
- 0.0.0.0
- --port
- "3100"
- --memstore.enabled
- --memstore.expiration
- "30m"
additional_services:
- da_server
global_log_level: "info"
global_node_selectors: {}
global_tolerations: []
persistent: false
ethereum_package:
participants:
- el_type: geth
cl_type: teku
network_params:
preset: minimal
genesis_delay: 5
additional_preloaded_contracts: |
{
"0x4e59b44847b379578588920cA78FbF26c0B4956C": {
"balance": "0ETH",
"code": "0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffe03601600081602082378035828234f58015156039578182fd5b8082525050506014600cf3",
"storage": {},
"nonce": "1"
}
}
74 changes: 74 additions & 0 deletions kurtosis-devnet/justfile
Original file line number Diff line number Diff line change
@@ -74,9 +74,83 @@ devnet-test DEVNET *TEST:
# Devnet recipes

# EigenDA devnet that uses eigenda-proxy connected to eigenda holesky testnet network
[group('eigenda')]
eigenda-holesky-devnet: (devnet "eigenda-holesky.yaml" "eigenda-secrets.json" "eigenda-holesky")
[group('eigenda')]
eigenda-holesky-devnet-clean:
kurtosis enclave rm eigenda-holesky-devnet --force
# EigenDA devnet that uses the eigenda-proxy in memstore mode (simulates an eigenda network but generates random certs)
[group('eigenda')]
eigenda-memstore-devnet: (devnet "eigenda-memstore.yaml")
[group('eigenda')]
eigenda-memstore-devnet-clean:
kurtosis enclave rm eigenda-memstore-devnet --force
# Cause proxy to start returning 503 errors to batcher, as a signal
# to failover to ethDA. Use `eigenda-memstore-devnet-failback` to revert.
[group('eigenda')]
eigenda-memstore-devnet-failover:
#!/usr/bin/env bash
PROXY_ENDPOINT=$(kurtosis port print eigenda-memstore-devnet da-server-op-kurtosis http)
curl -X PATCH $PROXY_ENDPOINT/memstore/config -d '{"PutReturnsFailoverError": true}'
[group('eigenda')]
eigenda-memstore-devnet-failback:
#!/usr/bin/env bash
PROXY_ENDPOINT=$(kurtosis port print eigenda-memstore-devnet da-server-op-kurtosis http)
curl -X PATCH $PROXY_ENDPOINT/memstore/config -d '{"PutReturnsFailoverError": false}'
[group('eigenda')]
eigenda-memstore-devnet-sync-status:
#!/usr/bin/env bash
OPNODE_ENDPOINT=$(kurtosis port print eigenda-memstore-devnet op-cl-1-op-node-op-geth-op-kurtosis http)
cast rpc optimism_syncStatus --rpc-url $OPNODE_ENDPOINT | jq
[group('eigenda')]
eigenda-memstore-devnet-configs-l1-l2:
#!/usr/bin/env bash
echo "OP-NODE ROLLUP CONFIG:"
OPNODE_ENDPOINT=$(kurtosis port print eigenda-memstore-devnet op-cl-1-op-node-op-geth-op-kurtosis http)
cast rpc optimism_rollupConfig --rpc-url $OPNODE_ENDPOINT | jq
echo "TEKU L1-CL SPEC:"
TEKU_ENDPOINT=$(kurtosis port print eigenda-memstore-devnet cl-1-teku-geth http)
curl $TEKU_ENDPOINT/eth/v1/config/spec | jq
# We unfortunately have to restart the batcher in this ugly way right now just to change even a single flag.
# This is b/c op's kurtosis setup right now is not idempotent so if we change a param in eigenda-memstore.yaml
# and rerun `just eigenda-memstore-devnet`, the entire devnet gets respun up which takes a long time.
# Track progress for fixing this in https://github.com/ethereum-optimism/optimism/issues/14390.
# Kurtosis also doesn't have a simple way to update a running service's config, like `kubectl edit` for k8s.
# See https://github.com/kurtosis-tech/kurtosis/issues/2628 for this issue.
# Restart batcher with new flags or image.
[group('eigenda')]
eigenda-memstore-devnet-restart-batcher:
#!/usr/bin/env bash
# IMAGE=op-batcher:eigenda-memstore-devnet
IMAGE=us-docker.pkg.dev/oplabs-tools-artifacts/images/op-batcher:v1.10.0
kurtosis service add eigenda-memstore-devnet op-batcher-op-kurtosis \
$IMAGE \
--ports "http=8548,metrics=9001" \
-- op-batcher \
--l2-eth-rpc=http://op-el-1-op-geth-op-node-op-kurtosis:8545 \
--rollup-rpc=http://op-cl-1-op-node-op-geth-op-kurtosis:8547 \
--poll-interval=1s \
--sub-safety-margin=6 \
--num-confirmations=1 \
--safe-abort-nonce-too-low-count=3 \
--resubmission-timeout=30s \
--rpc.addr=0.0.0.0 \
--rpc.port=8548 \
--rpc.enable-admin \
--metrics.enabled \
--metrics.addr=0.0.0.0 \
--metrics.port=9001 \
--l1-eth-rpc=http://el-1-geth-teku:8545 \
--private-key=0xb3d2d558e3491a3709b7c451100a0366b5872520c7aa020c17a0e7fa35b6a8df \
--data-availability-type=calldata \
--altda.enabled=True \
--altda.da-server=http://da-server-op-kurtosis:3100 \
--altda.da-service \
--altda.max-concurrent-da-requests=1 \
--max-channel-duration=25 \
--target-num-frames=1 \
--max-l1-tx-size-bytes=1000 \
--batch-type=1

# Simple devnet
simple-devnet: (devnet "simple.yaml")
8 changes: 8 additions & 0 deletions op-alt-da/daclient.go
Original file line number Diff line number Diff line change
@@ -16,6 +16,11 @@ var ErrNotFound = errors.New("not found")
// ErrInvalidInput is returned when the input is not valid for posting to the DA storage.
var ErrInvalidInput = errors.New("invalid input")

// ErrAltDADown is returned when the alt DA returns a 503 status code.
// It is used to signify that the alt DA is down and the client should failover to the eth DA.
// See https://github.com/ethereum-optimism/specs/issues/434
var ErrAltDADown = errors.New("alt DA is down: failover to eth DA")

// DAClient is an HTTP client to communicate with a DA storage service.
// It creates commitments and retrieves input data + verifies if needed.
type DAClient struct {
@@ -131,6 +136,9 @@ func (c *DAClient) setInput(ctx context.Context, img []byte) (CommitmentData, er
return nil, err
}
defer resp.Body.Close()
if resp.StatusCode == http.StatusServiceUnavailable {
return nil, ErrAltDADown
}
if resp.StatusCode != http.StatusOK {
return nil, fmt.Errorf("failed to store data: %v", resp.StatusCode)
}
18 changes: 16 additions & 2 deletions op-alt-da/damock.go
Original file line number Diff line number Diff line change
@@ -105,12 +105,16 @@ func (d *AltDADisabled) AdvanceL1Origin(ctx context.Context, l1 L1Fetcher, block
}

// FakeDAServer is a fake DA server for e2e tests.
// It is a small wrapper around DAServer that allows for setting request latencies,
// to mimic a DA service with slow responses (eg. eigenDA with 10 min batching interval).
// It is a small wrapper around DAServer that allows for setting:
// - request latencies, to mimic a DA service with slow responses
// (eg. eigenDA with 10 min batching interval).
// - response status codes, to mimic a DA service that is down.
type FakeDAServer struct {
*DAServer
putRequestLatency time.Duration
getRequestLatency time.Duration
// next failoverCount Put requests will return 503 status code for failover testing
failoverCount uint64
}

func NewFakeDAServer(host string, port int, log log.Logger) *FakeDAServer {
@@ -130,6 +134,11 @@ func (s *FakeDAServer) HandleGet(w http.ResponseWriter, r *http.Request) {

// HandlePut serves a Put request after sleeping for the configured put latency.
// While failoverCount is non-zero, the request is answered with a
// 503 Service Unavailable status and the counter is decremented by one;
// otherwise the request is forwarded to the embedded DAServer.
func (s *FakeDAServer) HandlePut(w http.ResponseWriter, r *http.Request) {
	// Simulate a slow DA service before deciding how to respond.
	time.Sleep(s.putRequestLatency)

	// Normal path: no simulated outage is pending.
	if s.failoverCount == 0 {
		s.DAServer.HandlePut(w, r)
		return
	}

	// Simulated outage: reply 503 and consume one pending failover response.
	w.WriteHeader(http.StatusServiceUnavailable)
	s.failoverCount--
}

@@ -154,6 +163,11 @@ func (s *FakeDAServer) SetGetRequestLatency(latency time.Duration) {
s.getRequestLatency = latency
}

// SetPutFailoverForNRequests sets the next n Put requests to return 503 status code.
// It overwrites any count remaining from a previous call.
func (s *FakeDAServer) SetPutFailoverForNRequests(n uint64) {
s.failoverCount = n
}

type MemStore struct {
db map[string][]byte
lock sync.RWMutex
22 changes: 16 additions & 6 deletions op-batcher/batcher/channel.go
Original file line number Diff line number Diff line change
@@ -45,8 +45,9 @@ func newChannel(log log.Logger, metr metrics.Metricer, cfg ChannelConfig, rollup
}

// TxFailed records a transaction as failed. It will attempt to resubmit the data
// in the failed transaction.
func (c *channel) TxFailed(id string) {
// in the failed transaction. failoverToEthDA should be set to true when using altDA
// and altDA is down. This will switch the channel to submit frames to ethDA instead.
func (c *channel) TxFailed(id string, failoverToEthDA bool) {
if data, ok := c.pendingTransactions[id]; ok {
c.log.Trace("marked transaction as failed", "id", id)
// Rewind to the first frame of the failed tx
@@ -57,7 +58,16 @@ func (c *channel) TxFailed(id string) {
} else {
c.log.Warn("unknown transaction marked as failed", "id", id)
}

if failoverToEthDA {
// We failover to calldata txs because in altda mode the channel and channelManager
// are configured to use a calldataConfigManager, as opposed to DynamicEthChannelConfig
// which can use both calldata and blobs. Failover should happen extremely rarely,
// and is only used while the altDA is down, so we can afford to be inefficient here.
// TODO: figure out how to switch to blobs/auto instead. Might need to make
// batcherService.initChannelConfig function stateless so that we can reuse it.
c.log.Info("Failing over to calldata txs", "id", c.ID())
c.cfg.DaType = DaTypeCalldata
}
c.metr.RecordBatchTxFailed()
}

@@ -132,22 +142,22 @@ func (c *channel) ID() derive.ChannelID {
// NextTxData should only be called after HasTxData returned true.
func (c *channel) NextTxData() txData {
nf := c.cfg.MaxFramesPerTx()
txdata := txData{frames: make([]frameData, 0, nf), asBlob: c.cfg.UseBlobs}
txdata := txData{frames: make([]frameData, 0, nf), daType: c.cfg.DaType}
for i := 0; i < nf && c.channelBuilder.HasPendingFrame(); i++ {
frame := c.channelBuilder.NextFrame()
txdata.frames = append(txdata.frames, frame)
}

id := txdata.ID().String()
c.log.Debug("returning next tx data", "id", id, "num_frames", len(txdata.frames), "as_blob", txdata.asBlob)
c.log.Debug("returning next tx data", "id", id, "num_frames", len(txdata.frames), "da_type", txdata.daType)
c.pendingTransactions[id] = txdata

return txdata
}

func (c *channel) HasTxData() bool {
if c.IsFull() || // If the channel is full, we should start to submit it
!c.cfg.UseBlobs { // If using calldata, we only send one frame per tx
c.cfg.DaType == DaTypeCalldata { // If using calldata, we only send one frame per tx
return c.channelBuilder.HasPendingFrame()
}
// Collect enough frames if channel is not full yet
11 changes: 7 additions & 4 deletions op-batcher/batcher/channel_config.go
Original file line number Diff line number Diff line change
@@ -46,9 +46,12 @@ type ChannelConfig struct {
// BatchType indicates whether the channel uses SingularBatch or SpanBatch.
BatchType uint

// UseBlobs indicates that this channel should be sent as a multi-blob
// transaction with one blob per frame.
UseBlobs bool
// DaType indicates how the frames in this channel should be sent to the L1.
DaType DaType
}

// UseBlobs reports whether this config's DaType is DaTypeBlob.
func (cc ChannelConfig) UseBlobs() bool {
return cc.DaType == DaTypeBlob
}

// ChannelConfig returns a copy of the receiver.
@@ -93,7 +96,7 @@ func (cc *ChannelConfig) ReinitCompressorConfig() {
}

func (cc *ChannelConfig) MaxFramesPerTx() int {
if !cc.UseBlobs {
if cc.DaType == DaTypeCalldata {
return 1
}
return cc.TargetNumFrames
3 changes: 2 additions & 1 deletion op-batcher/batcher/channel_config_provider_test.go
Original file line number Diff line number Diff line change
@@ -31,11 +31,12 @@ func TestDynamicEthChannelConfig_ChannelConfig(t *testing.T) {
calldataCfg := ChannelConfig{
MaxFrameSize: 120_000 - 1,
TargetNumFrames: 1,
DaType: DaTypeCalldata,
}
blobCfg := ChannelConfig{
MaxFrameSize: eth.MaxBlobDataSize - 1,
TargetNumFrames: 3, // gets closest to amortized fixed tx costs
UseBlobs: true,
DaType: DaTypeBlob,
}

tests := []struct {
Loading