From 6476aaadcbc97f08f92d832257b432ac5a3bc962 Mon Sep 17 00:00:00 2001 From: Martin Tomazic Date: Sat, 2 Aug 2025 18:28:12 +0200 Subject: [PATCH 01/18] go/worker/storage: Rename committee package to statesync Also rename node to worker, to avoid confusion. Ideally, the parent package (storage) would have runtime as a prefix to make it clearer this is a runtime worker. --- go/oasis-node/cmd/node/node_control.go | 6 +- go/oasis-test-runner/oasis/log.go | 4 +- .../checkpoint_sync.go | 92 ++-- .../checkpoint_sync_test.go | 2 +- .../{committee => statesync}/metrics.go | 6 +- .../node.go => statesync/state_sync.go} | 480 +++++++++--------- .../storage/{committee => statesync}/utils.go | 2 +- go/worker/storage/worker.go | 22 +- 8 files changed, 311 insertions(+), 303 deletions(-) rename go/worker/storage/{committee => statesync}/checkpoint_sync.go (81%) rename go/worker/storage/{committee => statesync}/checkpoint_sync_test.go (98%) rename go/worker/storage/{committee => statesync}/metrics.go (91%) rename go/worker/storage/{committee/node.go => statesync/state_sync.go} (73%) rename go/worker/storage/{committee => statesync}/utils.go (99%) diff --git a/go/oasis-node/cmd/node/node_control.go b/go/oasis-node/cmd/node/node_control.go index 9310d0780f5..494bf9b2332 100644 --- a/go/oasis-node/cmd/node/node_control.go +++ b/go/oasis-node/cmd/node/node_control.go @@ -312,10 +312,10 @@ func (n *Node) getRuntimeStatus(ctx context.Context) (map[common.Namespace]contr } // Fetch storage worker status. 
- if storageNode := n.StorageWorker.GetRuntime(rt.ID()); storageNode != nil { - status.Storage, err = storageNode.GetStatus(ctx) + if stateSync := n.StorageWorker.GetRuntime(rt.ID()); stateSync != nil { + status.Storage, err = stateSync.GetStatus(ctx) if err != nil { - logger.Error("failed to fetch storage worker status", "err", err) + logger.Error("failed to fetch state sync worker status", "err", err) } } diff --git a/go/oasis-test-runner/oasis/log.go b/go/oasis-test-runner/oasis/log.go index cd38354d83f..a46b126c18a 100644 --- a/go/oasis-test-runner/oasis/log.go +++ b/go/oasis-test-runner/oasis/log.go @@ -8,7 +8,7 @@ import ( roothash "github.com/oasisprotocol/oasis-core/go/roothash/api" "github.com/oasisprotocol/oasis-core/go/roothash/api/commitment" upgrade "github.com/oasisprotocol/oasis-core/go/upgrade/api" - workerStorage "github.com/oasisprotocol/oasis-core/go/worker/storage/committee" + "github.com/oasisprotocol/oasis-core/go/worker/storage/statesync" ) // LogAssertEvent returns a handler which checks whether a specific log event was @@ -116,7 +116,7 @@ func LogAssertRoothashRoothashReindexing() log.WatcherHandlerFactory { // LogAssertCheckpointSync returns a handler which checks whether initial storage sync from // a checkpoint was successful or not. 
func LogAssertCheckpointSync() log.WatcherHandlerFactory { - return LogAssertEvent(workerStorage.LogEventCheckpointSyncSuccess, "checkpoint sync did not succeed") + return LogAssertEvent(statesync.LogEventCheckpointSyncSuccess, "checkpoint sync did not succeed") } // LogAssertDiscrepancyMajorityFailure returns a handler which checks whether a discrepancy resolution diff --git a/go/worker/storage/committee/checkpoint_sync.go b/go/worker/storage/statesync/checkpoint_sync.go similarity index 81% rename from go/worker/storage/committee/checkpoint_sync.go rename to go/worker/storage/statesync/checkpoint_sync.go index ad553272a90..bca66368dfe 100644 --- a/go/worker/storage/committee/checkpoint_sync.go +++ b/go/worker/storage/statesync/checkpoint_sync.go @@ -1,4 +1,4 @@ -package committee +package statesync import ( "bytes" @@ -21,7 +21,7 @@ import ( const ( // cpListsTimeout is the timeout for fetching checkpoints from all nodes. cpListsTimeout = 30 * time.Second - // cpRestoreTimeout is the timeout for restoring a checkpoint chunk from a node. + // cpRestoreTimeout is the timeout for restoring a checkpoint chunk from the remote peer. cpRestoreTimeout = 60 * time.Second checkpointStatusDone = 0 @@ -37,7 +37,7 @@ var ErrNoUsableCheckpoints = errors.New("storage: no checkpoint could be synced" // CheckpointSyncConfig is the checkpoint sync configuration. type CheckpointSyncConfig struct { - // Disabled specifies whether checkpoint sync should be disabled. In this case the node will + // Disabled specifies whether checkpoint sync should be disabled. In this case the state sync worker will // only sync by applying all diffs from genesis. Disabled bool @@ -81,7 +81,7 @@ func (h *chunkHeap) Pop() any { return ret } -func (n *Node) checkpointChunkFetcher( +func (w *Worker) checkpointChunkFetcher( ctx context.Context, chunkDispatchCh chan *chunk, chunkReturnCh chan *chunk, @@ -103,9 +103,9 @@ func (n *Node) checkpointChunkFetcher( defer cancel() // Fetch chunk from peers. 
- rsp, pf, err := n.fetchChunk(chunkCtx, chunk) + rsp, pf, err := w.fetchChunk(chunkCtx, chunk) if err != nil { - n.logger.Error("failed to fetch chunk from peers", + w.logger.Error("failed to fetch chunk from peers", "err", err, "chunk", chunk.Index, ) @@ -114,7 +114,7 @@ func (n *Node) checkpointChunkFetcher( } // Restore fetched chunk. - done, err := n.localStorage.Checkpointer().RestoreChunk(chunkCtx, chunk.Index, bytes.NewBuffer(rsp)) + done, err := w.localStorage.Checkpointer().RestoreChunk(chunkCtx, chunk.Index, bytes.NewBuffer(rsp)) cancel() switch { @@ -124,7 +124,7 @@ func (n *Node) checkpointChunkFetcher( chunkReturnCh <- nil return case err != nil: - n.logger.Error("chunk restoration failed", + w.logger.Error("chunk restoration failed", "chunk", chunk.Index, "root", chunk.Root, "err", err, @@ -157,8 +157,8 @@ func (n *Node) checkpointChunkFetcher( // fetchChunk fetches chunk using checkpoint sync p2p protocol client. // // In case of no peers or error, it fallbacks to the legacy storage sync protocol. 
-func (n *Node) fetchChunk(ctx context.Context, chunk *chunk) ([]byte, rpc.PeerFeedback, error) { - rsp1, pf, err := n.checkpointSync.GetCheckpointChunk( +func (w *Worker) fetchChunk(ctx context.Context, chunk *chunk) ([]byte, rpc.PeerFeedback, error) { + rsp1, pf, err := w.checkpointSync.GetCheckpointChunk( ctx, &checkpointsync.GetCheckpointChunkRequest{ Version: chunk.Version, @@ -175,7 +175,7 @@ func (n *Node) fetchChunk(ctx context.Context, chunk *chunk) ([]byte, rpc.PeerFe return rsp1.Chunk, pf, nil } - rsp2, pf, err := n.legacyStorageSync.GetCheckpointChunk( + rsp2, pf, err := w.legacyStorageSync.GetCheckpointChunk( ctx, &synclegacy.GetCheckpointChunkRequest{ Version: chunk.Version, @@ -194,8 +194,8 @@ func (n *Node) fetchChunk(ctx context.Context, chunk *chunk) ([]byte, rpc.PeerFe return rsp2.Chunk, pf, nil } -func (n *Node) handleCheckpoint(check *checkpointsync.Checkpoint, maxParallelRequests uint) (cpStatus int, rerr error) { - if err := n.localStorage.Checkpointer().StartRestore(n.ctx, check.Metadata); err != nil { +func (w *Worker) handleCheckpoint(check *checkpointsync.Checkpoint, maxParallelRequests uint) (cpStatus int, rerr error) { + if err := w.localStorage.Checkpointer().StartRestore(w.ctx, check.Metadata); err != nil { // Any previous restores were already aborted by the driver up the call stack, so // things should have been going smoothly here; bail. return checkpointStatusBail, fmt.Errorf("can't start checkpoint restore: %w", err) @@ -208,9 +208,9 @@ func (n *Node) handleCheckpoint(check *checkpointsync.Checkpoint, maxParallelReq } // Abort has to succeed even if we were interrupted by context cancellation. 
ctx := context.Background() - if err := n.localStorage.Checkpointer().AbortRestore(ctx); err != nil { + if err := w.localStorage.Checkpointer().AbortRestore(ctx); err != nil { cpStatus = checkpointStatusBail - n.logger.Error("error while aborting checkpoint restore on handler exit, aborting sync", + w.logger.Error("error while aborting checkpoint restore on handler exit, aborting sync", "err", err, ) } @@ -222,7 +222,7 @@ func (n *Node) handleCheckpoint(check *checkpointsync.Checkpoint, maxParallelReq chunkReturnCh := make(chan *chunk, maxParallelRequests) errorCh := make(chan int, maxParallelRequests) - ctx, cancel := context.WithCancel(n.ctx) + ctx, cancel := context.WithCancel(w.ctx) // Spawn the worker group to fetch and restore checkpoint chunks. var workerGroup sync.WaitGroup @@ -231,7 +231,7 @@ func (n *Node) handleCheckpoint(check *checkpointsync.Checkpoint, maxParallelReq workerGroup.Add(1) go func() { defer workerGroup.Done() - n.checkpointChunkFetcher(ctx, chunkDispatchCh, chunkReturnCh, errorCh) + w.checkpointChunkFetcher(ctx, chunkDispatchCh, chunkReturnCh, errorCh) }() } go func() { @@ -264,7 +264,7 @@ func (n *Node) handleCheckpoint(check *checkpointsync.Checkpoint, maxParallelReq checkpoint: check, }) } - n.logger.Debug("checkpoint chunks prepared for dispatch", + w.logger.Debug("checkpoint chunks prepared for dispatch", "chunks", len(check.Chunks), "checkpoint_root", check.Root, ) @@ -283,8 +283,8 @@ func (n *Node) handleCheckpoint(check *checkpointsync.Checkpoint, maxParallelReq } select { - case <-n.ctx.Done(): - return checkpointStatusBail, n.ctx.Err() + case <-w.ctx.Done(): + return checkpointStatusBail, w.ctx.Err() case returned := <-chunkReturnCh: if returned == nil { @@ -313,13 +313,13 @@ func (n *Node) handleCheckpoint(check *checkpointsync.Checkpoint, maxParallelReq } } -func (n *Node) getCheckpointList() ([]*checkpointsync.Checkpoint, error) { - ctx, cancel := context.WithTimeout(n.ctx, cpListsTimeout) +func (w *Worker) 
getCheckpointList() ([]*checkpointsync.Checkpoint, error) { + ctx, cancel := context.WithTimeout(w.ctx, cpListsTimeout) defer cancel() - list, err := n.fetchCheckpoints(ctx) + list, err := w.fetchCheckpoints(ctx) if err != nil { - n.logger.Error("failed to retrieve any checkpoints", + w.logger.Error("failed to retrieve any checkpoints", "err", err, ) return nil, err @@ -334,15 +334,15 @@ func (n *Node) getCheckpointList() ([]*checkpointsync.Checkpoint, error) { // fetchCheckpoints fetches checkpoints using checkpoint sync p2p protocol client. // // In case of no peers, error or no checkpoints, it fallbacks to the legacy storage sync protocol. -func (n *Node) fetchCheckpoints(ctx context.Context) ([]*checkpointsync.Checkpoint, error) { - list1, err := n.checkpointSync.GetCheckpoints(ctx, &checkpointsync.GetCheckpointsRequest{ +func (w *Worker) fetchCheckpoints(ctx context.Context) ([]*checkpointsync.Checkpoint, error) { + list1, err := w.checkpointSync.GetCheckpoints(ctx, &checkpointsync.GetCheckpointsRequest{ Version: 1, }) if err == nil && len(list1) > 0 { // if NO error and at least one checkpoint return list1, nil } - list2, err := n.legacyStorageSync.GetCheckpoints(ctx, &synclegacy.GetCheckpointsRequest{ + list2, err := w.legacyStorageSync.GetCheckpoints(ctx, &synclegacy.GetCheckpointsRequest{ Version: 1, }) if err != nil { @@ -369,8 +369,8 @@ func sortCheckpoints(s []*checkpointsync.Checkpoint) { }) } -func (n *Node) checkCheckpointUsable(cp *checkpointsync.Checkpoint, remainingMask outstandingMask, genesisRound uint64) bool { - namespace := n.commonNode.Runtime.ID() +func (w *Worker) checkCheckpointUsable(cp *checkpointsync.Checkpoint, remainingMask outstandingMask, genesisRound uint64) bool { + namespace := w.commonNode.Runtime.ID() if !namespace.Equal(&cp.Root.Namespace) { // Not for the right runtime. 
return false @@ -380,12 +380,12 @@ func (n *Node) checkCheckpointUsable(cp *checkpointsync.Checkpoint, remainingMas return false } - blk, err := n.commonNode.Runtime.History().GetCommittedBlock(n.ctx, cp.Root.Version) + blk, err := w.commonNode.Runtime.History().GetCommittedBlock(w.ctx, cp.Root.Version) if err != nil { - n.logger.Error("can't get block information for checkpoint, skipping", "err", err, "root", cp.Root) + w.logger.Error("can't get block information for checkpoint, skipping", "err", err, "root", cp.Root) return false } - _, lastIORoot, lastStateRoot := n.GetLastSynced() + _, lastIORoot, lastStateRoot := w.GetLastSynced() lastVersions := map[storageApi.RootType]uint64{ storageApi.RootTypeIO: lastIORoot.Version, storageApi.RootTypeState: lastStateRoot.Version, @@ -401,18 +401,18 @@ func (n *Node) checkCheckpointUsable(cp *checkpointsync.Checkpoint, remainingMas } } } - n.logger.Info("checkpoint for unknown root skipped", "root", cp.Root) + w.logger.Info("checkpoint for unknown root skipped", "root", cp.Root) return false } -func (n *Node) syncCheckpoints(genesisRound uint64, wantOnlyGenesis bool) (*blockSummary, error) { +func (w *Worker) syncCheckpoints(genesisRound uint64, wantOnlyGenesis bool) (*blockSummary, error) { // Store roots and round info for checkpoints that finished syncing. // Round and namespace info will get overwritten as rounds are skipped // for errors, driven by remainingRoots. var syncState blockSummary // Fetch checkpoints from peers. 
- cps, err := n.getCheckpointList() + cps, err := w.getCheckpointList() if err != nil { return nil, fmt.Errorf("can't get checkpoint list from peers: %w", err) } @@ -440,8 +440,8 @@ func (n *Node) syncCheckpoints(genesisRound uint64, wantOnlyGenesis bool) (*bloc if !multipartRunning { return } - if err := n.localStorage.NodeDB().AbortMultipartInsert(); err != nil { - n.logger.Error("error aborting multipart restore on exit from syncer", + if err := w.localStorage.NodeDB().AbortMultipartInsert(); err != nil { + w.logger.Error("error aborting multipart restore on exit from syncer", "err", err, ) } @@ -449,7 +449,7 @@ func (n *Node) syncCheckpoints(genesisRound uint64, wantOnlyGenesis bool) (*bloc for _, check := range cps { - if check.Root.Version < genesisRound || !n.checkCheckpointUsable(check, remainingRoots, genesisRound) { + if check.Root.Version < genesisRound || !w.checkCheckpointUsable(check, remainingRoots, genesisRound) { continue } @@ -458,10 +458,10 @@ func (n *Node) syncCheckpoints(genesisRound uint64, wantOnlyGenesis bool) (*bloc // previous retries. Aborting multipart works with no multipart in // progress too. 
multipartRunning = false - if err := n.localStorage.NodeDB().AbortMultipartInsert(); err != nil { + if err := w.localStorage.NodeDB().AbortMultipartInsert(); err != nil { return nil, fmt.Errorf("error aborting previous multipart restore: %w", err) } - if err := n.localStorage.NodeDB().StartMultipartInsert(check.Root.Version); err != nil { + if err := w.localStorage.NodeDB().StartMultipartInsert(check.Root.Version); err != nil { return nil, fmt.Errorf("error starting multipart insert for round %d: %w", check.Root.Version, err) } multipartRunning = true @@ -486,18 +486,18 @@ func (n *Node) syncCheckpoints(genesisRound uint64, wantOnlyGenesis bool) (*bloc } } - status, err := n.handleCheckpoint(check, n.checkpointSyncCfg.ChunkFetcherCount) + status, err := w.handleCheckpoint(check, w.checkpointSyncCfg.ChunkFetcherCount) switch status { case checkpointStatusDone: - n.logger.Info("successfully restored from checkpoint", "root", check.Root, "mask", mask) + w.logger.Info("successfully restored from checkpoint", "root", check.Root, "mask", mask) syncState.Namespace = check.Root.Namespace syncState.Round = check.Root.Version syncState.Roots = append(syncState.Roots, check.Root) remainingRoots.remove(check.Root.Type) if remainingRoots.isEmpty() { - if err = n.localStorage.NodeDB().Finalize(syncState.Roots); err != nil { - n.logger.Error("can't finalize version after all checkpoints restored", + if err = w.localStorage.NodeDB().Finalize(syncState.Roots); err != nil { + w.logger.Error("can't finalize version after all checkpoints restored", "err", err, "version", prevVersion, "roots", syncState.Roots, @@ -510,10 +510,10 @@ func (n *Node) syncCheckpoints(genesisRound uint64, wantOnlyGenesis bool) (*bloc } continue case checkpointStatusNext: - n.logger.Info("error trying to restore from checkpoint, trying next most recent", "root", check.Root, "err", err) + w.logger.Info("error trying to restore from checkpoint, trying next most recent", "root", check.Root, "err", err) continue 
case checkpointStatusBail: - n.logger.Error("error trying to restore from checkpoint, unrecoverable", "root", check.Root, "err", err) + w.logger.Error("error trying to restore from checkpoint, unrecoverable", "root", check.Root, "err", err) return nil, fmt.Errorf("error restoring from checkpoints: %w", err) } } diff --git a/go/worker/storage/committee/checkpoint_sync_test.go b/go/worker/storage/statesync/checkpoint_sync_test.go similarity index 98% rename from go/worker/storage/committee/checkpoint_sync_test.go rename to go/worker/storage/statesync/checkpoint_sync_test.go index d39e50f3239..c9ac133c1bd 100644 --- a/go/worker/storage/committee/checkpoint_sync_test.go +++ b/go/worker/storage/statesync/checkpoint_sync_test.go @@ -1,4 +1,4 @@ -package committee +package statesync import ( "testing" diff --git a/go/worker/storage/committee/metrics.go b/go/worker/storage/statesync/metrics.go similarity index 91% rename from go/worker/storage/committee/metrics.go rename to go/worker/storage/statesync/metrics.go index 7f641f71fdd..4bc6c414df1 100644 --- a/go/worker/storage/committee/metrics.go +++ b/go/worker/storage/statesync/metrics.go @@ -1,4 +1,4 @@ -package committee +package statesync import ( "sync" @@ -49,9 +49,9 @@ var ( prometheusOnce sync.Once ) -func (n *Node) getMetricLabels() prometheus.Labels { +func (w *Worker) getMetricLabels() prometheus.Labels { return prometheus.Labels{ - "runtime": n.commonNode.Runtime.ID().String(), + "runtime": w.commonNode.Runtime.ID().String(), } } diff --git a/go/worker/storage/committee/node.go b/go/worker/storage/statesync/state_sync.go similarity index 73% rename from go/worker/storage/committee/node.go rename to go/worker/storage/statesync/state_sync.go index a1318a0802f..8762df77c4b 100644 --- a/go/worker/storage/committee/node.go +++ b/go/worker/storage/statesync/state_sync.go @@ -1,4 +1,6 @@ -package committee +// Package statesync defines the logic responsible for initializing, syncing, +// and pruning of the runtime state 
using the relevant p2p protocol clients. +package statesync import ( "container/heap" @@ -39,7 +41,7 @@ import ( ) var ( - _ committee.NodeHooks = (*Node)(nil) + _ committee.NodeHooks = (*Worker)(nil) // ErrNonLocalBackend is the error returned when the storage backend doesn't implement the LocalBackend interface. ErrNonLocalBackend = errors.New("storage: storage backend doesn't support local storage") @@ -118,8 +120,18 @@ type finalizeResult struct { err error } -// Node watches blocks for storage changes. -type Node struct { +// Worker is the runtime state sync worker, responsible for syncing state +// that corresponds to the incoming runtime block headers received from the +// consensus service. +// +// In addition this worker is responsible for: +// 1. Initializing the runtime state, possibly using checkpoints (if configured). +// 2. Pruning the state as specified by the configuration. +// 3. Optionally creating runtime state checkpoints (used by other nodes) for the state sync. +// 4. Creating (and optionally advertising) statesync p2p protocol clients and servers. +// 5. Registering node availability when it has synced sufficiently close to +// the latest known block header. +type Worker struct { commonNode *committee.Node roleProvider registration.RoleProvider @@ -162,27 +174,28 @@ type Node struct { initCh chan struct{} } -func NewNode( +// New creates a new state sync worker. +func New( commonNode *committee.Node, roleProvider registration.RoleProvider, rpcRoleProvider registration.RoleProvider, workerCommonCfg workerCommon.Config, localStorage storageApi.LocalBackend, checkpointSyncCfg *CheckpointSyncConfig, -) (*Node, error) { +) (*Worker, error) { initMetrics() // Create the fetcher pool. 
fetchPool := workerpool.New("storage_fetch/" + commonNode.Runtime.ID().String()) fetchPool.Resize(config.GlobalConfig.Storage.FetcherCount) - n := &Node{ + w := &Worker{ commonNode: commonNode, roleProvider: roleProvider, rpcRoleProvider: rpcRoleProvider, - logger: logging.GetLogger("worker/storage/committee").With("runtime_id", commonNode.Runtime.ID()), + logger: logging.GetLogger("worker/storage/statesync").With("runtime_id", commonNode.Runtime.ID()), workerCommonCfg: workerCommonCfg, @@ -208,21 +221,21 @@ func NewNode( } // Initialize sync state. - n.syncedState.Round = defaultUndefinedRound + w.syncedState.Round = defaultUndefinedRound - n.ctx, n.ctxCancel = context.WithCancel(context.Background()) + w.ctx, w.ctxCancel = context.WithCancel(context.Background()) // Create a checkpointer (even if checkpointing is disabled) to ensure the genesis checkpoint is available. - checkpointer, err := n.newCheckpointer(n.ctx, commonNode, localStorage) + checkpointer, err := w.newCheckpointer(w.ctx, commonNode, localStorage) if err != nil { return nil, fmt.Errorf("failed to create checkpointer: %w", err) } - n.checkpointer = checkpointer + w.checkpointer = checkpointer // Register prune handler. commonNode.Runtime.History().Pruner().RegisterHandler(&pruneHandler{ - logger: n.logger, - node: n, + logger: w.logger, + worker: w, }) // Advertise and serve p2p protocols. @@ -236,14 +249,14 @@ func NewNode( } // Create p2p protocol clients. 
- n.legacyStorageSync = synclegacy.NewClient(commonNode.P2P, commonNode.ChainContext, commonNode.Runtime.ID()) - n.diffSync = diffsync.NewClient(commonNode.P2P, commonNode.ChainContext, commonNode.Runtime.ID()) - n.checkpointSync = checkpointsync.NewClient(commonNode.P2P, commonNode.ChainContext, commonNode.Runtime.ID()) + w.legacyStorageSync = synclegacy.NewClient(commonNode.P2P, commonNode.ChainContext, commonNode.Runtime.ID()) + w.diffSync = diffsync.NewClient(commonNode.P2P, commonNode.ChainContext, commonNode.Runtime.ID()) + w.checkpointSync = checkpointsync.NewClient(commonNode.P2P, commonNode.ChainContext, commonNode.Runtime.ID()) - return n, nil + return w, nil } -func (n *Node) newCheckpointer(ctx context.Context, commonNode *committee.Node, localStorage storageApi.LocalBackend) (checkpoint.Checkpointer, error) { +func (w *Worker) newCheckpointer(ctx context.Context, commonNode *committee.Node, localStorage storageApi.LocalBackend) (checkpoint.Checkpointer, error) { checkInterval := checkpoint.CheckIntervalDisabled if config.GlobalConfig.Storage.Checkpointer.Enabled { checkInterval = config.GlobalConfig.Storage.Checkpointer.CheckInterval @@ -300,100 +313,95 @@ func (n *Node) newCheckpointer(ctx context.Context, commonNode *committee.Node, // Service interface. -// Name returns the service name. -func (n *Node) Name() string { - return "committee node" -} - // Start causes the worker to start responding to CometBFT new block events. -func (n *Node) Start() error { - go n.worker() +func (w *Worker) Start() error { + go w.worker() if config.GlobalConfig.Storage.Checkpointer.Enabled { - go n.consensusCheckpointSyncer() + go w.consensusCheckpointSyncer() } return nil } // Stop causes the worker to stop watching and shut down. 
-func (n *Node) Stop() { - n.statusLock.Lock() - n.status = api.StatusStopping - n.statusLock.Unlock() +func (w *Worker) Stop() { + w.statusLock.Lock() + w.status = api.StatusStopping + w.statusLock.Unlock() - n.fetchPool.Stop() + w.fetchPool.Stop() - n.ctxCancel() + w.ctxCancel() } // Quit returns a channel that will be closed when the worker stops. -func (n *Node) Quit() <-chan struct{} { - return n.quitCh +func (w *Worker) Quit() <-chan struct{} { + return w.quitCh } // Cleanup cleans up any leftover state after the worker is stopped. -func (n *Node) Cleanup() { +func (w *Worker) Cleanup() { // Nothing to do here? } // Initialized returns a channel that will be closed once the worker finished starting up. -func (n *Node) Initialized() <-chan struct{} { - return n.initCh +func (w *Worker) Initialized() <-chan struct{} { + return w.initCh } -// GetStatus returns the storage committee node status. -func (n *Node) GetStatus(context.Context) (*api.Status, error) { - n.syncedLock.RLock() - defer n.syncedLock.RUnlock() +// GetStatus returns the state sync worker status. +func (w *Worker) GetStatus(context.Context) (*api.Status, error) { + w.syncedLock.RLock() + defer w.syncedLock.RUnlock() - n.statusLock.RLock() - defer n.statusLock.RUnlock() + w.statusLock.RLock() + defer w.statusLock.RUnlock() return &api.Status{ - LastFinalizedRound: n.syncedState.Round, - Status: n.status, + LastFinalizedRound: w.syncedState.Round, + Status: w.status, }, nil } -func (n *Node) PauseCheckpointer(pause bool) error { +func (w *Worker) PauseCheckpointer(pause bool) error { if !commonFlags.DebugDontBlameOasis() { return api.ErrCantPauseCheckpointer } - n.checkpointer.Pause(pause) + w.checkpointer.Pause(pause) return nil } -// GetLocalStorage returns the local storage backend used by this storage node. -func (n *Node) GetLocalStorage() storageApi.LocalBackend { - return n.localStorage +// GetLocalStorage returns the local storage backend used by this state sync worker. 
+func (w *Worker) GetLocalStorage() storageApi.LocalBackend { + return w.localStorage } // NodeHooks implementation. // HandleNewBlockEarlyLocked is guarded by CrossNode. -func (n *Node) HandleNewBlockEarlyLocked(*runtime.BlockInfo) { +func (w *Worker) HandleNewBlockEarlyLocked(*runtime.BlockInfo) { // Nothing to do here. } // HandleNewBlockLocked is guarded by CrossNode. -func (n *Node) HandleNewBlockLocked(bi *runtime.BlockInfo) { +func (w *Worker) HandleNewBlockLocked(bi *runtime.BlockInfo) { // Notify the state syncer that there is a new block. - n.blockCh.In() <- bi.RuntimeBlock + w.blockCh.In() <- bi.RuntimeBlock } // HandleRuntimeHostEventLocked is guarded by CrossNode. -func (n *Node) HandleRuntimeHostEventLocked(*host.Event) { +func (w *Worker) HandleRuntimeHostEventLocked(*host.Event) { // Nothing to do here. } // Watcher implementation. // GetLastSynced returns the height, IORoot hash and StateRoot hash of the last block that was fully synced to. -func (n *Node) GetLastSynced() (uint64, storageApi.Root, storageApi.Root) { - n.syncedLock.RLock() - defer n.syncedLock.RUnlock() +func (w *Worker) GetLastSynced() (uint64, storageApi.Root, storageApi.Root) { + w.syncedLock.RLock() + defer w.syncedLock.RUnlock() var io, state storageApi.Root - for _, root := range n.syncedState.Roots { + for _, root := range w.syncedState.Roots { switch root.Type { case storageApi.RootTypeIO: io = root @@ -402,10 +410,10 @@ func (n *Node) GetLastSynced() (uint64, storageApi.Root, storageApi.Root) { } } - return n.syncedState.Round, io, state + return w.syncedState.Round, io, state } -func (n *Node) fetchDiff(round uint64, prevRoot, thisRoot storageApi.Root) { +func (w *Worker) fetchDiff(round uint64, prevRoot, thisRoot storageApi.Root) { result := &fetchedDiff{ fetched: false, pf: rpc.NewNopPeerFeedback(), @@ -415,13 +423,13 @@ func (n *Node) fetchDiff(round uint64, prevRoot, thisRoot storageApi.Root) { } defer func() { select { - case n.diffCh <- result: - case <-n.ctx.Done(): 
+ case w.diffCh <- result: + case <-w.ctx.Done(): } }() // Check if the new root doesn't already exist. - if n.localStorage.NodeDB().HasRoot(thisRoot) { + if w.localStorage.NodeDB().HasRoot(thisRoot) { return } @@ -436,15 +444,15 @@ func (n *Node) fetchDiff(round uint64, prevRoot, thisRoot storageApi.Root) { } // New root does not yet exist in storage and we need to fetch it from a peer. - n.logger.Debug("calling GetDiff", + w.logger.Debug("calling GetDiff", "old_root", prevRoot, "new_root", thisRoot, ) - ctx, cancel := context.WithCancel(n.ctx) + ctx, cancel := context.WithCancel(w.ctx) defer cancel() - wl, pf, err := n.getDiff(ctx, prevRoot, thisRoot) + wl, pf, err := w.getDiff(ctx, prevRoot, thisRoot) if err != nil { result.err = err return @@ -456,35 +464,35 @@ func (n *Node) fetchDiff(round uint64, prevRoot, thisRoot storageApi.Root) { // getDiff fetches writelog using diff sync p2p protocol client. // // In case of no peers or error, it fallbacks to the legacy storage sync protocol. 
-func (n *Node) getDiff(ctx context.Context, prevRoot, thisRoot storageApi.Root) (storageApi.WriteLog, rpc.PeerFeedback, error) { - rsp1, pf, err := n.diffSync.GetDiff(ctx, &diffsync.GetDiffRequest{StartRoot: prevRoot, EndRoot: thisRoot}) +func (w *Worker) getDiff(ctx context.Context, prevRoot, thisRoot storageApi.Root) (storageApi.WriteLog, rpc.PeerFeedback, error) { + rsp1, pf, err := w.diffSync.GetDiff(ctx, &diffsync.GetDiffRequest{StartRoot: prevRoot, EndRoot: thisRoot}) if err == nil { // if NO error return rsp1.WriteLog, pf, nil } - rsp2, pf, err := n.legacyStorageSync.GetDiff(ctx, &synclegacy.GetDiffRequest{StartRoot: prevRoot, EndRoot: thisRoot}) + rsp2, pf, err := w.legacyStorageSync.GetDiff(ctx, &synclegacy.GetDiffRequest{StartRoot: prevRoot, EndRoot: thisRoot}) if err != nil { return nil, nil, err } return rsp2.WriteLog, pf, nil } -func (n *Node) finalize(summary *blockSummary) { - err := n.localStorage.NodeDB().Finalize(summary.Roots) +func (w *Worker) finalize(summary *blockSummary) { + err := w.localStorage.NodeDB().Finalize(summary.Roots) switch err { case nil: - n.logger.Debug("storage round finalized", + w.logger.Debug("storage round finalized", "round", summary.Round, ) case storageApi.ErrAlreadyFinalized: // This can happen if we are restoring after a roothash migration or if // we crashed before updating the sync state. 
- n.logger.Warn("storage round already finalized", + w.logger.Warn("storage round already finalized", "round", summary.Round, ) err = nil default: - n.logger.Error("failed to finalize storage round", + w.logger.Error("failed to finalize storage round", "err", err, "round", summary.Round, ) @@ -496,31 +504,31 @@ func (n *Node) finalize(summary *blockSummary) { } select { - case n.finalizeCh <- result: - case <-n.ctx.Done(): + case w.finalizeCh <- result: + case <-w.ctx.Done(): } } -func (n *Node) initGenesis(rt *registryApi.Runtime, genesisBlock *block.Block) error { - n.logger.Info("initializing storage at genesis") +func (w *Worker) initGenesis(rt *registryApi.Runtime, genesisBlock *block.Block) error { + w.logger.Info("initializing storage at genesis") // Check what the latest finalized version in the database is as we may be using a database // from a previous version or network. - latestVersion, alreadyInitialized := n.localStorage.NodeDB().GetLatestVersion() + latestVersion, alreadyInitialized := w.localStorage.NodeDB().GetLatestVersion() // Finalize any versions that were not yet finalized in the old database. This is only possible // as long as there is only one non-finalized root per version. Note that we also cannot be sure // that any of these roots are valid, but this is fine as long as the final version matches the // genesis root. 
if alreadyInitialized { - n.logger.Debug("already initialized, finalizing any non-finalized versions", + w.logger.Debug("already initialized, finalizing any non-finalized versions", "genesis_state_root", genesisBlock.Header.StateRoot, "genesis_round", genesisBlock.Header.Round, "latest_version", latestVersion, ) for v := latestVersion + 1; v < genesisBlock.Header.Round; v++ { - roots, err := n.localStorage.NodeDB().GetRootsForVersion(v) + roots, err := w.localStorage.NodeDB().GetRootsForVersion(v) if err != nil { return fmt.Errorf("failed to fetch roots for version %d: %w", v, err) } @@ -535,7 +543,7 @@ func (n *Node) initGenesis(rt *registryApi.Runtime, genesisBlock *block.Block) e break // We must have exactly one non-finalized state root to continue. } - err = n.localStorage.NodeDB().Finalize(stateRoots) + err = w.localStorage.NodeDB().Finalize(stateRoots) if err != nil { return fmt.Errorf("failed to finalize version %d: %w", v, err) } @@ -559,14 +567,14 @@ func (n *Node) initGenesis(rt *registryApi.Runtime, genesisBlock *block.Block) e maybeRoot := stateRoot maybeRoot.Version = latestVersion - if n.localStorage.NodeDB().HasRoot(maybeRoot) { - n.logger.Debug("latest version earlier than genesis state root, filling in versions", + if w.localStorage.NodeDB().HasRoot(maybeRoot) { + w.logger.Debug("latest version earlier than genesis state root, filling in versions", "genesis_state_root", genesisBlock.Header.StateRoot, "genesis_round", genesisBlock.Header.Round, "latest_version", latestVersion, ) for v := latestVersion; v < stateRoot.Version; v++ { - err := n.localStorage.Apply(n.ctx, &storageApi.ApplyRequest{ + err := w.localStorage.Apply(w.ctx, &storageApi.ApplyRequest{ Namespace: rt.ID, RootType: storageApi.RootTypeState, SrcRound: v, @@ -579,7 +587,7 @@ func (n *Node) initGenesis(rt *registryApi.Runtime, genesisBlock *block.Block) e return fmt.Errorf("failed to fill in version %d: %w", v, err) } - err = n.localStorage.NodeDB().Finalize([]storageApi.Root{{ + err 
= w.localStorage.NodeDB().Finalize([]storageApi.Root{{ Namespace: rt.ID, Version: v + 1, Type: storageApi.RootTypeState, @@ -594,14 +602,14 @@ func (n *Node) initGenesis(rt *registryApi.Runtime, genesisBlock *block.Block) e } default: // Latest finalized version is the same or ahead, root must exist. - compatible = n.localStorage.NodeDB().HasRoot(stateRoot) + compatible = w.localStorage.NodeDB().HasRoot(stateRoot) } // If we are incompatible and the local version is greater or the same as the genesis version, // we cannot do anything. If the local version is lower we assume the node will sync from a // different node. if !compatible && latestVersion >= stateRoot.Version { - n.logger.Error("existing state is incompatible with runtime genesis state", + w.logger.Error("existing state is incompatible with runtime genesis state", "genesis_state_root", genesisBlock.Header.StateRoot, "genesis_round", genesisBlock.Header.Round, "latest_version", latestVersion, @@ -611,46 +619,46 @@ func (n *Node) initGenesis(rt *registryApi.Runtime, genesisBlock *block.Block) e if !compatible { // Database is empty, so assume the state will be replicated from another node. 
- n.logger.Warn("non-empty state root but no state available, assuming replication", + w.logger.Warn("non-empty state root but no state available, assuming replication", "state_root", genesisBlock.Header.StateRoot, ) - n.checkpointSyncForced = true + w.checkpointSyncForced = true } return nil } -func (n *Node) flushSyncedState(summary *blockSummary) (uint64, error) { - n.syncedLock.Lock() - defer n.syncedLock.Unlock() +func (w *Worker) flushSyncedState(summary *blockSummary) (uint64, error) { + w.syncedLock.Lock() + defer w.syncedLock.Unlock() - n.syncedState = *summary - if err := n.commonNode.Runtime.History().StorageSyncCheckpoint(n.syncedState.Round); err != nil { + w.syncedState = *summary + if err := w.commonNode.Runtime.History().StorageSyncCheckpoint(w.syncedState.Round); err != nil { return 0, err } - return n.syncedState.Round, nil + return w.syncedState.Round, nil } -func (n *Node) consensusCheckpointSyncer() { +func (w *Worker) consensusCheckpointSyncer() { // Make sure we always create a checkpoint when the consensus layer creates a checkpoint. The // reason why we do this is to make it faster for storage nodes that use consensus state sync // to catch up as exactly the right checkpoint will be available. - consensusCp := n.commonNode.Consensus.Checkpointer() + consensusCp := w.commonNode.Consensus.Checkpointer() if consensusCp == nil { return } // Wait for the common node to be initialized. select { - case <-n.commonNode.Initialized(): - case <-n.ctx.Done(): + case <-w.commonNode.Initialized(): + case <-w.ctx.Done(): return } // Determine the maximum number of consensus checkpoints to keep. 
- consensusParams, err := n.commonNode.Consensus.Core().GetParameters(n.ctx, consensus.HeightLatest) + consensusParams, err := w.commonNode.Consensus.Core().GetParameters(w.ctx, consensus.HeightLatest) if err != nil { - n.logger.Error("failed to fetch consensus parameters", + w.logger.Error("failed to fetch consensus parameters", "err", err, ) return @@ -658,7 +666,7 @@ func (n *Node) consensusCheckpointSyncer() { ch, sub, err := consensusCp.WatchCheckpoints() if err != nil { - n.logger.Error("failed to watch checkpoints", + w.logger.Error("failed to watch checkpoints", "err", err, ) return @@ -679,9 +687,9 @@ func (n *Node) consensusCheckpointSyncer() { }() for { select { - case <-n.quitCh: + case <-w.quitCh: return - case <-n.ctx.Done(): + case <-w.ctx.Done(): return case version := <-ch: // We need to wait for the next version as that is what will be in the consensus @@ -692,15 +700,15 @@ func (n *Node) consensusCheckpointSyncer() { versions = versions[1:] } - n.logger.Debug("consensus checkpoint detected, queuing runtime checkpoint", + w.logger.Debug("consensus checkpoint detected, queuing runtime checkpoint", "version", version+1, "num_versions", len(versions), ) if blkCh == nil { - blkCh, blkSub, err = n.commonNode.Consensus.Core().WatchBlocks(n.ctx) + blkCh, blkSub, err = w.commonNode.Consensus.Core().WatchBlocks(w.ctx) if err != nil { - n.logger.Error("failed to watch blocks", + w.logger.Error("failed to watch blocks", "err", err, ) continue @@ -709,7 +717,7 @@ func (n *Node) consensusCheckpointSyncer() { case blk := <-blkCh: // If there's nothing remaining, unsubscribe. if len(versions) == 0 { - n.logger.Debug("no more queued consensus checkpoint versions") + w.logger.Debug("no more queued consensus checkpoint versions") blkSub.Close() blkSub = nil @@ -727,12 +735,12 @@ func (n *Node) consensusCheckpointSyncer() { // Lookup what runtime round corresponds to the given consensus layer version and make // sure we checkpoint it. 
- blk, err := n.commonNode.Consensus.RootHash().GetLatestBlock(n.ctx, &roothashApi.RuntimeRequest{ - RuntimeID: n.commonNode.Runtime.ID(), + blk, err := w.commonNode.Consensus.RootHash().GetLatestBlock(w.ctx, &roothashApi.RuntimeRequest{ + RuntimeID: w.commonNode.Runtime.ID(), Height: int64(version), }) if err != nil { - n.logger.Error("failed to get runtime block corresponding to consensus checkpoint", + w.logger.Error("failed to get runtime block corresponding to consensus checkpoint", "err", err, "height", version, ) @@ -741,11 +749,11 @@ func (n *Node) consensusCheckpointSyncer() { // We may have not yet synced the corresponding runtime round locally. In this case // we need to wait until this is the case. - n.syncedLock.RLock() - lastSyncedRound := n.syncedState.Round - n.syncedLock.RUnlock() + w.syncedLock.RLock() + lastSyncedRound := w.syncedState.Round + w.syncedLock.RUnlock() if blk.Header.Round > lastSyncedRound { - n.logger.Debug("runtime round not available yet for checkpoint, waiting", + w.logger.Debug("runtime round not available yet for checkpoint, waiting", "height", version, "round", blk.Header.Round, "last_synced_round", lastSyncedRound, @@ -755,12 +763,12 @@ func (n *Node) consensusCheckpointSyncer() { } // Force runtime storage checkpointer to create a checkpoint at this round. - n.logger.Info("consensus checkpoint, force runtime checkpoint", + w.logger.Info("consensus checkpoint, force runtime checkpoint", "height", version, "round", blk.Header.Round, ) - n.checkpointer.ForceCheckpoint(blk.Header.Round) + w.checkpointer.ForceCheckpoint(blk.Header.Round) } versions = newVersions } @@ -768,105 +776,105 @@ func (n *Node) consensusCheckpointSyncer() { } // This is only called from the main worker goroutine, so no locking should be necessary. 
-func (n *Node) nudgeAvailability(lastSynced, latest uint64) { - if lastSynced == n.undefinedRound || latest == n.undefinedRound { +func (w *Worker) nudgeAvailability(lastSynced, latest uint64) { + if lastSynced == w.undefinedRound || latest == w.undefinedRound { return } - if latest-lastSynced < maximumRoundDelayForAvailability && !n.roleAvailable { - n.roleProvider.SetAvailable(func(_ *node.Node) error { + if latest-lastSynced < maximumRoundDelayForAvailability && !w.roleAvailable { + w.roleProvider.SetAvailable(func(_ *node.Node) error { return nil }) - if n.rpcRoleProvider != nil { - n.rpcRoleProvider.SetAvailable(func(_ *node.Node) error { + if w.rpcRoleProvider != nil { + w.rpcRoleProvider.SetAvailable(func(_ *node.Node) error { return nil }) } - n.roleAvailable = true + w.roleAvailable = true } - if latest-lastSynced > minimumRoundDelayForUnavailability && n.roleAvailable { - n.roleProvider.SetUnavailable() - if n.rpcRoleProvider != nil { - n.rpcRoleProvider.SetUnavailable() + if latest-lastSynced > minimumRoundDelayForUnavailability && w.roleAvailable { + w.roleProvider.SetUnavailable() + if w.rpcRoleProvider != nil { + w.rpcRoleProvider.SetUnavailable() } - n.roleAvailable = false + w.roleAvailable = false } } -func (n *Node) worker() { // nolint: gocyclo - defer close(n.quitCh) - defer close(n.diffCh) +func (w *Worker) worker() { // nolint: gocyclo + defer close(w.quitCh) + defer close(w.diffCh) // Wait for the common node to be initialized. select { - case <-n.commonNode.Initialized(): - case <-n.ctx.Done(): - close(n.initCh) + case <-w.commonNode.Initialized(): + case <-w.ctx.Done(): + close(w.initCh) return } - n.logger.Info("starting committee node") + w.logger.Info("starting") - n.statusLock.Lock() - n.status = api.StatusStarting - n.statusLock.Unlock() + w.statusLock.Lock() + w.status = api.StatusStarting + w.statusLock.Unlock() // Determine genesis block. 
- genesisBlock, err := n.commonNode.Consensus.RootHash().GetGenesisBlock(n.ctx, &roothashApi.RuntimeRequest{ - RuntimeID: n.commonNode.Runtime.ID(), + genesisBlock, err := w.commonNode.Consensus.RootHash().GetGenesisBlock(w.ctx, &roothashApi.RuntimeRequest{ + RuntimeID: w.commonNode.Runtime.ID(), Height: consensus.HeightLatest, }) if err != nil { - n.logger.Error("can't retrieve genesis block", "err", err) + w.logger.Error("can't retrieve genesis block", "err", err) return } - n.undefinedRound = genesisBlock.Header.Round - 1 + w.undefinedRound = genesisBlock.Header.Round - 1 // Determine last finalized storage version. - if version, dbNonEmpty := n.localStorage.NodeDB().GetLatestVersion(); dbNonEmpty { + if version, dbNonEmpty := w.localStorage.NodeDB().GetLatestVersion(); dbNonEmpty { var blk *block.Block - blk, err = n.commonNode.Runtime.History().GetCommittedBlock(n.ctx, version) + blk, err = w.commonNode.Runtime.History().GetCommittedBlock(w.ctx, version) switch err { case nil: // Set last synced version to last finalized storage version. - if _, err = n.flushSyncedState(summaryFromBlock(blk)); err != nil { - n.logger.Error("failed to flush synced state", "err", err) + if _, err = w.flushSyncedState(summaryFromBlock(blk)); err != nil { + w.logger.Error("failed to flush synced state", "err", err) return } default: // Failed to fetch historic block. This is fine when the network just went through a // dump/restore upgrade and we don't have any information before genesis. We treat the // database as unsynced and will proceed to either use checkpoints or sync iteratively. 
- n.logger.Warn("failed to fetch historic block", + w.logger.Warn("failed to fetch historic block", "err", err, "round", version, ) } } - n.syncedLock.RLock() - cachedLastRound := n.syncedState.Round - n.syncedLock.RUnlock() + w.syncedLock.RLock() + cachedLastRound := w.syncedState.Round + w.syncedLock.RUnlock() if cachedLastRound == defaultUndefinedRound || cachedLastRound < genesisBlock.Header.Round { - cachedLastRound = n.undefinedRound + cachedLastRound = w.undefinedRound } // Initialize genesis from the runtime descriptor. - isInitialStartup := (cachedLastRound == n.undefinedRound) + isInitialStartup := (cachedLastRound == w.undefinedRound) if isInitialStartup { - n.statusLock.Lock() - n.status = api.StatusInitializingGenesis - n.statusLock.Unlock() + w.statusLock.Lock() + w.status = api.StatusInitializingGenesis + w.statusLock.Unlock() var rt *registryApi.Runtime - rt, err = n.commonNode.Runtime.ActiveDescriptor(n.ctx) + rt, err = w.commonNode.Runtime.ActiveDescriptor(w.ctx) if err != nil { - n.logger.Error("failed to retrieve runtime registry descriptor", + w.logger.Error("failed to retrieve runtime registry descriptor", "err", err, ) return } - if err = n.initGenesis(rt, genesisBlock); err != nil { - n.logger.Error("failed to initialize storage at genesis", + if err = w.initGenesis(rt, genesisBlock); err != nil { + w.logger.Error("failed to initialize storage at genesis", "err", err, ) return @@ -874,28 +882,28 @@ func (n *Node) worker() { // nolint: gocyclo } // Notify the checkpointer of the genesis round so it can be checkpointed. - if n.checkpointer != nil { - n.checkpointer.ForceCheckpoint(genesisBlock.Header.Round) - n.checkpointer.Flush() + if w.checkpointer != nil { + w.checkpointer.ForceCheckpoint(genesisBlock.Header.Round) + w.checkpointer.Flush() } // Check if we are able to fetch the first block that we would be syncing if we used iterative // syncing. 
In case we cannot (likely because we synced the consensus layer via state sync), we // must wait for a later checkpoint to become available. - if !n.checkpointSyncForced { - n.statusLock.Lock() - n.status = api.StatusSyncStartCheck - n.statusLock.Unlock() + if !w.checkpointSyncForced { + w.statusLock.Lock() + w.status = api.StatusSyncStartCheck + w.statusLock.Unlock() // Determine what is the first round that we would need to sync. iterativeSyncStart := cachedLastRound - if iterativeSyncStart == n.undefinedRound { + if iterativeSyncStart == w.undefinedRound { iterativeSyncStart++ } // Check if we actually have information about that round. This assumes that any reindexing // was already performed (the common node would not indicate being initialized otherwise). - _, err = n.commonNode.Runtime.History().GetCommittedBlock(n.ctx, iterativeSyncStart) + _, err = w.commonNode.Runtime.History().GetCommittedBlock(w.ctx, iterativeSyncStart) SyncStartCheck: switch { case err == nil: @@ -903,7 +911,7 @@ func (n *Node) worker() { // nolint: gocyclo // No information is available about the initial round. Query the earliest historic // block and check if that block has the genesis state root and empty I/O root. var earlyBlk *block.Block - earlyBlk, err = n.commonNode.Runtime.History().GetEarliestBlock(n.ctx) + earlyBlk, err = w.commonNode.Runtime.History().GetEarliestBlock(w.ctx) switch err { case nil: // Make sure the state root is still the same as at genesis time. @@ -917,13 +925,13 @@ func (n *Node) worker() { // nolint: gocyclo // If this is the case, we can start syncing from this round instead. Fill in the // remaining versions to make sure they actually exist in the database. 
- n.logger.Debug("filling in versions to genesis", + w.logger.Debug("filling in versions to genesis", "genesis_round", genesisBlock.Header.Round, "earliest_round", earlyBlk.Header.Round, ) for v := genesisBlock.Header.Round; v < earlyBlk.Header.Round; v++ { - err = n.localStorage.Apply(n.ctx, &storageApi.ApplyRequest{ - Namespace: n.commonNode.Runtime.ID(), + err = w.localStorage.Apply(w.ctx, &storageApi.ApplyRequest{ + Namespace: w.commonNode.Runtime.ID(), RootType: storageApi.RootTypeState, SrcRound: v, SrcRoot: genesisBlock.Header.StateRoot, @@ -937,31 +945,31 @@ func (n *Node) worker() { // nolint: gocyclo // Ignore already finalized versions. continue default: - n.logger.Error("failed to fill in version", + w.logger.Error("failed to fill in version", "version", v, "err", err, ) return } - err = n.localStorage.NodeDB().Finalize([]storageApi.Root{{ - Namespace: n.commonNode.Runtime.ID(), + err = w.localStorage.NodeDB().Finalize([]storageApi.Root{{ + Namespace: w.commonNode.Runtime.ID(), Version: v + 1, Type: storageApi.RootTypeState, Hash: genesisBlock.Header.StateRoot, // We can ignore I/O roots. }}) if err != nil { - n.logger.Error("failed to finalize filled in version", + w.logger.Error("failed to finalize filled in version", "version", v, "err", err, ) return } } - cachedLastRound, err = n.flushSyncedState(summaryFromBlock(earlyBlk)) + cachedLastRound, err = w.flushSyncedState(summaryFromBlock(earlyBlk)) if err != nil { - n.logger.Error("failed to flush synced state", + w.logger.Error("failed to flush synced state", "err", err, ) return @@ -970,26 +978,26 @@ func (n *Node) worker() { // nolint: gocyclo break SyncStartCheck default: // This should never happen as the block should exist. - n.logger.Warn("failed to query earliest block in local history", + w.logger.Warn("failed to query earliest block in local history", "err", err, ) } // No information is available about this round, force checkpoint sync. 
- n.logger.Warn("forcing checkpoint sync as we don't have authoritative block info", + w.logger.Warn("forcing checkpoint sync as we don't have authoritative block info", "round", iterativeSyncStart, ) - n.checkpointSyncForced = true + w.checkpointSyncForced = true default: // Unknown error while fetching block information, abort. - n.logger.Error("failed to query block", + w.logger.Error("failed to query block", "err", err, ) return } } - n.logger.Info("worker initialized", + w.logger.Info("worker initialized", "genesis_round", genesisBlock.Header.Round, "last_synced", cachedLastRound, ) @@ -1008,10 +1016,10 @@ func (n *Node) worker() { // nolint: gocyclo // to a later state which may not be desired given that checkpoint sync has been explicitly // disabled via config. // - if (isInitialStartup && !n.checkpointSyncCfg.Disabled) || n.checkpointSyncForced { - n.statusLock.Lock() - n.status = api.StatusSyncingCheckpoints - n.statusLock.Unlock() + if (isInitialStartup && !w.checkpointSyncCfg.Disabled) || w.checkpointSyncForced { + w.statusLock.Lock() + w.status = api.StatusSyncingCheckpoints + w.statusLock.Unlock() var ( summary *blockSummary @@ -1019,17 +1027,17 @@ func (n *Node) worker() { // nolint: gocyclo ) CheckpointSyncRetry: for { - summary, err = n.syncCheckpoints(genesisBlock.Header.Round, n.checkpointSyncCfg.Disabled) + summary, err = w.syncCheckpoints(genesisBlock.Header.Round, w.checkpointSyncCfg.Disabled) if err == nil { break } attempt++ - switch n.checkpointSyncForced { + switch w.checkpointSyncForced { case true: // We have no other options but to perform a checkpoint sync as we are missing // either state or authoritative blocks. - n.logger.Info("checkpoint sync required, retrying", + w.logger.Info("checkpoint sync required, retrying", "err", err, "attempt", attempt, ) @@ -1041,36 +1049,36 @@ func (n *Node) worker() { // nolint: gocyclo // Try syncing again. 
The main reason for this is the sync failing due to a // checkpoint pruning race condition (where nodes list a checkpoint which is // then deleted just before we request its chunks). One retry is enough. - n.logger.Info("first checkpoint sync failed, trying once more", "err", err) + w.logger.Info("first checkpoint sync failed, trying once more", "err", err) } // Delay before retrying. select { case <-time.After(checkpointSyncRetryDelay): - case <-n.ctx.Done(): + case <-w.ctx.Done(): return } } if err != nil { - n.logger.Info("checkpoint sync failed", "err", err) + w.logger.Info("checkpoint sync failed", "err", err) } else { - cachedLastRound, err = n.flushSyncedState(summary) + cachedLastRound, err = w.flushSyncedState(summary) if err != nil { - n.logger.Error("failed to flush synced state", + w.logger.Error("failed to flush synced state", "err", err, ) return } lastFullyAppliedRound = cachedLastRound - n.logger.Info("checkpoint sync succeeded", + w.logger.Info("checkpoint sync succeeded", logging.LogEvent, LogEventCheckpointSyncSuccess, ) } } - close(n.initCh) + close(w.initCh) // Don't register availability immediately, we want to know first how far behind consensus we are. 
- latestBlockRound := n.undefinedRound + latestBlockRound := w.undefinedRound heartbeat := heartbeat{} heartbeat.reset() @@ -1097,10 +1105,10 @@ func (n *Node) worker() { // nolint: gocyclo syncingRounds[i] = syncing if i == latestBlockRound { - storageWorkerLastPendingRound.With(n.getMetricLabels()).Set(float64(i)) + storageWorkerLastPendingRound.With(w.getMetricLabels()).Set(float64(i)) } } - n.logger.Debug("preparing round sync", + w.logger.Debug("preparing round sync", "round", i, "outstanding_mask", syncing.outstanding, "awaiting_retry", syncing.awaitingRetry, @@ -1128,18 +1136,18 @@ func (n *Node) worker() { // nolint: gocyclo if !syncing.outstanding.contains(rootType) && syncing.awaitingRetry.contains(rootType) { syncing.scheduleDiff(rootType) wg.Add(1) - n.fetchPool.Submit(func() { + w.fetchPool.Submit(func() { defer wg.Done() - n.fetchDiff(this.Round, prevRoots[i], this.Roots[i]) + w.fetchDiff(this.Round, prevRoots[i], this.Roots[i]) }) } } } } - n.statusLock.Lock() - n.status = api.StatusSyncingRounds - n.statusLock.Unlock() + w.statusLock.Lock() + w.status = api.StatusSyncingRounds + w.statusLock.Unlock() pendingApply := &minRoundQueue{} pendingFinalize := &minRoundQueue{} @@ -1163,7 +1171,7 @@ mainLoop: // Apply the write log if one exists. err = nil if lastDiff.fetched { - err = n.localStorage.Apply(n.ctx, &storageApi.ApplyRequest{ + err = w.localStorage.Apply(w.ctx, &storageApi.ApplyRequest{ Namespace: lastDiff.thisRoot.Namespace, RootType: lastDiff.thisRoot.Type, SrcRound: lastDiff.prevRoot.Version, @@ -1178,7 +1186,7 @@ mainLoop: case errors.Is(err, storageApi.ErrExpectedRootMismatch): lastDiff.pf.RecordBadPeer() default: - n.logger.Error("can't apply write log", + w.logger.Error("can't apply write log", "err", err, "old_root", lastDiff.prevRoot, "new_root", lastDiff.thisRoot, @@ -1198,14 +1206,14 @@ mainLoop: } // We have fully synced the given round. 
- n.logger.Debug("finished syncing round", "round", lastDiff.round) + w.logger.Debug("finished syncing round", "round", lastDiff.round) delete(syncingRounds, lastDiff.round) summary := summaryCache[lastDiff.round] delete(summaryCache, lastDiff.round-1) lastFullyAppliedRound = lastDiff.round - storageWorkerLastSyncedRound.With(n.getMetricLabels()).Set(float64(lastDiff.round)) - storageWorkerRoundSyncLatency.With(n.getMetricLabels()).Observe(time.Since(syncing.startedAt).Seconds()) + storageWorkerLastSyncedRound.With(w.getMetricLabels()).Set(float64(lastDiff.round)) + storageWorkerRoundSyncLatency.With(w.getMetricLabels()).Observe(time.Since(syncing.startedAt).Seconds()) // Finalize storage for this round. This happens asynchronously // with respect to Apply operations for subsequent rounds. @@ -1222,15 +1230,15 @@ mainLoop: wg.Add(1) go func() { // Don't block fetching and applying remaining rounds. defer wg.Done() - n.finalize(lastSummary) + w.finalize(lastSummary) }() continue } select { - case inBlk := <-n.blockCh.Out(): + case inBlk := <-w.blockCh.Out(): blk := inBlk.(*block.Block) - n.logger.Debug("incoming block", + w.logger.Debug("incoming block", "round", blk.Header.Round, "last_synced", lastFullyAppliedRound, "last_finalized", cachedLastRound, @@ -1238,9 +1246,9 @@ mainLoop: // Check if we're far enough to reasonably register as available. latestBlockRound = blk.Header.Round - n.nudgeAvailability(cachedLastRound, latestBlockRound) + w.nudgeAvailability(cachedLastRound, latestBlockRound) - if _, ok := summaryCache[lastFullyAppliedRound]; !ok && lastFullyAppliedRound == n.undefinedRound { + if _, ok := summaryCache[lastFullyAppliedRound]; !ok && lastFullyAppliedRound == w.undefinedRound { dummy := blockSummary{ Namespace: blk.Header.Namespace, Round: lastFullyAppliedRound + 1, @@ -1264,7 +1272,7 @@ mainLoop: // since the undefined round may be unsigned -1 and in this case the loop // would not do any iterations. 
startSummaryRound := lastFullyAppliedRound - if startSummaryRound == n.undefinedRound { + if startSummaryRound == w.undefinedRound { startSummaryRound++ } for i := startSummaryRound; i < blk.Header.Round; i++ { @@ -1272,9 +1280,9 @@ mainLoop: continue } var oldBlock *block.Block - oldBlock, err = n.commonNode.Runtime.History().GetCommittedBlock(n.ctx, i) + oldBlock, err = w.commonNode.Runtime.History().GetCommittedBlock(w.ctx, i) if err != nil { - n.logger.Error("can't get block for round", + w.logger.Error("can't get block for round", "err", err, "round", i, "current_round", blk.Header.Round, @@ -1291,14 +1299,14 @@ mainLoop: heartbeat.reset() case <-heartbeat.C: - if latestBlockRound != n.undefinedRound { - n.logger.Debug("heartbeat", "in_flight_rounds", len(syncingRounds)) + if latestBlockRound != w.undefinedRound { + w.logger.Debug("heartbeat", "in_flight_rounds", len(syncingRounds)) triggerRoundFetches() } - case item := <-n.diffCh: + case item := <-w.diffCh: if item.err != nil { - n.logger.Error("error calling getdiff", + w.logger.Error("error calling getdiff", "err", item.err, "round", item.round, "old_root", item.prevRoot, @@ -1315,35 +1323,35 @@ mainLoop: // when we're syncing and are far behind. triggerRoundFetches() - case finalized := <-n.finalizeCh: + case finalized := <-w.finalizeCh: // If finalization failed, things start falling apart. // There's no point redoing it, since it's probably not a transient // error, and cachedLastRound also can't be updated legitimately. if finalized.err != nil { // Request a node shutdown given that syncing is effectively blocked. 
- _ = n.commonNode.HostNode.RequestShutdown(n.ctx, false) + _ = w.commonNode.HostNode.RequestShutdown(w.ctx, false) break mainLoop } // No further sync or out of order handling needed here, since // only one finalize at a time is triggered (for round cachedLastRound+1) - cachedLastRound, err = n.flushSyncedState(finalized.summary) + cachedLastRound, err = w.flushSyncedState(finalized.summary) if err != nil { - n.logger.Error("failed to flush synced state", + w.logger.Error("failed to flush synced state", "err", err, ) } - storageWorkerLastFullRound.With(n.getMetricLabels()).Set(float64(finalized.summary.Round)) + storageWorkerLastFullRound.With(w.getMetricLabels()).Set(float64(finalized.summary.Round)) // Check if we're far enough to reasonably register as available. - n.nudgeAvailability(cachedLastRound, latestBlockRound) + w.nudgeAvailability(cachedLastRound, latestBlockRound) // Notify the checkpointer that there is a new finalized round. if config.GlobalConfig.Storage.Checkpointer.Enabled { - n.checkpointer.NotifyNewVersion(finalized.summary.Round) + w.checkpointer.NotifyNewVersion(finalized.summary.Round) } - case <-n.ctx.Done(): + case <-w.ctx.Done(): break mainLoop } } @@ -1356,12 +1364,12 @@ mainLoop: type pruneHandler struct { logger *logging.Logger - node *Node + worker *Worker } func (p *pruneHandler) Prune(rounds []uint64) error { // Make sure we never prune past what was synced. - lastSycnedRound, _, _ := p.node.GetLastSynced() + lastSycnedRound, _, _ := p.worker.GetLastSynced() for _, round := range rounds { if round >= lastSycnedRound { @@ -1375,7 +1383,7 @@ func (p *pruneHandler) Prune(rounds []uint64) error { p.logger.Debug("pruning storage for round", "round", round) // Prune given block. 
- err := p.node.localStorage.NodeDB().Prune(round) + err := p.worker.localStorage.NodeDB().Prune(round) switch err { case nil: case mkvsDB.ErrNotEarliest: diff --git a/go/worker/storage/committee/utils.go b/go/worker/storage/statesync/utils.go similarity index 99% rename from go/worker/storage/committee/utils.go rename to go/worker/storage/statesync/utils.go index 863b9fc7bd0..88adb492b33 100644 --- a/go/worker/storage/committee/utils.go +++ b/go/worker/storage/statesync/utils.go @@ -1,4 +1,4 @@ -package committee +package statesync import ( "fmt" diff --git a/go/worker/storage/worker.go b/go/worker/storage/worker.go index f49988bd1c7..ed28aa86906 100644 --- a/go/worker/storage/worker.go +++ b/go/worker/storage/worker.go @@ -12,10 +12,10 @@ import ( committeeCommon "github.com/oasisprotocol/oasis-core/go/worker/common/committee" "github.com/oasisprotocol/oasis-core/go/worker/registration" storageWorkerAPI "github.com/oasisprotocol/oasis-core/go/worker/storage/api" - "github.com/oasisprotocol/oasis-core/go/worker/storage/committee" + "github.com/oasisprotocol/oasis-core/go/worker/storage/statesync" ) -// Worker is a worker handling storage operations. +// Worker is a worker handling storage operations for all common worker runtimes. type Worker struct { enabled bool @@ -26,7 +26,7 @@ type Worker struct { initCh chan struct{} quitCh chan struct{} - runtimes map[common.Namespace]*committee.Node + runtimes map[common.Namespace]*statesync.Worker } // New constructs a new storage worker. @@ -44,14 +44,14 @@ func New( logger: logging.GetLogger("worker/storage"), initCh: make(chan struct{}), quitCh: make(chan struct{}), - runtimes: make(map[common.Namespace]*committee.Node), + runtimes: make(map[common.Namespace]*statesync.Worker), } if !enabled { return s, nil } - // Start storage node for every runtime. + // Register the state sync worker for every runtime. 
for id, rt := range s.commonWorker.GetRuntimes() { if err := s.registerRuntime(rt); err != nil { return nil, fmt.Errorf("failed to create storage worker for runtime %s: %w", id, err) @@ -90,13 +90,13 @@ func (w *Worker) registerRuntime(commonNode *committeeCommon.Node) error { return fmt.Errorf("can't create local storage backend: %w", err) } - node, err := committee.NewNode( + worker, err := statesync.New( commonNode, rp, rpRPC, w.commonWorker.GetConfig(), localStorage, - &committee.CheckpointSyncConfig{ + &statesync.CheckpointSyncConfig{ Disabled: config.GlobalConfig.Storage.CheckpointSyncDisabled, ChunkFetcherCount: config.GlobalConfig.Storage.FetcherCount, }, @@ -105,8 +105,8 @@ func (w *Worker) registerRuntime(commonNode *committeeCommon.Node) error { return err } commonNode.Runtime.RegisterStorage(localStorage) - commonNode.AddHooks(node) - w.runtimes[id] = node + commonNode.AddHooks(worker) + w.runtimes[id] = worker w.logger.Info("new runtime registered", "runtime_id", id, @@ -115,7 +115,7 @@ func (w *Worker) registerRuntime(commonNode *committeeCommon.Node) error { return nil } -// Name returns the service name. +// Name returns the worker name. func (w *Worker) Name() string { return "storage worker" } @@ -196,6 +196,6 @@ func (w *Worker) Cleanup() { // GetRuntime returns a storage committee node for the given runtime (if available). // // In case the runtime with the specified id was not configured for this node it returns nil. 
-func (w *Worker) GetRuntime(id common.Namespace) *committee.Node { +func (w *Worker) GetRuntime(id common.Namespace) *statesync.Worker { return w.runtimes[id] } From 2a22222878c2462064173946f721ab92806c4b3f Mon Sep 17 00:00:00 2001 From: Martin Tomazic Date: Sat, 2 Aug 2025 21:57:31 +0200 Subject: [PATCH 02/18] go/worker/storage/statesync: Move pruning to separate file --- go/worker/storage/statesync/prune.go | 48 +++++++++++++++++++++++ go/worker/storage/statesync/state_sync.go | 41 ------------------- 2 files changed, 48 insertions(+), 41 deletions(-) create mode 100644 go/worker/storage/statesync/prune.go diff --git a/go/worker/storage/statesync/prune.go b/go/worker/storage/statesync/prune.go new file mode 100644 index 00000000000..8e1a9c3e20b --- /dev/null +++ b/go/worker/storage/statesync/prune.go @@ -0,0 +1,48 @@ +package statesync + +import ( + "fmt" + + "github.com/oasisprotocol/oasis-core/go/common/logging" + mkvsDB "github.com/oasisprotocol/oasis-core/go/storage/mkvs/db/api" +) + +type pruneHandler struct { + logger *logging.Logger + worker *Worker +} + +func (p *pruneHandler) Prune(rounds []uint64) error { + // Make sure we never prune past what was synced. + lastSycnedRound, _, _ := p.worker.GetLastSynced() + + for _, round := range rounds { + if round >= lastSycnedRound { + return fmt.Errorf("worker/storage: tried to prune past last synced round (last synced: %d)", + lastSycnedRound, + ) + } + + // TODO: Make sure we don't prune rounds that need to be checkpointed but haven't been yet. + + p.logger.Debug("pruning storage for round", "round", round) + + // Prune given block.
+ err := p.worker.localStorage.NodeDB().Prune(round) + switch err { + case nil: + case mkvsDB.ErrNotEarliest: + p.logger.Debug("skipping non-earliest round", + "round", round, + ) + continue + default: + p.logger.Error("failed to prune block", + "err", err, + ) + return err + } + } + + return nil +} diff --git a/go/worker/storage/statesync/state_sync.go b/go/worker/storage/statesync/state_sync.go index 8762df77c4b..f628697ef0c 100644 --- a/go/worker/storage/statesync/state_sync.go +++ b/go/worker/storage/statesync/state_sync.go @@ -29,7 +29,6 @@ import ( storageApi "github.com/oasisprotocol/oasis-core/go/storage/api" "github.com/oasisprotocol/oasis-core/go/storage/mkvs/checkpoint" dbApi "github.com/oasisprotocol/oasis-core/go/storage/mkvs/db/api" - mkvsDB "github.com/oasisprotocol/oasis-core/go/storage/mkvs/db/api" workerCommon "github.com/oasisprotocol/oasis-core/go/worker/common" "github.com/oasisprotocol/oasis-core/go/worker/common/committee" "github.com/oasisprotocol/oasis-core/go/worker/registration" @@ -1361,43 +1360,3 @@ mainLoop: // some new blocks, but only as many as were already in-flight at the point when the main // context was canceled. } - -type pruneHandler struct { - logger *logging.Logger - worker *Worker -} - -func (p *pruneHandler) Prune(rounds []uint64) error { - // Make sure we never prune past what was synced. - lastSycnedRound, _, _ := p.worker.GetLastSynced() - - for _, round := range rounds { - if round >= lastSycnedRound { - return fmt.Errorf("worker/storage: tried to prune past last synced round (last synced: %d)", - lastSycnedRound, - ) - } - - // TODO: Make sure we don't prune rounds that need to be checkpointed but haven't been yet. - - p.logger.Debug("pruning storage for round", "round", round) - - // Prune given block. 
- err := p.worker.localStorage.NodeDB().Prune(round) - switch err { - case nil: - case mkvsDB.ErrNotEarliest: - p.logger.Debug("skipping non-earliest round", - "round", round, - ) - continue - default: - p.logger.Error("failed to prune block", - "err", err, - ) - return err - } - } - - return nil -} From ce699b9514455c7c92b222f616aa42dd7a76f7f6 Mon Sep 17 00:00:00 2001 From: Martin Tomazic Date: Sun, 24 Aug 2025 21:24:25 +0200 Subject: [PATCH 03/18] go/worker/storage/statesync: Create genesis checkpoint later Previously, genesis checkpoint was created right after the state may be initialized from the runtime descriptor. If this is not the case it is first fetched from the peers. Thus, we should force the genesis checkpoint only after checkpoint sync finishes. --- go/worker/storage/statesync/state_sync.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/go/worker/storage/statesync/state_sync.go b/go/worker/storage/statesync/state_sync.go index f628697ef0c..7acb43fc026 100644 --- a/go/worker/storage/statesync/state_sync.go +++ b/go/worker/storage/statesync/state_sync.go @@ -880,12 +880,6 @@ func (w *Worker) worker() { // nolint: gocyclo } } - // Notify the checkpointer of the genesis round so it can be checkpointed. - if w.checkpointer != nil { - w.checkpointer.ForceCheckpoint(genesisBlock.Header.Round) - w.checkpointer.Flush() - } - // Check if we are able to fetch the first block that we would be syncing if we used iterative // syncing. In case we cannot (likely because we synced the consensus layer via state sync), we // must wait for a later checkpoint to become available. @@ -1076,6 +1070,12 @@ func (w *Worker) worker() { // nolint: gocyclo } close(w.initCh) + // Notify the checkpointer of the genesis round so it can be checkpointed. 
+ if w.checkpointer != nil { + w.checkpointer.ForceCheckpoint(genesisBlock.Header.Round) + w.checkpointer.Flush() + } + // Don't register availability immediately, we want to know first how far behind consensus we are. latestBlockRound := w.undefinedRound From 06762dad630504479332dcbadec0f9c91ed0e170 Mon Sep 17 00:00:00 2001 From: Martin Tomazic Date: Sun, 24 Aug 2025 21:51:39 +0200 Subject: [PATCH 04/18] go/storage/mkvs: Add a Serve method to the checkpointer This is desirable, so that workers that initialize a new checkpointer don't require accepting a context, but instead the lifetime and initialization of the checkpointer are handled by the worker's Serve method. --- go/consensus/cometbft/abci/state.go | 9 +++++---- go/storage/mkvs/checkpoint/checkpointer.go | 21 +++++++++------------ .../mkvs/checkpoint/checkpointer_test.go | 12 +++++++++--- go/worker/storage/statesync/state_sync.go | 12 +++++++----- 4 files changed, 30 insertions(+), 24 deletions(-) diff --git a/go/consensus/cometbft/abci/state.go b/go/consensus/cometbft/abci/state.go index e580cd3e720..21ea769f026 100644 --- a/go/consensus/cometbft/abci/state.go +++ b/go/consensus/cometbft/abci/state.go @@ -782,10 +782,11 @@ func newApplicationState(ctx context.Context, upgrader upgrade.Backend, cfg *App }, nil }, } - s.checkpointer, err = checkpoint.NewCheckpointer(s.ctx, ndb, ldb.Checkpointer(), checkpointerCfg) - if err != nil { - return nil, fmt.Errorf("state: failed to create checkpointer: %w", err) - } + s.checkpointer = checkpoint.NewCheckpointer(ndb, ldb.Checkpointer(), checkpointerCfg) + go func() { + err := s.checkpointer.Serve(ctx) + s.logger.Error("checkpointer failed", "err", err) + }() } go s.metricsWorker() diff --git a/go/storage/mkvs/checkpoint/checkpointer.go b/go/storage/mkvs/checkpoint/checkpointer.go index 7a43a834871..9add11b34ab 100644 --- a/go/storage/mkvs/checkpoint/checkpointer.go +++ b/go/storage/mkvs/checkpoint/checkpointer.go @@ -72,7 +72,7 @@ type CreationParameters struct { ChunkerThreads
uint16 } -// Checkpointer is a checkpointer. +// Checkpointer is responsible for creating the storage snapshots (checkpoints). type Checkpointer interface { // NotifyNewVersion notifies the checkpointer that a new version has been finalized. NotifyNewVersion(version uint64) @@ -95,6 +95,9 @@ type Checkpointer interface { // intervals; after unpausing, a checkpoint won't be created immediately, but the checkpointer // will wait for the next regular event. Pause(pause bool) + + // Serve starts running the checkpointer. + Serve(ctx context.Context) error } type checkpointer struct { @@ -285,7 +288,7 @@ func (c *checkpointer) maybeCheckpoint(ctx context.Context, version uint64, para return nil } -func (c *checkpointer) worker(ctx context.Context) { +func (c *checkpointer) Serve(ctx context.Context) error { c.logger.Debug("storage checkpointer started", "check_interval", c.cfg.CheckInterval, ) @@ -310,7 +313,7 @@ func (c *checkpointer) worker(ctx context.Context) { select { case <-ctx.Done(): - return + return ctx.Err() case <-time.After(interval): case <-c.flushCh.Out(): case paused = <-c.pausedCh: @@ -323,7 +326,7 @@ func (c *checkpointer) worker(ctx context.Context) { ) select { case <-ctx.Done(): - return + return ctx.Err() case v := <-c.notifyCh.Out(): version = v.(uint64) case v := <-c.forceCh.Out(): @@ -387,12 +390,7 @@ func (c *checkpointer) worker(ctx context.Context) { // NewCheckpointer creates a new checkpointer that can be notified of new finalized versions and // will automatically generate the configured number of checkpoints. 
-func NewCheckpointer( - ctx context.Context, - ndb db.NodeDB, - creator Creator, - cfg CheckpointerConfig, -) (Checkpointer, error) { +func NewCheckpointer(ndb db.NodeDB, creator Creator, cfg CheckpointerConfig) Checkpointer { c := &checkpointer{ cfg: cfg, ndb: ndb, @@ -405,6 +403,5 @@ func NewCheckpointer( cpNotifier: pubsub.NewBroker(false), logger: logging.GetLogger("storage/mkvs/checkpoint/"+cfg.Name).With("namespace", cfg.Namespace), } - go c.worker(ctx) - return c, nil + return c } diff --git a/go/storage/mkvs/checkpoint/checkpointer_test.go b/go/storage/mkvs/checkpoint/checkpointer_test.go index 9782f904b85..e9994248590 100644 --- a/go/storage/mkvs/checkpoint/checkpointer_test.go +++ b/go/storage/mkvs/checkpoint/checkpointer_test.go @@ -25,7 +25,8 @@ const ( func testCheckpointer(t *testing.T, factory dbApi.Factory, earliestVersion, interval uint64, preExistingData bool) { require := require.New(t) - ctx := context.Background() + ctx, cancel := context.WithCancel(t.Context()) + defer cancel() // Initialize a database. dir, err := os.MkdirTemp("", "mkvs.checkpointer") @@ -70,7 +71,7 @@ func testCheckpointer(t *testing.T, factory dbApi.Factory, earliestVersion, inte require.NoError(err, "NewFileCreator") // Create a checkpointer. - cp, err := NewCheckpointer(ctx, ndb, fc, CheckpointerConfig{ + cp := NewCheckpointer(ndb, fc, CheckpointerConfig{ Name: "test", Namespace: testNs, CheckInterval: testCheckInterval, @@ -89,7 +90,12 @@ func testCheckpointer(t *testing.T, factory dbApi.Factory, earliestVersion, inte return ndb.GetRootsForVersion(version) }, }) - require.NoError(err, "NewCheckpointer") + go func() { + err := cp.Serve(ctx) + if err != context.Canceled { + require.NoError(err) + } + }() // Start watching checkpoints. 
cpCh, sub, err := cp.WatchCheckpoints() diff --git a/go/worker/storage/statesync/state_sync.go b/go/worker/storage/statesync/state_sync.go index 7acb43fc026..002357d79c6 100644 --- a/go/worker/storage/statesync/state_sync.go +++ b/go/worker/storage/statesync/state_sync.go @@ -225,12 +225,15 @@ func New( w.ctx, w.ctxCancel = context.WithCancel(context.Background()) // Create a checkpointer (even if checkpointing is disabled) to ensure the genesis checkpoint is available. - checkpointer, err := w.newCheckpointer(w.ctx, commonNode, localStorage) + checkpointer, err := w.newCheckpointer(commonNode, localStorage) if err != nil { return nil, fmt.Errorf("failed to create checkpointer: %w", err) } w.checkpointer = checkpointer - + go func() { + err := w.checkpointer.Serve(w.ctx) + w.logger.Error("checkpointer failed", "err", err) + }() // Register prune handler. commonNode.Runtime.History().Pruner().RegisterHandler(&pruneHandler{ logger: w.logger, @@ -255,7 +258,7 @@ func New( return w, nil } -func (w *Worker) newCheckpointer(ctx context.Context, commonNode *committee.Node, localStorage storageApi.LocalBackend) (checkpoint.Checkpointer, error) { +func (w *Worker) newCheckpointer(commonNode *committee.Node, localStorage storageApi.LocalBackend) (checkpoint.Checkpointer, error) { checkInterval := checkpoint.CheckIntervalDisabled if config.GlobalConfig.Storage.Checkpointer.Enabled { checkInterval = config.GlobalConfig.Storage.Checkpointer.CheckInterval @@ -303,11 +306,10 @@ func (w *Worker) newCheckpointer(ctx context.Context, commonNode *committee.Node } return checkpoint.NewCheckpointer( - ctx, localStorage.NodeDB(), localStorage.Checkpointer(), checkpointerCfg, - ) + ), nil } // Service interface. 
From 97728c6bb4aa6a5144f4b002825aaa4def96d4c1 Mon Sep 17 00:00:00 2001 From: Martin Tomazic Date: Wed, 27 Aug 2025 10:10:18 +0200 Subject: [PATCH 05/18] go/storage/mkvs/checkpoint: Move methods around for readability --- go/storage/mkvs/checkpoint/checkpointer.go | 236 ++++++++++----------- 1 file changed, 118 insertions(+), 118 deletions(-) diff --git a/go/storage/mkvs/checkpoint/checkpointer.go b/go/storage/mkvs/checkpoint/checkpointer.go index 9add11b34ab..f1292123650 100644 --- a/go/storage/mkvs/checkpoint/checkpointer.go +++ b/go/storage/mkvs/checkpoint/checkpointer.go @@ -115,6 +115,24 @@ type checkpointer struct { logger *logging.Logger } +// NewCheckpointer creates a new checkpointer that can be notified of new finalized versions and +// will automatically generate the configured number of checkpoints. +func NewCheckpointer(ndb db.NodeDB, creator Creator, cfg CheckpointerConfig) Checkpointer { + c := &checkpointer{ + cfg: cfg, + ndb: ndb, + creator: creator, + notifyCh: channels.NewRingChannel(1), + forceCh: channels.NewRingChannel(1), + flushCh: channels.NewRingChannel(1), + statusCh: make(chan struct{}), + pausedCh: make(chan bool), + cpNotifier: pubsub.NewBroker(false), + logger: logging.GetLogger("storage/mkvs/checkpoint/"+cfg.Name).With("namespace", cfg.Namespace), + } + return c +} + // Implements Checkpointer. 
func (c *checkpointer) NotifyNewVersion(version uint64) { c.notifyCh.In() <- version @@ -143,6 +161,106 @@ func (c *checkpointer) Pause(pause bool) { c.pausedCh <- pause } +func (c *checkpointer) Serve(ctx context.Context) error { + c.logger.Debug("storage checkpointer started", + "check_interval", c.cfg.CheckInterval, + ) + defer func() { + c.logger.Debug("storage checkpointer terminating") + }() + + paused := false + + for { + var interval time.Duration + switch c.cfg.CheckInterval { + case CheckIntervalDisabled: + interval = CheckIntervalDisabled + default: + interval = random.GetRandomValueFromInterval( + checkpointIntervalRandomizationFactor, + rand.Float64(), + c.cfg.CheckInterval, + ) + } + + select { + case <-ctx.Done(): + return ctx.Err() + case <-time.After(interval): + case <-c.flushCh.Out(): + case paused = <-c.pausedCh: + continue + } + + var ( + version uint64 + force bool + ) + select { + case <-ctx.Done(): + return ctx.Err() + case v := <-c.notifyCh.Out(): + version = v.(uint64) + case v := <-c.forceCh.Out(): + version = v.(uint64) + force = true + } + + // Fetch current checkpoint parameters. + params := c.cfg.Parameters + if params == nil && c.cfg.GetParameters != nil { + var err error + params, err = c.cfg.GetParameters(ctx) + if err != nil { + c.logger.Error("failed to get checkpoint parameters", + "err", err, + "version", version, + ) + continue + } + } + if params == nil { + c.logger.Error("no checkpoint parameters") + continue + } + + // Don't checkpoint if checkpoints are disabled. + switch { + case force: + // Always checkpoint when forced. 
+ case paused: + continue + case params.Interval == 0: + continue + case c.cfg.CheckInterval == CheckIntervalDisabled: + continue + default: + } + + var err error + switch force { + case false: + err = c.maybeCheckpoint(ctx, version, params) + case true: + err = c.checkpoint(ctx, version, params) + } + if err != nil { + c.logger.Error("failed to checkpoint", + "version", version, + "err", err, + ) + continue + } + + // Emit status update if someone is listening. This is only used in tests. + select { + case c.statusCh <- struct{}{}: + default: + } + } +} + func (c *checkpointer) checkpoint(ctx context.Context, version uint64, params *CreationParameters) (err error) { // Notify watchers about the checkpoint we are about to make. c.cpNotifier.Broadcast(version) @@ -287,121 +405,3 @@ func (c *checkpointer) maybeCheckpoint(ctx context.Context, version uint64, para return nil } - -func (c *checkpointer) Serve(ctx context.Context) error { - c.logger.Debug("storage checkpointer started", - "check_interval", c.cfg.CheckInterval, - ) - defer func() { - c.logger.Debug("storage checkpointer terminating") - }() - - paused := false - - for { - var interval time.Duration - switch c.cfg.CheckInterval { - case CheckIntervalDisabled: - interval = CheckIntervalDisabled - default: - interval = random.GetRandomValueFromInterval( - checkpointIntervalRandomizationFactor, - rand.Float64(), - c.cfg.CheckInterval, - ) - } - - select { - case <-ctx.Done(): - return ctx.Err() - case <-time.After(interval): - case <-c.flushCh.Out(): - case paused = <-c.pausedCh: - continue - } - - var ( - version uint64 - force bool - ) - select { - case <-ctx.Done(): - return ctx.Err() - case v := <-c.notifyCh.Out(): - version = v.(uint64) - case v := <-c.forceCh.Out(): - version = v.(uint64) - force = true - } - - // Fetch current checkpoint parameters. 
- params := c.cfg.Parameters - if params == nil && c.cfg.GetParameters != nil { - var err error - params, err = c.cfg.GetParameters(ctx) - if err != nil { - c.logger.Error("failed to get checkpoint parameters", - "err", err, - "version", version, - ) - continue - } - } - if params == nil { - c.logger.Error("no checkpoint parameters") - continue - } - - // Don't checkpoint if checkpoints are disabled. - switch { - case force: - // Always checkpoint when forced. - case paused: - continue - case params.Interval == 0: - continue - case c.cfg.CheckInterval == CheckIntervalDisabled: - continue - default: - } - - var err error - switch force { - case false: - err = c.maybeCheckpoint(ctx, version, params) - case true: - err = c.checkpoint(ctx, version, params) - } - if err != nil { - c.logger.Error("failed to checkpoint", - "version", version, - "err", err, - ) - continue - } - - // Emit status update if someone is listening. This is only used in tests. - select { - case c.statusCh <- struct{}{}: - default: - } - } -} - -// NewCheckpointer creates a new checkpointer that can be notified of new finalized versions and -// will automatically generate the configured number of checkpoints. 
-func NewCheckpointer(ndb db.NodeDB, creator Creator, cfg CheckpointerConfig) Checkpointer { - c := &checkpointer{ - cfg: cfg, - ndb: ndb, - creator: creator, - notifyCh: channels.NewRingChannel(1), - forceCh: channels.NewRingChannel(1), - flushCh: channels.NewRingChannel(1), - statusCh: make(chan struct{}), - pausedCh: make(chan bool), - cpNotifier: pubsub.NewBroker(false), - logger: logging.GetLogger("storage/mkvs/checkpoint/"+cfg.Name).With("namespace", cfg.Namespace), - } - return c -} From 81a43810c19fba7ee84f6decb4b84946aeb13a17 Mon Sep 17 00:00:00 2001 From: Martin Tomazic Date: Mon, 25 Aug 2025 11:14:53 +0200 Subject: [PATCH 06/18] go/worker/storage/statesync: Add explicit timeout for fetching diff --- go/worker/storage/statesync/state_sync.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/go/worker/storage/statesync/state_sync.go b/go/worker/storage/statesync/state_sync.go index 002357d79c6..d78c64d615e 100644 --- a/go/worker/storage/statesync/state_sync.go +++ b/go/worker/storage/statesync/state_sync.go @@ -71,6 +71,9 @@ const ( // same checkpoint hashes. The current value was chosen based on the benchmarks // done on the modern developer machine. chunkerThreads = 12 + + // diffResponseTimeout is the maximum time for fetching storage diff from the peer. + diffResponseTimeout = 15 * time.Second ) type roundItem interface { @@ -450,7 +453,7 @@ func (w *Worker) fetchDiff(round uint64, prevRoot, thisRoot storageApi.Root) { "new_root", thisRoot, ) - ctx, cancel := context.WithCancel(w.ctx) + ctx, cancel := context.WithTimeout(w.ctx, diffResponseTimeout) defer cancel() wl, pf, err := w.getDiff(ctx, prevRoot, thisRoot) From 874bca63841ca34d68d72a672b6eddfd60b034ed Mon Sep 17 00:00:00 2001 From: Martin Tomazic Date: Wed, 27 Aug 2025 11:43:16 +0200 Subject: [PATCH 07/18] go/worker/storage/statesync: Reduce the scope of workers This also serves as step towards passing the context explicitly. 
--- go/worker/storage/statesync/state_sync.go | 51 +++++++++++++---------- 1 file changed, 28 insertions(+), 23 deletions(-) diff --git a/go/worker/storage/statesync/state_sync.go b/go/worker/storage/statesync/state_sync.go index d78c64d615e..5c9053d7b42 100644 --- a/go/worker/storage/statesync/state_sync.go +++ b/go/worker/storage/statesync/state_sync.go @@ -150,8 +150,6 @@ type Worker struct { undefinedRound uint64 - fetchPool *workerpool.Pool - workerCommonCfg workerCommon.Config checkpointer checkpoint.Checkpointer @@ -187,10 +185,6 @@ func New( ) (*Worker, error) { initMetrics() - // Create the fetcher pool. - fetchPool := workerpool.New("storage_fetch/" + commonNode.Runtime.ID().String()) - fetchPool.Resize(config.GlobalConfig.Storage.FetcherCount) - w := &Worker{ commonNode: commonNode, @@ -203,8 +197,6 @@ func New( localStorage: localStorage, - fetchPool: fetchPool, - checkpointSyncCfg: checkpointSyncCfg, status: api.StatusInitializing, @@ -233,10 +225,6 @@ func New( return nil, fmt.Errorf("failed to create checkpointer: %w", err) } w.checkpointer = checkpointer - go func() { - err := w.checkpointer.Serve(w.ctx) - w.logger.Error("checkpointer failed", "err", err) - }() // Register prune handler. commonNode.Runtime.History().Pruner().RegisterHandler(&pruneHandler{ logger: w.logger, @@ -320,20 +308,11 @@ func (w *Worker) newCheckpointer(commonNode *committee.Node, localStorage storag // Start causes the worker to start responding to CometBFT new block events. func (w *Worker) Start() error { go w.worker() - if config.GlobalConfig.Storage.Checkpointer.Enabled { - go w.consensusCheckpointSyncer() - } return nil } // Stop causes the worker to stop watching and shut down. 
func (w *Worker) Stop() { - w.statusLock.Lock() - w.status = api.StatusStopping - w.statusLock.Unlock() - - w.fetchPool.Stop() - w.ctxCancel() } @@ -817,11 +796,31 @@ func (w *Worker) worker() { // nolint: gocyclo } w.logger.Info("starting") - w.statusLock.Lock() w.status = api.StatusStarting w.statusLock.Unlock() + ctx, cancel := context.WithCancel(w.ctx) + defer cancel() + + go func() { + select { + case <-ctx.Done(): + w.statusLock.Lock() + w.status = api.StatusStopping + w.statusLock.Unlock() + } + }() + defer w.logger.Info("stopped") + + go func() { + err := w.checkpointer.Serve(w.ctx) + w.logger.Error("checkpointer failed", "err", err) + }() + if config.GlobalConfig.Storage.Checkpointer.Enabled { + go w.consensusCheckpointSyncer() + } + // Determine genesis block. genesisBlock, err := w.commonNode.Consensus.RootHash().GetGenesisBlock(w.ctx, &roothashApi.RuntimeRequest{ RuntimeID: w.commonNode.Runtime.ID(), @@ -1074,6 +1073,7 @@ func (w *Worker) worker() { // nolint: gocyclo } } close(w.initCh) + w.logger.Info("initialized") // Notify the checkpointer of the genesis round so it can be checkpointed. 
if w.checkpointer != nil { @@ -1090,6 +1090,11 @@ func (w *Worker) worker() { // nolint: gocyclo var wg sync.WaitGroup syncingRounds := make(map[uint64]*inFlight) summaryCache := make(map[uint64]*blockSummary) + + fetchPool := workerpool.New("storage_fetch/" + w.commonNode.Runtime.ID().String()) + fetchPool.Resize(config.GlobalConfig.Storage.FetcherCount) + defer fetchPool.Stop() + triggerRoundFetches := func() { for i := lastFullyAppliedRound + 1; i <= latestBlockRound; i++ { syncing, ok := syncingRounds[i] @@ -1140,7 +1145,7 @@ func (w *Worker) worker() { // nolint: gocyclo if !syncing.outstanding.contains(rootType) && syncing.awaitingRetry.contains(rootType) { syncing.scheduleDiff(rootType) wg.Add(1) - w.fetchPool.Submit(func() { + fetchPool.Submit(func() { defer wg.Done() w.fetchDiff(this.Round, prevRoots[i], this.Roots[i]) }) From 2cec7cebe7d3acb8025f99bb45c4493321895e17 Mon Sep 17 00:00:00 2001 From: Martin Tomazic Date: Wed, 27 Aug 2025 11:46:30 +0200 Subject: [PATCH 08/18] go/worker/storage/statesync: Fix deadlock on the cleanup Previously if the context was not canceled the fetcher might be sending the diffs on a channel that cannot be emptied, since we are already out of the main for loop, resulting in wg.Wait to never complete. --- go/worker/storage/statesync/state_sync.go | 1 + 1 file changed, 1 insertion(+) diff --git a/go/worker/storage/statesync/state_sync.go b/go/worker/storage/statesync/state_sync.go index 5c9053d7b42..48f67986a8b 100644 --- a/go/worker/storage/statesync/state_sync.go +++ b/go/worker/storage/statesync/state_sync.go @@ -1365,6 +1365,7 @@ mainLoop: } } + cancel() // Ctx has to be canceled so that fetcher go routines can be emptied. wg.Wait() // blockCh will be garbage-collected without being closed. 
It can potentially still contain // some new blocks, but only as many as were already in-flight at the point when the main From e699f36e7423c6cd4a7a90af249c69c6dbd0f37e Mon Sep 17 00:00:00 2001 From: Martin Tomazic Date: Sun, 3 Aug 2025 00:57:34 +0200 Subject: [PATCH 09/18] go/worker/storage/statesync: Pass context explicitly In addition, statesync worker now implements the Service interface. The corresponding BackgroundService methods (already not used) have been removed. Similarly, the storage worker was internally refactored to Service interface to ease eventual removal of the BackgroundService. Note that semantic changed slightly: Previously storage worker would wait for all state sync workers to finish. Now it will terminate when the first one finishes. Technically, this was already the case if the storage worker panicked. --- .changelog/6036.trivial.md | 0 .../storage/statesync/checkpoint_sync.go | 28 ++-- go/worker/storage/statesync/state_sync.go | 151 ++++++------------ go/worker/storage/worker.go | 73 ++++++--- 4 files changed, 112 insertions(+), 140 deletions(-) create mode 100644 .changelog/6036.trivial.md diff --git a/.changelog/6036.trivial.md b/.changelog/6036.trivial.md new file mode 100644 index 00000000000..e69de29bb2d diff --git a/go/worker/storage/statesync/checkpoint_sync.go b/go/worker/storage/statesync/checkpoint_sync.go index bca66368dfe..04b272eedaf 100644 --- a/go/worker/storage/statesync/checkpoint_sync.go +++ b/go/worker/storage/statesync/checkpoint_sync.go @@ -194,8 +194,8 @@ func (w *Worker) fetchChunk(ctx context.Context, chunk *chunk) ([]byte, rpc.Peer return rsp2.Chunk, pf, nil } -func (w *Worker) handleCheckpoint(check *checkpointsync.Checkpoint, maxParallelRequests uint) (cpStatus int, rerr error) { - if err := w.localStorage.Checkpointer().StartRestore(w.ctx, check.Metadata); err != nil { +func (w *Worker) handleCheckpoint(ctx context.Context, check *checkpointsync.Checkpoint, maxParallelRequests uint) (cpStatus int, rerr error) { 
+ if err := w.localStorage.Checkpointer().StartRestore(ctx, check.Metadata); err != nil { // Any previous restores were already aborted by the driver up the call stack, so // things should have been going smoothly here; bail. return checkpointStatusBail, fmt.Errorf("can't start checkpoint restore: %w", err) @@ -222,7 +222,7 @@ func (w *Worker) handleCheckpoint(check *checkpointsync.Checkpoint, maxParallelR chunkReturnCh := make(chan *chunk, maxParallelRequests) errorCh := make(chan int, maxParallelRequests) - ctx, cancel := context.WithCancel(w.ctx) + chunkCtx, cancel := context.WithCancel(ctx) // Spawn the worker group to fetch and restore checkpoint chunks. var workerGroup sync.WaitGroup @@ -231,7 +231,7 @@ func (w *Worker) handleCheckpoint(check *checkpointsync.Checkpoint, maxParallelR workerGroup.Add(1) go func() { defer workerGroup.Done() - w.checkpointChunkFetcher(ctx, chunkDispatchCh, chunkReturnCh, errorCh) + w.checkpointChunkFetcher(chunkCtx, chunkDispatchCh, chunkReturnCh, errorCh) }() } go func() { @@ -283,8 +283,8 @@ func (w *Worker) handleCheckpoint(check *checkpointsync.Checkpoint, maxParallelR } select { - case <-w.ctx.Done(): - return checkpointStatusBail, w.ctx.Err() + case <-ctx.Done(): + return checkpointStatusBail, ctx.Err() case returned := <-chunkReturnCh: if returned == nil { @@ -313,8 +313,8 @@ func (w *Worker) handleCheckpoint(check *checkpointsync.Checkpoint, maxParallelR } } -func (w *Worker) getCheckpointList() ([]*checkpointsync.Checkpoint, error) { - ctx, cancel := context.WithTimeout(w.ctx, cpListsTimeout) +func (w *Worker) getCheckpointList(ctx context.Context) ([]*checkpointsync.Checkpoint, error) { + ctx, cancel := context.WithTimeout(ctx, cpListsTimeout) defer cancel() list, err := w.fetchCheckpoints(ctx) @@ -369,7 +369,7 @@ func sortCheckpoints(s []*checkpointsync.Checkpoint) { }) } -func (w *Worker) checkCheckpointUsable(cp *checkpointsync.Checkpoint, remainingMask outstandingMask, genesisRound uint64) bool { +func (w *Worker) 
checkCheckpointUsable(ctx context.Context, cp *checkpointsync.Checkpoint, remainingMask outstandingMask, genesisRound uint64) bool { namespace := w.commonNode.Runtime.ID() if !namespace.Equal(&cp.Root.Namespace) { // Not for the right runtime. @@ -380,7 +380,7 @@ func (w *Worker) checkCheckpointUsable(cp *checkpointsync.Checkpoint, remainingM return false } - blk, err := w.commonNode.Runtime.History().GetCommittedBlock(w.ctx, cp.Root.Version) + blk, err := w.commonNode.Runtime.History().GetCommittedBlock(ctx, cp.Root.Version) if err != nil { w.logger.Error("can't get block information for checkpoint, skipping", "err", err, "root", cp.Root) return false @@ -405,14 +405,14 @@ func (w *Worker) checkCheckpointUsable(cp *checkpointsync.Checkpoint, remainingM return false } -func (w *Worker) syncCheckpoints(genesisRound uint64, wantOnlyGenesis bool) (*blockSummary, error) { +func (w *Worker) syncCheckpoints(ctx context.Context, genesisRound uint64, wantOnlyGenesis bool) (*blockSummary, error) { // Store roots and round info for checkpoints that finished syncing. // Round and namespace info will get overwritten as rounds are skipped // for errors, driven by remainingRoots. var syncState blockSummary // Fetch checkpoints from peers. 
- cps, err := w.getCheckpointList() + cps, err := w.getCheckpointList(ctx) if err != nil { return nil, fmt.Errorf("can't get checkpoint list from peers: %w", err) } @@ -449,7 +449,7 @@ func (w *Worker) syncCheckpoints(genesisRound uint64, wantOnlyGenesis bool) (*bl for _, check := range cps { - if check.Root.Version < genesisRound || !w.checkCheckpointUsable(check, remainingRoots, genesisRound) { + if check.Root.Version < genesisRound || !w.checkCheckpointUsable(ctx, check, remainingRoots, genesisRound) { continue } @@ -486,7 +486,7 @@ func (w *Worker) syncCheckpoints(genesisRound uint64, wantOnlyGenesis bool) (*bl } } - status, err := w.handleCheckpoint(check, w.checkpointSyncCfg.ChunkFetcherCount) + status, err := w.handleCheckpoint(ctx, check, w.checkpointSyncCfg.ChunkFetcherCount) switch status { case checkpointStatusDone: w.logger.Info("successfully restored from checkpoint", "root", check.Root, "mask", mask) diff --git a/go/worker/storage/statesync/state_sync.go b/go/worker/storage/statesync/state_sync.go index 48f67986a8b..82cef6eab45 100644 --- a/go/worker/storage/statesync/state_sync.go +++ b/go/worker/storage/statesync/state_sync.go @@ -166,11 +166,6 @@ type Worker struct { diffCh chan *fetchedDiff finalizeCh chan finalizeResult - ctx context.Context - ctxCancel context.CancelFunc - - quitCh chan struct{} - initCh chan struct{} } @@ -205,7 +200,6 @@ func New( diffCh: make(chan *fetchedDiff), finalizeCh: make(chan finalizeResult), - quitCh: make(chan struct{}), initCh: make(chan struct{}), } @@ -217,8 +211,6 @@ func New( // Initialize sync state. w.syncedState.Round = defaultUndefinedRound - w.ctx, w.ctxCancel = context.WithCancel(context.Background()) - // Create a checkpointer (even if checkpointing is disabled) to ensure the genesis checkpoint is available. 
checkpointer, err := w.newCheckpointer(commonNode, localStorage) if err != nil { @@ -303,29 +295,6 @@ func (w *Worker) newCheckpointer(commonNode *committee.Node, localStorage storag ), nil } -// Service interface. - -// Start causes the worker to start responding to CometBFT new block events. -func (w *Worker) Start() error { - go w.worker() - return nil -} - -// Stop causes the worker to stop watching and shut down. -func (w *Worker) Stop() { - w.ctxCancel() -} - -// Quit returns a channel that will be closed when the worker stops. -func (w *Worker) Quit() <-chan struct{} { - return w.quitCh -} - -// Cleanup cleans up any leftover state after the worker is stopped. -func (w *Worker) Cleanup() { - // Nothing to do here? -} - // Initialized returns a channel that will be closed once the worker finished starting up. func (w *Worker) Initialized() <-chan struct{} { return w.initCh @@ -396,7 +365,7 @@ func (w *Worker) GetLastSynced() (uint64, storageApi.Root, storageApi.Root) { return w.syncedState.Round, io, state } -func (w *Worker) fetchDiff(round uint64, prevRoot, thisRoot storageApi.Root) { +func (w *Worker) fetchDiff(ctx context.Context, round uint64, prevRoot, thisRoot storageApi.Root) { result := &fetchedDiff{ fetched: false, pf: rpc.NewNopPeerFeedback(), @@ -407,7 +376,7 @@ func (w *Worker) fetchDiff(round uint64, prevRoot, thisRoot storageApi.Root) { defer func() { select { case w.diffCh <- result: - case <-w.ctx.Done(): + case <-ctx.Done(): } }() @@ -432,10 +401,10 @@ func (w *Worker) fetchDiff(round uint64, prevRoot, thisRoot storageApi.Root) { "new_root", thisRoot, ) - ctx, cancel := context.WithTimeout(w.ctx, diffResponseTimeout) + diffCtx, cancel := context.WithTimeout(ctx, diffResponseTimeout) defer cancel() - wl, pf, err := w.getDiff(ctx, prevRoot, thisRoot) + wl, pf, err := w.getDiff(diffCtx, prevRoot, thisRoot) if err != nil { result.err = err return @@ -460,7 +429,7 @@ func (w *Worker) getDiff(ctx context.Context, prevRoot, thisRoot storageApi.Root 
return rsp2.WriteLog, pf, nil } -func (w *Worker) finalize(summary *blockSummary) { +func (w *Worker) finalize(ctx context.Context, summary *blockSummary) { err := w.localStorage.NodeDB().Finalize(summary.Roots) switch err { case nil: @@ -488,11 +457,11 @@ func (w *Worker) finalize(summary *blockSummary) { select { case w.finalizeCh <- result: - case <-w.ctx.Done(): + case <-ctx.Done(): } } -func (w *Worker) initGenesis(rt *registryApi.Runtime, genesisBlock *block.Block) error { +func (w *Worker) initGenesis(ctx context.Context, rt *registryApi.Runtime, genesisBlock *block.Block) error { w.logger.Info("initializing storage at genesis") // Check what the latest finalized version in the database is as we may be using a database @@ -557,7 +526,7 @@ func (w *Worker) initGenesis(rt *registryApi.Runtime, genesisBlock *block.Block) "latest_version", latestVersion, ) for v := latestVersion; v < stateRoot.Version; v++ { - err := w.localStorage.Apply(w.ctx, &storageApi.ApplyRequest{ + err := w.localStorage.Apply(ctx, &storageApi.ApplyRequest{ Namespace: rt.ID, RootType: storageApi.RootTypeState, SrcRound: v, @@ -622,7 +591,7 @@ func (w *Worker) flushSyncedState(summary *blockSummary) (uint64, error) { return w.syncedState.Round, nil } -func (w *Worker) consensusCheckpointSyncer() { +func (w *Worker) consensusCheckpointSyncer(ctx context.Context) { // Make sure we always create a checkpoint when the consensus layer creates a checkpoint. The // reason why we do this is to make it faster for storage nodes that use consensus state sync // to catch up as exactly the right checkpoint will be available. @@ -634,12 +603,12 @@ func (w *Worker) consensusCheckpointSyncer() { // Wait for the common node to be initialized. select { case <-w.commonNode.Initialized(): - case <-w.ctx.Done(): + case <-ctx.Done(): return } // Determine the maximum number of consensus checkpoints to keep. 
- consensusParams, err := w.commonNode.Consensus.Core().GetParameters(w.ctx, consensus.HeightLatest) + consensusParams, err := w.commonNode.Consensus.Core().GetParameters(ctx, consensus.HeightLatest) if err != nil { w.logger.Error("failed to fetch consensus parameters", "err", err, @@ -670,9 +639,7 @@ func (w *Worker) consensusCheckpointSyncer() { }() for { select { - case <-w.quitCh: - return - case <-w.ctx.Done(): + case <-ctx.Done(): return case version := <-ch: // We need to wait for the next version as that is what will be in the consensus @@ -689,7 +656,7 @@ func (w *Worker) consensusCheckpointSyncer() { ) if blkCh == nil { - blkCh, blkSub, err = w.commonNode.Consensus.Core().WatchBlocks(w.ctx) + blkCh, blkSub, err = w.commonNode.Consensus.Core().WatchBlocks(ctx) if err != nil { w.logger.Error("failed to watch blocks", "err", err, @@ -718,7 +685,7 @@ func (w *Worker) consensusCheckpointSyncer() { // Lookup what runtime round corresponds to the given consensus layer version and make // sure we checkpoint it. - blk, err := w.commonNode.Consensus.RootHash().GetLatestBlock(w.ctx, &roothashApi.RuntimeRequest{ + blk, err := w.commonNode.Consensus.RootHash().GetLatestBlock(ctx, &roothashApi.RuntimeRequest{ RuntimeID: w.commonNode.Runtime.ID(), Height: int64(version), }) @@ -783,16 +750,16 @@ func (w *Worker) nudgeAvailability(lastSynced, latest uint64) { } } -func (w *Worker) worker() { // nolint: gocyclo - defer close(w.quitCh) +// Serve runs the state sync worker. +func (w *Worker) Serve(ctx context.Context) error { // nolint: gocyclo defer close(w.diffCh) // Wait for the common node to be initialized. 
select { case <-w.commonNode.Initialized(): - case <-w.ctx.Done(): + case <-ctx.Done(): close(w.initCh) - return + return ctx.Err() } w.logger.Info("starting") @@ -800,7 +767,7 @@ func (w *Worker) worker() { // nolint: gocyclo w.status = api.StatusStarting w.statusLock.Unlock() - ctx, cancel := context.WithCancel(w.ctx) + ctx, cancel := context.WithCancel(ctx) defer cancel() go func() { @@ -814,34 +781,32 @@ func (w *Worker) worker() { // nolint: gocyclo defer w.logger.Info("stopped") go func() { - err := w.checkpointer.Serve(w.ctx) + err := w.checkpointer.Serve(ctx) w.logger.Error("checkpointer failed", "err", err) }() if config.GlobalConfig.Storage.Checkpointer.Enabled { - go w.consensusCheckpointSyncer() + go w.consensusCheckpointSyncer(ctx) } // Determine genesis block. - genesisBlock, err := w.commonNode.Consensus.RootHash().GetGenesisBlock(w.ctx, &roothashApi.RuntimeRequest{ + genesisBlock, err := w.commonNode.Consensus.RootHash().GetGenesisBlock(ctx, &roothashApi.RuntimeRequest{ RuntimeID: w.commonNode.Runtime.ID(), Height: consensus.HeightLatest, }) if err != nil { - w.logger.Error("can't retrieve genesis block", "err", err) - return + return fmt.Errorf("can't retrieve genesis block: %w", err) } w.undefinedRound = genesisBlock.Header.Round - 1 // Determine last finalized storage version. if version, dbNonEmpty := w.localStorage.NodeDB().GetLatestVersion(); dbNonEmpty { var blk *block.Block - blk, err = w.commonNode.Runtime.History().GetCommittedBlock(w.ctx, version) + blk, err = w.commonNode.Runtime.History().GetCommittedBlock(ctx, version) switch err { case nil: // Set last synced version to last finalized storage version. if _, err = w.flushSyncedState(summaryFromBlock(blk)); err != nil { - w.logger.Error("failed to flush synced state", "err", err) - return + return fmt.Errorf("failed to flush synced state: %w", err) } default: // Failed to fetch historic block. 
This is fine when the network just went through a @@ -869,18 +834,12 @@ func (w *Worker) worker() { // nolint: gocyclo w.statusLock.Unlock() var rt *registryApi.Runtime - rt, err = w.commonNode.Runtime.ActiveDescriptor(w.ctx) + rt, err = w.commonNode.Runtime.ActiveDescriptor(ctx) if err != nil { - w.logger.Error("failed to retrieve runtime registry descriptor", - "err", err, - ) - return + return fmt.Errorf("failed to retrieve runtime registry descriptor: %w", err) } - if err = w.initGenesis(rt, genesisBlock); err != nil { - w.logger.Error("failed to initialize storage at genesis", - "err", err, - ) - return + if err = w.initGenesis(ctx, rt, genesisBlock); err != nil { + return fmt.Errorf("failed to initialize storage at genesis: %w", err) } } @@ -900,7 +859,7 @@ func (w *Worker) worker() { // nolint: gocyclo // Check if we actually have information about that round. This assumes that any reindexing // was already performed (the common node would not indicate being initialized otherwise). - _, err = w.commonNode.Runtime.History().GetCommittedBlock(w.ctx, iterativeSyncStart) + _, err = w.commonNode.Runtime.History().GetCommittedBlock(ctx, iterativeSyncStart) SyncStartCheck: switch { case err == nil: @@ -908,7 +867,7 @@ func (w *Worker) worker() { // nolint: gocyclo // No information is available about the initial round. Query the earliest historic // block and check if that block has the genesis state root and empty I/O root. var earlyBlk *block.Block - earlyBlk, err = w.commonNode.Runtime.History().GetEarliestBlock(w.ctx) + earlyBlk, err = w.commonNode.Runtime.History().GetEarliestBlock(ctx) switch err { case nil: // Make sure the state root is still the same as at genesis time. 
@@ -927,7 +886,7 @@ func (w *Worker) worker() { // nolint: gocyclo "earliest_round", earlyBlk.Header.Round, ) for v := genesisBlock.Header.Round; v < earlyBlk.Header.Round; v++ { - err = w.localStorage.Apply(w.ctx, &storageApi.ApplyRequest{ + err = w.localStorage.Apply(ctx, &storageApi.ApplyRequest{ Namespace: w.commonNode.Runtime.ID(), RootType: storageApi.RootTypeState, SrcRound: v, @@ -942,11 +901,7 @@ func (w *Worker) worker() { // nolint: gocyclo // Ignore already finalized versions. continue default: - w.logger.Error("failed to fill in version", - "version", v, - "err", err, - ) - return + return fmt.Errorf("failed to fill in version %d: %w", v, err) } err = w.localStorage.NodeDB().Finalize([]storageApi.Root{{ @@ -957,19 +912,12 @@ func (w *Worker) worker() { // nolint: gocyclo // We can ignore I/O roots. }}) if err != nil { - w.logger.Error("failed to finalize filled in version", - "version", v, - "err", err, - ) - return + return fmt.Errorf("failed to finalize filled in version %v: %w", v, err) } } cachedLastRound, err = w.flushSyncedState(summaryFromBlock(earlyBlk)) if err != nil { - w.logger.Error("failed to flush synced state", - "err", err, - ) - return + return fmt.Errorf("failed to flush synced state: %w", err) } // No need to force a checkpoint sync. break SyncStartCheck @@ -987,10 +935,7 @@ func (w *Worker) worker() { // nolint: gocyclo w.checkpointSyncForced = true default: // Unknown error while fetching block information, abort. 
- w.logger.Error("failed to query block", - "err", err, - ) - return + return fmt.Errorf("failed to query block: %w", err) } } @@ -1024,7 +969,7 @@ func (w *Worker) worker() { // nolint: gocyclo ) CheckpointSyncRetry: for { - summary, err = w.syncCheckpoints(genesisBlock.Header.Round, w.checkpointSyncCfg.Disabled) + summary, err = w.syncCheckpoints(ctx, genesisBlock.Header.Round, w.checkpointSyncCfg.Disabled) if err == nil { break } @@ -1052,8 +997,8 @@ func (w *Worker) worker() { // nolint: gocyclo // Delay before retrying. select { case <-time.After(checkpointSyncRetryDelay): - case <-w.ctx.Done(): - return + case <-ctx.Done(): + return ctx.Err() } } if err != nil { @@ -1061,10 +1006,7 @@ func (w *Worker) worker() { // nolint: gocyclo } else { cachedLastRound, err = w.flushSyncedState(summary) if err != nil { - w.logger.Error("failed to flush synced state", - "err", err, - ) - return + return fmt.Errorf("failed to flush synced state %w", err) } lastFullyAppliedRound = cachedLastRound w.logger.Info("checkpoint sync succeeded", @@ -1147,7 +1089,7 @@ func (w *Worker) worker() { // nolint: gocyclo wg.Add(1) fetchPool.Submit(func() { defer wg.Done() - w.fetchDiff(this.Round, prevRoots[i], this.Roots[i]) + w.fetchDiff(ctx, this.Round, prevRoots[i], this.Roots[i]) }) } } @@ -1180,7 +1122,7 @@ mainLoop: // Apply the write log if one exists. err = nil if lastDiff.fetched { - err = w.localStorage.Apply(w.ctx, &storageApi.ApplyRequest{ + err = w.localStorage.Apply(ctx, &storageApi.ApplyRequest{ Namespace: lastDiff.thisRoot.Namespace, RootType: lastDiff.thisRoot.Type, SrcRound: lastDiff.prevRoot.Version, @@ -1239,7 +1181,7 @@ mainLoop: wg.Add(1) go func() { // Don't block fetching and applying remaining rounds. 
defer wg.Done() - w.finalize(lastSummary) + w.finalize(ctx, lastSummary) }() continue } @@ -1289,7 +1231,7 @@ mainLoop: continue } var oldBlock *block.Block - oldBlock, err = w.commonNode.Runtime.History().GetCommittedBlock(w.ctx, i) + oldBlock, err = w.commonNode.Runtime.History().GetCommittedBlock(ctx, i) if err != nil { w.logger.Error("can't get block for round", "err", err, @@ -1338,7 +1280,7 @@ mainLoop: // error, and cachedLastRound also can't be updated legitimately. if finalized.err != nil { // Request a node shutdown given that syncing is effectively blocked. - _ = w.commonNode.HostNode.RequestShutdown(w.ctx, false) + _ = w.commonNode.HostNode.RequestShutdown(ctx, false) break mainLoop } @@ -1360,7 +1302,7 @@ mainLoop: w.checkpointer.NotifyNewVersion(finalized.summary.Round) } - case <-w.ctx.Done(): + case <-ctx.Done(): break mainLoop } } @@ -1370,4 +1312,5 @@ mainLoop: // blockCh will be garbage-collected without being closed. It can potentially still contain // some new blocks, but only as many as were already in-flight at the point when the main // context was canceled. + return nil } diff --git a/go/worker/storage/worker.go b/go/worker/storage/worker.go index ed28aa86906..2607c2ee35c 100644 --- a/go/worker/storage/worker.go +++ b/go/worker/storage/worker.go @@ -1,8 +1,11 @@ package storage import ( + "context" "fmt" + "golang.org/x/sync/errgroup" + "github.com/oasisprotocol/oasis-core/go/common" "github.com/oasisprotocol/oasis-core/go/common/grpc" "github.com/oasisprotocol/oasis-core/go/common/logging" @@ -27,6 +30,9 @@ type Worker struct { quitCh chan struct{} runtimes map[common.Namespace]*statesync.Worker + + ctx context.Context + cancel context.CancelFunc } // New constructs a new storage worker. 
@@ -35,6 +41,7 @@ func New( commonWorker *workerCommon.Worker, registration *registration.Worker, ) (*Worker, error) { + ctx, cancel := context.WithCancel(context.Background()) enabled := config.GlobalConfig.Mode.HasLocalStorage() && len(commonWorker.GetRuntimes()) > 0 s := &Worker{ @@ -45,6 +52,8 @@ func New( initCh: make(chan struct{}), quitCh: make(chan struct{}), runtimes: make(map[common.Namespace]*statesync.Worker), + ctx: ctx, + cancel: cancel, } if !enabled { @@ -133,6 +142,25 @@ func (w *Worker) Initialized() <-chan struct{} { // Start starts the storage service. func (w *Worker) Start() error { + go func() { + if err := w.Serve(w.ctx); err != nil { + w.logger.Error("failed", "error", err) + } + }() + return nil +} + +// Serve starts a state sync worker for each of the configured runtime, unless +// disabled. +// +// Once all workers have been initialized the init channel is closed. +// +// If any state sync worker returns an error, it cancels the remaining ones and +// waits for all of them to finish. The error from the first failing worker is +// returned. +// +// Finally, upon exit the quit channel is closed. +func (w *Worker) Serve(ctx context.Context) error { if !w.enabled { w.logger.Info("not starting storage worker as it is disabled") @@ -142,34 +170,35 @@ func (w *Worker) Start() error { return nil } - // Wait for all runtimes to terminate. - go func() { - defer close(w.quitCh) - - for _, r := range w.runtimes { - <-r.Quit() - } + w.logger.Info("starting", "num_runtimes", len(w.runtimes)) + defer func() { + close(w.quitCh) + w.logger.Info("stopped") }() - // Start all runtimes and wait for initialization. go func() { - w.logger.Info("starting storage sync services", "num_runtimes", len(w.runtimes)) - - for _, r := range w.runtimes { - _ = r.Start() - } - - // Wait for runtimes to be initialized. 
for _, r := range w.runtimes { <-r.Initialized() } - - w.logger.Info("storage worker started") - + w.logger.Info("initialized") close(w.initCh) }() - return nil + return w.serve(ctx) +} + +func (w *Worker) serve(ctx context.Context) error { + g, ctx := errgroup.WithContext(ctx) + for id, r := range w.runtimes { + g.Go(func() error { + err := r.Serve(ctx) + if err != nil { + return fmt.Errorf("state sync worker failed (runtimeID: %s): %w", id, err) + } + return nil + }) + } + return g.Wait() } // Stop halts the service. @@ -179,9 +208,9 @@ func (w *Worker) Stop() { return } - for _, r := range w.runtimes { - r.Stop() - } + w.logger.Info("stopping") + w.cancel() + <-w.quitCh } // Quit returns a channel that will be closed when the service terminates. From f0172a32ced1325e1b108bf281c41fb94ee92e39 Mon Sep 17 00:00:00 2001 From: Martin Tomazic Date: Sun, 3 Aug 2025 12:19:48 +0200 Subject: [PATCH 10/18] go/worker/storage/statesync: Do not panic Additionally, observe that the parent (storage worker) is registered as background service, thus upon error inside state sync worker there is no need to manually request the node shutdown. --- go/worker/storage/statesync/state_sync.go | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/go/worker/storage/statesync/state_sync.go b/go/worker/storage/statesync/state_sync.go index 82cef6eab45..c3b3d9921b7 100644 --- a/go/worker/storage/statesync/state_sync.go +++ b/go/worker/storage/statesync/state_sync.go @@ -1023,6 +1023,8 @@ func (w *Worker) Serve(ctx context.Context) error { // nolint: gocyclo w.checkpointer.Flush() } + // Main syncing loop: + err = nil // Don't register availability immediately, we want to know first how far behind consensus we are. 
latestBlockRound := w.undefinedRound @@ -1233,12 +1235,8 @@ mainLoop: var oldBlock *block.Block oldBlock, err = w.commonNode.Runtime.History().GetCommittedBlock(ctx, i) if err != nil { - w.logger.Error("can't get block for round", - "err", err, - "round", i, - "current_round", blk.Header.Round, - ) - panic("can't get block in storage worker") + err = fmt.Errorf("failed to get block for round %d (current round: %d): %w", i, blk.Header.Round, err) + break mainLoop } summaryCache[i] = summaryFromBlock(oldBlock) } @@ -1279,8 +1277,8 @@ mainLoop: // There's no point redoing it, since it's probably not a transient // error, and cachedLastRound also can't be updated legitimately. if finalized.err != nil { - // Request a node shutdown given that syncing is effectively blocked. - _ = w.commonNode.HostNode.RequestShutdown(ctx, false) + w.logger.Error("failed to finalize", "err", err, "summary", finalized.summary) + err = fmt.Errorf("failed to finalize (round: %d): %w", finalized.summary.Round, finalized.err) break mainLoop } @@ -1303,6 +1301,7 @@ mainLoop: } case <-ctx.Done(): + err = ctx.Err() break mainLoop } } @@ -1312,5 +1311,5 @@ mainLoop: // blockCh will be garbage-collected without being closed. It can potentially still contain // some new blocks, but only as many as were already in-flight at the point when the main // context was canceled. 
- return nil + return err } From f5666e830585fb7de70f86dfc47ce6f347325c37 Mon Sep 17 00:00:00 2001 From: Martin Tomazic Date: Wed, 27 Aug 2025 11:09:23 +0200 Subject: [PATCH 11/18] go/worker/storage/statesync: Improve var grouping for syncing --- go/worker/storage/statesync/state_sync.go | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/go/worker/storage/statesync/state_sync.go b/go/worker/storage/statesync/state_sync.go index c3b3d9921b7..5eed34dda0b 100644 --- a/go/worker/storage/statesync/state_sync.go +++ b/go/worker/storage/statesync/state_sync.go @@ -944,8 +944,6 @@ func (w *Worker) Serve(ctx context.Context) error { // nolint: gocyclo "last_synced", cachedLastRound, ) - lastFullyAppliedRound := cachedLastRound - // Try to perform initial sync from state and io checkpoints if either: // // - Checkpoint sync has been forced because there is insufficient information available to use @@ -1008,7 +1006,6 @@ func (w *Worker) Serve(ctx context.Context) error { // nolint: gocyclo if err != nil { return fmt.Errorf("failed to flush synced state %w", err) } - lastFullyAppliedRound = cachedLastRound w.logger.Info("checkpoint sync succeeded", logging.LogEvent, LogEventCheckpointSyncSuccess, ) @@ -1025,13 +1022,10 @@ func (w *Worker) Serve(ctx context.Context) error { // nolint: gocyclo // Main syncing loop: err = nil - // Don't register availability immediately, we want to know first how far behind consensus we are. - latestBlockRound := w.undefinedRound - - heartbeat := heartbeat{} - heartbeat.reset() - var wg sync.WaitGroup + + latestBlockRound := w.undefinedRound // Don't register availability immediately, we want to know first how far behind consensus we are. 
+	lastFullyAppliedRound := cachedLastRound
 	syncingRounds := make(map[uint64]*inFlight)
 	summaryCache := make(map[uint64]*blockSummary)
@@ -1039,6 +1033,9 @@ func (w *Worker) Serve(ctx context.Context) error { // nolint: gocyclo
 	fetchPool.Resize(config.GlobalConfig.Storage.FetcherCount)
 	defer fetchPool.Stop()
 
+	heartbeat := heartbeat{}
+	heartbeat.reset()
+
 	triggerRoundFetches := func() {
 		for i := lastFullyAppliedRound + 1; i <= latestBlockRound; i++ {
 			syncing, ok := syncingRounds[i]

From 17740ee0bee3eb0d3dbc362c62248b0065a6d8d3 Mon Sep 17 00:00:00 2001
From: Martin Tomazic
Date: Wed, 27 Aug 2025 11:16:58 +0200
Subject: [PATCH 12/18] go/worker/storage/statesync: Improve variable names

Synced is a synonym for last finalized round inside the state sync
worker. This change should make the code more readable.

Eventually, we should ideally use either sync or finalized. Finally, the
metrics use synced as last fully applied, but this would be breaking to
change.
---
 go/worker/storage/statesync/metrics.go    |  8 ++---
 go/worker/storage/statesync/state_sync.go | 38 +++++++++++------------
 2 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/go/worker/storage/statesync/metrics.go b/go/worker/storage/statesync/metrics.go
index 4bc6c414df1..5af29597b21 100644
--- a/go/worker/storage/statesync/metrics.go
+++ b/go/worker/storage/statesync/metrics.go
@@ -7,7 +7,7 @@ import (
 )
 
 var (
-	storageWorkerLastFullRound = prometheus.NewGaugeVec(
+	storageWorkerLastFinalizedRound = prometheus.NewGaugeVec(
 		prometheus.GaugeOpts{
 			Name: "oasis_worker_storage_full_round",
 			Help: "The last round that was fully synced and finalized.",
@@ -15,7 +15,7 @@ var (
 		[]string{"runtime"},
 	)
 
-	storageWorkerLastSyncedRound = prometheus.NewGaugeVec(
+	storageWorkerLastFullyAppliedRound = prometheus.NewGaugeVec(
 		prometheus.GaugeOpts{
 			Name: "oasis_worker_storage_synced_round",
 			Help: "The last round that was synced but not yet finalized.",
@@ -40,8 +40,8 @@ var (
 	)
 
 	storageWorkerCollectors = 
[]prometheus.Collector{ - storageWorkerLastFullRound, - storageWorkerLastSyncedRound, + storageWorkerLastFinalizedRound, + storageWorkerLastFullyAppliedRound, storageWorkerLastPendingRound, storageWorkerRoundSyncLatency, } diff --git a/go/worker/storage/statesync/state_sync.go b/go/worker/storage/statesync/state_sync.go index 5eed34dda0b..ff296ff71c4 100644 --- a/go/worker/storage/statesync/state_sync.go +++ b/go/worker/storage/statesync/state_sync.go @@ -820,14 +820,14 @@ func (w *Worker) Serve(ctx context.Context) error { // nolint: gocyclo } w.syncedLock.RLock() - cachedLastRound := w.syncedState.Round + lastFinalizedRound := w.syncedState.Round w.syncedLock.RUnlock() - if cachedLastRound == defaultUndefinedRound || cachedLastRound < genesisBlock.Header.Round { - cachedLastRound = w.undefinedRound + if lastFinalizedRound == defaultUndefinedRound || lastFinalizedRound < genesisBlock.Header.Round { + lastFinalizedRound = w.undefinedRound } // Initialize genesis from the runtime descriptor. - isInitialStartup := (cachedLastRound == w.undefinedRound) + isInitialStartup := (lastFinalizedRound == w.undefinedRound) if isInitialStartup { w.statusLock.Lock() w.status = api.StatusInitializingGenesis @@ -852,7 +852,7 @@ func (w *Worker) Serve(ctx context.Context) error { // nolint: gocyclo w.statusLock.Unlock() // Determine what is the first round that we would need to sync. 
- iterativeSyncStart := cachedLastRound + iterativeSyncStart := lastFinalizedRound if iterativeSyncStart == w.undefinedRound { iterativeSyncStart++ } @@ -915,7 +915,7 @@ func (w *Worker) Serve(ctx context.Context) error { // nolint: gocyclo return fmt.Errorf("failed to finalize filled in version %v: %w", v, err) } } - cachedLastRound, err = w.flushSyncedState(summaryFromBlock(earlyBlk)) + lastFinalizedRound, err = w.flushSyncedState(summaryFromBlock(earlyBlk)) if err != nil { return fmt.Errorf("failed to flush synced state: %w", err) } @@ -941,7 +941,7 @@ func (w *Worker) Serve(ctx context.Context) error { // nolint: gocyclo w.logger.Info("worker initialized", "genesis_round", genesisBlock.Header.Round, - "last_synced", cachedLastRound, + "last_finalized_round", lastFinalizedRound, ) // Try to perform initial sync from state and io checkpoints if either: @@ -1002,7 +1002,7 @@ func (w *Worker) Serve(ctx context.Context) error { // nolint: gocyclo if err != nil { w.logger.Info("checkpoint sync failed", "err", err) } else { - cachedLastRound, err = w.flushSyncedState(summary) + lastFinalizedRound, err = w.flushSyncedState(summary) if err != nil { return fmt.Errorf("failed to flush synced state %w", err) } @@ -1025,7 +1025,7 @@ func (w *Worker) Serve(ctx context.Context) error { // nolint: gocyclo var wg sync.WaitGroup latestBlockRound := w.undefinedRound // Don't register availability immediately, we want to know first how far behind consensus we are. 
- lastFullyAppliedRound := cachedLastRound + lastFullyAppliedRound := lastFinalizedRound syncingRounds := make(map[uint64]*inFlight) summaryCache := make(map[uint64]*blockSummary) @@ -1162,7 +1162,7 @@ mainLoop: delete(summaryCache, lastDiff.round-1) lastFullyAppliedRound = lastDiff.round - storageWorkerLastSyncedRound.With(w.getMetricLabels()).Set(float64(lastDiff.round)) + storageWorkerLastFullyAppliedRound.With(w.getMetricLabels()).Set(float64(lastDiff.round)) storageWorkerRoundSyncLatency.With(w.getMetricLabels()).Observe(time.Since(syncing.startedAt).Seconds()) // Finalize storage for this round. This happens asynchronously @@ -1175,7 +1175,7 @@ mainLoop: // Check if any new rounds were fully applied and need to be finalized. // Only finalize if it's the round after the one that was finalized last. // As a consequence at most one finalization can be happening at the time. - if len(*pendingFinalize) > 0 && cachedLastRound+1 == (*pendingFinalize)[0].GetRound() { + if len(*pendingFinalize) > 0 && lastFinalizedRound+1 == (*pendingFinalize)[0].GetRound() { lastSummary := heap.Pop(pendingFinalize).(*blockSummary) wg.Add(1) go func() { // Don't block fetching and applying remaining rounds. @@ -1190,13 +1190,13 @@ mainLoop: blk := inBlk.(*block.Block) w.logger.Debug("incoming block", "round", blk.Header.Round, - "last_synced", lastFullyAppliedRound, - "last_finalized", cachedLastRound, + "last_fully_applied", lastFullyAppliedRound, + "last_finalized", lastFinalizedRound, ) // Check if we're far enough to reasonably register as available. latestBlockRound = blk.Header.Round - w.nudgeAvailability(cachedLastRound, latestBlockRound) + w.nudgeAvailability(lastFinalizedRound, latestBlockRound) if _, ok := summaryCache[lastFullyAppliedRound]; !ok && lastFullyAppliedRound == w.undefinedRound { dummy := blockSummary{ @@ -1272,7 +1272,7 @@ mainLoop: case finalized := <-w.finalizeCh: // If finalization failed, things start falling apart. 
 			// There's no point redoing it, since it's probably not a transient
-			// error, and cachedLastRound also can't be updated legitimately.
+			// error, and lastFinalizedRound also can't be updated legitimately.
 			if finalized.err != nil {
 				w.logger.Error("failed to finalize", "err", err, "summary", finalized.summary)
 				err = fmt.Errorf("failed to finalize (round: %d): %w", finalized.summary.Round, finalized.err)
@@ -1280,17 +1280,17 @@
 			}
 
 			// No further sync or out of order handling needed here, since
-			// only one finalize at a time is triggered (for round cachedLastRound+1)
-			cachedLastRound, err = w.flushSyncedState(finalized.summary)
+			// only one finalize at a time is triggered (for round lastFinalizedRound+1)
+			lastFinalizedRound, err = w.flushSyncedState(finalized.summary)
 			if err != nil {
 				w.logger.Error("failed to flush synced state",
 					"err", err,
 				)
 			}
-			storageWorkerLastFullRound.With(w.getMetricLabels()).Set(float64(finalized.summary.Round))
+			storageWorkerLastFinalizedRound.With(w.getMetricLabels()).Set(float64(finalized.summary.Round))
 
 			// Check if we're far enough to reasonably register as available.
-			w.nudgeAvailability(cachedLastRound, latestBlockRound)
+			w.nudgeAvailability(lastFinalizedRound, latestBlockRound)
 
 			// Notify the checkpointer that there is a new finalized round.
if config.GlobalConfig.Storage.Checkpointer.Enabled { From ce420f1be5f9a92548c4865f257455cefca1e022 Mon Sep 17 00:00:00 2001 From: Martin Tomazic Date: Mon, 25 Aug 2025 11:33:38 +0200 Subject: [PATCH 13/18] go/worker/storage/statesync: Add timeout to checkpoint restoration --- go/worker/storage/statesync/checkpoint_sync.go | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/go/worker/storage/statesync/checkpoint_sync.go b/go/worker/storage/statesync/checkpoint_sync.go index 04b272eedaf..91ce2743101 100644 --- a/go/worker/storage/statesync/checkpoint_sync.go +++ b/go/worker/storage/statesync/checkpoint_sync.go @@ -21,8 +21,15 @@ import ( const ( // cpListsTimeout is the timeout for fetching checkpoints from all nodes. cpListsTimeout = 30 * time.Second - // cpRestoreTimeout is the timeout for restoring a checkpoint chunk from the remote peer. - cpRestoreTimeout = 60 * time.Second + + // cpRestoreChunkTimeout is the timeout for restoring a checkpoint chunk from the remote peer. + cpRestoreChunkTimeout = 60 * time.Second + + // cpRestoreTimeout is the timeout for restoring the whole checkpoint from the remote peers. + // + // As of now it takes ~10-30 min to restore the state from the checkpoint, however the timeout + // should be significantly higher to account for the growing state. + cpRestoreTimeout = 12 * time.Hour checkpointStatusDone = 0 checkpointStatusNext = 1 @@ -99,7 +106,7 @@ func (w *Worker) checkpointChunkFetcher( } } - chunkCtx, cancel := context.WithTimeout(ctx, cpRestoreTimeout) + chunkCtx, cancel := context.WithTimeout(ctx, cpRestoreChunkTimeout) defer cancel() // Fetch chunk from peers. 
@@ -195,6 +202,8 @@ func (w *Worker) fetchChunk(ctx context.Context, chunk *chunk) ([]byte, rpc.Peer } func (w *Worker) handleCheckpoint(ctx context.Context, check *checkpointsync.Checkpoint, maxParallelRequests uint) (cpStatus int, rerr error) { + ctx, cancel := context.WithTimeout(ctx, cpRestoreTimeout) + defer cancel() if err := w.localStorage.Checkpointer().StartRestore(ctx, check.Metadata); err != nil { // Any previous restores were already aborted by the driver up the call stack, so // things should have been going smoothly here; bail. From aad4b24bc482b90a5d0c3febc17c318fe6225f29 Mon Sep 17 00:00:00 2001 From: Martin Tomazic Date: Wed, 27 Aug 2025 15:54:36 +0200 Subject: [PATCH 14/18] go/worker/storage: Remove unused method and config --- go/worker/storage/statesync/state_sync.go | 8 -------- go/worker/storage/worker.go | 1 - 2 files changed, 9 deletions(-) diff --git a/go/worker/storage/statesync/state_sync.go b/go/worker/storage/statesync/state_sync.go index ff296ff71c4..f9027472fe8 100644 --- a/go/worker/storage/statesync/state_sync.go +++ b/go/worker/storage/statesync/state_sync.go @@ -174,7 +174,6 @@ func New( commonNode *committee.Node, roleProvider registration.RoleProvider, rpcRoleProvider registration.RoleProvider, - workerCommonCfg workerCommon.Config, localStorage storageApi.LocalBackend, checkpointSyncCfg *CheckpointSyncConfig, ) (*Worker, error) { @@ -188,8 +187,6 @@ func New( logger: logging.GetLogger("worker/storage/statesync").With("runtime_id", commonNode.Runtime.ID()), - workerCommonCfg: workerCommonCfg, - localStorage: localStorage, checkpointSyncCfg: checkpointSyncCfg, @@ -322,11 +319,6 @@ func (w *Worker) PauseCheckpointer(pause bool) error { return nil } -// GetLocalStorage returns the local storage backend used by this state sync worker. -func (w *Worker) GetLocalStorage() storageApi.LocalBackend { - return w.localStorage -} - // NodeHooks implementation. // HandleNewBlockEarlyLocked is guarded by CrossNode. 
diff --git a/go/worker/storage/worker.go b/go/worker/storage/worker.go index 2607c2ee35c..a6f781e0882 100644 --- a/go/worker/storage/worker.go +++ b/go/worker/storage/worker.go @@ -103,7 +103,6 @@ func (w *Worker) registerRuntime(commonNode *committeeCommon.Node) error { commonNode, rp, rpRPC, - w.commonWorker.GetConfig(), localStorage, &statesync.CheckpointSyncConfig{ Disabled: config.GlobalConfig.Storage.CheckpointSyncDisabled, From ce7ceb27350285905dcbb92cd04d0880c38b0679 Mon Sep 17 00:00:00 2001 From: Martin Tomazic Date: Wed, 27 Aug 2025 15:04:19 +0200 Subject: [PATCH 15/18] go/worker/storage: Create new runtime storage worker This worker will be responsible for orchestrating storage operations for the given runtime. This includes: 1. Registering availability 2. Creating checkpoints if configured 3. Pruning state 4. Syncing state (internally init, checkpoint sync and diff sync) Subsequent commit will do the outlined refactor above. --- .changelog/6308.trivial.md | 0 go/worker/storage/runtime_worker.go | 96 +++++++++++++++++++++++ go/worker/storage/service_internal.go | 2 +- go/worker/storage/statesync/state_sync.go | 34 +------- go/worker/storage/worker.go | 16 ++-- 5 files changed, 110 insertions(+), 38 deletions(-) create mode 100644 .changelog/6308.trivial.md create mode 100644 go/worker/storage/runtime_worker.go diff --git a/.changelog/6308.trivial.md b/.changelog/6308.trivial.md new file mode 100644 index 00000000000..e69de29bb2d diff --git a/go/worker/storage/runtime_worker.go b/go/worker/storage/runtime_worker.go new file mode 100644 index 00000000000..d9266272b73 --- /dev/null +++ b/go/worker/storage/runtime_worker.go @@ -0,0 +1,96 @@ +package storage + +import ( + "context" + "fmt" + + "github.com/eapache/channels" + "golang.org/x/sync/errgroup" + + "github.com/oasisprotocol/oasis-core/go/common/logging" + runtimeAPI "github.com/oasisprotocol/oasis-core/go/runtime/api" + "github.com/oasisprotocol/oasis-core/go/runtime/host" + 
"github.com/oasisprotocol/oasis-core/go/storage/api" + committeeCommon "github.com/oasisprotocol/oasis-core/go/worker/common/committee" + "github.com/oasisprotocol/oasis-core/go/worker/registration" + storageAPI "github.com/oasisprotocol/oasis-core/go/worker/storage/api" + "github.com/oasisprotocol/oasis-core/go/worker/storage/statesync" +) + +// Worker is handling storage operations for a single runtime. +type worker struct { + logger *logging.Logger + stateSync *statesync.Worker + stateSyncBlkCh *channels.InfiniteChannel +} + +func newRuntimeWorker( + commonNode *committeeCommon.Node, + rp registration.RoleProvider, + rpRPC registration.RoleProvider, + localStorage api.LocalBackend, + checkpointerCfg *statesync.CheckpointSyncConfig, +) (*worker, error) { + worker := &worker{ + logger: logging.GetLogger("worker/storage").With("runtimeID", commonNode.Runtime.ID()), + stateSyncBlkCh: channels.NewInfiniteChannel(), + } + + stateSync, err := statesync.New( + commonNode, + rp, + rpRPC, + localStorage, + worker.stateSyncBlkCh, + checkpointerCfg, + ) + if err != nil { + return nil, fmt.Errorf("failed to create state sync worker: %w", err) + } + + worker.stateSync = stateSync + + return worker, nil +} + +// NodeHooks implementation. + +// HandleNewBlockEarlyLocked is guarded by CrossNode. +func (w *worker) HandleNewBlockEarlyLocked(*runtimeAPI.BlockInfo) { + // Nothing to do here. +} + +// HandleNewBlockLocked is guarded by CrossNode. +func (w *worker) HandleNewBlockLocked(bi *runtimeAPI.BlockInfo) { + // Notify the state syncer that there is a new block. + w.stateSyncBlkCh.In() <- bi.RuntimeBlock +} + +// HandleRuntimeHostEventLocked is guarded by CrossNode. +func (w *worker) HandleRuntimeHostEventLocked(*host.Event) { + // Nothing to do here. +} + +// Initialized returns a channel that will be closed once the worker finished starting up. 
+func (w *worker) Initialized() <-chan struct{} { + return w.stateSync.Initialized() +} + +func (w *worker) GetStatus(ctx context.Context) (*storageAPI.Status, error) { + return w.stateSync.GetStatus(ctx) +} + +func (w *worker) PauseCheckpointer(pause bool) error { + return w.stateSync.PauseCheckpointer(pause) +} + +func (w *worker) serve(ctx context.Context) error { + w.logger.Info("started") + defer w.logger.Info("stopped") + + g, ctx := errgroup.WithContext(ctx) + g.Go(func() error { + return w.stateSync.Serve(ctx) + }) + return g.Wait() +} diff --git a/go/worker/storage/service_internal.go b/go/worker/storage/service_internal.go index cf5d0ecf064..7c26399099a 100644 --- a/go/worker/storage/service_internal.go +++ b/go/worker/storage/service_internal.go @@ -14,7 +14,7 @@ func (w *Worker) GetLastSyncedRound(_ context.Context, request *api.GetLastSynce return nil, api.ErrRuntimeNotFound } - round, ioRoot, stateRoot := node.GetLastSynced() + round, ioRoot, stateRoot := node.stateSync.GetLastSynced() return &api.GetLastSyncedRoundResponse{ Round: round, IORoot: ioRoot, diff --git a/go/worker/storage/statesync/state_sync.go b/go/worker/storage/statesync/state_sync.go index f9027472fe8..325dc30f971 100644 --- a/go/worker/storage/statesync/state_sync.go +++ b/go/worker/storage/statesync/state_sync.go @@ -24,12 +24,9 @@ import ( registryApi "github.com/oasisprotocol/oasis-core/go/registry/api" roothashApi "github.com/oasisprotocol/oasis-core/go/roothash/api" "github.com/oasisprotocol/oasis-core/go/roothash/api/block" - runtime "github.com/oasisprotocol/oasis-core/go/runtime/api" - "github.com/oasisprotocol/oasis-core/go/runtime/host" storageApi "github.com/oasisprotocol/oasis-core/go/storage/api" "github.com/oasisprotocol/oasis-core/go/storage/mkvs/checkpoint" dbApi "github.com/oasisprotocol/oasis-core/go/storage/mkvs/db/api" - workerCommon "github.com/oasisprotocol/oasis-core/go/worker/common" "github.com/oasisprotocol/oasis-core/go/worker/common/committee" 
"github.com/oasisprotocol/oasis-core/go/worker/registration" "github.com/oasisprotocol/oasis-core/go/worker/storage/api" @@ -39,12 +36,8 @@ import ( "github.com/oasisprotocol/oasis-core/go/worker/storage/p2p/synclegacy" ) -var ( - _ committee.NodeHooks = (*Worker)(nil) - - // ErrNonLocalBackend is the error returned when the storage backend doesn't implement the LocalBackend interface. - ErrNonLocalBackend = errors.New("storage: storage backend doesn't support local storage") -) +// ErrNonLocalBackend is the error returned when the storage backend doesn't implement the LocalBackend interface. +var ErrNonLocalBackend = errors.New("storage: storage backend doesn't support local storage") const ( // RoundLatest is a magic value for the latest round. @@ -150,8 +143,6 @@ type Worker struct { undefinedRound uint64 - workerCommonCfg workerCommon.Config - checkpointer checkpoint.Checkpointer checkpointSyncCfg *CheckpointSyncConfig checkpointSyncForced bool @@ -175,6 +166,7 @@ func New( roleProvider registration.RoleProvider, rpcRoleProvider registration.RoleProvider, localStorage storageApi.LocalBackend, + blockCh *channels.InfiniteChannel, checkpointSyncCfg *CheckpointSyncConfig, ) (*Worker, error) { initMetrics() @@ -193,7 +185,7 @@ func New( status: api.StatusInitializing, - blockCh: channels.NewInfiniteChannel(), + blockCh: blockCh, diffCh: make(chan *fetchedDiff), finalizeCh: make(chan finalizeResult), @@ -319,24 +311,6 @@ func (w *Worker) PauseCheckpointer(pause bool) error { return nil } -// NodeHooks implementation. - -// HandleNewBlockEarlyLocked is guarded by CrossNode. -func (w *Worker) HandleNewBlockEarlyLocked(*runtime.BlockInfo) { - // Nothing to do here. -} - -// HandleNewBlockLocked is guarded by CrossNode. -func (w *Worker) HandleNewBlockLocked(bi *runtime.BlockInfo) { - // Notify the state syncer that there is a new block. - w.blockCh.In() <- bi.RuntimeBlock -} - -// HandleRuntimeHostEventLocked is guarded by CrossNode. 
-func (w *Worker) HandleRuntimeHostEventLocked(*host.Event) { - // Nothing to do here. -} - // Watcher implementation. // GetLastSynced returns the height, IORoot hash and StateRoot hash of the last block that was fully synced to. diff --git a/go/worker/storage/worker.go b/go/worker/storage/worker.go index a6f781e0882..969277f8beb 100644 --- a/go/worker/storage/worker.go +++ b/go/worker/storage/worker.go @@ -29,7 +29,7 @@ type Worker struct { initCh chan struct{} quitCh chan struct{} - runtimes map[common.Namespace]*statesync.Worker + runtimes map[common.Namespace]*worker ctx context.Context cancel context.CancelFunc @@ -51,7 +51,7 @@ func New( logger: logging.GetLogger("worker/storage"), initCh: make(chan struct{}), quitCh: make(chan struct{}), - runtimes: make(map[common.Namespace]*statesync.Worker), + runtimes: make(map[common.Namespace]*worker), ctx: ctx, cancel: cancel, } @@ -99,7 +99,7 @@ func (w *Worker) registerRuntime(commonNode *committeeCommon.Node) error { return fmt.Errorf("can't create local storage backend: %w", err) } - worker, err := statesync.New( + worker, err := newRuntimeWorker( commonNode, rp, rpRPC, @@ -190,9 +190,9 @@ func (w *Worker) serve(ctx context.Context) error { g, ctx := errgroup.WithContext(ctx) for id, r := range w.runtimes { g.Go(func() error { - err := r.Serve(ctx) + err := r.serve(ctx) if err != nil { - return fmt.Errorf("state sync worker failed (runtimeID: %s): %w", id, err) + return fmt.Errorf("runtime storage worker failed (runtimeID: %s): %w", id, err) } return nil }) @@ -221,9 +221,11 @@ func (w *Worker) Quit() <-chan struct{} { func (w *Worker) Cleanup() { } -// GetRuntime returns a storage committee node for the given runtime (if available). +// GetRuntime returns the state sync worker for the given runtime (if available). // // In case the runtime with the specified id was not configured for this node it returns nil. +// +// Suggestion: This is only used to get status, how about making this GetRuntimeStatus?
func (w *Worker) GetRuntime(id common.Namespace) *statesync.Worker { - return w.runtimes[id] + return w.runtimes[id].stateSync } From 3762e467c8c55dd51d841c49b2e0a606432a02fa Mon Sep 17 00:00:00 2001 From: Martin Tomazic Date: Wed, 27 Aug 2025 15:50:55 +0200 Subject: [PATCH 16/18] worker/storage/statesync: Add WatchFinalizedRound method This method is needed so that other worker (checkpointer, availability nudger) will be able to react. --- go/worker/storage/statesync/state_sync.go | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/go/worker/storage/statesync/state_sync.go b/go/worker/storage/statesync/state_sync.go index 325dc30f971..b2e6a479f41 100644 --- a/go/worker/storage/statesync/state_sync.go +++ b/go/worker/storage/statesync/state_sync.go @@ -150,6 +150,8 @@ type Worker struct { syncedLock sync.RWMutex syncedState blockSummary + finalizedNotifier *pubsub.Broker + statusLock sync.RWMutex status api.StorageWorkerStatus @@ -183,6 +185,8 @@ func New( checkpointSyncCfg: checkpointSyncCfg, + finalizedNotifier: pubsub.NewBroker(false), + status: api.StatusInitializing, blockCh: blockCh, @@ -303,6 +307,15 @@ func (w *Worker) GetStatus(context.Context) (*api.Status, error) { }, nil } +// WatchFinalizedRounds watches block rounds that have been successfully finalized. 
+func (w *Worker) WatchFinalizedRounds() (<-chan uint64, pubsub.ClosableSubscription, error) { + ch := make(chan uint64) + sub := w.finalizedNotifier.Subscribe() + sub.Unwrap(ch) + + return ch, sub, nil +} + func (w *Worker) PauseCheckpointer(pause bool) error { if !commonFlags.DebugDontBlameOasis() { return api.ErrCantPauseCheckpointer @@ -550,6 +563,8 @@ func (w *Worker) flushSyncedState(summary *blockSummary) (uint64, error) { defer w.syncedLock.Unlock() w.syncedState = *summary + w.finalizedNotifier.Broadcast(summary.Round) + if err := w.commonNode.Runtime.History().StorageSyncCheckpoint(w.syncedState.Round); err != nil { return 0, err } From 9d17682ff16f14497eafc065729c51c91e3ecd81 Mon Sep 17 00:00:00 2001 From: Martin Tomazic Date: Wed, 27 Aug 2025 17:55:10 +0200 Subject: [PATCH 17/18] go/worker/storage: Move checkpointer to a separate worker. Generally this worker should be tested explicitly. For this checkpointer worker should probably take finalizedCh instead of the whole state sync worker, waiting for the storage initialization should be moved to the parent worker. Finally, instead of the full common worker we should accept interface to ease mocking. Technically this worker does two things, creates and notifies. I am open to moving checkpointer creation to the parent worker, and notify there in the separate goroutine. This way this worker is only responsible for creating checkpoints. I have kept it this way as this way all the code relevant for checkpoint creation or checkpointer functionality is encapsulated together. 
--- go/storage/mkvs/checkpoint/checkpointer.go | 52 ++- .../storage/checkpointer/checkpointer.go | 323 ++++++++++++++++++ go/worker/storage/runtime_worker.go | 35 +- go/worker/storage/statesync/state_sync.go | 238 +------------ go/worker/storage/worker.go | 1 + 5 files changed, 392 insertions(+), 257 deletions(-) create mode 100644 go/worker/storage/checkpointer/checkpointer.go diff --git a/go/storage/mkvs/checkpoint/checkpointer.go b/go/storage/mkvs/checkpoint/checkpointer.go index f1292123650..fd64c8b0d50 100644 --- a/go/storage/mkvs/checkpoint/checkpointer.go +++ b/go/storage/mkvs/checkpoint/checkpointer.go @@ -88,6 +88,10 @@ type Checkpointer interface { // versions are emitted before the checkpointing process starts. WatchCheckpoints() (<-chan uint64, pubsub.ClosableSubscription, error) + // WatchCreatedCheckpoints returns a channel that produces a stream of checkpointed versions. The + // versions are emitted immediately after the checkpoint is created. + WatchCreatedCheckpoints() (<-chan uint64, pubsub.ClosableSubscription, error) + // Flush makes the checkpointer immediately process any notifications. Flush() @@ -103,14 +107,15 @@ type Checkpointer interface { type checkpointer struct { cfg CheckpointerConfig - ndb db.NodeDB - creator Creator - notifyCh *channels.RingChannel - forceCh *channels.RingChannel - flushCh *channels.RingChannel - statusCh chan struct{} - pausedCh chan bool - cpNotifier *pubsub.Broker + ndb db.NodeDB + creator Creator + notifyCh *channels.RingChannel + forceCh *channels.RingChannel + flushCh *channels.RingChannel + statusCh chan struct{} + pausedCh chan bool + cpNotifier *pubsub.Broker + cpCreatedNotifier *pubsub.Broker logger *logging.Logger } @@ -119,16 +124,17 @@ type checkpointer struct { // will automatically generate the configured number of checkpoints.
func NewCheckpointer(ndb db.NodeDB, creator Creator, cfg CheckpointerConfig) Checkpointer { c := &checkpointer{ - cfg: cfg, - ndb: ndb, - creator: creator, - notifyCh: channels.NewRingChannel(1), - forceCh: channels.NewRingChannel(1), - flushCh: channels.NewRingChannel(1), - statusCh: make(chan struct{}), - pausedCh: make(chan bool), - cpNotifier: pubsub.NewBroker(false), - logger: logging.GetLogger("storage/mkvs/checkpoint/"+cfg.Name).With("namespace", cfg.Namespace), + cfg: cfg, + ndb: ndb, + creator: creator, + notifyCh: channels.NewRingChannel(1), + forceCh: channels.NewRingChannel(1), + flushCh: channels.NewRingChannel(1), + statusCh: make(chan struct{}), + pausedCh: make(chan bool), + cpNotifier: pubsub.NewBroker(false), + cpCreatedNotifier: pubsub.NewBroker(false), + logger: logging.GetLogger("storage/mkvs/checkpoint/"+cfg.Name).With("namespace", cfg.Namespace), } return c } @@ -152,6 +158,15 @@ func (c *checkpointer) WatchCheckpoints() (<-chan uint64, pubsub.ClosableSubscri return ch, sub, nil } +// Implements Checkpointer. +func (c *checkpointer) WatchCreatedCheckpoints() (<-chan uint64, pubsub.ClosableSubscription, error) { + ch := make(chan uint64) + sub := c.cpCreatedNotifier.Subscribe() + sub.Unwrap(ch) + + return ch, sub, nil +} + // Implements Checkpointer. func (c *checkpointer) Flush() { c.flushCh.In() <- struct{}{} @@ -312,6 +327,7 @@ func (c *checkpointer) checkpoint(ctx context.Context, version uint64, params *C return fmt.Errorf("checkpointer: failed to create checkpoint: %w", err) } } + c.cpCreatedNotifier.Broadcast(version) return nil } diff --git a/go/worker/storage/checkpointer/checkpointer.go b/go/worker/storage/checkpointer/checkpointer.go new file mode 100644 index 00000000000..61fb577359a --- /dev/null +++ b/go/worker/storage/checkpointer/checkpointer.go @@ -0,0 +1,323 @@ +// Package checkpointer defines logic for periodically creating checkpoints +// of the runtime state. 
+package checkpointer + +import ( + "context" + "fmt" + "time" + + "github.com/oasisprotocol/oasis-core/go/common/logging" + "github.com/oasisprotocol/oasis-core/go/common/pubsub" + consensusAPI "github.com/oasisprotocol/oasis-core/go/consensus/api" + commonFlags "github.com/oasisprotocol/oasis-core/go/oasis-node/cmd/common/flags" + roothashAPI "github.com/oasisprotocol/oasis-core/go/roothash/api" + storageAPI "github.com/oasisprotocol/oasis-core/go/storage/api" + "github.com/oasisprotocol/oasis-core/go/storage/mkvs/checkpoint" + "github.com/oasisprotocol/oasis-core/go/worker/common/committee" + "github.com/oasisprotocol/oasis-core/go/worker/storage/api" + "github.com/oasisprotocol/oasis-core/go/worker/storage/statesync" +) + +// chunkerThreads is the target number of subtrees during parallel checkpoint creation. +// It is intentionally non-configurable since we want operators to produce +// same checkpoint hashes. The current value was chosen based on the benchmarks +// done on the modern developer machine. +const chunkerThreads = 12 + +// Worker is responsible for creating runtime checkpoints for every consensus checkpoint, +// and notifying the checkpointer about the new finalized versions. +// +// If the checkpointer is disabled, it will wait until the state is initialized +// and ensure at least a checkpoint for the genesis height was created. +type Worker struct { + commonNode *committee.Node + localStorage storageAPI.LocalBackend + checkpointer checkpoint.Checkpointer + stateSync *statesync.Worker + cfg Config + logger *logging.Logger +} + +// Config is the worker configuration. +type Config struct { + // CheckpointerEnabled specifies whether creation of periodic runtime checkpoints is enabled. + CheckpointerEnabled bool + // CheckInterval is the interval on which to check if any checkpointing is needed. + CheckInterval time.Duration + // ParallelChunker specifies if the new parallel chunking algorithm can be used. + ParallelChunker bool +} + +// New creates a new worker.
+func New(commonNode *committee.Node, localStorage storageAPI.LocalBackend, stateSync *statesync.Worker, cfg Config) (*Worker, error) { + checkInterval := checkpoint.CheckIntervalDisabled + if cfg.CheckpointerEnabled { + checkInterval = cfg.CheckInterval + } + checkpointerCfg := checkpoint.CheckpointerConfig{ + Name: "runtime", + Namespace: commonNode.Runtime.ID(), + CheckInterval: checkInterval, + RootsPerVersion: 2, // State root and I/O root. + GetParameters: func(ctx context.Context) (*checkpoint.CreationParameters, error) { + rt, rerr := commonNode.Runtime.ActiveDescriptor(ctx) + if rerr != nil { + return nil, fmt.Errorf("failed to retrieve runtime descriptor: %w", rerr) + } + + blk, rerr := commonNode.Consensus.RootHash().GetGenesisBlock(ctx, &roothashAPI.RuntimeRequest{ + RuntimeID: rt.ID, + Height: consensusAPI.HeightLatest, + }) + if rerr != nil { + return nil, fmt.Errorf("failed to retrieve genesis block: %w", rerr) + } + + var threads uint16 + if cfg.ParallelChunker { + threads = chunkerThreads + } + + return &checkpoint.CreationParameters{ + Interval: rt.Storage.CheckpointInterval, + NumKept: rt.Storage.CheckpointNumKept, + ChunkSize: rt.Storage.CheckpointChunkSize, + InitialVersion: blk.Header.Round, + ChunkerThreads: threads, + }, nil + }, + GetRoots: func(ctx context.Context, version uint64) ([]storageAPI.Root, error) { + blk, berr := commonNode.Runtime.History().GetCommittedBlock(ctx, version) + if berr != nil { + return nil, berr + } + + return blk.Header.StorageRoots(), nil + }, + } + + checkpointer := checkpoint.NewCheckpointer( + localStorage.NodeDB(), + localStorage.Checkpointer(), + checkpointerCfg, + ) + + return &Worker{ + commonNode: commonNode, + localStorage: localStorage, + checkpointer: checkpointer, + stateSync: stateSync, + cfg: cfg, + logger: logging.GetLogger("worker/storage/checkpointer").With("runtime_id", commonNode.Runtime.ID()), + }, nil +} + +func (w *Worker) PauseCheckpointer(pause bool) error { + if 
!commonFlags.DebugDontBlameOasis() { + return api.ErrCantPauseCheckpointer + } + w.checkpointer.Pause(pause) + return nil +} + +// Serve runs the worker. +func (w *Worker) Serve(ctx context.Context) error { + w.logger.Info("started") + defer w.logger.Info("stopped") + + consensusCp := w.commonNode.Consensus.Checkpointer() + if consensusCp == nil { + return nil // TODO was existing code robust here? + } + + ctx, cancel := context.WithCancel(ctx) + defer cancel() + + go func() { // TODO make it more robust as worker should probably stop here? + if err := w.checkpointer.Serve(ctx); err != nil { + w.logger.Error("checkpointer failed", "err", err) + } + }() + + if err := w.ensureGenesisCheckpoint(ctx); err != nil { + return fmt.Errorf("failed to ensure genesis checkpoint was created: %w", err) + } + + if !w.cfg.CheckpointerEnabled { + return nil // We can return safely after creating the genesis checkpoint. + } + + // Determine the maximum number of consensus checkpoints to keep. + // TODO: This should probably be checked more then once, as params can change without the node + // being restarted. 
+ consensusParams, err := w.commonNode.Consensus.Core().GetParameters(ctx, consensusAPI.HeightLatest) + if err != nil { + return fmt.Errorf("failed to fetch consensus parameters: %w", err) + } + + ch, sub, err := consensusCp.WatchCheckpoints() + if err != nil { + return fmt.Errorf("failed to watch checkpoints: %w", err) + } + defer sub.Close() + + finalizeCh, sub, err := w.stateSync.WatchFinalizedRounds() + if err != nil { + return fmt.Errorf("failed to watch finalized summaries: %w", err) + } + defer sub.Close() + + var ( + versions []uint64 + blkCh <-chan *consensusAPI.Block + blkSub pubsub.ClosableSubscription + ) + defer func() { + if blkCh != nil { + blkSub.Close() + blkSub = nil + blkCh = nil + } + }() + for { + select { + case <-ctx.Done(): + return ctx.Err() + case version := <-ch: + // We need to wait for the next version as that is what will be in the consensus + // checkpoint. + versions = append(versions, version+1) + // Make sure that we limit the size of the checkpoint queue. + if uint64(len(versions)) > consensusParams.Parameters.StateCheckpointNumKept { + versions = versions[1:] + } + + w.logger.Debug("consensus checkpoint detected, queuing runtime checkpoint", + "version", version+1, + "num_versions", len(versions), + ) + + if blkCh == nil { + blkCh, blkSub, err = w.commonNode.Consensus.Core().WatchBlocks(ctx) + if err != nil { + w.logger.Error("failed to watch blocks", + "err", err, + ) + continue + } + } + case blk := <-blkCh: + // If there's nothing remaining, unsubscribe. + if len(versions) == 0 { + w.logger.Debug("no more queued consensus checkpoint versions") + + blkSub.Close() + blkSub = nil + blkCh = nil + continue + } + + var newVersions []uint64 + for idx, version := range versions { + if version > uint64(blk.Height) { + // We need to wait for further versions. + newVersions = versions[idx:] + break + } + + // Lookup what runtime round corresponds to the given consensus layer version and make + // sure we checkpoint it. 
+ blk, err := w.commonNode.Consensus.RootHash().GetLatestBlock(ctx, &roothashAPI.RuntimeRequest{ + RuntimeID: w.commonNode.Runtime.ID(), + Height: int64(version), + }) + if err != nil { + w.logger.Error("failed to get runtime block corresponding to consensus checkpoint", + "err", err, + "height", version, + ) + continue + } + + // We may have not yet synced the corresponding runtime round locally. In this case + // we need to wait until this is the case. + lastSyncedRound, _, _ := w.stateSync.GetLastSynced() + if blk.Header.Round > lastSyncedRound { + w.logger.Debug("runtime round not available yet for checkpoint, waiting", + "height", version, + "round", blk.Header.Round, + "last_synced_round", lastSyncedRound, + ) + newVersions = versions[idx:] + break + } + + // Force runtime storage checkpointer to create a checkpoint at this round. + w.logger.Info("consensus checkpoint, force runtime checkpoint", + "height", version, + "round", blk.Header.Round, + ) + + w.checkpointer.ForceCheckpoint(blk.Header.Round) + } + versions = newVersions + case round := <-finalizeCh: + w.checkpointer.NotifyNewVersion(round) + } + } +} + +func (w *Worker) ensureGenesisCheckpoint(ctx context.Context) error { + // Wait for the common node to be initialized. + select { + case <-w.commonNode.Initialized(): + case <-ctx.Done(): + return ctx.Err() + } + + // Wait for state sync worker to be initialized which guarantees us to have the state initialized. 
+ select { + case <-w.stateSync.Initialized(): + case <-ctx.Done(): + return ctx.Err() + } + + genesisBlock, err := w.commonNode.Consensus.RootHash().GetGenesisBlock(ctx, &roothashAPI.RuntimeRequest{ + RuntimeID: w.commonNode.Runtime.ID(), + Height: consensusAPI.HeightLatest, + }) + if err != nil { + return fmt.Errorf("can't retrieve genesis block: %w", err) + } + + ch, sub, err := w.checkpointer.WatchCreatedCheckpoints() + if err != nil { + return fmt.Errorf("failed to watch created checkpoints: %w", err) + } + defer sub.Close() + + _, err = w.localStorage.Checkpointer().GetCheckpoint(ctx, 1, genesisBlock.Header.StorageRootState()) + if err == nil { // if NOT error we already have a checkpoint. TODO: this is not robust, even though genesis has no io root. + return nil + } + + // Notify the checkpointer of the genesis round so it can be checkpointed. + if w.checkpointer != nil { + w.checkpointer.ForceCheckpoint(genesisBlock.Header.Round) + w.checkpointer.Flush() + } + + // TODO add timeout. + for { + select { + case <-ctx.Done(): + return ctx.Err() + case r := <-ch: + if r != genesisBlock.Header.Round { + continue + } + return nil // genesis checkpoint created successfully. 
+ } + } +} diff --git a/go/worker/storage/runtime_worker.go b/go/worker/storage/runtime_worker.go index d9266272b73..e62a198af0d 100644 --- a/go/worker/storage/runtime_worker.go +++ b/go/worker/storage/runtime_worker.go @@ -8,12 +8,14 @@ import ( "golang.org/x/sync/errgroup" "github.com/oasisprotocol/oasis-core/go/common/logging" + "github.com/oasisprotocol/oasis-core/go/config" runtimeAPI "github.com/oasisprotocol/oasis-core/go/runtime/api" "github.com/oasisprotocol/oasis-core/go/runtime/host" "github.com/oasisprotocol/oasis-core/go/storage/api" committeeCommon "github.com/oasisprotocol/oasis-core/go/worker/common/committee" "github.com/oasisprotocol/oasis-core/go/worker/registration" storageAPI "github.com/oasisprotocol/oasis-core/go/worker/storage/api" + "github.com/oasisprotocol/oasis-core/go/worker/storage/checkpointer" "github.com/oasisprotocol/oasis-core/go/worker/storage/statesync" ) @@ -21,6 +23,7 @@ import ( type worker struct { logger *logging.Logger stateSync *statesync.Worker + checkpointer *checkpointer.Worker stateSyncBlkCh *channels.InfiniteChannel } @@ -29,7 +32,8 @@ func newRuntimeWorker( rp registration.RoleProvider, rpRPC registration.RoleProvider, localStorage api.LocalBackend, - checkpointerCfg *statesync.CheckpointSyncConfig, + checkpointSyncCfg *statesync.CheckpointSyncConfig, + checkpointerEnabled bool, ) (*worker, error) { worker := &worker{ logger: logging.GetLogger("worker/storage").With("runtimeID", commonNode.Runtime.ID()), @@ -42,14 +46,24 @@ func newRuntimeWorker( rpRPC, localStorage, worker.stateSyncBlkCh, - checkpointerCfg, + checkpointSyncCfg, ) if err != nil { return nil, fmt.Errorf("failed to create state sync worker: %w", err) } - worker.stateSync = stateSync + cpCfg := checkpointer.Config{ + CheckpointerEnabled: config.GlobalConfig.Storage.Checkpointer.Enabled, + CheckInterval: config.GlobalConfig.Storage.Checkpointer.CheckInterval, + ParallelChunker: config.GlobalConfig.Storage.Checkpointer.ParallelChunker, + } + 
checkpointer, err := checkpointer.New(commonNode, localStorage, stateSync, cpCfg) + if err != nil { + return nil, fmt.Errorf("failed to create checkpointer worker: %w", err) + } + worker.checkpointer = checkpointer + return worker, nil } @@ -81,13 +95,26 @@ func (w *worker) GetStatus(ctx context.Context) (*storageAPI.Status, error) { } func (w *worker) PauseCheckpointer(pause bool) error { - return w.stateSync.PauseCheckpointer(pause) + return w.checkpointer.PauseCheckpointer(pause) } func (w *worker) serve(ctx context.Context) error { w.logger.Info("started") defer w.logger.Info("stopped") + ctx, cancel := context.WithCancel(ctx) + defer cancel() + + // Create runtime checkpoint for every consensus checkpoint, to make it faster for storage nodes + // that use consensus state sync to catch up as exactly the right checkpoint will be available. + // Intentionally not part of the errgroup below as failing checkpointer should not stop state sync. + go func() { + err := w.checkpointer.Serve(ctx) + if err != nil { + w.logger.Info("checkpointer worker failed", "err", err) + } + }() + g, ctx := errgroup.WithContext(ctx) g.Go(func() error { return w.stateSync.Serve(ctx) diff --git a/go/worker/storage/statesync/state_sync.go b/go/worker/storage/statesync/state_sync.go index b2e6a479f41..233daff6fe9 100644 --- a/go/worker/storage/statesync/state_sync.go +++ b/go/worker/storage/statesync/state_sync.go @@ -19,13 +19,11 @@ import ( "github.com/oasisprotocol/oasis-core/go/common/workerpool" "github.com/oasisprotocol/oasis-core/go/config" consensus "github.com/oasisprotocol/oasis-core/go/consensus/api" - commonFlags "github.com/oasisprotocol/oasis-core/go/oasis-node/cmd/common/flags" "github.com/oasisprotocol/oasis-core/go/p2p/rpc" registryApi "github.com/oasisprotocol/oasis-core/go/registry/api" roothashApi "github.com/oasisprotocol/oasis-core/go/roothash/api" "github.com/oasisprotocol/oasis-core/go/roothash/api/block" storageApi 
"github.com/oasisprotocol/oasis-core/go/storage/api" - "github.com/oasisprotocol/oasis-core/go/storage/mkvs/checkpoint" dbApi "github.com/oasisprotocol/oasis-core/go/storage/mkvs/db/api" "github.com/oasisprotocol/oasis-core/go/worker/common/committee" "github.com/oasisprotocol/oasis-core/go/worker/registration" @@ -59,12 +57,6 @@ const ( // for them to be applied. maxInFlightRounds = 100 - // chunkerThreads is target number of subtrees during parallel checkpoint creation. - // It is intentionally non-configurable since we want operators to produce - // same checkpoint hashes. The current value was chosen based on the benchmarks - // done on the modern developer machine. - chunkerThreads = 12 - // diffResponseTimeout is the maximum time for fetching storage diff from the peer. diffResponseTimeout = 15 * time.Second ) @@ -143,7 +135,6 @@ type Worker struct { undefinedRound uint64 - checkpointer checkpoint.Checkpointer checkpointSyncCfg *CheckpointSyncConfig checkpointSyncForced bool @@ -196,20 +187,9 @@ func New( initCh: make(chan struct{}), } - // Validate checkpoint sync configuration. - if err := checkpointSyncCfg.Validate(); err != nil { - return nil, fmt.Errorf("bad checkpoint sync configuration: %w", err) - } - // Initialize sync state. w.syncedState.Round = defaultUndefinedRound - // Create a checkpointer (even if checkpointing is disabled) to ensure the genesis checkpoint is available. - checkpointer, err := w.newCheckpointer(commonNode, localStorage) - if err != nil { - return nil, fmt.Errorf("failed to create checkpointer: %w", err) - } - w.checkpointer = checkpointer // Register prune handler. 
commonNode.Runtime.History().Pruner().RegisterHandler(&pruneHandler{ logger: w.logger, @@ -234,61 +214,10 @@ func New( return w, nil } -func (w *Worker) newCheckpointer(commonNode *committee.Node, localStorage storageApi.LocalBackend) (checkpoint.Checkpointer, error) { - checkInterval := checkpoint.CheckIntervalDisabled - if config.GlobalConfig.Storage.Checkpointer.Enabled { - checkInterval = config.GlobalConfig.Storage.Checkpointer.CheckInterval - } - checkpointerCfg := checkpoint.CheckpointerConfig{ - Name: "runtime", - Namespace: commonNode.Runtime.ID(), - CheckInterval: checkInterval, - RootsPerVersion: 2, // State root and I/O root. - GetParameters: func(ctx context.Context) (*checkpoint.CreationParameters, error) { - rt, rerr := commonNode.Runtime.ActiveDescriptor(ctx) - if rerr != nil { - return nil, fmt.Errorf("failed to retrieve runtime descriptor: %w", rerr) - } - - blk, rerr := commonNode.Consensus.RootHash().GetGenesisBlock(ctx, &roothashApi.RuntimeRequest{ - RuntimeID: rt.ID, - Height: consensus.HeightLatest, - }) - if rerr != nil { - return nil, fmt.Errorf("failed to retrieve genesis block: %w", rerr) - } - - var threads uint16 - if config.GlobalConfig.Storage.Checkpointer.ParallelChunker { - threads = chunkerThreads - } - - return &checkpoint.CreationParameters{ - Interval: rt.Storage.CheckpointInterval, - NumKept: rt.Storage.CheckpointNumKept, - ChunkSize: rt.Storage.CheckpointChunkSize, - InitialVersion: blk.Header.Round, - ChunkerThreads: threads, - }, nil - }, - GetRoots: func(ctx context.Context, version uint64) ([]storageApi.Root, error) { - blk, berr := commonNode.Runtime.History().GetCommittedBlock(ctx, version) - if berr != nil { - return nil, berr - } - - return blk.Header.StorageRoots(), nil - }, - } - - return checkpoint.NewCheckpointer( - localStorage.NodeDB(), - localStorage.Checkpointer(), - checkpointerCfg, - ), nil -} - // Initialized returns a channel that will be closed once the worker finished starting up. 
+// +// If worker is initialized it is guaranteed that the storage has state available for the +// genesis height or higher. func (w *Worker) Initialized() <-chan struct{} { return w.initCh } @@ -316,14 +245,6 @@ func (w *Worker) WatchFinalizedRounds() (<-chan uint64, pubsub.ClosableSubscript return ch, sub, nil } -func (w *Worker) PauseCheckpointer(pause bool) error { - if !commonFlags.DebugDontBlameOasis() { - return api.ErrCantPauseCheckpointer - } - w.checkpointer.Pause(pause) - return nil -} - // Watcher implementation. // GetLastSynced returns the height, IORoot hash and StateRoot hash of the last block that was fully synced to. @@ -572,140 +493,6 @@ func (w *Worker) flushSyncedState(summary *blockSummary) (uint64, error) { return w.syncedState.Round, nil } -func (w *Worker) consensusCheckpointSyncer(ctx context.Context) { - // Make sure we always create a checkpoint when the consensus layer creates a checkpoint. The - // reason why we do this is to make it faster for storage nodes that use consensus state sync - // to catch up as exactly the right checkpoint will be available. - consensusCp := w.commonNode.Consensus.Checkpointer() - if consensusCp == nil { - return - } - - // Wait for the common node to be initialized. - select { - case <-w.commonNode.Initialized(): - case <-ctx.Done(): - return - } - - // Determine the maximum number of consensus checkpoints to keep. 
- consensusParams, err := w.commonNode.Consensus.Core().GetParameters(ctx, consensus.HeightLatest) - if err != nil { - w.logger.Error("failed to fetch consensus parameters", - "err", err, - ) - return - } - - ch, sub, err := consensusCp.WatchCheckpoints() - if err != nil { - w.logger.Error("failed to watch checkpoints", - "err", err, - ) - return - } - defer sub.Close() - - var ( - versions []uint64 - blkCh <-chan *consensus.Block - blkSub pubsub.ClosableSubscription - ) - defer func() { - if blkCh != nil { - blkSub.Close() - blkSub = nil - blkCh = nil - } - }() - for { - select { - case <-ctx.Done(): - return - case version := <-ch: - // We need to wait for the next version as that is what will be in the consensus - // checkpoint. - versions = append(versions, version+1) - // Make sure that we limit the size of the checkpoint queue. - if uint64(len(versions)) > consensusParams.Parameters.StateCheckpointNumKept { - versions = versions[1:] - } - - w.logger.Debug("consensus checkpoint detected, queuing runtime checkpoint", - "version", version+1, - "num_versions", len(versions), - ) - - if blkCh == nil { - blkCh, blkSub, err = w.commonNode.Consensus.Core().WatchBlocks(ctx) - if err != nil { - w.logger.Error("failed to watch blocks", - "err", err, - ) - continue - } - } - case blk := <-blkCh: - // If there's nothing remaining, unsubscribe. - if len(versions) == 0 { - w.logger.Debug("no more queued consensus checkpoint versions") - - blkSub.Close() - blkSub = nil - blkCh = nil - continue - } - - var newVersions []uint64 - for idx, version := range versions { - if version > uint64(blk.Height) { - // We need to wait for further versions. - newVersions = versions[idx:] - break - } - - // Lookup what runtime round corresponds to the given consensus layer version and make - // sure we checkpoint it. 
- blk, err := w.commonNode.Consensus.RootHash().GetLatestBlock(ctx, &roothashApi.RuntimeRequest{ - RuntimeID: w.commonNode.Runtime.ID(), - Height: int64(version), - }) - if err != nil { - w.logger.Error("failed to get runtime block corresponding to consensus checkpoint", - "err", err, - "height", version, - ) - continue - } - - // We may have not yet synced the corresponding runtime round locally. In this case - // we need to wait until this is the case. - w.syncedLock.RLock() - lastSyncedRound := w.syncedState.Round - w.syncedLock.RUnlock() - if blk.Header.Round > lastSyncedRound { - w.logger.Debug("runtime round not available yet for checkpoint, waiting", - "height", version, - "round", blk.Header.Round, - "last_synced_round", lastSyncedRound, - ) - newVersions = versions[idx:] - break - } - - // Force runtime storage checkpointer to create a checkpoint at this round. - w.logger.Info("consensus checkpoint, force runtime checkpoint", - "height", version, - "round", blk.Header.Round, - ) - - w.checkpointer.ForceCheckpoint(blk.Header.Round) - } - versions = newVersions - } - } -} - // This is only called from the main worker goroutine, so no locking should be necessary. func (w *Worker) nudgeAvailability(lastSynced, latest uint64) { if lastSynced == w.undefinedRound || latest == w.undefinedRound { @@ -761,14 +548,6 @@ func (w *Worker) Serve(ctx context.Context) error { // nolint: gocyclo }() defer w.logger.Info("stopped") - go func() { - err := w.checkpointer.Serve(ctx) - w.logger.Error("checkpointer failed", "err", err) - }() - if config.GlobalConfig.Storage.Checkpointer.Enabled { - go w.consensusCheckpointSyncer(ctx) - } - // Determine genesis block. 
genesisBlock, err := w.commonNode.Consensus.RootHash().GetGenesisBlock(ctx, &roothashApi.RuntimeRequest{ RuntimeID: w.commonNode.Runtime.ID(), @@ -995,12 +774,6 @@ func (w *Worker) Serve(ctx context.Context) error { // nolint: gocyclo close(w.initCh) w.logger.Info("initialized") - // Notify the checkpointer of the genesis round so it can be checkpointed. - if w.checkpointer != nil { - w.checkpointer.ForceCheckpoint(genesisBlock.Header.Round) - w.checkpointer.Flush() - } - // Main syncing loop: err = nil var wg sync.WaitGroup @@ -1273,11 +1046,6 @@ mainLoop: // Check if we're far enough to reasonably register as available. w.nudgeAvailability(lastFinalizedRound, latestBlockRound) - // Notify the checkpointer that there is a new finalized round. - if config.GlobalConfig.Storage.Checkpointer.Enabled { - w.checkpointer.NotifyNewVersion(finalized.summary.Round) - } - case <-ctx.Done(): err = ctx.Err() break mainLoop diff --git a/go/worker/storage/worker.go b/go/worker/storage/worker.go index 969277f8beb..7c9d2e37e44 100644 --- a/go/worker/storage/worker.go +++ b/go/worker/storage/worker.go @@ -108,6 +108,7 @@ func (w *Worker) registerRuntime(commonNode *committeeCommon.Node) error { Disabled: config.GlobalConfig.Storage.CheckpointSyncDisabled, ChunkFetcherCount: config.GlobalConfig.Storage.FetcherCount, }, + config.GlobalConfig.Storage.Checkpointer.Enabled, ) if err != nil { return err From 8bc3c324dc0960c1c3edb9eb1f569155fc469ea9 Mon Sep 17 00:00:00 2001 From: Martin Tomazic Date: Wed, 27 Aug 2025 19:24:30 +0200 Subject: [PATCH 18/18] go/worker/storage: Move availability nudger to separate worker Same issues as with checkpointer. Possibly we would want to accept WatchFinalizedRounds as part of an interface. 
Co-authored-by: Peter Nose --- .../availabilitynudger/availability.go | 144 ++++++++++++++++++ go/worker/storage/runtime_worker.go | 36 +++-- go/worker/storage/statesync/state_sync.go | 57 +------ go/worker/storage/worker.go | 12 +- 4 files changed, 179 insertions(+), 70 deletions(-) create mode 100644 go/worker/storage/availabilitynudger/availability.go diff --git a/go/worker/storage/availabilitynudger/availability.go b/go/worker/storage/availabilitynudger/availability.go new file mode 100644 index 00000000000..be6a2e1338c --- /dev/null +++ b/go/worker/storage/availabilitynudger/availability.go @@ -0,0 +1,144 @@ +// Package availabilitynudger defines logic for updating the role providers. +package availabilitynudger + +import ( + "context" + "fmt" + "math" + + "github.com/eapache/channels" + + "github.com/oasisprotocol/oasis-core/go/common" + "github.com/oasisprotocol/oasis-core/go/common/logging" + "github.com/oasisprotocol/oasis-core/go/common/node" + "github.com/oasisprotocol/oasis-core/go/roothash/api/block" + "github.com/oasisprotocol/oasis-core/go/worker/registration" + "github.com/oasisprotocol/oasis-core/go/worker/storage/statesync" +) + +const ( + // The maximum number of rounds the worker can be behind the chain before it's sensible for + // it to register as available. + maximumRoundDelayForAvailability = uint64(10) + + // The minimum number of rounds the worker can be behind the chain before it's sensible for + // it to stop advertising availability. + minimumRoundDelayForUnavailability = uint64(15) +) + +// Worker tracks the progress of last and last synced rounds +// and “nudges” role providers to mark themselves available or unavailable +// based on how closely the node is keeping up with consensus. 
+type Worker struct { + roleProvider registration.RoleProvider + rpcRoleProvider registration.RoleProvider + roleAvailable bool + + lastRound uint64 + lastSyncedRound uint64 + + blockCh *channels.InfiniteChannel + stateSync *statesync.Worker + + logger *logging.Logger +} + +// New creates a new worker that updates the availability to role providers. +func New(localProvider, rpcProvider registration.RoleProvider, blockCh *channels.InfiniteChannel, stateSync *statesync.Worker, runtimeID common.Namespace) *Worker { + return &Worker{ + roleProvider: localProvider, + rpcRoleProvider: rpcProvider, + lastRound: math.MaxUint64, + lastSyncedRound: math.MaxUint64, + blockCh: blockCh, + stateSync: stateSync, + logger: logging.GetLogger("worker/storage/availabilitynudger").With("runtime_id", runtimeID), + } +} + +// Serve starts the worker. +func (w *Worker) Serve(ctx context.Context) error { + w.logger.Info("started") + defer w.logger.Info("stopped") + + finalizeCh, sub, err := w.stateSync.WatchFinalizedRounds() + if err != nil { + return fmt.Errorf("failed to watch finalized rounds: %w", err) + } + defer sub.Close() + + for { + select { + case <-ctx.Done(): + return ctx.Err() + case inBlk := <-w.blockCh.Out(): + blk := inBlk.(*block.Block) + w.setLastRound(blk.Header.Round) + case round := <-finalizeCh: + w.setLastSyncedRound(round) + } + w.updateAvailability() + } +} + +// setLastRound updates the last round number. +func (w *Worker) setLastRound(round uint64) { + w.lastRound = round +} + +// setLastSyncedRound updates the most recently synced round number. +func (w *Worker) setLastSyncedRound(round uint64) { + w.lastSyncedRound = round +} + +// updateAvailability updates the role's availability based on the gap +// between the last round and the last synced round. 
+func (w *Worker) updateAvailability() {
+	if w.lastRound == math.MaxUint64 || w.lastSyncedRound == math.MaxUint64 {
+		return
+	}
+	// if w.lastRound > w.lastSyncedRound {
+	// 	return
+	// }
+	// NOTE(review): the intent of the commented-out guard above is unclear — the code
+	// below measures the gap as lastRound - lastSyncedRound, so a guard written this
+	// way would have skipped exactly the case being measured; confirm whether it
+	// should be restored (perhaps with the comparison inverted) before deleting it.
+
+	switch roundLag := w.lastRound - w.lastSyncedRound; {
+	case roundLag < maximumRoundDelayForAvailability:
+		w.markAvailable()
+	case roundLag > minimumRoundDelayForUnavailability:
+		w.markUnavailable()
+	}
+}
+
+// markAvailable sets the role as available if it is not already.
+func (w *Worker) markAvailable() {
+	if w.roleAvailable {
+		return
+	}
+	w.roleAvailable = true
+
+	w.logger.Info("marking as available")
+
+	if w.roleProvider != nil {
+		w.roleProvider.SetAvailable(func(*node.Node) error { return nil })
+	}
+	if w.rpcRoleProvider != nil {
+		w.rpcRoleProvider.SetAvailable(func(*node.Node) error { return nil })
+	}
+}
+
+// markUnavailable sets the role as unavailable if it is currently available.
+func (w *Worker) markUnavailable() {
+	if !w.roleAvailable {
+		return
+	}
+	w.roleAvailable = false
+
+	w.logger.Info("marking as unavailable")
+
+	if w.roleProvider != nil {
+		w.roleProvider.SetUnavailable()
+	}
+	if w.rpcRoleProvider != nil {
+		w.rpcRoleProvider.SetUnavailable()
+	}
+}
diff --git a/go/worker/storage/runtime_worker.go b/go/worker/storage/runtime_worker.go
index e62a198af0d..1d0429b9c05 100644
--- a/go/worker/storage/runtime_worker.go
+++ b/go/worker/storage/runtime_worker.go
@@ -15,16 +15,20 @@ import (
 	committeeCommon "github.com/oasisprotocol/oasis-core/go/worker/common/committee"
 	"github.com/oasisprotocol/oasis-core/go/worker/registration"
 	storageAPI "github.com/oasisprotocol/oasis-core/go/worker/storage/api"
+	"github.com/oasisprotocol/oasis-core/go/worker/storage/availabilitynudger"
+	"github.com/oasisprotocol/oasis-core/go/worker/storage/checkpointer"
 	"github.com/oasisprotocol/oasis-core/go/worker/storage/statesync"
 )
 
 // Worker is handling storage operations for a single runtime.
type worker struct { - logger *logging.Logger - stateSync *statesync.Worker - checkpointer *checkpointer.Worker - stateSyncBlkCh *channels.InfiniteChannel + commonNode *committeeCommon.Node + logger *logging.Logger + stateSync *statesync.Worker + checkpointer *checkpointer.Worker + availabilityNudger *availabilitynudger.Worker + stateSyncBlkCh *channels.InfiniteChannel + availabilityBlkCh *channels.InfiniteChannel } func newRuntimeWorker( @@ -36,14 +40,14 @@ func newRuntimeWorker( checkpointerEnabled bool, ) (*worker, error) { worker := &worker{ - logger: logging.GetLogger("worker/storage").With("runtimeID", commonNode.Runtime.ID()), - stateSyncBlkCh: channels.NewInfiniteChannel(), + commonNode: commonNode, + logger: logging.GetLogger("worker/storage").With("runtimeID", commonNode.Runtime.ID()), + stateSyncBlkCh: channels.NewInfiniteChannel(), + availabilityBlkCh: channels.NewInfiniteChannel(), } stateSync, err := statesync.New( commonNode, - rp, - rpRPC, localStorage, worker.stateSyncBlkCh, checkpointSyncCfg, @@ -64,6 +68,8 @@ func newRuntimeWorker( } worker.checkpointer = checkpointer + worker.availabilityNudger = availabilitynudger.New(rp, rpRPC, worker.availabilityBlkCh, stateSync, commonNode.Runtime.ID()) + return worker, nil } @@ -76,8 +82,9 @@ func (w *worker) HandleNewBlockEarlyLocked(*runtimeAPI.BlockInfo) { // HandleNewBlockLocked is guarded by CrossNode. func (w *worker) HandleNewBlockLocked(bi *runtimeAPI.BlockInfo) { - // Notify the state syncer that there is a new block. + // Notify the state syncer and availability nudger that there is a new block. w.stateSyncBlkCh.In() <- bi.RuntimeBlock + w.availabilityBlkCh.In() <- bi.RuntimeBlock } // HandleRuntimeHostEventLocked is guarded by CrossNode. 
@@ -117,7 +124,16 @@ func (w *worker) serve(ctx context.Context) error { g, ctx := errgroup.WithContext(ctx) g.Go(func() error { - return w.stateSync.Serve(ctx) + if err := w.stateSync.Serve(ctx); err != nil { + return fmt.Errorf("state sync worker failed: %w", err) + } + return nil + }) + g.Go(func() error { + if err := w.availabilityNudger.Serve(ctx); err != nil { + return fmt.Errorf("availability nudger failed: %w", err) + } + return nil }) return g.Wait() } diff --git a/go/worker/storage/statesync/state_sync.go b/go/worker/storage/statesync/state_sync.go index 233daff6fe9..af1aa522d09 100644 --- a/go/worker/storage/statesync/state_sync.go +++ b/go/worker/storage/statesync/state_sync.go @@ -14,7 +14,6 @@ import ( "github.com/eapache/channels" "github.com/oasisprotocol/oasis-core/go/common/logging" - "github.com/oasisprotocol/oasis-core/go/common/node" "github.com/oasisprotocol/oasis-core/go/common/pubsub" "github.com/oasisprotocol/oasis-core/go/common/workerpool" "github.com/oasisprotocol/oasis-core/go/config" @@ -26,11 +25,9 @@ import ( storageApi "github.com/oasisprotocol/oasis-core/go/storage/api" dbApi "github.com/oasisprotocol/oasis-core/go/storage/mkvs/db/api" "github.com/oasisprotocol/oasis-core/go/worker/common/committee" - "github.com/oasisprotocol/oasis-core/go/worker/registration" "github.com/oasisprotocol/oasis-core/go/worker/storage/api" "github.com/oasisprotocol/oasis-core/go/worker/storage/p2p/checkpointsync" "github.com/oasisprotocol/oasis-core/go/worker/storage/p2p/diffsync" - storagePub "github.com/oasisprotocol/oasis-core/go/worker/storage/p2p/pub" "github.com/oasisprotocol/oasis-core/go/worker/storage/p2p/synclegacy" ) @@ -45,14 +42,6 @@ const ( checkpointSyncRetryDelay = 10 * time.Second - // The maximum number of rounds the worker can be behind the chain before it's sensible for - // it to register as available. 
- maximumRoundDelayForAvailability = uint64(10) - - // The minimum number of rounds the worker can be behind the chain before it's sensible for - // it to stop advertising availability. - minimumRoundDelayForUnavailability = uint64(15) - // maxInFlightRounds is the maximum number of rounds that should be fetched before waiting // for them to be applied. maxInFlightRounds = 100 @@ -114,17 +103,10 @@ type finalizeResult struct { // In addition this worker is responsible for: // 1. Initializing the runtime state, possibly using checkpoints (if configured). // 2. Pruning the state as specified by the configuration. -// 3. Optionally creating runtime state checkpoints (used by other nodes) for the state sync. -// 4. Creating (and optionally advertising) statesync p2p protocol clients and servers. -// 5. Registering node availability when it has synced sufficiently close to -// the latest known block header. +// 3. Creating (and optionally advertising) statesync p2p protocol clients and servers. type Worker struct { commonNode *committee.Node - roleProvider registration.RoleProvider - rpcRoleProvider registration.RoleProvider - roleAvailable bool - logger *logging.Logger localStorage storageApi.LocalBackend @@ -156,8 +138,6 @@ type Worker struct { // New creates a new state sync worker. 
func New( commonNode *committee.Node, - roleProvider registration.RoleProvider, - rpcRoleProvider registration.RoleProvider, localStorage storageApi.LocalBackend, blockCh *channels.InfiniteChannel, checkpointSyncCfg *CheckpointSyncConfig, @@ -167,9 +147,6 @@ func New( w := &Worker{ commonNode: commonNode, - roleProvider: roleProvider, - rpcRoleProvider: rpcRoleProvider, - logger: logging.GetLogger("worker/storage/statesync").With("runtime_id", commonNode.Runtime.ID()), localStorage: localStorage, @@ -202,9 +179,6 @@ func New( if config.GlobalConfig.Storage.Checkpointer.Enabled { commonNode.P2P.RegisterProtocolServer(checkpointsync.NewServer(commonNode.ChainContext, commonNode.Runtime.ID(), localStorage)) } - if rpcRoleProvider != nil { - commonNode.P2P.RegisterProtocolServer(storagePub.NewServer(commonNode.ChainContext, commonNode.Runtime.ID(), localStorage)) - } // Create p2p protocol clients. w.legacyStorageSync = synclegacy.NewClient(commonNode.P2P, commonNode.ChainContext, commonNode.Runtime.ID()) @@ -493,31 +467,6 @@ func (w *Worker) flushSyncedState(summary *blockSummary) (uint64, error) { return w.syncedState.Round, nil } -// This is only called from the main worker goroutine, so no locking should be necessary. -func (w *Worker) nudgeAvailability(lastSynced, latest uint64) { - if lastSynced == w.undefinedRound || latest == w.undefinedRound { - return - } - if latest-lastSynced < maximumRoundDelayForAvailability && !w.roleAvailable { - w.roleProvider.SetAvailable(func(_ *node.Node) error { - return nil - }) - if w.rpcRoleProvider != nil { - w.rpcRoleProvider.SetAvailable(func(_ *node.Node) error { - return nil - }) - } - w.roleAvailable = true - } - if latest-lastSynced > minimumRoundDelayForUnavailability && w.roleAvailable { - w.roleProvider.SetUnavailable() - if w.rpcRoleProvider != nil { - w.rpcRoleProvider.SetUnavailable() - } - w.roleAvailable = false - } -} - // Serve runs the state sync worker. 
func (w *Worker) Serve(ctx context.Context) error { // nolint: gocyclo defer close(w.diffCh) @@ -950,7 +899,6 @@ mainLoop: // Check if we're far enough to reasonably register as available. latestBlockRound = blk.Header.Round - w.nudgeAvailability(lastFinalizedRound, latestBlockRound) if _, ok := summaryCache[lastFullyAppliedRound]; !ok && lastFullyAppliedRound == w.undefinedRound { dummy := blockSummary{ @@ -1043,9 +991,6 @@ mainLoop: } storageWorkerLastFinalizedRound.With(w.getMetricLabels()).Set(float64(finalized.summary.Round)) - // Check if we're far enough to reasonably register as available. - w.nudgeAvailability(lastFinalizedRound, latestBlockRound) - case <-ctx.Done(): err = ctx.Err() break mainLoop diff --git a/go/worker/storage/worker.go b/go/worker/storage/worker.go index 7c9d2e37e44..16ef4fe0d86 100644 --- a/go/worker/storage/worker.go +++ b/go/worker/storage/worker.go @@ -15,6 +15,7 @@ import ( committeeCommon "github.com/oasisprotocol/oasis-core/go/worker/common/committee" "github.com/oasisprotocol/oasis-core/go/worker/registration" storageWorkerAPI "github.com/oasisprotocol/oasis-core/go/worker/storage/api" + "github.com/oasisprotocol/oasis-core/go/worker/storage/p2p/pub" "github.com/oasisprotocol/oasis-core/go/worker/storage/statesync" ) @@ -79,6 +80,11 @@ func (w *Worker) registerRuntime(commonNode *committeeCommon.Node) error { "runtime_id", id, ) + localStorage, err := NewLocalBackend(commonNode.Runtime.DataDir(), id) + if err != nil { + return fmt.Errorf("can't create local storage backend: %w", err) + } + // Since the storage node is always coupled with another role, make sure to not add any // particular role here. Instead this only serves to prevent registration until the storage node // is synced by making the role provider unavailable. 
@@ -93,10 +99,8 @@ func (w *Worker) registerRuntime(commonNode *committeeCommon.Node) error { return fmt.Errorf("failed to create rpc role provider: %w", err) } } - - localStorage, err := NewLocalBackend(commonNode.Runtime.DataDir(), id) - if err != nil { - return fmt.Errorf("can't create local storage backend: %w", err) + if rpRPC != nil { + commonNode.P2P.RegisterProtocolServer(pub.NewServer(commonNode.ChainContext, commonNode.Runtime.ID(), localStorage)) } worker, err := newRuntimeWorker(