From 0b3d6e2e46f2a6c41cde4d91b7176e8c99edbf49 Mon Sep 17 00:00:00 2001
From: Martin Tomazic
Date: Sat, 2 Aug 2025 18:28:12 +0200
Subject: [PATCH 01/11] go/worker/storage: Rename committee package to statesync

Also rename node to worker, to avoid confusion.

Ideally, the parent package (storage) would have runtime as a prefix to
make it clearer this is a runtime worker.
---
 go/oasis-node/cmd/node/node_control.go          |   6 +-
 go/oasis-test-runner/oasis/log.go               |   4 +-
 .../checkpoint_sync.go                          |  92 ++--
 .../checkpoint_sync_test.go                     |   2 +-
 .../{committee => statesync}/metrics.go         |   6 +-
 .../node.go => statesync/state_sync.go}         | 483 +++++++++---------
 .../storage/{committee => statesync}/utils.go   |   2 +-
 go/worker/storage/worker.go                     |  22 +-
 8 files changed, 314 insertions(+), 303 deletions(-)
 rename go/worker/storage/{committee => statesync}/checkpoint_sync.go (81%)
 rename go/worker/storage/{committee => statesync}/checkpoint_sync_test.go (98%)
 rename go/worker/storage/{committee => statesync}/metrics.go (91%)
 rename go/worker/storage/{committee/node.go => statesync/state_sync.go} (73%)
 rename go/worker/storage/{committee => statesync}/utils.go (99%)

diff --git a/go/oasis-node/cmd/node/node_control.go b/go/oasis-node/cmd/node/node_control.go
index 9310d0780f5..494bf9b2332 100644
--- a/go/oasis-node/cmd/node/node_control.go
+++ b/go/oasis-node/cmd/node/node_control.go
@@ -312,10 +312,10 @@ func (n *Node) getRuntimeStatus(ctx context.Context) (map[common.Namespace]contr
 		}
 
 		// Fetch storage worker status.
-		if storageNode := n.StorageWorker.GetRuntime(rt.ID()); storageNode != nil {
-			status.Storage, err = storageNode.GetStatus(ctx)
+		if stateSync := n.StorageWorker.GetRuntime(rt.ID()); stateSync != nil {
+			status.Storage, err = stateSync.GetStatus(ctx)
 			if err != nil {
-				logger.Error("failed to fetch storage worker status", "err", err)
+				logger.Error("failed to fetch state sync worker status", "err", err)
 			}
 		}
 
diff --git a/go/oasis-test-runner/oasis/log.go b/go/oasis-test-runner/oasis/log.go
index cd38354d83f..a46b126c18a 100644
--- a/go/oasis-test-runner/oasis/log.go
+++ b/go/oasis-test-runner/oasis/log.go
@@ -8,7 +8,7 @@ import (
 	roothash "github.com/oasisprotocol/oasis-core/go/roothash/api"
 	"github.com/oasisprotocol/oasis-core/go/roothash/api/commitment"
 	upgrade "github.com/oasisprotocol/oasis-core/go/upgrade/api"
-	workerStorage "github.com/oasisprotocol/oasis-core/go/worker/storage/committee"
+	"github.com/oasisprotocol/oasis-core/go/worker/storage/statesync"
 )
 
 // LogAssertEvent returns a handler which checks whether a specific log event was
@@ -116,7 +116,7 @@ func LogAssertRoothashRoothashReindexing() log.WatcherHandlerFactory {
 // LogAssertCheckpointSync returns a handler which checks whether initial storage sync from
 // a checkpoint was successful or not.
func LogAssertCheckpointSync() log.WatcherHandlerFactory { - return LogAssertEvent(workerStorage.LogEventCheckpointSyncSuccess, "checkpoint sync did not succeed") + return LogAssertEvent(statesync.LogEventCheckpointSyncSuccess, "checkpoint sync did not succeed") } // LogAssertDiscrepancyMajorityFailure returns a handler which checks whether a discrepancy resolution diff --git a/go/worker/storage/committee/checkpoint_sync.go b/go/worker/storage/statesync/checkpoint_sync.go similarity index 81% rename from go/worker/storage/committee/checkpoint_sync.go rename to go/worker/storage/statesync/checkpoint_sync.go index ad553272a90..bca66368dfe 100644 --- a/go/worker/storage/committee/checkpoint_sync.go +++ b/go/worker/storage/statesync/checkpoint_sync.go @@ -1,4 +1,4 @@ -package committee +package statesync import ( "bytes" @@ -21,7 +21,7 @@ import ( const ( // cpListsTimeout is the timeout for fetching checkpoints from all nodes. cpListsTimeout = 30 * time.Second - // cpRestoreTimeout is the timeout for restoring a checkpoint chunk from a node. + // cpRestoreTimeout is the timeout for restoring a checkpoint chunk from the remote peer. cpRestoreTimeout = 60 * time.Second checkpointStatusDone = 0 @@ -37,7 +37,7 @@ var ErrNoUsableCheckpoints = errors.New("storage: no checkpoint could be synced" // CheckpointSyncConfig is the checkpoint sync configuration. type CheckpointSyncConfig struct { - // Disabled specifies whether checkpoint sync should be disabled. In this case the node will + // Disabled specifies whether checkpoint sync should be disabled. In this case the state sync worker will // only sync by applying all diffs from genesis. Disabled bool @@ -81,7 +81,7 @@ func (h *chunkHeap) Pop() any { return ret } -func (n *Node) checkpointChunkFetcher( +func (w *Worker) checkpointChunkFetcher( ctx context.Context, chunkDispatchCh chan *chunk, chunkReturnCh chan *chunk, @@ -103,9 +103,9 @@ func (n *Node) checkpointChunkFetcher( defer cancel() // Fetch chunk from peers. - rsp, pf, err := n.fetchChunk(chunkCtx, chunk) + rsp, pf, err := w.fetchChunk(chunkCtx, chunk) if err != nil { - n.logger.Error("failed to fetch chunk from peers", + w.logger.Error("failed to fetch chunk from peers", "err", err, "chunk", chunk.Index, ) @@ -114,7 +114,7 @@ func (n *Node) checkpointChunkFetcher( } // Restore fetched chunk. - done, err := n.localStorage.Checkpointer().RestoreChunk(chunkCtx, chunk.Index, bytes.NewBuffer(rsp)) + done, err := w.localStorage.Checkpointer().RestoreChunk(chunkCtx, chunk.Index, bytes.NewBuffer(rsp)) cancel() switch { @@ -124,7 +124,7 @@ func (n *Node) checkpointChunkFetcher( chunkReturnCh <- nil return case err != nil: - n.logger.Error("chunk restoration failed", + w.logger.Error("chunk restoration failed", "chunk", chunk.Index, "root", chunk.Root, "err", err, @@ -157,8 +157,8 @@ func (n *Node) checkpointChunkFetcher( // fetchChunk fetches chunk using checkpoint sync p2p protocol client. // // In case of no peers or error, it fallbacks to the legacy storage sync protocol. 
-func (n *Node) fetchChunk(ctx context.Context, chunk *chunk) ([]byte, rpc.PeerFeedback, error) { - rsp1, pf, err := n.checkpointSync.GetCheckpointChunk( +func (w *Worker) fetchChunk(ctx context.Context, chunk *chunk) ([]byte, rpc.PeerFeedback, error) { + rsp1, pf, err := w.checkpointSync.GetCheckpointChunk( ctx, &checkpointsync.GetCheckpointChunkRequest{ Version: chunk.Version, @@ -175,7 +175,7 @@ func (n *Node) fetchChunk(ctx context.Context, chunk *chunk) ([]byte, rpc.PeerFe return rsp1.Chunk, pf, nil } - rsp2, pf, err := n.legacyStorageSync.GetCheckpointChunk( + rsp2, pf, err := w.legacyStorageSync.GetCheckpointChunk( ctx, &synclegacy.GetCheckpointChunkRequest{ Version: chunk.Version, @@ -194,8 +194,8 @@ func (n *Node) fetchChunk(ctx context.Context, chunk *chunk) ([]byte, rpc.PeerFe return rsp2.Chunk, pf, nil } -func (n *Node) handleCheckpoint(check *checkpointsync.Checkpoint, maxParallelRequests uint) (cpStatus int, rerr error) { - if err := n.localStorage.Checkpointer().StartRestore(n.ctx, check.Metadata); err != nil { +func (w *Worker) handleCheckpoint(check *checkpointsync.Checkpoint, maxParallelRequests uint) (cpStatus int, rerr error) { + if err := w.localStorage.Checkpointer().StartRestore(w.ctx, check.Metadata); err != nil { // Any previous restores were already aborted by the driver up the call stack, so // things should have been going smoothly here; bail. return checkpointStatusBail, fmt.Errorf("can't start checkpoint restore: %w", err) @@ -208,9 +208,9 @@ func (n *Node) handleCheckpoint(check *checkpointsync.Checkpoint, maxParallelReq } // Abort has to succeed even if we were interrupted by context cancellation. ctx := context.Background() - if err := n.localStorage.Checkpointer().AbortRestore(ctx); err != nil { + if err := w.localStorage.Checkpointer().AbortRestore(ctx); err != nil { cpStatus = checkpointStatusBail - n.logger.Error("error while aborting checkpoint restore on handler exit, aborting sync", + w.logger.Error("error while aborting checkpoint restore on handler exit, aborting sync", "err", err, ) } @@ -222,7 +222,7 @@ func (n *Node) handleCheckpoint(check *checkpointsync.Checkpoint, maxParallelReq chunkReturnCh := make(chan *chunk, maxParallelRequests) errorCh := make(chan int, maxParallelRequests) - ctx, cancel := context.WithCancel(n.ctx) + ctx, cancel := context.WithCancel(w.ctx) // Spawn the worker group to fetch and restore checkpoint chunks. 
var workerGroup sync.WaitGroup @@ -231,7 +231,7 @@ func (n *Node) handleCheckpoint(check *checkpointsync.Checkpoint, maxParallelReq workerGroup.Add(1) go func() { defer workerGroup.Done() - n.checkpointChunkFetcher(ctx, chunkDispatchCh, chunkReturnCh, errorCh) + w.checkpointChunkFetcher(ctx, chunkDispatchCh, chunkReturnCh, errorCh) }() } go func() { @@ -264,7 +264,7 @@ func (n *Node) handleCheckpoint(check *checkpointsync.Checkpoint, maxParallelReq checkpoint: check, }) } - n.logger.Debug("checkpoint chunks prepared for dispatch", + w.logger.Debug("checkpoint chunks prepared for dispatch", "chunks", len(check.Chunks), "checkpoint_root", check.Root, ) @@ -283,8 +283,8 @@ func (n *Node) handleCheckpoint(check *checkpointsync.Checkpoint, maxParallelReq } select { - case <-n.ctx.Done(): - return checkpointStatusBail, n.ctx.Err() + case <-w.ctx.Done(): + return checkpointStatusBail, w.ctx.Err() case returned := <-chunkReturnCh: if returned == nil { @@ -313,13 +313,13 @@ func (n *Node) handleCheckpoint(check *checkpointsync.Checkpoint, maxParallelReq } } -func (n *Node) getCheckpointList() ([]*checkpointsync.Checkpoint, error) { - ctx, cancel := context.WithTimeout(n.ctx, cpListsTimeout) +func (w *Worker) getCheckpointList() ([]*checkpointsync.Checkpoint, error) { + ctx, cancel := context.WithTimeout(w.ctx, cpListsTimeout) defer cancel() - list, err := n.fetchCheckpoints(ctx) + list, err := w.fetchCheckpoints(ctx) if err != nil { - n.logger.Error("failed to retrieve any checkpoints", + w.logger.Error("failed to retrieve any checkpoints", "err", err, ) return nil, err @@ -334,15 +334,15 @@ func (n *Node) getCheckpointList() ([]*checkpointsync.Checkpoint, error) { // fetchCheckpoints fetches checkpoints using checkpoint sync p2p protocol client. // // In case of no peers, error or no checkpoints, it fallbacks to the legacy storage sync protocol. -func (n *Node) fetchCheckpoints(ctx context.Context) ([]*checkpointsync.Checkpoint, error) { - list1, err := n.checkpointSync.GetCheckpoints(ctx, &checkpointsync.GetCheckpointsRequest{ +func (w *Worker) fetchCheckpoints(ctx context.Context) ([]*checkpointsync.Checkpoint, error) { + list1, err := w.checkpointSync.GetCheckpoints(ctx, &checkpointsync.GetCheckpointsRequest{ Version: 1, }) if err == nil && len(list1) > 0 { // if NO error and at least one checkpoint return list1, nil } - list2, err := n.legacyStorageSync.GetCheckpoints(ctx, &synclegacy.GetCheckpointsRequest{ + list2, err := w.legacyStorageSync.GetCheckpoints(ctx, &synclegacy.GetCheckpointsRequest{ Version: 1, }) if err != nil { @@ -369,8 +369,8 @@ func sortCheckpoints(s []*checkpointsync.Checkpoint) { }) } -func (n *Node) checkCheckpointUsable(cp *checkpointsync.Checkpoint, remainingMask outstandingMask, genesisRound uint64) bool { - namespace := n.commonNode.Runtime.ID() +func (w *Worker) checkCheckpointUsable(cp *checkpointsync.Checkpoint, remainingMask outstandingMask, genesisRound uint64) bool { + namespace := w.commonNode.Runtime.ID() if !namespace.Equal(&cp.Root.Namespace) { // Not for the right runtime. 
return false @@ -380,12 +380,12 @@ func (n *Node) checkCheckpointUsable(cp *checkpointsync.Checkpoint, remainingMas return false } - blk, err := n.commonNode.Runtime.History().GetCommittedBlock(n.ctx, cp.Root.Version) + blk, err := w.commonNode.Runtime.History().GetCommittedBlock(w.ctx, cp.Root.Version) if err != nil { - n.logger.Error("can't get block information for checkpoint, skipping", "err", err, "root", cp.Root) + w.logger.Error("can't get block information for checkpoint, skipping", "err", err, "root", cp.Root) return false } - _, lastIORoot, lastStateRoot := n.GetLastSynced() + _, lastIORoot, lastStateRoot := w.GetLastSynced() lastVersions := map[storageApi.RootType]uint64{ storageApi.RootTypeIO: lastIORoot.Version, storageApi.RootTypeState: lastStateRoot.Version, @@ -401,18 +401,18 @@ func (n *Node) checkCheckpointUsable(cp *checkpointsync.Checkpoint, remainingMas } } } - n.logger.Info("checkpoint for unknown root skipped", "root", cp.Root) + w.logger.Info("checkpoint for unknown root skipped", "root", cp.Root) return false } -func (n *Node) syncCheckpoints(genesisRound uint64, wantOnlyGenesis bool) (*blockSummary, error) { +func (w *Worker) syncCheckpoints(genesisRound uint64, wantOnlyGenesis bool) (*blockSummary, error) { // Store roots and round info for checkpoints that finished syncing. // Round and namespace info will get overwritten as rounds are skipped // for errors, driven by remainingRoots. var syncState blockSummary // Fetch checkpoints from peers. - cps, err := n.getCheckpointList() + cps, err := w.getCheckpointList() if err != nil { return nil, fmt.Errorf("can't get checkpoint list from peers: %w", err) } @@ -440,8 +440,8 @@ func (n *Node) syncCheckpoints(genesisRound uint64, wantOnlyGenesis bool) (*bloc if !multipartRunning { return } - if err := n.localStorage.NodeDB().AbortMultipartInsert(); err != nil { - n.logger.Error("error aborting multipart restore on exit from syncer", + if err := w.localStorage.NodeDB().AbortMultipartInsert(); err != nil { + w.logger.Error("error aborting multipart restore on exit from syncer", "err", err, ) } @@ -449,7 +449,7 @@ func (n *Node) syncCheckpoints(genesisRound uint64, wantOnlyGenesis bool) (*bloc for _, check := range cps { - if check.Root.Version < genesisRound || !n.checkCheckpointUsable(check, remainingRoots, genesisRound) { + if check.Root.Version < genesisRound || !w.checkCheckpointUsable(check, remainingRoots, genesisRound) { continue } @@ -458,10 +458,10 @@ func (n *Node) syncCheckpoints(genesisRound uint64, wantOnlyGenesis bool) (*bloc // previous retries. Aborting multipart works with no multipart in // progress too. 
multipartRunning = false - if err := n.localStorage.NodeDB().AbortMultipartInsert(); err != nil { + if err := w.localStorage.NodeDB().AbortMultipartInsert(); err != nil { return nil, fmt.Errorf("error aborting previous multipart restore: %w", err) } - if err := n.localStorage.NodeDB().StartMultipartInsert(check.Root.Version); err != nil { + if err := w.localStorage.NodeDB().StartMultipartInsert(check.Root.Version); err != nil { return nil, fmt.Errorf("error starting multipart insert for round %d: %w", check.Root.Version, err) } multipartRunning = true @@ -486,18 +486,18 @@ func (n *Node) syncCheckpoints(genesisRound uint64, wantOnlyGenesis bool) (*bloc } } - status, err := n.handleCheckpoint(check, n.checkpointSyncCfg.ChunkFetcherCount) + status, err := w.handleCheckpoint(check, w.checkpointSyncCfg.ChunkFetcherCount) switch status { case checkpointStatusDone: - n.logger.Info("successfully restored from checkpoint", "root", check.Root, "mask", mask) + w.logger.Info("successfully restored from checkpoint", "root", check.Root, "mask", mask) syncState.Namespace = check.Root.Namespace syncState.Round = check.Root.Version syncState.Roots = append(syncState.Roots, check.Root) remainingRoots.remove(check.Root.Type) if remainingRoots.isEmpty() { - if err = n.localStorage.NodeDB().Finalize(syncState.Roots); err != nil { - n.logger.Error("can't finalize version after all checkpoints restored", + if err = w.localStorage.NodeDB().Finalize(syncState.Roots); err != nil { + w.logger.Error("can't finalize version after all checkpoints restored", "err", err, "version", prevVersion, "roots", syncState.Roots, @@ -510,10 +510,10 @@ func (n *Node) syncCheckpoints(genesisRound uint64, wantOnlyGenesis bool) (*bloc } continue case checkpointStatusNext: - n.logger.Info("error trying to restore from checkpoint, trying next most recent", "root", check.Root, "err", err) + w.logger.Info("error trying to restore from checkpoint, trying next most recent", "root", check.Root, "err", err) continue case checkpointStatusBail: - n.logger.Error("error trying to restore from checkpoint, unrecoverable", "root", check.Root, "err", err) + w.logger.Error("error trying to restore from checkpoint, unrecoverable", "root", check.Root, "err", err) return nil, fmt.Errorf("error restoring from checkpoints: %w", err) } } diff --git a/go/worker/storage/committee/checkpoint_sync_test.go b/go/worker/storage/statesync/checkpoint_sync_test.go similarity index 98% rename from go/worker/storage/committee/checkpoint_sync_test.go rename to go/worker/storage/statesync/checkpoint_sync_test.go index d39e50f3239..c9ac133c1bd 100644 --- a/go/worker/storage/committee/checkpoint_sync_test.go +++ b/go/worker/storage/statesync/checkpoint_sync_test.go @@ -1,4 +1,4 @@ -package committee +package statesync import ( "testing" diff --git a/go/worker/storage/committee/metrics.go b/go/worker/storage/statesync/metrics.go similarity index 91% rename from go/worker/storage/committee/metrics.go rename to go/worker/storage/statesync/metrics.go index 7f641f71fdd..4bc6c414df1 100644 --- a/go/worker/storage/committee/metrics.go +++ b/go/worker/storage/statesync/metrics.go @@ -1,4 +1,4 @@ -package committee +package statesync import ( "sync" @@ -49,9 +49,9 @@ var ( prometheusOnce sync.Once ) -func (n *Node) getMetricLabels() prometheus.Labels { +func (w *Worker) getMetricLabels() prometheus.Labels { return prometheus.Labels{ - "runtime": n.commonNode.Runtime.ID().String(), + "runtime": w.commonNode.Runtime.ID().String(), } } diff --git a/go/worker/storage/committee/node.go 
b/go/worker/storage/statesync/state_sync.go
similarity index 73%
rename from go/worker/storage/committee/node.go
rename to go/worker/storage/statesync/state_sync.go
index c4f6a890b4d..4ec5c1b21f7 100644
--- a/go/worker/storage/committee/node.go
+++ b/go/worker/storage/statesync/state_sync.go
@@ -1,4 +1,6 @@
-package committee
+// Package statesync defines the logic responsible for initializing, syncing,
+// and pruning the runtime state using the relevant p2p protocol clients.
+package statesync
 
 import (
 	"container/heap"
@@ -39,7 +41,7 @@ import (
 )
 
 var (
-	_ committee.NodeHooks = (*Node)(nil)
+	_ committee.NodeHooks = (*Worker)(nil)
 
 	// ErrNonLocalBackend is the error returned when the storage backend doesn't implement the LocalBackend interface.
 	ErrNonLocalBackend = errors.New("storage: storage backend doesn't support local storage")
@@ -118,8 +120,21 @@ type finalizeResult struct {
 	err error
 }
 
-// Node watches blocks for storage changes.
-type Node struct { // nolint: maligned
+// Worker is the runtime state sync worker, responsible for syncing state
+// that corresponds to the incoming runtime block headers received from the
+// consensus service.
+//
+// In addition, this worker is responsible for:
+//  1. Initializing the runtime state, possibly using checkpoints (if configured).
+//  2. Pruning the state as specified by the configuration.
+//  3. Optionally creating runtime state checkpoints (used by other nodes) for state sync.
+//  4. Creating (and optionally advertising) state sync p2p protocol clients and servers.
+//  5. Registering node availability when it has synced sufficiently close to
+//     the latest known block header.
+//
+// Suggestion: This worker should not be responsible for creating and advertising the p2p protocols.
+// Instead, it should receive a p2p client (or, better, an interface) for fetching storage diffs and checkpoints.
+type Worker struct { // nolint: maligned
 	commonNode *committee.Node
 
 	roleProvider registration.RoleProvider
@@ -162,27 +177,28 @@ type Node struct { // nolint: maligned
 	initCh chan struct{}
 }
 
-func NewNode(
+// New creates a new state sync worker.
+func New(
 	commonNode *committee.Node,
 	roleProvider registration.RoleProvider,
 	rpcRoleProvider registration.RoleProvider,
 	workerCommonCfg workerCommon.Config,
 	localStorage storageApi.LocalBackend,
 	checkpointSyncCfg *CheckpointSyncConfig,
-) (*Node, error) {
+) (*Worker, error) {
 	initMetrics()
 
 	// Create the fetcher pool.
 	fetchPool := workerpool.New("storage_fetch/" + commonNode.Runtime.ID().String())
 	fetchPool.Resize(config.GlobalConfig.Storage.FetcherCount)
 
-	n := &Node{
+	w := &Worker{
 		commonNode:      commonNode,
 		roleProvider:    roleProvider,
 		rpcRoleProvider: rpcRoleProvider,
 
-		logger: logging.GetLogger("worker/storage/committee").With("runtime_id", commonNode.Runtime.ID()),
+		logger: logging.GetLogger("worker/storage/statesync").With("runtime_id", commonNode.Runtime.ID()),
 
 		workerCommonCfg: workerCommonCfg,
@@ -208,21 +224,21 @@ func NewNode(
 	}
 
 	// Initialize sync state.
-	n.syncedState.Round = defaultUndefinedRound
+	w.syncedState.Round = defaultUndefinedRound
 
-	n.ctx, n.ctxCancel = context.WithCancel(context.Background())
+	w.ctx, w.ctxCancel = context.WithCancel(context.Background())
 
 	// Create a checkpointer (even if checkpointing is disabled) to ensure the genesis checkpoint is available.
- checkpointer, err := n.newCheckpointer(n.ctx, commonNode, localStorage) + checkpointer, err := w.newCheckpointer(w.ctx, commonNode, localStorage) if err != nil { return nil, fmt.Errorf("failed to create checkpointer: %w", err) } - n.checkpointer = checkpointer + w.checkpointer = checkpointer // Register prune handler. commonNode.Runtime.History().Pruner().RegisterHandler(&pruneHandler{ - logger: n.logger, - node: n, + logger: w.logger, + worker: w, }) // Advertise and serve p2p protocols. @@ -236,14 +252,14 @@ func NewNode( } // Create p2p protocol clients. - n.legacyStorageSync = synclegacy.NewClient(commonNode.P2P, commonNode.ChainContext, commonNode.Runtime.ID()) - n.diffSync = diffsync.NewClient(commonNode.P2P, commonNode.ChainContext, commonNode.Runtime.ID()) - n.checkpointSync = checkpointsync.NewClient(commonNode.P2P, commonNode.ChainContext, commonNode.Runtime.ID()) + w.legacyStorageSync = synclegacy.NewClient(commonNode.P2P, commonNode.ChainContext, commonNode.Runtime.ID()) + w.diffSync = diffsync.NewClient(commonNode.P2P, commonNode.ChainContext, commonNode.Runtime.ID()) + w.checkpointSync = checkpointsync.NewClient(commonNode.P2P, commonNode.ChainContext, commonNode.Runtime.ID()) - return n, nil + return w, nil } -func (n *Node) newCheckpointer(ctx context.Context, commonNode *committee.Node, localStorage storageApi.LocalBackend) (checkpoint.Checkpointer, error) { +func (w *Worker) newCheckpointer(ctx context.Context, commonNode *committee.Node, localStorage storageApi.LocalBackend) (checkpoint.Checkpointer, error) { checkInterval := checkpoint.CheckIntervalDisabled if config.GlobalConfig.Storage.Checkpointer.Enabled { checkInterval = config.GlobalConfig.Storage.Checkpointer.CheckInterval @@ -300,100 +316,95 @@ func (n *Node) newCheckpointer(ctx context.Context, commonNode *committee.Node, // Service interface. -// Name returns the service name. -func (n *Node) Name() string { - return "committee node" -} - // Start causes the worker to start responding to CometBFT new block events. -func (n *Node) Start() error { - go n.worker() +func (w *Worker) Start() error { + go w.worker() if config.GlobalConfig.Storage.Checkpointer.Enabled { - go n.consensusCheckpointSyncer() + go w.consensusCheckpointSyncer() } return nil } // Stop causes the worker to stop watching and shut down. -func (n *Node) Stop() { - n.statusLock.Lock() - n.status = api.StatusStopping - n.statusLock.Unlock() +func (w *Worker) Stop() { + w.statusLock.Lock() + w.status = api.StatusStopping + w.statusLock.Unlock() - n.fetchPool.Stop() + w.fetchPool.Stop() - n.ctxCancel() + w.ctxCancel() } // Quit returns a channel that will be closed when the worker stops. -func (n *Node) Quit() <-chan struct{} { - return n.quitCh +func (w *Worker) Quit() <-chan struct{} { + return w.quitCh } // Cleanup cleans up any leftover state after the worker is stopped. -func (n *Node) Cleanup() { +func (w *Worker) Cleanup() { // Nothing to do here? } // Initialized returns a channel that will be closed once the worker finished starting up. -func (n *Node) Initialized() <-chan struct{} { - return n.initCh +func (w *Worker) Initialized() <-chan struct{} { + return w.initCh } -// GetStatus returns the storage committee node status. -func (n *Node) GetStatus(context.Context) (*api.Status, error) { - n.syncedLock.RLock() - defer n.syncedLock.RUnlock() +// GetStatus returns the state sync worker status. 
+func (w *Worker) GetStatus(context.Context) (*api.Status, error) { + w.syncedLock.RLock() + defer w.syncedLock.RUnlock() - n.statusLock.RLock() - defer n.statusLock.RUnlock() + w.statusLock.RLock() + defer w.statusLock.RUnlock() return &api.Status{ - LastFinalizedRound: n.syncedState.Round, - Status: n.status, + LastFinalizedRound: w.syncedState.Round, + Status: w.status, }, nil } -func (n *Node) PauseCheckpointer(pause bool) error { +func (w *Worker) PauseCheckpointer(pause bool) error { if !commonFlags.DebugDontBlameOasis() { return api.ErrCantPauseCheckpointer } - n.checkpointer.Pause(pause) + w.checkpointer.Pause(pause) return nil } -// GetLocalStorage returns the local storage backend used by this storage node. -func (n *Node) GetLocalStorage() storageApi.LocalBackend { - return n.localStorage +// GetLocalStorage returns the local storage backend used by this state sync worker. +func (w *Worker) GetLocalStorage() storageApi.LocalBackend { + return w.localStorage } // NodeHooks implementation. // HandleNewBlockEarlyLocked is guarded by CrossNode. -func (n *Node) HandleNewBlockEarlyLocked(*runtime.BlockInfo) { +func (w *Worker) HandleNewBlockEarlyLocked(*runtime.BlockInfo) { // Nothing to do here. } // HandleNewBlockLocked is guarded by CrossNode. -func (n *Node) HandleNewBlockLocked(bi *runtime.BlockInfo) { +func (w *Worker) HandleNewBlockLocked(bi *runtime.BlockInfo) { // Notify the state syncer that there is a new block. - n.blockCh.In() <- bi.RuntimeBlock + w.blockCh.In() <- bi.RuntimeBlock } // HandleRuntimeHostEventLocked is guarded by CrossNode. -func (n *Node) HandleRuntimeHostEventLocked(*host.Event) { +func (w *Worker) HandleRuntimeHostEventLocked(*host.Event) { // Nothing to do here. } // Watcher implementation. // GetLastSynced returns the height, IORoot hash and StateRoot hash of the last block that was fully synced to. -func (n *Node) GetLastSynced() (uint64, storageApi.Root, storageApi.Root) { - n.syncedLock.RLock() - defer n.syncedLock.RUnlock() +func (w *Worker) GetLastSynced() (uint64, storageApi.Root, storageApi.Root) { + w.syncedLock.RLock() + defer w.syncedLock.RUnlock() var io, state storageApi.Root - for _, root := range n.syncedState.Roots { + for _, root := range w.syncedState.Roots { switch root.Type { case storageApi.RootTypeIO: io = root @@ -402,10 +413,10 @@ func (n *Node) GetLastSynced() (uint64, storageApi.Root, storageApi.Root) { } } - return n.syncedState.Round, io, state + return w.syncedState.Round, io, state } -func (n *Node) fetchDiff(round uint64, prevRoot, thisRoot storageApi.Root) { +func (w *Worker) fetchDiff(round uint64, prevRoot, thisRoot storageApi.Root) { result := &fetchedDiff{ fetched: false, pf: rpc.NewNopPeerFeedback(), @@ -415,13 +426,13 @@ func (n *Node) fetchDiff(round uint64, prevRoot, thisRoot storageApi.Root) { } defer func() { select { - case n.diffCh <- result: - case <-n.ctx.Done(): + case w.diffCh <- result: + case <-w.ctx.Done(): } }() // Check if the new root doesn't already exist. - if n.localStorage.NodeDB().HasRoot(thisRoot) { + if w.localStorage.NodeDB().HasRoot(thisRoot) { return } @@ -436,15 +447,15 @@ func (n *Node) fetchDiff(round uint64, prevRoot, thisRoot storageApi.Root) { } // New root does not yet exist in storage and we need to fetch it from a peer. 
- n.logger.Debug("calling GetDiff", + w.logger.Debug("calling GetDiff", "old_root", prevRoot, "new_root", thisRoot, ) - ctx, cancel := context.WithCancel(n.ctx) + ctx, cancel := context.WithCancel(w.ctx) defer cancel() - wl, pf, err := n.getDiff(ctx, prevRoot, thisRoot) + wl, pf, err := w.getDiff(ctx, prevRoot, thisRoot) if err != nil { result.err = err return @@ -456,35 +467,35 @@ func (n *Node) fetchDiff(round uint64, prevRoot, thisRoot storageApi.Root) { // getDiff fetches writelog using diff sync p2p protocol client. // // In case of no peers or error, it fallbacks to the legacy storage sync protocol. -func (n *Node) getDiff(ctx context.Context, prevRoot, thisRoot storageApi.Root) (storageApi.WriteLog, rpc.PeerFeedback, error) { - rsp1, pf, err := n.diffSync.GetDiff(ctx, &diffsync.GetDiffRequest{StartRoot: prevRoot, EndRoot: thisRoot}) +func (w *Worker) getDiff(ctx context.Context, prevRoot, thisRoot storageApi.Root) (storageApi.WriteLog, rpc.PeerFeedback, error) { + rsp1, pf, err := w.diffSync.GetDiff(ctx, &diffsync.GetDiffRequest{StartRoot: prevRoot, EndRoot: thisRoot}) if err == nil { // if NO error return rsp1.WriteLog, pf, nil } - rsp2, pf, err := n.legacyStorageSync.GetDiff(ctx, &synclegacy.GetDiffRequest{StartRoot: prevRoot, EndRoot: thisRoot}) + rsp2, pf, err := w.legacyStorageSync.GetDiff(ctx, &synclegacy.GetDiffRequest{StartRoot: prevRoot, EndRoot: thisRoot}) if err != nil { return nil, nil, err } return rsp2.WriteLog, pf, nil } -func (n *Node) finalize(summary *blockSummary) { - err := n.localStorage.NodeDB().Finalize(summary.Roots) +func (w *Worker) finalize(summary *blockSummary) { + err := w.localStorage.NodeDB().Finalize(summary.Roots) switch err { case nil: - n.logger.Debug("storage round finalized", + w.logger.Debug("storage round finalized", "round", summary.Round, ) case storageApi.ErrAlreadyFinalized: // This can happen if we are restoring after a roothash migration or if // we crashed before updating the sync state. - n.logger.Warn("storage round already finalized", + w.logger.Warn("storage round already finalized", "round", summary.Round, ) err = nil default: - n.logger.Error("failed to finalize storage round", + w.logger.Error("failed to finalize storage round", "err", err, "round", summary.Round, ) @@ -496,31 +507,31 @@ func (n *Node) finalize(summary *blockSummary) { } select { - case n.finalizeCh <- result: - case <-n.ctx.Done(): + case w.finalizeCh <- result: + case <-w.ctx.Done(): } } -func (n *Node) initGenesis(rt *registryApi.Runtime, genesisBlock *block.Block) error { - n.logger.Info("initializing storage at genesis") +func (w *Worker) initGenesis(rt *registryApi.Runtime, genesisBlock *block.Block) error { + w.logger.Info("initializing storage at genesis") // Check what the latest finalized version in the database is as we may be using a database // from a previous version or network. - latestVersion, alreadyInitialized := n.localStorage.NodeDB().GetLatestVersion() + latestVersion, alreadyInitialized := w.localStorage.NodeDB().GetLatestVersion() // Finalize any versions that were not yet finalized in the old database. This is only possible // as long as there is only one non-finalized root per version. Note that we also cannot be sure // that any of these roots are valid, but this is fine as long as the final version matches the // genesis root. 
if alreadyInitialized { - n.logger.Debug("already initialized, finalizing any non-finalized versions", + w.logger.Debug("already initialized, finalizing any non-finalized versions", "genesis_state_root", genesisBlock.Header.StateRoot, "genesis_round", genesisBlock.Header.Round, "latest_version", latestVersion, ) for v := latestVersion + 1; v < genesisBlock.Header.Round; v++ { - roots, err := n.localStorage.NodeDB().GetRootsForVersion(v) + roots, err := w.localStorage.NodeDB().GetRootsForVersion(v) if err != nil { return fmt.Errorf("failed to fetch roots for version %d: %w", v, err) } @@ -535,7 +546,7 @@ func (n *Node) initGenesis(rt *registryApi.Runtime, genesisBlock *block.Block) e break // We must have exactly one non-finalized state root to continue. } - err = n.localStorage.NodeDB().Finalize(stateRoots) + err = w.localStorage.NodeDB().Finalize(stateRoots) if err != nil { return fmt.Errorf("failed to finalize version %d: %w", v, err) } @@ -559,14 +570,14 @@ func (n *Node) initGenesis(rt *registryApi.Runtime, genesisBlock *block.Block) e maybeRoot := stateRoot maybeRoot.Version = latestVersion - if n.localStorage.NodeDB().HasRoot(maybeRoot) { - n.logger.Debug("latest version earlier than genesis state root, filling in versions", + if w.localStorage.NodeDB().HasRoot(maybeRoot) { + w.logger.Debug("latest version earlier than genesis state root, filling in versions", "genesis_state_root", genesisBlock.Header.StateRoot, "genesis_round", genesisBlock.Header.Round, "latest_version", latestVersion, ) for v := latestVersion; v < stateRoot.Version; v++ { - err := n.localStorage.Apply(n.ctx, &storageApi.ApplyRequest{ + err := w.localStorage.Apply(w.ctx, &storageApi.ApplyRequest{ Namespace: rt.ID, RootType: storageApi.RootTypeState, SrcRound: v, @@ -579,7 +590,7 @@ func (n *Node) initGenesis(rt *registryApi.Runtime, genesisBlock *block.Block) e return fmt.Errorf("failed to fill in version %d: %w", v, err) } - err = n.localStorage.NodeDB().Finalize([]storageApi.Root{{ + err = w.localStorage.NodeDB().Finalize([]storageApi.Root{{ Namespace: rt.ID, Version: v + 1, Type: storageApi.RootTypeState, @@ -594,14 +605,14 @@ func (n *Node) initGenesis(rt *registryApi.Runtime, genesisBlock *block.Block) e } default: // Latest finalized version is the same or ahead, root must exist. - compatible = n.localStorage.NodeDB().HasRoot(stateRoot) + compatible = w.localStorage.NodeDB().HasRoot(stateRoot) } // If we are incompatible and the local version is greater or the same as the genesis version, // we cannot do anything. If the local version is lower we assume the node will sync from a // different node. if !compatible && latestVersion >= stateRoot.Version { - n.logger.Error("existing state is incompatible with runtime genesis state", + w.logger.Error("existing state is incompatible with runtime genesis state", "genesis_state_root", genesisBlock.Header.StateRoot, "genesis_round", genesisBlock.Header.Round, "latest_version", latestVersion, @@ -611,46 +622,46 @@ func (n *Node) initGenesis(rt *registryApi.Runtime, genesisBlock *block.Block) e if !compatible { // Database is empty, so assume the state will be replicated from another node. 
- n.logger.Warn("non-empty state root but no state available, assuming replication", + w.logger.Warn("non-empty state root but no state available, assuming replication", "state_root", genesisBlock.Header.StateRoot, ) - n.checkpointSyncForced = true + w.checkpointSyncForced = true } return nil } -func (n *Node) flushSyncedState(summary *blockSummary) (uint64, error) { - n.syncedLock.Lock() - defer n.syncedLock.Unlock() +func (w *Worker) flushSyncedState(summary *blockSummary) (uint64, error) { + w.syncedLock.Lock() + defer w.syncedLock.Unlock() - n.syncedState = *summary - if err := n.commonNode.Runtime.History().StorageSyncCheckpoint(n.syncedState.Round); err != nil { + w.syncedState = *summary + if err := w.commonNode.Runtime.History().StorageSyncCheckpoint(w.syncedState.Round); err != nil { return 0, err } - return n.syncedState.Round, nil + return w.syncedState.Round, nil } -func (n *Node) consensusCheckpointSyncer() { +func (w *Worker) consensusCheckpointSyncer() { // Make sure we always create a checkpoint when the consensus layer creates a checkpoint. The // reason why we do this is to make it faster for storage nodes that use consensus state sync // to catch up as exactly the right checkpoint will be available. - consensusCp := n.commonNode.Consensus.Checkpointer() + consensusCp := w.commonNode.Consensus.Checkpointer() if consensusCp == nil { return } // Wait for the common node to be initialized. select { - case <-n.commonNode.Initialized(): - case <-n.ctx.Done(): + case <-w.commonNode.Initialized(): + case <-w.ctx.Done(): return } // Determine the maximum number of consensus checkpoints to keep. - consensusParams, err := n.commonNode.Consensus.Core().GetParameters(n.ctx, consensus.HeightLatest) + consensusParams, err := w.commonNode.Consensus.Core().GetParameters(w.ctx, consensus.HeightLatest) if err != nil { - n.logger.Error("failed to fetch consensus parameters", + w.logger.Error("failed to fetch consensus parameters", "err", err, ) return @@ -658,7 +669,7 @@ func (n *Node) consensusCheckpointSyncer() { ch, sub, err := consensusCp.WatchCheckpoints() if err != nil { - n.logger.Error("failed to watch checkpoints", + w.logger.Error("failed to watch checkpoints", "err", err, ) return @@ -679,9 +690,9 @@ func (n *Node) consensusCheckpointSyncer() { }() for { select { - case <-n.quitCh: + case <-w.quitCh: return - case <-n.ctx.Done(): + case <-w.ctx.Done(): return case version := <-ch: // We need to wait for the next version as that is what will be in the consensus @@ -692,15 +703,15 @@ func (n *Node) consensusCheckpointSyncer() { versions = versions[1:] } - n.logger.Debug("consensus checkpoint detected, queuing runtime checkpoint", + w.logger.Debug("consensus checkpoint detected, queuing runtime checkpoint", "version", version+1, "num_versions", len(versions), ) if blkCh == nil { - blkCh, blkSub, err = n.commonNode.Consensus.Core().WatchBlocks(n.ctx) + blkCh, blkSub, err = w.commonNode.Consensus.Core().WatchBlocks(w.ctx) if err != nil { - n.logger.Error("failed to watch blocks", + w.logger.Error("failed to watch blocks", "err", err, ) continue @@ -709,7 +720,7 @@ func (n *Node) consensusCheckpointSyncer() { case blk := <-blkCh: // If there's nothing remaining, unsubscribe. 
if len(versions) == 0 { - n.logger.Debug("no more queued consensus checkpoint versions") + w.logger.Debug("no more queued consensus checkpoint versions") blkSub.Close() blkSub = nil @@ -727,12 +738,12 @@ func (n *Node) consensusCheckpointSyncer() { // Lookup what runtime round corresponds to the given consensus layer version and make // sure we checkpoint it. - blk, err := n.commonNode.Consensus.RootHash().GetLatestBlock(n.ctx, &roothashApi.RuntimeRequest{ - RuntimeID: n.commonNode.Runtime.ID(), + blk, err := w.commonNode.Consensus.RootHash().GetLatestBlock(w.ctx, &roothashApi.RuntimeRequest{ + RuntimeID: w.commonNode.Runtime.ID(), Height: int64(version), }) if err != nil { - n.logger.Error("failed to get runtime block corresponding to consensus checkpoint", + w.logger.Error("failed to get runtime block corresponding to consensus checkpoint", "err", err, "height", version, ) @@ -741,11 +752,11 @@ func (n *Node) consensusCheckpointSyncer() { // We may have not yet synced the corresponding runtime round locally. In this case // we need to wait until this is the case. - n.syncedLock.RLock() - lastSyncedRound := n.syncedState.Round - n.syncedLock.RUnlock() + w.syncedLock.RLock() + lastSyncedRound := w.syncedState.Round + w.syncedLock.RUnlock() if blk.Header.Round > lastSyncedRound { - n.logger.Debug("runtime round not available yet for checkpoint, waiting", + w.logger.Debug("runtime round not available yet for checkpoint, waiting", "height", version, "round", blk.Header.Round, "last_synced_round", lastSyncedRound, @@ -755,12 +766,12 @@ func (n *Node) consensusCheckpointSyncer() { } // Force runtime storage checkpointer to create a checkpoint at this round. - n.logger.Info("consensus checkpoint, force runtime checkpoint", + w.logger.Info("consensus checkpoint, force runtime checkpoint", "height", version, "round", blk.Header.Round, ) - n.checkpointer.ForceCheckpoint(blk.Header.Round) + w.checkpointer.ForceCheckpoint(blk.Header.Round) } versions = newVersions } @@ -768,105 +779,105 @@ func (n *Node) consensusCheckpointSyncer() { } // This is only called from the main worker goroutine, so no locking should be necessary. 
-func (n *Node) nudgeAvailability(lastSynced, latest uint64) {
-	if lastSynced == n.undefinedRound || latest == n.undefinedRound {
+func (w *Worker) nudgeAvailability(lastSynced, latest uint64) {
+	if lastSynced == w.undefinedRound || latest == w.undefinedRound {
 		return
 	}
-	if latest-lastSynced < maximumRoundDelayForAvailability && !n.roleAvailable {
-		n.roleProvider.SetAvailable(func(_ *node.Node) error {
+	if latest-lastSynced < maximumRoundDelayForAvailability && !w.roleAvailable {
+		w.roleProvider.SetAvailable(func(_ *node.Node) error {
 			return nil
 		})
-		if n.rpcRoleProvider != nil {
-			n.rpcRoleProvider.SetAvailable(func(_ *node.Node) error {
+		if w.rpcRoleProvider != nil {
+			w.rpcRoleProvider.SetAvailable(func(_ *node.Node) error {
 				return nil
 			})
 		}
-		n.roleAvailable = true
+		w.roleAvailable = true
 	}
-	if latest-lastSynced > minimumRoundDelayForUnavailability && n.roleAvailable {
-		n.roleProvider.SetUnavailable()
-		if n.rpcRoleProvider != nil {
-			n.rpcRoleProvider.SetUnavailable()
+	if latest-lastSynced > minimumRoundDelayForUnavailability && w.roleAvailable {
+		w.roleProvider.SetUnavailable()
+		if w.rpcRoleProvider != nil {
+			w.rpcRoleProvider.SetUnavailable()
 		}
-		n.roleAvailable = false
+		w.roleAvailable = false
 	}
 }
 
-func (n *Node) worker() { // nolint: gocyclo
-	defer close(n.quitCh)
-	defer close(n.diffCh)
+func (w *Worker) worker() { // nolint: gocyclo
+	defer close(w.quitCh)
+	defer close(w.diffCh)
 
 	// Wait for the common node to be initialized.
 	select {
-	case <-n.commonNode.Initialized():
-	case <-n.ctx.Done():
-		close(n.initCh)
+	case <-w.commonNode.Initialized():
+	case <-w.ctx.Done():
+		close(w.initCh)
 		return
 	}
 
-	n.logger.Info("starting committee node")
+	w.logger.Info("starting state sync worker")
 
-	n.statusLock.Lock()
-	n.status = api.StatusStarting
-	n.statusLock.Unlock()
+	w.statusLock.Lock()
+	w.status = api.StatusStarting
+	w.statusLock.Unlock()
 
 	// Determine genesis block.
-	genesisBlock, err := n.commonNode.Consensus.RootHash().GetGenesisBlock(n.ctx, &roothashApi.RuntimeRequest{
-		RuntimeID: n.commonNode.Runtime.ID(),
+	genesisBlock, err := w.commonNode.Consensus.RootHash().GetGenesisBlock(w.ctx, &roothashApi.RuntimeRequest{
+		RuntimeID: w.commonNode.Runtime.ID(),
 		Height:    consensus.HeightLatest,
 	})
 	if err != nil {
-		n.logger.Error("can't retrieve genesis block", "err", err)
+		w.logger.Error("can't retrieve genesis block", "err", err)
 		return
 	}
-	n.undefinedRound = genesisBlock.Header.Round - 1
+	w.undefinedRound = genesisBlock.Header.Round - 1
 
 	// Determine last finalized storage version.
-	if version, dbNonEmpty := n.localStorage.NodeDB().GetLatestVersion(); dbNonEmpty {
+	if version, dbNonEmpty := w.localStorage.NodeDB().GetLatestVersion(); dbNonEmpty {
 		var blk *block.Block
-		blk, err = n.commonNode.Runtime.History().GetCommittedBlock(n.ctx, version)
+		blk, err = w.commonNode.Runtime.History().GetCommittedBlock(w.ctx, version)
 		switch err {
 		case nil:
 			// Set last synced version to last finalized storage version.
-			if _, err = n.flushSyncedState(summaryFromBlock(blk)); err != nil {
-				n.logger.Error("failed to flush synced state", "err", err)
+			if _, err = w.flushSyncedState(summaryFromBlock(blk)); err != nil {
+				w.logger.Error("failed to flush synced state", "err", err)
 				return
 			}
 		default:
 			// Failed to fetch historic block. This is fine when the network just went through a
 			// dump/restore upgrade and we don't have any information before genesis. We treat the
 			// database as unsynced and will proceed to either use checkpoints or sync iteratively.
- n.logger.Warn("failed to fetch historic block", + w.logger.Warn("failed to fetch historic block", "err", err, "round", version, ) } } - n.syncedLock.RLock() - cachedLastRound := n.syncedState.Round - n.syncedLock.RUnlock() + w.syncedLock.RLock() + cachedLastRound := w.syncedState.Round + w.syncedLock.RUnlock() if cachedLastRound == defaultUndefinedRound || cachedLastRound < genesisBlock.Header.Round { - cachedLastRound = n.undefinedRound + cachedLastRound = w.undefinedRound } // Initialize genesis from the runtime descriptor. - isInitialStartup := (cachedLastRound == n.undefinedRound) + isInitialStartup := (cachedLastRound == w.undefinedRound) if isInitialStartup { - n.statusLock.Lock() - n.status = api.StatusInitializingGenesis - n.statusLock.Unlock() + w.statusLock.Lock() + w.status = api.StatusInitializingGenesis + w.statusLock.Unlock() var rt *registryApi.Runtime - rt, err = n.commonNode.Runtime.ActiveDescriptor(n.ctx) + rt, err = w.commonNode.Runtime.ActiveDescriptor(w.ctx) if err != nil { - n.logger.Error("failed to retrieve runtime registry descriptor", + w.logger.Error("failed to retrieve runtime registry descriptor", "err", err, ) return } - if err = n.initGenesis(rt, genesisBlock); err != nil { - n.logger.Error("failed to initialize storage at genesis", + if err = w.initGenesis(rt, genesisBlock); err != nil { + w.logger.Error("failed to initialize storage at genesis", "err", err, ) return @@ -874,28 +885,28 @@ func (n *Node) worker() { // nolint: gocyclo } // Notify the checkpointer of the genesis round so it can be checkpointed. - if n.checkpointer != nil { - n.checkpointer.ForceCheckpoint(genesisBlock.Header.Round) - n.checkpointer.Flush() + if w.checkpointer != nil { + w.checkpointer.ForceCheckpoint(genesisBlock.Header.Round) + w.checkpointer.Flush() } // Check if we are able to fetch the first block that we would be syncing if we used iterative // syncing. In case we cannot (likely because we synced the consensus layer via state sync), we // must wait for a later checkpoint to become available. - if !n.checkpointSyncForced { - n.statusLock.Lock() - n.status = api.StatusSyncStartCheck - n.statusLock.Unlock() + if !w.checkpointSyncForced { + w.statusLock.Lock() + w.status = api.StatusSyncStartCheck + w.statusLock.Unlock() // Determine what is the first round that we would need to sync. iterativeSyncStart := cachedLastRound - if iterativeSyncStart == n.undefinedRound { + if iterativeSyncStart == w.undefinedRound { iterativeSyncStart++ } // Check if we actually have information about that round. This assumes that any reindexing // was already performed (the common node would not indicate being initialized otherwise). - _, err = n.commonNode.Runtime.History().GetCommittedBlock(n.ctx, iterativeSyncStart) + _, err = w.commonNode.Runtime.History().GetCommittedBlock(w.ctx, iterativeSyncStart) SyncStartCheck: switch { case err == nil: @@ -903,7 +914,7 @@ func (n *Node) worker() { // nolint: gocyclo // No information is available about the initial round. Query the earliest historic // block and check if that block has the genesis state root and empty I/O root. var earlyBlk *block.Block - earlyBlk, err = n.commonNode.Runtime.History().GetEarliestBlock(n.ctx) + earlyBlk, err = w.commonNode.Runtime.History().GetEarliestBlock(w.ctx) switch err { case nil: // Make sure the state root is still the same as at genesis time. @@ -917,13 +928,13 @@ func (n *Node) worker() { // nolint: gocyclo // If this is the case, we can start syncing from this round instead. 
Fill in the // remaining versions to make sure they actually exist in the database. - n.logger.Debug("filling in versions to genesis", + w.logger.Debug("filling in versions to genesis", "genesis_round", genesisBlock.Header.Round, "earliest_round", earlyBlk.Header.Round, ) for v := genesisBlock.Header.Round; v < earlyBlk.Header.Round; v++ { - err = n.localStorage.Apply(n.ctx, &storageApi.ApplyRequest{ - Namespace: n.commonNode.Runtime.ID(), + err = w.localStorage.Apply(w.ctx, &storageApi.ApplyRequest{ + Namespace: w.commonNode.Runtime.ID(), RootType: storageApi.RootTypeState, SrcRound: v, SrcRoot: genesisBlock.Header.StateRoot, @@ -937,31 +948,31 @@ func (n *Node) worker() { // nolint: gocyclo // Ignore already finalized versions. continue default: - n.logger.Error("failed to fill in version", + w.logger.Error("failed to fill in version", "version", v, "err", err, ) return } - err = n.localStorage.NodeDB().Finalize([]storageApi.Root{{ - Namespace: n.commonNode.Runtime.ID(), + err = w.localStorage.NodeDB().Finalize([]storageApi.Root{{ + Namespace: w.commonNode.Runtime.ID(), Version: v + 1, Type: storageApi.RootTypeState, Hash: genesisBlock.Header.StateRoot, // We can ignore I/O roots. }}) if err != nil { - n.logger.Error("failed to finalize filled in version", + w.logger.Error("failed to finalize filled in version", "version", v, "err", err, ) return } } - cachedLastRound, err = n.flushSyncedState(summaryFromBlock(earlyBlk)) + cachedLastRound, err = w.flushSyncedState(summaryFromBlock(earlyBlk)) if err != nil { - n.logger.Error("failed to flush synced state", + w.logger.Error("failed to flush synced state", "err", err, ) return @@ -970,26 +981,26 @@ func (n *Node) worker() { // nolint: gocyclo break SyncStartCheck default: // This should never happen as the block should exist. - n.logger.Warn("failed to query earliest block in local history", + w.logger.Warn("failed to query earliest block in local history", "err", err, ) } // No information is available about this round, force checkpoint sync. - n.logger.Warn("forcing checkpoint sync as we don't have authoritative block info", + w.logger.Warn("forcing checkpoint sync as we don't have authoritative block info", "round", iterativeSyncStart, ) - n.checkpointSyncForced = true + w.checkpointSyncForced = true default: // Unknown error while fetching block information, abort. - n.logger.Error("failed to query block", + w.logger.Error("failed to query block", "err", err, ) return } } - n.logger.Info("worker initialized", + w.logger.Info("worker initialized", "genesis_round", genesisBlock.Header.Round, "last_synced", cachedLastRound, ) @@ -1008,10 +1019,10 @@ func (n *Node) worker() { // nolint: gocyclo // to a later state which may not be desired given that checkpoint sync has been explicitly // disabled via config. 
// - if (isInitialStartup && !n.checkpointSyncCfg.Disabled) || n.checkpointSyncForced { - n.statusLock.Lock() - n.status = api.StatusSyncingCheckpoints - n.statusLock.Unlock() + if (isInitialStartup && !w.checkpointSyncCfg.Disabled) || w.checkpointSyncForced { + w.statusLock.Lock() + w.status = api.StatusSyncingCheckpoints + w.statusLock.Unlock() var ( summary *blockSummary @@ -1019,17 +1030,17 @@ func (n *Node) worker() { // nolint: gocyclo ) CheckpointSyncRetry: for { - summary, err = n.syncCheckpoints(genesisBlock.Header.Round, n.checkpointSyncCfg.Disabled) + summary, err = w.syncCheckpoints(genesisBlock.Header.Round, w.checkpointSyncCfg.Disabled) if err == nil { break } attempt++ - switch n.checkpointSyncForced { + switch w.checkpointSyncForced { case true: // We have no other options but to perform a checkpoint sync as we are missing // either state or authoritative blocks. - n.logger.Info("checkpoint sync required, retrying", + w.logger.Info("checkpoint sync required, retrying", "err", err, "attempt", attempt, ) @@ -1041,36 +1052,36 @@ func (n *Node) worker() { // nolint: gocyclo // Try syncing again. The main reason for this is the sync failing due to a // checkpoint pruning race condition (where nodes list a checkpoint which is // then deleted just before we request its chunks). One retry is enough. - n.logger.Info("first checkpoint sync failed, trying once more", "err", err) + w.logger.Info("first checkpoint sync failed, trying once more", "err", err) } // Delay before retrying. select { case <-time.After(checkpointSyncRetryDelay): - case <-n.ctx.Done(): + case <-w.ctx.Done(): return } } if err != nil { - n.logger.Info("checkpoint sync failed", "err", err) + w.logger.Info("checkpoint sync failed", "err", err) } else { - cachedLastRound, err = n.flushSyncedState(summary) + cachedLastRound, err = w.flushSyncedState(summary) if err != nil { - n.logger.Error("failed to flush synced state", + w.logger.Error("failed to flush synced state", "err", err, ) return } lastFullyAppliedRound = cachedLastRound - n.logger.Info("checkpoint sync succeeded", + w.logger.Info("checkpoint sync succeeded", logging.LogEvent, LogEventCheckpointSyncSuccess, ) } } - close(n.initCh) + close(w.initCh) // Don't register availability immediately, we want to know first how far behind consensus we are. 
- latestBlockRound := n.undefinedRound + latestBlockRound := w.undefinedRound heartbeat := heartbeat{} heartbeat.reset() @@ -1097,10 +1108,10 @@ func (n *Node) worker() { // nolint: gocyclo syncingRounds[i] = syncing if i == latestBlockRound { - storageWorkerLastPendingRound.With(n.getMetricLabels()).Set(float64(i)) + storageWorkerLastPendingRound.With(w.getMetricLabels()).Set(float64(i)) } } - n.logger.Debug("preparing round sync", + w.logger.Debug("preparing round sync", "round", i, "outstanding_mask", syncing.outstanding, "awaiting_retry", syncing.awaitingRetry, @@ -1128,18 +1139,18 @@ func (n *Node) worker() { // nolint: gocyclo if !syncing.outstanding.contains(rootType) && syncing.awaitingRetry.contains(rootType) { syncing.scheduleDiff(rootType) wg.Add(1) - n.fetchPool.Submit(func() { + w.fetchPool.Submit(func() { defer wg.Done() - n.fetchDiff(this.Round, prevRoots[i], this.Roots[i]) + w.fetchDiff(this.Round, prevRoots[i], this.Roots[i]) }) } } } } - n.statusLock.Lock() - n.status = api.StatusSyncingRounds - n.statusLock.Unlock() + w.statusLock.Lock() + w.status = api.StatusSyncingRounds + w.statusLock.Unlock() pendingApply := &minRoundQueue{} pendingFinalize := &minRoundQueue{} @@ -1163,7 +1174,7 @@ mainLoop: // Apply the write log if one exists. err = nil if lastDiff.fetched { - err = n.localStorage.Apply(n.ctx, &storageApi.ApplyRequest{ + err = w.localStorage.Apply(w.ctx, &storageApi.ApplyRequest{ Namespace: lastDiff.thisRoot.Namespace, RootType: lastDiff.thisRoot.Type, SrcRound: lastDiff.prevRoot.Version, @@ -1178,7 +1189,7 @@ mainLoop: case errors.Is(err, storageApi.ErrExpectedRootMismatch): lastDiff.pf.RecordBadPeer() default: - n.logger.Error("can't apply write log", + w.logger.Error("can't apply write log", "err", err, "old_root", lastDiff.prevRoot, "new_root", lastDiff.thisRoot, @@ -1198,14 +1209,14 @@ mainLoop: } // We have fully synced the given round. - n.logger.Debug("finished syncing round", "round", lastDiff.round) + w.logger.Debug("finished syncing round", "round", lastDiff.round) delete(syncingRounds, lastDiff.round) summary := summaryCache[lastDiff.round] delete(summaryCache, lastDiff.round-1) lastFullyAppliedRound = lastDiff.round - storageWorkerLastSyncedRound.With(n.getMetricLabels()).Set(float64(lastDiff.round)) - storageWorkerRoundSyncLatency.With(n.getMetricLabels()).Observe(time.Since(syncing.startedAt).Seconds()) + storageWorkerLastSyncedRound.With(w.getMetricLabels()).Set(float64(lastDiff.round)) + storageWorkerRoundSyncLatency.With(w.getMetricLabels()).Observe(time.Since(syncing.startedAt).Seconds()) // Finalize storage for this round. This happens asynchronously // with respect to Apply operations for subsequent rounds. @@ -1222,15 +1233,15 @@ mainLoop: wg.Add(1) go func() { // Don't block fetching and applying remaining rounds. defer wg.Done() - n.finalize(lastSummary) + w.finalize(lastSummary) }() continue } select { - case inBlk := <-n.blockCh.Out(): + case inBlk := <-w.blockCh.Out(): blk := inBlk.(*block.Block) - n.logger.Debug("incoming block", + w.logger.Debug("incoming block", "round", blk.Header.Round, "last_synced", lastFullyAppliedRound, "last_finalized", cachedLastRound, @@ -1238,9 +1249,9 @@ mainLoop: // Check if we're far enough to reasonably register as available. 
latestBlockRound = blk.Header.Round - n.nudgeAvailability(cachedLastRound, latestBlockRound) + w.nudgeAvailability(cachedLastRound, latestBlockRound) - if _, ok := summaryCache[lastFullyAppliedRound]; !ok && lastFullyAppliedRound == n.undefinedRound { + if _, ok := summaryCache[lastFullyAppliedRound]; !ok && lastFullyAppliedRound == w.undefinedRound { dummy := blockSummary{ Namespace: blk.Header.Namespace, Round: lastFullyAppliedRound + 1, @@ -1264,7 +1275,7 @@ mainLoop: // since the undefined round may be unsigned -1 and in this case the loop // would not do any iterations. startSummaryRound := lastFullyAppliedRound - if startSummaryRound == n.undefinedRound { + if startSummaryRound == w.undefinedRound { startSummaryRound++ } for i := startSummaryRound; i < blk.Header.Round; i++ { @@ -1272,9 +1283,9 @@ mainLoop: continue } var oldBlock *block.Block - oldBlock, err = n.commonNode.Runtime.History().GetCommittedBlock(n.ctx, i) + oldBlock, err = w.commonNode.Runtime.History().GetCommittedBlock(w.ctx, i) if err != nil { - n.logger.Error("can't get block for round", + w.logger.Error("can't get block for round", "err", err, "round", i, "current_round", blk.Header.Round, @@ -1291,14 +1302,14 @@ mainLoop: heartbeat.reset() case <-heartbeat.C: - if latestBlockRound != n.undefinedRound { - n.logger.Debug("heartbeat", "in_flight_rounds", len(syncingRounds)) + if latestBlockRound != w.undefinedRound { + w.logger.Debug("heartbeat", "in_flight_rounds", len(syncingRounds)) triggerRoundFetches() } - case item := <-n.diffCh: + case item := <-w.diffCh: if item.err != nil { - n.logger.Error("error calling getdiff", + w.logger.Error("error calling getdiff", "err", item.err, "round", item.round, "old_root", item.prevRoot, @@ -1315,35 +1326,35 @@ mainLoop: // when we're syncing and are far behind. triggerRoundFetches() - case finalized := <-n.finalizeCh: + case finalized := <-w.finalizeCh: // If finalization failed, things start falling apart. // There's no point redoing it, since it's probably not a transient // error, and cachedLastRound also can't be updated legitimately. if finalized.err != nil { // Request a node shutdown given that syncing is effectively blocked. - _ = n.commonNode.HostNode.RequestShutdown(n.ctx, false) + _ = w.commonNode.HostNode.RequestShutdown(w.ctx, false) break mainLoop } // No further sync or out of order handling needed here, since // only one finalize at a time is triggered (for round cachedLastRound+1) - cachedLastRound, err = n.flushSyncedState(finalized.summary) + cachedLastRound, err = w.flushSyncedState(finalized.summary) if err != nil { - n.logger.Error("failed to flush synced state", + w.logger.Error("failed to flush synced state", "err", err, ) } - storageWorkerLastFullRound.With(n.getMetricLabels()).Set(float64(finalized.summary.Round)) + storageWorkerLastFullRound.With(w.getMetricLabels()).Set(float64(finalized.summary.Round)) // Check if we're far enough to reasonably register as available. - n.nudgeAvailability(cachedLastRound, latestBlockRound) + w.nudgeAvailability(cachedLastRound, latestBlockRound) // Notify the checkpointer that there is a new finalized round. 
if config.GlobalConfig.Storage.Checkpointer.Enabled { - n.checkpointer.NotifyNewVersion(finalized.summary.Round) + w.checkpointer.NotifyNewVersion(finalized.summary.Round) } - case <-n.ctx.Done(): + case <-w.ctx.Done(): break mainLoop } } @@ -1356,12 +1367,12 @@ mainLoop: type pruneHandler struct { logger *logging.Logger - node *Node + worker *Worker } func (p *pruneHandler) Prune(rounds []uint64) error { // Make sure we never prune past what was synced. - lastSycnedRound, _, _ := p.node.GetLastSynced() + lastSycnedRound, _, _ := p.worker.GetLastSynced() for _, round := range rounds { if round >= lastSycnedRound { @@ -1375,7 +1386,7 @@ func (p *pruneHandler) Prune(rounds []uint64) error { p.logger.Debug("pruning storage for round", "round", round) // Prune given block. - err := p.node.localStorage.NodeDB().Prune(round) + err := p.worker.localStorage.NodeDB().Prune(round) switch err { case nil: case mkvsDB.ErrNotEarliest: diff --git a/go/worker/storage/committee/utils.go b/go/worker/storage/statesync/utils.go similarity index 99% rename from go/worker/storage/committee/utils.go rename to go/worker/storage/statesync/utils.go index 863b9fc7bd0..88adb492b33 100644 --- a/go/worker/storage/committee/utils.go +++ b/go/worker/storage/statesync/utils.go @@ -1,4 +1,4 @@ -package committee +package statesync import ( "fmt" diff --git a/go/worker/storage/worker.go b/go/worker/storage/worker.go index f49988bd1c7..c5b174d3aac 100644 --- a/go/worker/storage/worker.go +++ b/go/worker/storage/worker.go @@ -12,10 +12,10 @@ import ( committeeCommon "github.com/oasisprotocol/oasis-core/go/worker/common/committee" "github.com/oasisprotocol/oasis-core/go/worker/registration" storageWorkerAPI "github.com/oasisprotocol/oasis-core/go/worker/storage/api" - "github.com/oasisprotocol/oasis-core/go/worker/storage/committee" + "github.com/oasisprotocol/oasis-core/go/worker/storage/statesync" ) -// Worker is a worker handling storage operations. +// Worker is a worker handling storage operations for all common worker runtimes. type Worker struct { enabled bool @@ -26,7 +26,7 @@ type Worker struct { initCh chan struct{} quitCh chan struct{} - runtimes map[common.Namespace]*committee.Node + runtimes map[common.Namespace]*statesync.Worker } // New constructs a new storage worker. @@ -44,14 +44,14 @@ func New( logger: logging.GetLogger("worker/storage"), initCh: make(chan struct{}), quitCh: make(chan struct{}), - runtimes: make(map[common.Namespace]*committee.Node), + runtimes: make(map[common.Namespace]*statesync.Worker), } if !enabled { return s, nil } - // Start storage node for every runtime. + // Start state sync worker for every runtime. 
for id, rt := range s.commonWorker.GetRuntimes() { if err := s.registerRuntime(rt); err != nil { return nil, fmt.Errorf("failed to create storage worker for runtime %s: %w", id, err) @@ -90,13 +90,13 @@ func (w *Worker) registerRuntime(commonNode *committeeCommon.Node) error { return fmt.Errorf("can't create local storage backend: %w", err) } - node, err := committee.NewNode( + worker, err := statesync.New( commonNode, rp, rpRPC, w.commonWorker.GetConfig(), localStorage, - &committee.CheckpointSyncConfig{ + &statesync.CheckpointSyncConfig{ Disabled: config.GlobalConfig.Storage.CheckpointSyncDisabled, ChunkFetcherCount: config.GlobalConfig.Storage.FetcherCount, }, @@ -105,8 +105,8 @@ func (w *Worker) registerRuntime(commonNode *committeeCommon.Node) error { return err } commonNode.Runtime.RegisterStorage(localStorage) - commonNode.AddHooks(node) - w.runtimes[id] = node + commonNode.AddHooks(worker) + w.runtimes[id] = worker w.logger.Info("new runtime registered", "runtime_id", id, @@ -115,7 +115,7 @@ func (w *Worker) registerRuntime(commonNode *committeeCommon.Node) error { return nil } -// Name returns the service name. +// Name returns the worker name. func (w *Worker) Name() string { return "storage worker" } @@ -196,6 +196,6 @@ func (w *Worker) Cleanup() { // GetRuntime returns a storage committee node for the given runtime (if available). // // In case the runtime with the specified id was not configured for this node it returns nil. -func (w *Worker) GetRuntime(id common.Namespace) *committee.Node { +func (w *Worker) GetRuntime(id common.Namespace) *statesync.Worker { return w.runtimes[id] } From 10b47051e48beaa3873fd0ad541de2c0de21dc44 Mon Sep 17 00:00:00 2001 From: Martin Tomazic Date: Sat, 2 Aug 2025 21:57:31 +0200 Subject: [PATCH 02/11] go/worker/storage/statesync: Move pruning to separate file --- go/worker/storage/statesync/prune.go | 48 +++++++++++++++++++++++ go/worker/storage/statesync/state_sync.go | 41 ------------------- 2 files changed, 48 insertions(+), 41 deletions(-) create mode 100644 go/worker/storage/statesync/prune.go diff --git a/go/worker/storage/statesync/prune.go b/go/worker/storage/statesync/prune.go new file mode 100644 index 00000000000..dd67a4fbd9c --- /dev/null +++ b/go/worker/storage/statesync/prune.go @@ -0,0 +1,48 @@ +package statesync + +import ( + "fmt" + + "github.com/oasisprotocol/oasis-core/go/common/logging" + "github.com/oasisprotocol/oasis-core/go/storage/mkvs/db/api" +) + +type pruneHandler struct { + logger *logging.Logger + worker *Worker +} + +func (p *pruneHandler) Prune(rounds []uint64) error { + // Make sure we never prune past what was synced. + lastSycnedRound, _, _ := p.worker.GetLastSynced() + + for _, round := range rounds { + if round >= lastSycnedRound { + return fmt.Errorf("worker/storage: tried to prune past last synced round (last synced: %d)", + lastSycnedRound, + ) + } + + // Old suggestion: Make sure we don't prune rounds that need to be checkpointed but haven't been yet. + + p.logger.Debug("pruning storage for round", "round", round) + + // Prune given block. 
+ err := p.worker.localStorage.NodeDB().Prune(round) + switch err { + case nil: + case api.ErrNotEarliest: + p.logger.Debug("skipping non-earliest round", + "round", round, + ) + continue + default: + p.logger.Error("failed to prune block", + "err", err, + ) + return err + } + } + + return nil +} diff --git a/go/worker/storage/statesync/state_sync.go b/go/worker/storage/statesync/state_sync.go index 4ec5c1b21f7..27bd4fef093 100644 --- a/go/worker/storage/statesync/state_sync.go +++ b/go/worker/storage/statesync/state_sync.go @@ -29,7 +29,6 @@ import ( storageApi "github.com/oasisprotocol/oasis-core/go/storage/api" "github.com/oasisprotocol/oasis-core/go/storage/mkvs/checkpoint" dbApi "github.com/oasisprotocol/oasis-core/go/storage/mkvs/db/api" - mkvsDB "github.com/oasisprotocol/oasis-core/go/storage/mkvs/db/api" workerCommon "github.com/oasisprotocol/oasis-core/go/worker/common" "github.com/oasisprotocol/oasis-core/go/worker/common/committee" "github.com/oasisprotocol/oasis-core/go/worker/registration" @@ -1364,43 +1363,3 @@ mainLoop: // some new blocks, but only as many as were already in-flight at the point when the main // context was canceled. } - -type pruneHandler struct { - logger *logging.Logger - worker *Worker -} - -func (p *pruneHandler) Prune(rounds []uint64) error { - // Make sure we never prune past what was synced. - lastSycnedRound, _, _ := p.worker.GetLastSynced() - - for _, round := range rounds { - if round >= lastSycnedRound { - return fmt.Errorf("worker/storage: tried to prune past last synced round (last synced: %d)", - lastSycnedRound, - ) - } - - // TODO: Make sure we don't prune rounds that need to be checkpointed but haven't been yet. - - p.logger.Debug("pruning storage for round", "round", round) - - // Prune given block. - err := p.worker.localStorage.NodeDB().Prune(round) - switch err { - case nil: - case mkvsDB.ErrNotEarliest: - p.logger.Debug("skipping non-earliest round", - "round", round, - ) - continue - default: - p.logger.Error("failed to prune block", - "err", err, - ) - return err - } - } - - return nil -} From 8bbc0850afeb667baadf21c0a681305d2b3f6fe7 Mon Sep 17 00:00:00 2001 From: Martin Tomazic Date: Sun, 3 Aug 2025 00:57:34 +0200 Subject: [PATCH 03/11] go/worker/storage/statesync: Move checkpointert to separate file Logic was preserved, the only thing that changed is that context is passed explicitly and worker for creating checkpoints was renamed. --- go/worker/storage/statesync/checkpointer.go | 215 ++++++++++++++++++++ go/worker/storage/statesync/state_sync.go | 200 +----------------- 2 files changed, 216 insertions(+), 199 deletions(-) create mode 100644 go/worker/storage/statesync/checkpointer.go diff --git a/go/worker/storage/statesync/checkpointer.go b/go/worker/storage/statesync/checkpointer.go new file mode 100644 index 00000000000..070a6696395 --- /dev/null +++ b/go/worker/storage/statesync/checkpointer.go @@ -0,0 +1,215 @@ +package statesync + +import ( + "context" + "fmt" + + "github.com/oasisprotocol/oasis-core/go/common/pubsub" + "github.com/oasisprotocol/oasis-core/go/config" + consensus "github.com/oasisprotocol/oasis-core/go/consensus/api" + roothashApi "github.com/oasisprotocol/oasis-core/go/roothash/api" + storageApi "github.com/oasisprotocol/oasis-core/go/storage/api" + "github.com/oasisprotocol/oasis-core/go/storage/mkvs/checkpoint" + "github.com/oasisprotocol/oasis-core/go/worker/common/committee" +) + +const ( + // chunkerThreads is target number of subtrees during parallel checkpoint creation. 
+ // It is intentionally non-configurable since we want operators to produce + // same checkpoint hashes. The current value was chosen based on the benchmarks + // done on the modern developer machine. + chunkerThreads = 12 +) + +func (w *Worker) newCheckpointer(ctx context.Context, commonNode *committee.Node, localStorage storageApi.LocalBackend) (checkpoint.Checkpointer, error) { + checkInterval := checkpoint.CheckIntervalDisabled + if config.GlobalConfig.Storage.Checkpointer.Enabled { + checkInterval = config.GlobalConfig.Storage.Checkpointer.CheckInterval + } + checkpointerCfg := checkpoint.CheckpointerConfig{ + Name: "runtime", + Namespace: commonNode.Runtime.ID(), + CheckInterval: checkInterval, + RootsPerVersion: 2, // State root and I/O root. + GetParameters: func(ctx context.Context) (*checkpoint.CreationParameters, error) { + rt, rerr := commonNode.Runtime.ActiveDescriptor(ctx) + if rerr != nil { + return nil, fmt.Errorf("failed to retrieve runtime descriptor: %w", rerr) + } + + blk, rerr := commonNode.Consensus.RootHash().GetGenesisBlock(ctx, &roothashApi.RuntimeRequest{ + RuntimeID: rt.ID, + Height: consensus.HeightLatest, + }) + if rerr != nil { + return nil, fmt.Errorf("failed to retrieve genesis block: %w", rerr) + } + + var threads uint16 + if config.GlobalConfig.Storage.Checkpointer.ParallelChunker { + threads = chunkerThreads + } + + return &checkpoint.CreationParameters{ + Interval: rt.Storage.CheckpointInterval, + NumKept: rt.Storage.CheckpointNumKept, + ChunkSize: rt.Storage.CheckpointChunkSize, + InitialVersion: blk.Header.Round, + ChunkerThreads: threads, + }, nil + }, + GetRoots: func(ctx context.Context, version uint64) ([]storageApi.Root, error) { + blk, berr := commonNode.Runtime.History().GetCommittedBlock(ctx, version) + if berr != nil { + return nil, berr + } + + return blk.Header.StorageRoots(), nil + }, + } + + return checkpoint.NewCheckpointer( + ctx, + localStorage.NodeDB(), + localStorage.Checkpointer(), + checkpointerCfg, + ) +} + +// createCheckpoints is a worker responsible for triggering creation of runtime +// checkpoint everytime a consensus checkpoint is created. +// +// The reason why we do this is to make it faster for storage nodes that use consensus state sync +// to catch up as exactly the right checkpoint will be available. +func (w *Worker) createCheckpoints(ctx context.Context) { + consensusCp := w.commonNode.Consensus.Checkpointer() + if consensusCp == nil { + return + } + + // Wait for the common node to be initialized. + select { + case <-w.commonNode.Initialized(): + case <-ctx.Done(): + return + } + + // Determine the maximum number of consensus checkpoints to keep. + consensusParams, err := w.commonNode.Consensus.Core().GetParameters(ctx, consensus.HeightLatest) + if err != nil { + w.logger.Error("failed to fetch consensus parameters", + "err", err, + ) + return + } + + ch, sub, err := consensusCp.WatchCheckpoints() + if err != nil { + w.logger.Error("failed to watch checkpoints", + "err", err, + ) + return + } + defer sub.Close() + + var ( + versions []uint64 + blkCh <-chan *consensus.Block + blkSub pubsub.ClosableSubscription + ) + defer func() { + if blkCh != nil { + blkSub.Close() + blkSub = nil + blkCh = nil + } + }() + for { + select { + case <-w.quitCh: + return + case <-ctx.Done(): + return + case version := <-ch: + // We need to wait for the next version as that is what will be in the consensus + // checkpoint. + versions = append(versions, version+1) + // Make sure that we limit the size of the checkpoint queue. 
+ if uint64(len(versions)) > consensusParams.Parameters.StateCheckpointNumKept { + versions = versions[1:] + } + + w.logger.Debug("consensus checkpoint detected, queuing runtime checkpoint", + "version", version+1, + "num_versions", len(versions), + ) + + if blkCh == nil { + blkCh, blkSub, err = w.commonNode.Consensus.Core().WatchBlocks(ctx) + if err != nil { + w.logger.Error("failed to watch blocks", + "err", err, + ) + continue + } + } + case blk := <-blkCh: + // If there's nothing remaining, unsubscribe. + if len(versions) == 0 { + w.logger.Debug("no more queued consensus checkpoint versions") + + blkSub.Close() + blkSub = nil + blkCh = nil + continue + } + + var newVersions []uint64 + for idx, version := range versions { + if version > uint64(blk.Height) { + // We need to wait for further versions. + newVersions = versions[idx:] + break + } + + // Lookup what runtime round corresponds to the given consensus layer version and make + // sure we checkpoint it. + blk, err := w.commonNode.Consensus.RootHash().GetLatestBlock(ctx, &roothashApi.RuntimeRequest{ + RuntimeID: w.commonNode.Runtime.ID(), + Height: int64(version), + }) + if err != nil { + w.logger.Error("failed to get runtime block corresponding to consensus checkpoint", + "err", err, + "height", version, + ) + continue + } + + // We may have not yet synced the corresponding runtime round locally. In this case + // we need to wait until this is the case. + w.syncedLock.RLock() + lastSyncedRound := w.syncedState.Round + w.syncedLock.RUnlock() + if blk.Header.Round > lastSyncedRound { + w.logger.Debug("runtime round not available yet for checkpoint, waiting", + "height", version, + "round", blk.Header.Round, + "last_synced_round", lastSyncedRound, + ) + newVersions = versions[idx:] + break + } + + // Force runtime storage checkpointer to create a checkpoint at this round. + w.logger.Info("consensus checkpoint, force runtime checkpoint", + "height", version, + "round", blk.Header.Round, + ) + + w.checkpointer.ForceCheckpoint(blk.Header.Round) + } + versions = newVersions + } + } +} diff --git a/go/worker/storage/statesync/state_sync.go b/go/worker/storage/statesync/state_sync.go index 27bd4fef093..5bb7c376835 100644 --- a/go/worker/storage/statesync/state_sync.go +++ b/go/worker/storage/statesync/state_sync.go @@ -15,7 +15,6 @@ import ( "github.com/oasisprotocol/oasis-core/go/common/logging" "github.com/oasisprotocol/oasis-core/go/common/node" - "github.com/oasisprotocol/oasis-core/go/common/pubsub" "github.com/oasisprotocol/oasis-core/go/common/workerpool" "github.com/oasisprotocol/oasis-core/go/config" consensus "github.com/oasisprotocol/oasis-core/go/consensus/api" @@ -65,12 +64,6 @@ const ( // maxInFlightRounds is the maximum number of rounds that should be fetched before waiting // for them to be applied. maxInFlightRounds = 100 - - // chunkerThreads is target number of subtrees during parallel checkpoint creation. - // It is intentionally non-configurable since we want operators to produce - // same checkpoint hashes. The current value was chosen based on the benchmarks - // done on the modern developer machine. 
- chunkerThreads = 12 ) type roundItem interface { @@ -258,68 +251,13 @@ func New( return w, nil } -func (w *Worker) newCheckpointer(ctx context.Context, commonNode *committee.Node, localStorage storageApi.LocalBackend) (checkpoint.Checkpointer, error) { - checkInterval := checkpoint.CheckIntervalDisabled - if config.GlobalConfig.Storage.Checkpointer.Enabled { - checkInterval = config.GlobalConfig.Storage.Checkpointer.CheckInterval - } - checkpointerCfg := checkpoint.CheckpointerConfig{ - Name: "runtime", - Namespace: commonNode.Runtime.ID(), - CheckInterval: checkInterval, - RootsPerVersion: 2, // State root and I/O root. - GetParameters: func(ctx context.Context) (*checkpoint.CreationParameters, error) { - rt, rerr := commonNode.Runtime.ActiveDescriptor(ctx) - if rerr != nil { - return nil, fmt.Errorf("failed to retrieve runtime descriptor: %w", rerr) - } - - blk, rerr := commonNode.Consensus.RootHash().GetGenesisBlock(ctx, &roothashApi.RuntimeRequest{ - RuntimeID: rt.ID, - Height: consensus.HeightLatest, - }) - if rerr != nil { - return nil, fmt.Errorf("failed to retrieve genesis block: %w", rerr) - } - - var threads uint16 - if config.GlobalConfig.Storage.Checkpointer.ParallelChunker { - threads = chunkerThreads - } - - return &checkpoint.CreationParameters{ - Interval: rt.Storage.CheckpointInterval, - NumKept: rt.Storage.CheckpointNumKept, - ChunkSize: rt.Storage.CheckpointChunkSize, - InitialVersion: blk.Header.Round, - ChunkerThreads: threads, - }, nil - }, - GetRoots: func(ctx context.Context, version uint64) ([]storageApi.Root, error) { - blk, berr := commonNode.Runtime.History().GetCommittedBlock(ctx, version) - if berr != nil { - return nil, berr - } - - return blk.Header.StorageRoots(), nil - }, - } - - return checkpoint.NewCheckpointer( - ctx, - localStorage.NodeDB(), - localStorage.Checkpointer(), - checkpointerCfg, - ) -} - // Service interface. // Start causes the worker to start responding to CometBFT new block events. func (w *Worker) Start() error { go w.worker() if config.GlobalConfig.Storage.Checkpointer.Enabled { - go w.consensusCheckpointSyncer() + go w.createCheckpoints(w.ctx) } return nil } @@ -641,142 +579,6 @@ func (w *Worker) flushSyncedState(summary *blockSummary) (uint64, error) { return w.syncedState.Round, nil } -func (w *Worker) consensusCheckpointSyncer() { - // Make sure we always create a checkpoint when the consensus layer creates a checkpoint. The - // reason why we do this is to make it faster for storage nodes that use consensus state sync - // to catch up as exactly the right checkpoint will be available. - consensusCp := w.commonNode.Consensus.Checkpointer() - if consensusCp == nil { - return - } - - // Wait for the common node to be initialized. - select { - case <-w.commonNode.Initialized(): - case <-w.ctx.Done(): - return - } - - // Determine the maximum number of consensus checkpoints to keep. 
- consensusParams, err := w.commonNode.Consensus.Core().GetParameters(w.ctx, consensus.HeightLatest) - if err != nil { - w.logger.Error("failed to fetch consensus parameters", - "err", err, - ) - return - } - - ch, sub, err := consensusCp.WatchCheckpoints() - if err != nil { - w.logger.Error("failed to watch checkpoints", - "err", err, - ) - return - } - defer sub.Close() - - var ( - versions []uint64 - blkCh <-chan *consensus.Block - blkSub pubsub.ClosableSubscription - ) - defer func() { - if blkCh != nil { - blkSub.Close() - blkSub = nil - blkCh = nil - } - }() - for { - select { - case <-w.quitCh: - return - case <-w.ctx.Done(): - return - case version := <-ch: - // We need to wait for the next version as that is what will be in the consensus - // checkpoint. - versions = append(versions, version+1) - // Make sure that we limit the size of the checkpoint queue. - if uint64(len(versions)) > consensusParams.Parameters.StateCheckpointNumKept { - versions = versions[1:] - } - - w.logger.Debug("consensus checkpoint detected, queuing runtime checkpoint", - "version", version+1, - "num_versions", len(versions), - ) - - if blkCh == nil { - blkCh, blkSub, err = w.commonNode.Consensus.Core().WatchBlocks(w.ctx) - if err != nil { - w.logger.Error("failed to watch blocks", - "err", err, - ) - continue - } - } - case blk := <-blkCh: - // If there's nothing remaining, unsubscribe. - if len(versions) == 0 { - w.logger.Debug("no more queued consensus checkpoint versions") - - blkSub.Close() - blkSub = nil - blkCh = nil - continue - } - - var newVersions []uint64 - for idx, version := range versions { - if version > uint64(blk.Height) { - // We need to wait for further versions. - newVersions = versions[idx:] - break - } - - // Lookup what runtime round corresponds to the given consensus layer version and make - // sure we checkpoint it. - blk, err := w.commonNode.Consensus.RootHash().GetLatestBlock(w.ctx, &roothashApi.RuntimeRequest{ - RuntimeID: w.commonNode.Runtime.ID(), - Height: int64(version), - }) - if err != nil { - w.logger.Error("failed to get runtime block corresponding to consensus checkpoint", - "err", err, - "height", version, - ) - continue - } - - // We may have not yet synced the corresponding runtime round locally. In this case - // we need to wait until this is the case. - w.syncedLock.RLock() - lastSyncedRound := w.syncedState.Round - w.syncedLock.RUnlock() - if blk.Header.Round > lastSyncedRound { - w.logger.Debug("runtime round not available yet for checkpoint, waiting", - "height", version, - "round", blk.Header.Round, - "last_synced_round", lastSyncedRound, - ) - newVersions = versions[idx:] - break - } - - // Force runtime storage checkpointer to create a checkpoint at this round. - w.logger.Info("consensus checkpoint, force runtime checkpoint", - "height", version, - "round", blk.Header.Round, - ) - - w.checkpointer.ForceCheckpoint(blk.Header.Round) - } - versions = newVersions - } - } -} - // This is only called from the main worker goroutine, so no locking should be necessary. func (w *Worker) nudgeAvailability(lastSynced, latest uint64) { if lastSynced == w.undefinedRound || latest == w.undefinedRound { From b030d023cead1a45a5de266ead1572553302b883 Mon Sep 17 00:00:00 2001 From: Martin Tomazic Date: Sun, 3 Aug 2025 00:57:34 +0200 Subject: [PATCH 04/11] go/worker/storage/statesync: Pass context explicitly In addition state sync worker should return an error and it should be the caller responsibility to act accordingly. See e.g. new workers such as stateless client. 
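For illustration, a minimal sketch of the caller-side pattern this change
assumes (the function name runSyncWorkers and its runtimes argument are
placeholders, not part of this patch); it mirrors the errgroup-based
Serve added to the storage worker later in this series:

    import (
    	"context"
    	"fmt"

    	"golang.org/x/sync/errgroup"

    	"github.com/oasisprotocol/oasis-core/go/common"
    	"github.com/oasisprotocol/oasis-core/go/worker/storage/statesync"
    )

    // runSyncWorkers runs one state sync worker per runtime and returns the
    // first error; errgroup cancels the shared context for the remaining ones.
    func runSyncWorkers(ctx context.Context, runtimes map[common.Namespace]*statesync.Worker) error {
    	g, ctx := errgroup.WithContext(ctx)
    	for id, r := range runtimes {
    		g.Go(func() error {
    			if err := r.Run(ctx); err != nil {
    				return fmt.Errorf("state sync worker %s: %w", id, err)
    			}
    			return nil
    		})
    	}
    	return g.Wait()
    }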
Note that semantic changed slightly: Previously storage worker would wait for all state sync workers to finish. Now it will terminate when the first one finishes. Notice that this is not 100% true as previously state sync worker could panic (which would in that case shutdown the whole node). --- .../storage/statesync/checkpoint_sync.go | 28 +-- go/worker/storage/statesync/checkpointer.go | 2 - go/worker/storage/statesync/state_sync.go | 164 ++++++------------ go/worker/storage/worker.go | 53 ++++-- 4 files changed, 100 insertions(+), 147 deletions(-) diff --git a/go/worker/storage/statesync/checkpoint_sync.go b/go/worker/storage/statesync/checkpoint_sync.go index bca66368dfe..04b272eedaf 100644 --- a/go/worker/storage/statesync/checkpoint_sync.go +++ b/go/worker/storage/statesync/checkpoint_sync.go @@ -194,8 +194,8 @@ func (w *Worker) fetchChunk(ctx context.Context, chunk *chunk) ([]byte, rpc.Peer return rsp2.Chunk, pf, nil } -func (w *Worker) handleCheckpoint(check *checkpointsync.Checkpoint, maxParallelRequests uint) (cpStatus int, rerr error) { - if err := w.localStorage.Checkpointer().StartRestore(w.ctx, check.Metadata); err != nil { +func (w *Worker) handleCheckpoint(ctx context.Context, check *checkpointsync.Checkpoint, maxParallelRequests uint) (cpStatus int, rerr error) { + if err := w.localStorage.Checkpointer().StartRestore(ctx, check.Metadata); err != nil { // Any previous restores were already aborted by the driver up the call stack, so // things should have been going smoothly here; bail. return checkpointStatusBail, fmt.Errorf("can't start checkpoint restore: %w", err) @@ -222,7 +222,7 @@ func (w *Worker) handleCheckpoint(check *checkpointsync.Checkpoint, maxParallelR chunkReturnCh := make(chan *chunk, maxParallelRequests) errorCh := make(chan int, maxParallelRequests) - ctx, cancel := context.WithCancel(w.ctx) + chunkCtx, cancel := context.WithCancel(ctx) // Spawn the worker group to fetch and restore checkpoint chunks. var workerGroup sync.WaitGroup @@ -231,7 +231,7 @@ func (w *Worker) handleCheckpoint(check *checkpointsync.Checkpoint, maxParallelR workerGroup.Add(1) go func() { defer workerGroup.Done() - w.checkpointChunkFetcher(ctx, chunkDispatchCh, chunkReturnCh, errorCh) + w.checkpointChunkFetcher(chunkCtx, chunkDispatchCh, chunkReturnCh, errorCh) }() } go func() { @@ -283,8 +283,8 @@ func (w *Worker) handleCheckpoint(check *checkpointsync.Checkpoint, maxParallelR } select { - case <-w.ctx.Done(): - return checkpointStatusBail, w.ctx.Err() + case <-ctx.Done(): + return checkpointStatusBail, ctx.Err() case returned := <-chunkReturnCh: if returned == nil { @@ -313,8 +313,8 @@ func (w *Worker) handleCheckpoint(check *checkpointsync.Checkpoint, maxParallelR } } -func (w *Worker) getCheckpointList() ([]*checkpointsync.Checkpoint, error) { - ctx, cancel := context.WithTimeout(w.ctx, cpListsTimeout) +func (w *Worker) getCheckpointList(ctx context.Context) ([]*checkpointsync.Checkpoint, error) { + ctx, cancel := context.WithTimeout(ctx, cpListsTimeout) defer cancel() list, err := w.fetchCheckpoints(ctx) @@ -369,7 +369,7 @@ func sortCheckpoints(s []*checkpointsync.Checkpoint) { }) } -func (w *Worker) checkCheckpointUsable(cp *checkpointsync.Checkpoint, remainingMask outstandingMask, genesisRound uint64) bool { +func (w *Worker) checkCheckpointUsable(ctx context.Context, cp *checkpointsync.Checkpoint, remainingMask outstandingMask, genesisRound uint64) bool { namespace := w.commonNode.Runtime.ID() if !namespace.Equal(&cp.Root.Namespace) { // Not for the right runtime. 
@@ -380,7 +380,7 @@ func (w *Worker) checkCheckpointUsable(cp *checkpointsync.Checkpoint, remainingM return false } - blk, err := w.commonNode.Runtime.History().GetCommittedBlock(w.ctx, cp.Root.Version) + blk, err := w.commonNode.Runtime.History().GetCommittedBlock(ctx, cp.Root.Version) if err != nil { w.logger.Error("can't get block information for checkpoint, skipping", "err", err, "root", cp.Root) return false @@ -405,14 +405,14 @@ func (w *Worker) checkCheckpointUsable(cp *checkpointsync.Checkpoint, remainingM return false } -func (w *Worker) syncCheckpoints(genesisRound uint64, wantOnlyGenesis bool) (*blockSummary, error) { +func (w *Worker) syncCheckpoints(ctx context.Context, genesisRound uint64, wantOnlyGenesis bool) (*blockSummary, error) { // Store roots and round info for checkpoints that finished syncing. // Round and namespace info will get overwritten as rounds are skipped // for errors, driven by remainingRoots. var syncState blockSummary // Fetch checkpoints from peers. - cps, err := w.getCheckpointList() + cps, err := w.getCheckpointList(ctx) if err != nil { return nil, fmt.Errorf("can't get checkpoint list from peers: %w", err) } @@ -449,7 +449,7 @@ func (w *Worker) syncCheckpoints(genesisRound uint64, wantOnlyGenesis bool) (*bl for _, check := range cps { - if check.Root.Version < genesisRound || !w.checkCheckpointUsable(check, remainingRoots, genesisRound) { + if check.Root.Version < genesisRound || !w.checkCheckpointUsable(ctx, check, remainingRoots, genesisRound) { continue } @@ -486,7 +486,7 @@ func (w *Worker) syncCheckpoints(genesisRound uint64, wantOnlyGenesis bool) (*bl } } - status, err := w.handleCheckpoint(check, w.checkpointSyncCfg.ChunkFetcherCount) + status, err := w.handleCheckpoint(ctx, check, w.checkpointSyncCfg.ChunkFetcherCount) switch status { case checkpointStatusDone: w.logger.Info("successfully restored from checkpoint", "root", check.Root, "mask", mask) diff --git a/go/worker/storage/statesync/checkpointer.go b/go/worker/storage/statesync/checkpointer.go index 070a6696395..9e72cf4285d 100644 --- a/go/worker/storage/statesync/checkpointer.go +++ b/go/worker/storage/statesync/checkpointer.go @@ -126,8 +126,6 @@ func (w *Worker) createCheckpoints(ctx context.Context) { }() for { select { - case <-w.quitCh: - return case <-ctx.Done(): return case version := <-ch: diff --git a/go/worker/storage/statesync/state_sync.go b/go/worker/storage/statesync/state_sync.go index 5bb7c376835..f1d3552a804 100644 --- a/go/worker/storage/statesync/state_sync.go +++ b/go/worker/storage/statesync/state_sync.go @@ -143,8 +143,6 @@ type Worker struct { // nolint: maligned undefinedRound uint64 - fetchPool *workerpool.Pool - workerCommonCfg workerCommon.Config checkpointer checkpoint.Checkpointer @@ -161,16 +159,12 @@ type Worker struct { // nolint: maligned diffCh chan *fetchedDiff finalizeCh chan finalizeResult - ctx context.Context - ctxCancel context.CancelFunc - - quitCh chan struct{} - initCh chan struct{} } // New creates a new state sync worker. func New( + ctx context.Context, commonNode *committee.Node, roleProvider registration.RoleProvider, rpcRoleProvider registration.RoleProvider, @@ -180,10 +174,6 @@ func New( ) (*Worker, error) { initMetrics() - // Create the fetcher pool. 
- fetchPool := workerpool.New("storage_fetch/" + commonNode.Runtime.ID().String()) - fetchPool.Resize(config.GlobalConfig.Storage.FetcherCount) - w := &Worker{ commonNode: commonNode, @@ -196,8 +186,6 @@ func New( localStorage: localStorage, - fetchPool: fetchPool, - checkpointSyncCfg: checkpointSyncCfg, status: api.StatusInitializing, @@ -206,7 +194,6 @@ func New( diffCh: make(chan *fetchedDiff), finalizeCh: make(chan finalizeResult), - quitCh: make(chan struct{}), initCh: make(chan struct{}), } @@ -218,10 +205,8 @@ func New( // Initialize sync state. w.syncedState.Round = defaultUndefinedRound - w.ctx, w.ctxCancel = context.WithCancel(context.Background()) - // Create a checkpointer (even if checkpointing is disabled) to ensure the genesis checkpoint is available. - checkpointer, err := w.newCheckpointer(w.ctx, commonNode, localStorage) + checkpointer, err := w.newCheckpointer(ctx, commonNode, localStorage) if err != nil { return nil, fmt.Errorf("failed to create checkpointer: %w", err) } @@ -251,38 +236,6 @@ func New( return w, nil } -// Service interface. - -// Start causes the worker to start responding to CometBFT new block events. -func (w *Worker) Start() error { - go w.worker() - if config.GlobalConfig.Storage.Checkpointer.Enabled { - go w.createCheckpoints(w.ctx) - } - return nil -} - -// Stop causes the worker to stop watching and shut down. -func (w *Worker) Stop() { - w.statusLock.Lock() - w.status = api.StatusStopping - w.statusLock.Unlock() - - w.fetchPool.Stop() - - w.ctxCancel() -} - -// Quit returns a channel that will be closed when the worker stops. -func (w *Worker) Quit() <-chan struct{} { - return w.quitCh -} - -// Cleanup cleans up any leftover state after the worker is stopped. -func (w *Worker) Cleanup() { - // Nothing to do here? -} - // Initialized returns a channel that will be closed once the worker finished starting up. 
func (w *Worker) Initialized() <-chan struct{} { return w.initCh @@ -353,7 +306,7 @@ func (w *Worker) GetLastSynced() (uint64, storageApi.Root, storageApi.Root) { return w.syncedState.Round, io, state } -func (w *Worker) fetchDiff(round uint64, prevRoot, thisRoot storageApi.Root) { +func (w *Worker) fetchDiff(ctx context.Context, round uint64, prevRoot, thisRoot storageApi.Root) { result := &fetchedDiff{ fetched: false, pf: rpc.NewNopPeerFeedback(), @@ -364,7 +317,7 @@ func (w *Worker) fetchDiff(round uint64, prevRoot, thisRoot storageApi.Root) { defer func() { select { case w.diffCh <- result: - case <-w.ctx.Done(): + case <-ctx.Done(): } }() @@ -389,10 +342,10 @@ func (w *Worker) fetchDiff(round uint64, prevRoot, thisRoot storageApi.Root) { "new_root", thisRoot, ) - ctx, cancel := context.WithCancel(w.ctx) + diffCtx, cancel := context.WithCancel(ctx) defer cancel() - wl, pf, err := w.getDiff(ctx, prevRoot, thisRoot) + wl, pf, err := w.getDiff(diffCtx, prevRoot, thisRoot) if err != nil { result.err = err return @@ -417,7 +370,7 @@ func (w *Worker) getDiff(ctx context.Context, prevRoot, thisRoot storageApi.Root return rsp2.WriteLog, pf, nil } -func (w *Worker) finalize(summary *blockSummary) { +func (w *Worker) finalize(ctx context.Context, summary *blockSummary) { err := w.localStorage.NodeDB().Finalize(summary.Roots) switch err { case nil: @@ -445,11 +398,11 @@ func (w *Worker) finalize(summary *blockSummary) { select { case w.finalizeCh <- result: - case <-w.ctx.Done(): + case <-ctx.Done(): } } -func (w *Worker) initGenesis(rt *registryApi.Runtime, genesisBlock *block.Block) error { +func (w *Worker) initGenesis(ctx context.Context, rt *registryApi.Runtime, genesisBlock *block.Block) error { w.logger.Info("initializing storage at genesis") // Check what the latest finalized version in the database is as we may be using a database @@ -514,7 +467,7 @@ func (w *Worker) initGenesis(rt *registryApi.Runtime, genesisBlock *block.Block) "latest_version", latestVersion, ) for v := latestVersion; v < stateRoot.Version; v++ { - err := w.localStorage.Apply(w.ctx, &storageApi.ApplyRequest{ + err := w.localStorage.Apply(ctx, &storageApi.ApplyRequest{ Namespace: rt.ID, RootType: storageApi.RootTypeState, SrcRound: v, @@ -604,45 +557,47 @@ func (w *Worker) nudgeAvailability(lastSynced, latest uint64) { } } -func (w *Worker) worker() { // nolint: gocyclo - defer close(w.quitCh) +// Run runs state sync worker. +func (w *Worker) Run(ctx context.Context) error { // nolint: gocyclo defer close(w.diffCh) // Wait for the common node to be initialized. select { case <-w.commonNode.Initialized(): - case <-w.ctx.Done(): + case <-ctx.Done(): close(w.initCh) - return + return ctx.Err() } - w.logger.Info("starting state sycne worker") + w.logger.Info("starting runtime state sync worker") w.statusLock.Lock() w.status = api.StatusStarting w.statusLock.Unlock() + if config.GlobalConfig.Storage.Checkpointer.Enabled { + go w.createCheckpoints(ctx) + } + // Determine genesis block. - genesisBlock, err := w.commonNode.Consensus.RootHash().GetGenesisBlock(w.ctx, &roothashApi.RuntimeRequest{ + genesisBlock, err := w.commonNode.Consensus.RootHash().GetGenesisBlock(ctx, &roothashApi.RuntimeRequest{ RuntimeID: w.commonNode.Runtime.ID(), Height: consensus.HeightLatest, }) if err != nil { - w.logger.Error("can't retrieve genesis block", "err", err) - return + return fmt.Errorf("can't retrieve genesis block: %w", err) } w.undefinedRound = genesisBlock.Header.Round - 1 // Determine last finalized storage version. 
if version, dbNonEmpty := w.localStorage.NodeDB().GetLatestVersion(); dbNonEmpty { var blk *block.Block - blk, err = w.commonNode.Runtime.History().GetCommittedBlock(w.ctx, version) + blk, err = w.commonNode.Runtime.History().GetCommittedBlock(ctx, version) switch err { case nil: // Set last synced version to last finalized storage version. if _, err = w.flushSyncedState(summaryFromBlock(blk)); err != nil { - w.logger.Error("failed to flush synced state", "err", err) - return + return fmt.Errorf("failed to flush synced state: %w", err) } default: // Failed to fetch historic block. This is fine when the network just went through a @@ -670,18 +625,12 @@ func (w *Worker) worker() { // nolint: gocyclo w.statusLock.Unlock() var rt *registryApi.Runtime - rt, err = w.commonNode.Runtime.ActiveDescriptor(w.ctx) + rt, err = w.commonNode.Runtime.ActiveDescriptor(ctx) if err != nil { - w.logger.Error("failed to retrieve runtime registry descriptor", - "err", err, - ) - return + return fmt.Errorf("failed to retrieve runtime registry descriptor: %w", err) } - if err = w.initGenesis(rt, genesisBlock); err != nil { - w.logger.Error("failed to initialize storage at genesis", - "err", err, - ) - return + if err = w.initGenesis(ctx, rt, genesisBlock); err != nil { + return fmt.Errorf("failed to initialize storage at genesis: %w", err) } } @@ -707,7 +656,7 @@ func (w *Worker) worker() { // nolint: gocyclo // Check if we actually have information about that round. This assumes that any reindexing // was already performed (the common node would not indicate being initialized otherwise). - _, err = w.commonNode.Runtime.History().GetCommittedBlock(w.ctx, iterativeSyncStart) + _, err = w.commonNode.Runtime.History().GetCommittedBlock(ctx, iterativeSyncStart) SyncStartCheck: switch { case err == nil: @@ -715,7 +664,7 @@ func (w *Worker) worker() { // nolint: gocyclo // No information is available about the initial round. Query the earliest historic // block and check if that block has the genesis state root and empty I/O root. var earlyBlk *block.Block - earlyBlk, err = w.commonNode.Runtime.History().GetEarliestBlock(w.ctx) + earlyBlk, err = w.commonNode.Runtime.History().GetEarliestBlock(ctx) switch err { case nil: // Make sure the state root is still the same as at genesis time. @@ -734,7 +683,7 @@ func (w *Worker) worker() { // nolint: gocyclo "earliest_round", earlyBlk.Header.Round, ) for v := genesisBlock.Header.Round; v < earlyBlk.Header.Round; v++ { - err = w.localStorage.Apply(w.ctx, &storageApi.ApplyRequest{ + err = w.localStorage.Apply(ctx, &storageApi.ApplyRequest{ Namespace: w.commonNode.Runtime.ID(), RootType: storageApi.RootTypeState, SrcRound: v, @@ -749,11 +698,7 @@ func (w *Worker) worker() { // nolint: gocyclo // Ignore already finalized versions. continue default: - w.logger.Error("failed to fill in version", - "version", v, - "err", err, - ) - return + return fmt.Errorf("failed to fill in version %d: %w", v, err) } err = w.localStorage.NodeDB().Finalize([]storageApi.Root{{ @@ -764,19 +709,12 @@ func (w *Worker) worker() { // nolint: gocyclo // We can ignore I/O roots. 
}}) if err != nil { - w.logger.Error("failed to finalize filled in version", - "version", v, - "err", err, - ) - return + return fmt.Errorf("failed to finalize filled in version %v: %w", v, err) } } cachedLastRound, err = w.flushSyncedState(summaryFromBlock(earlyBlk)) if err != nil { - w.logger.Error("failed to flush synced state", - "err", err, - ) - return + return fmt.Errorf("failed to flush synced state: %w", err) } // No need to force a checkpoint sync. break SyncStartCheck @@ -794,10 +732,7 @@ func (w *Worker) worker() { // nolint: gocyclo w.checkpointSyncForced = true default: // Unknown error while fetching block information, abort. - w.logger.Error("failed to query block", - "err", err, - ) - return + return fmt.Errorf("failed to query block: %w", err) } } @@ -831,7 +766,7 @@ func (w *Worker) worker() { // nolint: gocyclo ) CheckpointSyncRetry: for { - summary, err = w.syncCheckpoints(genesisBlock.Header.Round, w.checkpointSyncCfg.Disabled) + summary, err = w.syncCheckpoints(ctx, genesisBlock.Header.Round, w.checkpointSyncCfg.Disabled) if err == nil { break } @@ -859,8 +794,8 @@ func (w *Worker) worker() { // nolint: gocyclo // Delay before retrying. select { case <-time.After(checkpointSyncRetryDelay): - case <-w.ctx.Done(): - return + case <-ctx.Done(): + return ctx.Err() } } if err != nil { @@ -868,10 +803,7 @@ func (w *Worker) worker() { // nolint: gocyclo } else { cachedLastRound, err = w.flushSyncedState(summary) if err != nil { - w.logger.Error("failed to flush synced state", - "err", err, - ) - return + return fmt.Errorf("failed to flush synced state %w", err) } lastFullyAppliedRound = cachedLastRound w.logger.Info("checkpoint sync succeeded", @@ -890,6 +822,11 @@ func (w *Worker) worker() { // nolint: gocyclo var wg sync.WaitGroup syncingRounds := make(map[uint64]*inFlight) summaryCache := make(map[uint64]*blockSummary) + // Create the fetcher pool. + fetchPool := workerpool.New("storage_fetch/" + w.commonNode.Runtime.ID().String()) + fetchPool.Resize(config.GlobalConfig.Storage.FetcherCount) + defer fetchPool.Stop() + triggerRoundFetches := func() { for i := lastFullyAppliedRound + 1; i <= latestBlockRound; i++ { syncing, ok := syncingRounds[i] @@ -940,9 +877,9 @@ func (w *Worker) worker() { // nolint: gocyclo if !syncing.outstanding.contains(rootType) && syncing.awaitingRetry.contains(rootType) { syncing.scheduleDiff(rootType) wg.Add(1) - w.fetchPool.Submit(func() { + fetchPool.Submit(func() { defer wg.Done() - w.fetchDiff(this.Round, prevRoots[i], this.Roots[i]) + w.fetchDiff(ctx, this.Round, prevRoots[i], this.Roots[i]) }) } } @@ -975,7 +912,7 @@ mainLoop: // Apply the write log if one exists. err = nil if lastDiff.fetched { - err = w.localStorage.Apply(w.ctx, &storageApi.ApplyRequest{ + err = w.localStorage.Apply(ctx, &storageApi.ApplyRequest{ Namespace: lastDiff.thisRoot.Namespace, RootType: lastDiff.thisRoot.Type, SrcRound: lastDiff.prevRoot.Version, @@ -1034,7 +971,7 @@ mainLoop: wg.Add(1) go func() { // Don't block fetching and applying remaining rounds. defer wg.Done() - w.finalize(lastSummary) + w.finalize(ctx, lastSummary) }() continue } @@ -1084,7 +1021,7 @@ mainLoop: continue } var oldBlock *block.Block - oldBlock, err = w.commonNode.Runtime.History().GetCommittedBlock(w.ctx, i) + oldBlock, err = w.commonNode.Runtime.History().GetCommittedBlock(ctx, i) if err != nil { w.logger.Error("can't get block for round", "err", err, @@ -1133,7 +1070,7 @@ mainLoop: // error, and cachedLastRound also can't be updated legitimately. 
if finalized.err != nil { // Request a node shutdown given that syncing is effectively blocked. - _ = w.commonNode.HostNode.RequestShutdown(w.ctx, false) + _ = w.commonNode.HostNode.RequestShutdown(ctx, false) break mainLoop } @@ -1155,7 +1092,7 @@ mainLoop: w.checkpointer.NotifyNewVersion(finalized.summary.Round) } - case <-w.ctx.Done(): + case <-ctx.Done(): break mainLoop } } @@ -1164,4 +1101,5 @@ mainLoop: // blockCh will be garbage-collected without being closed. It can potentially still contain // some new blocks, but only as many as were already in-flight at the point when the main // context was canceled. + return nil } diff --git a/go/worker/storage/worker.go b/go/worker/storage/worker.go index c5b174d3aac..a027d9eae59 100644 --- a/go/worker/storage/worker.go +++ b/go/worker/storage/worker.go @@ -1,8 +1,11 @@ package storage import ( + "context" "fmt" + "golang.org/x/sync/errgroup" + "github.com/oasisprotocol/oasis-core/go/common" "github.com/oasisprotocol/oasis-core/go/common/grpc" "github.com/oasisprotocol/oasis-core/go/common/logging" @@ -27,6 +30,9 @@ type Worker struct { quitCh chan struct{} runtimes map[common.Namespace]*statesync.Worker + + ctx context.Context + cancel context.CancelFunc } // New constructs a new storage worker. @@ -35,6 +41,7 @@ func New( commonWorker *workerCommon.Worker, registration *registration.Worker, ) (*Worker, error) { + ctx, cancel := context.WithCancel(context.Background()) enabled := config.GlobalConfig.Mode.HasLocalStorage() && len(commonWorker.GetRuntimes()) > 0 s := &Worker{ @@ -45,6 +52,8 @@ func New( initCh: make(chan struct{}), quitCh: make(chan struct{}), runtimes: make(map[common.Namespace]*statesync.Worker), + ctx: ctx, + cancel: cancel, } if !enabled { @@ -91,6 +100,7 @@ func (w *Worker) registerRuntime(commonNode *committeeCommon.Node) error { } worker, err := statesync.New( + w.ctx, commonNode, rp, rpRPC, @@ -142,36 +152,43 @@ func (w *Worker) Start() error { return nil } - // Wait for all runtimes to terminate. go func() { defer close(w.quitCh) - - for _, r := range w.runtimes { - <-r.Quit() - } + _ = w.Serve() // error logged as part of Serve already. }() - // Start all runtimes and wait for initialization. go func() { - w.logger.Info("starting storage sync services", "num_runtimes", len(w.runtimes)) - - for _, r := range w.runtimes { - _ = r.Start() - } - - // Wait for runtimes to be initialized. for _, r := range w.runtimes { <-r.Initialized() } - w.logger.Info("storage worker started") - close(w.initCh) }() return nil } +// Serve starts running state sync worker for every configured runtime. +// +// In case of an error from one of the state sync workers it cancels the remaining +// ones and waits for all of them to finish. The error from the first worker +// that failed is returned. +func (w *Worker) Serve() error { + w.logger.Info("starting storage sync workers", "num_runtimes", len(w.runtimes)) + + g, ctx := errgroup.WithContext(w.ctx) + for id, r := range w.runtimes { + g.Go(func() error { + err := r.Run(ctx) + if err != nil { + w.logger.Error("state sync worker failed", "runtimeID", id, err, err) + } + return err + }) + } + return g.Wait() +} + // Stop halts the service. func (w *Worker) Stop() { if !w.enabled { @@ -179,9 +196,9 @@ func (w *Worker) Stop() { return } - for _, r := range w.runtimes { - r.Stop() - } + w.cancel() + <-w.quitCh + w.logger.Info("stopped") } // Quit returns a channel that will be closed when the service terminates. 
From a4e9069fd93d845adf444b1712e8b45751567f55 Mon Sep 17 00:00:00 2001 From: Martin Tomazic Date: Tue, 19 Aug 2025 11:45:30 +0200 Subject: [PATCH 05/11] go/worker/storage/statesync: Remove redundant context Probably the timeout should be the client responsibility. --- go/worker/storage/p2p/diffsync/client.go | 2 ++ go/worker/storage/p2p/synclegacy/client.go | 2 ++ go/worker/storage/statesync/checkpoint_sync.go | 1 + go/worker/storage/statesync/state_sync.go | 7 +++---- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/go/worker/storage/p2p/diffsync/client.go b/go/worker/storage/p2p/diffsync/client.go index 4c0363fe21d..1be235bbc42 100644 --- a/go/worker/storage/p2p/diffsync/client.go +++ b/go/worker/storage/p2p/diffsync/client.go @@ -20,6 +20,8 @@ const ( type Client interface { // GetDiff requests a write log of entries that must be applied to get from the first given root // to the second one. + // + // The request times out in [MaxGetDiffResponseTime]. GetDiff(ctx context.Context, request *GetDiffRequest) (*GetDiffResponse, rpc.PeerFeedback, error) } diff --git a/go/worker/storage/p2p/synclegacy/client.go b/go/worker/storage/p2p/synclegacy/client.go index 702fea0fdad..434b18bb3f2 100644 --- a/go/worker/storage/p2p/synclegacy/client.go +++ b/go/worker/storage/p2p/synclegacy/client.go @@ -24,6 +24,8 @@ const ( type Client interface { // GetDiff requests a write log of entries that must be applied to get from the first given root // to the second one. + // + // The request times out in [MaxGetDiffResponseTime]. GetDiff(ctx context.Context, request *GetDiffRequest) (*GetDiffResponse, rpc.PeerFeedback, error) // GetCheckpoints returns a list of checkpoint metadata for all known checkpoints. diff --git a/go/worker/storage/statesync/checkpoint_sync.go b/go/worker/storage/statesync/checkpoint_sync.go index 04b272eedaf..5cd04b4695a 100644 --- a/go/worker/storage/statesync/checkpoint_sync.go +++ b/go/worker/storage/statesync/checkpoint_sync.go @@ -486,6 +486,7 @@ func (w *Worker) syncCheckpoints(ctx context.Context, genesisRound uint64, wantO } } + // Suggestion: Limit the max time for restoring checkpoint. status, err := w.handleCheckpoint(ctx, check, w.checkpointSyncCfg.ChunkFetcherCount) switch status { case checkpointStatusDone: diff --git a/go/worker/storage/statesync/state_sync.go b/go/worker/storage/statesync/state_sync.go index f1d3552a804..61f1ea020e7 100644 --- a/go/worker/storage/statesync/state_sync.go +++ b/go/worker/storage/statesync/state_sync.go @@ -342,10 +342,7 @@ func (w *Worker) fetchDiff(ctx context.Context, round uint64, prevRoot, thisRoot "new_root", thisRoot, ) - diffCtx, cancel := context.WithCancel(ctx) - defer cancel() - - wl, pf, err := w.getDiff(diffCtx, prevRoot, thisRoot) + wl, pf, err := w.getDiff(ctx, prevRoot, thisRoot) if err != nil { result.err = err return @@ -356,6 +353,8 @@ func (w *Worker) fetchDiff(ctx context.Context, round uint64, prevRoot, thisRoot // getDiff fetches writelog using diff sync p2p protocol client. // +// The request relies on the default timeout of the underlying p2p protocol clients. +// // In case of no peers or error, it fallbacks to the legacy storage sync protocol. 
func (w *Worker) getDiff(ctx context.Context, prevRoot, thisRoot storageApi.Root) (storageApi.WriteLog, rpc.PeerFeedback, error) { rsp1, pf, err := w.diffSync.GetDiff(ctx, &diffsync.GetDiffRequest{StartRoot: prevRoot, EndRoot: thisRoot}) From a27b96796136da13cc410640901174172c51b636 Mon Sep 17 00:00:00 2001 From: Martin Tomazic Date: Sun, 3 Aug 2025 12:19:48 +0200 Subject: [PATCH 06/11] go/worker/storage/statesync: Do not panic Additionally, observe that the parent (storage worker) is registered as background service, thus upon error inside state sync worker there is no need to manually request the node shutdown. --- go/worker/storage/statesync/state_sync.go | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/go/worker/storage/statesync/state_sync.go b/go/worker/storage/statesync/state_sync.go index 61f1ea020e7..4e5943e3a2a 100644 --- a/go/worker/storage/statesync/state_sync.go +++ b/go/worker/storage/statesync/state_sync.go @@ -1022,12 +1022,7 @@ mainLoop: var oldBlock *block.Block oldBlock, err = w.commonNode.Runtime.History().GetCommittedBlock(ctx, i) if err != nil { - w.logger.Error("can't get block for round", - "err", err, - "round", i, - "current_round", blk.Header.Round, - ) - panic("can't get block in storage worker") + return fmt.Errorf("getting block for round %d (current round: %d): %w", i, blk.Header.Round, err) } summaryCache[i] = summaryFromBlock(oldBlock) } @@ -1068,9 +1063,8 @@ mainLoop: // There's no point redoing it, since it's probably not a transient // error, and cachedLastRound also can't be updated legitimately. if finalized.err != nil { - // Request a node shutdown given that syncing is effectively blocked. - _ = w.commonNode.HostNode.RequestShutdown(ctx, false) - break mainLoop + w.logger.Error("failed to finalize", "err", err, "summary", finalized.summary) + return fmt.Errorf("failed to finalize (round: %d): %w", finalized.summary.Round, finalized.err) } // No further sync or out of order handling needed here, since From 8dff06f7aae5b3b6b7d947d1146132cc17068f96 Mon Sep 17 00:00:00 2001 From: Martin Tomazic Date: Sun, 3 Aug 2025 12:32:07 +0200 Subject: [PATCH 07/11] go/worker/storage/statesync: Move syncing methods at the bottom --- go/worker/storage/statesync/state_sync.go | 249 +++++++++++----------- 1 file changed, 126 insertions(+), 123 deletions(-) diff --git a/go/worker/storage/statesync/state_sync.go b/go/worker/storage/statesync/state_sync.go index 4e5943e3a2a..c5e532b26bb 100644 --- a/go/worker/storage/statesync/state_sync.go +++ b/go/worker/storage/statesync/state_sync.go @@ -107,7 +107,7 @@ func (d *fetchedDiff) GetRound() uint64 { return d.round } -type finalizeResult struct { +type finalizedResult struct { summary *blockSummary err error } @@ -157,7 +157,7 @@ type Worker struct { // nolint: maligned blockCh *channels.InfiniteChannel diffCh chan *fetchedDiff - finalizeCh chan finalizeResult + finalizeCh chan finalizedResult initCh chan struct{} } @@ -192,7 +192,7 @@ func New( blockCh: channels.NewInfiniteChannel(), diffCh: make(chan *fetchedDiff), - finalizeCh: make(chan finalizeResult), + finalizeCh: make(chan finalizedResult), initCh: make(chan struct{}), } @@ -306,101 +306,6 @@ func (w *Worker) GetLastSynced() (uint64, storageApi.Root, storageApi.Root) { return w.syncedState.Round, io, state } -func (w *Worker) fetchDiff(ctx context.Context, round uint64, prevRoot, thisRoot storageApi.Root) { - result := &fetchedDiff{ - fetched: false, - pf: rpc.NewNopPeerFeedback(), - round: round, - prevRoot: prevRoot, - thisRoot: 
thisRoot, - } - defer func() { - select { - case w.diffCh <- result: - case <-ctx.Done(): - } - }() - - // Check if the new root doesn't already exist. - if w.localStorage.NodeDB().HasRoot(thisRoot) { - return - } - - result.fetched = true - - // Even if HasRoot returns false the root can still exist if it is equal - // to the previous root and the root was emitted by the consensus committee - // directly (e.g., during an epoch transition). - if thisRoot.Hash.Equal(&prevRoot.Hash) { - result.writeLog = storageApi.WriteLog{} - return - } - - // New root does not yet exist in storage and we need to fetch it from a peer. - w.logger.Debug("calling GetDiff", - "old_root", prevRoot, - "new_root", thisRoot, - ) - - wl, pf, err := w.getDiff(ctx, prevRoot, thisRoot) - if err != nil { - result.err = err - return - } - result.pf = pf - result.writeLog = wl -} - -// getDiff fetches writelog using diff sync p2p protocol client. -// -// The request relies on the default timeout of the underlying p2p protocol clients. -// -// In case of no peers or error, it fallbacks to the legacy storage sync protocol. -func (w *Worker) getDiff(ctx context.Context, prevRoot, thisRoot storageApi.Root) (storageApi.WriteLog, rpc.PeerFeedback, error) { - rsp1, pf, err := w.diffSync.GetDiff(ctx, &diffsync.GetDiffRequest{StartRoot: prevRoot, EndRoot: thisRoot}) - if err == nil { // if NO error - return rsp1.WriteLog, pf, nil - } - - rsp2, pf, err := w.legacyStorageSync.GetDiff(ctx, &synclegacy.GetDiffRequest{StartRoot: prevRoot, EndRoot: thisRoot}) - if err != nil { - return nil, nil, err - } - return rsp2.WriteLog, pf, nil -} - -func (w *Worker) finalize(ctx context.Context, summary *blockSummary) { - err := w.localStorage.NodeDB().Finalize(summary.Roots) - switch err { - case nil: - w.logger.Debug("storage round finalized", - "round", summary.Round, - ) - case storageApi.ErrAlreadyFinalized: - // This can happen if we are restoring after a roothash migration or if - // we crashed before updating the sync state. - w.logger.Warn("storage round already finalized", - "round", summary.Round, - ) - err = nil - default: - w.logger.Error("failed to finalize storage round", - "err", err, - "round", summary.Round, - ) - } - - result := finalizeResult{ - summary: summary, - err: err, - } - - select { - case w.finalizeCh <- result: - case <-ctx.Done(): - } -} - func (w *Worker) initGenesis(ctx context.Context, rt *registryApi.Runtime, genesisBlock *block.Block) error { w.logger.Info("initializing storage at genesis") @@ -531,31 +436,6 @@ func (w *Worker) flushSyncedState(summary *blockSummary) (uint64, error) { return w.syncedState.Round, nil } -// This is only called from the main worker goroutine, so no locking should be necessary. -func (w *Worker) nudgeAvailability(lastSynced, latest uint64) { - if lastSynced == w.undefinedRound || latest == w.undefinedRound { - return - } - if latest-lastSynced < maximumRoundDelayForAvailability && !w.roleAvailable { - w.roleProvider.SetAvailable(func(_ *node.Node) error { - return nil - }) - if w.rpcRoleProvider != nil { - w.rpcRoleProvider.SetAvailable(func(_ *node.Node) error { - return nil - }) - } - w.roleAvailable = true - } - if latest-lastSynced > minimumRoundDelayForUnavailability && w.roleAvailable { - w.roleProvider.SetUnavailable() - if w.rpcRoleProvider != nil { - w.rpcRoleProvider.SetUnavailable() - } - w.roleAvailable = false - } -} - // Run runs state sync worker. 
func (w *Worker) Run(ctx context.Context) error { // nolint: gocyclo defer close(w.diffCh) @@ -1096,3 +976,126 @@ mainLoop: // context was canceled. return nil } + +func (w *Worker) fetchDiff(ctx context.Context, round uint64, prevRoot, thisRoot storageApi.Root) { + result := &fetchedDiff{ + fetched: false, + pf: rpc.NewNopPeerFeedback(), + round: round, + prevRoot: prevRoot, + thisRoot: thisRoot, + } + defer func() { + select { + case w.diffCh <- result: + case <-ctx.Done(): + } + }() + + // Check if the new root doesn't already exist. + if w.localStorage.NodeDB().HasRoot(thisRoot) { + return + } + + result.fetched = true + + // Even if HasRoot returns false the root can still exist if it is equal + // to the previous root and the root was emitted by the consensus committee + // directly (e.g., during an epoch transition). + if thisRoot.Hash.Equal(&prevRoot.Hash) { + result.writeLog = storageApi.WriteLog{} + return + } + + // New root does not yet exist in storage and we need to fetch it from a peer. + w.logger.Debug("calling GetDiff", + "old_root", prevRoot, + "new_root", thisRoot, + ) + + diffCtx, cancel := context.WithCancel(ctx) + defer cancel() + + wl, pf, err := w.getDiff(diffCtx, prevRoot, thisRoot) + if err != nil { + result.err = err + return + } + result.pf = pf + result.writeLog = wl +} + +// getDiff fetches writelog using diff sync p2p protocol client. +// +// The request relies on the default timeout of the underlying p2p protocol clients. +// +// In case of no peers or error, it fallbacks to the legacy storage sync protocol. +func (w *Worker) getDiff(ctx context.Context, prevRoot, thisRoot storageApi.Root) (storageApi.WriteLog, rpc.PeerFeedback, error) { + rsp1, pf, err := w.diffSync.GetDiff(ctx, &diffsync.GetDiffRequest{StartRoot: prevRoot, EndRoot: thisRoot}) + if err == nil { // if NO error + return rsp1.WriteLog, pf, nil + } + + rsp2, pf, err := w.legacyStorageSync.GetDiff(ctx, &synclegacy.GetDiffRequest{StartRoot: prevRoot, EndRoot: thisRoot}) + if err != nil { + return nil, nil, err + } + return rsp2.WriteLog, pf, nil +} + +func (w *Worker) finalize(ctx context.Context, summary *blockSummary) { + err := w.localStorage.NodeDB().Finalize(summary.Roots) + switch err { + case nil: + w.logger.Debug("storage round finalized", + "round", summary.Round, + ) + case storageApi.ErrAlreadyFinalized: + // This can happen if we are restoring after a roothash migration or if + // we crashed before updating the sync state. + w.logger.Warn("storage round already finalized", + "round", summary.Round, + ) + err = nil + default: + w.logger.Error("failed to finalize storage round", + "err", err, + "round", summary.Round, + ) + } + + result := finalizedResult{ + summary: summary, + err: err, + } + + select { + case w.finalizeCh <- result: + case <-ctx.Done(): + } +} + +// This is only called from the main worker goroutine, so no locking should be necessary. 
+func (w *Worker) nudgeAvailability(lastSynced, latest uint64) { + if lastSynced == w.undefinedRound || latest == w.undefinedRound { + return + } + if latest-lastSynced < maximumRoundDelayForAvailability && !w.roleAvailable { + w.roleProvider.SetAvailable(func(_ *node.Node) error { + return nil + }) + if w.rpcRoleProvider != nil { + w.rpcRoleProvider.SetAvailable(func(_ *node.Node) error { + return nil + }) + } + w.roleAvailable = true + } + if latest-lastSynced > minimumRoundDelayForUnavailability && w.roleAvailable { + w.roleProvider.SetUnavailable() + if w.rpcRoleProvider != nil { + w.rpcRoleProvider.SetUnavailable() + } + w.roleAvailable = false + } +} From 213b17fa1f2290f527e527ae25209109d99d494d Mon Sep 17 00:00:00 2001 From: Martin Tomazic Date: Sun, 3 Aug 2025 16:25:06 +0200 Subject: [PATCH 08/11] go/worker/storage/statesync: Refactor the code The code was broken into smaller functions. Also the scope of variables (including channels) have been reduced. Semantics as well as performance should stay the same. --- .changelog/6299.trivial.md | 0 go/worker/storage/statesync/state_sync.go | 403 +++++++++++----------- 2 files changed, 203 insertions(+), 200 deletions(-) create mode 100644 .changelog/6299.trivial.md diff --git a/.changelog/6299.trivial.md b/.changelog/6299.trivial.md new file mode 100644 index 00000000000..e69de29bb2d diff --git a/go/worker/storage/statesync/state_sync.go b/go/worker/storage/statesync/state_sync.go index c5e532b26bb..fdf4b561cd2 100644 --- a/go/worker/storage/statesync/state_sync.go +++ b/go/worker/storage/statesync/state_sync.go @@ -155,9 +155,7 @@ type Worker struct { // nolint: maligned statusLock sync.RWMutex status api.StorageWorkerStatus - blockCh *channels.InfiniteChannel - diffCh chan *fetchedDiff - finalizeCh chan finalizedResult + blockCh *channels.InfiniteChannel initCh chan struct{} } @@ -190,9 +188,7 @@ func New( status: api.StatusInitializing, - blockCh: channels.NewInfiniteChannel(), - diffCh: make(chan *fetchedDiff), - finalizeCh: make(chan finalizedResult), + blockCh: channels.NewInfiniteChannel(), initCh: make(chan struct{}), } @@ -438,8 +434,6 @@ func (w *Worker) flushSyncedState(summary *blockSummary) (uint64, error) { // Run runs state sync worker. func (w *Worker) Run(ctx context.Context) error { // nolint: gocyclo - defer close(w.diffCh) - // Wait for the common node to be initialized. select { case <-w.commonNode.Initialized(): @@ -620,8 +614,6 @@ func (w *Worker) Run(ctx context.Context) error { // nolint: gocyclo "last_synced", cachedLastRound, ) - lastFullyAppliedRound := cachedLastRound - // Try to perform initial sync from state and io checkpoints if either: // // - Checkpoint sync has been forced because there is insufficient information available to use @@ -684,7 +676,6 @@ func (w *Worker) Run(ctx context.Context) error { // nolint: gocyclo if err != nil { return fmt.Errorf("failed to flush synced state %w", err) } - lastFullyAppliedRound = cachedLastRound w.logger.Info("checkpoint sync succeeded", logging.LogEvent, LogEventCheckpointSyncSuccess, ) @@ -692,92 +683,39 @@ func (w *Worker) Run(ctx context.Context) error { // nolint: gocyclo } close(w.initCh) - // Don't register availability immediately, we want to know first how far behind consensus we are. 
- latestBlockRound := w.undefinedRound + w.statusLock.Lock() + w.status = api.StatusSyncingRounds + w.statusLock.Unlock() - heartbeat := heartbeat{} - heartbeat.reset() + return w.sync(ctx, cachedLastRound) +} - var wg sync.WaitGroup +func (w *Worker) sync( + ctx context.Context, + lastFinalizedRound uint64, +) error { syncingRounds := make(map[uint64]*inFlight) summaryCache := make(map[uint64]*blockSummary) - // Create the fetcher pool. + pendingApply := &minRoundQueue{} + pendingFinalize := &minRoundQueue{} // Suggestion: slice would suffice given that application must happen in order. + + diffCh := make(chan *fetchedDiff) + finalizedCh := make(chan finalizedResult) + fetchPool := workerpool.New("storage_fetch/" + w.commonNode.Runtime.ID().String()) fetchPool.Resize(config.GlobalConfig.Storage.FetcherCount) defer fetchPool.Stop() - triggerRoundFetches := func() { - for i := lastFullyAppliedRound + 1; i <= latestBlockRound; i++ { - syncing, ok := syncingRounds[i] - if ok && syncing.outstanding.hasAll() { - continue - } - - if !ok { - if len(syncingRounds) >= maxInFlightRounds { - break - } - - syncing = &inFlight{ - startedAt: time.Now(), - awaitingRetry: outstandingMaskFull, - } - syncingRounds[i] = syncing - - if i == latestBlockRound { - storageWorkerLastPendingRound.With(w.getMetricLabels()).Set(float64(i)) - } - } - w.logger.Debug("preparing round sync", - "round", i, - "outstanding_mask", syncing.outstanding, - "awaiting_retry", syncing.awaitingRetry, - ) - - prev := summaryCache[i-1] - this := summaryCache[i] - prevRoots := make([]storageApi.Root, len(prev.Roots)) - copy(prevRoots, prev.Roots) - for i := range prevRoots { - if prevRoots[i].Type == storageApi.RootTypeIO { - // IO roots aren't chained, so clear it (but leave cache intact). - prevRoots[i] = storageApi.Root{ - Namespace: this.Namespace, - Version: this.Round, - Type: storageApi.RootTypeIO, - } - prevRoots[i].Hash.Empty() - break - } - } - - for i := range prevRoots { - rootType := prevRoots[i].Type - if !syncing.outstanding.contains(rootType) && syncing.awaitingRetry.contains(rootType) { - syncing.scheduleDiff(rootType) - wg.Add(1) - fetchPool.Submit(func() { - defer wg.Done() - w.fetchDiff(ctx, this.Round, prevRoots[i], this.Roots[i]) - }) - } - } - } - } + heartbeat := heartbeat{} + heartbeat.reset() + defer heartbeat.Stop() - w.statusLock.Lock() - w.status = api.StatusSyncingRounds - w.statusLock.Unlock() + var wg sync.WaitGroup + defer wg.Wait() - pendingApply := &minRoundQueue{} - pendingFinalize := &minRoundQueue{} - - // Main processing loop. When a new block arrives, its state and I/O roots are inspected. - // If missing locally, diffs are fetched from peers, possibly for many rounds in parallel, - // including all missing rounds since the last fully applied one. Fetched diffs are then applied - // in round order, ensuring no gaps. Once a round has all its roots applied, background finalization - // for that round is triggered asynchronously, not blocking concurrent fetching and diff application. -mainLoop: + lastFullyAppliedRound := lastFinalizedRound + // Don't register availability immediately, we want to know first how far behind consensus we are. + latestBlockRound := w.undefinedRound for { // Drain the Apply and Finalize queues first, before waiting for new events in the select below. @@ -788,32 +726,7 @@ mainLoop: pendingFinalize.Len() < dbApi.MaxPendingVersions-1 // -1 since one may be already finalizing. 
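The gating above is what keeps application strictly in round order: fetched diffs can arrive out of order, but the heap head is only popped while it is exactly the round after the last fully applied one, and only while the finalize backlog stays below the database limit. A minimal standalone sketch of that invariant, using a plain uint64 heap in place of minRoundQueue (names and values here are illustrative only, not part of the patch):

package main

import (
	"container/heap"
	"fmt"
)

// rounds is a tiny min-heap of round numbers, standing in for minRoundQueue.
type rounds []uint64

func (r rounds) Len() int           { return len(r) }
func (r rounds) Less(i, j int) bool { return r[i] < r[j] }
func (r rounds) Swap(i, j int)      { r[i], r[j] = r[j], r[i] }
func (r *rounds) Push(x any)        { *r = append(*r, x.(uint64)) }
func (r *rounds) Pop() any {
	old := *r
	n := len(old)
	x := old[n-1]
	*r = old[:n-1]
	return x
}

func main() {
	pending := &rounds{}
	lastApplied := uint64(10)

	// Diffs can finish fetching out of order.
	for _, r := range []uint64{13, 11, 12} {
		heap.Push(pending, r)
	}

	// Pop only while the heap head is exactly lastApplied+1, so no round is
	// applied out of order and a missing round (a gap) stalls the loop until
	// its diff arrives.
	for pending.Len() > 0 && (*pending)[0] == lastApplied+1 {
		lastApplied = heap.Pop(pending).(uint64)
		fmt.Println("applied round", lastApplied)
	}
}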
if applyNext { lastDiff := heap.Pop(pendingApply).(*fetchedDiff) - // Apply the write log if one exists. - err = nil - if lastDiff.fetched { - err = w.localStorage.Apply(ctx, &storageApi.ApplyRequest{ - Namespace: lastDiff.thisRoot.Namespace, - RootType: lastDiff.thisRoot.Type, - SrcRound: lastDiff.prevRoot.Version, - SrcRoot: lastDiff.prevRoot.Hash, - DstRound: lastDiff.thisRoot.Version, - DstRoot: lastDiff.thisRoot.Hash, - WriteLog: lastDiff.writeLog, - }) - switch { - case err == nil: - lastDiff.pf.RecordSuccess() - case errors.Is(err, storageApi.ErrExpectedRootMismatch): - lastDiff.pf.RecordBadPeer() - default: - w.logger.Error("can't apply write log", - "err", err, - "old_root", lastDiff.prevRoot, - "new_root", lastDiff.thisRoot, - ) - lastDiff.pf.RecordSuccess() - } - } + err := w.apply(ctx, lastDiff) syncing := syncingRounds[lastDiff.round] if err != nil { @@ -832,10 +745,12 @@ mainLoop: delete(summaryCache, lastDiff.round-1) lastFullyAppliedRound = lastDiff.round + // Suggestion: Rename to lastAppliedRoundMetric, as synced is often synonim for finalized in this code. storageWorkerLastSyncedRound.With(w.getMetricLabels()).Set(float64(lastDiff.round)) + // Suggestion: Ideally this would be recorded once the round is finalized (synced). storageWorkerRoundSyncLatency.With(w.getMetricLabels()).Observe(time.Since(syncing.startedAt).Seconds()) - // Finalize storage for this round. This happens asynchronously + // Trigger finalization for this round, that will happen concurently // with respect to Apply operations for subsequent rounds. heap.Push(pendingFinalize, summary) @@ -845,81 +760,39 @@ mainLoop: // Check if any new rounds were fully applied and need to be finalized. // Only finalize if it's the round after the one that was finalized last. // As a consequence at most one finalization can be happening at the time. - if len(*pendingFinalize) > 0 && cachedLastRound+1 == (*pendingFinalize)[0].GetRound() { - lastSummary := heap.Pop(pendingFinalize).(*blockSummary) + if len(*pendingFinalize) > 0 && lastFinalizedRound+1 == (*pendingFinalize)[0].GetRound() { + summary := heap.Pop(pendingFinalize).(*blockSummary) wg.Add(1) go func() { // Don't block fetching and applying remaining rounds. defer wg.Done() - w.finalize(ctx, lastSummary) + w.finalize(ctx, summary, finalizedCh) }() continue } select { + case <-ctx.Done(): + return ctx.Err() case inBlk := <-w.blockCh.Out(): blk := inBlk.(*block.Block) w.logger.Debug("incoming block", "round", blk.Header.Round, "last_synced", lastFullyAppliedRound, - "last_finalized", cachedLastRound, + "last_finalized", lastFinalizedRound, ) // Check if we're far enough to reasonably register as available. latestBlockRound = blk.Header.Round - w.nudgeAvailability(cachedLastRound, latestBlockRound) - - if _, ok := summaryCache[lastFullyAppliedRound]; !ok && lastFullyAppliedRound == w.undefinedRound { - dummy := blockSummary{ - Namespace: blk.Header.Namespace, - Round: lastFullyAppliedRound + 1, - Roots: []storageApi.Root{ - { - Version: lastFullyAppliedRound + 1, - Type: storageApi.RootTypeIO, - }, - { - Version: lastFullyAppliedRound + 1, - Type: storageApi.RootTypeState, - }, - }, - } - dummy.Roots[0].Empty() - dummy.Roots[1].Empty() - summaryCache[lastFullyAppliedRound] = &dummy - } - // Determine if we need to fetch any old block summaries. In case the first - // round is an undefined round, we need to start with the following round - // since the undefined round may be unsigned -1 and in this case the loop - // would not do any iterations. 
- startSummaryRound := lastFullyAppliedRound - if startSummaryRound == w.undefinedRound { - startSummaryRound++ - } - for i := startSummaryRound; i < blk.Header.Round; i++ { - if _, ok := summaryCache[i]; ok { - continue - } - var oldBlock *block.Block - oldBlock, err = w.commonNode.Runtime.History().GetCommittedBlock(ctx, i) - if err != nil { - return fmt.Errorf("getting block for round %d (current round: %d): %w", i, blk.Header.Round, err) - } - summaryCache[i] = summaryFromBlock(oldBlock) - } - if _, ok := summaryCache[blk.Header.Round]; !ok { - summaryCache[blk.Header.Round] = summaryFromBlock(blk) - } + // Fixme: If block channel has many pending blocks (e.g. after checkpoint sync), + // nudgeAvailability may incorrectly set the node as available too early. + w.nudgeAvailability(lastFinalizedRound, latestBlockRound) - triggerRoundFetches() - heartbeat.reset() - - case <-heartbeat.C: - if latestBlockRound != w.undefinedRound { - w.logger.Debug("heartbeat", "in_flight_rounds", len(syncingRounds)) - triggerRoundFetches() + if err := w.fetchMissingBlockHeaders(ctx, lastFullyAppliedRound, blk, summaryCache); err != nil { + return fmt.Errorf("failed to fetch missing block headers: %w", err) // Suggestion: databases can fail, consider retrying. } - case item := <-w.diffCh: + w.triggerRoundFetches(ctx, &wg, fetchPool, diffCh, syncingRounds, summaryCache, lastFullyAppliedRound+1, latestBlockRound) + case item := <-diffCh: if item.err != nil { w.logger.Error("error calling getdiff", "err", item.err, @@ -928,7 +801,7 @@ mainLoop: "new_root", item.thisRoot, "fetched", item.fetched, ) - syncingRounds[item.round].retry(item.thisRoot.Type) + syncingRounds[item.round].retry(item.thisRoot.Type) // Suggestion: Trigger fetches immediately. break } @@ -936,48 +809,150 @@ mainLoop: // Item was successfully processed, trigger more round fetches. // This ensures that new rounds are processed as fast as possible // when we're syncing and are far behind. - triggerRoundFetches() - - case finalized := <-w.finalizeCh: - // If finalization failed, things start falling apart. - // There's no point redoing it, since it's probably not a transient - // error, and cachedLastRound also can't be updated legitimately. + w.triggerRoundFetches(ctx, &wg, fetchPool, diffCh, syncingRounds, summaryCache, lastFullyAppliedRound+1, latestBlockRound) + heartbeat.reset() + case <-heartbeat.C: + if latestBlockRound != w.undefinedRound { + w.logger.Debug("heartbeat", "in_flight_rounds", len(syncingRounds)) + w.triggerRoundFetches(ctx, &wg, fetchPool, diffCh, syncingRounds, summaryCache, lastFullyAppliedRound+1, latestBlockRound) + } + case finalized := <-finalizedCh: if finalized.err != nil { - w.logger.Error("failed to finalize", "err", err, "summary", finalized.summary) return fmt.Errorf("failed to finalize (round: %d): %w", finalized.summary.Round, finalized.err) } - - // No further sync or out of order handling needed here, since - // only one finalize at a time is triggered (for round cachedLastRound+1) - cachedLastRound, err = w.flushSyncedState(finalized.summary) - if err != nil { - w.logger.Error("failed to flush synced state", - "err", err, - ) + var err error + lastFinalizedRound, err = w.flushSyncedState(finalized.summary) + if err != nil { // Suggestion: DB operations can always fail, consider retrying. + return fmt.Errorf("failed to flush synced state: %w", err) } storageWorkerLastFullRound.With(w.getMetricLabels()).Set(float64(finalized.summary.Round)) // Check if we're far enough to reasonably register as available. 
- w.nudgeAvailability(cachedLastRound, latestBlockRound) + w.nudgeAvailability(lastFinalizedRound, latestBlockRound) // Notify the checkpointer that there is a new finalized round. if config.GlobalConfig.Storage.Checkpointer.Enabled { w.checkpointer.NotifyNewVersion(finalized.summary.Round) } - case <-ctx.Done(): - break mainLoop + return ctx.Err() } } +} - wg.Wait() - // blockCh will be garbage-collected without being closed. It can potentially still contain - // some new blocks, but only as many as were already in-flight at the point when the main - // context was canceled. +func (w *Worker) fetchMissingBlockHeaders(ctx context.Context, lastFullyAppliedRound uint64, blk *block.Block, summaryCache map[uint64]*blockSummary) error { + if _, ok := summaryCache[lastFullyAppliedRound]; !ok && lastFullyAppliedRound == w.undefinedRound { // Suggestion: Helper that is only done once. + dummy := blockSummary{ + Namespace: blk.Header.Namespace, + Round: lastFullyAppliedRound + 1, + Roots: []storageApi.Root{ + { + Version: lastFullyAppliedRound + 1, + Type: storageApi.RootTypeIO, + }, + { + Version: lastFullyAppliedRound + 1, + Type: storageApi.RootTypeState, + }, + }, + } + dummy.Roots[0].Empty() + dummy.Roots[1].Empty() + summaryCache[lastFullyAppliedRound] = &dummy + } + // Determine if we need to fetch any old block summaries. In case the first + // round is an undefined round, we need to start with the following round + // since the undefined round may be unsigned -1 and in this case the loop + // would not do any iterations. + startSummaryRound := lastFullyAppliedRound + if startSummaryRound == w.undefinedRound { + startSummaryRound++ + } + for i := startSummaryRound; i < blk.Header.Round; i++ { + if _, ok := summaryCache[i]; ok { + continue + } + oldBlock, err := w.commonNode.Runtime.History().GetCommittedBlock(ctx, i) + if err != nil { + return fmt.Errorf("getting block for round %d (current round: %d): %w", i, blk.Header.Round, err) + } + summaryCache[i] = summaryFromBlock(oldBlock) + } + if _, ok := summaryCache[blk.Header.Round]; !ok { + summaryCache[blk.Header.Round] = summaryFromBlock(blk) + } return nil } -func (w *Worker) fetchDiff(ctx context.Context, round uint64, prevRoot, thisRoot storageApi.Root) { +func (w *Worker) triggerRoundFetches( + ctx context.Context, + wg *sync.WaitGroup, + fetchPool *workerpool.Pool, + diffCh chan<- *fetchedDiff, + syncingRounds map[uint64]*inFlight, + summaryCache map[uint64]*blockSummary, + start uint64, + end uint64, +) { + for r := start; r <= end; r++ { + syncing, ok := syncingRounds[r] + if ok && syncing.outstanding.hasAll() { + continue + } + + if !ok { + if len(syncingRounds) >= maxInFlightRounds { + break + } + + syncing = &inFlight{ + startedAt: time.Now(), + awaitingRetry: outstandingMaskFull, + } + syncingRounds[r] = syncing + + if r == end { + storageWorkerLastPendingRound.With(w.getMetricLabels()).Set(float64(r)) + } + } + w.logger.Debug("preparing round sync", + "round", r, + "outstanding_mask", syncing.outstanding, + "awaiting_retry", syncing.awaitingRetry, + ) + + prev := summaryCache[r-1] + this := summaryCache[r] + prevRoots := make([]storageApi.Root, len(prev.Roots)) + copy(prevRoots, prev.Roots) + for i := range prevRoots { + if prevRoots[i].Type == storageApi.RootTypeIO { + // IO roots aren't chained, so clear it (but leave cache intact). 
+ prevRoots[i] = storageApi.Root{ + Namespace: this.Namespace, + Version: this.Round, + Type: storageApi.RootTypeIO, + } + prevRoots[i].Hash.Empty() + break + } + } + + for i := range prevRoots { + rootType := prevRoots[i].Type + if !syncing.outstanding.contains(rootType) && syncing.awaitingRetry.contains(rootType) { + syncing.scheduleDiff(rootType) + wg.Add(1) + fetchPool.Submit(func() { + defer wg.Done() + w.fetchDiff(ctx, diffCh, this.Round, prevRoots[i], this.Roots[i]) + }) + } + } + } +} + +func (w *Worker) fetchDiff(ctx context.Context, fetchCh chan<- *fetchedDiff, round uint64, prevRoot, thisRoot storageApi.Root) { result := &fetchedDiff{ fetched: false, pf: rpc.NewNopPeerFeedback(), @@ -987,7 +962,7 @@ func (w *Worker) fetchDiff(ctx context.Context, round uint64, prevRoot, thisRoot } defer func() { select { - case w.diffCh <- result: + case fetchCh <- result: case <-ctx.Done(): } }() @@ -1043,7 +1018,38 @@ func (w *Worker) getDiff(ctx context.Context, prevRoot, thisRoot storageApi.Root return rsp2.WriteLog, pf, nil } -func (w *Worker) finalize(ctx context.Context, summary *blockSummary) { +func (w *Worker) apply(ctx context.Context, diff *fetchedDiff) error { + if !diff.fetched { + return nil + } + + err := w.localStorage.Apply(ctx, &storageApi.ApplyRequest{ + Namespace: diff.thisRoot.Namespace, + RootType: diff.thisRoot.Type, + SrcRound: diff.prevRoot.Version, + SrcRoot: diff.prevRoot.Hash, + DstRound: diff.thisRoot.Version, + DstRoot: diff.thisRoot.Hash, + WriteLog: diff.writeLog, + }) + switch { + case err == nil: + diff.pf.RecordSuccess() + case errors.Is(err, storageApi.ErrExpectedRootMismatch): + diff.pf.RecordBadPeer() + default: + w.logger.Error("can't apply write log", + "err", err, + "old_root", diff.prevRoot, + "new_root", diff.thisRoot, + ) + diff.pf.RecordSuccess() + } + + return err +} + +func (w *Worker) finalize(ctx context.Context, summary *blockSummary, finalizedCh chan<- finalizedResult) { err := w.localStorage.NodeDB().Finalize(summary.Roots) switch err { case nil: @@ -1058,10 +1064,7 @@ func (w *Worker) finalize(ctx context.Context, summary *blockSummary) { ) err = nil default: - w.logger.Error("failed to finalize storage round", - "err", err, - "round", summary.Round, - ) + w.logger.Error("failed to finalize", "err", err, "summary", summary) } result := finalizedResult{ @@ -1070,7 +1073,7 @@ func (w *Worker) finalize(ctx context.Context, summary *blockSummary) { } select { - case w.finalizeCh <- result: + case finalizedCh <- result: case <-ctx.Done(): } } From 1bb8edbeab8b1a9aad01eb95b20be9309ce7934e Mon Sep 17 00:00:00 2001 From: Martin Tomazic Date: Thu, 7 Aug 2025 00:10:27 +0200 Subject: [PATCH 09/11] go/worker/storage/statesync: Move diff sync to separate file The logic was preserved. Ideally, diff sync would only accept context, local storage backend, and client/interface to fetch diff. This would make it testable in isolation. Finally, use of undefined round should be moved out of it. 
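One possible shape of that narrower dependency, sketched only to illustrate the idea from the message above (the interface names are hypothetical and not part of the patch): the loop would take the fetch and apply capabilities as small interfaces, so tests could drive it with fakes instead of a live P2P client and node database.

package statesync

import (
	"context"

	"github.com/oasisprotocol/oasis-core/go/p2p/rpc"
	"github.com/oasisprotocol/oasis-core/go/storage/api"
)

// diffFetcher is a hypothetical interface for obtaining a write log between two
// roots; the existing getDiff (diff sync with a legacy fallback) would satisfy
// it, and a test fake could return canned write logs.
type diffFetcher interface {
	GetDiff(ctx context.Context, prevRoot, thisRoot api.Root) (api.WriteLog, rpc.PeerFeedback, error)
}

// diffApplier is the subset of the local storage backend the sync loop writes to.
type diffApplier interface {
	Apply(ctx context.Context, request *api.ApplyRequest) error
}

A constructor along the lines of newDiffSyncer(localStorage diffApplier, fetcher diffFetcher) (name hypothetical) would then also let the undefined-round bookkeeping stay with the caller, as the last sentence of the message suggests.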
--- go/worker/storage/statesync/diff_sync.go | 462 ++++++++++++++++ go/worker/storage/statesync/state_sync.go | 642 ++++------------------ 2 files changed, 562 insertions(+), 542 deletions(-) create mode 100644 go/worker/storage/statesync/diff_sync.go diff --git a/go/worker/storage/statesync/diff_sync.go b/go/worker/storage/statesync/diff_sync.go new file mode 100644 index 00000000000..468f5c2dc06 --- /dev/null +++ b/go/worker/storage/statesync/diff_sync.go @@ -0,0 +1,462 @@ +package statesync + +import ( + "container/heap" + "context" + "errors" + "fmt" + "sync" + "time" + + "github.com/oasisprotocol/oasis-core/go/common/workerpool" + "github.com/oasisprotocol/oasis-core/go/config" + "github.com/oasisprotocol/oasis-core/go/p2p/rpc" + "github.com/oasisprotocol/oasis-core/go/roothash/api/block" + "github.com/oasisprotocol/oasis-core/go/storage/api" + dbApi "github.com/oasisprotocol/oasis-core/go/storage/mkvs/db/api" + "github.com/oasisprotocol/oasis-core/go/worker/storage/p2p/diffsync" + "github.com/oasisprotocol/oasis-core/go/worker/storage/p2p/synclegacy" +) + +const ( + // maxInFlightRounds is the maximum number of rounds that should be fetched before waiting + // for them to be applied. + maxInFlightRounds = 100 +) + +type roundItem interface { + GetRound() uint64 +} + +// minRoundQueue is a Round()-based min priority queue. +type minRoundQueue []roundItem + +// Sorting interface. +func (q minRoundQueue) Len() int { return len(q) } +func (q minRoundQueue) Less(i, j int) bool { return q[i].GetRound() < q[j].GetRound() } +func (q minRoundQueue) Swap(i, j int) { q[i], q[j] = q[j], q[i] } + +// Push appends x as the last element in the heap's array. +func (q *minRoundQueue) Push(x any) { + *q = append(*q, x.(roundItem)) +} + +// Pop removes and returns the last element in the heap's array. +func (q *minRoundQueue) Pop() any { + old := *q + n := len(old) + x := old[n-1] + *q = old[0 : n-1] + return x +} + +// fetchedDiff has all the context needed for a single GetDiff operation. +type fetchedDiff struct { + fetched bool + pf rpc.PeerFeedback + err error + round uint64 + prevRoot api.Root + thisRoot api.Root + writeLog api.WriteLog +} + +func (d *fetchedDiff) GetRound() uint64 { + return d.round +} + +type finalizedResult struct { + summary *blockSummary + err error +} + +// syncDiffs is responsible for fetching, applying and finalizing storage diffs +// as the new runtimes block headers arrive from the consensus service. +// +// In addition, it is also responsible for updating availability of the registration +// service and notifying block history and checkpointer of the newly finalized rounds. +// +// Suggestion: Ideally syncDiffs is refactored into independent worker and made only +// responsible for the syncing. +func (w *Worker) syncDiffs( + ctx context.Context, + lastFinalizedRound uint64, +) error { + syncingRounds := make(map[uint64]*inFlight) + summaryCache := make(map[uint64]*blockSummary) + pendingApply := &minRoundQueue{} + pendingFinalize := &minRoundQueue{} // Suggestion: slice would suffice given that application must happen in order. 
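A standalone sketch of that slice-based alternative (illustrative only, not part of the patch): because rounds finish applying in strictly increasing order, a FIFO slice keeps the same head-of-queue semantics as the heap.

package main

import "fmt"

type summary struct{ round uint64 }

func main() {
	var pendingFinalize []summary // FIFO: summaries are appended in increasing round order.
	lastFinalized := uint64(10)

	for _, r := range []uint64{11, 12, 13} { // rounds become fully applied strictly in order
		pendingFinalize = append(pendingFinalize, summary{round: r})

		// Finalize the head only when it directly follows the last finalized round,
		// mirroring the lastFinalizedRound+1 check in the loop; in the worker the
		// advance happens only after the asynchronous finalization reports success.
		if pendingFinalize[0].round == lastFinalized+1 {
			next := pendingFinalize[0]
			pendingFinalize = pendingFinalize[1:]
			lastFinalized = next.round
			fmt.Println("finalized round", lastFinalized)
		}
	}
}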
+ + diffCh := make(chan *fetchedDiff) + finalizedCh := make(chan finalizedResult) + + fetchPool := workerpool.New("storage_fetch/" + w.commonNode.Runtime.ID().String()) + fetchPool.Resize(config.GlobalConfig.Storage.FetcherCount) + defer fetchPool.Stop() + + heartbeat := heartbeat{} + heartbeat.reset() + defer heartbeat.Stop() + + var wg sync.WaitGroup + defer wg.Wait() + + lastFullyAppliedRound := lastFinalizedRound + // Don't register availability immediately, we want to know first how far behind consensus we are. + latestBlockRound := w.undefinedRound + for { + // Drain the Apply and Finalize queues first, before waiting for new events in the select below. + + // Apply fetched writelogs, but only if they are for the round after the last fully applied one + // and current number of pending roots to be finalized is smaller than max allowed. + applyNext := pendingApply.Len() > 0 && + lastFullyAppliedRound+1 == (*pendingApply)[0].GetRound() && + pendingFinalize.Len() < dbApi.MaxPendingVersions-1 // -1 since one may be already finalizing. + if applyNext { + lastDiff := heap.Pop(pendingApply).(*fetchedDiff) + err := w.apply(ctx, lastDiff) + + syncing := syncingRounds[lastDiff.round] + if err != nil { + syncing.retry(lastDiff.thisRoot.Type) + continue + } + syncing.outstanding.remove(lastDiff.thisRoot.Type) + if !syncing.outstanding.isEmpty() || !syncing.awaitingRetry.isEmpty() { + continue + } + + // We have fully synced the given round. + w.logger.Debug("finished syncing round", "round", lastDiff.round) + delete(syncingRounds, lastDiff.round) + summary := summaryCache[lastDiff.round] + delete(summaryCache, lastDiff.round-1) + lastFullyAppliedRound = lastDiff.round + + // Suggestion: Rename to lastAppliedRoundMetric, as synced is synonim for finalized in this code. + storageWorkerLastSyncedRound.With(w.getMetricLabels()).Set(float64(lastDiff.round)) + // Suggestion: Ideally this would be recorded once the round is finalized (synced). + storageWorkerRoundSyncLatency.With(w.getMetricLabels()).Observe(time.Since(syncing.startedAt).Seconds()) + + // Trigger finalization for this round, that will happen concurently + // with respect to Apply operations for subsequent rounds. + heap.Push(pendingFinalize, summary) + + continue + } + + // Check if any new rounds were fully applied and need to be finalized. + // Only finalize if it's the round after the one that was finalized last. + // As a consequence at most one finalization can be happening at the time. + if len(*pendingFinalize) > 0 && lastFinalizedRound+1 == (*pendingFinalize)[0].GetRound() { + summary := heap.Pop(pendingFinalize).(*blockSummary) + wg.Add(1) + go func() { // Don't block fetching and applying remaining rounds. + defer wg.Done() + w.finalize(ctx, summary, finalizedCh) + }() + continue + } + + select { + case <-ctx.Done(): + return ctx.Err() + case inBlk := <-w.blockCh.Out(): + blk := inBlk.(*block.Block) + w.logger.Debug("incoming block", + "round", blk.Header.Round, + "last_fully_applied", lastFullyAppliedRound, + "last_finalized", lastFinalizedRound, + ) + + // Check if we're far enough to reasonably register as available. + latestBlockRound = blk.Header.Round + // Fixme: If block channel has many pending blocks (e.g. after checkpoint sync), + // nudgeAvailability may incorrectly set the node as available too early. 
+ w.nudgeAvailability(lastFinalizedRound, latestBlockRound) + + if err := w.fetchMissingBlockHeaders(ctx, lastFullyAppliedRound, blk, summaryCache); err != nil { + return fmt.Errorf("failed to fetch missing block headers: %w", err) // Suggestion: databases can fail, consider retrying. + } + + w.triggerRoundFetches(ctx, &wg, fetchPool, diffCh, syncingRounds, summaryCache, lastFullyAppliedRound+1, latestBlockRound) + case item := <-diffCh: + if item.err != nil { + w.logger.Error("error calling getdiff", + "err", item.err, + "round", item.round, + "old_root", item.prevRoot, + "new_root", item.thisRoot, + "fetched", item.fetched, + ) + syncingRounds[item.round].retry(item.thisRoot.Type) // Suggestion: Trigger fetches immediately. + break + } + + heap.Push(pendingApply, item) + // Item was successfully processed, trigger more round fetches. + // This ensures that new rounds are processed as fast as possible + // when we're syncing and are far behind. + w.triggerRoundFetches(ctx, &wg, fetchPool, diffCh, syncingRounds, summaryCache, lastFullyAppliedRound+1, latestBlockRound) + heartbeat.reset() + case <-heartbeat.C: + if latestBlockRound != w.undefinedRound { + w.logger.Debug("heartbeat", "in_flight_rounds", len(syncingRounds)) + w.triggerRoundFetches(ctx, &wg, fetchPool, diffCh, syncingRounds, summaryCache, lastFullyAppliedRound+1, latestBlockRound) + } + case finalized := <-finalizedCh: + if finalized.err != nil { + return fmt.Errorf("failed to finalize (round: %d): %w", finalized.summary.Round, finalized.err) + } + var err error + lastFinalizedRound, err = w.flushSyncedState(finalized.summary) + if err != nil { // Suggestion: DB operations can always fail, consider retrying. + return fmt.Errorf("failed to flush synced state: %w", err) + } + storageWorkerLastFullRound.With(w.getMetricLabels()).Set(float64(finalized.summary.Round)) + + // Check if we're far enough to reasonably register as available. + w.nudgeAvailability(lastFinalizedRound, latestBlockRound) + + // Notify the checkpointer that there is a new finalized round. + if config.GlobalConfig.Storage.Checkpointer.Enabled { + w.checkpointer.NotifyNewVersion(finalized.summary.Round) + } + } + } +} + +func (w *Worker) fetchMissingBlockHeaders(ctx context.Context, lastFullyAppliedRound uint64, blk *block.Block, summaryCache map[uint64]*blockSummary) error { + if _, ok := summaryCache[lastFullyAppliedRound]; !ok && lastFullyAppliedRound == w.undefinedRound { // Suggestion: Helper that is only done once. + dummy := blockSummary{ + Namespace: blk.Header.Namespace, + Round: lastFullyAppliedRound + 1, + Roots: []api.Root{ + { + Version: lastFullyAppliedRound + 1, + Type: api.RootTypeIO, + }, + { + Version: lastFullyAppliedRound + 1, + Type: api.RootTypeState, + }, + }, + } + dummy.Roots[0].Empty() + dummy.Roots[1].Empty() + summaryCache[lastFullyAppliedRound] = &dummy + } + // Determine if we need to fetch any old block summaries. In case the first + // round is an undefined round, we need to start with the following round + // since the undefined round may be unsigned -1 and in this case the loop + // would not do any iterations. 
+ startSummaryRound := lastFullyAppliedRound + if startSummaryRound == w.undefinedRound { + startSummaryRound++ + } + for i := startSummaryRound; i < blk.Header.Round; i++ { + if _, ok := summaryCache[i]; ok { + continue + } + oldBlock, err := w.commonNode.Runtime.History().GetCommittedBlock(ctx, i) + if err != nil { + return fmt.Errorf("getting block for round %d (current round: %d): %w", i, blk.Header.Round, err) + } + summaryCache[i] = summaryFromBlock(oldBlock) + } + if _, ok := summaryCache[blk.Header.Round]; !ok { + summaryCache[blk.Header.Round] = summaryFromBlock(blk) + } + return nil +} + +func (w *Worker) triggerRoundFetches( + ctx context.Context, + wg *sync.WaitGroup, + fetchPool *workerpool.Pool, + diffCh chan<- *fetchedDiff, + syncingRounds map[uint64]*inFlight, + summaryCache map[uint64]*blockSummary, + start uint64, + end uint64, +) { + for r := start; r <= end; r++ { + syncing, ok := syncingRounds[r] + if ok && syncing.outstanding.hasAll() { + continue + } + + if !ok { + if len(syncingRounds) >= maxInFlightRounds { + break + } + + syncing = &inFlight{ + startedAt: time.Now(), + awaitingRetry: outstandingMaskFull, + } + syncingRounds[r] = syncing + + if r == end { + storageWorkerLastPendingRound.With(w.getMetricLabels()).Set(float64(r)) + } + } + w.logger.Debug("preparing round sync", + "round", r, + "outstanding_mask", syncing.outstanding, + "awaiting_retry", syncing.awaitingRetry, + ) + + prev := summaryCache[r-1] + this := summaryCache[r] + prevRoots := make([]api.Root, len(prev.Roots)) + copy(prevRoots, prev.Roots) + for i := range prevRoots { + if prevRoots[i].Type == api.RootTypeIO { + // IO roots aren't chained, so clear it (but leave cache intact). + prevRoots[i] = api.Root{ + Namespace: this.Namespace, + Version: this.Round, + Type: api.RootTypeIO, + } + prevRoots[i].Hash.Empty() + break + } + } + + for i := range prevRoots { + rootType := prevRoots[i].Type + if !syncing.outstanding.contains(rootType) && syncing.awaitingRetry.contains(rootType) { + syncing.scheduleDiff(rootType) + wg.Add(1) + fetchPool.Submit(func() { + defer wg.Done() + w.fetchDiff(ctx, diffCh, this.Round, prevRoots[i], this.Roots[i]) + }) + } + } + } +} + +func (w *Worker) fetchDiff(ctx context.Context, fetchCh chan<- *fetchedDiff, round uint64, prevRoot, thisRoot api.Root) { + result := &fetchedDiff{ + fetched: false, + pf: rpc.NewNopPeerFeedback(), + round: round, + prevRoot: prevRoot, + thisRoot: thisRoot, + } + defer func() { + select { + case fetchCh <- result: + case <-ctx.Done(): + } + }() + + // Check if the new root doesn't already exist. + if w.localStorage.NodeDB().HasRoot(thisRoot) { + return + } + + result.fetched = true + + // Even if HasRoot returns false the root can still exist if it is equal + // to the previous root and the root was emitted by the consensus committee + // directly (e.g., during an epoch transition). + if thisRoot.Hash.Equal(&prevRoot.Hash) { + result.writeLog = api.WriteLog{} + return + } + + // New root does not yet exist in storage and we need to fetch it from a peer. + w.logger.Debug("calling GetDiff", + "old_root", prevRoot, + "new_root", thisRoot, + ) + + wl, pf, err := w.getDiff(ctx, prevRoot, thisRoot) + if err != nil { + result.err = err + return + } + result.pf = pf + result.writeLog = wl +} + +// getDiff fetches writelog using diff sync p2p protocol client. +// +// The request relies on the default timeout of the underlying p2p protocol clients. +// +// In case of no peers or error, it fallbacks to the legacy storage sync protocol. 
+func (w *Worker) getDiff(ctx context.Context, prevRoot, thisRoot api.Root) (api.WriteLog, rpc.PeerFeedback, error) { + rsp1, pf, err := w.diffSync.GetDiff(ctx, &diffsync.GetDiffRequest{StartRoot: prevRoot, EndRoot: thisRoot}) + if err == nil { // if NO error + return rsp1.WriteLog, pf, nil + } + + rsp2, pf, err := w.legacyStorageSync.GetDiff(ctx, &synclegacy.GetDiffRequest{StartRoot: prevRoot, EndRoot: thisRoot}) + if err != nil { + return nil, nil, err + } + return rsp2.WriteLog, pf, nil +} + +func (w *Worker) apply(ctx context.Context, diff *fetchedDiff) error { + if !diff.fetched { + return nil + } + + err := w.localStorage.Apply(ctx, &api.ApplyRequest{ + Namespace: diff.thisRoot.Namespace, + RootType: diff.thisRoot.Type, + SrcRound: diff.prevRoot.Version, + SrcRoot: diff.prevRoot.Hash, + DstRound: diff.thisRoot.Version, + DstRoot: diff.thisRoot.Hash, + WriteLog: diff.writeLog, + }) + switch { + case err == nil: + diff.pf.RecordSuccess() + case errors.Is(err, api.ErrExpectedRootMismatch): + diff.pf.RecordBadPeer() + default: + w.logger.Error("can't apply write log", + "err", err, + "old_root", diff.prevRoot, + "new_root", diff.thisRoot, + ) + diff.pf.RecordSuccess() + } + + return err +} + +func (w *Worker) finalize(ctx context.Context, summary *blockSummary, finalizedCh chan<- finalizedResult) { + err := w.localStorage.NodeDB().Finalize(summary.Roots) + switch err { + case nil: + w.logger.Debug("storage round finalized", + "round", summary.Round, + ) + case api.ErrAlreadyFinalized: + // This can happen if we are restoring after a roothash migration or if + // we crashed before updating the sync state. + w.logger.Warn("storage round already finalized", + "round", summary.Round, + ) + err = nil + default: + w.logger.Error("failed to finalize", "err", err, "summary", summary) + } + + result := finalizedResult{ + summary: summary, + err: err, + } + + select { + case finalizedCh <- result: + case <-ctx.Done(): + } +} diff --git a/go/worker/storage/statesync/state_sync.go b/go/worker/storage/statesync/state_sync.go index fdf4b561cd2..5a4fc0a3187 100644 --- a/go/worker/storage/statesync/state_sync.go +++ b/go/worker/storage/statesync/state_sync.go @@ -3,7 +3,6 @@ package statesync import ( - "container/heap" "context" "errors" "fmt" @@ -15,11 +14,9 @@ import ( "github.com/oasisprotocol/oasis-core/go/common/logging" "github.com/oasisprotocol/oasis-core/go/common/node" - "github.com/oasisprotocol/oasis-core/go/common/workerpool" "github.com/oasisprotocol/oasis-core/go/config" consensus "github.com/oasisprotocol/oasis-core/go/consensus/api" commonFlags "github.com/oasisprotocol/oasis-core/go/oasis-node/cmd/common/flags" - "github.com/oasisprotocol/oasis-core/go/p2p/rpc" registryApi "github.com/oasisprotocol/oasis-core/go/registry/api" roothashApi "github.com/oasisprotocol/oasis-core/go/roothash/api" "github.com/oasisprotocol/oasis-core/go/roothash/api/block" @@ -27,7 +24,6 @@ import ( "github.com/oasisprotocol/oasis-core/go/runtime/host" storageApi "github.com/oasisprotocol/oasis-core/go/storage/api" "github.com/oasisprotocol/oasis-core/go/storage/mkvs/checkpoint" - dbApi "github.com/oasisprotocol/oasis-core/go/storage/mkvs/db/api" workerCommon "github.com/oasisprotocol/oasis-core/go/worker/common" "github.com/oasisprotocol/oasis-core/go/worker/common/committee" "github.com/oasisprotocol/oasis-core/go/worker/registration" @@ -60,58 +56,8 @@ const ( // The minimum number of rounds the worker can be behind the chain before it's sensible for // it to stop advertising availability. 
minimumRoundDelayForUnavailability = uint64(15) - - // maxInFlightRounds is the maximum number of rounds that should be fetched before waiting - // for them to be applied. - maxInFlightRounds = 100 ) -type roundItem interface { - GetRound() uint64 -} - -// minRoundQueue is a Round()-based min priority queue. -type minRoundQueue []roundItem - -// Sorting interface. -func (q minRoundQueue) Len() int { return len(q) } -func (q minRoundQueue) Less(i, j int) bool { return q[i].GetRound() < q[j].GetRound() } -func (q minRoundQueue) Swap(i, j int) { q[i], q[j] = q[j], q[i] } - -// Push appends x as the last element in the heap's array. -func (q *minRoundQueue) Push(x any) { - *q = append(*q, x.(roundItem)) -} - -// Pop removes and returns the last element in the heap's array. -func (q *minRoundQueue) Pop() any { - old := *q - n := len(old) - x := old[n-1] - *q = old[0 : n-1] - return x -} - -// fetchedDiff has all the context needed for a single GetDiff operation. -type fetchedDiff struct { - fetched bool - pf rpc.PeerFeedback - err error - round uint64 - prevRoot storageApi.Root - thisRoot storageApi.Root - writeLog storageApi.WriteLog -} - -func (d *fetchedDiff) GetRound() uint64 { - return d.round -} - -type finalizedResult struct { - summary *blockSummary - err error -} - // Worker is the runtime state sync worker, responsible for syncing state // that corresponds to the incoming runtime block headers received from the // consensus service. @@ -302,136 +248,6 @@ func (w *Worker) GetLastSynced() (uint64, storageApi.Root, storageApi.Root) { return w.syncedState.Round, io, state } -func (w *Worker) initGenesis(ctx context.Context, rt *registryApi.Runtime, genesisBlock *block.Block) error { - w.logger.Info("initializing storage at genesis") - - // Check what the latest finalized version in the database is as we may be using a database - // from a previous version or network. - latestVersion, alreadyInitialized := w.localStorage.NodeDB().GetLatestVersion() - - // Finalize any versions that were not yet finalized in the old database. This is only possible - // as long as there is only one non-finalized root per version. Note that we also cannot be sure - // that any of these roots are valid, but this is fine as long as the final version matches the - // genesis root. - if alreadyInitialized { - w.logger.Debug("already initialized, finalizing any non-finalized versions", - "genesis_state_root", genesisBlock.Header.StateRoot, - "genesis_round", genesisBlock.Header.Round, - "latest_version", latestVersion, - ) - - for v := latestVersion + 1; v < genesisBlock.Header.Round; v++ { - roots, err := w.localStorage.NodeDB().GetRootsForVersion(v) - if err != nil { - return fmt.Errorf("failed to fetch roots for version %d: %w", v, err) - } - - var stateRoots []storageApi.Root - for _, root := range roots { - if root.Type == storageApi.RootTypeState { - stateRoots = append(stateRoots, root) - } - } - if len(stateRoots) != 1 { - break // We must have exactly one non-finalized state root to continue. - } - - err = w.localStorage.NodeDB().Finalize(stateRoots) - if err != nil { - return fmt.Errorf("failed to finalize version %d: %w", v, err) - } - - latestVersion = v - } - } - - stateRoot := storageApi.Root{ - Namespace: rt.ID, - Version: genesisBlock.Header.Round, - Type: storageApi.RootTypeState, - Hash: genesisBlock.Header.StateRoot, - } - - var compatible bool - switch { - case latestVersion < stateRoot.Version: - // Latest version is earlier than the genesis state root. 
In case it has the same hash - // we can fill in all the missing versions. - maybeRoot := stateRoot - maybeRoot.Version = latestVersion - - if w.localStorage.NodeDB().HasRoot(maybeRoot) { - w.logger.Debug("latest version earlier than genesis state root, filling in versions", - "genesis_state_root", genesisBlock.Header.StateRoot, - "genesis_round", genesisBlock.Header.Round, - "latest_version", latestVersion, - ) - for v := latestVersion; v < stateRoot.Version; v++ { - err := w.localStorage.Apply(ctx, &storageApi.ApplyRequest{ - Namespace: rt.ID, - RootType: storageApi.RootTypeState, - SrcRound: v, - SrcRoot: stateRoot.Hash, - DstRound: v + 1, - DstRoot: stateRoot.Hash, - WriteLog: nil, // No changes. - }) - if err != nil { - return fmt.Errorf("failed to fill in version %d: %w", v, err) - } - - err = w.localStorage.NodeDB().Finalize([]storageApi.Root{{ - Namespace: rt.ID, - Version: v + 1, - Type: storageApi.RootTypeState, - Hash: stateRoot.Hash, - // We can ignore I/O roots. - }}) - if err != nil { - return fmt.Errorf("failed to finalize version %d: %w", v, err) - } - } - compatible = true - } - default: - // Latest finalized version is the same or ahead, root must exist. - compatible = w.localStorage.NodeDB().HasRoot(stateRoot) - } - - // If we are incompatible and the local version is greater or the same as the genesis version, - // we cannot do anything. If the local version is lower we assume the node will sync from a - // different node. - if !compatible && latestVersion >= stateRoot.Version { - w.logger.Error("existing state is incompatible with runtime genesis state", - "genesis_state_root", genesisBlock.Header.StateRoot, - "genesis_round", genesisBlock.Header.Round, - "latest_version", latestVersion, - ) - return fmt.Errorf("existing state is incompatible with runtime genesis state") - } - - if !compatible { - // Database is empty, so assume the state will be replicated from another node. - w.logger.Warn("non-empty state root but no state available, assuming replication", - "state_root", genesisBlock.Header.StateRoot, - ) - w.checkpointSyncForced = true - } - return nil -} - -func (w *Worker) flushSyncedState(summary *blockSummary) (uint64, error) { - w.syncedLock.Lock() - defer w.syncedLock.Unlock() - - w.syncedState = *summary - if err := w.commonNode.Runtime.History().StorageSyncCheckpoint(w.syncedState.Round); err != nil { - return 0, err - } - - return w.syncedState.Round, nil -} - // Run runs state sync worker. func (w *Worker) Run(ctx context.Context) error { // nolint: gocyclo // Wait for the common node to be initialized. @@ -687,395 +503,137 @@ func (w *Worker) Run(ctx context.Context) error { // nolint: gocyclo w.status = api.StatusSyncingRounds w.statusLock.Unlock() - return w.sync(ctx, cachedLastRound) + return w.syncDiffs(ctx, cachedLastRound) } -func (w *Worker) sync( - ctx context.Context, - lastFinalizedRound uint64, -) error { - syncingRounds := make(map[uint64]*inFlight) - summaryCache := make(map[uint64]*blockSummary) - pendingApply := &minRoundQueue{} - pendingFinalize := &minRoundQueue{} // Suggestion: slice would suffice given that application must happen in order. 
- - diffCh := make(chan *fetchedDiff) - finalizedCh := make(chan finalizedResult) - - fetchPool := workerpool.New("storage_fetch/" + w.commonNode.Runtime.ID().String()) - fetchPool.Resize(config.GlobalConfig.Storage.FetcherCount) - defer fetchPool.Stop() - - heartbeat := heartbeat{} - heartbeat.reset() - defer heartbeat.Stop() - - var wg sync.WaitGroup - defer wg.Wait() - - lastFullyAppliedRound := lastFinalizedRound - // Don't register availability immediately, we want to know first how far behind consensus we are. - latestBlockRound := w.undefinedRound - for { - // Drain the Apply and Finalize queues first, before waiting for new events in the select below. - - // Apply fetched writelogs, but only if they are for the round after the last fully applied one - // and current number of pending roots to be finalized is smaller than max allowed. - applyNext := pendingApply.Len() > 0 && - lastFullyAppliedRound+1 == (*pendingApply)[0].GetRound() && - pendingFinalize.Len() < dbApi.MaxPendingVersions-1 // -1 since one may be already finalizing. - if applyNext { - lastDiff := heap.Pop(pendingApply).(*fetchedDiff) - err := w.apply(ctx, lastDiff) - - syncing := syncingRounds[lastDiff.round] - if err != nil { - syncing.retry(lastDiff.thisRoot.Type) - continue - } - syncing.outstanding.remove(lastDiff.thisRoot.Type) - if !syncing.outstanding.isEmpty() || !syncing.awaitingRetry.isEmpty() { - continue - } - - // We have fully synced the given round. - w.logger.Debug("finished syncing round", "round", lastDiff.round) - delete(syncingRounds, lastDiff.round) - summary := summaryCache[lastDiff.round] - delete(summaryCache, lastDiff.round-1) - lastFullyAppliedRound = lastDiff.round - - // Suggestion: Rename to lastAppliedRoundMetric, as synced is often synonim for finalized in this code. - storageWorkerLastSyncedRound.With(w.getMetricLabels()).Set(float64(lastDiff.round)) - // Suggestion: Ideally this would be recorded once the round is finalized (synced). - storageWorkerRoundSyncLatency.With(w.getMetricLabels()).Observe(time.Since(syncing.startedAt).Seconds()) - - // Trigger finalization for this round, that will happen concurently - // with respect to Apply operations for subsequent rounds. - heap.Push(pendingFinalize, summary) - - continue - } - - // Check if any new rounds were fully applied and need to be finalized. - // Only finalize if it's the round after the one that was finalized last. - // As a consequence at most one finalization can be happening at the time. - if len(*pendingFinalize) > 0 && lastFinalizedRound+1 == (*pendingFinalize)[0].GetRound() { - summary := heap.Pop(pendingFinalize).(*blockSummary) - wg.Add(1) - go func() { // Don't block fetching and applying remaining rounds. - defer wg.Done() - w.finalize(ctx, summary, finalizedCh) - }() - continue - } - - select { - case <-ctx.Done(): - return ctx.Err() - case inBlk := <-w.blockCh.Out(): - blk := inBlk.(*block.Block) - w.logger.Debug("incoming block", - "round", blk.Header.Round, - "last_synced", lastFullyAppliedRound, - "last_finalized", lastFinalizedRound, - ) - - // Check if we're far enough to reasonably register as available. - latestBlockRound = blk.Header.Round - // Fixme: If block channel has many pending blocks (e.g. after checkpoint sync), - // nudgeAvailability may incorrectly set the node as available too early. 
- w.nudgeAvailability(lastFinalizedRound, latestBlockRound) - - if err := w.fetchMissingBlockHeaders(ctx, lastFullyAppliedRound, blk, summaryCache); err != nil { - return fmt.Errorf("failed to fetch missing block headers: %w", err) // Suggestion: databases can fail, consider retrying. - } - - w.triggerRoundFetches(ctx, &wg, fetchPool, diffCh, syncingRounds, summaryCache, lastFullyAppliedRound+1, latestBlockRound) - case item := <-diffCh: - if item.err != nil { - w.logger.Error("error calling getdiff", - "err", item.err, - "round", item.round, - "old_root", item.prevRoot, - "new_root", item.thisRoot, - "fetched", item.fetched, - ) - syncingRounds[item.round].retry(item.thisRoot.Type) // Suggestion: Trigger fetches immediately. - break - } - - heap.Push(pendingApply, item) - // Item was successfully processed, trigger more round fetches. - // This ensures that new rounds are processed as fast as possible - // when we're syncing and are far behind. - w.triggerRoundFetches(ctx, &wg, fetchPool, diffCh, syncingRounds, summaryCache, lastFullyAppliedRound+1, latestBlockRound) - heartbeat.reset() - case <-heartbeat.C: - if latestBlockRound != w.undefinedRound { - w.logger.Debug("heartbeat", "in_flight_rounds", len(syncingRounds)) - w.triggerRoundFetches(ctx, &wg, fetchPool, diffCh, syncingRounds, summaryCache, lastFullyAppliedRound+1, latestBlockRound) - } - case finalized := <-finalizedCh: - if finalized.err != nil { - return fmt.Errorf("failed to finalize (round: %d): %w", finalized.summary.Round, finalized.err) - } - var err error - lastFinalizedRound, err = w.flushSyncedState(finalized.summary) - if err != nil { // Suggestion: DB operations can always fail, consider retrying. - return fmt.Errorf("failed to flush synced state: %w", err) - } - storageWorkerLastFullRound.With(w.getMetricLabels()).Set(float64(finalized.summary.Round)) - - // Check if we're far enough to reasonably register as available. - w.nudgeAvailability(lastFinalizedRound, latestBlockRound) +func (w *Worker) flushSyncedState(summary *blockSummary) (uint64, error) { + w.syncedLock.Lock() + defer w.syncedLock.Unlock() - // Notify the checkpointer that there is a new finalized round. - if config.GlobalConfig.Storage.Checkpointer.Enabled { - w.checkpointer.NotifyNewVersion(finalized.summary.Round) - } - case <-ctx.Done(): - return ctx.Err() - } + w.syncedState = *summary + if err := w.commonNode.Runtime.History().StorageSyncCheckpoint(w.syncedState.Round); err != nil { + return 0, err } -} -func (w *Worker) fetchMissingBlockHeaders(ctx context.Context, lastFullyAppliedRound uint64, blk *block.Block, summaryCache map[uint64]*blockSummary) error { - if _, ok := summaryCache[lastFullyAppliedRound]; !ok && lastFullyAppliedRound == w.undefinedRound { // Suggestion: Helper that is only done once. - dummy := blockSummary{ - Namespace: blk.Header.Namespace, - Round: lastFullyAppliedRound + 1, - Roots: []storageApi.Root{ - { - Version: lastFullyAppliedRound + 1, - Type: storageApi.RootTypeIO, - }, - { - Version: lastFullyAppliedRound + 1, - Type: storageApi.RootTypeState, - }, - }, - } - dummy.Roots[0].Empty() - dummy.Roots[1].Empty() - summaryCache[lastFullyAppliedRound] = &dummy - } - // Determine if we need to fetch any old block summaries. In case the first - // round is an undefined round, we need to start with the following round - // since the undefined round may be unsigned -1 and in this case the loop - // would not do any iterations. 
- startSummaryRound := lastFullyAppliedRound - if startSummaryRound == w.undefinedRound { - startSummaryRound++ - } - for i := startSummaryRound; i < blk.Header.Round; i++ { - if _, ok := summaryCache[i]; ok { - continue - } - oldBlock, err := w.commonNode.Runtime.History().GetCommittedBlock(ctx, i) - if err != nil { - return fmt.Errorf("getting block for round %d (current round: %d): %w", i, blk.Header.Round, err) - } - summaryCache[i] = summaryFromBlock(oldBlock) - } - if _, ok := summaryCache[blk.Header.Round]; !ok { - summaryCache[blk.Header.Round] = summaryFromBlock(blk) - } - return nil + return w.syncedState.Round, nil } -func (w *Worker) triggerRoundFetches( - ctx context.Context, - wg *sync.WaitGroup, - fetchPool *workerpool.Pool, - diffCh chan<- *fetchedDiff, - syncingRounds map[uint64]*inFlight, - summaryCache map[uint64]*blockSummary, - start uint64, - end uint64, -) { - for r := start; r <= end; r++ { - syncing, ok := syncingRounds[r] - if ok && syncing.outstanding.hasAll() { - continue - } +func (w *Worker) initGenesis(ctx context.Context, rt *registryApi.Runtime, genesisBlock *block.Block) error { + w.logger.Info("initializing storage at genesis") - if !ok { - if len(syncingRounds) >= maxInFlightRounds { - break - } + // Check what the latest finalized version in the database is as we may be using a database + // from a previous version or network. + latestVersion, alreadyInitialized := w.localStorage.NodeDB().GetLatestVersion() - syncing = &inFlight{ - startedAt: time.Now(), - awaitingRetry: outstandingMaskFull, - } - syncingRounds[r] = syncing + // Finalize any versions that were not yet finalized in the old database. This is only possible + // as long as there is only one non-finalized root per version. Note that we also cannot be sure + // that any of these roots are valid, but this is fine as long as the final version matches the + // genesis root. + if alreadyInitialized { + w.logger.Debug("already initialized, finalizing any non-finalized versions", + "genesis_state_root", genesisBlock.Header.StateRoot, + "genesis_round", genesisBlock.Header.Round, + "latest_version", latestVersion, + ) - if r == end { - storageWorkerLastPendingRound.With(w.getMetricLabels()).Set(float64(r)) + for v := latestVersion + 1; v < genesisBlock.Header.Round; v++ { + roots, err := w.localStorage.NodeDB().GetRootsForVersion(v) + if err != nil { + return fmt.Errorf("failed to fetch roots for version %d: %w", v, err) } - } - w.logger.Debug("preparing round sync", - "round", r, - "outstanding_mask", syncing.outstanding, - "awaiting_retry", syncing.awaitingRetry, - ) - prev := summaryCache[r-1] - this := summaryCache[r] - prevRoots := make([]storageApi.Root, len(prev.Roots)) - copy(prevRoots, prev.Roots) - for i := range prevRoots { - if prevRoots[i].Type == storageApi.RootTypeIO { - // IO roots aren't chained, so clear it (but leave cache intact). - prevRoots[i] = storageApi.Root{ - Namespace: this.Namespace, - Version: this.Round, - Type: storageApi.RootTypeIO, + var stateRoots []storageApi.Root + for _, root := range roots { + if root.Type == storageApi.RootTypeState { + stateRoots = append(stateRoots, root) } - prevRoots[i].Hash.Empty() - break } - } + if len(stateRoots) != 1 { + break // We must have exactly one non-finalized state root to continue. 
+ } - for i := range prevRoots { - rootType := prevRoots[i].Type - if !syncing.outstanding.contains(rootType) && syncing.awaitingRetry.contains(rootType) { - syncing.scheduleDiff(rootType) - wg.Add(1) - fetchPool.Submit(func() { - defer wg.Done() - w.fetchDiff(ctx, diffCh, this.Round, prevRoots[i], this.Roots[i]) - }) + err = w.localStorage.NodeDB().Finalize(stateRoots) + if err != nil { + return fmt.Errorf("failed to finalize version %d: %w", v, err) } - } - } -} -func (w *Worker) fetchDiff(ctx context.Context, fetchCh chan<- *fetchedDiff, round uint64, prevRoot, thisRoot storageApi.Root) { - result := &fetchedDiff{ - fetched: false, - pf: rpc.NewNopPeerFeedback(), - round: round, - prevRoot: prevRoot, - thisRoot: thisRoot, - } - defer func() { - select { - case fetchCh <- result: - case <-ctx.Done(): + latestVersion = v } - }() - - // Check if the new root doesn't already exist. - if w.localStorage.NodeDB().HasRoot(thisRoot) { - return } - result.fetched = true - - // Even if HasRoot returns false the root can still exist if it is equal - // to the previous root and the root was emitted by the consensus committee - // directly (e.g., during an epoch transition). - if thisRoot.Hash.Equal(&prevRoot.Hash) { - result.writeLog = storageApi.WriteLog{} - return - } - - // New root does not yet exist in storage and we need to fetch it from a peer. - w.logger.Debug("calling GetDiff", - "old_root", prevRoot, - "new_root", thisRoot, - ) - - diffCtx, cancel := context.WithCancel(ctx) - defer cancel() - - wl, pf, err := w.getDiff(diffCtx, prevRoot, thisRoot) - if err != nil { - result.err = err - return - } - result.pf = pf - result.writeLog = wl -} - -// getDiff fetches writelog using diff sync p2p protocol client. -// -// The request relies on the default timeout of the underlying p2p protocol clients. -// -// In case of no peers or error, it fallbacks to the legacy storage sync protocol. -func (w *Worker) getDiff(ctx context.Context, prevRoot, thisRoot storageApi.Root) (storageApi.WriteLog, rpc.PeerFeedback, error) { - rsp1, pf, err := w.diffSync.GetDiff(ctx, &diffsync.GetDiffRequest{StartRoot: prevRoot, EndRoot: thisRoot}) - if err == nil { // if NO error - return rsp1.WriteLog, pf, nil - } - - rsp2, pf, err := w.legacyStorageSync.GetDiff(ctx, &synclegacy.GetDiffRequest{StartRoot: prevRoot, EndRoot: thisRoot}) - if err != nil { - return nil, nil, err - } - return rsp2.WriteLog, pf, nil -} - -func (w *Worker) apply(ctx context.Context, diff *fetchedDiff) error { - if !diff.fetched { - return nil + stateRoot := storageApi.Root{ + Namespace: rt.ID, + Version: genesisBlock.Header.Round, + Type: storageApi.RootTypeState, + Hash: genesisBlock.Header.StateRoot, } - err := w.localStorage.Apply(ctx, &storageApi.ApplyRequest{ - Namespace: diff.thisRoot.Namespace, - RootType: diff.thisRoot.Type, - SrcRound: diff.prevRoot.Version, - SrcRoot: diff.prevRoot.Hash, - DstRound: diff.thisRoot.Version, - DstRoot: diff.thisRoot.Hash, - WriteLog: diff.writeLog, - }) + var compatible bool switch { - case err == nil: - diff.pf.RecordSuccess() - case errors.Is(err, storageApi.ErrExpectedRootMismatch): - diff.pf.RecordBadPeer() - default: - w.logger.Error("can't apply write log", - "err", err, - "old_root", diff.prevRoot, - "new_root", diff.thisRoot, - ) - diff.pf.RecordSuccess() - } + case latestVersion < stateRoot.Version: + // Latest version is earlier than the genesis state root. In case it has the same hash + // we can fill in all the missing versions. 
+			maybeRoot := stateRoot
+			maybeRoot.Version = latestVersion
 
-	return err
-}
+			if w.localStorage.NodeDB().HasRoot(maybeRoot) {
+				w.logger.Debug("latest version earlier than genesis state root, filling in versions",
+					"genesis_state_root", genesisBlock.Header.StateRoot,
+					"genesis_round", genesisBlock.Header.Round,
+					"latest_version", latestVersion,
+				)
+
+				for v := latestVersion; v < stateRoot.Version; v++ {
+					err := w.localStorage.Apply(ctx, &storageApi.ApplyRequest{
+						Namespace: rt.ID,
+						RootType:  storageApi.RootTypeState,
+						SrcRound:  v,
+						SrcRoot:   stateRoot.Hash,
+						DstRound:  v + 1,
+						DstRoot:   stateRoot.Hash,
+						WriteLog:  nil, // No changes.
+					})
+					if err != nil {
+						return fmt.Errorf("failed to fill in version %d: %w", v, err)
+					}
 
-func (w *Worker) finalize(ctx context.Context, summary *blockSummary, finalizedCh chan<- finalizedResult) {
-	err := w.localStorage.NodeDB().Finalize(summary.Roots)
-	switch err {
-	case nil:
-		w.logger.Debug("storage round finalized",
-			"round", summary.Round,
-		)
-	case storageApi.ErrAlreadyFinalized:
-		// This can happen if we are restoring after a roothash migration or if
-		// we crashed before updating the sync state.
-		w.logger.Warn("storage round already finalized",
-			"round", summary.Round,
-		)
-		err = nil
+					err = w.localStorage.NodeDB().Finalize([]storageApi.Root{{
+						Namespace: rt.ID,
+						Version:   v + 1,
+						Type:      storageApi.RootTypeState,
+						Hash:      stateRoot.Hash,
+						// We can ignore I/O roots.
+					}})
+					if err != nil {
+						return fmt.Errorf("failed to finalize version %d: %w", v, err)
+					}
+				}
+				compatible = true
+			}
 	default:
-		w.logger.Error("failed to finalize", "err", err, "summary", summary)
+		// Latest finalized version is the same or ahead, root must exist.
+		compatible = w.localStorage.NodeDB().HasRoot(stateRoot)
 	}
 
-	result := finalizedResult{
-		summary: summary,
-		err:     err,
+	// If we are incompatible and the local version is greater or the same as the genesis version,
+	// we cannot do anything. If the local version is lower we assume the node will sync from a
+	// different node.
+	if !compatible && latestVersion >= stateRoot.Version {
+		w.logger.Error("existing state is incompatible with runtime genesis state",
+			"genesis_state_root", genesisBlock.Header.StateRoot,
+			"genesis_round", genesisBlock.Header.Round,
+			"latest_version", latestVersion,
+		)
+		return fmt.Errorf("existing state is incompatible with runtime genesis state")
 	}
 
-	select {
-	case finalizedCh <- result:
-	case <-ctx.Done():
+	if !compatible {
+		// Database is empty, so assume the state will be replicated from another node.
+		w.logger.Warn("non-empty state root but no state available, assuming replication",
+			"state_root", genesisBlock.Header.StateRoot,
+		)
+		w.checkpointSyncForced = true
 	}
+	return nil
 }
 
 // This is only called from the main worker goroutine, so no locking should be necessary.

From 67b006e100526778f482ad2bcd109a0c95047403 Mon Sep 17 00:00:00 2001
From: Martin Tomazic
Date: Tue, 19 Aug 2025 13:04:59 +0200
Subject: [PATCH 10/11] go/worker/storage/statesync: Prevent deadlock when
 terminating

Previously, if the worker returned an error it would exit the main for
loop and then wait for the wait group to be drained. That wait could
never complete: once the loop had exited, nothing was left reading the
fetched diffs, so the fetchers blocked on their sends and the wait group
was never emptied.
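To make the failure mode concrete, here is a minimal, self-contained Go
sketch of the same pattern; it is not the worker code, and the names
(fetch, diffCh) are invented for illustration. The deadlock appears
exactly when the consumer stops reading while producers still hold
wait-group slots; cancelling a context the producers select on releases
them:

    package main

    import (
        "context"
        "fmt"
        "sync"
    )

    // fetch stands in for a diff fetcher: it produces one result and sends it on out.
    func fetch(ctx context.Context, out chan<- int, wg *sync.WaitGroup, i int) {
        defer wg.Done()
        // Without the ctx.Done() case this send blocks forever once the consumer
        // stops reading, and the wg.Wait() in main never returns.
        select {
        case out <- i:
        case <-ctx.Done():
        }
    }

    func main() {
        ctx, cancel := context.WithCancel(context.Background())
        diffCh := make(chan int)
        var wg sync.WaitGroup

        for i := 0; i < 4; i++ {
            wg.Add(1)
            go fetch(ctx, diffCh, &wg, i)
        }

        // Simulate the main loop bailing out after a single item (e.g. on error).
        fmt.Println("got", <-diffCh)
        cancel() // cancel the fetchers' context before waiting, as the patch does
        wg.Wait()
        fmt.Println("terminated cleanly")
    }

The patch applies the same idea by deriving fetchCtx from ctx with a
deferred cancel in syncDiffs, so any return from the main loop, error or
not, unblocks fetchers that are still trying to send on the diff channel.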
---
 go/worker/storage/statesync/diff_sync.go | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/go/worker/storage/statesync/diff_sync.go b/go/worker/storage/statesync/diff_sync.go
index 468f5c2dc06..2964bca8f51 100644
--- a/go/worker/storage/statesync/diff_sync.go
+++ b/go/worker/storage/statesync/diff_sync.go
@@ -93,6 +93,8 @@ func (w *Worker) syncDiffs(
 	fetchPool := workerpool.New("storage_fetch/" + w.commonNode.Runtime.ID().String())
 	fetchPool.Resize(config.GlobalConfig.Storage.FetcherCount)
 	defer fetchPool.Stop()
+	fetchCtx, cancel := context.WithCancel(ctx)
+	defer cancel()
 
 	heartbeat := heartbeat{}
 	heartbeat.reset()
@@ -179,7 +181,7 @@ func (w *Worker) syncDiffs(
 				return fmt.Errorf("failed to fetch missing block headers: %w", err) // Suggestion: databases can fail, consider retrying.
 			}
 
-			w.triggerRoundFetches(ctx, &wg, fetchPool, diffCh, syncingRounds, summaryCache, lastFullyAppliedRound+1, latestBlockRound)
+			w.triggerRoundFetches(fetchCtx, &wg, fetchPool, diffCh, syncingRounds, summaryCache, lastFullyAppliedRound+1, latestBlockRound)
 		case item := <-diffCh:
 			if item.err != nil {
 				w.logger.Error("error calling getdiff",
@@ -197,12 +199,12 @@ func (w *Worker) syncDiffs(
 			// Item was successfully processed, trigger more round fetches.
 			// This ensures that new rounds are processed as fast as possible
 			// when we're syncing and are far behind.
-			w.triggerRoundFetches(ctx, &wg, fetchPool, diffCh, syncingRounds, summaryCache, lastFullyAppliedRound+1, latestBlockRound)
+			w.triggerRoundFetches(fetchCtx, &wg, fetchPool, diffCh, syncingRounds, summaryCache, lastFullyAppliedRound+1, latestBlockRound)
 			heartbeat.reset()
 		case <-heartbeat.C:
 			if latestBlockRound != w.undefinedRound {
 				w.logger.Debug("heartbeat", "in_flight_rounds", len(syncingRounds))
-				w.triggerRoundFetches(ctx, &wg, fetchPool, diffCh, syncingRounds, summaryCache, lastFullyAppliedRound+1, latestBlockRound)
+				w.triggerRoundFetches(fetchCtx, &wg, fetchPool, diffCh, syncingRounds, summaryCache, lastFullyAppliedRound+1, latestBlockRound)
 			}
 		case finalized := <-finalizedCh:
 			if finalized.err != nil {

From dca1f4b71317bdd784833e6f7a69e3ac57bf6377 Mon Sep 17 00:00:00 2001
From: Martin Tomazic
Date: Tue, 19 Aug 2025 13:13:39 +0200
Subject: [PATCH 11/11] go/worker/storage/statesync: Remove redundant waitgroup

When terminating, either because an error exits the main for loop or
because the context is canceled, there is no point in waiting for the
goroutines to finish fetching or to do their cleanup. As long as we
cancel their context and they use it properly in their select
statements, dropping the wait group should be safe and better.
---
 go/worker/storage/statesync/diff_sync.go | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/go/worker/storage/statesync/diff_sync.go b/go/worker/storage/statesync/diff_sync.go
index 2964bca8f51..0a13578744d 100644
--- a/go/worker/storage/statesync/diff_sync.go
+++ b/go/worker/storage/statesync/diff_sync.go
@@ -181,7 +181,7 @@ func (w *Worker) syncDiffs(
 				return fmt.Errorf("failed to fetch missing block headers: %w", err) // Suggestion: databases can fail, consider retrying.
 			}
 
-			w.triggerRoundFetches(fetchCtx, &wg, fetchPool, diffCh, syncingRounds, summaryCache, lastFullyAppliedRound+1, latestBlockRound)
+			w.triggerRoundFetches(fetchCtx, fetchPool, diffCh, syncingRounds, summaryCache, lastFullyAppliedRound+1, latestBlockRound)
 		case item := <-diffCh:
 			if item.err != nil {
 				w.logger.Error("error calling getdiff",
@@ -199,12 +199,12 @@ func (w *Worker) syncDiffs(
 			// Item was successfully processed, trigger more round fetches.
// This ensures that new rounds are processed as fast as possible // when we're syncing and are far behind. - w.triggerRoundFetches(fetchCtx, &wg, fetchPool, diffCh, syncingRounds, summaryCache, lastFullyAppliedRound+1, latestBlockRound) + w.triggerRoundFetches(fetchCtx, fetchPool, diffCh, syncingRounds, summaryCache, lastFullyAppliedRound+1, latestBlockRound) heartbeat.reset() case <-heartbeat.C: if latestBlockRound != w.undefinedRound { w.logger.Debug("heartbeat", "in_flight_rounds", len(syncingRounds)) - w.triggerRoundFetches(fetchCtx, &wg, fetchPool, diffCh, syncingRounds, summaryCache, lastFullyAppliedRound+1, latestBlockRound) + w.triggerRoundFetches(fetchCtx, fetchPool, diffCh, syncingRounds, summaryCache, lastFullyAppliedRound+1, latestBlockRound) } case finalized := <-finalizedCh: if finalized.err != nil { @@ -274,7 +274,6 @@ func (w *Worker) fetchMissingBlockHeaders(ctx context.Context, lastFullyAppliedR func (w *Worker) triggerRoundFetches( ctx context.Context, - wg *sync.WaitGroup, fetchPool *workerpool.Pool, diffCh chan<- *fetchedDiff, syncingRounds map[uint64]*inFlight, @@ -330,9 +329,7 @@ func (w *Worker) triggerRoundFetches( rootType := prevRoots[i].Type if !syncing.outstanding.contains(rootType) && syncing.awaitingRetry.contains(rootType) { syncing.scheduleDiff(rootType) - wg.Add(1) fetchPool.Submit(func() { - defer wg.Done() w.fetchDiff(ctx, diffCh, this.Round, prevRoots[i], this.Roots[i]) }) }
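Taken together, the last two patches scope the fetcher goroutines to a
cancellable context instead of a wait group. The sketch below is
illustrative only (submit and resultCh are invented names, and a bare
goroutine stands in for the worker pool); it assumes, as the diffs above
show, that every fetcher selects on its context when sending results:

    package main

    import (
        "context"
        "fmt"
    )

    // submit stands in for fetchPool.Submit: there is no wg.Add/wg.Done bracketing,
    // the goroutine's lifetime is bounded by the context instead.
    func submit(ctx context.Context, resultCh chan<- int, i int) {
        go func() {
            select {
            case resultCh <- i * i:
            case <-ctx.Done(): // exit promptly once the consumer is gone
            }
        }()
    }

    func main() {
        ctx, cancel := context.WithCancel(context.Background())
        defer cancel() // cancelling on return is what makes waiting unnecessary

        resultCh := make(chan int)
        for i := 0; i < 8; i++ {
            submit(ctx, resultCh, i)
        }

        // Consume a few results and return early; the deferred cancel releases the
        // goroutines still blocked on their sends (the process may exit before they
        // observe it, which is fine since they hold nothing that needs flushing).
        for i := 0; i < 3; i++ {
            fmt.Println(<-resultCh)
        }
    }

The trade-off, as the second commit message notes, is that shutdown no
longer waits for in-flight fetches to wind down; that should be safe here
because cancellation goes through the same context the fetchers already
select on, so none of them can block indefinitely.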