diff --git a/.changelog/6338.breaking.md b/.changelog/6338.breaking.md new file mode 100644 index 00000000000..c2bc96425f8 --- /dev/null +++ b/.changelog/6338.breaking.md @@ -0,0 +1 @@ +go/consensus/cometbft/apps/scheduler: Run elections at epoch end diff --git a/go/beacon/tests/tester.go b/go/beacon/tests/tester.go index 2aed9dd1606..41fc5953dcb 100644 --- a/go/beacon/tests/tester.go +++ b/go/beacon/tests/tester.go @@ -181,7 +181,7 @@ func MustAdvanceEpochMulti(t *testing.T, consensus consensusAPI.Service, increme if !nd.Node.HasRoles(node.RoleValidator) { continue } - if nd.Node.Expiration > uint64(epoch+1) { + if nd.Node.Expiration > epoch+1 { break EVENTS } case <-time.After(recvTimeout): diff --git a/go/common/node/node.go b/go/common/node/node.go index 6090224e903..680646f5aea 100644 --- a/go/common/node/node.go +++ b/go/common/node/node.go @@ -11,6 +11,7 @@ import ( "github.com/oasisprotocol/curve25519-voi/primitives/x25519" + beacon "github.com/oasisprotocol/oasis-core/go/beacon/api" "github.com/oasisprotocol/oasis-core/go/common" "github.com/oasisprotocol/oasis-core/go/common/cbor" "github.com/oasisprotocol/oasis-core/go/common/crypto/hash" @@ -77,7 +78,7 @@ type Node struct { EntityID signature.PublicKey `json:"entity_id"` // Expiration is the epoch in which this node's commitment expires. - Expiration uint64 `json:"expiration"` + Expiration beacon.EpochTime `json:"expiration"` // TLS contains information for connecting to this node via TLS. TLS TLSInfo `json:"tls"` @@ -116,7 +117,7 @@ type nodeV2 struct { EntityID signature.PublicKey `json:"entity_id"` // Expiration is the epoch in which this node's commitment expires. - Expiration uint64 `json:"expiration"` + Expiration beacon.EpochTime `json:"expiration"` // TLS contains information for connecting to this node via TLS. TLS nodeV2TLSInfo `json:"tls"` @@ -385,7 +386,7 @@ func (n *Node) OnlyHasRoles(r RolesMask) bool { // IsExpired returns true if the node expiration epoch is strictly smaller // than the passed (current) epoch. -func (n *Node) IsExpired(epoch uint64) bool { +func (n *Node) IsExpired(epoch beacon.EpochTime) bool { return n.Expiration < epoch } diff --git a/go/consensus/cometbft/api/genesis.go b/go/consensus/cometbft/api/genesis.go index d5e98569d65..b0eab293ef5 100644 --- a/go/consensus/cometbft/api/genesis.go +++ b/go/consensus/cometbft/api/genesis.go @@ -129,7 +129,7 @@ func convertValidators(d *genesis.Document) ([]cmttypes.GenesisValidator, error) } // Skip expired nodes. 
- if openedNode.IsExpired(uint64(d.Beacon.Base)) { + if openedNode.IsExpired(d.Beacon.Base) { continue } diff --git a/go/consensus/cometbft/apps/beacon/backend_vrf.go b/go/consensus/cometbft/apps/beacon/backend_vrf.go index a21a8095436..a35176eb4bd 100644 --- a/go/consensus/cometbft/apps/beacon/backend_vrf.go +++ b/go/consensus/cometbft/apps/beacon/backend_vrf.go @@ -97,8 +97,8 @@ func (impl *backendVRF) OnBeginBlock( return fmt.Errorf("beacon: timekeeping broken") } - var pendingMockEpoch *beacon.EpochTime - if pendingMockEpoch, err = state.PendingMockEpoch(ctx); err != nil { + pendingMockEpoch, err := state.PendingMockEpoch(ctx) + if err != nil { return fmt.Errorf("beacon: failed to query mock epoch state: %w", err) } if pendingMockEpoch == nil { diff --git a/go/consensus/cometbft/apps/keymanager/churp/txs.go b/go/consensus/cometbft/apps/keymanager/churp/txs.go index be6ecb179fd..2f9b3288390 100644 --- a/go/consensus/cometbft/apps/keymanager/churp/txs.go +++ b/go/consensus/cometbft/apps/keymanager/churp/txs.go @@ -476,7 +476,7 @@ func runtimeAttestationKey(ctx *tmapi.Context, nodeID signature.PublicKey, now b if err != nil { return nil, err } - if n.IsExpired(uint64(now)) { + if n.IsExpired(now) { return nil, fmt.Errorf("keymanager: churp: node registration expired") } if !n.HasRoles(node.RoleKeyManager) { diff --git a/go/consensus/cometbft/apps/keymanager/secrets/status.go b/go/consensus/cometbft/apps/keymanager/secrets/status.go index bd838357853..ebb8bb5d5da 100644 --- a/go/consensus/cometbft/apps/keymanager/secrets/status.go +++ b/go/consensus/cometbft/apps/keymanager/secrets/status.go @@ -75,7 +75,7 @@ func generateStatus( // nolint: gocyclo // to the key manager status fields. nextNode: for _, n := range nodes { - if n.IsExpired(uint64(epoch)) { + if n.IsExpired(epoch) { continue } if !n.HasRoles(node.RoleKeyManager) { diff --git a/go/consensus/cometbft/apps/keymanager/secrets/status_test.go b/go/consensus/cometbft/apps/keymanager/secrets/status_test.go index bbcc8267bf3..414bb57ecaf 100644 --- a/go/consensus/cometbft/apps/keymanager/secrets/status_test.go +++ b/go/consensus/cometbft/apps/keymanager/secrets/status_test.go @@ -135,70 +135,70 @@ func TestGenerateStatus(t *testing.T) { // Validator node. { ID: memorySigner.NewTestSigner("node 0").Public(), - Expiration: uint64(epoch), + Expiration: epoch, Roles: node.RoleValidator, Runtimes: nodeRuntimes[0:1], }, // Expired. { ID: memorySigner.NewTestSigner("node 1").Public(), - Expiration: uint64(epoch) - 1, + Expiration: epoch - 1, Roles: node.RoleKeyManager, Runtimes: nodeRuntimes[0:1], }, // No runtimes. { ID: memorySigner.NewTestSigner("node 2").Public(), - Expiration: uint64(epoch), + Expiration: epoch, Roles: node.RoleKeyManager, Runtimes: []*node.Runtime{}, }, // Compute runtime. { ID: memorySigner.NewTestSigner("node 3").Public(), - Expiration: uint64(epoch), + Expiration: epoch, Roles: node.RoleKeyManager, Runtimes: nodeRuntimes[6:7], }, // The second key manager. { ID: memorySigner.NewTestSigner("node 4").Public(), - Expiration: uint64(epoch), + Expiration: epoch, Roles: node.RoleKeyManager, Runtimes: nodeRuntimes[4:6], }, // One key manager, incompatible versions. { ID: memorySigner.NewTestSigner("node 5").Public(), - Expiration: uint64(epoch), + Expiration: epoch, Roles: node.RoleKeyManager, Runtimes: nodeRuntimes[0:4], }, // One key manager, one version (secure = false). 
{ ID: memorySigner.NewTestSigner("node 6").Public(), - Expiration: uint64(epoch), + Expiration: epoch, Roles: node.RoleKeyManager, Runtimes: nodeRuntimes[0:1], }, // One key manager, two versions (secure = true). { ID: memorySigner.NewTestSigner("node 7").Public(), - Expiration: uint64(epoch), + Expiration: epoch, Roles: node.RoleKeyManager, Runtimes: nodeRuntimes[1:2], }, // One key manager, two versions (secure = true). { ID: memorySigner.NewTestSigner("node 8").Public(), - Expiration: uint64(epoch), + Expiration: epoch, Roles: node.RoleKeyManager, Runtimes: nodeRuntimes[2:4], }, // Two key managers, two versions. { ID: memorySigner.NewTestSigner("node 9").Public(), - Expiration: uint64(epoch), + Expiration: epoch, Roles: node.RoleKeyManager, Runtimes: nodeRuntimes[2:6], }, diff --git a/go/consensus/cometbft/apps/registry/messages_test.go b/go/consensus/cometbft/apps/registry/messages_test.go index 49557b4a62c..649770802d8 100644 --- a/go/consensus/cometbft/apps/registry/messages_test.go +++ b/go/consensus/cometbft/apps/registry/messages_test.go @@ -5,6 +5,7 @@ import ( "github.com/stretchr/testify/require" + beacon "github.com/oasisprotocol/oasis-core/go/beacon/api" "github.com/oasisprotocol/oasis-core/go/common/cbor" abciAPI "github.com/oasisprotocol/oasis-core/go/consensus/cometbft/api" registryState "github.com/oasisprotocol/oasis-core/go/consensus/cometbft/apps/registry/state" @@ -30,7 +31,7 @@ func TestChangeParameters(t *testing.T) { require.NoError(t, err, "setting consensus parameters should succeed") // Prepare proposal. - maxNodeExpiration := uint64(20) + maxNodeExpiration := beacon.EpochTime(20) changes := registry.ConsensusParameterChanges{ MaxNodeExpiration: &maxNodeExpiration, } @@ -90,7 +91,7 @@ func TestChangeParameters(t *testing.T) { t.Run("invalid changes", func(t *testing.T) { require := require.New(t) - var maxNodeExpiration uint64 + var maxNodeExpiration beacon.EpochTime changes := registry.ConsensusParameterChanges{ MaxNodeExpiration: &maxNodeExpiration, } diff --git a/go/consensus/cometbft/apps/registry/query.go b/go/consensus/cometbft/apps/registry/query.go index eb83a4c9895..8f5f6f9016d 100644 --- a/go/consensus/cometbft/apps/registry/query.go +++ b/go/consensus/cometbft/apps/registry/query.go @@ -50,7 +50,7 @@ func (q *Query) Node(ctx context.Context, id signature.PublicKey) (*node.Node, e } // Do not return expired nodes. - if node.IsExpired(uint64(epoch)) { + if node.IsExpired(epoch) { return nil, registry.ErrNoSuchNode } return node, nil @@ -81,7 +81,7 @@ func (q *Query) Nodes(ctx context.Context) ([]*node.Node, error) { // Filter out expired nodes. var filteredNodes []*node.Node for _, n := range nodes { - if n.IsExpired(uint64(epoch)) { + if n.IsExpired(epoch) { continue } filteredNodes = append(filteredNodes, n) diff --git a/go/consensus/cometbft/apps/registry/registry.go b/go/consensus/cometbft/apps/registry/registry.go index 3a93c79c752..db8c947dffa 100644 --- a/go/consensus/cometbft/apps/registry/registry.go +++ b/go/consensus/cometbft/apps/registry/registry.go @@ -212,7 +212,7 @@ func (app *Application) onRegistryEpochChanged(ctx *api.Context, registryEpoch b // otherwise the nodes could not be resolved. var expiredNodes []*node.Node for _, node := range nodes { - if !node.IsExpired(uint64(registryEpoch)) { + if !node.IsExpired(registryEpoch) { continue } @@ -234,11 +234,11 @@ func (app *Application) onRegistryEpochChanged(ctx *api.Context, registryEpoch b } // If node has been expired for the debonding interval, finally remove it. 
- if math.MaxUint64-node.Expiration < uint64(debondingInterval) { + if math.MaxUint64-node.Expiration < debondingInterval { // Overflow, the node will never be removed. continue } - if beacon.EpochTime(node.Expiration)+debondingInterval < registryEpoch { + if node.Expiration+debondingInterval < registryEpoch { ctx.Logger().Debug("removing expired node", "node_id", node.ID, ) diff --git a/go/consensus/cometbft/apps/registry/transactions.go b/go/consensus/cometbft/apps/registry/transactions.go index 099806287d4..b2463ce0277 100644 --- a/go/consensus/cometbft/apps/registry/transactions.go +++ b/go/consensus/cometbft/apps/registry/transactions.go @@ -266,7 +266,7 @@ func (app *Application) registerNode( // nolint: gocyclo // immediately expire. // // Yes, this is duplicated. Blame the sanity checker. - if !ctx.IsInitChain() && newNode.Expiration <= uint64(epoch) { + if !ctx.IsInitChain() && newNode.Expiration <= epoch { ctx.Logger().Debug("RegisterNode: node descriptor is expired", "new_node", newNode, "epoch", epoch, @@ -274,15 +274,15 @@ func (app *Application) registerNode( // nolint: gocyclo return registry.ErrNodeExpired } - var additionalEpochs uint64 - if newNode.Expiration > uint64(epoch) { - additionalEpochs = newNode.Expiration - uint64(epoch) + var additionalEpochs beacon.EpochTime + if newNode.Expiration > epoch { + additionalEpochs = newNode.Expiration - epoch } // Check if node exists. existingNode, err := state.Node(ctx, newNode.ID) isNewNode := err == registry.ErrNoSuchNode - isExpiredNode := err == nil && existingNode.IsExpired(uint64(epoch)) + isExpiredNode := err == nil && existingNode.IsExpired(epoch) if !isNewNode && err != nil { // Something went horribly wrong, and we failed to query the node. ctx.Logger().Error("RegisterNode: failed to query node", @@ -300,7 +300,7 @@ func (app *Application) registerNode( // nolint: gocyclo // Remaining epochs are credited so the node doesn't end up paying twice. // NOTE: This assumes that changing runtimes is not allowed as otherwise we // would need to account this per-runtime. - remainingEpochs := existingNode.Expiration - uint64(epoch) + remainingEpochs := existingNode.Expiration - epoch if additionalEpochs > remainingEpochs { additionalEpochs = additionalEpochs - remainingEpochs } else { diff --git a/go/consensus/cometbft/apps/roothash/liveness.go b/go/consensus/cometbft/apps/roothash/liveness.go index 0ebbaba694c..1b8a03e0aaa 100644 --- a/go/consensus/cometbft/apps/roothash/liveness.go +++ b/go/consensus/cometbft/apps/roothash/liveness.go @@ -88,9 +88,9 @@ func processLivenessStatistics(ctx *tmapi.Context, epoch beacon.EpochTime, rtSta if fault.Failures >= maxFailures { // Make sure to freeze forever if this would otherwise overflow. if epoch > registry.FreezeForever-slashParams.FreezeInterval { - status.FreezeEndTime = registry.FreezeForever + status.Freeze(registry.FreezeForever) } else { - status.FreezeEndTime = epoch + slashParams.FreezeInterval + status.Freeze(epoch + slashParams.FreezeInterval) } // Slash if configured. 
diff --git a/go/consensus/cometbft/apps/roothash/messages.go b/go/consensus/cometbft/apps/roothash/messages.go index 3e0981b958e..1cf4b52822e 100644 --- a/go/consensus/cometbft/apps/roothash/messages.go +++ b/go/consensus/cometbft/apps/roothash/messages.go @@ -12,10 +12,12 @@ import ( registryState "github.com/oasisprotocol/oasis-core/go/consensus/cometbft/apps/registry/state" roothashApi "github.com/oasisprotocol/oasis-core/go/consensus/cometbft/apps/roothash/api" roothashState "github.com/oasisprotocol/oasis-core/go/consensus/cometbft/apps/roothash/state" + "github.com/oasisprotocol/oasis-core/go/consensus/cometbft/features" governance "github.com/oasisprotocol/oasis-core/go/governance/api" roothash "github.com/oasisprotocol/oasis-core/go/roothash/api" "github.com/oasisprotocol/oasis-core/go/roothash/api/message" staking "github.com/oasisprotocol/oasis-core/go/staking/api" + "github.com/oasisprotocol/oasis-core/go/upgrade/migrations" ) func fetchRuntimeMessages( @@ -171,6 +173,23 @@ func (app *Application) processRuntimeMessages( func (app *Application) doBeforeSchedule(ctx *tmapi.Context, msg any) (any, error) { epoch := msg.(beacon.EpochTime) + ok, err := features.IsFeatureVersion(ctx, migrations.Version242) + if err != nil { + return nil, err + } + if ok { + ctx.Logger().Debug("finalizing rounds before scheduling", + "epoch", epoch, + ) + + if err := app.tryFinalizeRounds(ctx); err != nil { + return nil, err + } + if err := app.processRoundTimeouts(ctx); err != nil { + return nil, err + } + } + ctx.Logger().Debug("processing liveness statistics before scheduling", "epoch", epoch, ) diff --git a/go/consensus/cometbft/apps/scheduler/debug_force.go b/go/consensus/cometbft/apps/scheduler/debug_force.go index 79dbc955e0a..8b6a1bcaf0d 100644 --- a/go/consensus/cometbft/apps/scheduler/debug_force.go +++ b/go/consensus/cometbft/apps/scheduler/debug_force.go @@ -17,13 +17,13 @@ type debugForceElectState struct { elected map[signature.PublicKey]bool } -func (app *Application) debugForceElect( +func debugForceElect( ctx *api.Context, schedulerParameters *scheduler.ConsensusParameters, rt *registry.Runtime, kind scheduler.CommitteeKind, role scheduler.Role, - nodeList []*node.Node, + nodes []*node.Node, wantedNodes int, ) (bool, []*scheduler.CommitteeNode, *debugForceElectState) { elected := make([]*scheduler.CommitteeNode, 0, wantedNodes) @@ -61,12 +61,12 @@ forceLoop: } // Ensure the node is currently registered and eligible. - for _, v := range nodeList { + for _, n := range nodes { ctx.Logger().Debug("checking to see if this is the force elected node", - "iter_id", v.ID, + "iter_id", n.ID, "node", nodeID, ) - if v.ID.Equal(nodeID) { + if n.ID.Equal(nodeID) { // And force it into the committee. 
elected = append(elected, &scheduler.CommitteeNode{ Role: role, @@ -86,7 +86,7 @@ forceLoop: ctx.Logger().Error("available nodes can't fulfill forced committee members", "kind", kind, "runtime_id", rt.ID, - "nr_nodes", len(nodeList), + "nr_nodes", len(nodes), "mandatory_nodes", len(toForce), ) return false, nil, nil @@ -95,7 +95,7 @@ forceLoop: return true, elected, state } -func (app *Application) debugForceRoles( +func debugForceRoles( ctx *api.Context, state *debugForceElectState, elected []*scheduler.CommitteeNode, diff --git a/go/consensus/cometbft/apps/scheduler/scheduler.go b/go/consensus/cometbft/apps/scheduler/scheduler.go index b419d9599b8..3afda384e74 100644 --- a/go/consensus/cometbft/apps/scheduler/scheduler.go +++ b/go/consensus/cometbft/apps/scheduler/scheduler.go @@ -4,13 +4,14 @@ import ( "bytes" "crypto" "fmt" + "maps" "math/rand" + "slices" "sort" "github.com/cometbft/cometbft/abci/types" beacon "github.com/oasisprotocol/oasis-core/go/beacon/api" - "github.com/oasisprotocol/oasis-core/go/common" "github.com/oasisprotocol/oasis-core/go/common/crypto/drbg" "github.com/oasisprotocol/oasis-core/go/common/crypto/mathrand" "github.com/oasisprotocol/oasis-core/go/common/crypto/signature" @@ -28,9 +29,11 @@ import ( schedulerState "github.com/oasisprotocol/oasis-core/go/consensus/cometbft/apps/scheduler/state" stakingapp "github.com/oasisprotocol/oasis-core/go/consensus/cometbft/apps/staking" stakingState "github.com/oasisprotocol/oasis-core/go/consensus/cometbft/apps/staking/state" + "github.com/oasisprotocol/oasis-core/go/consensus/cometbft/features" registry "github.com/oasisprotocol/oasis-core/go/registry/api" scheduler "github.com/oasisprotocol/oasis-core/go/scheduler/api" staking "github.com/oasisprotocol/oasis-core/go/staking/api" + "github.com/oasisprotocol/oasis-core/go/upgrade/migrations" ) var ( @@ -93,175 +96,215 @@ func (app *Application) OnCleanup() {} // BeginBlock implements api.Application. func (app *Application) BeginBlock(ctx *api.Context) error { - // Check if any stake slashing has occurred in the staking layer. - // NOTE: This will NOT trigger for any slashing that happens as part of - // any transactions being submitted to the chain. - slashed := ctx.HasEvent(stakingapp.AppName, &staking.TakeEscrowEvent{}) + return app.maybeElectInBeginBlock(ctx) +} + +// maybeElectInBeginBlock determines whether elections should be performed +// in the begin block phase and executes them if needed. +func (app *Application) maybeElectInBeginBlock(ctx *api.Context) error { + res, err := app.shouldElectInBeginBlock(ctx) + if err != nil { + return err + } + if !res.elect { + return nil + } + return app.elect(ctx, res.epoch, res.reward) +} + +// shouldElectInBeginBlock determines whether elections should be performed +// in the begin block phase. +func (app *Application) shouldElectInBeginBlock(ctx *api.Context) (*electionDecision, error) { + // Check if feature version disables old election logic. + ok, err := features.IsFeatureVersion(ctx, migrations.Version242) + if err != nil { + return nil, err + } + if ok { + return &electionDecision{}, nil + } + // Check if epoch has changed. // TODO: We'll later have this for each type of committee. epochChanged, epoch := app.state.EpochChanged(ctx) + if epochChanged { + // For elections on epoch changes, distribute rewards. + return &electionDecision{ + epoch: epoch, + elect: true, + reward: true, + }, nil + } - if epochChanged || slashed { - // Notify applications that we are going to schedule committees. 
- _, err := app.md.Publish(ctx, schedulerApi.MessageBeforeSchedule, epoch) - if err != nil { - return fmt.Errorf("cometbft/scheduler: before schedule notification failed: %w", err) - } + // Check if any stake slashing has occurred in the staking layer. + // NOTE: This will NOT trigger for any slashing that happens as part of + // any transactions being submitted to the chain. + slashed := ctx.HasEvent(stakingapp.AppName, &staking.TakeEscrowEvent{}) + if !slashed { + return &electionDecision{}, nil + } - // The 0th epoch will not have suitable entropy for elections, nor - // will it have useful node registrations. - baseEpoch, err := app.state.GetBaseEpoch() - if err != nil { - return fmt.Errorf("cometbft/scheduler: couldn't get base epoch: %w", err) - } + return &electionDecision{ + epoch: epoch, + elect: true, + }, nil +} - if epoch == baseEpoch { - ctx.Logger().Info("system in bootstrap period, skipping election", - "epoch", epoch, - ) - return nil - } +// elect elects validators and runtime committees for the given epoch +// and optionally distributes staking rewards. +func (app *Application) elect(ctx *api.Context, epoch beacon.EpochTime, reward bool) error { + // Notify applications that we are going to schedule committees. + _, err := app.md.Publish(ctx, schedulerApi.MessageBeforeSchedule, epoch) + if err != nil { + return fmt.Errorf("cometbft/scheduler: before schedule notification failed: %w", err) + } - state := schedulerState.NewMutableState(ctx.State()) - params, err := state.ConsensusParameters(ctx) - if err != nil { - ctx.Logger().Error("failed to fetch consensus parameters", - "err", err, - ) - return err - } + // The 0th epoch will not have suitable entropy for elections, nor + // will it have useful node registrations. + baseEpoch, err := app.state.GetBaseEpoch() + if err != nil { + return fmt.Errorf("cometbft/scheduler: couldn't get base epoch: %w", err) + } - beaconState := beaconState.NewMutableState(ctx.State()) - beaconParameters, err := beaconState.ConsensusParameters(ctx) - if err != nil { - return fmt.Errorf("cometbft/scheduler: couldn't get beacon parameters: %w", err) - } - // If weak alphas are allowed then skip the eligibility check as - // well because the byzantine node and associated tests are extremely - // fragile, and breaks in hard-to-debug ways if timekeeping isn't - // exactly how it expects. - filterCommitteeNodes := beaconParameters.Backend == beacon.BackendVRF && !params.DebugAllowWeakAlpha - - regState := registryState.NewMutableState(ctx.State()) - registryParameters, err := regState.ConsensusParameters(ctx) - if err != nil { - return fmt.Errorf("cometbft/scheduler: couldn't get registry parameters: %w", err) - } - runtimes, err := regState.Runtimes(ctx) - if err != nil { - return fmt.Errorf("cometbft/scheduler: couldn't get runtimes: %w", err) - } - allNodes, err := regState.Nodes(ctx) - if err != nil { - return fmt.Errorf("cometbft/scheduler: couldn't get nodes: %w", err) - } + if epoch == baseEpoch { + ctx.Logger().Info("system in bootstrap period, skipping election", + "epoch", epoch, + ) + return nil + } - // Filter nodes. 
- var ( - nodes []*node.Node - committeeNodes []*nodeWithStatus + state := schedulerState.NewMutableState(ctx.State()) + schedulerParameters, err := state.ConsensusParameters(ctx) + if err != nil { + ctx.Logger().Error("failed to fetch consensus parameters", + "err", err, ) - for _, node := range allNodes { - var status *registry.NodeStatus - status, err = regState.NodeStatus(ctx, node.ID) - if err != nil { - return fmt.Errorf("cometbft/scheduler: couldn't get node status: %w", err) - } + return err + } - // Nodes which are currently frozen cannot be scheduled. - if status.IsFrozen() { - continue - } - // Expired nodes cannot be scheduled (nodes can be expired and not yet removed). - if node.IsExpired(uint64(epoch)) { - continue - } + beaconState := beaconState.NewMutableState(ctx.State()) + beaconParameters, err := beaconState.ConsensusParameters(ctx) + if err != nil { + return fmt.Errorf("cometbft/scheduler: couldn't get beacon parameters: %w", err) + } + entropy, err := beaconState.Beacon(ctx) + if err != nil { + return fmt.Errorf("cometbft/scheduler: couldn't get beacon: %w", err) + } - nodes = append(nodes, node) - if !filterCommitteeNodes || (status.ElectionEligibleAfter != beacon.EpochInvalid && epoch > status.ElectionEligibleAfter) { - committeeNodes = append(committeeNodes, &nodeWithStatus{node, status}) - } + // Always use VRF state from the epoch preceding the election epoch. + var vrf *beacon.PrevVRFState + if beaconParameters.Backend == beacon.BackendVRF { + vrfState, err := beaconState.VRFState(ctx) + if err != nil { + return fmt.Errorf("cometbft/scheduler: failed to query VRF state: %w", err) } - var stakeAcc *stakingState.StakeAccumulatorCache - if !params.DebugBypassStake { - stakeAcc, err = stakingState.NewStakeAccumulatorCache(ctx) - if err != nil { - return fmt.Errorf("cometbft/scheduler: failed to create stake accumulator cache: %w", err) + switch epoch { + case vrfState.Epoch: + vrf = vrfState.PrevState + case vrfState.Epoch + 1: + vrf = &beacon.PrevVRFState{ + Pi: vrfState.Pi, + CanElectCommittees: vrfState.AlphaIsHighQuality, } - defer stakeAcc.Discard() + default: + return fmt.Errorf("cometbft/scheduler: failed to query previous VRF state") } + } - var entitiesEligibleForReward map[staking.Address]bool - if epochChanged { - // For elections on epoch changes, distribute rewards to entities with any eligible nodes. - entitiesEligibleForReward = make(map[staking.Address]bool) - } + // If weak alphas are allowed then skip the eligibility check as + // well because the byzantine node and associated tests are extremely + // fragile, and breaks in hard-to-debug ways if timekeeping isn't + // exactly how it expects. + filterCommitteeNodes := beaconParameters.Backend == beacon.BackendVRF && !schedulerParameters.DebugAllowWeakAlpha - // Handle the validator election first, because no consensus is - // catastrophic, while failing to elect other committees is not. - var validatorEntities map[staking.Address]bool - if validatorEntities, err = app.electValidators( - ctx, - app.state, - beaconState, - beaconParameters, - stakeAcc, - entitiesEligibleForReward, - nodes, - params, - ); err != nil { - // It is unclear what the behavior should be if the validator - // election fails. The system can not ensure integrity, so - // presumably manual intervention is required... 
- return fmt.Errorf("cometbft/scheduler: couldn't elect validators: %w", err) - } + regState := registryState.NewImmutableState(ctx.State()) + registryParameters, err := regState.ConsensusParameters(ctx) + if err != nil { + return fmt.Errorf("cometbft/scheduler: couldn't get registry parameters: %w", err) + } + allNodes, err := regState.Nodes(ctx) + if err != nil { + return fmt.Errorf("cometbft/scheduler: couldn't get nodes: %w", err) + } - kinds := []scheduler.CommitteeKind{ - scheduler.KindComputeExecutor, - } - for _, kind := range kinds { - if err = app.electAllCommittees( - ctx, - epoch, - params, - beaconState, - beaconParameters, - registryParameters, - stakeAcc, - entitiesEligibleForReward, - validatorEntities, - runtimes, - committeeNodes, - kind, - ); err != nil { - return fmt.Errorf("cometbft/scheduler: couldn't elect %s committees: %w", kind, err) - } + // Filter nodes. + var ( + nodes []*node.Node + committeeNodes []*nodeWithStatus + ) + for _, node := range allNodes { + status, err := regState.NodeStatus(ctx, node.ID) + if err != nil { + return fmt.Errorf("cometbft/scheduler: couldn't get node status: %w", err) } - ctx.EmitEvent(api.NewEventBuilder(app.Name()).TypedAttribute(&scheduler.ElectedEvent{Kinds: kinds})) - var kindNames []string - for _, kind := range kinds { - kindNames = append(kindNames, kind.String()) + // Nodes which are currently frozen cannot be scheduled. + if status.IsFrozen() { + continue } - var runtimeIDs []string - for _, rt := range runtimes { - runtimeIDs = append(runtimeIDs, rt.ID.String()) + // Expired nodes cannot be scheduled (nodes can be expired and not yet removed). + if node.IsExpired(epoch) { + continue } - ctx.Logger().Debug("finished electing committees", - "epoch", epoch, - "kinds", kindNames, - "runtimes", runtimeIDs, - ) - if entitiesEligibleForReward != nil { - accountAddrs := stakingAddressMapToSortedSlice(entitiesEligibleForReward) - stakingSt := stakingState.NewMutableState(ctx.State()) - if err = stakingSt.AddRewards(ctx, epoch, ¶ms.RewardFactorEpochElectionAny, accountAddrs); err != nil { - return fmt.Errorf("cometbft/scheduler: failed to add rewards: %w", err) - } + nodes = append(nodes, node) + if !filterCommitteeNodes || status.IsEligibleForElection(epoch) { + committeeNodes = append(committeeNodes, &nodeWithStatus{node, status}) } } + + stakeAcc, err := stakingState.NewStakeAccumulatorCache(ctx) + if err != nil { + return fmt.Errorf("cometbft/scheduler: failed to create stake accumulator cache: %w", err) + } + + rewardableEntities := make(map[staking.Address]struct{}) + + // Handle the validator election first, because no consensus is + // catastrophic, while failing to elect other committees is not. + validatorEntities, err := electValidators( + ctx, + epoch, + beaconParameters, + stakeAcc, + rewardableEntities, + nodes, + schedulerParameters, + entropy, + vrf, + ) + if err != nil { + // It is unclear what the behavior should be if the validator + // election fails. The system can not ensure integrity, so + // presumably manual intervention is required... 
+ return fmt.Errorf("cometbft/scheduler: couldn't elect validators: %w", err) + } + + if err = app.electCommittees( + ctx, + epoch, + schedulerParameters, + beaconParameters, + registryParameters, + stakeAcc, + rewardableEntities, + validatorEntities, + committeeNodes, + entropy, + vrf, + ); err != nil { + return fmt.Errorf("cometbft/scheduler: couldn't elect committees: %w", err) + } + + if !reward { + return nil + } + if err := distributeRewards(ctx, epoch, rewardableEntities, schedulerParameters); err != nil { + return fmt.Errorf("cometbft/scheduler: failed to add rewards: %w", err) + } + return nil } @@ -285,6 +328,59 @@ func (app *Application) ExecuteTx(*api.Context, *transaction.Transaction) error return fmt.Errorf("cometbft/scheduler: unexpected transaction") } +// EndBlock implements api.Application. +func (app *Application) EndBlock(ctx *api.Context) (types.ResponseEndBlock, error) { + var resp types.ResponseEndBlock + + if err := app.maybeElectInEndBlock(ctx); err != nil { + return resp, err + } + + validatorUpdates, err := updateValidators(ctx, resp) + if err != nil { + return resp, err + } + + resp.ValidatorUpdates = validatorUpdates + + return resp, nil +} + +func updateValidators(ctx *api.Context, resp types.ResponseEndBlock) ([]types.ValidatorUpdate, error) { + state := schedulerState.NewMutableState(ctx.State()) + pendingValidators, err := state.PendingValidators(ctx) + if err != nil { + return nil, fmt.Errorf("cometbft/scheduler: failed to query pending validators: %w", err) + } + if pendingValidators == nil { + // No validator updates to apply. + return nil, nil + } + + currentValidators, err := state.CurrentValidators(ctx) + if err != nil { + return nil, fmt.Errorf("cometbft/scheduler: failed to query current validators: %w", err) + } + + // Clear out the pending validator update. + if err = state.PutPendingValidators(ctx, nil); err != nil { + return nil, fmt.Errorf("cometbft/scheduler: failed to clear validators: %w", err) + } + + // CometBFT expects a vector of ValidatorUpdate that expresses + // the difference between the current validator set (tracked manually + // from InitChain), and the new validator set, which is a huge pain + // in the ass. + + validatorUpdates := diffValidators(ctx.Logger(), currentValidators, pendingValidators) + + // Stash the updated validator set. + if err = state.PutCurrentValidators(ctx, pendingValidators); err != nil { + return nil, fmt.Errorf("cometbft/scheduler: failed to set validators: %w", err) + } + return validatorUpdates, nil +} + func diffValidators(logger *logging.Logger, current, pending map[signature.PublicKey]*scheduler.Validator) []types.ValidatorUpdate { var updates []types.ValidatorUpdate for v := range current { @@ -315,46 +411,73 @@ func diffValidators(logger *logging.Logger, current, pending map[signature.Publi return updates } -// EndBlock implements api.Application. -func (app *Application) EndBlock(ctx *api.Context) (types.ResponseEndBlock, error) { - var resp types.ResponseEndBlock - - state := schedulerState.NewMutableState(ctx.State()) - pendingValidators, err := state.PendingValidators(ctx) +// maybeElectInEndBlock determines whether elections should be performed +// in the end block phase and executes them if needed. 
+func (app *Application) maybeElectInEndBlock(ctx *api.Context) error { + res, err := app.shouldElectInEndBlock(ctx) if err != nil { - return resp, fmt.Errorf("cometbft/scheduler: failed to query pending validators: %w", err) + return err } - if pendingValidators == nil { - // No validator updates to apply. - return resp, nil + if !res.elect { + return nil } + return app.elect(ctx, res.epoch, res.reward) +} - currentValidators, err := state.CurrentValidators(ctx) +// shouldElectInEndBlock determines whether elections should be performed +// in the end block phase. +func (app *Application) shouldElectInEndBlock(ctx *api.Context) (*electionDecision, error) { + // Check if feature version disables old election logic. + ok, err := features.IsFeatureVersion(ctx, migrations.Version242) if err != nil { - return resp, fmt.Errorf("cometbft/scheduler: failed to query current validators: %w", err) + return nil, err } - - // Clear out the pending validator update. - if err = state.PutPendingValidators(ctx, nil); err != nil { - return resp, fmt.Errorf("cometbft/scheduler: failed to clear validators: %w", err) + if !ok { + return &electionDecision{}, nil } - // CometBFT expects a vector of ValidatorUpdate that expresses - // the difference between the current validator set (tracked manually - // from InitChain), and the new validator set, which is a huge pain - // in the ass. + // Skip elections if the insecure beacon entropy required for elections + // is not available. + beaconState := beaconState.NewMutableState(ctx.State()) + epoch, _, err := beaconState.GetEpoch(ctx) + if err != nil { + return nil, err + } + baseEpoch, err := app.state.GetBaseEpoch() + if err != nil { + return nil, err + } + if epoch == baseEpoch { + return &electionDecision{}, nil + } - resp.ValidatorUpdates = diffValidators(ctx.Logger(), currentValidators, pendingValidators) + // Elect at the end of every epoch. + future, err := beaconState.GetFutureEpoch(ctx) + if err != nil { + return nil, err + } + if future != nil && future.Height == ctx.CurrentHeight()+1 { + // For elections on epoch end, distribute rewards. + return &electionDecision{ + epoch: future.Epoch, + elect: true, + reward: true, + }, nil + } - // Stash the updated validator set. - if err = state.PutCurrentValidators(ctx, pendingValidators); err != nil { - return resp, fmt.Errorf("cometbft/scheduler: failed to set validators: %w", err) + // Re-elect if slashed. + slashed := ctx.HasEvent(stakingapp.AppName, &staking.TakeEscrowEvent{}) + if !slashed { + return &electionDecision{}, nil } - return resp, nil + return &electionDecision{ + epoch: epoch, + elect: true, + }, nil } -func (app *Application) isSuitableExecutorWorker( +func isSuitableExecutorWorker( ctx *api.Context, n *nodeWithStatus, rt *registry.Runtime, @@ -415,93 +538,94 @@ func (app *Application) isSuitableExecutorWorker( return false } -// GetPerm generates a permutation that we use to choose nodes from a list of eligible nodes to elect. -func GetPerm(beacon []byte, runtimeID common.Namespace, rngCtx []byte, nrNodes int) ([]int, error) { - drbg, err := drbg.New(crypto.SHA512, beacon, runtimeID[:], rngCtx) - if err != nil { - return nil, fmt.Errorf("cometbft/scheduler: couldn't instantiate DRBG: %w", err) - } - rng := rand.New(mathrand.New(drbg)) - return rng.Perm(nrNodes), nil -} - -// Operates on consensus connection. 
-func (app *Application) electAllCommittees( +func (app *Application) electCommittees( ctx *api.Context, epoch beacon.EpochTime, schedulerParameters *scheduler.ConsensusParameters, - beaconState *beaconState.MutableState, beaconParameters *beacon.ConsensusParameters, registryParameters *registry.ConsensusParameters, stakeAcc *stakingState.StakeAccumulatorCache, - entitiesEligibleForReward map[staking.Address]bool, - validatorEntities map[staking.Address]bool, - runtimes []*registry.Runtime, - nodeList []*nodeWithStatus, - kind scheduler.CommitteeKind, + rewardableEntities map[staking.Address]struct{}, + validatorEntities map[staking.Address]struct{}, + nodes []*nodeWithStatus, + entropy []byte, + vrf *beacon.PrevVRFState, ) error { + runtimes, err := fetchRuntimes(ctx) + if err != nil { + return err + } + + kinds := []scheduler.CommitteeKind{ + scheduler.KindComputeExecutor, + } + for _, runtime := range runtimes { - if err := app.electCommittee( - ctx, - epoch, - schedulerParameters, - beaconState, - beaconParameters, - registryParameters, - stakeAcc, - entitiesEligibleForReward, - validatorEntities, - runtime, - nodeList, - kind, - ); err != nil { - return err + for _, kind := range kinds { + if err := electCommittee( + ctx, + epoch, + schedulerParameters, + beaconParameters, + registryParameters, + stakeAcc, + rewardableEntities, + validatorEntities, + runtime, + nodes, + kind, + entropy, + vrf, + ); err != nil { + return err + } } } + + ctx.EmitEvent(api.NewEventBuilder(app.Name()).TypedAttribute(&scheduler.ElectedEvent{Kinds: kinds})) + return nil } -func (app *Application) electValidators( +func electValidators( ctx *api.Context, - appState api.ApplicationQueryState, - beaconState *beaconState.MutableState, + epoch beacon.EpochTime, beaconParameters *beacon.ConsensusParameters, stakeAcc *stakingState.StakeAccumulatorCache, - entitiesEligibleForReward map[staking.Address]bool, + rewardableEntities map[staking.Address]struct{}, nodes []*node.Node, - params *scheduler.ConsensusParameters, -) (map[staking.Address]bool, error) { - // Filter the node list based on eligibility and minimum required - // entity stake. - var nodeList []*node.Node - entities := make(map[staking.Address]bool) + schedulerParameters *scheduler.ConsensusParameters, + entropy []byte, + vrf *beacon.PrevVRFState, +) (map[staking.Address]struct{}, error) { + // Filter nodes based on eligibility and minimum required entity stake. + var validators []*node.Node + entities := make(map[staking.Address]struct{}) for _, n := range nodes { if !n.HasRoles(node.RoleValidator) { continue } + entAddr := staking.NewAddress(n.EntityID) - if stakeAcc != nil { + if !schedulerParameters.DebugBypassStake { if err := stakeAcc.CheckStakeClaims(entAddr); err != nil { continue } } - nodeList = append(nodeList, n) - entities[entAddr] = true + + validators = append(validators, n) + entities[entAddr] = struct{}{} } // Sort all of the entities that are actually running eligible validator // nodes by descending stake. - weakEntropy, err := beaconState.Beacon(ctx) - if err != nil { - return nil, fmt.Errorf("cometbft/scheduler: couldn't get beacon: %w", err) - } - sortedEntities, err := stakingAddressMapToSliceByStake(entities, stakeAcc, weakEntropy) + sortedEntities, err := stakingAddressMapToSliceByStake(entities, stakeAcc, entropy, schedulerParameters) if err != nil { return nil, err } - // Shuffle the node list. 
- shuffledNodes, err := shuffleValidators(ctx, appState, params, beaconState, beaconParameters, nodeList) + // Shuffle validator nodes. + shuffledNodes, err := shuffleValidators(ctx, epoch, schedulerParameters, beaconParameters, validators, entropy, vrf) if err != nil { return nil, err } @@ -520,7 +644,7 @@ func (app *Application) electValidators( // Go down the list of entities running nodes by stake, picking one node // to act as a validator till the maximum is reached. - validatorEntities := make(map[staking.Address]bool) + validatorEntities := make(map[staking.Address]struct{}) newValidators := make(map[signature.PublicKey]*scheduler.Validator) electLoop: for _, entAddr := range sortedEntities { @@ -529,7 +653,7 @@ electLoop: // This is usually a maximum of 1, but if more are allowed, // like in certain test scenarios, then pick as many nodes // as the entity's stake allows - for i := 0; i < params.MaxValidatorsPerEntity; i++ { + for i := 0; i < schedulerParameters.MaxValidatorsPerEntity; i++ { if i >= len(nodes) { break } @@ -539,21 +663,18 @@ electLoop: // If the entity gets a validator elected, it is eligible // for rewards, but only once regardless of the number // of validators owned by the entity in the set. - if entitiesEligibleForReward != nil { - entitiesEligibleForReward[entAddr] = true - } + rewardableEntities[entAddr] = struct{}{} var power int64 - if stakeAcc == nil { + if schedulerParameters.DebugBypassStake { // In simplified no-stake deployments, make validators have flat voting power. power = 1 } else { - var stake *quantity.Quantity - stake, err = stakeAcc.GetEscrowBalance(entAddr) + stake, err := stakeAcc.GetEscrowBalance(entAddr) if err != nil { return nil, fmt.Errorf("failed to fetch escrow balance for account %s: %w", entAddr, err) } - power, err = scheduler.VotingPowerFromStake(stake, params.VotingPowerDistribution) + power, err = scheduler.VotingPowerFromStake(stake, schedulerParameters.VotingPowerDistribution) if err != nil { return nil, fmt.Errorf("computing voting power for account %s with balance %v: %w", entAddr, stake, err, @@ -561,13 +682,13 @@ electLoop: } } - validatorEntities[entAddr] = true + validatorEntities[entAddr] = struct{}{} newValidators[n.Consensus.ID] = &scheduler.Validator{ ID: n.ID, EntityID: n.EntityID, VotingPower: power, } - if len(newValidators) >= params.MaxValidators { + if len(newValidators) >= schedulerParameters.MaxValidators { break electLoop } } @@ -576,7 +697,7 @@ electLoop: if len(newValidators) == 0 { return nil, fmt.Errorf("cometbft/scheduler: failed to elect any validators") } - if len(newValidators) < params.MinValidators { + if len(newValidators) < schedulerParameters.MinValidators { return nil, fmt.Errorf("cometbft/scheduler: insufficient validators") } @@ -591,59 +712,96 @@ electLoop: } func stakingAddressMapToSliceByStake( - entMap map[staking.Address]bool, + entities map[staking.Address]struct{}, stakeAcc *stakingState.StakeAccumulatorCache, - beacon []byte, + entropy []byte, + schedulerParameters *scheduler.ConsensusParameters, ) ([]staking.Address, error) { - // Convert the map of entity's stake account addresses to a lexicographically - // sorted slice (i.e. make it deterministic). - entities := stakingAddressMapToSortedSlice(entMap) + // Sort addrs lexicographically, i.e. make order deterministic. + addrs := slices.Collect(maps.Keys(entities)) + sortAddresses(addrs) - // Shuffle the sorted slice to make tie-breaks "random". 
- drbg, err := drbg.New(crypto.SHA512, beacon, nil, RNGContextEntities) + // Shuffle entities to make tie-breaks "random". + rng, err := initRNG(entropy, nil, RNGContextEntities) if err != nil { - return nil, fmt.Errorf("cometbft/scheduler: couldn't instantiate DRBG: %w", err) + return nil, err } - rngSrc := mathrand.New(drbg) - rng := rand.New(rngSrc) + shuffleAddresses(addrs, rng) - rng.Shuffle(len(entities), func(i, j int) { - entities[i], entities[j] = entities[j], entities[i] + if schedulerParameters.DebugBypassStake { + return addrs, nil + } + + // Stable-sort the shuffled slice by descending escrow balance. + balances, err := fetchBalances(addrs, stakeAcc) + if err != nil { + return nil, err + } + sortAddressesByBalance(addrs, balances) + + return addrs, nil +} + +func sortAddresses(addrs []staking.Address) { + sort.Slice(addrs, func(i, j int) bool { + return bytes.Compare(addrs[i][:], addrs[j][:]) < 0 }) +} - if stakeAcc == nil { - return entities, nil +func sortAddressesByBalance(addrs []staking.Address, balances map[staking.Address]*quantity.Quantity) { + sort.SliceStable(addrs, func(i, j int) bool { + bi := balances[addrs[i]] + bj := balances[addrs[j]] + return bi.Cmp(bj) == 1 // Note: Not -1 to get a reversed sort. + }) +} + +func shuffleAddresses(addrs []staking.Address, rng *rand.Rand) { + rng.Shuffle(len(addrs), func(i, j int) { + addrs[i], addrs[j] = addrs[j], addrs[i] + }) +} + +func initRNG(entropy []byte, nonce []byte, context []byte) (*rand.Rand, error) { + drbg, err := drbg.New(crypto.SHA512, entropy, nonce, context) + if err != nil { + return nil, fmt.Errorf("cometbft/scheduler: couldn't instantiate DRBG: %w", err) } + src := mathrand.New(drbg) + rng := rand.New(src) + return rng, nil +} - // Stable-sort the shuffled slice by descending escrow balance. - var balanceErr error - sort.SliceStable(entities, func(i, j int) bool { - iBal, err := stakeAcc.GetEscrowBalance(entities[i]) +func fetchBalances(addrs []staking.Address, stakeAcc *stakingState.StakeAccumulatorCache) (map[staking.Address]*quantity.Quantity, error) { + balances := make(map[staking.Address]*quantity.Quantity) + for _, addr := range addrs { + balance, err := stakeAcc.GetEscrowBalance(addr) if err != nil { - balanceErr = err - return false + return nil, fmt.Errorf("failed to fetch escrow balance: %w", err) } - jBal, err := stakeAcc.GetEscrowBalance(entities[j]) - if err != nil { - balanceErr = err - return false - } - return iBal.Cmp(jBal) == 1 // Note: Not -1 to get a reversed sort. 
- }) - if balanceErr != nil { - return nil, fmt.Errorf("failed to fetch escrow balance: %w", balanceErr) + balances[addr] = balance } - - return entities, nil + return balances, nil } -func stakingAddressMapToSortedSlice(m map[staking.Address]bool) []staking.Address { - sorted := make([]staking.Address, 0, len(m)) - for mk := range m { - sorted = append(sorted, mk) +func fetchRuntimes(ctx *api.Context) ([]*registry.Runtime, error) { + regState := registryState.NewImmutableState(ctx.State()) + runtimes, err := regState.Runtimes(ctx) + if err != nil { + return nil, fmt.Errorf("cometbft/scheduler: couldn't get runtimes: %w", err) } - sort.Slice(sorted, func(i, j int) bool { - return bytes.Compare(sorted[i][:], sorted[j][:]) < 0 - }) - return sorted + return runtimes, nil +} + +func distributeRewards(ctx *api.Context, epoch beacon.EpochTime, entities map[staking.Address]struct{}, schedulerParameters *scheduler.ConsensusParameters) error { + addrs := slices.Collect(maps.Keys(entities)) + sortAddresses(addrs) + state := stakingState.NewMutableState(ctx.State()) + return state.AddRewards(ctx, epoch, &schedulerParameters.RewardFactorEpochElectionAny, addrs) +} + +type electionDecision struct { + epoch beacon.EpochTime + elect bool + reward bool } diff --git a/go/consensus/cometbft/apps/scheduler/scheduler_test.go b/go/consensus/cometbft/apps/scheduler/scheduler_test.go index 973eab95376..3b291b989ba 100644 --- a/go/consensus/cometbft/apps/scheduler/scheduler_test.go +++ b/go/consensus/cometbft/apps/scheduler/scheduler_test.go @@ -16,6 +16,7 @@ import ( "github.com/oasisprotocol/oasis-core/go/consensus/cometbft/api" beaconState "github.com/oasisprotocol/oasis-core/go/consensus/cometbft/apps/beacon/state" schedulerState "github.com/oasisprotocol/oasis-core/go/consensus/cometbft/apps/scheduler/state" + stakingState "github.com/oasisprotocol/oasis-core/go/consensus/cometbft/apps/staking/state" registry "github.com/oasisprotocol/oasis-core/go/registry/api" scheduler "github.com/oasisprotocol/oasis-core/go/scheduler/api" staking "github.com/oasisprotocol/oasis-core/go/staking/api" @@ -87,13 +88,20 @@ func TestElectCommittee(t *testing.T) { require := require.New(t) appState := api.NewMockApplicationState(&api.MockApplicationStateConfig{}) + + // Initialize staking state. 
+ func() { + ctx := appState.NewContext(api.ContextEndBlock) + defer ctx.Close() + + stakingState := stakingState.NewMutableState(ctx.State()) + err := stakingState.SetConsensusParameters(ctx, &staking.ConsensusParameters{}) + require.NoError(err, "setting staking consensus parameters should succeed") + }() + ctx := appState.NewContext(api.ContextBeginBlock) defer ctx.Close() - app := &Application{ - state: appState, - } - schedulerParameters := &scheduler.ConsensusParameters{} schedulerState := schedulerState.NewMutableState(ctx.State()) @@ -124,7 +132,7 @@ func TestElectCommittee(t *testing.T) { kind scheduler.CommitteeKind nodes []*node.Node nodeStatuses map[signature.PublicKey]*registry.NodeStatus - validatorEntities map[staking.Address]bool + validatorEntities map[staking.Address]struct{} rt registry.Runtime shouldElect bool }{ @@ -133,7 +141,7 @@ func TestElectCommittee(t *testing.T) { scheduler.KindComputeExecutor, []*node.Node{}, map[signature.PublicKey]*registry.NodeStatus{}, - map[staking.Address]bool{}, + map[staking.Address]struct{}{}, registry.Runtime{}, false, }, @@ -162,7 +170,7 @@ func TestElectCommittee(t *testing.T) { }, }, map[signature.PublicKey]*registry.NodeStatus{}, - map[staking.Address]bool{}, + map[staking.Address]struct{}{}, registry.Runtime{ ID: rtID1, Kind: registry.KindCompute, @@ -187,7 +195,7 @@ func TestElectCommittee(t *testing.T) { }, }, map[signature.PublicKey]*registry.NodeStatus{}, - map[staking.Address]bool{}, + map[staking.Address]struct{}{}, registry.Runtime{ ID: rtID1, Kind: registry.KindCompute, @@ -226,7 +234,7 @@ func TestElectCommittee(t *testing.T) { }, }, map[signature.PublicKey]*registry.NodeStatus{}, - map[staking.Address]bool{}, + map[staking.Address]struct{}{}, registry.Runtime{ ID: rtID1, Kind: registry.KindCompute, @@ -265,7 +273,7 @@ func TestElectCommittee(t *testing.T) { }, }, map[signature.PublicKey]*registry.NodeStatus{}, - map[staking.Address]bool{}, + map[staking.Address]struct{}{}, registry.Runtime{ ID: rtID1, Kind: registry.KindCompute, @@ -304,7 +312,7 @@ func TestElectCommittee(t *testing.T) { }, }, map[signature.PublicKey]*registry.NodeStatus{}, - map[staking.Address]bool{}, + map[staking.Address]struct{}{}, registry.Runtime{ ID: rtID1, Kind: registry.KindCompute, @@ -352,7 +360,7 @@ func TestElectCommittee(t *testing.T) { }, }, map[signature.PublicKey]*registry.NodeStatus{}, - map[staking.Address]bool{}, + map[staking.Address]struct{}{}, registry.Runtime{ ID: rtID1, Kind: registry.KindCompute, @@ -400,7 +408,7 @@ func TestElectCommittee(t *testing.T) { }, }, map[signature.PublicKey]*registry.NodeStatus{}, - map[staking.Address]bool{}, + map[staking.Address]struct{}{}, registry.Runtime{ ID: rtID1, Kind: registry.KindCompute, @@ -449,9 +457,9 @@ func TestElectCommittee(t *testing.T) { }, }, map[signature.PublicKey]*registry.NodeStatus{}, - map[staking.Address]bool{ - staking.NewAddress(entityID1): true, - staking.NewAddress(entityID2): true, + map[staking.Address]struct{}{ + staking.NewAddress(entityID1): {}, + staking.NewAddress(entityID2): {}, }, registry.Runtime{ ID: rtID1, @@ -501,7 +509,7 @@ func TestElectCommittee(t *testing.T) { }, }, map[signature.PublicKey]*registry.NodeStatus{}, - map[staking.Address]bool{}, + map[staking.Address]struct{}{}, registry.Runtime{ ID: rtID1, Kind: registry.KindCompute, @@ -552,7 +560,7 @@ func TestElectCommittee(t *testing.T) { }, }, map[signature.PublicKey]*registry.NodeStatus{}, - map[staking.Address]bool{}, + map[staking.Address]struct{}{}, registry.Runtime{ ID: rtID1, Kind: 
registry.KindCompute, @@ -607,7 +615,7 @@ func TestElectCommittee(t *testing.T) { FreezeEndTime: 42, // Frozen. }, }, - map[staking.Address]bool{}, + map[staking.Address]struct{}{}, registry.Runtime{ ID: rtID1, Kind: registry.KindCompute, @@ -657,7 +665,7 @@ func TestElectCommittee(t *testing.T) { }, }, }, - map[staking.Address]bool{}, + map[staking.Address]struct{}{}, registry.Runtime{ ID: rtID1, Kind: registry.KindCompute, @@ -707,7 +715,7 @@ func TestElectCommittee(t *testing.T) { }, }, }, - map[staking.Address]bool{}, + map[staking.Address]struct{}{}, registry.Runtime{ ID: rtID1, Kind: registry.KindCompute, @@ -760,7 +768,7 @@ func TestElectCommittee(t *testing.T) { }, }, map[signature.PublicKey]*registry.NodeStatus{}, - map[staking.Address]bool{}, + map[staking.Address]struct{}{}, registry.Runtime{ ID: rtID1, Kind: registry.KindCompute, @@ -791,19 +799,28 @@ func TestElectCommittee(t *testing.T) { nodes = append(nodes, &nodeWithStatus{node, status}) } - err := app.electCommittee( + stakeAcc, err := stakingState.NewStakeAccumulatorCache(ctx) + require.NoError(err, "creating stake accumulator cache should not fail") + + rewardableEntities := make(map[staking.Address]struct{}) + + entropy, err := beaconState.Beacon(ctx) + require.NoError(err, "Beacon") + + err = electCommittee( ctx, epoch, schedulerParameters, - beaconState, beaconParameters, registryParameters, - nil, - nil, + stakeAcc, + rewardableEntities, tc.validatorEntities, &tc.rt, nodes, tc.kind, + entropy, + nil, ) require.NoError(err, "committee election should not fail") diff --git a/go/consensus/cometbft/apps/scheduler/shuffle.go b/go/consensus/cometbft/apps/scheduler/shuffle.go index 59f26d017f0..8a405a27d96 100644 --- a/go/consensus/cometbft/apps/scheduler/shuffle.go +++ b/go/consensus/cometbft/apps/scheduler/shuffle.go @@ -2,7 +2,6 @@ package scheduler import ( "bytes" - "crypto" "encoding/binary" "fmt" "math/rand" @@ -10,14 +9,11 @@ import ( beacon "github.com/oasisprotocol/oasis-core/go/beacon/api" "github.com/oasisprotocol/oasis-core/go/common" - "github.com/oasisprotocol/oasis-core/go/common/crypto/drbg" - "github.com/oasisprotocol/oasis-core/go/common/crypto/mathrand" "github.com/oasisprotocol/oasis-core/go/common/crypto/signature" "github.com/oasisprotocol/oasis-core/go/common/crypto/tuplehash" "github.com/oasisprotocol/oasis-core/go/common/node" "github.com/oasisprotocol/oasis-core/go/consensus/cometbft/api" tmBeacon "github.com/oasisprotocol/oasis-core/go/consensus/cometbft/apps/beacon" - beaconState "github.com/oasisprotocol/oasis-core/go/consensus/cometbft/apps/beacon/state" schedulerState "github.com/oasisprotocol/oasis-core/go/consensus/cometbft/apps/scheduler/state" stakingState "github.com/oasisprotocol/oasis-core/go/consensus/cometbft/apps/staking/state" "github.com/oasisprotocol/oasis-core/go/oasis-node/cmd/common/flags" @@ -31,43 +27,20 @@ type nodeWithStatus struct { status *registry.NodeStatus } -func getPrevVRFState( - ctx *api.Context, - beaconState *beaconState.MutableState, -) (*beacon.PrevVRFState, error) { - st, err := beaconState.VRFState(ctx) - if err != nil { - return nil, fmt.Errorf("cometbft/scheduler: failed to query VRF state: %w", err) - } - return st.PrevState, nil -} - func shuffleValidators( ctx *api.Context, - _ api.ApplicationQueryState, + epoch beacon.EpochTime, schedulerParameters *scheduler.ConsensusParameters, - beaconState *beaconState.MutableState, beaconParameters *beacon.ConsensusParameters, - nodeList []*node.Node, + nodes []*node.Node, + entropy []byte, + vrf 
*beacon.PrevVRFState, ) ([]*node.Node, error) { - epoch, _, err := beaconState.GetEpoch(ctx) - if err != nil { - return nil, fmt.Errorf("cometbft/scheduler: failed to query current epoch: %w", err) - } - - switch { // Used so that we can break to fallback. - case beaconParameters.Backend == beacon.BackendVRF: - var prevState *beacon.PrevVRFState - - // Do the VRF-based validator shuffle. - prevState, err = getPrevVRFState(ctx, beaconState) - if err != nil { - return nil, err - } - + switch beaconParameters.Backend { // Used so that we can break to fallback. + case beacon.BackendVRF: var numValidatorsWithPi int - for _, v := range nodeList { - if prevState.Pi[v.ID] != nil { + for _, n := range nodes { + if vrf.Pi[n.ID] != nil { numValidatorsWithPi++ } } @@ -90,7 +63,7 @@ func shuffleValidators( ctx.Logger().Info( "validator election: shuffling by hashed betas", "epoch", epoch, - "num_proofs", len(prevState.Pi), + "num_proofs", len(vrf.Pi), ) baseHasher := newBetaHasher( @@ -101,9 +74,9 @@ func shuffleValidators( // Do the cryptographic sortition. ret := sortNodesByHashedBeta( - prevState, + vrf, baseHasher, - nodeList, + nodes, ) return ret, nil @@ -118,66 +91,66 @@ func shuffleValidators( "epoch", epoch, ) - entropy, err := beaconState.Beacon(ctx) + rng, err := initRNG(entropy, nil, RNGContextValidators) if err != nil { - return nil, fmt.Errorf("cometbft/scheduler: couldn't get beacon: %w", err) + return nil, err } - return shuffleValidatorsByEntropy(entropy, nodeList) -} -func shuffleValidatorsByEntropy( - entropy []byte, - nodeList []*node.Node, -) ([]*node.Node, error) { - drbg, err := drbg.New(crypto.SHA512, entropy, nil, RNGContextValidators) - if err != nil { - return nil, fmt.Errorf("cometbft/scheduler: couldn't instantiate DRBG: %w", err) - } - rng := rand.New(mathrand.New(drbg)) + return shuffleNodes(nodes, rng) +} - l := len(nodeList) +func shuffleNodes(nodes []*node.Node, rng *rand.Rand) ([]*node.Node, error) { + l := len(nodes) idxs := rng.Perm(l) shuffled := make([]*node.Node, 0, l) - for i := 0; i < l; i++ { - shuffled = append(shuffled, nodeList[idxs[i]]) + for i := range l { + shuffled = append(shuffled, nodes[idxs[i]]) } return shuffled, nil } -func (app *Application) electCommittee( +func electCommittee( ctx *api.Context, epoch beacon.EpochTime, schedulerParameters *scheduler.ConsensusParameters, - beaconState *beaconState.MutableState, beaconParameters *beacon.ConsensusParameters, registryParameters *registry.ConsensusParameters, stakeAcc *stakingState.StakeAccumulatorCache, - entitiesEligibleForReward map[staking.Address]bool, - validatorEntities map[staking.Address]bool, + rewardableEntities map[staking.Address]struct{}, + validatorEntities map[staking.Address]struct{}, rt *registry.Runtime, - nodeList []*nodeWithStatus, + nodes []*nodeWithStatus, kind scheduler.CommitteeKind, + entropy []byte, + vrfState *beacon.PrevVRFState, ) error { + ctx.Logger().Debug("electing committee", + "epoch", epoch, + "kind", kind, + "runtime", rt.ID, + ) + // Only generic compute runtimes need to elect all the committees. 
- if !rt.IsCompute() && kind != scheduler.KindComputeExecutor { + if !rt.IsCompute() || kind != scheduler.KindComputeExecutor { return nil } - members, err := app.electCommitteeMembers( + members, err := electCommitteeMembers( ctx, epoch, schedulerParameters, - beaconState, beaconParameters, registryParameters, stakeAcc, - entitiesEligibleForReward, + rewardableEntities, validatorEntities, rt, - nodeList, + nodes, kind, + entropy, + vrfState, ) if err != nil { return err @@ -200,22 +173,29 @@ func (app *Application) electCommittee( return fmt.Errorf("cometbft/scheduler: failed to save committee: %w", err) } + ctx.Logger().Debug("finished electing committee", + "epoch", epoch, + "kind", kind, + "runtime", rt.ID, + ) + return nil } -func (app *Application) electCommitteeMembers( //nolint: gocyclo +func electCommitteeMembers( //nolint: gocyclo ctx *api.Context, epoch beacon.EpochTime, schedulerParameters *scheduler.ConsensusParameters, - beaconState *beaconState.MutableState, beaconParameters *beacon.ConsensusParameters, registryParameters *registry.ConsensusParameters, stakeAcc *stakingState.StakeAccumulatorCache, - entitiesEligibleForReward map[staking.Address]bool, - validatorEntities map[staking.Address]bool, + rewardableEntities map[staking.Address]struct{}, + validatorEntities map[staking.Address]struct{}, rt *registry.Runtime, - nodeList []*nodeWithStatus, + nodes []*nodeWithStatus, kind scheduler.CommitteeKind, + entropy []byte, + vrfState *beacon.PrevVRFState, ) ([]*scheduler.CommitteeNode, error) { // Workers must be listed before backup workers, as other parts of the code depend on this // order for better performance. @@ -228,15 +208,9 @@ func (app *Application) electCommitteeMembers( //nolint: gocyclo useVRF := beaconParameters.Backend == beacon.BackendVRF // If a VRF-based election is to be done, query the VRF state. - var ( - prevState *beacon.PrevVRFState - err error - ) + var err error if useVRF { - if prevState, err = getPrevVRFState(ctx, beaconState); err != nil { - return nil, err - } - if !prevState.CanElectCommittees { + if !vrfState.CanElectCommittees { if !schedulerParameters.DebugAllowWeakAlpha { ctx.Logger().Error("epoch had weak VRF alpha, committee elections not allowed", "kind", kind, @@ -259,7 +233,7 @@ func (app *Application) electCommitteeMembers( //nolint: gocyclo groupSizes := make(map[scheduler.Role]int) switch kind { case scheduler.KindComputeExecutor: - isSuitableFn = app.isSuitableExecutorWorker + isSuitableFn = isSuitableExecutorWorker groupSizes[scheduler.RoleWorker] = int(rt.Executor.GroupSize) groupSizes[scheduler.RoleBackupWorker] = int(rt.Executor.GroupBackupSize) default: @@ -279,15 +253,16 @@ func (app *Application) electCommitteeMembers( //nolint: gocyclo cs := rt.Constraints[kind] // Perform pre-election eligibility filtering. - nodeLists := make(map[scheduler.Role][]*node.Node) - for _, n := range nodeList { + nodesPerRole := make(map[scheduler.Role][]*node.Node) + for _, n := range nodes { // Check if an entity has enough stake. entAddr := staking.NewAddress(n.node.EntityID) - if stakeAcc != nil { + if !schedulerParameters.DebugBypassStake { if err = stakeAcc.CheckStakeClaims(entAddr); err != nil { continue } } + // Check general node compatibility. if !isSuitableFn(ctx, n, rt, epoch, registryParameters) { continue @@ -295,7 +270,7 @@ func (app *Application) electCommitteeMembers( //nolint: gocyclo // If the election uses VRFs, make sure that the node bothered to submit // a VRF proof for this election. 
- if useVRF && prevState.Pi[n.node.ID] == nil {
+ if useVRF && vrfState.Pi[n.node.ID] == nil {
// ... as long as we aren't testing with mandatory committee
// members.
isForceElect := false
@@ -325,22 +300,20 @@ func (app *Application) electCommitteeMembers( //nolint: gocyclo
// Validator set membership constraint.
if cs[role].ValidatorSet != nil {
- if !validatorEntities[entAddr] {
+ if _, ok := validatorEntities[entAddr]; !ok {
// Not eligible if not in the validator set.
continue
}
}
- nodeLists[role] = append(nodeLists[role], n.node)
+ nodesPerRole[role] = append(nodesPerRole[role], n.node)
eligible = true
}
if !eligible {
continue
}
- if entitiesEligibleForReward != nil {
- entitiesEligibleForReward[entAddr] = true
- }
+ rewardableEntities[entAddr] = struct{}{}
}
// Perform election.
@@ -355,7 +328,7 @@ func (app *Application) electCommitteeMembers( //nolint: gocyclo
// will ensure fairness if the constraint is set to 1 (as is the
// case with all currently deployed runtimes with the constraint),
// but is still not ideal if the constraint is larger.
- nodeList := nodeLists[role]
+ nodes := nodesPerRole[role]
if mn := cs[role].MaxNodes; mn != nil && mn.Limit > 0 {
if flags.DebugDontBlameOasis() && schedulerParameters.DebugForceElect != nil {
ctx.Logger().Error("debug force elect is incompatible with de-duplication",
@@ -370,24 +343,24 @@ func (app *Application) electCommitteeMembers( //nolint: gocyclo
case false:
// Just use the first seen nodes in the node list up to
// the limit, per-entity. This is only used in testing.
- nodeList = dedupEntityNodesTrivial(
- nodeList,
+ nodes = dedupEntityNodesTrivial(
+ nodes,
mn.Limit,
)
case true:
- nodeList = dedupEntityNodesByHashedBeta(
- prevState,
+ nodes = dedupEntityNodesByHashedBeta(
+ vrfState,
tmBeacon.MustGetChainContext(ctx),
epoch,
rt.ID,
kind,
role,
- nodeList,
+ nodes,
mn.Limit,
)
}
}
- nrNodes := len(nodeList)
+ nrNodes := len(nodes)
// Check election scheduling constraints.
var minPoolSize int
@@ -436,15 +409,11 @@ func (app *Application) electCommitteeMembers( //nolint: gocyclo
return nil, fmt.Errorf("cometbft/scheduler: unsupported role: %v", role)
}
- var entropy []byte
- if entropy, err = beaconState.Beacon(ctx); err != nil {
- return nil, fmt.Errorf("cometbft/scheduler: couldn't get beacon: %w", err)
- }
-
- idxs, err = GetPerm(entropy, rt.ID, rngCtx, nrNodes)
+ rng, err := initRNG(entropy, rt.ID[:], rngCtx)
if err != nil {
- return nil, fmt.Errorf("failed to derive permutation: %w", err)
+ return nil, err
}
+ idxs = rng.Perm(nrNodes)
case true:
// Use the VRF proofs to do the elections.
baseHasher := newCommitteeBetaHasher(
@@ -456,21 +425,21 @@ func (app *Application) electCommitteeMembers( //nolint: gocyclo
)
idxs = committeeVRFBetaIndexes(
- prevState,
+ vrfState,
baseHasher,
- nodeList,
+ nodes,
)
}
// If the election is rigged for testing purposes, force-elect the
// nodes if possible.
- ok, elected, forceState := app.debugForceElect(
+ ok, elected, forceState := debugForceElect(
ctx,
schedulerParameters,
rt,
kind,
role,
- nodeList,
+ nodes,
wantedNodes,
)
if !ok {
@@ -485,7 +454,7 @@ func (app *Application) electCommitteeMembers( //nolint: gocyclo
break
}
- n := nodeList[idx]
+ n := nodes[idx]
if forceState != nil && forceState.elected[n.ID] {
// Already elected to the committee by the debug forcing option.
continue
@@ -525,7 +494,7 @@ func (app *Application) electCommitteeMembers( //nolint: gocyclo
// If the election is rigged for testing purposes, fixup the force
// elected node roles.
- if ok, elected = app.debugForceRoles(
+ if ok, elected = debugForceRoles(
ctx,
forceState,
elected,
@@ -541,19 +510,19 @@ func (app *Application) electCommitteeMembers( //nolint: gocyclo
}
func committeeVRFBetaIndexes(
- prevState *beacon.PrevVRFState,
+ vrfState *beacon.PrevVRFState,
baseHasher *tuplehash.Hasher,
- nodeList []*node.Node,
+ nodes []*node.Node,
) []int {
indexByNode := make(map[signature.PublicKey]int)
- for i, n := range nodeList {
+ for i, n := range nodes {
indexByNode[n.ID] = i
}
sorted := sortNodesByHashedBeta(
- prevState,
+ vrfState,
baseHasher,
- nodeList,
+ nodes,
)
ret := make([]int, 0, len(sorted))
@@ -565,16 +534,16 @@ func committeeVRFBetaIndexes(
}
func sortNodesByHashedBeta(
- prevState *beacon.PrevVRFState,
+ vrfState *beacon.PrevVRFState,
baseHasher *tuplehash.Hasher,
- nodeList []*node.Node,
+ nodes []*node.Node,
) []*node.Node {
// Accumulate the hashed betas.
nodeByHashedBeta := make(map[hashedBeta]*node.Node)
- betas := make([]hashedBeta, 0, len(nodeList))
- for i := range nodeList {
- n := nodeList[i]
- pi := prevState.Pi[n.ID]
+ betas := make([]hashedBeta, 0, len(nodes))
+ for i := range nodes {
+ n := nodes[i]
+ pi := vrfState.Pi[n.ID]
if pi == nil {
continue
}
@@ -665,18 +634,18 @@ func newBetaHasher(
}
func dedupEntityNodesByHashedBeta(
- prevState *beacon.PrevVRFState,
+ vrfState *beacon.PrevVRFState,
chainContext []byte,
epoch beacon.EpochTime,
runtimeID common.Namespace,
kind scheduler.CommitteeKind,
role scheduler.Role,
- nodeList []*node.Node,
+ nodes []*node.Node,
perEntityLimit uint16,
) []*node.Node {
// If there is no limit, just return.
if perEntityLimit == 0 {
- return nodeList
+ return nodes
}
baseHasher := newCommitteeDedupBetaHasher(
@@ -688,32 +657,32 @@ func dedupEntityNodesByHashedBeta(
)
// Do the cryptographic sortition.
- shuffledNodeList := sortNodesByHashedBeta(
- prevState,
+ shuffled := sortNodesByHashedBeta(
+ vrfState,
baseHasher,
- nodeList,
+ nodes,
)
return dedupEntityNodesTrivial(
- shuffledNodeList,
+ shuffled,
perEntityLimit,
)
}
func dedupEntityNodesTrivial(
- nodeList []*node.Node,
+ nodes []*node.Node,
perEntityLimit uint16,
) []*node.Node {
nodesPerEntity := make(map[signature.PublicKey]int)
- dedupedNodeList := make([]*node.Node, 0, len(nodeList))
- for i := range nodeList {
- n := nodeList[i]
+ deduped := make([]*node.Node, 0, len(nodes))
+ for i := range nodes {
+ n := nodes[i]
if nodesPerEntity[n.EntityID] >= int(perEntityLimit) {
continue
}
nodesPerEntity[n.EntityID]++
- dedupedNodeList = append(dedupedNodeList, n)
+ deduped = append(deduped, n)
}
- return dedupedNodeList
+ return deduped
}
diff --git a/go/consensus/cometbft/apps/staking/slashing.go b/go/consensus/cometbft/apps/staking/slashing.go
index 969d390b9e3..9d4628ad942 100644
--- a/go/consensus/cometbft/apps/staking/slashing.go
+++ b/go/consensus/cometbft/apps/staking/slashing.go
@@ -78,9 +78,9 @@ func onEvidenceByzantineConsensus(
// Check for overflow.
if math.MaxUint64-penalty.FreezeInterval < epoch {
- nodeStatus.FreezeEndTime = registry.FreezeForever
+ nodeStatus.Freeze(registry.FreezeForever)
} else {
- nodeStatus.FreezeEndTime = epoch + penalty.FreezeInterval
+ nodeStatus.Freeze(epoch + penalty.FreezeInterval)
}
}
diff --git a/go/oasis-node/cmd/debug/txsource/workload/registration.go b/go/oasis-node/cmd/debug/txsource/workload/registration.go
index 482103f5cb0..be1b1f6c5f8 100644
--- a/go/oasis-node/cmd/debug/txsource/workload/registration.go
+++ b/go/oasis-node/cmd/debug/txsource/workload/registration.go
@@ -173,7 +173,7 @@ func (r *registration) Run( // nolint: gocyclo
// Initialize base workload.
r.BaseWorkload.Init(consensus, sm, fundingAccount)
- beacon := beacon.NewClient(conn)
+ beaconClient := beacon.NewClient(conn)
ctx := context.Background()
var err error
@@ -288,7 +288,7 @@ func (r *registration) Run( // nolint: gocyclo
// XXX: currently only a single runtime is used throughout the test, could use more.
if i == 0 {
// Current epoch.
- epoch, err := beacon.GetEpoch(ctx, consensusAPI.HeightLatest)
+ epoch, err := beaconClient.GetEpoch(ctx, consensusAPI.HeightLatest)
if err != nil {
return fmt.Errorf("failed to get current epoch: %w", err)
}
@@ -325,7 +325,7 @@ func (r *registration) Run( // nolint: gocyclo
selectedNode := selectedAcc.nodeIdentities[rng.Intn(registryNumNodesPerEntity)]
// Current epoch.
- epoch, err := beacon.GetEpoch(loopCtx, consensusAPI.HeightLatest)
+ epoch, err := beaconClient.GetEpoch(loopCtx, consensusAPI.HeightLatest)
if err != nil {
return fmt.Errorf("failed to get current epoch: %w", err)
}
@@ -333,7 +333,7 @@ func (r *registration) Run( // nolint: gocyclo
// Randomized expiration.
// We should update for at minimum 2 epochs, as the epoch could change between querying it
// and actually performing the registration.
- selectedNode.nodeDesc.Expiration = uint64(epoch) + 2 + uint64(rng.Intn(registryNodeMaxEpochUpdate-1))
+ selectedNode.nodeDesc.Expiration = epoch + 2 + beacon.EpochTime(rng.Intn(registryNodeMaxEpochUpdate-1))
sigNode, err := signNode(selectedNode.id, selectedNode.nodeDesc)
if err != nil {
return fmt.Errorf("failed to sign node: %w", err)
diff --git a/go/oasis-node/cmd/genesis/genesis.go b/go/oasis-node/cmd/genesis/genesis.go
index 84ff2804cd3..16260fa34d3 100644
--- a/go/oasis-node/cmd/genesis/genesis.go
+++ b/go/oasis-node/cmd/genesis/genesis.go
@@ -351,7 +351,7 @@ func AppendRegistryState(doc *genesis.Document, entities, runtimes, nodes []stri
DebugAllowUnroutableAddresses: viper.GetBool(CfgRegistryDebugAllowUnroutableAddresses),
DebugAllowTestRuntimes: viper.GetBool(CfgRegistryDebugAllowTestRuntimes),
GasCosts: registry.DefaultGasCosts, // TODO: Make these configurable.
- MaxNodeExpiration: viper.GetUint64(CfgRegistryMaxNodeExpiration),
+ MaxNodeExpiration: beacon.EpochTime(viper.GetUint64(CfgRegistryMaxNodeExpiration)),
DisableRuntimeRegistration: viper.GetBool(CfgRegistryDisableRuntimeRegistration),
EnableRuntimeGovernanceModels: make(map[registry.RuntimeGovernanceModel]bool),
},
diff --git a/go/oasis-node/cmd/registry/node/node.go b/go/oasis-node/cmd/registry/node/node.go
index ab91c049496..609b6890973 100644
--- a/go/oasis-node/cmd/registry/node/node.go
+++ b/go/oasis-node/cmd/registry/node/node.go
@@ -13,6 +13,7 @@ import (
"github.com/spf13/viper"
"google.golang.org/grpc"
+ beacon "github.com/oasisprotocol/oasis-core/go/beacon/api"
"github.com/oasisprotocol/oasis-core/go/common/cbor"
"github.com/oasisprotocol/oasis-core/go/common/crypto/signature"
fileSigner "github.com/oasisprotocol/oasis-core/go/common/crypto/signature/signers/file"
@@ -146,7 +147,7 @@ func doInit(*cobra.Command, []string) { // nolint: gocyclo
Versioned: cbor.NewVersioned(node.LatestNodeDescriptorVersion),
ID: nodeIdentity.NodeSigner.Public(),
EntityID: entityID,
- Expiration: viper.GetUint64(CfgExpiration),
+ Expiration: beacon.EpochTime(viper.GetUint64(CfgExpiration)),
TLS: node.TLSInfo{
PubKey: nodeIdentity.TLSSigner.Public(),
},
diff --git a/go/oasis-test-runner/scenario/e2e/runtime/byzantine.go b/go/oasis-test-runner/scenario/e2e/runtime/byzantine.go
index 97193c5c946..ad60826eb64 100644
--- a/go/oasis-test-runner/scenario/e2e/runtime/byzantine.go
+++ b/go/oasis-test-runner/scenario/e2e/runtime/byzantine.go
@@ -618,12 +618,11 @@ WatchBlocksLoop:
}
// Advance epoch to trigger any liveness slashing/suspension.
- sc.Logger.Info("triggering epoch transition")
- if err = sc.Net.Controller().SetEpoch(ctx, epoch+1); err != nil {
+ if err = sc.Net.Controller().SetEpoch(ctx, epoch); err != nil {
return fmt.Errorf("failed to set epoch: %w", err)
}
- sc.Logger.Info("triggering epoch transition")
- if err = sc.Net.Controller().SetEpoch(ctx, epoch+2); err != nil {
+ epoch++
+ if err = sc.Net.Controller().SetEpoch(ctx, epoch); err != nil {
return fmt.Errorf("failed to set epoch: %w", err)
}
diff --git a/go/oasis-test-runner/scenario/e2e/runtime/keymanager_rotation_failure.go b/go/oasis-test-runner/scenario/e2e/runtime/keymanager_rotation_failure.go
index 91943db6ecd..76ee4bdc190 100644
--- a/go/oasis-test-runner/scenario/e2e/runtime/keymanager_rotation_failure.go
+++ b/go/oasis-test-runner/scenario/e2e/runtime/keymanager_rotation_failure.go
@@ -171,7 +171,7 @@ func (sc *kmRotationFailureImpl) extendKeymanagerRegistrations(ctx context.Conte
if err != nil {
return err
}
- expiration := uint64(epoch) + params.MaxNodeExpiration
+ expiration := epoch + params.MaxNodeExpiration
for _, idx := range idxs {
km := sc.Net.Keymanagers()[idx]
diff --git a/go/registry/api/admission.go b/go/registry/api/admission.go
index f80bcafde57..3d70cf05bc3 100644
--- a/go/registry/api/admission.go
+++ b/go/registry/api/admission.go
@@ -266,7 +266,7 @@ func verifyNodeCountWithRoleForRuntime(
var curNodes int
for _, n := range nodes {
- if n.ID.Equal(newNode.ID) || n.IsExpired(uint64(epoch)) || !n.HasRuntime(rt.ID) {
+ if n.ID.Equal(newNode.ID) || n.IsExpired(epoch) || !n.HasRuntime(rt.ID) {
// Skip existing node when re-registering. Also skip
// expired nodes and nodes that haven't registered
// for the same runtime.
diff --git a/go/registry/api/api.go b/go/registry/api/api.go
index 26d51ecc37b..a60c5e1e2d5 100644
--- a/go/registry/api/api.go
+++ b/go/registry/api/api.go
@@ -557,7 +557,7 @@ func VerifyRegisterNodeArgs( // nolint: gocyclo
// the consensus state.
// Ensure valid expiration.
- maxExpiration := uint64(epoch) + params.MaxNodeExpiration
+ maxExpiration := epoch + params.MaxNodeExpiration
if params.MaxNodeExpiration > 0 && n.Expiration > maxExpiration {
logger.Error("RegisterNode: node expiration greater than max allowed expiration",
"node", n,
@@ -1063,7 +1063,7 @@ func VerifyNodeUpdate(
}
// Following checks are only done for active nodes.
- if currentNode.IsExpired(uint64(epoch)) {
+ if currentNode.IsExpired(epoch) {
return nil
}
@@ -1406,7 +1406,7 @@ type ConsensusParameters struct {
// MaxNodeExpiration is the maximum number of epochs relative to the epoch
// at registration time that a single node registration is valid for.
- MaxNodeExpiration uint64 `json:"max_node_expiration,omitempty"`
+ MaxNodeExpiration beacon.EpochTime `json:"max_node_expiration,omitempty"`
// EnableRuntimeGovernanceModels is a set of enabled runtime governance models.
EnableRuntimeGovernanceModels map[RuntimeGovernanceModel]bool `json:"enable_runtime_governance_models,omitempty"`
@@ -1430,7 +1430,7 @@ type ConsensusParameterChanges struct {
GasCosts transaction.Costs `json:"gas_costs,omitempty"`
// MaxNodeExpiration is the maximum node expiration.
- MaxNodeExpiration *uint64 `json:"max_node_expiration,omitempty"`
+ MaxNodeExpiration *beacon.EpochTime `json:"max_node_expiration,omitempty"`
// EnableRuntimeGovernanceModels are the new enabled runtime governance models.
EnableRuntimeGovernanceModels map[RuntimeGovernanceModel]bool `json:"enable_runtime_governance_models,omitempty"`
diff --git a/go/registry/api/status.go b/go/registry/api/status.go
index 6261e42ce01..112d98af4be 100644
--- a/go/registry/api/status.go
+++ b/go/registry/api/status.go
@@ -39,6 +39,12 @@ func (ns NodeStatus) IsFrozen() bool {
return ns.FreezeEndTime > 0
}
+// Freeze makes the node frozen until the specified epoch, after which it may
+// become unfrozen.
+func (ns *NodeStatus) Freeze(epoch beacon.EpochTime) {
+ ns.FreezeEndTime = epoch
+}
+
// Unfreeze makes the node unfrozen.
func (ns *NodeStatus) Unfreeze() {
ns.FreezeEndTime = 0
@@ -89,6 +95,12 @@ func (ns *NodeStatus) IsSuspended(runtimeID common.Namespace, epoch beacon.Epoch
return fault.IsSuspended(epoch)
}
+// IsEligibleForElection checks if the node is eligible to be included
+// in non-validator committee elections at the given epoch.
+func (ns *NodeStatus) IsEligibleForElection(epoch beacon.EpochTime) bool {
+ return epoch > ns.ElectionEligibleAfter
+}
+
// Fault is used to track the state of nodes that are experiencing liveness failures.
diff --git a/go/registry/tests/tester.go b/go/registry/tests/tester.go
index 5076ae6fe93..e168bef9445 100644
--- a/go/registry/tests/tester.go
+++ b/go/registry/tests/tester.go
@@ -1156,7 +1156,7 @@ func (ent *TestEntity) NewTestNodes(nCompute int, idNonce []byte, runtimes []*no
Versioned: cbor.NewVersioned(node.LatestNodeDescriptorVersion),
ID: nod.Signer.Public(),
EntityID: ent.Entity.ID,
- Expiration: uint64(expiration),
+ Expiration: expiration,
VRF: node.VRFInfo{
ID: nodeIdentity.VRFSigner.Public(),
},
@@ -1411,7 +1411,7 @@ func (ent *TestEntity) NewTestNodes(nCompute int, idNonce []byte, runtimes []*no
Versioned: cbor.NewVersioned(node.LatestNodeDescriptorVersion),
ID: nod.Signer.Public(),
EntityID: ent.Entity.ID,
- Expiration: uint64(expiration),
+ Expiration: expiration,
Runtimes: moreRuntimes,
Roles: role,
VRF: nod.Node.VRF,
@@ -1441,7 +1441,7 @@ func (ent *TestEntity) NewTestNodes(nCompute int, idNonce []byte, runtimes []*no
Versioned: cbor.NewVersioned(node.LatestNodeDescriptorVersion),
ID: nod.Signer.Public(),
EntityID: ent.Entity.ID,
- Expiration: uint64(expiration),
+ Expiration: expiration,
Runtimes: newRuntimes,
Roles: role,
P2P: nod.Node.P2P,
diff --git a/go/worker/registration/worker.go b/go/worker/registration/worker.go
index c8f629354d9..44e70308d9f 100644
--- a/go/worker/registration/worker.go
+++ b/go/worker/registration/worker.go
@@ -841,7 +841,7 @@ func (w *Worker) registerNode(epoch beacon.EpochTime, hook RegisterNodeHook) (er
Versioned: cbor.NewVersioned(node.LatestNodeDescriptorVersion),
ID: identityPublic,
EntityID: w.entityID,
- Expiration: uint64(epoch) + 2,
+ Expiration: epoch + 2,
TLS: node.TLSInfo{
PubKey: w.identity.TLSSigner.Public(),
},
@@ -874,7 +874,7 @@ func (w *Worker) registerNode(epoch beacon.EpochTime, hook RegisterNodeHook) (er
w.status.LastAttemptErrorMessage = err.Error()
w.status.LastAttempt = time.Now()
if w.status.Descriptor != nil {
- if w.status.Descriptor.Expiration < uint64(epoch) {
+ if w.status.Descriptor.Expiration < epoch {
w.status.Descriptor = nil
}
}