From 80aa88e2b9afbb47d53ffbd1986b05d1baed387b Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Wed, 7 May 2025 22:00:50 +0100 Subject: [PATCH 01/13] add configs for db batch sizes used by hashdb Cap and Commit operations; add pebble batch size safety checks --- core/blockchain.go | 12 ++++++- core/rawdb/accessors_trie.go | 8 +++++ ethdb/pebble/pebble.go | 31 +++++++++++++++++ ethdb/pebble/pebble_test.go | 31 +++++++++++++++++ triedb/hashdb/database.go | 64 +++++++++++++++++++++++++++++++++--- 5 files changed, 141 insertions(+), 5 deletions(-) diff --git a/core/blockchain.go b/core/blockchain.go index 361b1c2cd4..58bb25fc1d 100644 --- a/core/blockchain.go +++ b/core/blockchain.go @@ -161,10 +161,14 @@ type CacheConfig struct { SnapshotRestoreMaxGas uint64 // Rollback up to this much gas to restore snapshot (otherwise snapshot recalculated from nothing) HeadRewindBlocksLimit uint64 // Rollback up to this many blocks to restore chain head (0 = preserve default upstream behaviour), only for HashScheme - // Arbitrum: configure GC window + // Arbitrum: + // configure GC window TriesInMemory uint64 // Height difference before which a trie may not be garbage-collected TrieRetention time.Duration // Time limit before which a trie may not be garbage-collected TrieTimeLimitRandomOffset time.Duration // Range of random offset of each commit due to TrieTimeLimit period + // configure write batch size thresholds + TrieCapBatchSize uint32 // Write batch size threshold used when capping the triedb size + TrieCommitBatchSize uint32 // Write batch size threshold used when committing the triedb to disk MaxNumberOfBlocksToSkipStateSaving uint32 MaxAmountOfGasToSkipStateSaving uint64 @@ -186,6 +190,10 @@ func (c *CacheConfig) triedbConfig(isVerkle bool) *triedb.Config { } if c.StateScheme == rawdb.HashScheme { config.HashDB = &hashdb.Config{ + // Arbitrum: + IdealCapBatchSize: c.TrieCapBatchSize, + IdealCommitBatchSize: c.TrieCommitBatchSize, + CleanCacheSize: c.TrieCleanLimit * 1024 * 1024, } } @@ -212,6 +220,8 @@ var defaultCacheConfig = &CacheConfig{ TriesInMemory: state.DefaultTriesInMemory, TrieRetention: 30 * time.Minute, TrieTimeLimitRandomOffset: 0, + TrieCapBatchSize: 0, + TrieCommitBatchSize: 0, MaxNumberOfBlocksToSkipStateSaving: 0, MaxAmountOfGasToSkipStateSaving: 0, diff --git a/core/rawdb/accessors_trie.go b/core/rawdb/accessors_trie.go index 8bd6b71eee..87abbc566d 100644 --- a/core/rawdb/accessors_trie.go +++ b/core/rawdb/accessors_trie.go @@ -147,6 +147,14 @@ func WriteLegacyTrieNode(db ethdb.KeyValueWriter, hash common.Hash, node []byte) } } +// Arbitrum: version of WriteLegacyTrieNode that returns an error instead of exiting the process via log.Crit +func WriteLegacyTrieNodeWithError(db ethdb.KeyValueWriter, hash common.Hash, node []byte) error { + if err := db.Put(hash.Bytes(), node); err != nil { + return fmt.Errorf("failed to store legacy trie node: %w", err) + } + return nil +} + // DeleteLegacyTrieNode deletes the specified legacy trie node from database.
func DeleteLegacyTrieNode(db ethdb.KeyValueWriter, hash common.Hash) { if err := db.Delete(hash.Bytes()); err != nil { diff --git a/ethdb/pebble/pebble.go b/ethdb/pebble/pebble.go index 517bc5b9e8..db6e1a7498 100644 --- a/ethdb/pebble/pebble.go +++ b/ethdb/pebble/pebble.go @@ -21,12 +21,14 @@ package pebble import ( "bytes" + "encoding/binary" "fmt" "runtime" "sync" "sync/atomic" "time" + "github.com/cockroachdb/errors" "github.com/cockroachdb/pebble" "github.com/cockroachdb/pebble/bloom" "github.com/ethereum/go-ethereum/common" @@ -51,8 +53,21 @@ const ( // degradationWarnInterval specifies how often warning should be printed if the // leveldb database cannot keep up with requested writes. degradationWarnInterval = time.Minute + + // The max batch size is limited by the uint32 offsets stored in + // internal/batchskl.node, DeferredBatchOp, and flushableBatchEntry. + // + // Pebble limits the size to MaxUint32 (just short of 4GB) so that the exclusive + // end of an allocation fits in uint32. + // + // On 32-bit systems, slices are naturally limited to MaxInt (just short of + // 2GB). + // see: cockroachdb/pebble.maxBatchSize + maxBatchSize = (1<<31)<<(^uint(0)>>63) - 1 ) +var ErrBatchTooLarge = errors.Newf("go-ethereum/pebble: batch too large: >= %s bytes", maxBatchSize) + // Database is a persistent key-value store based on the pebble storage engine. // Apart from basic data storage functionality it also supports batch writes and // iterating over the keyspace in binary-alphabetical order. @@ -648,7 +663,15 @@ type batch struct { } // Put inserts the given value into the batch for later committing. +// In case ErrBatchTooLarge is returned, it is safe to flush the batch and retry putting the key,value pair. func (b *batch) Put(key, value []byte) error { + // The size increase is argument in call to cockroachdb/pebble.Batch.grow in cockroachdb/pebble.Batch.prepareDeferredKeyValueRecord. pebble.Batch.grow may panic if the batch data size plus the increase reaches cockroachdb/pebble.maxBatchSize + sizeIncrease := 1 + uint64(2*binary.MaxVarintLen32) + uint64(len(key)) + uint64(len(value)) + // check if we fit within maxBatchSize + if uint64(b.b.Len())+sizeIncrease >= maxBatchSize { + // return an error instead of letting b.b.Set panic + return ErrBatchTooLarge + } if err := b.b.Set(key, value, nil); err != nil { return err } @@ -657,7 +680,15 @@ func (b *batch) Put(key, value []byte) error { } // Delete inserts the key removal into the batch for later committing. +// In case ErrBatchTooLarge is returned, it is safe to flush the batch and retry deleting the key. func (b *batch) Delete(key []byte) error { + // The size increase is argument in call to cockroachdb/pebble.Batch.grow in cockroachdb/pebble.Batch.prepareDeferredKeyRecord.
pebble.Batch.grow may panic if the batch data size plus the increase reaches cockroachdb/pebble.maxBatchSize + sizeIncrease := 1 + uint64(binary.MaxVarintLen32) + uint64(len(key)) + // check if we fit within maxBatchSize + if uint64(b.b.Len())+sizeIncrease >= maxBatchSize { + // return an error instead of letting b.b.Delete panic + return ErrBatchTooLarge + } if err := b.b.Delete(key, nil); err != nil { return err } diff --git a/ethdb/pebble/pebble_test.go b/ethdb/pebble/pebble_test.go index 47ce6ec3a1..a9527e398c 100644 --- a/ethdb/pebble/pebble_test.go +++ b/ethdb/pebble/pebble_test.go @@ -19,6 +19,8 @@ package pebble import ( + "encoding/binary" + "errors" "testing" "github.com/cockroachdb/pebble" @@ -43,6 +45,35 @@ func TestPebbleDB(t *testing.T) { }) } +func TestPebbleBatch(t *testing.T) { + pebbleDb, err := pebble.Open("", &pebble.Options{ + FS: vfs.NewMem(), + }) + if err != nil { + t.Fatal(err) + } + var db ethdb.KeyValueStore = &Database{ + db: pebbleDb, + } + batch := db.NewBatch() + data := make([]byte, maxBatchSize-1-binary.MaxVarintLen32) + err = batch.Delete(data) + if err == nil { + t.Fatal("batch.Delete shouldn't succeed") + } + if !errors.Is(err, ErrBatchTooLarge) { + t.Fatalf("batch.Delete returned unexpected error: %v", err) + } + data = data[:len(data)-binary.MaxVarintLen32] + err = batch.Put(data[0:len(data)/2], data[len(data)/2:]) + if err == nil { + t.Fatal("batch.Put shouldn't succeed") + } + if !errors.Is(err, ErrBatchTooLarge) { + t.Fatalf("batch.Put returned unexpected error: %v", err) + } +} + func BenchmarkPebbleDB(b *testing.B) { dbtest.BenchDatabaseSuite(b, func() ethdb.KeyValueStore { db, err := pebble.Open("", &pebble.Options{ diff --git a/triedb/hashdb/database.go b/triedb/hashdb/database.go index 43ceb472b3..58ab19ee79 100644 --- a/triedb/hashdb/database.go +++ b/triedb/hashdb/database.go @@ -28,6 +28,7 @@ import ( "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/ethdb/pebble" "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/metrics" "github.com/ethereum/go-ethereum/rlp" @@ -62,12 +63,21 @@ var ( // Config contains the settings for database. type Config struct { + // Arbitrum: + IdealCapBatchSize uint32 // write batch size threshold used when capping the triedb size (if 0, ethdb.IdealBatchSize will be used) + IdealCommitBatchSize uint32 // write batch size threshold used when committing trie nodes to disk (if 0, ethdb.IdealBatchSize will be used) + CleanCacheSize int // Maximum memory allowance (in bytes) for caching clean nodes } // Defaults is the default setting for database if it's not specified. // Notably, clean cache is disabled explicitly, var Defaults = &Config{ + // Arbitrum: + // default zeroes used to prevent need for correct initialization in all places used upstream + IdealCapBatchSize: 0, // 0 = ethdb.IdealBatchSize will be used + IdealCommitBatchSize: 0, // 0 = ethdb.IdealBatchSize will be used + // Explicitly set clean cache size to 0 to avoid creating fastcache, // otherwise database must be closed when it's no longer needed to // prevent memory leak. @@ -78,6 +88,9 @@ var Defaults = &Config{ // the disk database. The aim is to accumulate trie writes in-memory and only // periodically flush a couple tries to disk, garbage collecting the remainder.
type Database struct { + // Arbitrum: + config *Config + diskdb ethdb.Database // Persistent storage for matured trie nodes cleans *fastcache.Cache // GC friendly memory cache of clean node RLPs dirties map[common.Hash]*cachedNode // Data and references relationships of dirty trie nodes @@ -133,6 +146,8 @@ func New(diskdb ethdb.Database, config *Config) *Database { cleans = fastcache.New(config.CleanCacheSize) } return &Database{ + config: config, + diskdb: diskdb, cleans: cleans, dirties: make(map[common.Hash]*cachedNode), @@ -342,15 +357,35 @@ func (db *Database) Cap(limit common.StorageSize) error { size := db.dirtiesSize + common.StorageSize(len(db.dirties)*cachedNodeSize) size += db.childrenSize + // Arbitrum: + idealBatchSize := uint(db.config.IdealCapBatchSize) + if idealBatchSize == 0 { + idealBatchSize = uint(ethdb.IdealBatchSize) + } + // Keep committing nodes from the flush-list until we're below allowance oldest := db.oldest for size > limit && oldest != (common.Hash{}) { // Fetch the oldest referenced node and push into the batch node := db.dirties[oldest] - rawdb.WriteLegacyTrieNode(batch, oldest, node.node) + + err := rawdb.WriteLegacyTrieNodeWithError(batch, oldest, node.node) + if err != nil { + if errors.Is(err, pebble.ErrBatchTooLarge) { + log.Warn("Pebble batch limit reached in hashdb Cap operation, flushing batch.") + // flush batch & retry the write + if err = batch.Write(); err != nil { + return err + } + batch.Reset() + rawdb.WriteLegacyTrieNode(batch, oldest, node.node) + } else { + log.Crit("Failure in hashdb Cap operation", "err", err) + } + } // If we exceeded the ideal batch size, commit and reset - if batch.ValueSize() >= ethdb.IdealBatchSize { + if uint(batch.ValueSize()) >= idealBatchSize { if err := batch.Write(); err != nil { log.Error("Failed to write flush list to disk", "err", err) return err @@ -474,8 +509,29 @@ func (db *Database) commit(hash common.Hash, batch ethdb.Batch, uncacher *cleane return err } // If we've reached an optimal batch size, commit and start over - rawdb.WriteLegacyTrieNode(batch, hash, node.node) - if batch.ValueSize() >= ethdb.IdealBatchSize { + err = rawdb.WriteLegacyTrieNodeWithError(batch, hash, node.node) + if err != nil { + if errors.Is(err, pebble.ErrBatchTooLarge) { + log.Warn("Pebble batch limit reached in hashdb Commit operation, flushing batch.") + // flush batch & retry the write + if err = batch.Write(); err != nil { + return err + } + err = batch.Replay(uncacher) + if err != nil { + return err + } + batch.Reset() + rawdb.WriteLegacyTrieNode(batch, hash, node.node) + } else { + log.Crit("Failure in hashdb Commit operation", "err", err) + } + } + idealBatchSize := uint(db.config.IdealCommitBatchSize) + if idealBatchSize == 0 { + idealBatchSize = uint(ethdb.IdealBatchSize) + } + if uint(batch.ValueSize()) >= idealBatchSize { if err := batch.Write(); err != nil { return err } From 67e561241cb13725c09b7390fc06eff7c2b49eae Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Thu, 8 May 2025 19:55:40 +0100 Subject: [PATCH 02/13] rename pebble batch test --- ethdb/pebble/pebble_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ethdb/pebble/pebble_test.go b/ethdb/pebble/pebble_test.go index a9527e398c..fec5651e20 100644 --- a/ethdb/pebble/pebble_test.go +++ b/ethdb/pebble/pebble_test.go @@ -45,7 +45,7 @@ func TestPebbleDB(t *testing.T) { }) } -func TestPebbleBatch(t *testing.T) { +func TestPebbleBatchBatchTooLargeError(t *testing.T) { pebbleDb, err := pebble.Open("", &pebble.Options{ FS: vfs.NewMem(), 
}) From 0e2aea9f8dd73131b11233234e43a81850d12f08 Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Wed, 21 May 2025 17:50:37 +0100 Subject: [PATCH 03/13] improve pebble batch limit reached warnings --- triedb/hashdb/database.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/triedb/hashdb/database.go b/triedb/hashdb/database.go index 58ab19ee79..a272620039 100644 --- a/triedb/hashdb/database.go +++ b/triedb/hashdb/database.go @@ -372,7 +372,7 @@ func (db *Database) Cap(limit common.StorageSize) error { err := rawdb.WriteLegacyTrieNodeWithError(batch, oldest, node.node) if err != nil { if errors.Is(err, pebble.ErrBatchTooLarge) { - log.Warn("Pebble batch limit reached in hashdb Cap operation, flushing batch.") + log.Warn("Pebble batch limit reached in hashdb Cap operation, flushing batch. Consider setting ideal cap batch size to a lower value.", "pebbleError", err) // flush batch & retry the write if err = batch.Write(); err != nil { return err @@ -512,7 +512,7 @@ func (db *Database) commit(hash common.Hash, batch ethdb.Batch, uncacher *cleane err = rawdb.WriteLegacyTrieNodeWithError(batch, hash, node.node) if err != nil { if errors.Is(err, pebble.ErrBatchTooLarge) { - log.Warn("Pebble batch limit reached in hashdb Commit operation, flushing batch.") + log.Warn("Pebble batch limit reached in hashdb Commit operation, flushing batch. Consider setting ideal commit batch size to a lower value.", "pebbleError", err) // flush batch & retry the write if err = batch.Write(); err != nil { return err From 97bc243200362399f0de2d896cff75e5bfe49139 Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Tue, 27 May 2025 23:01:06 +0100 Subject: [PATCH 04/13] move ErrBatchTooLarge to ethdb, fixes wasm build --- ethdb/batch.go | 4 ++++ ethdb/pebble/pebble.go | 11 ++++------- ethdb/pebble/pebble_test.go | 4 ++-- triedb/hashdb/database.go | 5 ++--- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/ethdb/batch.go b/ethdb/batch.go index 541f40c838..9d4766eb13 100644 --- a/ethdb/batch.go +++ b/ethdb/batch.go @@ -16,6 +16,10 @@ package ethdb +import "errors" + +var ErrBatchTooLarge = errors.New("batch too large") + // IdealBatchSize defines the size of the data batches should ideally add in one // write. const IdealBatchSize = 100 * 1024 diff --git a/ethdb/pebble/pebble.go b/ethdb/pebble/pebble.go index db6e1a7498..2c51b3afc3 100644 --- a/ethdb/pebble/pebble.go +++ b/ethdb/pebble/pebble.go @@ -28,7 +28,6 @@ import ( "sync/atomic" "time" - "github.com/cockroachdb/errors" "github.com/cockroachdb/pebble" "github.com/cockroachdb/pebble/bloom" "github.com/ethereum/go-ethereum/common" @@ -66,8 +65,6 @@ const ( maxBatchSize = (1<<31)<<(^uint(0)>>63) - 1 ) -var ErrBatchTooLarge = errors.Newf("go-ethereum/pebble: batch too large: >= %s bytes", maxBatchSize) - // Database is a persistent key-value store based on the pebble storage engine. // Apart from basic data storage functionality it also supports batch writes and // iterating over the keyspace in binary-alphabetical order. @@ -663,14 +660,14 @@ type batch struct { } // Put inserts the given value into the batch for later committing. -// In case ErrBatchTooLarge is returned, it is safe to flush the batch and retry putting the key,value pair. +// In case ethdb.ErrBatchTooLarge is returned, it is safe to flush the batch and retry putting the key,value pair. func (b *batch) Put(key, value []byte) error { // The size increase is argument in call to cockroachdb/pebble.Batch.grow in cockroachdb/pebble.Batch.prepareDeferredKeyValueRecord. 
pebble.Batch.grow may panic if the batch data size plus the increase reaches cockroachdb/pebble.maxBatchSize sizeIncrease := 1 + uint64(2*binary.MaxVarintLen32) + uint64(len(key)) + uint64(len(value)) // check if we fit within maxBatchSize if uint64(b.b.Len())+sizeIncrease >= maxBatchSize { // return an error instead of letting b.b.Set panic - return ErrBatchTooLarge + return ethdb.ErrBatchTooLarge } if err := b.b.Set(key, value, nil); err != nil { return err } @@ -680,14 +677,14 @@ func (b *batch) Put(key, value []byte) error { } // Delete inserts the key removal into the batch for later committing. -// In case ErrBatchTooLarge is returned, it is safe to flush the batch and retry deleting the key. +// In case ethdb.ErrBatchTooLarge is returned, it is safe to flush the batch and retry deleting the key. func (b *batch) Delete(key []byte) error { // The size increase is argument in call to cockroachdb/pebble.Batch.grow in cockroachdb/pebble.Batch.prepareDeferredKeyRecord. pebble.Batch.grow may panic if the batch data size plus the increase reaches cockroachdb/pebble.maxBatchSize sizeIncrease := 1 + uint64(binary.MaxVarintLen32) + uint64(len(key)) // check if we fit within maxBatchSize if uint64(b.b.Len())+sizeIncrease >= maxBatchSize { // return an error instead of letting b.b.Delete panic - return ErrBatchTooLarge + return ethdb.ErrBatchTooLarge } if err := b.b.Delete(key, nil); err != nil { return err } diff --git a/ethdb/pebble/pebble_test.go b/ethdb/pebble/pebble_test.go index fec5651e20..c2ff08b73f 100644 --- a/ethdb/pebble/pebble_test.go +++ b/ethdb/pebble/pebble_test.go @@ -61,7 +61,7 @@ func TestPebbleBatchBatchTooLargeError(t *testing.T) { if err == nil { t.Fatal("batch.Delete shouldn't succeed") } - if !errors.Is(err, ErrBatchTooLarge) { + if !errors.Is(err, ethdb.ErrBatchTooLarge) { t.Fatalf("batch.Delete returned unexpected error: %v", err) } data = data[:len(data)-binary.MaxVarintLen32] @@ -69,7 +69,7 @@ func TestPebbleBatchBatchTooLargeError(t *testing.T) { if err == nil { t.Fatal("batch.Put shouldn't succeed") } - if !errors.Is(err, ErrBatchTooLarge) { + if !errors.Is(err, ethdb.ErrBatchTooLarge) { t.Fatalf("batch.Put returned unexpected error: %v", err) } } diff --git a/triedb/hashdb/database.go b/triedb/hashdb/database.go index a272620039..16877665bd 100644 --- a/triedb/hashdb/database.go +++ b/triedb/hashdb/database.go @@ -28,7 +28,6 @@ import ( "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/ethdb" - "github.com/ethereum/go-ethereum/ethdb/pebble" "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/metrics" "github.com/ethereum/go-ethereum/rlp" @@ -371,7 +370,7 @@ func (db *Database) Cap(limit common.StorageSize) error { err := rawdb.WriteLegacyTrieNodeWithError(batch, oldest, node.node) if err != nil { - if errors.Is(err, pebble.ErrBatchTooLarge) { + if errors.Is(err, ethdb.ErrBatchTooLarge) { log.Warn("Pebble batch limit reached in hashdb Cap operation, flushing batch.
Consider setting ideal cap batch size to a lower value.", "pebbleError", err) // flush batch & retry the write if err = batch.Write(); err != nil { return err @@ -511,7 +510,7 @@ func (db *Database) commit(hash common.Hash, batch ethdb.Batch, uncacher *cleane // If we've reached an optimal batch size, commit and start over err = rawdb.WriteLegacyTrieNodeWithError(batch, hash, node.node) if err != nil { - if errors.Is(err, pebble.ErrBatchTooLarge) { + if errors.Is(err, ethdb.ErrBatchTooLarge) { log.Warn("Pebble batch limit reached in hashdb Commit operation, flushing batch. Consider setting ideal commit batch size to a lower value.", "pebbleError", err) // flush batch & retry the write if err = batch.Write(); err != nil { return err From e22751236a0ef729669fa259101a605c58ba606c Mon Sep 17 00:00:00 2001 From: Maciej Kulawik <10907694+magicxyyz@users.noreply.github.com> Date: Wed, 16 Jul 2025 14:59:15 +0100 Subject: [PATCH 05/13] fix grammar in comment Co-authored-by: Diego Ximenes Mendes --- ethdb/pebble/pebble.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ethdb/pebble/pebble.go b/ethdb/pebble/pebble.go index 2c51b3afc3..f9e3a3b948 100644 --- a/ethdb/pebble/pebble.go +++ b/ethdb/pebble/pebble.go @@ -662,7 +662,7 @@ type batch struct { // Put inserts the given value into the batch for later committing. // In case ethdb.ErrBatchTooLarge is returned, it is safe to flush the batch and retry putting the key,value pair. func (b *batch) Put(key, value []byte) error { - // The size increase is argument in call to cockroachdb/pebble.Batch.grow in cockroachdb/pebble.Batch.prepareDeferredKeyValueRecord. pebble.Batch.grow may panic if the batch data size plus the increase reaches cockroachdb/pebble.maxBatchSize + // The size increase is an argument to the cockroachdb/pebble.Batch.grow call in cockroachdb/pebble.Batch.prepareDeferredKeyValueRecord. pebble.Batch.grow may panic if the batch data size plus the increase reaches cockroachdb/pebble.maxBatchSize sizeIncrease := 1 + uint64(2*binary.MaxVarintLen32) + uint64(len(key)) + uint64(len(value)) // check if we fit within maxBatchSize if uint64(b.b.Len())+sizeIncrease >= maxBatchSize { From 72fa562a521fa7a7e805f9d7158f1f975e86ef1e Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Wed, 16 Jul 2025 15:25:58 +0100 Subject: [PATCH 06/13] reword comment for pebble maximal batch size --- ethdb/pebble/pebble.go | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/ethdb/pebble/pebble.go b/ethdb/pebble/pebble.go index 88f719c1c4..f3b9ddc0ba 100644 --- a/ethdb/pebble/pebble.go +++ b/ethdb/pebble/pebble.go @@ -53,16 +53,14 @@ const ( // leveldb database cannot keep up with requested writes. degradationWarnInterval = time.Minute - // The max batch size is limited by the uint32 offsets stored in - // internal/batchskl.node, DeferredBatchOp, and flushableBatchEntry. - // + // The max size of an internal pebble batch is limited by the uint32 offsets. // Pebble limits the size to MaxUint32 (just short of 4GB) so that the exclusive // end of an allocation fits in uint32. - // // On 32-bit systems, slices are naturally limited to MaxInt (just short of // 2GB).
// see: cockroachdb/pebble.maxBatchSize - maxBatchSize = (1<<31)<<(^uint(0)>>63) - 1 + oneIf64Bit = ^uint(0) >> 63 + maxBatchSize = (1<<31)<<oneIf64Bit - 1 ) // Database is a persistent key-value store based on the pebble storage engine. From … Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Thu, 17 Jul 2025 15:38:21 +0100 Subject: [PATCH 07/13] better explain pebble internal batch size increase calculations --- ethdb/pebble/pebble.go | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/ethdb/pebble/pebble.go b/ethdb/pebble/pebble.go index f3b9ddc0ba..c6225d350a 100644 --- a/ethdb/pebble/pebble.go +++ b/ethdb/pebble/pebble.go @@ -645,7 +645,13 @@ type batch struct { // Put inserts the given value into the batch for later committing. // In case ethdb.ErrBatchTooLarge is returned, it is safe to flush the batch and retry putting the key,value pair. func (b *batch) Put(key, value []byte) error { - // The size increase is an argument to the cockroachdb/pebble.Batch.grow call in cockroachdb/pebble.Batch.prepareDeferredKeyValueRecord. pebble.Batch.grow may panic if the batch data size plus the increase reaches cockroachdb/pebble.maxBatchSize + // The size increase is an argument to the cockroachdb/pebble.Batch.grow call in cockroachdb/pebble.Batch.prepareDeferredKeyValueRecord called internally in cockroachdb/pebble.Batch.Set. + // pebble.Batch.grow panics when the batch data size plus the increase reaches cockroachdb/pebble.maxBatchSize + // 1 byte for batch entry kind + // + MaxVarintLen32 for encoding of key length + // + MaxVarintLen32 for encoding of value length + // + key length + // + value length sizeIncrease := 1 + uint64(2*binary.MaxVarintLen32) + uint64(len(key)) + uint64(len(value)) // check if we fit within maxBatchSize if uint64(b.b.Len())+sizeIncrease >= maxBatchSize { @@ -662,7 +668,11 @@ func (b *batch) Put(key, value []byte) error { // Delete inserts the key removal into the batch for later committing. // In case ethdb.ErrBatchTooLarge is returned, it is safe to flush the batch and retry deleting the key. func (b *batch) Delete(key []byte) error { - // The size increase is argument in call to cockroachdb/pebble.Batch.grow in cockroachdb/pebble.Batch.prepareDeferredKeyRecord. pebble.Batch.grow may panic if the batch data size plus the increase reaches cockroachdb/pebble.maxBatchSize + // The size increase is an argument to the cockroachdb/pebble.Batch.grow call in cockroachdb/pebble.Batch.prepareDeferredKeyRecord called internally in cockroachdb/pebble.Batch.Delete.
+ // pebble.Batch.grow panics when the batch data size plus the increase reaches cockroachdb/pebble.maxBatchSize + // 1 byte for batch entry kind + // + MaxVarintLen32 for encoding of key length + // + key length sizeIncrease := 1 + uint64(binary.MaxVarintLen32) + uint64(len(key)) // check if we fit within maxBatchSize if uint64(b.b.Len())+sizeIncrease >= maxBatchSize { From 5832ef2e3c430bfa14b49a93a001b3c55be89292 Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Thu, 17 Jul 2025 15:54:43 +0100 Subject: [PATCH 08/13] remove not needed comment --- triedb/hashdb/database.go | 1 - 1 file changed, 1 deletion(-) diff --git a/triedb/hashdb/database.go b/triedb/hashdb/database.go index 16877665bd..f149b50103 100644 --- a/triedb/hashdb/database.go +++ b/triedb/hashdb/database.go @@ -73,7 +73,6 @@ type Config struct { // Notably, clean cache is disabled explicitly, var Defaults = &Config{ // Arbitrum: - // default zeroes used to prevent need for correct initialization in all places used upstream IdealCapBatchSize: 0, // 0 = ethdb.IdealBatchSize will be used IdealCommitBatchSize: 0, // 0 = ethdb.IdealBatchSize will be used From 4dca801ad5937e9344646303c917e54274006979 Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Thu, 17 Jul 2025 15:58:06 +0100 Subject: [PATCH 09/13] add missing error log when failed to write hashdb cap batch --- triedb/hashdb/database.go | 1 + 1 file changed, 1 insertion(+) diff --git a/triedb/hashdb/database.go b/triedb/hashdb/database.go index f149b50103..f416b9bd2b 100644 --- a/triedb/hashdb/database.go +++ b/triedb/hashdb/database.go @@ -373,6 +373,7 @@ func (db *Database) Cap(limit common.StorageSize) error { log.Warn("Pebble batch limit reached in hashdb Cap operation, flushing batch. Consider setting ideal cap batch size to a lower value.", "pebbleError", err) // flush batch & retry the write if err = batch.Write(); err != nil { + log.Error("Failed to write flush list to disk", "err", err) return err } batch.Reset() From 0c68e3e5e9f0145b578ece56477aaadfb0be3851 Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Thu, 17 Jul 2025 16:48:15 +0100 Subject: [PATCH 10/13] sanitize batch sizes once in hashdb.New --- triedb/hashdb/database.go | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/triedb/hashdb/database.go b/triedb/hashdb/database.go index f416b9bd2b..411f576819 100644 --- a/triedb/hashdb/database.go +++ b/triedb/hashdb/database.go @@ -87,7 +87,8 @@ var Defaults = &Config{ // periodically flush a couple tries to disk, garbage collecting the remainder.
type Database struct { // Arbitrum: - config *Config + idealCapBatchSize uint32 + idealCommitBatchSize uint32 diskdb ethdb.Database // Persistent storage for matured trie nodes cleans *fastcache.Cache // GC friendly memory cache of clean node RLPs @@ -143,8 +144,15 @@ func New(diskdb ethdb.Database, config *Config) *Database { if config.CleanCacheSize > 0 { cleans = fastcache.New(config.CleanCacheSize) } + sanitizeBatchSize := func(size uint32) uint32 { + if size > 0 { + return size + } + return ethdb.IdealBatchSize + } return &Database{ - config: config, + idealCapBatchSize: sanitizeBatchSize(config.IdealCapBatchSize), + idealCommitBatchSize: sanitizeBatchSize(config.IdealCommitBatchSize), diskdb: diskdb, cleans: cleans, @@ -355,12 +363,6 @@ func (db *Database) Cap(limit common.StorageSize) error { size := db.dirtiesSize + common.StorageSize(len(db.dirties)*cachedNodeSize) size += db.childrenSize - // Arbitrum: - idealBatchSize := uint(db.config.IdealCapBatchSize) - if idealBatchSize == 0 { - idealBatchSize = uint(ethdb.IdealBatchSize) - } - // Keep committing nodes from the flush-list until we're below allowance oldest := db.oldest for size > limit && oldest != (common.Hash{}) { @@ -384,7 +386,7 @@ func (db *Database) Cap(limit common.StorageSize) error { } // If we exceeded the ideal batch size, commit and reset - if uint(batch.ValueSize()) >= idealBatchSize { + if uint32(batch.ValueSize()) >= db.idealCapBatchSize { if err := batch.Write(); err != nil { log.Error("Failed to write flush list to disk", "err", err) return err @@ -526,11 +528,7 @@ func (db *Database) commit(hash common.Hash, batch ethdb.Batch, uncacher *cleane log.Crit("Failure in hashdb Commit operation", "err", err) } } - idealBatchSize := uint(db.config.IdealCommitBatchSize) - if idealBatchSize == 0 { - idealBatchSize = uint(ethdb.IdealBatchSize) - } - if uint(batch.ValueSize()) >= idealBatchSize { + if uint32(batch.ValueSize()) >= db.idealCommitBatchSize { if err := batch.Write(); err != nil { return err } From 37dd07f54d9139de4ba2ba910405a0668d086339 Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Thu, 17 Jul 2025 16:56:59 +0100 Subject: [PATCH 11/13] fix integer casting --- triedb/hashdb/database.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/triedb/hashdb/database.go b/triedb/hashdb/database.go index 411f576819..becf326eb3 100644 --- a/triedb/hashdb/database.go +++ b/triedb/hashdb/database.go @@ -87,8 +87,8 @@ var Defaults = &Config{ // periodically flush a couple tries to disk, garbage collecting the remainder. 
type Database struct { // Arbitrum: - idealCapBatchSize uint32 - idealCommitBatchSize uint32 + idealCapBatchSize uint + idealCommitBatchSize uint diskdb ethdb.Database // Persistent storage for matured trie nodes cleans *fastcache.Cache // GC friendly memory cache of clean node RLPs @@ -144,9 +144,9 @@ func New(diskdb ethdb.Database, config *Config) *Database { if config.CleanCacheSize > 0 { cleans = fastcache.New(config.CleanCacheSize) } - sanitizeBatchSize := func(size uint32) uint32 { + sanitizeBatchSize := func(size uint32) uint { if size > 0 { - return size + return uint(size) } return ethdb.IdealBatchSize } @@ -386,7 +386,7 @@ func (db *Database) Cap(limit common.StorageSize) error { } // If we exceeded the ideal batch size, commit and reset - if uint32(batch.ValueSize()) >= db.idealCapBatchSize { + if uint(batch.ValueSize()) >= db.idealCapBatchSize { if err := batch.Write(); err != nil { log.Error("Failed to write flush list to disk", "err", err) return err @@ -528,7 +528,7 @@ func (db *Database) commit(hash common.Hash, batch ethdb.Batch, uncacher *cleane log.Crit("Failure in hashdb Commit operation", "err", err) } } - if uint32(batch.ValueSize()) >= db.idealCommitBatchSize { + if uint(batch.ValueSize()) >= db.idealCommitBatchSize { if err := batch.Write(); err != nil { return err } From bd11e99565146cdab735b44e296b99fcb3ab7384 Mon Sep 17 00:00:00 2001 From: Maciej Kulawik <10907694+magicxyyz@users.noreply.github.com> Date: Wed, 23 Jul 2025 19:10:22 +0100 Subject: [PATCH 12/13] simplify ErrBatchTooLarge checks Co-authored-by: Diego Ximenes Mendes --- triedb/hashdb/database.go | 50 ++++++++++++++++++--------------------- 1 file changed, 23 insertions(+), 27 deletions(-) diff --git a/triedb/hashdb/database.go b/triedb/hashdb/database.go index becf326eb3..7f7a045f66 100644 --- a/triedb/hashdb/database.go +++ b/triedb/hashdb/database.go @@ -370,19 +370,17 @@ func (db *Database) Cap(limit common.StorageSize) error { node := db.dirties[oldest] err := rawdb.WriteLegacyTrieNodeWithError(batch, oldest, node.node) - if err != nil { - if errors.Is(err, ethdb.ErrBatchTooLarge) { - log.Warn("Pebble batch limit reached in hashdb Cap operation, flushing batch. Consider setting ideal cap batch size to a lower value.", "pebbleError", err) - // flush batch & retry the write - if err = batch.Write(); err != nil { - log.Error("Failed to write flush list to disk", "err", err) - return err - } - batch.Reset() - rawdb.WriteLegacyTrieNode(batch, oldest, node.node) - } else { - log.Crit("Failure in hashdb Cap operation", "err", err) + if errors.Is(err, ethdb.ErrBatchTooLarge) { + log.Warn("Pebble batch limit reached in hashdb Cap operation, flushing batch. Consider setting ideal cap batch size to a lower value.", "pebbleError", err) + // flush batch & retry the write + if err = batch.Write(); err != nil { + log.Error("Failed to write flush list to disk", "err", err) + return err } + batch.Reset() + rawdb.WriteLegacyTrieNode(batch, oldest, node.node) + } else if err != nil { + log.Crit("Failure in hashdb Cap operation", "err", err) } // If we exceeded the ideal batch size, commit and reset @@ -511,22 +509,20 @@ func (db *Database) commit(hash common.Hash, batch ethdb.Batch, uncacher *cleane } // If we've reached an optimal batch size, commit and start over err = rawdb.WriteLegacyTrieNodeWithError(batch, hash, node.node) - if err != nil { - if errors.Is(err, ethdb.ErrBatchTooLarge) { - log.Warn("Pebble batch limit reached in hashdb Commit operation, flushing batch. 
Consider setting ideal commit batch size to a lower value.", "pebbleError", err) - // flush batch & retry the write - if err = batch.Write(); err != nil { - return err - } - err = batch.Replay(uncacher) - if err != nil { - return err - } - batch.Reset() - rawdb.WriteLegacyTrieNode(batch, hash, node.node) - } else { - log.Crit("Failure in hashdb Commit operation", "err", err) + if errors.Is(err, ethdb.ErrBatchTooLarge) { + log.Warn("Pebble batch limit reached in hashdb Commit operation, flushing batch. Consider setting ideal commit batch size to a lower value.", "pebbleError", err) + // flush batch & retry the write + if err = batch.Write(); err != nil { + return err } + err = batch.Replay(uncacher) + if err != nil { + return err + } + batch.Reset() + rawdb.WriteLegacyTrieNode(batch, hash, node.node) + } else if err != nil { + log.Crit("Failure in hashdb Commit operation", "err", err) } if uint(batch.ValueSize()) >= db.idealCommitBatchSize { if err := batch.Write(); err != nil { return err From 4760e9cacb0c4c34771567f0882908d90c86ab11 Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Wed, 23 Jul 2025 19:14:23 +0100 Subject: [PATCH 13/13] simplify pebble.maxBatchSize constant --- ethdb/pebble/pebble.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ethdb/pebble/pebble.go b/ethdb/pebble/pebble.go index c6225d350a..91b10840ed 100644 --- a/ethdb/pebble/pebble.go +++ b/ethdb/pebble/pebble.go @@ -23,6 +23,7 @@ import ( "bytes" "encoding/binary" "fmt" + "math" "runtime" "sync" "sync/atomic" "time" @@ -59,8 +60,7 @@ const ( // On 32-bit systems, slices are naturally limited to MaxInt (just short of // 2GB). // see: cockroachdb/pebble.maxBatchSize - oneIf64Bit = ^uint(0) >> 63 - maxBatchSize = (1<<31)<<oneIf64Bit - 1 + maxBatchSize = math.MaxUint32 )
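Reviewer note: to make the retry contract that this series establishes concrete — batch.Put and batch.Delete return ethdb.ErrBatchTooLarge instead of letting pebble panic, and the error is documented as safe to flush-and-retry — here is a minimal, illustrative Go sketch of a caller honoring that contract. It is not part of the patches: the helper name writeAll and its parameters are hypothetical; only the ethdb.Batch methods (Put, ValueSize, Write, Reset) and the ethdb.ErrBatchTooLarge sentinel introduced in PATCH 04 are assumed.

package example

import (
	"errors"

	"github.com/ethereum/go-ethereum/ethdb"
)

// writeAll is a hypothetical helper that streams key/value pairs into a batch,
// flushing whenever the caller-chosen ideal size is exceeded or the backend
// reports the hard pebble batch limit. Put is documented above to be safe to
// retry after a flush, which is exactly what hashdb's Cap and commit rely on.
func writeAll(db ethdb.KeyValueStore, kvs map[string][]byte, idealBatchSize int) error {
	batch := db.NewBatch()
	for k, v := range kvs {
		if err := batch.Put([]byte(k), v); errors.Is(err, ethdb.ErrBatchTooLarge) {
			// Hard limit reached: flush, reset, and retry the same pair once.
			if err := batch.Write(); err != nil {
				return err
			}
			batch.Reset()
			if err := batch.Put([]byte(k), v); err != nil {
				return err
			}
		} else if err != nil {
			return err
		}
		// Soft limit: flush opportunistically, mirroring hashdb's use of the
		// (configurable) ideal cap/commit batch size.
		if batch.ValueSize() >= idealBatchSize {
			if err := batch.Write(); err != nil {
				return err
			}
			batch.Reset()
		}
	}
	return batch.Write()
}

This mirrors the two-tier flushing in the hashdb Cap and commit loops above: the soft threshold keeps routine batches near the ideal size, while the hard-limit path guarantees forward progress even when a single flush interval accumulates close to pebble's maximum batch size.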