From aac32eda9a79e11d271a34bc8b6966b9b5ce668d Mon Sep 17 00:00:00 2001 From: freemans13 Date: Wed, 28 Jan 2026 11:08:55 +0000 Subject: [PATCH 1/2] do not try another peer on a local error --- services/blockvalidation/get_blocks.go | 44 +++++++++++++ .../blockvalidation/is_local_error_test.go | 65 +++++++++++++++++++ 2 files changed, 109 insertions(+) create mode 100644 services/blockvalidation/is_local_error_test.go diff --git a/services/blockvalidation/get_blocks.go b/services/blockvalidation/get_blocks.go index 509c7dc86..e0b49e51b 100644 --- a/services/blockvalidation/get_blocks.go +++ b/services/blockvalidation/get_blocks.go @@ -33,6 +33,34 @@ type resultItem struct { err error } +// isLocalError checks if an error is a local resource error (not peer-related). +// Local errors include context cancellation, semaphore exhaustion, and storage errors +// that are caused by local resource constraints rather than peer failures. +// These errors should not trigger peer failover since trying another peer won't help. +func isLocalError(err error) bool { + if err == nil { + return false + } + + // Check for context cancellation (includes semaphore wait timeouts) + if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) { + return true + } + + // Check for our custom context canceled error type + if errors.Is(err, errors.ErrContextCanceled) { + return true + } + + // Check for storage errors that indicate local resource issues + if errors.Is(err, errors.ErrStorageError) { + // Storage errors during existence checks or local operations are local issues + return true + } + + return false +} + // fetchBlocksConcurrently fetches blocks from a peer using a high-performance worker pool architecture. // This function implements: // 1. Large batch fetching (~100 blocks per HTTP request) for maximum throughput @@ -551,8 +579,16 @@ func (u *Server) fetchAndStoreSubtreeAndSubtreeData(ctx context.Context, block * if err = u.fetchAndStoreSubtreeData(ctx, block, subtreeHash, subtree, peerID, baseURL); err == nil { return nil // Success } + // Check if error is local (not peer-related) - don't retry with other peers + if isLocalError(err) { + return errors.NewServiceError("[catchup:fetchAndStoreSubtreeAndSubtreeData] Local error fetching subtreeData for %s (not retrying with other peers)", subtreeHash.String(), err) + } u.logger.Warnf("[catchup:fetchAndStoreSubtreeAndSubtreeData] Primary peer %s failed to fetch subtreeData for %s: %v, trying alternatives", peerID, subtreeHash.String(), err) } else { + // Check if error is local (not peer-related) - don't retry with other peers + if isLocalError(err) { + return errors.NewServiceError("[catchup:fetchAndStoreSubtreeAndSubtreeData] Local error fetching subtree for %s (not retrying with other peers)", subtreeHash.String(), err) + } u.logger.Warnf("[catchup:fetchAndStoreSubtreeAndSubtreeData] Primary peer %s failed to fetch subtree for %s: %v, trying alternatives", peerID, subtreeHash.String(), err) } @@ -580,6 +616,10 @@ func (u *Server) fetchAndStoreSubtreeAndSubtreeData(ctx context.Context, block * if err != nil { u.logger.Debugf("[catchup:fetchAndStoreSubtreeAndSubtreeData] Alternative peer %s failed for subtree %s: %v", altPeerID, subtreeHash.String(), err) lastErr = err + // Don't continue trying other peers if it's a local error + if isLocalError(err) { + return errors.NewServiceError("[catchup:fetchAndStoreSubtreeAndSubtreeData] Local error fetching subtree %s (aborting peer retry)", subtreeHash.String(), err) + } continue } @@ -587,6 +627,10 @@ func (u *Server) fetchAndStoreSubtreeAndSubtreeData(ctx context.Context, block * if err = u.fetchAndStoreSubtreeData(ctx, block, subtreeHash, subtree, altPeerID, altBaseURL); err != nil { u.logger.Debugf("[catchup:fetchAndStoreSubtreeAndSubtreeData] Alternative peer %s failed for subtreeData %s: %v", altPeerID, subtreeHash.String(), err) lastErr = err + // Don't continue trying other peers if it's a local error + if isLocalError(err) { + return errors.NewServiceError("[catchup:fetchAndStoreSubtreeAndSubtreeData] Local error fetching subtreeData %s (aborting peer retry)", subtreeHash.String(), err) + } continue } diff --git a/services/blockvalidation/is_local_error_test.go b/services/blockvalidation/is_local_error_test.go new file mode 100644 index 000000000..d7500236f --- /dev/null +++ b/services/blockvalidation/is_local_error_test.go @@ -0,0 +1,65 @@ +package blockvalidation + +import ( + "context" + "testing" + + "github.com/bsv-blockchain/teranode/errors" + "github.com/stretchr/testify/require" +) + +func TestIsLocalError(t *testing.T) { + tests := []struct { + name string + err error + expected bool + }{ + { + name: "nil error", + err: nil, + expected: false, + }, + { + name: "context canceled", + err: context.Canceled, + expected: true, + }, + { + name: "context deadline exceeded", + err: context.DeadlineExceeded, + expected: true, + }, + { + name: "wrapped context canceled", + err: errors.NewContextCanceledError("test", context.Canceled), + expected: true, + }, + { + name: "storage error", + err: errors.NewStorageError("test"), + expected: true, + }, + { + name: "network error - should retry with other peers", + err: errors.NewNetworkTimeoutError("test"), + expected: false, + }, + { + name: "service error - should retry with other peers", + err: errors.NewServiceError("test"), + expected: false, + }, + { + name: "processing error - should retry with other peers", + err: errors.NewProcessingError("test"), + expected: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := isLocalError(tt.err) + require.Equal(t, tt.expected, result) + }) + } +} From a305cdacc81a1c9eeb5315b0938bb4ecc62b5f35 Mon Sep 17 00:00:00 2001 From: freemans13 Date: Wed, 28 Jan 2026 15:14:38 +0000 Subject: [PATCH 2/2] Move isLocalErr to errors package --- errors/error_utils.go | 28 ++++++++ errors/error_utils_test.go | 56 ++++++++++++++++ services/blockvalidation/get_blocks.go | 36 ++-------- .../blockvalidation/is_local_error_test.go | 65 ------------------- 4 files changed, 88 insertions(+), 97 deletions(-) delete mode 100644 services/blockvalidation/is_local_error_test.go diff --git a/errors/error_utils.go b/errors/error_utils.go index 580434e3a..52464cfe9 100644 --- a/errors/error_utils.go +++ b/errors/error_utils.go @@ -205,6 +205,34 @@ func IsContextError(err error) bool { return false } +// IsLocalError checks if an error is a local resource error (not peer-related). +// Local errors include context cancellation, semaphore exhaustion, and storage errors +// that are caused by local resource constraints rather than peer failures. +// These errors should not trigger peer failover since trying another peer won't help. +// +// Parameters: +// - err: Error to check +// +// Returns: +// - bool: true if error is a local resource error +func IsLocalError(err error) bool { + if err == nil { + return false + } + + // Context errors are local (includes semaphore wait timeouts) + if IsContextError(err) { + return true + } + + // Storage errors indicate local resource issues + if Is(err, ErrStorageError) { + return true + } + + return false +} + // GetErrorCategory returns a string representing the category of the error. // This is useful for logging and metrics. // diff --git a/errors/error_utils_test.go b/errors/error_utils_test.go index 693dd3766..c52af7422 100644 --- a/errors/error_utils_test.go +++ b/errors/error_utils_test.go @@ -222,6 +222,62 @@ func TestIsContextError(t *testing.T) { } } +func TestIsLocalError(t *testing.T) { + tests := []struct { + name string + err error + expected bool + }{ + { + name: "nil error", + err: nil, + expected: false, + }, + { + name: "context canceled", + err: context.Canceled, + expected: true, + }, + { + name: "context deadline exceeded", + err: context.DeadlineExceeded, + expected: true, + }, + { + name: "wrapped context canceled", + err: NewContextCanceledError("test"), + expected: true, + }, + { + name: "storage error", + err: NewStorageError("test"), + expected: true, + }, + { + name: "network error - should retry with other peers", + err: NewNetworkTimeoutError("test"), + expected: false, + }, + { + name: "service error - should retry with other peers", + err: NewServiceError("test"), + expected: false, + }, + { + name: "processing error - should retry with other peers", + err: NewProcessingError("test"), + expected: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := IsLocalError(tt.err) + assert.Equal(t, tt.expected, result) + }) + } +} + func TestGetErrorCategory(t *testing.T) { tests := []struct { name string diff --git a/services/blockvalidation/get_blocks.go b/services/blockvalidation/get_blocks.go index e0b49e51b..24f5b81ff 100644 --- a/services/blockvalidation/get_blocks.go +++ b/services/blockvalidation/get_blocks.go @@ -33,34 +33,6 @@ type resultItem struct { err error } -// isLocalError checks if an error is a local resource error (not peer-related). -// Local errors include context cancellation, semaphore exhaustion, and storage errors -// that are caused by local resource constraints rather than peer failures. -// These errors should not trigger peer failover since trying another peer won't help. -func isLocalError(err error) bool { - if err == nil { - return false - } - - // Check for context cancellation (includes semaphore wait timeouts) - if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) { - return true - } - - // Check for our custom context canceled error type - if errors.Is(err, errors.ErrContextCanceled) { - return true - } - - // Check for storage errors that indicate local resource issues - if errors.Is(err, errors.ErrStorageError) { - // Storage errors during existence checks or local operations are local issues - return true - } - - return false -} - // fetchBlocksConcurrently fetches blocks from a peer using a high-performance worker pool architecture. // This function implements: // 1. Large batch fetching (~100 blocks per HTTP request) for maximum throughput @@ -580,13 +552,13 @@ func (u *Server) fetchAndStoreSubtreeAndSubtreeData(ctx context.Context, block * return nil // Success } // Check if error is local (not peer-related) - don't retry with other peers - if isLocalError(err) { + if errors.IsLocalError(err) { return errors.NewServiceError("[catchup:fetchAndStoreSubtreeAndSubtreeData] Local error fetching subtreeData for %s (not retrying with other peers)", subtreeHash.String(), err) } u.logger.Warnf("[catchup:fetchAndStoreSubtreeAndSubtreeData] Primary peer %s failed to fetch subtreeData for %s: %v, trying alternatives", peerID, subtreeHash.String(), err) } else { // Check if error is local (not peer-related) - don't retry with other peers - if isLocalError(err) { + if errors.IsLocalError(err) { return errors.NewServiceError("[catchup:fetchAndStoreSubtreeAndSubtreeData] Local error fetching subtree for %s (not retrying with other peers)", subtreeHash.String(), err) } u.logger.Warnf("[catchup:fetchAndStoreSubtreeAndSubtreeData] Primary peer %s failed to fetch subtree for %s: %v, trying alternatives", peerID, subtreeHash.String(), err) @@ -617,7 +589,7 @@ func (u *Server) fetchAndStoreSubtreeAndSubtreeData(ctx context.Context, block * u.logger.Debugf("[catchup:fetchAndStoreSubtreeAndSubtreeData] Alternative peer %s failed for subtree %s: %v", altPeerID, subtreeHash.String(), err) lastErr = err // Don't continue trying other peers if it's a local error - if isLocalError(err) { + if errors.IsLocalError(err) { return errors.NewServiceError("[catchup:fetchAndStoreSubtreeAndSubtreeData] Local error fetching subtree %s (aborting peer retry)", subtreeHash.String(), err) } continue @@ -628,7 +600,7 @@ func (u *Server) fetchAndStoreSubtreeAndSubtreeData(ctx context.Context, block * u.logger.Debugf("[catchup:fetchAndStoreSubtreeAndSubtreeData] Alternative peer %s failed for subtreeData %s: %v", altPeerID, subtreeHash.String(), err) lastErr = err // Don't continue trying other peers if it's a local error - if isLocalError(err) { + if errors.IsLocalError(err) { return errors.NewServiceError("[catchup:fetchAndStoreSubtreeAndSubtreeData] Local error fetching subtreeData %s (aborting peer retry)", subtreeHash.String(), err) } continue diff --git a/services/blockvalidation/is_local_error_test.go b/services/blockvalidation/is_local_error_test.go deleted file mode 100644 index d7500236f..000000000 --- a/services/blockvalidation/is_local_error_test.go +++ /dev/null @@ -1,65 +0,0 @@ -package blockvalidation - -import ( - "context" - "testing" - - "github.com/bsv-blockchain/teranode/errors" - "github.com/stretchr/testify/require" -) - -func TestIsLocalError(t *testing.T) { - tests := []struct { - name string - err error - expected bool - }{ - { - name: "nil error", - err: nil, - expected: false, - }, - { - name: "context canceled", - err: context.Canceled, - expected: true, - }, - { - name: "context deadline exceeded", - err: context.DeadlineExceeded, - expected: true, - }, - { - name: "wrapped context canceled", - err: errors.NewContextCanceledError("test", context.Canceled), - expected: true, - }, - { - name: "storage error", - err: errors.NewStorageError("test"), - expected: true, - }, - { - name: "network error - should retry with other peers", - err: errors.NewNetworkTimeoutError("test"), - expected: false, - }, - { - name: "service error - should retry with other peers", - err: errors.NewServiceError("test"), - expected: false, - }, - { - name: "processing error - should retry with other peers", - err: errors.NewProcessingError("test"), - expected: false, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - result := isLocalError(tt.err) - require.Equal(t, tt.expected, result) - }) - } -}