diff --git a/errors/error_utils.go b/errors/error_utils.go
index 580434e3a..52464cfe9 100644
--- a/errors/error_utils.go
+++ b/errors/error_utils.go
@@ -205,6 +205,34 @@ func IsContextError(err error) bool {
 	return false
 }
 
+// IsLocalError reports whether err stems from a local condition rather than a peer.
+// It returns true when IsContextError(err) is true or when err matches
+// ErrStorageError via Is; a nil error and every other error yield false.
+// Callers use the result to skip peer failover, since another peer cannot fix a local fault.
+//
+// Parameters:
+//   - err: error to classify; may be nil
+//
+// Returns:
+//   - bool: true only for context errors and storage errors
+func IsLocalError(err error) bool {
+	if err == nil {
+		return false
+	}
+
+	// Anything IsContextError recognizes is local (presumably also covers semaphore wait timeouts - confirm)
+	if IsContextError(err) {
+		return true
+	}
+
+	// Errors matching ErrStorageError are treated as local resource problems
+	if Is(err, ErrStorageError) {
+		return true
+	}
+
+	return false
+}
+
 // GetErrorCategory returns a string representing the category of the error.
 // This is useful for logging and metrics.
 //
diff --git a/errors/error_utils_test.go b/errors/error_utils_test.go
index 693dd3766..c52af7422 100644
--- a/errors/error_utils_test.go
+++ b/errors/error_utils_test.go
@@ -222,6 +222,62 @@ func TestIsContextError(t *testing.T) {
 	}
 }
 
+func TestIsLocalError(t *testing.T) {
+	tests := []struct {
+		name     string
+		err      error
+		expected bool
+	}{
+		{
+			name:     "nil error",
+			err:      nil,
+			expected: false,
+		},
+		{
+			name:     "context canceled",
+			err:      context.Canceled,
+			expected: true,
+		},
+		{
+			name:     "context deadline exceeded",
+			err:      context.DeadlineExceeded,
+			expected: true,
+		},
+		{
+			name:     "wrapped context canceled",
+			err:      NewContextCanceledError("test"),
+			expected: true,
+		},
+		{
+			name:     "storage error",
+			err:      NewStorageError("test"),
+			expected: true,
+		},
+		{
+			name:     "network error - should retry with other peers",
+			err:      NewNetworkTimeoutError("test"),
+			expected: false,
+		},
+		{
+			name:     "service error - should retry with other peers",
+			err:      NewServiceError("test"),
+			expected: false,
+		},
+		{
+			name:     "processing error - should retry with other peers",
+			err:      NewProcessingError("test"),
+			expected: false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result := IsLocalError(tt.err)
+			assert.Equal(t, tt.expected, result)
+		})
+	}
+}
+
 func TestGetErrorCategory(t *testing.T) {
 	tests := []struct {
 		name     string
diff --git a/services/blockvalidation/get_blocks.go b/services/blockvalidation/get_blocks.go
index 85ed5d991..0ee69efd4 100644
--- a/services/blockvalidation/get_blocks.go
+++ b/services/blockvalidation/get_blocks.go
@@ -567,8 +567,16 @@ func (u *Server) fetchAndStoreSubtreeAndSubtreeData(ctx context.Context, block *
 		if err = u.fetchAndStoreSubtreeData(ctx, block, subtreeHash, subtree, peerID, baseURL); err == nil {
 			return nil // Success
 		}
+		// A local error (per errors.IsLocalError) is not the peer's fault: return now instead of trying the alternative peers
+		if errors.IsLocalError(err) {
+			return errors.NewServiceError("[catchup:fetchAndStoreSubtreeAndSubtreeData] Local error fetching subtreeData for %s (not retrying with other peers)", subtreeHash.String(), err)
+		}
 		u.logger.Warnf("[catchup:fetchAndStoreSubtreeAndSubtreeData] Primary peer %s failed to fetch subtreeData for %s: %v, trying alternatives", peerID, subtreeHash.String(), err)
 	} else {
+		// A local error (per errors.IsLocalError) is not the peer's fault: return now instead of trying the alternative peers
+		if errors.IsLocalError(err) {
+			return errors.NewServiceError("[catchup:fetchAndStoreSubtreeAndSubtreeData] Local error fetching subtree for %s (not retrying with other peers)", subtreeHash.String(), err)
+		}
 		u.logger.Warnf("[catchup:fetchAndStoreSubtreeAndSubtreeData] Primary peer %s failed to fetch subtree for %s: %v, trying alternatives", peerID, subtreeHash.String(), err)
 	}
 
@@ -596,6 +604,10 @@ func (u *Server) fetchAndStoreSubtreeAndSubtreeData(ctx context.Context, block *
 		if err != nil {
 			u.logger.Debugf("[catchup:fetchAndStoreSubtreeAndSubtreeData] Alternative peer %s failed for subtree %s: %v", altPeerID, subtreeHash.String(), err)
 			lastErr = err
+			// On a local error, return the wrapped error right away rather than continuing to the next alternative peer
+			if errors.IsLocalError(err) {
+				return errors.NewServiceError("[catchup:fetchAndStoreSubtreeAndSubtreeData] Local error fetching subtree %s (aborting peer retry)", subtreeHash.String(), err)
+			}
 			continue
 		}
 
@@ -603,6 +615,10 @@ func (u *Server) fetchAndStoreSubtreeAndSubtreeData(ctx context.Context, block *
 		if err = u.fetchAndStoreSubtreeData(ctx, block, subtreeHash, subtree, altPeerID, altBaseURL); err != nil {
 			u.logger.Debugf("[catchup:fetchAndStoreSubtreeAndSubtreeData] Alternative peer %s failed for subtreeData %s: %v", altPeerID, subtreeHash.String(), err)
 			lastErr = err
+			// On a local error, return the wrapped error right away rather than continuing to the next alternative peer
+			if errors.IsLocalError(err) {
+				return errors.NewServiceError("[catchup:fetchAndStoreSubtreeAndSubtreeData] Local error fetching subtreeData %s (aborting peer retry)", subtreeHash.String(), err)
+			}
 			continue
 		}
 