Skip to content

Commit 6508966

Browse files
committed
Handle intermittent errors
1 parent dbdda5c commit 6508966

File tree

2 files changed

+80
-29
lines changed

2 files changed

+80
-29
lines changed

test/ginkgo-e2e/utils/constants.go

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,23 @@
11
package utils
22

33
var (
4-
// Slices can't be constants
5-
LogLineErrorsToExclude = [...]string{
6-
// Arc token adapter
7-
"create or renew cluster identity error",
8-
"get token from status error",
9-
"Objects listed",
10-
// Target allocator
11-
"client connection lost",
4+
// ExpectedIntermittentErrors lists log-line substrings of known intermittent errors; each pattern is tolerated up to IntermittentErrorThreshold occurrences.
5+
ExpectedIntermittentErrors = []string{
6+
"Error in plugin: error making HTTP request",
7+
"connection refused",
8+
"HTTP/1.1 500 Internal Server Error",
9+
"WINHTTP_CALLBACK_STATUS_REQUEST_ERROR",
10+
"The connection with the server was terminated abnormally",
11+
"TCP connection failed",
12+
"no upstream connections available",
1213
}
1314
)
1415

1516
const (
1617
WindowsLabel = "windows"
1718
ARM64Label = "arm64"
1819
FIPSLabel = "fips"
20+
21+
// IntermittentErrorThreshold is the maximum number of occurrences allowed for each expected intermittent error pattern.
22+
IntermittentErrorThreshold = 10
1923
)

test/ginkgo-e2e/utils/kubernetes_api_utils.go

Lines changed: 68 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,48 @@ import (
2020
"io"
2121
)
2222

23+
// categorizeErrors categorizes error lines into expected intermittent errors (with counts) and unexpected errors.
24+
// Returns unexpected errors and an error if any pattern exceeds the threshold.
25+
func categorizeErrors(errorLines []string, threshold int) ([]string, error) {
26+
errorCounts := make(map[string]int)
27+
unexpectedErrors := []string{}
28+
29+
for _, line := range errorLines {
30+
if line == "" {
31+
continue
32+
}
33+
34+
// Check if this line matches any expected intermittent error pattern
35+
matchedPattern := false
36+
for _, pattern := range ExpectedIntermittentErrors {
37+
if strings.Contains(line, pattern) {
38+
errorCounts[pattern]++
39+
matchedPattern = true
40+
break
41+
}
42+
}
43+
44+
// If no pattern matched, it's an unexpected error
45+
if !matchedPattern {
46+
unexpectedErrors = append(unexpectedErrors, line)
47+
}
48+
}
49+
50+
// Check if any expected error pattern exceeded the threshold
51+
for pattern, count := range errorCounts {
52+
if count > threshold {
53+
return unexpectedErrors, fmt.Errorf("expected intermittent error '%s' exceeded threshold: count=%d (threshold=%d)",
54+
pattern, count, threshold)
55+
}
56+
}
57+
58+
return unexpectedErrors, nil
59+
}
60+
2361
/*
2462
* Checks that the logs of all containers in all pods with the given label do not contain any errors.
2563
* Also returns an error if there are no pods that exist with the given label.
64+
 * It tolerates expected intermittent errors up to IntermittentErrorThreshold occurrences per pattern in each container's logs.
2665
*/
2766
func CheckContainerLogsForErrors(clientset *kubernetes.Clientset, namespace, labelName, labelValue string) error {
2867
// Get all pods with the given label
@@ -39,28 +78,24 @@ func CheckContainerLogsForErrors(clientset *kubernetes.Clientset, namespace, lab
3978
return err
4079
}
4180

42-
if strings.Contains(logs, "error") || strings.Contains(logs, "Error") {
43-
// Get the exact log line of the error
44-
for _, line := range strings.Split(logs, "\n") {
45-
46-
if strings.Contains(line, "error") || strings.Contains(line, "Error") {
47-
48-
// Exclude known error lines that are transient
49-
shouldExcludeLine := false
50-
for _, lineToExclude := range LogLineErrorsToExclude {
51-
if strings.Contains(line, lineToExclude) {
52-
shouldExcludeLine = true
53-
break
54-
}
55-
}
56-
if shouldExcludeLine {
57-
continue
58-
}
59-
60-
return fmt.Errorf("Logs for container %s in pod %s contain errors:\n %s", container.Name, pod.Name, line)
61-
}
81+
// Collect error lines
82+
errorLines := []string{}
83+
for _, line := range strings.Split(logs, "\n") {
84+
if strings.Contains(line, "error") || strings.Contains(line, "Error") {
85+
errorLines = append(errorLines, line)
6286
}
6387
}
88+
89+
// Categorize errors and check thresholds
90+
unexpectedErrors, err := categorizeErrors(errorLines, IntermittentErrorThreshold)
91+
if err != nil {
92+
return fmt.Errorf("logs for container %s in pod %s: %v", container.Name, pod.Name, err)
93+
}
94+
95+
// If there are any unexpected errors, fail immediately
96+
if len(unexpectedErrors) > 0 {
97+
return fmt.Errorf("logs for container %s in pod %s contain errors:\n %s", container.Name, pod.Name, strings.Join(unexpectedErrors, "\n"))
98+
}
6499
}
65100
}
66101
return nil
@@ -494,6 +529,7 @@ func GetAllNodes(clientset *kubernetes.Clientset) ([]corev1.Node, error) {
494529
}
495530

496531
// CheckFileForErrors checks if a specific file in a linux container contains errors.
532+
// It tolerates expected intermittent errors up to IntermittentErrorThreshold occurrences per pattern.
497533
func CheckFileForErrors(clientset *kubernetes.Clientset, Cfg *rest.Config, namespace, labelName, labelValue, containerName, filePath string) error {
498534
pods, err := GetPodsWithLabel(clientset, namespace, labelName, labelValue)
499535
if err != nil {
@@ -513,7 +549,18 @@ func CheckFileForErrors(clientset *kubernetes.Clientset, Cfg *rest.Config, names
513549
}
514550

515551
if stdout != "" {
516-
return fmt.Errorf("errors found in file %s in pod %s, container %s: %s", filePath, pod.Name, containerName, stdout)
552+
// Parse the stdout and categorize errors
553+
lines := strings.Split(stdout, "\n")
554+
unexpectedErrors, err := categorizeErrors(lines, IntermittentErrorThreshold)
555+
if err != nil {
556+
return fmt.Errorf("in file %s in pod %s, container %s: %v", filePath, pod.Name, containerName, err)
557+
}
558+
559+
// If there are any unexpected errors, fail immediately
560+
if len(unexpectedErrors) > 0 {
561+
return fmt.Errorf("unexpected errors found in file %s in pod %s, container %s: %s",
562+
filePath, pod.Name, containerName, strings.Join(unexpectedErrors, "\n"))
563+
}
517564
}
518565
}
519566

0 commit comments

Comments
 (0)