@@ -20,9 +20,48 @@ import (
2020 "io"
2121)
2222
23+ // categorizeErrors categorizes error lines into expected intermittent errors (with counts) and unexpected errors.
24+ // Returns unexpected errors and an error if any pattern exceeds the threshold.
25+ func categorizeErrors (errorLines []string , threshold int ) ([]string , error ) {
26+ errorCounts := make (map [string ]int )
27+ unexpectedErrors := []string {}
28+
29+ for _ , line := range errorLines {
30+ if line == "" {
31+ continue
32+ }
33+
34+ // Check if this line matches any expected intermittent error pattern
35+ matchedPattern := false
36+ for _ , pattern := range ExpectedIntermittentErrors {
37+ if strings .Contains (line , pattern ) {
38+ errorCounts [pattern ]++
39+ matchedPattern = true
40+ break
41+ }
42+ }
43+
44+ // If no pattern matched, it's an unexpected error
45+ if ! matchedPattern {
46+ unexpectedErrors = append (unexpectedErrors , line )
47+ }
48+ }
49+
50+ // Check if any expected error pattern exceeded the threshold
51+ for pattern , count := range errorCounts {
52+ if count > threshold {
53+ return unexpectedErrors , fmt .Errorf ("expected intermittent error '%s' exceeded threshold: count=%d (threshold=%d)" ,
54+ pattern , count , threshold )
55+ }
56+ }
57+
58+ return unexpectedErrors , nil
59+ }
60+
2361/*
2462 * Checks that the logs of all containers in all pods with the given label do not contain any errors.
2563 * Also returns an error if there are no pods that exist with the given label.
64+ * It tolerates each expected intermittent error pattern up to IntermittentErrorThreshold occurrences (currently 10); any other error line fails the check.
2665 */
2766func CheckContainerLogsForErrors (clientset * kubernetes.Clientset , namespace , labelName , labelValue string ) error {
2867 // Get all pods with the given label
@@ -39,28 +78,24 @@ func CheckContainerLogsForErrors(clientset *kubernetes.Clientset, namespace, lab
3978 return err
4079 }
4180
42- if strings .Contains (logs , "error" ) || strings .Contains (logs , "Error" ) {
43- // Get the exact log line of the error
44- for _ , line := range strings .Split (logs , "\n " ) {
45-
46- if strings .Contains (line , "error" ) || strings .Contains (line , "Error" ) {
47-
48- // Exclude known error lines that are transient
49- shouldExcludeLine := false
50- for _ , lineToExclude := range LogLineErrorsToExclude {
51- if strings .Contains (line , lineToExclude ) {
52- shouldExcludeLine = true
53- break
54- }
55- }
56- if shouldExcludeLine {
57- continue
58- }
59-
60- return fmt .Errorf ("Logs for container %s in pod %s contain errors:\n %s" , container .Name , pod .Name , line )
61- }
81+ // Collect error lines
82+ errorLines := []string {}
83+ for _ , line := range strings .Split (logs , "\n " ) {
84+ if strings .Contains (line , "error" ) || strings .Contains (line , "Error" ) {
85+ errorLines = append (errorLines , line )
6286 }
6387 }
88+
89+ // Categorize errors and check thresholds
90+ unexpectedErrors , err := categorizeErrors (errorLines , IntermittentErrorThreshold )
91+ if err != nil {
92+ return fmt .Errorf ("logs for container %s in pod %s: %v" , container .Name , pod .Name , err )
93+ }
94+
95+ // If there are any unexpected errors, fail immediately
96+ if len (unexpectedErrors ) > 0 {
97+ return fmt .Errorf ("logs for container %s in pod %s contain errors:\n %s" , container .Name , pod .Name , strings .Join (unexpectedErrors , "\n " ))
98+ }
6499 }
65100 }
66101 return nil
@@ -494,6 +529,7 @@ func GetAllNodes(clientset *kubernetes.Clientset) ([]corev1.Node, error) {
494529}
495530
496531// CheckFileForErrors checks if a specific file in a linux container contains errors.
532+ // It tolerates each expected intermittent error pattern up to IntermittentErrorThreshold occurrences (currently 10); any other error line fails the check.
497533func CheckFileForErrors (clientset * kubernetes.Clientset , Cfg * rest.Config , namespace , labelName , labelValue , containerName , filePath string ) error {
498534 pods , err := GetPodsWithLabel (clientset , namespace , labelName , labelValue )
499535 if err != nil {
@@ -513,7 +549,18 @@ func CheckFileForErrors(clientset *kubernetes.Clientset, Cfg *rest.Config, names
513549 }
514550
515551 if stdout != "" {
516- return fmt .Errorf ("errors found in file %s in pod %s, container %s: %s" , filePath , pod .Name , containerName , stdout )
552+ // Parse the stdout and categorize errors
553+ lines := strings .Split (stdout , "\n " )
554+ unexpectedErrors , err := categorizeErrors (lines , IntermittentErrorThreshold )
555+ if err != nil {
556+ return fmt .Errorf ("in file %s in pod %s, container %s: %v" , filePath , pod .Name , containerName , err )
557+ }
558+
559+ // If there are any unexpected errors, fail immediately
560+ if len (unexpectedErrors ) > 0 {
561+ return fmt .Errorf ("unexpected errors found in file %s in pod %s, container %s: %s" ,
562+ filePath , pod .Name , containerName , strings .Join (unexpectedErrors , "\n " ))
563+ }
517564 }
518565 }
519566
0 commit comments