diff --git a/pkg/distributor/field_detection.go b/pkg/distributor/field_detection.go index ce0ebfbd86650..3d80495c69d14 100644 --- a/pkg/distributor/field_detection.go +++ b/pkg/distributor/field_detection.go @@ -330,31 +330,51 @@ func isJSON(line string) bool { return firstNonSpaceChar == '{' && lastNonSpaceChar == '}' } -func detectLevelFromLogLine(log string) string { - if strings.Contains(log, "info:") || strings.Contains(log, "INFO:") || - strings.Contains(log, "[info]") || strings.Contains(log, "[INFO]") || - strings.Contains(log, "info") || strings.Contains(log, "INFO") { - return constants.LogLevelInfo +func isWordBoundary(s string, pos int) bool { + if pos < 0 || pos >= len(s) { + return true // Start/end of string is a boundary } - if strings.Contains(log, "err:") || strings.Contains(log, "ERR:") || - strings.Contains(log, "[err]") || strings.Contains(log, "[ERR]") || - strings.Contains(log, "[error]") || strings.Contains(log, "[ERROR]") { - return constants.LogLevelError + r := rune(s[pos]) + return !unicode.IsLetter(r) && !unicode.IsDigit(r) +} + +func logHasBoundedLevel(log, level string) bool { + log = strings.ToLower(log) + level = strings.ToLower(level) + + pos := strings.Index(log, level) + if pos == -1 { + return false } - if strings.Contains(log, "warn:") || strings.Contains(log, "WARN:") || - strings.Contains(log, "[warn]") || strings.Contains(log, "[WARN]") || - strings.Contains(log, "[warning]") || strings.Contains(log, "[WARNING]") || - strings.Contains(log, "warning") || strings.Contains(log, "WARNING") { - return constants.LogLevelWarn + + // make sure the word occurs on it's own, with boundaries on both sides + if isWordBoundary(log, pos-1) && isWordBoundary(log, pos+len(level)) { + return true } - if strings.Contains(log, "critical:") || strings.Contains(log, "CRITICAL:") || - strings.Contains(log, "[critical]") || strings.Contains(log, "[CRITICAL]") { - return constants.LogLevelCritical + + return false +} + +func detectLevelFromLogLine(log string) string { + levelPatterns := []struct { + word string + level string + }{ + {"trace", constants.LogLevelTrace}, + {"debug", constants.LogLevelDebug}, + {"fatal", constants.LogLevelFatal}, + {"critical", constants.LogLevelCritical}, + {"error", constants.LogLevelError}, + {"err", constants.LogLevelError}, + {"warning", constants.LogLevelWarn}, + {"warn", constants.LogLevelWarn}, + {"info", constants.LogLevelInfo}, } - if strings.Contains(log, "debug:") || strings.Contains(log, "DEBUG:") || - strings.Contains(log, "[debug]") || strings.Contains(log, "[DEBUG]") || - strings.Contains(log, "debug") || strings.Contains(log, "DEBUG") { - return constants.LogLevelDebug + for _, level := range levelPatterns { + if logHasBoundedLevel(log, level.word) { + return level.level + } } + return constants.LogLevelUnknown } diff --git a/pkg/distributor/field_detection_test.go b/pkg/distributor/field_detection_test.go index 6d32de522badd..36c45ec3d7224 100644 --- a/pkg/distributor/field_detection_test.go +++ b/pkg/distributor/field_detection_test.go @@ -2,6 +2,7 @@ package distributor import ( "fmt" + "strings" "testing" "github.com/grafana/dskit/flagext" @@ -388,14 +389,14 @@ func Test_detectLogLevelFromLogEntry(t *testing.T) { entry: logproto.Entry{ Line: "this is a critical message", }, - expectedLogLevel: constants.LogLevelUnknown, + expectedLogLevel: constants.LogLevelCritical, // Changed from Unknown to Critical }, { name: "non otlp with CRITICAL keyword in log line", entry: logproto.Entry{ Line: "this is a CRITICAL message", }, - expectedLogLevel: constants.LogLevelUnknown, + expectedLogLevel: constants.LogLevelCritical, // Changed from Unknown to Critical }, { name: "non otlp with critical: prefix in log line", @@ -617,7 +618,7 @@ func Test_detectLogLevelFromLogEntry(t *testing.T) { } { t.Run(tc.name, func(t *testing.T) { detectedLogLevel := ld.detectLogLevelFromLogEntry(tc.entry, logproto.FromLabelAdaptersToLabels(tc.entry.StructuredMetadata)) - require.Equal(t, tc.expectedLogLevel, detectedLogLevel) + require.Equal(t, tc.expectedLogLevel, detectedLogLevel, "log line: %s", tc.entry.Line) }) } } @@ -707,14 +708,14 @@ func Test_detectLogLevelFromLogEntryWithCustomLabels(t *testing.T) { entry: logproto.Entry{ Line: "this is a critical message", }, - expectedLogLevel: constants.LogLevelUnknown, + expectedLogLevel: constants.LogLevelCritical, }, { name: "non otlp with CRITICAL keyword in log line", entry: logproto.Entry{ Line: "this is a CRITICAL message", }, - expectedLogLevel: constants.LogLevelUnknown, + expectedLogLevel: constants.LogLevelCritical, }, { name: "non otlp with critical: prefix in log line", @@ -1117,3 +1118,268 @@ func TestGetLevelUsingJsonParser(t *testing.T) { }) } } + +func Test_detectLevelFromLogLine(t *testing.T) { + for _, level := range []struct { + word string + level string + }{ + {word: "trace", level: constants.LogLevelTrace}, + {word: "debug", level: constants.LogLevelDebug}, + {word: "info", level: constants.LogLevelInfo}, + {word: "warn", level: constants.LogLevelWarn}, + {word: "warning", level: constants.LogLevelWarn}, + {word: "error", level: constants.LogLevelError}, + {word: "err", level: constants.LogLevelError}, + {word: "fatal", level: constants.LogLevelFatal}, + {word: "critical", level: constants.LogLevelCritical}, + {word: "unknown", level: constants.LogLevelUnknown}, + } { + tests := []struct { + name string + log string + expected string + }{ + { + name: fmt.Sprintf("detect %s level", level.word), + log: fmt.Sprintf("this is a %s message", level.word), + expected: level.level, + }, + { + name: fmt.Sprintf("detect %s level uppercase", strings.ToUpper(level.word)), + log: fmt.Sprintf("this is a %s message", strings.ToUpper(level.word)), + expected: level.level, + }, + { + name: fmt.Sprintf("detect %s level at start of string", level.word), + log: fmt.Sprintf("%s occurred", level.word), + expected: level.level, + }, + { + name: fmt.Sprintf("detect %s level at end of string", level.word), + log: fmt.Sprintf("an %s", level.word), + expected: level.level, + }, + { + name: fmt.Sprintf("detect %s level with space before", level.word), + log: fmt.Sprintf("this is an %s message", level.word), + expected: level.level, + }, + { + name: fmt.Sprintf("detect %s level with colon", level.word), + log: fmt.Sprintf("%s: something happened", level.word), + expected: level.level, + }, + { + name: fmt.Sprintf("detect %s level with punctuation after", level.word), + log: fmt.Sprintf("%s, something happened", level.word), + expected: level.level, + }, + { + name: fmt.Sprintf("detect %s level with special characters", level.word), + log: fmt.Sprintf("%s! something happened", level.word), + expected: level.level, + }, + { + name: fmt.Sprintf("detect %s level with parentheses", level.word), + log: fmt.Sprintf("(%s) something happened", level.word), + expected: level.level, + }, + { + name: fmt.Sprintf("detect %s level with brackets", level.word), + log: fmt.Sprintf("[%s] something happened", level.word), + expected: level.level, + }, + { + name: fmt.Sprintf("nginx like log with %s", level), + log: fmt.Sprintf("2024-01-01T10:00:00Z %s: connection failed", level.word), + expected: level.level, + }, + { + name: fmt.Sprintf("bracket pattern [%s]", level.word), + log: fmt.Sprintf("[%s] connection failed", level.word), + expected: level.level, + }, + { + name: fmt.Sprintf("bracket pattern [%s]", strings.ToUpper(level.word)), + log: fmt.Sprintf("[%s] connection failed", strings.ToUpper(level.word)), + expected: level.level, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := detectLevelFromLogLine(tt.log) + require.Equal(t, tt.expected, result, "log: %q", tt.log) + }) + } + } + + additionalTestCases := []struct { + name string + log string + expected string + }{ + { + name: "terror should not match error", + log: "this is a terror attack", + expected: constants.LogLevelUnknown, + }, + { + name: "errors should not match error", + log: "there were multiple errors", + expected: constants.LogLevelUnknown, + }, + { + name: "error123 should not match error", + log: "error123 occurred", + expected: constants.LogLevelUnknown, + }, + { + name: "123error should not match error", + log: "123error occurred", + expected: constants.LogLevelUnknown, + }, + { + name: "errorCode should not match error", + log: "errorCode is invalid", + expected: constants.LogLevelUnknown, + }, + { + name: "myError should not match error", + log: "myError occurred", + expected: constants.LogLevelUnknown, + }, + { + name: "debugging should not match debug", + log: "debugging the issue", + expected: constants.LogLevelUnknown, + }, + { + name: "debugger should not match debug", + log: "debugger attached", + expected: constants.LogLevelUnknown, + }, + { + name: "information should not match info", + log: "information is available", + expected: constants.LogLevelUnknown, + }, + { + name: "informative should not match info", + log: "informative message", + expected: constants.LogLevelUnknown, + }, + { + name: "warnings should not match warn", + log: "warnings issued", + expected: constants.LogLevelUnknown, + }, + { + name: "criticality should not match critical", + log: "criticality assessment", + expected: constants.LogLevelUnknown, + }, + { + name: "fatalism should not match fatal", + log: "fatalism philosophy", + expected: constants.LogLevelUnknown, + }, + { + name: "tracer should not match trace", + log: "tracer bullet", + expected: constants.LogLevelUnknown, + }, + { + name: "traced should not match trace", + log: "traced the issue", + expected: constants.LogLevelUnknown, + }, + { + name: "empty string", + log: "", + expected: constants.LogLevelUnknown, + }, + { + name: "no level word", + log: "this is a regular log message", + expected: constants.LogLevelUnknown, + }, + { + name: "JSON string with error", + log: `{"message": "error occurred"}`, + expected: constants.LogLevelError, + }, + { + name: "JSON string with error in key", + log: `{"error": "something"}`, + expected: constants.LogLevelError, + }, + { + name: "JSON string with errorCode should not match", + log: `{"errorCode": 123}`, + expected: constants.LogLevelUnknown, + }, + { + name: "logfmt with error", + log: `msg="error occurred"`, + expected: constants.LogLevelError, + }, + { + name: "logfmt with errorCode should not match", + log: `errorCode=123`, + expected: constants.LogLevelUnknown, + }, + { + name: "error with equals sign", + log: "error=something", + expected: constants.LogLevelError, + }, + { + name: "logfmt level", + log: "level=error", + expected: constants.LogLevelError, + }, + { + name: "bracket pattern [terror] should not match error", + log: "[terror] attack occurred", + expected: constants.LogLevelUnknown, + }, + { + name: "bracket pattern [errors] should not match error", + log: "[errors] occurred", + expected: constants.LogLevelUnknown, + }, + { + name: "bracket pattern [errorCode] should not match error", + log: "[errorCode] is invalid", + expected: constants.LogLevelUnknown, + }, + { + name: "bracket pattern [debugging] should not match debug", + log: "[debugging] the issue", + expected: constants.LogLevelUnknown, + }, + { + name: "bracket pattern [information] should not match info", + log: "[information] is available", + expected: constants.LogLevelUnknown, + }, + { + name: "bracket pattern in JSON string", + log: `{"message": "[error] occurred"}`, + expected: constants.LogLevelError, + }, + { + name: "bracket pattern in logfmt string", + log: `msg="[error] occurred"`, + expected: constants.LogLevelError, + }, + } + for _, tt := range additionalTestCases { + t.Run(tt.name, func(t *testing.T) { + result := detectLevelFromLogLine(tt.log) + require.Equal(t, tt.expected, result, "log: %q", tt.log) + }) + } +}