chore: Improve level detection and best guess capability (#21132)

This commit is contained in:
Shantanu Alshi
2026-03-11 20:53:31 +05:30
committed by GitHub
parent c6c3fb6739
commit 9c2c0633f0
2 changed files with 169 additions and 35 deletions

View File

@@ -38,6 +38,21 @@ var (
fatal = []byte("fatal")
errKeyFound = errors.New("key found")
levelPatterns = []struct {
word string
level string
}{
{"trace", constants.LogLevelTrace},
{"debug", constants.LogLevelDebug},
{"fatal", constants.LogLevelFatal},
{"critical", constants.LogLevelCritical},
{"error", constants.LogLevelError},
{"err", constants.LogLevelError},
{"warning", constants.LogLevelWarn},
{"warn", constants.LogLevelWarn},
{"info", constants.LogLevelInfo},
}
)
func allowedLabelsForLevel(allowedFields []string) []string {
@@ -330,49 +345,59 @@ func isJSON(line string) bool {
return firstNonSpaceChar == '{' && lastNonSpaceChar == '}'
}
func isWordBoundary(s string, pos int) bool {
// isLeftWordBoundary checks the character to the left of a potential keyword match.
// Colons are intentionally excluded: they indicate a key:value compound (e.g. misc:error)
// where the keyword is not a standalone log level.
// Operates on bytes since log lines are expected to be ASCII.
func isLeftWordBoundary(s string, pos int) bool {
if pos < 0 || pos >= len(s) {
return true // Start/end of string is a boundary
}
r := rune(s[pos])
return !unicode.IsLetter(r) && !unicode.IsDigit(r) && r != '_'
}
func logHasBoundedLevel(log, level string) bool {
pos := strings.Index(log, level)
if pos == -1 {
return false
}
// make sure the word occurs on its own, with boundaries on both sides
if isWordBoundary(log, pos-1) && isWordBoundary(log, pos+len(level)) {
return true
}
c := s[pos]
return c == ' ' || c == '\t' || c == '\n' || c == '[' || c == '(' || c == '{' || c == '"' || c == '='
}
return false
// isRightWordBoundary checks the character to the right of a potential keyword match.
// Colons are allowed here to support "level:" prefix patterns (e.g. "debug: message").
// Equals and quotes are allowed to support key=value and key="value" patterns.
// Operates on bytes since log lines are expected to be ASCII.
func isRightWordBoundary(s string, pos int) bool {
if pos < 0 || pos >= len(s) {
return true
}
c := s[pos]
return c == ' ' || c == '\t' || c == '\n' || c == '[' || c == ']' || c == '(' || c == ')' || c == '{' || c == '}' || c == ':' || c == ',' || c == '!' || c == '"' || c == '='
}
func indexOfBoundedLevel(log, level string) int {
offset := 0
for {
pos := strings.Index(log[offset:], level)
if pos == -1 {
return -1
}
abs := offset + pos
if isLeftWordBoundary(log, abs-1) && isRightWordBoundary(log, abs+len(level)) {
return abs
}
offset = abs + 1
}
}
func detectLevelFromLogLine(log string) string {
levelPatterns := []struct {
word string
level string
}{
{"trace", constants.LogLevelTrace},
{"debug", constants.LogLevelDebug},
{"fatal", constants.LogLevelFatal},
{"critical", constants.LogLevelCritical},
{"error", constants.LogLevelError},
{"err", constants.LogLevelError},
{"warning", constants.LogLevelWarn},
{"warn", constants.LogLevelWarn},
{"info", constants.LogLevelInfo},
}
lowerLog := strings.ToLower(log)
for _, level := range levelPatterns {
if logHasBoundedLevel(lowerLog, level.word) {
return level.level
idx, bestGuess := len(lowerLog), constants.LogLevelUnknown
for _, p := range levelPatterns {
pos := indexOfBoundedLevel(lowerLog, p.word)
if pos == -1 || pos >= idx {
continue
}
idx = pos
bestGuess = p.level
if idx == 0 {
break
}
}
return constants.LogLevelUnknown
return bestGuess
}

View File

@@ -1408,3 +1408,112 @@ func Test_detectLevelFromLogLine(t *testing.T) {
})
}
}
func Test_detectLevelFromLogLine_earliestPosition(t *testing.T) {
for _, tc := range []struct {
name string
log string
expected string
}{
{
name: "info before warning and error in arguments picks info",
log: "2024-01-01T10:00:00Z info request failed -l warning --log-level misc:error",
expected: constants.LogLevelInfo,
},
{
name: "warning appears before info positionally wins despite lower pattern priority",
log: "warning: degraded, info follows",
expected: constants.LogLevelWarn,
},
{
name: "error appears before info positionally wins",
log: "[error] something went wrong, info available",
expected: constants.LogLevelError,
},
{
name: "key:value colon prefix does not match, later bounded keyword wins",
log: "component=misc:error [info] processed",
expected: constants.LogLevelInfo,
},
{
name: "first occurrence of keyword is unbounded, second is bounded",
log: "fooerror [error] message",
expected: constants.LogLevelError,
},
{
name: "all occurrences unbounded returns unknown",
log: "fooerror errorbar misc:error error_code",
expected: constants.LogLevelUnknown,
},
// nginx error log: level in [brackets] after timestamp
{
name: "nginx error log with [error] bracket",
log: "2024/01/01 10:00:00 [error] 1234#1234: *1 connect() failed (111: Connection refused) while connecting to upstream",
expected: constants.LogLevelError,
},
{
name: "nginx warn log with [warn] bracket",
log: "2024/01/01 10:00:00 [warn] 1234#1234: upstream server temporarily disabled while reading response header from upstream",
expected: constants.LogLevelWarn,
},
{
name: "nginx access log with error in url path returns unknown",
log: `192.168.1.1 - - [01/Jan/2024:10:00:00 +0000] "GET /api/error-handler HTTP/1.1" 500 1234 "-" "curl/7.68.0"`,
expected: constants.LogLevelUnknown,
},
// apache httpd error log: level in second [brackets] after date
{
name: "apache httpd error log",
log: "[Mon Jan 01 10:00:00.000000 2024] [error] [pid 1234] File does not exist: /var/www/html/favicon.ico",
expected: constants.LogLevelError,
},
{
name: "apache httpd warn log",
log: "[Mon Jan 01 10:00:00.000000 2024] [warn] [pid 1234] RSA server certificate CommonName does not match",
expected: constants.LogLevelWarn,
},
// spring boot: uppercase level between timestamp and PID, message may repeat level words
{
name: "spring boot INFO log",
log: "2024-01-01 10:00:00.000 INFO 1234 --- [main] com.example.App : Starting application on host",
expected: constants.LogLevelInfo,
},
{
name: "spring boot ERROR where message also contains error",
log: "2024-01-01 10:00:00.000 ERROR 1234 --- [main] com.example.App : Application startup failed due to connection error",
expected: constants.LogLevelError,
},
{
name: "spring boot INFO where class name contains error but is not a standalone keyword",
log: "2024-01-01 10:00:00.000 INFO 1234 --- [main] com.example.ErrorController : Handling request for /api/error-resource",
expected: constants.LogLevelInfo,
},
// log4j: level after timestamp, followed by logger name and message
{
name: "log4j WARN log",
log: "2024-01-01 10:00:00,000 WARN com.example.service.UserService - Failed to load user preferences, using defaults",
expected: constants.LogLevelWarn,
},
{
name: "log4j ERROR where logger name and message both contain error",
log: "2024-01-01 10:00:00,000 ERROR com.example.service.DatabaseService - Connection failed after error: timeout",
expected: constants.LogLevelError,
},
// istio/envoy: tab-separated fields, level is second field; message may contain metric names like error_rate
{
name: "istio warn log with error_rate metric in message",
log: "2024-01-01T10:00:00.000000Z\twarn\tads\tcluster update: warning threshold exceeded, error_rate=0.05",
expected: constants.LogLevelWarn,
},
{
name: "envoy info log with warning and misc:error flags in command args",
log: "2024-01-01T10:00:00.000000Z info Envoy command: [-c /etc/envoy.json -l warning --component-log-level misc:error --concurrency 2]",
expected: constants.LogLevelInfo,
},
} {
t.Run(tc.name, func(t *testing.T) {
result := detectLevelFromLogLine(tc.log)
require.Equal(t, tc.expected, result, "log: %q", tc.log)
})
}
}