mirror of
https://github.com/grafana/loki.git
synced 2026-03-13 09:33:58 +08:00
chore: Improve level detection and best guess capability (#21132)
This commit is contained in:
@@ -38,6 +38,21 @@ var (
|
||||
fatal = []byte("fatal")
|
||||
|
||||
errKeyFound = errors.New("key found")
|
||||
|
||||
levelPatterns = []struct {
|
||||
word string
|
||||
level string
|
||||
}{
|
||||
{"trace", constants.LogLevelTrace},
|
||||
{"debug", constants.LogLevelDebug},
|
||||
{"fatal", constants.LogLevelFatal},
|
||||
{"critical", constants.LogLevelCritical},
|
||||
{"error", constants.LogLevelError},
|
||||
{"err", constants.LogLevelError},
|
||||
{"warning", constants.LogLevelWarn},
|
||||
{"warn", constants.LogLevelWarn},
|
||||
{"info", constants.LogLevelInfo},
|
||||
}
|
||||
)
|
||||
|
||||
func allowedLabelsForLevel(allowedFields []string) []string {
|
||||
@@ -330,49 +345,59 @@ func isJSON(line string) bool {
|
||||
return firstNonSpaceChar == '{' && lastNonSpaceChar == '}'
|
||||
}
|
||||
|
||||
func isWordBoundary(s string, pos int) bool {
|
||||
// isLeftWordBoundary checks the character to the left of a potential keyword match.
|
||||
// Colons are intentionally excluded: they indicate a key:value compound (e.g. misc:error)
|
||||
// where the keyword is not a standalone log level.
|
||||
// Operates on bytes since log lines are expected to be ASCII.
|
||||
func isLeftWordBoundary(s string, pos int) bool {
|
||||
if pos < 0 || pos >= len(s) {
|
||||
return true // Start/end of string is a boundary
|
||||
}
|
||||
r := rune(s[pos])
|
||||
return !unicode.IsLetter(r) && !unicode.IsDigit(r) && r != '_'
|
||||
}
|
||||
|
||||
func logHasBoundedLevel(log, level string) bool {
|
||||
pos := strings.Index(log, level)
|
||||
if pos == -1 {
|
||||
return false
|
||||
}
|
||||
|
||||
// make sure the word occurs on its own, with boundaries on both sides
|
||||
if isWordBoundary(log, pos-1) && isWordBoundary(log, pos+len(level)) {
|
||||
return true
|
||||
}
|
||||
c := s[pos]
|
||||
return c == ' ' || c == '\t' || c == '\n' || c == '[' || c == '(' || c == '{' || c == '"' || c == '='
|
||||
}
|
||||
|
||||
return false
|
||||
// isRightWordBoundary checks the character to the right of a potential keyword match.
|
||||
// Colons are allowed here to support "level:" prefix patterns (e.g. "debug: message").
|
||||
// Equals and quotes are allowed to support key=value and key="value" patterns.
|
||||
// Operates on bytes since log lines are expected to be ASCII.
|
||||
func isRightWordBoundary(s string, pos int) bool {
|
||||
if pos < 0 || pos >= len(s) {
|
||||
return true
|
||||
}
|
||||
c := s[pos]
|
||||
return c == ' ' || c == '\t' || c == '\n' || c == '[' || c == ']' || c == '(' || c == ')' || c == '{' || c == '}' || c == ':' || c == ',' || c == '!' || c == '"' || c == '='
|
||||
}
|
||||
|
||||
func indexOfBoundedLevel(log, level string) int {
|
||||
offset := 0
|
||||
for {
|
||||
pos := strings.Index(log[offset:], level)
|
||||
if pos == -1 {
|
||||
return -1
|
||||
}
|
||||
abs := offset + pos
|
||||
if isLeftWordBoundary(log, abs-1) && isRightWordBoundary(log, abs+len(level)) {
|
||||
return abs
|
||||
}
|
||||
offset = abs + 1
|
||||
}
|
||||
}
|
||||
|
||||
func detectLevelFromLogLine(log string) string {
|
||||
levelPatterns := []struct {
|
||||
word string
|
||||
level string
|
||||
}{
|
||||
{"trace", constants.LogLevelTrace},
|
||||
{"debug", constants.LogLevelDebug},
|
||||
{"fatal", constants.LogLevelFatal},
|
||||
{"critical", constants.LogLevelCritical},
|
||||
{"error", constants.LogLevelError},
|
||||
{"err", constants.LogLevelError},
|
||||
{"warning", constants.LogLevelWarn},
|
||||
{"warn", constants.LogLevelWarn},
|
||||
{"info", constants.LogLevelInfo},
|
||||
}
|
||||
lowerLog := strings.ToLower(log)
|
||||
for _, level := range levelPatterns {
|
||||
if logHasBoundedLevel(lowerLog, level.word) {
|
||||
return level.level
|
||||
idx, bestGuess := len(lowerLog), constants.LogLevelUnknown
|
||||
|
||||
for _, p := range levelPatterns {
|
||||
pos := indexOfBoundedLevel(lowerLog, p.word)
|
||||
if pos == -1 || pos >= idx {
|
||||
continue
|
||||
}
|
||||
idx = pos
|
||||
bestGuess = p.level
|
||||
if idx == 0 {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
return constants.LogLevelUnknown
|
||||
return bestGuess
|
||||
}
|
||||
|
||||
@@ -1408,3 +1408,112 @@ func Test_detectLevelFromLogLine(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func Test_detectLevelFromLogLine_earliestPosition(t *testing.T) {
|
||||
for _, tc := range []struct {
|
||||
name string
|
||||
log string
|
||||
expected string
|
||||
}{
|
||||
{
|
||||
name: "info before warning and error in arguments picks info",
|
||||
log: "2024-01-01T10:00:00Z info request failed -l warning --log-level misc:error",
|
||||
expected: constants.LogLevelInfo,
|
||||
},
|
||||
{
|
||||
name: "warning appears before info positionally wins despite lower pattern priority",
|
||||
log: "warning: degraded, info follows",
|
||||
expected: constants.LogLevelWarn,
|
||||
},
|
||||
{
|
||||
name: "error appears before info positionally wins",
|
||||
log: "[error] something went wrong, info available",
|
||||
expected: constants.LogLevelError,
|
||||
},
|
||||
{
|
||||
name: "key:value colon prefix does not match, later bounded keyword wins",
|
||||
log: "component=misc:error [info] processed",
|
||||
expected: constants.LogLevelInfo,
|
||||
},
|
||||
{
|
||||
name: "first occurrence of keyword is unbounded, second is bounded",
|
||||
log: "fooerror [error] message",
|
||||
expected: constants.LogLevelError,
|
||||
},
|
||||
{
|
||||
name: "all occurrences unbounded returns unknown",
|
||||
log: "fooerror errorbar misc:error error_code",
|
||||
expected: constants.LogLevelUnknown,
|
||||
},
|
||||
// nginx error log: level in [brackets] after timestamp
|
||||
{
|
||||
name: "nginx error log with [error] bracket",
|
||||
log: "2024/01/01 10:00:00 [error] 1234#1234: *1 connect() failed (111: Connection refused) while connecting to upstream",
|
||||
expected: constants.LogLevelError,
|
||||
},
|
||||
{
|
||||
name: "nginx warn log with [warn] bracket",
|
||||
log: "2024/01/01 10:00:00 [warn] 1234#1234: upstream server temporarily disabled while reading response header from upstream",
|
||||
expected: constants.LogLevelWarn,
|
||||
},
|
||||
{
|
||||
name: "nginx access log with error in url path returns unknown",
|
||||
log: `192.168.1.1 - - [01/Jan/2024:10:00:00 +0000] "GET /api/error-handler HTTP/1.1" 500 1234 "-" "curl/7.68.0"`,
|
||||
expected: constants.LogLevelUnknown,
|
||||
},
|
||||
// apache httpd error log: level in second [brackets] after date
|
||||
{
|
||||
name: "apache httpd error log",
|
||||
log: "[Mon Jan 01 10:00:00.000000 2024] [error] [pid 1234] File does not exist: /var/www/html/favicon.ico",
|
||||
expected: constants.LogLevelError,
|
||||
},
|
||||
{
|
||||
name: "apache httpd warn log",
|
||||
log: "[Mon Jan 01 10:00:00.000000 2024] [warn] [pid 1234] RSA server certificate CommonName does not match",
|
||||
expected: constants.LogLevelWarn,
|
||||
},
|
||||
// spring boot: uppercase level between timestamp and PID, message may repeat level words
|
||||
{
|
||||
name: "spring boot INFO log",
|
||||
log: "2024-01-01 10:00:00.000 INFO 1234 --- [main] com.example.App : Starting application on host",
|
||||
expected: constants.LogLevelInfo,
|
||||
},
|
||||
{
|
||||
name: "spring boot ERROR where message also contains error",
|
||||
log: "2024-01-01 10:00:00.000 ERROR 1234 --- [main] com.example.App : Application startup failed due to connection error",
|
||||
expected: constants.LogLevelError,
|
||||
},
|
||||
{
|
||||
name: "spring boot INFO where class name contains error but is not a standalone keyword",
|
||||
log: "2024-01-01 10:00:00.000 INFO 1234 --- [main] com.example.ErrorController : Handling request for /api/error-resource",
|
||||
expected: constants.LogLevelInfo,
|
||||
},
|
||||
// log4j: level after timestamp, followed by logger name and message
|
||||
{
|
||||
name: "log4j WARN log",
|
||||
log: "2024-01-01 10:00:00,000 WARN com.example.service.UserService - Failed to load user preferences, using defaults",
|
||||
expected: constants.LogLevelWarn,
|
||||
},
|
||||
{
|
||||
name: "log4j ERROR where logger name and message both contain error",
|
||||
log: "2024-01-01 10:00:00,000 ERROR com.example.service.DatabaseService - Connection failed after error: timeout",
|
||||
expected: constants.LogLevelError,
|
||||
},
|
||||
// istio/envoy: tab-separated fields, level is second field; message may contain metric names like error_rate
|
||||
{
|
||||
name: "istio warn log with error_rate metric in message",
|
||||
log: "2024-01-01T10:00:00.000000Z\twarn\tads\tcluster update: warning threshold exceeded, error_rate=0.05",
|
||||
expected: constants.LogLevelWarn,
|
||||
},
|
||||
{
|
||||
name: "envoy info log with warning and misc:error flags in command args",
|
||||
log: "2024-01-01T10:00:00.000000Z info Envoy command: [-c /etc/envoy.json -l warning --component-log-level misc:error --concurrency 2]",
|
||||
expected: constants.LogLevelInfo,
|
||||
},
|
||||
} {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
result := detectLevelFromLogLine(tc.log)
|
||||
require.Equal(t, tc.expected, result, "log: %q", tc.log)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user