Files
Kir Kolyshkin 0dddc5e3c0 Apply De Morgan's law
This fixes a bunch of "QF1001: could apply De Morgan's law" warnings
from staticcheck linter.

Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
2025-03-31 12:27:55 -07:00

509 lines
11 KiB
Go

package parser
import (
"fmt"
"strings"
"unicode"
)
/* Functions to split/join, unescape/escape strings similar to Exec=... lines in unit files */
// SplitFlags is a bit set of options that control how extractFirstWord, and
// the split helpers built on top of it, tokenize, unquote and unescape input.
type SplitFlags = uint64
// Individual SplitFlags bits. Each constant occupies its own bit, so they can
// be combined with bitwise OR.
const (
SplitRelax SplitFlags = 1 << iota // Allow unbalanced quote and eat up trailing backslash.
SplitCUnescape // Unescape known escape sequences.
SplitUnescapeRelax // Allow and keep unknown escape sequences, allow and keep trailing backslash.
SplitUnescapeSeparators // Unescape escaped separators (those passed as separators) and the escape character itself.
SplitKeepQuote // Ignore separators in quoting with "" and ''.
SplitUnquote // Ignore separators in quoting with "" and '', and remove the quotes.
SplitDontCoalesceSeparators // Don't treat multiple adjacent separators as one.
SplitRetainEscape // Treat escape character '\' as any other character without special meaning.
SplitRetainSeparators // Do not advance past the separator(s); the returned remainder still starts with them.
)
// WhitespaceSeparators is a separator set made of space, tab, newline and
// carriage return, suitable for the separators argument of the split helpers.
const WhitespaceSeparators = " \t\n\r"
// unoctchar converts the ASCII octal digit v to its numeric value (0-7).
// It returns -1 when v is not an octal digit.
func unoctchar(v byte) int {
	switch {
	case v < '0', v > '7':
		return -1
	default:
		return int(v) - '0'
	}
}
// unhexchar converts the ASCII hexadecimal digit v (either case) to its
// numeric value (0-15). It returns -1 when v is not a hexadecimal digit.
func unhexchar(v byte) int {
	switch {
	case '0' <= v && v <= '9':
		return int(v - '0')
	case 'a' <= v && v <= 'f':
		return int(v-'a') + 10
	case 'A' <= v && v <= 'F':
		return int(v-'A') + 10
	}
	return -1
}
// isValidUnicode reports whether c does not exceed unicode.MaxRune, the
// largest Unicode code point. No other property of c is checked.
func isValidUnicode(c uint32) bool {
	const maxCodePoint uint32 = unicode.MaxRune
	return c <= maxCodePoint
}
/* This is based on code from systemd (src/basic/escape.c), marked LGPL-2.1-or-later and is copyrighted by the systemd developers */
// cUnescapeOne decodes one C-style escape sequence found at the start of p.
// p starts with the character that follows the backslash (the backslash
// itself is not part of p).
//
// It returns, in order:
//   - the number of bytes of p that make up the escape sequence,
//   - the decoded character,
//   - eightBit, which is true only for \xNN and three-digit octal escapes,
//     i.e. the forms that spell out a single byte value rather than a Unicode
//     code point.
//
// Every failure is reported as (-1, 0, false): empty input, an unknown escape
// character, a truncated sequence, a non-digit where a digit is required, an
// octal value above 255, a \U value above unicode.MaxRune, or a sequence that
// decodes to NUL while acceptNul is false.
func cUnescapeOne(p string, acceptNul bool) (int, rune, bool) {
	var count = 1
	var eightBit = false
	var ret rune

	if len(p) < 1 {
		return -1, 0, false
	}

	switch p[0] {
	case 'a':
		ret = '\a'
	case 'b':
		ret = '\b'
	case 'f':
		ret = '\f'
	case 'n':
		ret = '\n'
	case 'r':
		ret = '\r'
	case 't':
		ret = '\t'
	case 'v':
		ret = '\v'
	case '\\':
		ret = '\\'
	case '"':
		ret = '"'
	case '\'':
		ret = '\''
	case 's':
		/* This is an extension of the XDG syntax files */
		ret = ' '
	case 'x':
		/* hexadecimal encoding */
		if len(p) < 3 {
			return -1, 0, false
		}

		a := unhexchar(p[1])
		if a < 0 {
			return -1, 0, false
		}

		b := unhexchar(p[2])
		if b < 0 {
			return -1, 0, false
		}

		/* Don't allow NUL bytes */
		if a == 0 && b == 0 && !acceptNul {
			return -1, 0, false
		}

		ret = rune((a << 4) | b)
		eightBit = true
		count = 3
	case 'u':
		/* C++11 style 16bit unicode */
		if len(p) < 5 {
			return -1, 0, false
		}

		var c uint32
		for i := 1; i <= 4; i++ {
			d := unhexchar(p[i])
			if d < 0 {
				return -1, 0, false
			}
			c = (c << 4) | uint32(d)
		}

		/* Don't allow 0 chars */
		if c == 0 && !acceptNul {
			return -1, 0, false
		}

		ret = rune(c)
		count = 5
	case 'U':
		/* C++11 style 32bit unicode */
		if len(p) < 9 {
			return -1, 0, false
		}

		var c uint32
		for i := 1; i <= 8; i++ {
			d := unhexchar(p[i])
			if d < 0 {
				// Same failure value as every other error path.
				return -1, 0, false
			}
			c = (c << 4) | uint32(d)
		}

		/* Don't allow 0 chars */
		if c == 0 && !acceptNul {
			return -1, 0, false
		}

		/* Don't allow invalid code points */
		if !isValidUnicode(c) {
			return -1, 0, false
		}

		ret = rune(c)
		count = 9
	case '0', '1', '2', '3', '4', '5', '6', '7':
		/* octal encoding */
		if len(p) < 3 {
			return -1, 0, false
		}

		// The case label already guarantees p[0] is an octal digit.
		a := unoctchar(p[0])

		b := unoctchar(p[1])
		if b < 0 {
			return -1, 0, false
		}

		c := unoctchar(p[2])
		if c < 0 {
			return -1, 0, false
		}

		/* don't allow NUL bytes */
		if a == 0 && b == 0 && c == 0 && !acceptNul {
			return -1, 0, false
		}

		/* Don't allow bytes above 255 */
		m := (uint32(a) << 6) | (uint32(b) << 3) | uint32(c)
		if m > 255 {
			return -1, 0, false
		}

		ret = rune(m)
		eightBit = true
		count = 3
	default:
		return -1, 0, false
	}

	return count, ret, eightBit
}
/* This is based on code from systemd (src/basic/extract-word.c), marked LGPL-2.1-or-later and is copyrighted by the systemd developers */

// extractFirstWord parses the first word of in and returns it together with
// the part of in that has not been parsed yet.
//
// separators lists the characters that delimit words; flags selects the
// quoting and escaping rules (see the Split* constants).
//
// Returns: word, remaining, more-words, error.
//
// more-words is false (with empty word and remaining) when in contains no
// further word, i.e. it is empty or consists only of separators. Once a
// non-separator character has been seen a word is always returned, even if
// it ends up empty (for example a quoted empty string under SplitUnquote).
//
// An error is returned for a trailing backslash, an unterminated quote or an
// unsupported escape sequence, unless the corresponding relax flag allows it.
func extractFirstWord(in string, separators string, flags SplitFlags) (string, string, bool, error) {
	var s strings.Builder
	var quote byte     // 0 or ' or "
	backslash := false // whether we've just seen a backslash

	// started records that a non-separator character was found. The C
	// original allocates its result buffer at that point so that a word is
	// returned even when it stays empty. Testing s.Len() instead would treat
	// an empty word as end of input and make callers drop everything after it.
	started := false

	// The string handling in this function is a bit weird, using
	// 0 bytes to mark end-of-string. This is because it is a direct
	// conversion of the C in systemd, and we want to ensure
	// exactly the same behaviour of some complex code

	p := 0
	end := len(in)
	var c byte

	nextChar := func() byte {
		p++
		if p >= end {
			return 0
		}
		return in[p]
	}

	/* Bail early if called after last value or with no input */
	if len(in) == 0 {
		goto finish
	}

	// Parses the first word of a string, and returns it and the
	// remainder. Removes all quotes in the process. When parsing
	// fails (because of an uneven number of quotes or similar),
	// the rest is at the first invalid character.

loop1:
	for c = in[0]; ; c = nextChar() {
		switch {
		case c == 0:
			goto finishForceTerminate
		case strings.ContainsRune(separators, rune(c)):
			if flags&SplitDontCoalesceSeparators != 0 {
				if flags&SplitRetainSeparators == 0 {
					p++
				}
				goto finishForceNext
			}
		default:
			// We found a non-blank character, so we will always
			// want to return a string (even if it is empty).
			started = true
			break loop1
		}
	}

	for ; ; c = nextChar() {
		switch {
		case backslash:
			if c == 0 {
				if flags&SplitUnescapeRelax != 0 &&
					(quote == 0 || flags&SplitRelax != 0) {
					// If we find an unquoted trailing backslash and we're in
					// SplitUnescapeRelax mode, keep it verbatim in the
					// output.
					//
					// Unbalanced quotes will only be allowed in SplitRelax
					// mode, SplitUnescapeRelax mode does not allow them.
					s.WriteString("\\")
					goto finishForceTerminate
				}
				if flags&SplitRelax != 0 {
					goto finishForceTerminate
				}
				return "", "", false, fmt.Errorf("unbalanced escape")
			}

			if flags&(SplitCUnescape|SplitUnescapeSeparators) != 0 {
				var r = -1
				var u rune

				if flags&SplitCUnescape != 0 {
					r, u, _ = cUnescapeOne(in[p:], false)
				}

				switch {
				case r > 0:
					// The loop's nextChar() consumes the last byte of
					// the escape sequence.
					p += r - 1
					s.WriteRune(u)
				case (flags&SplitUnescapeSeparators != 0) &&
					(strings.ContainsRune(separators, rune(c)) || c == '\\'):
					/* An escaped separator char or the escape char itself */
					s.WriteByte(c)
				case flags&SplitUnescapeRelax != 0:
					s.WriteByte('\\')
					s.WriteByte(c)
				default:
					return "", "", false, fmt.Errorf("unsupported escape char")
				}
			} else {
				s.WriteByte(c)
			}

			backslash = false
		case quote != 0:
			/* inside either single or double quotes */
		quoteloop:
			for ; ; c = nextChar() {
				switch {
				case c == 0:
					if flags&SplitRelax != 0 {
						goto finishForceTerminate
					}
					return "", "", false, fmt.Errorf("unbalanced quotes")
				case c == quote:
					/* found the end quote */
					quote = 0
					if flags&SplitUnquote != 0 {
						break quoteloop
					}
				case c == '\\' && (flags&SplitRetainEscape == 0):
					backslash = true
					break quoteloop
				}

				s.WriteByte(c)

				if quote == 0 {
					break quoteloop
				}
			}
		default:
		nonquoteloop:
			for ; ; c = nextChar() {
				switch {
				case c == 0:
					goto finishForceTerminate
				case (c == '\'' || c == '"') && (flags&(SplitKeepQuote|SplitUnquote) != 0):
					quote = c
					if flags&SplitUnquote != 0 {
						break nonquoteloop
					}
				case c == '\\' && (flags&SplitRetainEscape == 0):
					backslash = true
					break nonquoteloop
				case strings.ContainsRune(separators, rune(c)):
					if flags&SplitDontCoalesceSeparators != 0 {
						if flags&SplitRetainSeparators == 0 {
							p++
						}
						goto finishForceNext
					}

					if flags&SplitRetainSeparators == 0 {
						/* Skip additional coalesced separators. */
						for ; ; c = nextChar() {
							if c == 0 {
								goto finishForceTerminate
							}
							if !strings.ContainsRune(separators, rune(c)) {
								break
							}
						}
					}
					goto finish
				}

				s.WriteByte(c)

				if quote != 0 {
					break nonquoteloop
				}
			}
		}
	}

finishForceTerminate:
	p = end

finish:
	if !started {
		return "", "", false, nil
	}

finishForceNext:
	return s.String(), in[p:], true, nil
}
// splitStringAppend splits s into words with extractFirstWord, using the given
// separators and flags, and appends the words to appendTo.
//
// On success it returns the extended slice. On failure it returns the slice
// as it was passed in (without any of the words found so far) together with
// the error.
//
// An error is also returned when extractFirstWord yields a word without
// consuming any input. That happens when SplitDontCoalesceSeparators and
// SplitRetainSeparators are combined and the input starts with a separator.
// Without this check the loop would never end and the slice would grow
// without bound.
func splitStringAppend(appendTo []string, s string, separators string, flags SplitFlags) ([]string, error) {
	orig := appendTo
	for {
		word, remaining, moreWords, err := extractFirstWord(s, separators, flags)
		if err != nil {
			return orig, err
		}
		if !moreWords {
			return appendTo, nil
		}
		if len(remaining) >= len(s) {
			return orig, fmt.Errorf("unable to advance past separator in %q", s)
		}
		appendTo = append(appendTo, word)
		s = remaining
	}
}
// splitString splits s into words according to separators and flags and
// returns them in a fresh slice. The returned slice is non-nil even when no
// word is found; any error from splitStringAppend is passed through.
func splitString(s string, separators string, flags SplitFlags) ([]string, error) {
	words := make([]string, 0)
	return splitStringAppend(words, s, separators, flags)
}
// charNeedEscape reports whether c must be escaped when written out.
//
// Non-ASCII runes never need escaping. Among ASCII characters, whitespace,
// control characters, both quote characters and the backslash always do;
// '-' and '/' do only when isPath is true.
func charNeedEscape(c rune, isPath bool) bool {
	// ASCII ends at 0x7F. Comparing against 128 would send U+0080 through
	// the checks below and escape it, unlike every other non-ASCII rune.
	if c > unicode.MaxASCII {
		return false /* unicode is ok */
	}

	switch c {
	case '"', '\'', '\\':
		return true
	case '-', '/':
		// Neither is whitespace nor a control character, so the path
		// flag alone decides.
		return isPath
	}

	return unicode.IsSpace(c) || unicode.IsControl(c)
}
// wordNeedEscape reports whether word contains at least one character for
// which charNeedEscape (in non-path mode) returns true.
func wordNeedEscape(word string) bool {
	found := false
	for _, r := range word {
		if found = charNeedEscape(r, false); found {
			break
		}
	}
	return found
}
// appendEscapeWord appends word to escaped, wrapped in double quotes, with its
// contents passed through escapeString in non-path mode.
func appendEscapeWord(escaped *strings.Builder, word string) {
	const quote = '"'

	escaped.WriteByte(quote)
	escapeString(escaped, word, false)
	escaped.WriteByte(quote)
}
// escapeString appends word to escaped, replacing every character for which
// charNeedEscape(c, isPath) is true:
//
//   - \a \b \n \r \t \v \f, the backslash and the double quote are written as
//     their two-character backslash sequences;
//   - space and the single quote are written unchanged;
//   - a '/' is written as '-' when isPath is set and it is not the first
//     character of word, and is dropped otherwise;
//   - anything else is written as \x followed by its value in hexadecimal
//     (at least two digits).
//
// All other characters are copied as they are.
func escapeString(escaped *strings.Builder, word string, isPath bool) {
	for pos, r := range word {
		if !charNeedEscape(r, isPath) {
			escaped.WriteRune(r)
			continue
		}

		// repl stays empty when the character is to be dropped.
		var repl string
		switch r {
		case '\a':
			repl = "\\a"
		case '\b':
			repl = "\\b"
		case '\n':
			repl = "\\n"
		case '\r':
			repl = "\\r"
		case '\t':
			repl = "\\t"
		case '\v':
			repl = "\\v"
		case '\f':
			repl = "\\f"
		case '\\':
			repl = "\\\\"
		case ' ':
			repl = " "
		case '"':
			repl = "\\\""
		case '\'':
			repl = "'"
		case '/':
			if isPath && pos != 0 {
				repl = "-"
			}
		default:
			repl = fmt.Sprintf("\\x%.2x", r)
		}
		escaped.WriteString(repl)
	}
}
// escapeWords joins words with single spaces into one string. A word for
// which wordNeedEscape is true is written quoted and escaped through
// appendEscapeWord; every other word is written unchanged.
func escapeWords(words []string) string {
	var out strings.Builder

	for idx := range words {
		if idx > 0 {
			out.WriteString(" ")
		}

		w := words[idx]
		if !wordNeedEscape(w) {
			out.WriteString(w)
			continue
		}
		appendEscapeWord(&out, w)
	}

	return out.String()
}