	caddyfile: Export Tokenize function for lexing (#3549)

caddyconfig/caddyfile/lexer.go
@@ -16,6 +16,7 @@ package caddyfile
 
 import (
 	"bufio"
+	"bytes"
 	"io"
 	"unicode"
 )
@@ -168,3 +169,21 @@ func (l *lexer) next() bool {
 		val = append(val, ch)
 	}
 }
+
+// Tokenize takes bytes as input and lexes it into
+// a list of tokens that can be parsed as a Caddyfile.
+// Also takes a filename to fill the token's File as
+// the source of the tokens, which is important to
+// determine relative paths for `import` directives.
+func Tokenize(input []byte, filename string) ([]Token, error) {
+	l := lexer{}
+	if err := l.load(bytes.NewReader(input)); err != nil {
+		return nil, err
+	}
+	var tokens []Token
+	for l.next() {
+		l.token.File = filename
+		tokens = append(tokens, l.token)
+	}
+	return tokens, nil
+}
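
The exported Tokenize gives external code a public entry point into the Caddyfile lexer. A minimal consumer sketch, assuming Caddy v2's module path (github.com/caddyserver/caddy/v2/caddyconfig/caddyfile); the sample input and the printing loop are illustrative, not part of this commit:

package main

import (
	"fmt"

	"github.com/caddyserver/caddy/v2/caddyconfig/caddyfile"
)

func main() {
	// Any Caddyfile fragment works as input.
	input := []byte("example.com {\n\treverse_proxy localhost:9000\n}")

	// The filename is recorded in each token's File field, which is what
	// later lets `import` directives resolve paths relative to their source.
	tokens, err := caddyfile.Tokenize(input, "Caddyfile")
	if err != nil {
		fmt.Println("lex error:", err)
		return
	}
	for _, tok := range tokens {
		fmt.Printf("%s:%d\t%s\n", tok.File, tok.Line, tok.Text)
	}
}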

caddyconfig/caddyfile/lexer_test.go
@@ -15,37 +15,35 @@
 package caddyfile
 
 import (
-	"log"
-	"strings"
 	"testing"
 )
 
 type lexerTestCase struct {
-	input    string
+	input    []byte
 	expected []Token
 }
 
 func TestLexer(t *testing.T) {
 	testCases := []lexerTestCase{
 		{
-			input: `host:123`,
+			input: []byte(`host:123`),
 			expected: []Token{
 				{Line: 1, Text: "host:123"},
 			},
 		},
 		{
-			input: `host:123
+			input: []byte(`host:123
 
-					directive`,
+					directive`),
 			expected: []Token{
 				{Line: 1, Text: "host:123"},
 				{Line: 3, Text: "directive"},
 			},
 		},
 		{
-			input: `host:123 {
+			input: []byte(`host:123 {
 						directive
-					}`,
+					}`),
 			expected: []Token{
 				{Line: 1, Text: "host:123"},
 				{Line: 1, Text: "{"},
@@ -54,7 +52,7 @@ func TestLexer(t *testing.T) {
 			},
 		},
 		{
-			input: `host:123 { directive }`,
+			input: []byte(`host:123 { directive }`),
 			expected: []Token{
 				{Line: 1, Text: "host:123"},
 				{Line: 1, Text: "{"},
@@ -63,12 +61,12 @@ func TestLexer(t *testing.T) {
 			},
 		},
 		{
-			input: `host:123 {
+			input: []byte(`host:123 {
 						#comment
 						directive
 						# comment
 						foobar # another comment
-					}`,
+					}`),
 			expected: []Token{
 				{Line: 1, Text: "host:123"},
 				{Line: 1, Text: "{"},
@@ -78,10 +76,10 @@ func TestLexer(t *testing.T) {
 			},
 		},
 		{
-			input: `host:123 {
+			input: []byte(`host:123 {
 						# hash inside string is not a comment
 						redir / /some/#/path
-					}`,
+					}`),
 			expected: []Token{
 				{Line: 1, Text: "host:123"},
 				{Line: 1, Text: "{"},
@@ -92,14 +90,14 @@ func TestLexer(t *testing.T) {
 			},
 		},
 		{
-			input: "# comment at beginning of file\n# comment at beginning of line\nhost:123",
+			input: []byte("# comment at beginning of file\n# comment at beginning of line\nhost:123"),
 			expected: []Token{
 				{Line: 3, Text: "host:123"},
 			},
 		},
 		{
-			input: `a "quoted value" b
-					foobar`,
+			input: []byte(`a "quoted value" b
+					foobar`),
 			expected: []Token{
 				{Line: 1, Text: "a"},
 				{Line: 1, Text: "quoted value"},
@@ -108,7 +106,7 @@ func TestLexer(t *testing.T) {
 			},
 		},
 		{
-			input: `A "quoted \"value\" inside" B`,
+			input: []byte(`A "quoted \"value\" inside" B`),
 			expected: []Token{
 				{Line: 1, Text: "A"},
 				{Line: 1, Text: `quoted "value" inside`},
@@ -116,7 +114,7 @@ func TestLexer(t *testing.T) {
 			},
 		},
 		{
-			input: "An escaped \"newline\\\ninside\" quotes",
+			input: []byte("An escaped \"newline\\\ninside\" quotes"),
 			expected: []Token{
 				{Line: 1, Text: "An"},
 				{Line: 1, Text: "escaped"},
@@ -125,7 +123,7 @@ func TestLexer(t *testing.T) {
 			},
 		},
 		{
-			input: "An escaped newline\\\noutside quotes",
+			input: []byte("An escaped newline\\\noutside quotes"),
 			expected: []Token{
 				{Line: 1, Text: "An"},
 				{Line: 1, Text: "escaped"},
@@ -135,7 +133,7 @@ func TestLexer(t *testing.T) {
 			},
 		},
 		{
-			input: "line1\\\nescaped\nline2\nline3",
+			input: []byte("line1\\\nescaped\nline2\nline3"),
 			expected: []Token{
 				{Line: 1, Text: "line1"},
 				{Line: 1, Text: "escaped"},
@@ -144,7 +142,7 @@ func TestLexer(t *testing.T) {
 			},
 		},
 		{
-			input: "line1\\\nescaped1\\\nescaped2\nline4\nline5",
+			input: []byte("line1\\\nescaped1\\\nescaped2\nline4\nline5"),
 			expected: []Token{
 				{Line: 1, Text: "line1"},
 				{Line: 1, Text: "escaped1"},
@@ -154,34 +152,34 @@ func TestLexer(t *testing.T) {
 			},
 		},
 		{
-			input: `"unescapable\ in quotes"`,
+			input: []byte(`"unescapable\ in quotes"`),
 			expected: []Token{
 				{Line: 1, Text: `unescapable\ in quotes`},
 			},
 		},
 		{
-			input: `"don't\escape"`,
+			input: []byte(`"don't\escape"`),
 			expected: []Token{
 				{Line: 1, Text: `don't\escape`},
 			},
 		},
 		{
-			input: `"don't\\escape"`,
+			input: []byte(`"don't\\escape"`),
 			expected: []Token{
 				{Line: 1, Text: `don't\\escape`},
 			},
 		},
 		{
-			input: `un\escapable`,
+			input: []byte(`un\escapable`),
 			expected: []Token{
 				{Line: 1, Text: `un\escapable`},
 			},
 		},
 		{
-			input: `A "quoted value with line
+			input: []byte(`A "quoted value with line
 					break inside" {
 						foobar
-					}`,
+					}`),
 			expected: []Token{
 				{Line: 1, Text: "A"},
 				{Line: 1, Text: "quoted value with line\n\t\t\t\t\tbreak inside"},
@@ -191,13 +189,13 @@ func TestLexer(t *testing.T) {
 			},
 		},
 		{
-			input: `"C:\php\php-cgi.exe"`,
+			input: []byte(`"C:\php\php-cgi.exe"`),
 			expected: []Token{
 				{Line: 1, Text: `C:\php\php-cgi.exe`},
 			},
 		},
 		{
-			input: `empty "" string`,
+			input: []byte(`empty "" string`),
 			expected: []Token{
 				{Line: 1, Text: `empty`},
 				{Line: 1, Text: ``},
@@ -205,7 +203,7 @@ func TestLexer(t *testing.T) {
 			},
 		},
 		{
-			input: "skip those\r\nCR characters",
+			input: []byte("skip those\r\nCR characters"),
 			expected: []Token{
 				{Line: 1, Text: "skip"},
 				{Line: 1, Text: "those"},
@@ -214,13 +212,13 @@ func TestLexer(t *testing.T) {
 			},
 		},
 		{
-			input: "\xEF\xBB\xBF:8080", // test with leading byte order mark
+			input: []byte("\xEF\xBB\xBF:8080"), // test with leading byte order mark
 			expected: []Token{
 				{Line: 1, Text: ":8080"},
 			},
 		},
 		{
-			input: "simple `backtick quoted` string",
+			input: []byte("simple `backtick quoted` string"),
 			expected: []Token{
 				{Line: 1, Text: `simple`},
 				{Line: 1, Text: `backtick quoted`},
@@ -228,7 +226,7 @@ func TestLexer(t *testing.T) {
 			},
 		},
 		{
-			input: "multiline `backtick\nquoted\n` string",
+			input: []byte("multiline `backtick\nquoted\n` string"),
 			expected: []Token{
 				{Line: 1, Text: `multiline`},
 				{Line: 1, Text: "backtick\nquoted\n"},
@@ -236,7 +234,7 @@ func TestLexer(t *testing.T) {
 			},
 		},
 		{
-			input: "nested `\"quotes inside\" backticks` string",
+			input: []byte("nested `\"quotes inside\" backticks` string"),
 			expected: []Token{
 				{Line: 1, Text: `nested`},
 				{Line: 1, Text: `"quotes inside" backticks`},
@@ -244,7 +242,7 @@ func TestLexer(t *testing.T) {
 			},
 		},
 		{
-			input: "reverse-nested \"`backticks` inside\" quotes",
+			input: []byte("reverse-nested \"`backticks` inside\" quotes"),
 			expected: []Token{
 				{Line: 1, Text: `reverse-nested`},
 				{Line: 1, Text: "`backticks` inside"},
@@ -254,22 +252,14 @@ func TestLexer(t *testing.T) {
 	}
 
 	for i, testCase := range testCases {
-		actual := tokenize(testCase.input)
+		actual, err := Tokenize(testCase.input, "")
+		if err != nil {
+			t.Errorf("%v", err)
+		}
 		lexerCompare(t, i, testCase.expected, actual)
 	}
 }
 
-func tokenize(input string) (tokens []Token) {
-	l := lexer{}
-	if err := l.load(strings.NewReader(input)); err != nil {
-		log.Printf("[ERROR] load failed: %v", err)
-	}
-	for l.next() {
-		tokens = append(tokens, l.token)
-	}
-	return
-}
-
 func lexerCompare(t *testing.T, n int, expected, actual []Token) {
 	if len(expected) != len(actual) {
 		t.Errorf("Test case %d: expected %d token(s) but got %d", n, len(expected), len(actual))

caddyconfig/caddyfile/parse.go
@@ -87,16 +87,10 @@ func allTokens(filename string, input []byte) ([]Token, error) {
 	if err != nil {
 		return nil, err
 	}
-	l := new(lexer)
-	err = l.load(bytes.NewReader(input))
+	tokens, err := Tokenize(input, filename)
 	if err != nil {
 		return nil, err
 	}
-	var tokens []Token
-	for l.next() {
-		l.token.File = filename
-		tokens = append(tokens, l.token)
-	}
 	return tokens, nil
 }
 
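
Since Tokenize and Token are both exported, the table-driven pattern from the test diff above also works from outside the package. A black-box test sketch; the package name, filename argument, and expected token list are assumptions derived from the `host:123 { directive }` case shown in this diff:

package caddyfile_test

import (
	"testing"

	"github.com/caddyserver/caddy/v2/caddyconfig/caddyfile"
)

// External (black-box) test: no access to the unexported lexer is needed.
func TestTokenizeExported(t *testing.T) {
	// Lex a fragment borrowed from the table-driven cases above.
	tokens, err := caddyfile.Tokenize([]byte(`host:123 { directive }`), "test.caddyfile")
	if err != nil {
		t.Fatal(err)
	}
	want := []string{"host:123", "{", "directive", "}"}
	if len(tokens) != len(want) {
		t.Fatalf("expected %d token(s) but got %d", len(want), len(tokens))
	}
	for i, w := range want {
		if tokens[i].Text != w {
			t.Errorf("token %d: expected %q, got %q", i, w, tokens[i].Text)
		}
	}
}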