From f8a72214cf0fd2016e95588a107c7c2b07bccaad Mon Sep 17 00:00:00 2001 From: Sam Jewell <2903904+samjewell@users.noreply.github.com> Date: Mon, 7 Apr 2025 19:23:39 +0100 Subject: [PATCH] SQL Expressions: Add more functions to the allowlist (#103546) * SQL Expressions: Allow many more functions and nodes * Also allow the `REGEXP_SUBSTR` function * Add window functions * add more JSON support, remove now and current time (for now) --------- Co-authored-by: Kyle Brandt --- pkg/expr/sql/parser_allow.go | 64 +++++++++++++++-- pkg/expr/sql/parser_allow_test.go | 114 +++++++++++++++++++++++++++--- 2 files changed, 161 insertions(+), 17 deletions(-) diff --git a/pkg/expr/sql/parser_allow.go b/pkg/expr/sql/parser_allow.go index 7ae630fdc46..ad04529ba86 100644 --- a/pkg/expr/sql/parser_allow.go +++ b/pkg/expr/sql/parser_allow.go @@ -66,7 +66,10 @@ func allowedNode(node sqlparser.SQLNode) (b bool) { case *sqlparser.CaseExpr, *sqlparser.When: return - case sqlparser.ColIdent, *sqlparser.ColName, sqlparser.Columns: + case *sqlparser.CharExpr: + return + + case sqlparser.ColIdent, *sqlparser.ColName, sqlparser.Columns, sqlparser.ColumnType: return case sqlparser.Comments: // TODO: understand why some are pointer vs not @@ -87,6 +90,9 @@ func allowedNode(node sqlparser.SQLNode) (b bool) { case sqlparser.Exprs: return + case *sqlparser.ExtractFuncExpr: + return + case *sqlparser.GroupConcatExpr: return @@ -108,6 +114,9 @@ func allowedNode(node sqlparser.SQLNode) (b bool) { case *sqlparser.JoinTableExpr, sqlparser.JoinCondition: return + case *sqlparser.JSONTableExpr, *sqlparser.JSONTableSpec, *sqlparser.JSONTableColDef: + return + case *sqlparser.Select, sqlparser.SelectExprs, *sqlparser.ParenSelect: return @@ -141,6 +150,9 @@ func allowedNode(node sqlparser.SQLNode) (b bool) { case sqlparser.TableName, sqlparser.TableExprs, sqlparser.TableIdent: return + case *sqlparser.TimestampFuncExpr: + return + case *sqlparser.TrimExpr: return @@ -174,6 +186,12 @@ func allowedFunction(f *sqlparser.FuncExpr) (b bool) { return case "variance", "var_pop": return + case "group_concat": + return + case "row_number", "rank", "dense_rank", "lead", "lag": + return + case "first_value", "last_value": + return // Mathematical functions case "abs": @@ -184,7 +202,13 @@ func allowedFunction(f *sqlparser.FuncExpr) (b bool) { return case "mod", "log", "log10", "exp": return - case "sign": + case "sign", "ln", "truncate": + return + case "sin", "cos", "tan": + return + case "asin", "acos", "atan", "atan2": + return + case "rand", "pi": return // String functions @@ -194,11 +218,25 @@ func allowedFunction(f *sqlparser.FuncExpr) (b bool) { return case "substring", "substring_index": return + case "left", "right": + return + case "ltrim", "rtrim": + return + case "replace", "reverse": + return + case "lcase", "ucase", "mid", "repeat": + return + case "position", "instr", "locate": + return + case "ascii", "ord", "char": + return + case "regexp_substr": + return // Date functions case "str_to_date": return - case "date_format", "now", "curdate", "curtime": + case "date_format": return case "date_add", "date_sub": return @@ -208,15 +246,27 @@ func allowedFunction(f *sqlparser.FuncExpr) (b bool) { return case "unix_timestamp", "from_unixtime": return + case "extract", "hour", "minute", "second": + return + case "dayname", "monthname", "dayofweek", "dayofmonth", "dayofyear": + return + case "week", "quarter", "time_to_sec", "sec_to_time": + return + case "timestampdiff", "timestampadd": + return // Type conversion - case "cast": + case "cast", "convert": return // JSON functions - case "json_extract", "json_unquote", "json_contains", - "json_object", "json_array", "json_set", "json_remove", - "json_length", "json_search", "json_type": + case "json_extract", "json_object", "json_array", "json_merge_patch", "json_valid": + return + case "json_contains", "json_length", "json_type", "json_keys": + return + case "json_search", "json_quote", "json_unquote": + return + case "json_set", "json_insert", "json_replace", "json_remove": return default: diff --git a/pkg/expr/sql/parser_allow_test.go b/pkg/expr/sql/parser_allow_test.go index 4d42107da33..f03b3ff2cd1 100644 --- a/pkg/expr/sql/parser_allow_test.go +++ b/pkg/expr/sql/parser_allow_test.go @@ -28,8 +28,13 @@ func TestAllowQuery(t *testing.T) { err: nil, }, { - name: "all allowed functions", - q: example_all_allowed_functions, + name: "many allowed functions", + q: example_many_allowed_functions, + err: nil, + }, + { + name: "many more allowed functions", + q: example_many_more_allowed_functions, err: nil, }, { @@ -77,6 +82,16 @@ func TestAllowQuery(t *testing.T) { q: `SELECT '2024-04-01 15:30:00' BETWEEN '2024-04-01 15:29:00' AND '2024-04-01 15:31:00'`, err: nil, }, + { + name: "window functions", + q: example_window_functions, + err: nil, + }, + { + name: "json table", + q: "SELECT * FROM mockGitHubIssuesDSResponse, JSON_TABLE(labels, '$[*]' COLUMNS(val VARCHAR(255) PATH '$')) AS jt WHERE CAST(jt.val AS CHAR) LIKE 'type%'", + err: nil, + }, } for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { @@ -174,15 +189,15 @@ var example_case_statement = `SELECT END AS category FROM metrics` -var example_all_allowed_functions = `WITH sample_data AS ( +var example_many_allowed_functions = `WITH sample_data AS ( SELECT 100 AS value, 'example' AS name, - NOW() AS created_at + '2025-01-01 00:00:00' AS created_at UNION ALL SELECT 50 AS value, 'test' AS name, - DATE_SUB(NOW(), INTERVAL 1 DAY) AS created_at + DATE_SUB('2025-01-01 00:00:00', INTERVAL 1 DAY) AS created_at ) SELECT -- Conditional functions @@ -229,17 +244,15 @@ SELECT -- Date functions STR_TO_DATE('2023-01-01', '%Y-%m-%d') AS date_str_to_date, - DATE_FORMAT(NOW(), '%Y-%m-%d') AS date_format, - NOW() AS date_now, - CURDATE() AS date_curdate, - CURTIME() AS date_curtime, + DATE_FORMAT('2025-01-01 00:00:00', '%Y-%m-%d') AS date_format, + '2025-01-01 00:00:00' AS date_now, DATE_ADD(created_at, INTERVAL 1 DAY) AS date_add, DATE_SUB(created_at, INTERVAL 1 DAY) AS date_sub, YEAR(created_at) AS date_year, MONTH(created_at) AS date_month, DAY(created_at) AS date_day, WEEKDAY(created_at) AS date_weekday, - DATEDIFF(NOW(), created_at) AS date_datediff, + DATEDIFF('2025-01-01 00:00:00', created_at) AS date_datediff, UNIX_TIMESTAMP(created_at) AS date_unix_timestamp, FROM_UNIXTIME(1634567890) AS date_from_unixtime, @@ -261,3 +274,84 @@ var example_json_functions = `SELECT JSON_LENGTH('{"a": 1, "b": {"c": 3}}') AS json_len, JSON_SEARCH('{"a": "xyz", "b": "abc"}', 'one', 'abc') AS json_search, JSON_TYPE('{"a": 1}') AS json_type` + +var example_many_more_allowed_functions = ` +SELECT + -- Math functions + LN(10) as ln_val, + TRUNCATE(12.345, 2) as truncate_val, + SIN(0.5) as sin_val, + COS(0.5) as cos_val, + TAN(0.5) as tan_val, + ASIN(0.5) as asin_val, + ACOS(0.5) as acos_val, + ATAN(0.5) as atan_val, + ATAN2(1, 2) as atan2_val, + RAND() as rand_val, + PI() as pi_val, + + -- String functions + LEFT('hello', 2) as left_val, + RIGHT('hello', 2) as right_val, + LTRIM(' hello') as ltrim_val, + RTRIM('hello ') as rtrim_val, + REPLACE('hello', 'l', 'x') as replace_val, + REVERSE('hello') as reverse_val, + LCASE('HELLO') as lcase_val, + UCASE('hello') as ucase_val, + MID('hello', 2, 2) as mid_val, + REPEAT('a', 3) as repeat_val, + POSITION('l' IN 'hello') as position_val, + INSTR('hello', 'l') as instr_val, + LOCATE('l', 'hello') as locate_val, + ASCII('A') as ascii_val, + ORD('A') as ord_val, + CHAR(65) as char_val, + REGEXP_SUBSTR('hello world', 'world') as regexp_substr_val, + + -- Date functions + EXTRACT(YEAR FROM '2023-01-01') as extract_val, + HOUR('12:34:56') as hour_val, + MINUTE('12:34:56') as minute_val, + SECOND('12:34:56') as second_val, + DAYNAME('2023-01-01') as dayname_val, + MONTHNAME('2023-01-01') as monthname_val, + DAYOFWEEK('2023-01-01') as dayofweek_val, + DAYOFMONTH('2023-01-01') as dayofmonth_val, + DAYOFYEAR('2023-01-01') as dayofyear_val, + WEEK('2023-01-01') as week_val, + QUARTER('2023-01-01') as quarter_val, + TIME_TO_SEC('12:34:56') as time_to_sec_val, + SEC_TO_TIME(45296) as sec_to_time_val, + TIMESTAMPDIFF(HOUR, '2023-01-01', '2023-01-02') as timestampdiff_val, + TIMESTAMPADD(HOUR, 1, '2023-01-01') as timestampadd_val, + + -- Type conversion + CONVERT(12.34, CHAR) as convert_val, + + -- JSON functions + JSON_MERGE_PATCH('{"a": 1}', '{"b": 2}') as json_merge_patch_val, + JSON_VALID('{"a": 1}') as json_valid_val, + JSON_KEYS('{"a": 1, "b": 2}') as json_keys_val, + JSON_QUOTE('hello') as json_quote_val, + JSON_INSERT('{"a": 1}', '$.b', 2) as json_insert_val, + JSON_REPLACE('{"a": 1, "b": 2}', '$.b', 3) as json_replace_val +FROM dual;` + +var example_window_functions = ` +WITH dummy_data AS ( + SELECT 1 as val, 'apple' as txt + UNION ALL SELECT 2, 'banana' + UNION ALL SELECT 3, 'cherry' +) +SELECT + val, + txt, + ROW_NUMBER() OVER (ORDER BY val) as row_num, + RANK() OVER (ORDER BY val) as rank_val, + DENSE_RANK() OVER (ORDER BY val) as dense_rank_val, + LEAD(val) OVER (ORDER BY val) as lead_val, + LAG(val) OVER (ORDER BY val) as lag_val, + FIRST_VALUE(val) OVER (ORDER BY val) as first_val, + LAST_VALUE(val) OVER (ORDER BY val ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) as last_val +FROM dummy_data;`