chore(engine): introduce experimental expression engine (#20626)

2026-03-13 09:33:58 +08:00 · 2026-02-02 09:31:41 -05:00
parent 322a02d35d
commit 9d9d8ce14d
11 changed files with 572 additions and 19 deletions
--- a/pkg/columnar/recordbatch.go
+++ b/pkg/columnar/recordbatch.go
@@ -3,21 +3,26 @@ package columnar
 // RecordBatch is a collection of equal-length arrays.
 // This corresponds to the RecordBatch concept in the Arrow specification.
 type RecordBatch struct {
-	// TODO(ivkalita): add schema?
-
-	nrows int64
-	arrs  []Array
+	schema *Schema
+	nrows  int64
+	arrs   []Array
 }

 // NewRecordBatch returns a new RecordBatch created from the provided arrays.
 // nrows specifies the total number of rows in the batch.
-func NewRecordBatch(nrows int64, arrs []Array) *RecordBatch {
+func NewRecordBatch(schema *Schema, nrows int64, arrs []Array) *RecordBatch {
 	return &RecordBatch{
-		nrows: nrows,
-		arrs:  arrs,
+		schema: schema,
+		nrows:  nrows,
+		arrs:   arrs,
 	}
 }

+// Schema returns the schema rb was created with, which is nil if none was set.
+func (rb *RecordBatch) Schema() *Schema {
+	return rb.schema
+}
+
 // NumRows returns the number of rows in the batch.
 func (rb *RecordBatch) NumRows() int64 {
 	return rb.nrows
--- a/pkg/columnar/schema.go
+++ b/pkg/columnar/schema.go
@@ -0,0 +1,49 @@
+package columnar
+
+import "fmt"
+
+// TODO(rfratto): This is a placeholder for a more fleshed out schema
+// implementation. It's added to unblock the implementation of pkg/expr.
+
+// A Column describes a single column in a [RecordBatch].
+type Column struct {
+	Name string // Name of the column; must be unique within a [Schema].
+}
+
+// A Schema describes the set of columns in a [RecordBatch].
+type Schema struct {
+	columns       []Column       // Columns, in the order given to NewSchema.
+	columnIndices map[string]int // Maps a column name to its index in columns.
+}
+
+// NewSchema creates a new schema from a list of columns. Column names must be
+// unique. If column names are not unique, NewSchema panics.
+func NewSchema(columns []Column) *Schema {
+	indices := make(map[string]int, len(columns))
+	for i, col := range columns {
+		if _, ok := indices[col.Name]; ok {
+			panic(fmt.Sprintf("duplicate column name %s", col.Name))
+		}
+		indices[col.Name] = i
+	}
+	// Copy columns so later changes to the caller's slice can't desync indices.
+	return &Schema{
+		columns:       append([]Column(nil), columns...),
+		columnIndices: indices,
+	}
+}
+
+// NumColumns returns the number of columns that were passed to [NewSchema].
+func (s *Schema) NumColumns() int { return len(s.columns) }
+
+// Column returns the column at index i. It panics unless 0 <= i < s.NumColumns().
+func (s *Schema) Column(i int) Column { return s.columns[i] }
+
+// ColumnIndex looks up a column by name, returning the column and its index.
+// If no column has that name, ColumnIndex returns Column{} and -1.
+func (s *Schema) ColumnIndex(name string) (Column, int) {
+	idx, found := s.columnIndices[name]
+	if !found {
+		return Column{}, -1 // not found
+	}
+	return s.columns[idx], idx
+}
--- a/pkg/dataobj/internal/arrowconv/columnar_test.go
+++ b/pkg/dataobj/internal/arrowconv/columnar_test.go
@@ -23,7 +23,7 @@ func TestToRecordBatch_int64(t *testing.T) {
 	validity.Set(4, false)
 	validity.Set(5, false)
 	int64Arr := columnar.NewNumber[int64](srcInt64, validity)
-	src := columnar.NewRecordBatch(int64(len(srcInt64)), []columnar.Array{int64Arr})
+	src := columnar.NewRecordBatch(nil, int64(len(srcInt64)), []columnar.Array{int64Arr})

 	schema := arrow.NewSchema([]arrow.Field{
 		{Name: "myint64", Type: arrow.PrimitiveTypes.Int64},
@@ -48,7 +48,7 @@ func TestToRecordBatch_uint64(t *testing.T) {
 	validity.Set(4, false)
 	validity.Set(5, false)
 	uint64Arr := columnar.NewNumber[uint64](srcUint64, validity)
-	src := columnar.NewRecordBatch(int64(len(srcUint64)), []columnar.Array{uint64Arr})
+	src := columnar.NewRecordBatch(nil, int64(len(srcUint64)), []columnar.Array{uint64Arr})

 	schema := arrow.NewSchema([]arrow.Field{
 		{Name: "myuint64", Type: arrow.PrimitiveTypes.Uint64},
@@ -74,7 +74,7 @@ func TestToRecordBatch_string(t *testing.T) {
 	validity.Set(2, false)

 	utf8Arr := columnar.NewUTF8([]byte(strings.Join(srcStrings, "")), []int32{0, 1, 2, 2, 4}, validity)
-	src := columnar.NewRecordBatch(int64(len(srcStrings)), []columnar.Array{utf8Arr})
+	src := columnar.NewRecordBatch(nil, int64(len(srcStrings)), []columnar.Array{utf8Arr})

 	schema := arrow.NewSchema([]arrow.Field{
 		{Name: "mystring", Type: arrow.BinaryTypes.String},
@@ -110,7 +110,7 @@ func TestToRecordBatch_binary(t *testing.T) {
 	data := []byte{0x00, 0x01, 0xff}
 	offsets := []int32{0, 2, 2, 3}
 	utf8Arr := columnar.NewUTF8(data, offsets, validity)
-	src := columnar.NewRecordBatch(int64(len(srcValues)), []columnar.Array{utf8Arr})
+	src := columnar.NewRecordBatch(nil, int64(len(srcValues)), []columnar.Array{utf8Arr})

 	schema := arrow.NewSchema([]arrow.Field{
 		{Name: "mybinary", Type: arrow.BinaryTypes.Binary},
@@ -145,7 +145,7 @@ func TestToRecordBatch_timestamp(t *testing.T) {
 	validity.SetRange(0, len(srcTimestamps), true)
 	validity.Set(2, false)
 	int64Arr := columnar.NewNumber[int64](srcNanos, validity)
-	src := columnar.NewRecordBatch(int64(len(srcTimestamps)), []columnar.Array{int64Arr})
+	src := columnar.NewRecordBatch(nil, int64(len(srcTimestamps)), []columnar.Array{int64Arr})

 	schema := arrow.NewSchema([]arrow.Field{
 		{Name: "mytimestamp", Type: arrow.FixedWidthTypes.Timestamp_ns},
@@ -214,7 +214,7 @@ func makeInt64BenchmarkBatch(b *testing.B, n int) (*columnar.RecordBatch, *arrow
 	}
 	validity := makeValidity(alloc, n, 10)
 	int64Arr := columnar.NewNumber[int64](values, validity)
-	src := columnar.NewRecordBatch(int64(n), []columnar.Array{int64Arr})
+	src := columnar.NewRecordBatch(nil, int64(n), []columnar.Array{int64Arr})
 	schema := arrow.NewSchema([]arrow.Field{
 		{Name: "myint64", Type: arrow.PrimitiveTypes.Int64},
 	}, nil)
@@ -230,7 +230,7 @@ func makeUint64BenchmarkBatch(b *testing.B, n int) (*columnar.RecordBatch, *arro
 	}
 	validity := makeValidity(alloc, n, 10)
 	uint64Arr := columnar.NewNumber[uint64](values, validity)
-	src := columnar.NewRecordBatch(int64(n), []columnar.Array{uint64Arr})
+	src := columnar.NewRecordBatch(nil, int64(n), []columnar.Array{uint64Arr})
 	schema := arrow.NewSchema([]arrow.Field{
 		{Name: "myuint64", Type: arrow.PrimitiveTypes.Uint64},
 	}, nil)
@@ -258,7 +258,7 @@ func makeStringBenchmarkBatch(b *testing.B, n int) (*columnar.RecordBatch, *arro
 	}

 	utf8Arr := columnar.NewUTF8(data, offsets, validity)
-	src := columnar.NewRecordBatch(int64(n), []columnar.Array{utf8Arr})
+	src := columnar.NewRecordBatch(nil, int64(n), []columnar.Array{utf8Arr})
 	schema := arrow.NewSchema([]arrow.Field{
 		{Name: "mystring", Type: arrow.BinaryTypes.String},
 	}, nil)
@@ -284,7 +284,7 @@ func makeTimestampBenchmarkBatch(b *testing.B, n int) (*columnar.RecordBatch, *a
 	}
 	validity := makeValidity(alloc, n, 10)
 	int64Arr := columnar.NewNumber[int64](values, validity)
-	src := columnar.NewRecordBatch(int64(n), []columnar.Array{int64Arr})
+	src := columnar.NewRecordBatch(nil, int64(n), []columnar.Array{int64Arr})
 	schema := arrow.NewSchema([]arrow.Field{
 		{Name: "mytimestamp", Type: arrow.FixedWidthTypes.Timestamp_ns},
 	}, nil)
--- a/pkg/dataobj/sections/internal/columnar/reader_adapter.go
+++ b/pkg/dataobj/sections/internal/columnar/reader_adapter.go
@@ -107,5 +107,5 @@ func (r *ReaderAdapter) Read(ctx context.Context, alloc *memory.Allocator, batch

 	// We only return readErr after processing n so that we properly handle n>0
 	// while also getting an error such as io.EOF.
-	return columnar.NewRecordBatch(int64(n), arrs), readErr
+	return columnar.NewRecordBatch(nil, int64(n), arrs), readErr
 }
--- a/pkg/dataobj/sections/logs/reader.go
+++ b/pkg/dataobj/sections/logs/reader.go
@@ -169,7 +169,7 @@ func (r *Reader) Read(ctx context.Context, batchSize int) (arrow.RecordBatch, er
 		for i := range arrs {
 			arrs[i] = rb.Column(int64(i))
 		}
-		rb = columnarv2.NewRecordBatch(rb.NumRows(), arrs)
+		rb = columnarv2.NewRecordBatch(nil, rb.NumRows(), arrs)
 	}
 	result, err := arrowconv.ToRecordBatch(rb, r.schema)
 	if err != nil {
--- a/pkg/dataobj/sections/streams/reader.go
+++ b/pkg/dataobj/sections/streams/reader.go
@@ -169,7 +169,7 @@ func (r *Reader) Read(ctx context.Context, batchSize int) (arrow.RecordBatch, er
 		for i := range arrs {
 			arrs[i] = rb.Column(int64(i))
 		}
-		rb = columnarv2.NewRecordBatch(rb.NumRows(), arrs)
+		rb = columnarv2.NewRecordBatch(nil, rb.NumRows(), arrs)
 	}
 	result, err := arrowconv.ToRecordBatch(rb, r.schema)
 	if err != nil {
--- a/pkg/expr/evaluate.go
+++ b/pkg/expr/evaluate.go
@@ -0,0 +1,94 @@
+package expr
+
+import (
+	"fmt"
+
+	"github.com/grafana/loki/v3/pkg/columnar"
+	"github.com/grafana/loki/v3/pkg/compute"
+	"github.com/grafana/loki/v3/pkg/memory"
+)
+
+// Evaluate processes expr against the provided batch, producing a datum as a
+// result using alloc. It returns an error if an expression has an unexpected
+// type (including a nil [Expression]) or operator, or if a computation fails.
+//
+// The return type of Evaluate depends on the expression provided. See the
+// documentation for implementations of Expression for what they produce.
+func Evaluate(alloc *memory.Allocator, expr Expression, batch *columnar.RecordBatch) (columnar.Datum, error) {
+	switch expr := expr.(type) {
+	case *Constant:
+		return expr.Value, nil
+
+	case *Column:
+		columnIndex := -1
+		if schema := batch.Schema(); schema != nil {
+			_, columnIndex = schema.ColumnIndex(expr.Name)
+		}
+		// A missing column, or a batch without a schema, yields a Null array.
+		if columnIndex == -1 {
+			validity := memory.NewBitmap(alloc, int(batch.NumRows()))
+			validity.AppendCount(false, int(batch.NumRows()))
+			return columnar.NewNull(validity), nil
+		}
+		return batch.Column(int64(columnIndex)), nil
+
+	case *Unary:
+		return evaluateUnary(alloc, expr, batch)
+
+	case *Binary:
+		return evaluateBinary(alloc, expr, batch)
+
+	default:
+		return nil, fmt.Errorf("unexpected expression type %T", expr)
+	}
+}
+
+// evaluateUnary evaluates the operand of expr and applies expr.Op to the result.
+func evaluateUnary(alloc *memory.Allocator, expr *Unary, batch *columnar.RecordBatch) (columnar.Datum, error) {
+	if expr.Op != UnaryOpNOT {
+		return nil, fmt.Errorf("unexpected unary operator %s", expr.Op)
+	}
+
+	operand, err := Evaluate(alloc, expr.Value, batch)
+	if err != nil {
+		return nil, err
+	}
+	return compute.Not(alloc, operand)
+}
+
+// evaluateBinary evaluates both operands of expr, then applies expr.Op to them.
+func evaluateBinary(alloc *memory.Allocator, expr *Binary, batch *columnar.RecordBatch) (columnar.Datum, error) {
+	// TODO(rfratto): If expr.Op is [BinaryOpAND] or [BinaryOpOR], we can
+	// propagate selection vectors to avoid unnecessary evaluations.
+	lhs, err := Evaluate(alloc, expr.Left, batch)
+	if err != nil {
+		return nil, err
+	}
+	rhs, err := Evaluate(alloc, expr.Right, batch)
+	if err != nil {
+		return nil, err
+	}
+
+	switch expr.Op {
+	case BinaryOpEQ:
+		return compute.Equals(alloc, lhs, rhs)
+	case BinaryOpNEQ:
+		return compute.NotEquals(alloc, lhs, rhs)
+	case BinaryOpGT:
+		return compute.GreaterThan(alloc, lhs, rhs)
+	case BinaryOpGTE:
+		return compute.GreaterOrEqual(alloc, lhs, rhs)
+	case BinaryOpLT:
+		return compute.LessThan(alloc, lhs, rhs)
+	case BinaryOpLTE:
+		return compute.LessOrEqual(alloc, lhs, rhs)
+	case BinaryOpAND:
+		return compute.And(alloc, lhs, rhs)
+	case BinaryOpOR:
+		return compute.Or(alloc, lhs, rhs)
+	case BinaryOpHasSubstrIgnoreCase:
+		return compute.SubstrInsensitive(alloc, lhs, rhs)
+	default:
+		return nil, fmt.Errorf("unexpected binary operator %s", expr.Op)
+	}
+}
--- a/pkg/expr/evaluate_test.go
+++ b/pkg/expr/evaluate_test.go
@@ -0,0 +1,219 @@
+package expr_test
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/require"
+
+	"github.com/grafana/loki/v3/pkg/columnar"
+	"github.com/grafana/loki/v3/pkg/columnar/columnartest"
+	"github.com/grafana/loki/v3/pkg/expr"
+	"github.com/grafana/loki/v3/pkg/memory"
+)
+
+// TestEvaluate performs a basic end-to-end test of expression evaluation.
+func TestEvaluate(t *testing.T) {
+	var alloc memory.Allocator
+
+	record := columnar.NewRecordBatch(
+		columnar.NewSchema([]columnar.Column{
+			{Name: "name"},
+			{Name: "age"},
+		}),
+		3, // row count
+		[]columnar.Array{
+			columnartest.Array(t, columnar.KindUTF8, &alloc, "Peter", "Paul", "Mary"),
+			columnartest.Array(t, columnar.KindUint64, &alloc, 30, 25, 43),
+		},
+	)
+
+	// (name != "Paul" AND age > 25)
+	e := &expr.Binary{
+		Left: &expr.Binary{
+			Left:  &expr.Column{Name: "name"},
+			Op:    expr.BinaryOpNEQ,
+			Right: &expr.Constant{Value: columnartest.Scalar(t, columnar.KindUTF8, "Paul")},
+		},
+		Op: expr.BinaryOpAND,
+		Right: &expr.Binary{
+			Left:  &expr.Column{Name: "age"},
+			Op:    expr.BinaryOpGT,
+			Right: &expr.Constant{Value: columnartest.Scalar(t, columnar.KindUint64, 25)},
+		},
+	}
+	// Peter and Mary satisfy both conditions; Paul satisfies neither.
+	expect := columnartest.Array(t, columnar.KindBool, &alloc, true, false, true)
+
+	result, err := expr.Evaluate(&alloc, e, record)
+	require.NoError(t, err)
+	columnartest.RequireDatumsEqual(t, expect, result)
+}
+
+func TestEvaluate_Constant(t *testing.T) {
+	var alloc memory.Allocator
+
+	e := &expr.Constant{Value: columnartest.Scalar(t, columnar.KindUint64, 42)}
+
+	expect := columnartest.Scalar(t, columnar.KindUint64, 42)
+	// A constant is returned as-is without reading the batch, so nil is fine.
+	result, err := expr.Evaluate(&alloc, e, nil)
+	require.NoError(t, err)
+	columnartest.RequireDatumsEqual(t, expect, result)
+}
+
+func TestEvaluate_Column(t *testing.T) {
+	var alloc memory.Allocator
+
+	record := columnar.NewRecordBatch(
+		columnar.NewSchema([]columnar.Column{
+			{Name: "name"},
+			{Name: "age"},
+			{Name: "city"},
+		}),
+		3, // row count
+		[]columnar.Array{
+			columnartest.Array(t, columnar.KindUTF8, &alloc, "Alice", "Bob", "Charlie"),
+			columnartest.Array(t, columnar.KindUint64, &alloc, 30, 25, 35),
+			columnartest.Array(t, columnar.KindUTF8, &alloc, "NYC", "LA", "SF"),
+		},
+	)
+
+	t.Run("existing column", func(t *testing.T) {
+		e := &expr.Column{Name: "age"}
+
+		expect := columnartest.Array(t, columnar.KindUint64, &alloc, 30, 25, 35)
+
+		result, err := expr.Evaluate(&alloc, e, record)
+		require.NoError(t, err)
+		columnartest.RequireDatumsEqual(t, expect, result)
+	})
+
+	t.Run("non-existing column", func(t *testing.T) {
+		e := &expr.Column{Name: "nonexistent"}
+
+		expect := columnartest.Array(t, columnar.KindNull, &alloc, nil, nil, nil)
+		// A column missing from the schema evaluates to one null per row.
+		result, err := expr.Evaluate(&alloc, e, record)
+		require.NoError(t, err)
+		columnartest.RequireDatumsEqual(t, expect, result)
+	})
+}
+
+func TestEvaluate_Unary(t *testing.T) {
+	var alloc memory.Allocator
+
+	record := columnar.NewRecordBatch(
+		columnar.NewSchema([]columnar.Column{
+			{Name: "active"},
+		}),
+		3, // row count
+		[]columnar.Array{
+			columnartest.Array(t, columnar.KindBool, &alloc, true, false, true),
+		},
+	)
+	// (NOT active)
+	e := &expr.Unary{
+		Op:    expr.UnaryOpNOT,
+		Value: &expr.Column{Name: "active"},
+	}
+
+	expect := columnartest.Array(t, columnar.KindBool, &alloc, false, true, false)
+
+	result, err := expr.Evaluate(&alloc, e, record)
+	require.NoError(t, err)
+	columnartest.RequireDatumsEqual(t, expect, result)
+}
+
+func TestEvaluate_Binary(t *testing.T) {
+	var alloc memory.Allocator
+
+	record := columnar.NewRecordBatch(
+		columnar.NewSchema([]columnar.Column{
+			{Name: "name"},
+			{Name: "age"},
+			{Name: "active"},
+		}),
+		3, // row count
+		[]columnar.Array{
+			columnartest.Array(t, columnar.KindUTF8, &alloc, "Alice", "Bob", "Charlie"),
+			columnartest.Array(t, columnar.KindUint64, &alloc, 30, 25, 35),
+			columnartest.Array(t, columnar.KindBool, &alloc, true, false, true),
+		},
+	)
+	// Every case applies op to a column (left) and a scalar constant (right).
+	tests := []struct {
+		op     expr.BinaryOp
+		left   expr.Expression
+		right  expr.Expression
+		expect columnar.Datum
+	}{
+		{
+			op:     expr.BinaryOpEQ,
+			left:   &expr.Column{Name: "age"},
+			right:  &expr.Constant{Value: columnartest.Scalar(t, columnar.KindUint64, 30)},
+			expect: columnartest.Array(t, columnar.KindBool, &alloc, true, false, false),
+		},
+		{
+			op:     expr.BinaryOpNEQ,
+			left:   &expr.Column{Name: "age"},
+			right:  &expr.Constant{Value: columnartest.Scalar(t, columnar.KindUint64, 30)},
+			expect: columnartest.Array(t, columnar.KindBool, &alloc, false, true, true),
+		},
+		{
+			op:     expr.BinaryOpGT,
+			left:   &expr.Column{Name: "age"},
+			right:  &expr.Constant{Value: columnartest.Scalar(t, columnar.KindUint64, 25)},
+			expect: columnartest.Array(t, columnar.KindBool, &alloc, true, false, true),
+		},
+		{
+			op:     expr.BinaryOpGTE,
+			left:   &expr.Column{Name: "age"},
+			right:  &expr.Constant{Value: columnartest.Scalar(t, columnar.KindUint64, 30)},
+			expect: columnartest.Array(t, columnar.KindBool, &alloc, true, false, true),
+		},
+		{
+			op:     expr.BinaryOpLT,
+			left:   &expr.Column{Name: "age"},
+			right:  &expr.Constant{Value: columnartest.Scalar(t, columnar.KindUint64, 30)},
+			expect: columnartest.Array(t, columnar.KindBool, &alloc, false, true, false),
+		},
+		{
+			op:     expr.BinaryOpLTE,
+			left:   &expr.Column{Name: "age"},
+			right:  &expr.Constant{Value: columnartest.Scalar(t, columnar.KindUint64, 30)},
+			expect: columnartest.Array(t, columnar.KindBool, &alloc, true, true, false),
+		},
+		{
+			op:     expr.BinaryOpAND,
+			left:   &expr.Column{Name: "active"},
+			right:  &expr.Constant{Value: columnartest.Scalar(t, columnar.KindBool, true)},
+			expect: columnartest.Array(t, columnar.KindBool, &alloc, true, false, true),
+		},
+		{
+			op:     expr.BinaryOpOR,
+			left:   &expr.Column{Name: "active"},
+			right:  &expr.Constant{Value: columnartest.Scalar(t, columnar.KindBool, false)},
+			expect: columnartest.Array(t, columnar.KindBool, &alloc, true, false, true),
+		},
+		{
+			op:     expr.BinaryOpHasSubstrIgnoreCase,
+			left:   &expr.Column{Name: "name"},
+			right:  &expr.Constant{Value: columnartest.Scalar(t, columnar.KindUTF8, "li")},
+			expect: columnartest.Array(t, columnar.KindBool, &alloc, true, false, true),
+		},
+	}
+	// Each operator runs as a subtest named after tt.op.String().
+	for _, tt := range tests {
+		t.Run(tt.op.String(), func(t *testing.T) {
+			e := &expr.Binary{
+				Left:  tt.left,
+				Op:    tt.op,
+				Right: tt.right,
+			}
+
+			result, err := expr.Evaluate(&alloc, e, record)
+			require.NoError(t, err)
+			columnartest.RequireDatumsEqual(t, tt.expect, result)
+		})
+	}
+}
--- a/pkg/expr/expr.go
+++ b/pkg/expr/expr.go
@@ -0,0 +1,52 @@
+// Package expr provides utilities for evaluating expressions against a
+// [columnar.RecordBatch].
+//
+// Package expr is EXPERIMENTAL and currently only intended to be used by
+// [github.com/grafana/loki/v3/pkg/dataobj].
+package expr
+
+import (
+	"github.com/grafana/loki/v3/pkg/columnar"
+)
+
+// Expression represents an operation that [Evaluate] can evaluate to produce a result.
+type Expression interface{ isExpr() }
+
+// Types implementing [Expression].
+type (
+	// Constant is an [Expression] that evaluates to its Value, a single
+	// scalar, without reading the record batch.
+	Constant struct{ Value columnar.Scalar }
+
+	// Column is an [Expression] that looks up the column by name in the record
+	// batch supplied to [Evaluate].
+	//
+	// If the column doesn't exist or the batch has no schema, a Null column is produced.
+	Column struct{ Name string }
+
+	// Unary is an [Expression] that performs a unary operation against a single
+	// argument.
+	//
+	// The result of the expression depends on the value of Op. See the
+	// documentation of [UnaryOp] for the behavior of each operation.
+	Unary struct {
+		Op    UnaryOp
+		Value Expression
+	}
+
+	// Binary is an [Expression] that performs a binary operation against a left and
+	// a right expression.
+	//
+	// The result of the expression depends on the value of Op. See the
+	// documentation of [BinaryOp] for the behavior of each operation.
+	Binary struct {
+		Left  Expression
+		Op    BinaryOp
+		Right Expression
+	}
+)
+
+func (*Constant) isExpr() {} // isExpr marks a type as an [Expression].
+func (*Column) isExpr()   {}
+func (*Unary) isExpr()    {}
+func (*Binary) isExpr()   {}
--- a/pkg/expr/expr_binary_op.go
+++ b/pkg/expr/expr_binary_op.go
@@ -0,0 +1,107 @@
+package expr
+
+// BinaryOp denotes a binary operation on two arguments; its zero value is [BinaryOpInvalid].
+type BinaryOp int
+
+const (
+	// BinaryOpInvalid is the zero value of BinaryOp and indicates an invalid
+	// binary operation. Evaluating a BinaryOpInvalid will result in an error.
+	BinaryOpInvalid BinaryOp = iota
+
+	// BinaryOpEQ performs an equality (==) check of the left and right
+	// expressions. The expressions must be of the same type.
+	//
+	// The result is a bool datum: a bool scalar if both arguments are
+	// scalars, otherwise a bool array.
+	BinaryOpEQ
+
+	// BinaryOpNEQ performs an inequality (!=) check of the left and right
+	// expressions. The expressions must be of the same type.
+	//
+	// The result is a bool datum: a bool scalar if both arguments are
+	// scalars, otherwise a bool array.
+	BinaryOpNEQ
+
+	// BinaryOpGT performs a greater than (>) check of the left and right
+	// expressions. The expressions must be of the same type, and must be
+	// ordered (numeric or UTF8).
+	//
+	// The result is a bool datum: a bool scalar if both arguments are
+	// scalars, otherwise a bool array.
+	BinaryOpGT
+
+	// BinaryOpGTE performs a greater than or equal (>=) check of the left and
+	// right expressions. The expressions must be of the same type, and must be
+	// ordered (numeric or UTF8).
+	//
+	// The result is a bool datum: a bool scalar if both arguments are
+	// scalars, otherwise a bool array.
+	BinaryOpGTE
+
+	// BinaryOpLT performs a less than (<) check of the left and right
+	// expressions. The expressions must be of the same type, and must be
+	// ordered (numeric or UTF8).
+	//
+	// The result is a bool datum: a bool scalar if both arguments are
+	// scalars, otherwise a bool array.
+	BinaryOpLT
+
+	// BinaryOpLTE performs a less than or equal (<=) check of the left and
+	// right expressions. The expressions must be of the same type, and must be
+	// ordered (numeric or UTF8).
+	//
+	// The result is a bool datum: a bool scalar if both arguments are
+	// scalars, otherwise a bool array.
+	BinaryOpLTE
+
+	// BinaryOpAND performs a logical AND (&&) operation on the left and right
+	// expressions. The expressions must be of bool type.
+	//
+	// The result is a bool datum: a bool scalar if both arguments are
+	// scalars, otherwise a bool array.
+	BinaryOpAND
+
+	// BinaryOpOR performs a logical OR (||) operation on the left and right
+	// expressions. The expressions must be of bool type.
+	//
+	// The result is a bool datum: a bool scalar if both arguments are
+	// scalars, otherwise a bool array.
+	BinaryOpOR
+
+	// BinaryOpHasSubstrIgnoreCase performs a case-insensitive substring check
+	// of the left and right expressions.
+	//
+	// The left expression denotes the "haystack" to search, and must be a UTF8
+	// scalar or array. The right expression denotes the "needle" to search
+	// with, and must be a UTF8 scalar. If the needle is found in the haystack
+	// (ignoring case), the result is true.
+	//
+	// The result is a bool datum: a bool scalar if both arguments are
+	// scalars, otherwise a bool array.
+	BinaryOpHasSubstrIgnoreCase
+)
+
+var binaryOpStrings = [...]string{ // Names indexed by BinaryOp; read by BinaryOp.String.
+	BinaryOpInvalid: "INVALID",
+
+	BinaryOpEQ:  "EQ",
+	BinaryOpNEQ: "NEQ",
+	BinaryOpGT:  "GT",
+	BinaryOpGTE: "GTE",
+	BinaryOpLT:  "LT",
+	BinaryOpLTE: "LTE",
+
+	BinaryOpAND: "AND",
+	BinaryOpOR:  "OR",
+
+	BinaryOpHasSubstrIgnoreCase: "HAS_SUBSTR_IGNORECASE",
+}
+
+// String returns the string representation of op. If op is out of bounds, it
+// returns "INVALID".
+func (op BinaryOp) String() string {
+	if idx := int(op); idx >= 0 && idx < len(binaryOpStrings) {
+		return binaryOpStrings[idx]
+	}
+	return "INVALID"
+}
--- a/pkg/expr/expr_unary_op.go
+++ b/pkg/expr/expr_unary_op.go
@@ -0,0 +1,27 @@
+package expr
+
+// UnaryOp denotes a unary operation on a single argument; its zero value is [UnaryOpInvalid].
+type UnaryOp int
+
+const (
+	// UnaryOpInvalid is the zero value of UnaryOp and indicates an invalid
+	// unary operation. Evaluating a UnaryOpInvalid will result in an error.
+	UnaryOpInvalid UnaryOp = iota
+
+	// UnaryOpNOT represents a logical NOT operation over a boolean value.
+	UnaryOpNOT
+)
+
+var unaryOpStrings = [...]string{ // Names indexed by UnaryOp; read by UnaryOp.String.
+	UnaryOpInvalid: "INVALID",
+	UnaryOpNOT:     "NOT",
+}
+
+// String returns the string representation of op. If op is out of bounds, it
+// returns "INVALID".
+func (op UnaryOp) String() string {
+	if idx := int(op); idx >= 0 && idx < len(unaryOpStrings) {
+		return unaryOpStrings[idx]
+	}
+	return "INVALID"
+}