chore(columnar): add experimental columnar package for pkg/dataobj (#20478)

This commit is contained in:
Robert Fratto
2026-01-19 10:40:35 -05:00
committed by GitHub
parent 7da73aaa7e
commit d14bcc1c06
16 changed files with 467 additions and 88 deletions

View File

@@ -0,0 +1,76 @@
package columnar
import (
"github.com/grafana/loki/v3/pkg/memory"
)
// Bool is an [Array] of bit-packed boolean values.
//
// Use [MakeBool] to construct a Bool so that the cached null count is
// populated from the validity bitmap.
type Bool struct {
validity memory.Bitmap // Empty when there's no nulls.
values memory.Bitmap // Bit-packed element values; its length is the array length.
nullCount int // Number of cleared bits in validity, computed once by init.
}
// Compile-time assertion that *Bool implements Array.
var _ Array = (*Bool)(nil)
// MakeBool builds a Bool array that wraps the given values bitmap and an
// optional validity bitmap.
//
// A Bool array backed by memory owned by a [memory.Allocator] becomes invalid
// once that allocator reclaims the memory.
//
// A zero-length validity bitmap means every element is valid. When validity
// is non-empty, its length must equal the length of values; MakeBool panics
// otherwise.
func MakeBool(values, validity memory.Bitmap) *Bool {
	arr := &Bool{values: values, validity: validity}
	// init validates the lengths and caches the null count.
	arr.init()
	return arr
}
// init checks that a non-empty validity bitmap has the same length as the
// values bitmap and caches the number of null elements.
//
// NOTE(review): the noinline directive presumably mirrors UTF8.init, where
// keeping init out of line measurably helped decoding performance; confirm.
//
//go:noinline
func (arr *Bool) init() {
	if n := arr.validity.Len(); n > 0 && n != arr.values.Len() {
		panic("length mismatch between values and validity")
	}
	// Every cleared validity bit marks a null; an empty bitmap yields zero.
	arr.nullCount = arr.validity.ClearCount()
}
// Len reports how many elements the array holds, null elements included.
func (arr *Bool) Len() int {
	return arr.values.Len()
}
// Nulls reports how many elements of the array are null. Subtract it from
// Len() to get the number of non-null elements.
func (arr *Bool) Nulls() int {
	return arr.nullCount
}
// Get reads the boolean stored at index i of the values bitmap. The validity
// bitmap is not consulted, so the result for a null element is undefined;
// call IsNull first when nulls matter.
//
// Get panics if i is out of range.
func (arr *Bool) Get(i int) bool {
	value := arr.values.Get(i)
	return value
}
// IsNull reports whether the element at index i is null.
//
// When the array has no nulls the validity bitmap (which may be empty) is
// never read, so i is not bounds-checked in that case.
func (arr *Bool) IsNull(i int) bool {
	hasNulls := arr.nullCount != 0
	return hasNulls && !arr.validity.Get(i)
}
// Values exposes the bitmap that stores the array's boolean values.
func (arr *Bool) Values() memory.Bitmap {
	return arr.values
}
// Validity exposes the array's validity bitmap. A bit set to 1 means the
// element at that position is valid (not null).
//
// The bitmap may have length 0 when the array contains no nulls.
func (arr *Bool) Validity() memory.Bitmap {
	return arr.validity
}
// Kind identifies this array as holding boolean values; it always returns
// KindBool.
func (arr *Bool) Kind() Kind {
	return KindBool
}

View File

@@ -0,0 +1,74 @@
package columnar
import "github.com/grafana/loki/v3/pkg/memory"
// Int64 is an [Array] of 64-bit signed integer values.
//
// Use [MakeInt64] to construct an Int64 so that the cached null count is
// populated from the validity bitmap.
type Int64 struct {
validity memory.Bitmap // Empty when there's no nulls.
values []int64 // One entry per element, including null elements.
nullCount int // Number of cleared bits in validity, computed once by init.
}
// Compile-time assertion that *Int64 implements Array.
var _ Array = (*Int64)(nil)
// MakeInt64 builds an Int64 array that wraps the given values slice and an
// optional validity bitmap. The values slice is used directly, not copied.
//
// An Int64 array backed by memory owned by a [memory.Allocator] becomes
// invalid once that allocator reclaims the memory.
//
// A zero-length validity bitmap means every element is valid. When validity
// is non-empty, its length must equal len(values); MakeInt64 panics
// otherwise.
func MakeInt64(values []int64, validity memory.Bitmap) *Int64 {
	arr := &Int64{values: values, validity: validity}
	// init validates the lengths and caches the null count.
	arr.init()
	return arr
}
// init checks that a non-empty validity bitmap has one bit per value and
// caches the number of null elements.
//
// NOTE(review): the noinline directive presumably mirrors UTF8.init, where
// keeping init out of line measurably helped decoding performance; confirm.
//
//go:noinline
func (arr *Int64) init() {
	if n := arr.validity.Len(); n > 0 && n != len(arr.values) {
		panic("length mismatch between values and validity")
	}
	// Every cleared validity bit marks a null; an empty bitmap yields zero.
	arr.nullCount = arr.validity.ClearCount()
}
// Len reports how many elements the array holds, null elements included.
func (arr *Int64) Len() int {
	return len(arr.values)
}
// Nulls reports how many elements of the array are null. Subtract it from
// Len() to get the number of non-null elements.
func (arr *Int64) Nulls() int {
	return arr.nullCount
}
// Get reads the value stored at index i. The validity bitmap is not
// consulted, so the result for a null element is undefined; call IsNull first
// when nulls matter.
//
// Get panics if i is out of range.
func (arr *Int64) Get(i int) int64 {
	value := arr.values[i]
	return value
}
// IsNull reports whether the element at index i is null.
//
// When the array has no nulls the validity bitmap (which may be empty) is
// never read, so i is not bounds-checked in that case.
func (arr *Int64) IsNull(i int) bool {
	hasNulls := arr.nullCount != 0
	return hasNulls && !arr.validity.Get(i)
}
// Values exposes the slice backing the array. The slice is returned without
// copying, so it aliases the array's storage.
func (arr *Int64) Values() []int64 {
	return arr.values
}
// Validity exposes the array's validity bitmap. A bit set to 1 means the
// element at that position is valid (not null).
//
// The bitmap may have length 0 when the array contains no nulls.
func (arr *Int64) Validity() memory.Bitmap {
	return arr.validity
}
// Kind identifies this array as holding signed 64-bit integers; it always
// returns KindInt64.
func (arr *Int64) Kind() Kind {
	return KindInt64
}

View File

@@ -0,0 +1,74 @@
package columnar
import "github.com/grafana/loki/v3/pkg/memory"
// Uint64 is an [Array] of 64-bit unsigned integer values.
//
// Use [MakeUint64] to construct a Uint64 so that the cached null count is
// populated from the validity bitmap.
type Uint64 struct {
validity memory.Bitmap // Empty when there's no nulls.
values []uint64 // One entry per element, including null elements.
nullCount int // Number of cleared bits in validity, computed once by init.
}
// Compile-time assertion that *Uint64 implements Array.
var _ Array = (*Uint64)(nil)
// MakeUint64 builds a Uint64 array that wraps the given values slice and an
// optional validity bitmap. The values slice is used directly, not copied.
//
// A Uint64 array backed by memory owned by a [memory.Allocator] becomes
// invalid once that allocator reclaims the memory.
//
// A zero-length validity bitmap means every element is valid. When validity
// is non-empty, its length must equal len(values); MakeUint64 panics
// otherwise.
func MakeUint64(values []uint64, validity memory.Bitmap) *Uint64 {
	arr := &Uint64{values: values, validity: validity}
	// init validates the lengths and caches the null count.
	arr.init()
	return arr
}
// init checks that a non-empty validity bitmap has one bit per value and
// caches the number of null elements.
//
// NOTE(review): the noinline directive presumably mirrors UTF8.init, where
// keeping init out of line measurably helped decoding performance; confirm.
//
//go:noinline
func (arr *Uint64) init() {
	if n := arr.validity.Len(); n > 0 && n != len(arr.values) {
		panic("length mismatch between values and validity")
	}
	// Every cleared validity bit marks a null; an empty bitmap yields zero.
	arr.nullCount = arr.validity.ClearCount()
}
// Len reports how many elements the array holds, null elements included.
func (arr *Uint64) Len() int {
	return len(arr.values)
}
// Nulls reports how many elements of the array are null. Subtract it from
// Len() to get the number of non-null elements.
func (arr *Uint64) Nulls() int {
	return arr.nullCount
}
// Get reads the value stored at index i. The validity bitmap is not
// consulted, so the result for a null element is undefined; call IsNull first
// when nulls matter.
//
// Get panics if i is out of range.
func (arr *Uint64) Get(i int) uint64 {
	value := arr.values[i]
	return value
}
// IsNull reports whether the element at index i is null.
//
// When the array has no nulls the validity bitmap (which may be empty) is
// never read, so i is not bounds-checked in that case.
func (arr *Uint64) IsNull(i int) bool {
	hasNulls := arr.nullCount != 0
	return hasNulls && !arr.validity.Get(i)
}
// Values exposes the slice backing the array. The slice is returned without
// copying, so it aliases the array's storage.
func (arr *Uint64) Values() []uint64 {
	return arr.values
}
// Validity exposes the array's validity bitmap. A bit set to 1 means the
// element at that position is valid (not null).
//
// The bitmap may have length 0 when the array contains no nulls.
func (arr *Uint64) Validity() memory.Bitmap {
	return arr.validity
}
// Kind identifies this array as holding unsigned 64-bit integers; it always
// returns KindUint64.
func (arr *Uint64) Kind() Kind {
	return KindUint64
}

View File

@@ -0,0 +1,94 @@
package columnar
import "github.com/grafana/loki/v3/pkg/memory"
// UTF8 is an [Array] of UTF-8 encoded strings.
//
// The bytes of all strings are packed into data; element i occupies
// data[offsets[i]:offsets[i+1]]. Use [MakeUTF8] to construct a UTF8 so that
// length and nullCount are populated.
type UTF8 struct {
validity memory.Bitmap // Empty when there's no nulls.
offsets []int32 // Element boundaries into data; holds len(offsets)-1 elements (zero when empty).
data []byte // Packed string bytes for all elements.
length int // Number of elements, computed by init from len(offsets).
nullCount int // Number of cleared bits in validity, computed by init.
}
// Compile-time assertion that *UTF8 implements Array.
var _ Array = (*UTF8)(nil)
// MakeUTF8 builds a UTF8 array from packed string data, the offsets that
// delimit each string, and an optional validity bitmap. The data and offsets
// slices are used directly, not copied.
//
// A UTF8 array backed by memory owned by a [memory.Allocator] becomes invalid
// once that allocator reclaims the memory.
//
// String i spans data[offsets[i] : offsets[i+1]], so N strings require N+1
// offsets. Offsets must be monotonically increasing, even for null values.
// MakeUTF8 does not validate the offsets slice for correctness.
//
// A zero-length validity bitmap means every element is valid. When validity
// is non-empty, its length must equal the number of elements; MakeUTF8 panics
// otherwise.
func MakeUTF8(data []byte, offsets []int32, validity memory.Bitmap) *UTF8 {
	arr := &UTF8{
		data:     data,
		offsets:  offsets,
		validity: validity,
	}
	// init derives the element count, validates validity, and caches the
	// null count.
	arr.init()
	return arr
}
// init derives the element count from the offsets slice, checks that a
// non-empty validity bitmap has one bit per element, and caches the number of
// null elements.
//
// Moving initialization of additional fields to a non-inlined init method
// improved the performance of the plain bytes decoder in dataset by 10%.
//
//go:noinline
func (arr *UTF8) init() {
	// N elements are described by N+1 offsets; an empty offsets slice means
	// zero elements rather than -1.
	elems := len(arr.offsets) - 1
	if elems < 0 {
		elems = 0
	}

	if n := arr.validity.Len(); n > 0 && n != elems {
		panic("length mismatch with validity")
	}

	arr.length = elems
	// Every cleared validity bit marks a null; an empty bitmap yields zero.
	arr.nullCount = arr.validity.ClearCount()
}
// Len reports how many elements the array holds, null elements included.
func (arr *UTF8) Len() int {
	return arr.length
}
// Nulls reports how many elements of the array are null. Subtract it from
// Len() to get the number of non-null elements.
func (arr *UTF8) Nulls() int {
	return arr.nullCount
}
// Get returns the bytes of the element at index i, that is
// data[offsets[i]:offsets[i+1]]. The returned slice aliases the array's data;
// it is not copied.
//
// Get does not consult the validity bitmap. For a null element the result is
// whatever range its two offsets delimit, which is empty only when those
// offsets are equal.
//
// Get panics if i is out of range.
func (arr *UTF8) Get(i int) []byte {
	lo, hi := arr.offsets[i], arr.offsets[i+1]
	return arr.data[lo:hi]
}
// IsNull reports whether the element at index i is null.
//
// When the array has no nulls the validity bitmap (which may be empty) is
// never read, so i is not bounds-checked in that case.
func (arr *UTF8) IsNull(i int) bool {
	hasNulls := arr.nullCount != 0
	return hasNulls && !arr.validity.Get(i)
}
// Data exposes the packed bytes of all strings in the array. The slice is
// returned without copying, so it aliases the array's storage.
func (arr *UTF8) Data() []byte {
	return arr.data
}
// Offsets exposes the offsets that delimit each string within Data(). The
// slice is returned without copying, so it aliases the array's storage.
func (arr *UTF8) Offsets() []int32 {
	return arr.offsets
}
// Validity exposes the array's validity bitmap. A bit set to 1 means the
// element at that position is valid (not null).
//
// The bitmap may have length 0 when the array contains no nulls.
func (arr *UTF8) Validity() memory.Bitmap {
	return arr.validity
}
// Kind identifies this array as holding UTF-8 strings; it always returns
// KindUTF8.
func (arr *UTF8) Kind() Kind {
	return KindUTF8
}

35
pkg/columnar/columnar.go Normal file
View File

@@ -0,0 +1,35 @@
// Package columnar provides utilities for working with columnar in-memory
// arrays.
//
// Columnar types are Arrow-compatible. The columnar package exists to provide
// Loki-optimized APIs for columnar data that are based on [memory.Allocator]
// for memory management rather than arrow-go's reference counting.
//
// Package columnar is EXPERIMENTAL and currently only intended to be used by
// [github.com/grafana/loki/v3/pkg/dataobj].
package columnar
import "github.com/grafana/loki/v3/pkg/memory"
// An Array is a sequence of elements of the same data type.
//
// The interface only covers type-independent operations. Reading element
// values requires the concrete array type, which callers can select based on
// Kind.
type Array interface {
// Len returns the total number of elements in the array.
Len() int
// Nulls returns the number of null elements in the array. The number of
// non-null elements can be calculated from Len() - Nulls().
Nulls() int
// IsNull returns true if the element at index i is null.
IsNull(i int) bool
// Validity returns the validity bitmap of the array. The returned bitmap
// may be of length 0 if there are no nulls.
//
// A value of 1 in the Validity bitmap indicates that the corresponding
// element at that position is valid (not null).
Validity() memory.Bitmap
// Kind returns the kind of Array being represented.
Kind() Kind
}

31
pkg/columnar/kind.go Normal file
View File

@@ -0,0 +1,31 @@
package columnar
import "strconv"
// A Kind identifies the specific element type of an Array. The zero Kind is
// KindNull.
type Kind uint8

// Known kinds. Their numeric values are used as indexes into kindNames, so a
// new kind should be added together with a matching name.
const (
	// KindNull represents a null value.
	KindNull Kind = iota
	// KindBool represents a boolean value.
	KindBool
	// KindInt64 represents a 64-bit signed integer value.
	KindInt64
	// KindUint64 represents a 64-bit unsigned integer value.
	KindUint64
	// KindUTF8 represents a UTF-8 encoded string value.
	KindUTF8
)

// kindNames holds the display name of each known Kind, indexed by the Kind's
// numeric value.
var kindNames = [...]string{
	KindNull:   "null",
	KindBool:   "bool",
	KindInt64:  "int64",
	KindUint64: "uint64",
	KindUTF8:   "utf8",
}

// String returns the string representation of k. Kinds without an entry in
// kindNames are rendered as "Kind(N)", where N is the numeric value.
func (k Kind) String() string {
	idx := int(k)
	if idx >= len(kindNames) {
		return "Kind(" + strconv.Itoa(idx) + ")"
	}
	return kindNames[idx]
}

View File

@@ -6,6 +6,7 @@ import (
"fmt"
"io"
"github.com/grafana/loki/v3/pkg/columnar"
"github.com/grafana/loki/v3/pkg/dataobj/internal/metadata/datasetmd"
"github.com/grafana/loki/v3/pkg/dataobj/internal/util/slicegrow"
"github.com/grafana/loki/v3/pkg/memory"
@@ -25,8 +26,7 @@ type pageReader struct {
presenceDec *bitmapDecoder
valuesDec legacyValueDecoder
presenceBuf memory.Bitmap
valuesBuf []Value
valuesBuf []Value
pageRow int64
nextRow int64
@@ -93,9 +93,9 @@ func (pr *pageReader) read(v []Value) (n int, err error) {
pr.valuesBuf = reuseValuesBuffer(pr.valuesBuf, v)
// First read presence values for the next len(v) rows.
presenceBuf, err := pr.presenceDec.Decode(&pr.alloc, len(v))
pr.presenceBuf = presenceBuf.(memory.Bitmap)
count := pr.presenceBuf.Len()
presenceArr, err := pr.presenceDec.Decode(&pr.alloc, len(v))
presenceBuf := presenceArr.(*columnar.Bool)
count := presenceBuf.Len()
if err != nil && !errors.Is(err, io.EOF) {
return n, err
} else if count == 0 && errors.Is(err, io.EOF) {
@@ -109,13 +109,10 @@ func (pr *pageReader) read(v []Value) (n int, err error) {
return 0, nil
}
// The number of 1-s in pr.presenceBuf determines how many values we need to read from the inner page.
var presentCount int
for i := range count {
if pr.presenceBuf.Get(i) {
presentCount++
}
}
// The number of bits set to 1 in presenceBuf determines how many values we
// need to read from the inner page.
presenceValues := presenceBuf.Values()
presentCount := presenceValues.SetCount()
// Now fill up to presentCount values of concrete values.
var valuesCount int
@@ -132,7 +129,7 @@ func (pr *pageReader) read(v []Value) (n int, err error) {
// copying from pr.valuesBuf where appropriate.
var valuesIndex int
for i := range count {
if pr.presenceBuf.Get(i) {
if presenceBuf.Get(i) {
if valuesIndex >= valuesCount {
return n, fmt.Errorf("unexpected end of values")
}

View File

@@ -3,6 +3,7 @@ package dataset
import (
"fmt"
"github.com/grafana/loki/v3/pkg/columnar"
"github.com/grafana/loki/v3/pkg/dataobj/internal/metadata/datasetmd"
"github.com/grafana/loki/v3/pkg/dataobj/internal/streamio"
"github.com/grafana/loki/v3/pkg/memory"
@@ -62,14 +63,9 @@ type valueDecoder interface {
// EncodingType returns the encoding type used by the decoder.
EncodingType() datasetmd.EncodingType
// Decode up to count values using the provided allocator. An opaque return
// variable represents the decoded values. Callers are responsible for
// understanding which concrete type the return value is based on the
// encoding type.
//
// TODO(rfratto): Replace the any return type with a more general "array"
// type.
Decode(alloc *memory.Allocator, count int) (any, error)
// Decode decodes an array up to count values using the provided allocator.
// At the end of the stream, Decode returns nil, [io.EOF].
Decode(alloc *memory.Allocator, count int) (columnar.Array, error)
// Reset discards any state and resets the decoder to read from data.
// This permits reusing a decoder rather than allocating a new one.

View File

@@ -3,30 +3,12 @@ package dataset
import (
"fmt"
"github.com/grafana/loki/v3/pkg/columnar"
"github.com/grafana/loki/v3/pkg/dataobj/internal/metadata/datasetmd"
"github.com/grafana/loki/v3/pkg/dataobj/internal/util/slicegrow"
"github.com/grafana/loki/v3/pkg/memory"
)
// stringArray is an Arrow-compatible representation of multiple strings.
type stringArray struct {
offsets []int32
data []byte
}
func (sa *stringArray) Len() int {
return max(0, len(sa.offsets)-1) // Account for first offset
}
func (sa *stringArray) Get(i int) []byte {
var (
start = sa.offsets[i]
end = sa.offsets[i+1]
)
return sa.data[start:end]
}
// valueDecoderAdapter implements [legacyValueDecoder] for a newer
// [valueDecoder] implementation.
type valueDecoderAdapter struct {
@@ -48,23 +30,23 @@ func (a *valueDecoderAdapter) EncodingType() datasetmd.EncodingType { return a.I
func (a *valueDecoderAdapter) Decode(s []Value) (n int, err error) {
result, err := a.Inner.Decode(a.Alloc, len(s))
if result != nil {
n = a.unpackResult(s, result)
n = a.unpackArray(s, result)
}
return n, err
}
func (a *valueDecoderAdapter) unpackResult(dst []Value, result any) int {
func (a *valueDecoderAdapter) unpackArray(dst []Value, result columnar.Array) int {
switch result := result.(type) {
case stringArray:
return a.unpackStringArray(dst, result)
case []int64:
return a.unpackInt64Array(dst, result)
case *columnar.UTF8:
return a.unpackUTF8(dst, result)
case *columnar.Int64:
return a.unpackInt64(dst, result.Values())
default:
panic(fmt.Sprintf("legacy decoder adapter found unexpected type %T", result))
}
}
func (a *valueDecoderAdapter) unpackStringArray(dst []Value, result stringArray) int {
func (a *valueDecoderAdapter) unpackUTF8(dst []Value, result *columnar.UTF8) int {
if result.Len() > len(dst) {
panic(fmt.Sprintf("invariant broken: larger src len (%d) than dst (%d)", result.Len(), len(dst)))
}
@@ -82,7 +64,11 @@ func (a *valueDecoderAdapter) unpackStringArray(dst []Value, result stringArray)
return result.Len()
}
func (a *valueDecoderAdapter) unpackInt64Array(dst []Value, result []int64) int {
func (a *valueDecoderAdapter) unpackInt64(dst []Value, result []int64) int {
if len(result) > len(dst) {
panic(fmt.Sprintf("invariant broken: larger src len (%d) than dst (%d)", len(result), len(dst)))
}
for i := range result {
dst[i] = Int64Value(result[i])
}

View File

@@ -6,6 +6,7 @@ import (
"io"
"math/bits"
"github.com/grafana/loki/v3/pkg/columnar"
"github.com/grafana/loki/v3/pkg/dataobj/internal/metadata/datasetmd"
"github.com/grafana/loki/v3/pkg/dataobj/internal/streamio"
"github.com/grafana/loki/v3/pkg/memory"
@@ -555,6 +556,8 @@ type bitmapDecoder struct {
set byte // Current bitpacked set byte (8 values, LSB-first).
}
var _ valueDecoder = (*bitmapDecoder)(nil)
// newBitmapDecoder creates a new bitmap decoder that reads encoded bools from data.
func newBitmapDecoder(data []byte) *bitmapDecoder {
return &bitmapDecoder{data: data}
@@ -573,7 +576,7 @@ func (dec *bitmapDecoder) EncodingType() datasetmd.EncodingType {
// Decode decodes up to count values and returns them as a [columnar.Bool]
// array. The number of decoded values is the returned array's Len(). At the
// end of the stream, Decode returns any decoded values along with [io.EOF].
func (dec *bitmapDecoder) Decode(alloc *memory.Allocator, count int) (any, error) {
func (dec *bitmapDecoder) Decode(alloc *memory.Allocator, count int) (columnar.Array, error) {
var (
runLength = dec.runLength
sets = dec.sets
@@ -601,11 +604,11 @@ func (dec *bitmapDecoder) Decode(alloc *memory.Allocator, count int) (any, error
switch {
case runLength == 0 && sets == 0 && setSize == 0: // READY
if off >= len(data) {
return bm, io.EOF
return columnar.MakeBool(bm, memory.Bitmap{}), io.EOF
}
header, uvarintSize := binary.Uvarint(data[off:])
if uvarintSize <= 0 {
return bm, io.EOF
return columnar.MakeBool(bm, memory.Bitmap{}), io.EOF
}
off += uvarintSize
@@ -617,23 +620,23 @@ func (dec *bitmapDecoder) Decode(alloc *memory.Allocator, count int) (any, error
setWidth := int((header>>1)&0x3f) + 1
// only support bool values encoded as ints
if setWidth != 1 {
return bm, fmt.Errorf("set width is supposed to be 1, got %d", setWidth)
return columnar.MakeBool(bm, memory.Bitmap{}), fmt.Errorf("set width is supposed to be 1, got %d", setWidth)
}
} else {
// RLE run.
runLength = header >> 1
if off >= len(data) {
return bm, io.EOF
return columnar.MakeBool(bm, memory.Bitmap{}), io.EOF
}
val, uvarintSize := binary.Uvarint(data[off:])
if uvarintSize <= 0 {
return bm, io.EOF
return columnar.MakeBool(bm, memory.Bitmap{}), io.EOF
}
off += uvarintSize
// only support bool values encoded as ints
if val != 0 && val != 1 {
return bm, fmt.Errorf("unsupported RLE value %d", val)
return columnar.MakeBool(bm, memory.Bitmap{}), fmt.Errorf("unsupported RLE value %d", val)
}
runValue = val > 0
}
@@ -648,7 +651,7 @@ func (dec *bitmapDecoder) Decode(alloc *memory.Allocator, count int) (any, error
case sets > 0 && setSize == 0: // BITPACK-READY
if off >= len(dec.data) {
return bm, io.EOF
return columnar.MakeBool(bm, memory.Bitmap{}), io.EOF
}
set = data[off]
off++
@@ -668,7 +671,7 @@ func (dec *bitmapDecoder) Decode(alloc *memory.Allocator, count int) (any, error
}
}
return bm, nil
return columnar.MakeBool(bm, memory.Bitmap{}), nil
}
// Reset resets dec to read from data.

View File

@@ -11,6 +11,7 @@ import (
"github.com/stretchr/testify/require"
"github.com/grafana/loki/v3/pkg/columnar"
"github.com/grafana/loki/v3/pkg/memory"
)
@@ -88,7 +89,7 @@ func Test_bitmapDecoder_TruncatedData(t *testing.T) {
res, err = dec.Decode(&alloc, 8)
})
bm, ok := res.(memory.Bitmap)
bm, ok := res.(*columnar.Bool)
require.True(t, ok)
require.ErrorIs(t, err, io.EOF)
require.Zero(t, bm.Len())
@@ -102,7 +103,7 @@ func decodeBitmapValues(t *testing.T, dec *bitmapDecoder, alloc *memory.Allocato
var actual []bool
for {
res, err := dec.Decode(alloc, batchSize)
bm, ok := res.(memory.Bitmap)
bm, ok := res.(*columnar.Bool)
require.True(t, ok)
for i := range bm.Len() {
actual = append(actual, bm.Get(i))
@@ -262,8 +263,6 @@ func benchmarkBitmapEncoder(b *testing.B, width int) {
})
}
var bitmapDecoderSink memory.Bitmap
func Benchmark_bitmapDecoder_DecodeBatches(b *testing.B) {
const valuesPerPage = 1 << 16
@@ -313,7 +312,7 @@ func Benchmark_bitmapDecoder_DecodeBatches(b *testing.B) {
dec := newBitmapDecoder(nil)
b.ReportAllocs()
decodedBytesPerOp := int64(sc.valueCount) * 8 // uint64 output
decodedBytesPerOp := int64(sc.valueCount) / 8 // Each value is one bit
b.SetBytes(decodedBytesPerOp)
for b.Loop() {
@@ -323,8 +322,7 @@ func Benchmark_bitmapDecoder_DecodeBatches(b *testing.B) {
decoded := 0
for {
res, err := dec.Decode(&alloc, batchSize)
bm := res.(memory.Bitmap)
bitmapDecoderSink = bm
bm := res.(*columnar.Bool)
decoded += bm.Len()
if errors.Is(err, io.EOF) {

View File

@@ -5,6 +5,7 @@ import (
"fmt"
"io"
"github.com/grafana/loki/v3/pkg/columnar"
"github.com/grafana/loki/v3/pkg/dataobj/internal/metadata/datasetmd"
"github.com/grafana/loki/v3/pkg/dataobj/internal/streamio"
"github.com/grafana/loki/v3/pkg/memory"
@@ -98,10 +99,10 @@ func (dec *deltaDecoder) EncodingType() datasetmd.EncodingType {
return datasetmd.ENCODING_TYPE_DELTA
}
// Decode decodes up to count values, storing the results into a new int64 slice obtained from the provided allocator. The
// decoded values are returned as an any type, which must be cast to []int64.
// At the end of the stream, Decode returns an [io.EOF].
func (dec *deltaDecoder) Decode(alloc *memory.Allocator, count int) (any, error) {
// Decode decodes up to count values, storing the results into a new
// [columnar.Int64] array obtained from the provided allocator. At the end of
// the stream, Decode returns an [io.EOF].
func (dec *deltaDecoder) Decode(alloc *memory.Allocator, count int) (columnar.Array, error) {
// Obtain a buffer from the allocator with enough capacity for an optimistic `count` values.
// Resize the buffer explicitly in order to use the Set API which avoids a reslice compared to Push.
// Resize must be used again before returning any data if the slice is not completely filled.
@@ -129,14 +130,14 @@ func (dec *deltaDecoder) Decode(alloc *memory.Allocator, count int) (any, error)
delta, n := binary.Varint(buf[off:])
if n <= 0 {
valuesBuf.Resize(i)
return values[:i], io.EOF
return columnar.MakeInt64(values[:i], memory.Bitmap{}), io.EOF
}
off += n
prev += delta
values[i] = prev
}
return values, nil
return columnar.MakeInt64(values, memory.Bitmap{}), nil
}
// Reset resets the deltaDecoder to its initial state.

View File

@@ -12,6 +12,7 @@ import (
"github.com/stretchr/testify/require"
"github.com/grafana/loki/v3/pkg/columnar"
"github.com/grafana/loki/v3/pkg/dataobj/internal/streamio"
"github.com/grafana/loki/v3/pkg/memory"
)
@@ -43,7 +44,7 @@ func Test_delta(t *testing.T) {
if !errors.Is(err, io.EOF) {
require.NoError(t, err)
}
actual = append(actual, values.([]int64)...)
actual = append(actual, values.(*columnar.Int64).Values()...)
if err != nil {
break
}
@@ -85,7 +86,7 @@ func Fuzz_delta(f *testing.F) {
if err != nil && !errors.Is(err, io.EOF) {
t.Fatalf("error decoding: %v", err)
}
actual = append(actual, values.([]int64)...)
actual = append(actual, values.(*columnar.Int64).Values()...)
if errors.Is(err, io.EOF) {
break
}
@@ -187,7 +188,7 @@ func Benchmark_deltaDecoder_Decode(b *testing.B) {
for {
values, err := dec.Decode(&alloc, batchSize)
valuesRead += len(values.([]int64))
valuesRead += values.Len()
if err != nil && errors.Is(err, io.EOF) {
break
} else if err != nil {

View File

@@ -5,6 +5,7 @@ import (
"fmt"
"io"
"github.com/grafana/loki/v3/pkg/columnar"
"github.com/grafana/loki/v3/pkg/dataobj/internal/metadata/datasetmd"
"github.com/grafana/loki/v3/pkg/dataobj/internal/streamio"
"github.com/grafana/loki/v3/pkg/memory"
@@ -98,8 +99,8 @@ func (dec *plainBytesDecoder) EncodingType() datasetmd.EncodingType {
// Decode decodes up to count values using the provided allocator to store the
// decoded values. At the end of the stream, Decode returns nil, [io.EOF].
//
// The return value is a [stringArray].
func (dec *plainBytesDecoder) Decode(alloc *memory.Allocator, count int) (any, error) {
// The return value is a [columnar.UTF8].
func (dec *plainBytesDecoder) Decode(alloc *memory.Allocator, count int) (columnar.Array, error) {
var (
// Strings need a an offsets and a value buffer.
//
@@ -140,10 +141,11 @@ func (dec *plainBytesDecoder) Decode(alloc *memory.Allocator, count int) (any, e
return nil, io.EOF
}
return stringArray{
offsets: offsets[:i+1],
data: values[:totalBytes],
}, io.EOF
return columnar.MakeUTF8(
values[:totalBytes],
offsets[:i+1],
memory.Bitmap{},
), io.EOF
}
copied := copy(values[totalBytes:], data[off+uvarintSize:off+uvarintSize+int(stringSize)])
@@ -153,10 +155,11 @@ func (dec *plainBytesDecoder) Decode(alloc *memory.Allocator, count int) (any, e
offsets[i+1] = int32(totalBytes)
}
return stringArray{
offsets: offsets[:count+1],
data: values[:totalBytes],
}, nil
return columnar.MakeUTF8(
values[:totalBytes],
offsets[:count+1],
memory.Bitmap{},
), nil
}
// Reset implements [valueDecoder]. It resets the decoder to read from data.

View File

@@ -10,6 +10,7 @@ import (
"github.com/stretchr/testify/require"
"github.com/grafana/loki/v3/pkg/columnar"
"github.com/grafana/loki/v3/pkg/dataobj/internal/streamio"
"github.com/grafana/loki/v3/pkg/memory"
)
@@ -41,7 +42,7 @@ func Test_plainBytesEncoder(t *testing.T) {
// Handle potential value before checking errors.
if v != nil {
strArr := v.(stringArray)
strArr := v.(*columnar.UTF8)
for i := range strArr.Len() {
out = append(out, string(strArr.Get(i)))
}
@@ -184,10 +185,9 @@ func Benchmark_plainBytesDecoder_Decode(b *testing.B) {
dec.Reset(buf.Bytes())
for {
n, err := dec.Decode(&alloc, totalCount)
if n != nil {
sa := n.(stringArray)
totalRows += sa.Len()
arr, err := dec.Decode(&alloc, totalCount)
if arr != nil {
totalRows += arr.Len()
}
if err != nil && errors.Is(err, io.EOF) {
break

View File

@@ -170,6 +170,16 @@ func (bmap *Bitmap) Len() int { return bmap.len }
// Cap reports the number of values bmap can hold before a new allocation is
// needed.
func (bmap *Bitmap) Cap() int {
	return bmap.capValues()
}
// SetCount returns the number of bits set in the bitmap. Counting starts at
// the first bit of the backing data and covers bmap.len bits.
func (bmap *Bitmap) SetCount() int {
	const firstBit = 0
	return bitutil.CountSetBits(bmap.data, firstBit, bmap.len)
}
// ClearCount returns the number of bits unset in the bitmap, computed as the
// bitmap's length minus its set-bit count.
func (bmap *Bitmap) ClearCount() int {
	total, set := bmap.Len(), bmap.SetCount()
	return total - set
}
// Clone returns a copy of bmap. If bmap is associated with an allocator, the
// returned bitmap uses the same allocator.
func (bmap *Bitmap) Clone() *Bitmap {