Files
grafana/pkg/storage/unified/search/custom_analyzers.go
owensmallwood 827da46c51 Unified Storage: Remove wildcard queries (#101441)
* WIP adding custom analyzer so we can do substring search efficiently

* Adding unit tests for title search

* formatting

* adds more title search unit tests

* organize helpers

* fixes issue caused by having two title mappings

* Removes camelcase token filter since it prevents you from searching for a substring of chars and numbers. Adds regression test.

* adds back mapping for title_phrase

* use simple analyzer for input query string so it doesn't filter out English stop words

* ran bleve tests, table snapshots updated

* ignore linter for "unused" test functions. They are very helpful for troubleshooting search. Keeping them.

* only log total hits and query cost if result not nil

* fixes failing test - one more field because there are two title mappings now

* fix test

* fixes test - only take first item when it's the title

* Adds separate internal field for title ngram mapping.

When searching with a query, results are sorted by score desc.

When searching without a query, results are sorted by title desc.

Adjusts ngram max to be 10.

Text queries are a disjunction of an exact match, phrase match, and a match. Boosted to have priority in that order.

Adds more unit tests for searching.

* linter

* fix test

* ran tests - generated new test dash json

* sort by title phrase instead of title

* fix test - not relying on /apis/dashboard/search to apply title sorting anymore
2025-03-07 09:51:15 -06:00

49 lines
1.5 KiB
Go

package search
import (
"github.com/blevesearch/bleve/v2/analysis/analyzer/custom"
"github.com/blevesearch/bleve/v2/analysis/token/edgengram"
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
"github.com/blevesearch/bleve/v2/analysis/token/unique"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/whitespace"
"github.com/blevesearch/bleve/v2/mapping"
)
// TITLE_ANALYZER is the name under which the custom title analyzer is
// registered on the index mapping (see registerTitleAnalyzer).
const TITLE_ANALYZER = "title_analyzer"
// RegisterCustomAnalyzers registers this package's custom analyzers on mapper.
// Currently that is only the title analyzer; any registration error is
// returned to the caller unchanged.
func RegisterCustomAnalyzers(mapper *mapping.IndexMappingImpl) error {
	if err := registerTitleAnalyzer(mapper); err != nil {
		return err
	}
	return nil
}
// registerTitleAnalyzer defines the custom analyzer for the title field and
// registers it on mapper under the name TITLE_ANALYZER.
//
// The analyzer tokenizes on whitespace and then passes each token through
// three token filters, in this order: the "edge_ngram_filter" defined below,
// bleve's lowercase filter, and bleve's unique filter.
//
// The edge n-gram filter creates n-grams of 3 to 10 characters anchored to the
// front of each token. For example, the token "hello" is expanded into "hel",
// "hell", "hello".
//
// It returns the error reported by the mapping when either the token filter or
// the analyzer cannot be registered, and nil otherwise.
func registerTitleAnalyzer(mapper *mapping.IndexMappingImpl) error {
	// Define the edge n-gram token filter (a token filter, not a tokenizer);
	// this is what makes substring search on the title possible.
	edgeNgramTokenFilter := map[string]interface{}{
		"type": edgengram.Name,
		// NOTE(review): min/max are kept as float64 literals, presumably to
		// match JSON-decoded configuration values - confirm before changing.
		"min": 3.0,
		"max": 10.0,
		// "back" must be a plain bool: false anchors the n-grams at the front
		// of the token. An edgengram.Side constant is a distinct type and is
		// not picked up by bleve's bool type assertion on this key.
		"back": false,
	}
	if err := mapper.AddCustomTokenFilter("edge_ngram_filter", edgeNgramTokenFilter); err != nil {
		return err
	}

	// Create the custom analyzer: whitespace tokenizer followed by the token
	// filters, applied in the order listed.
	ngramAnalyzer := map[string]interface{}{
		"type":          custom.Name,
		"tokenizer":     whitespace.Name,
		"token_filters": []string{"edge_ngram_filter", lowercase.Name, unique.Name},
		//"char_filters": //TODO IF NEEDED
	}
	return mapper.AddCustomAnalyzer(TITLE_ANALYZER, ngramAnalyzer)
}