Improve issue & code search (#33860)

Each "indexer" should provide the "search modes" they support by
themselves. And we need to remove the "fuzzy" search for code.
This commit is contained in:
wxiaoguang
2025-03-13 11:07:48 +08:00
committed by GitHub
parent cd10456664
commit 403775e74e
31 changed files with 317 additions and 172 deletions

View File

@ -16,6 +16,7 @@ import (
"code.gitea.io/gitea/modules/charset"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/gitrepo"
"code.gitea.io/gitea/modules/indexer"
"code.gitea.io/gitea/modules/indexer/code/internal"
indexer_internal "code.gitea.io/gitea/modules/indexer/internal"
inner_elasticsearch "code.gitea.io/gitea/modules/indexer/internal/elasticsearch"
@ -24,7 +25,6 @@ import (
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/timeutil"
"code.gitea.io/gitea/modules/typesniffer"
"code.gitea.io/gitea/modules/util"
"github.com/go-enry/go-enry/v2"
"github.com/olivere/elastic/v7"
@ -46,6 +46,10 @@ type Indexer struct {
indexer_internal.Indexer // do not composite inner_elasticsearch.Indexer directly to avoid exposing too much
}
func (b *Indexer) SupportedSearchModes() []indexer.SearchMode {
return indexer.SearchModesExactWords()
}
// NewIndexer creates a new elasticsearch indexer
func NewIndexer(url, indexerName string) *Indexer {
inner := inner_elasticsearch.NewIndexer(url, indexerName, esRepoIndexerLatestVersion, defaultMapping)
@ -361,15 +365,10 @@ func extractAggs(searchResult *elastic.SearchResult) []*internal.SearchResultLan
// Search searches for codes and language stats by given conditions.
func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) {
var contentQuery elastic.Query
keywordAsPhrase, isPhrase := internal.ParseKeywordAsPhrase(opts.Keyword)
if isPhrase {
contentQuery = elastic.NewMatchPhraseQuery("content", keywordAsPhrase)
} else {
// TODO: this is the old logic, but not really using "fuzziness"
// * IsKeywordFuzzy=true: "best_fields"
// * IsKeywordFuzzy=false: "phrase_prefix"
contentQuery = elastic.NewMultiMatchQuery("content", opts.Keyword).
Type(util.Iif(opts.IsKeywordFuzzy, esMultiMatchTypeBestFields, esMultiMatchTypePhrasePrefix))
if opts.SearchMode == indexer.SearchModeExact {
contentQuery = elastic.NewMatchPhraseQuery("content", opts.Keyword)
} else /* words */ {
contentQuery = elastic.NewMultiMatchQuery("content", opts.Keyword).Type(esMultiMatchTypeBestFields).Operator("and")
}
kwQuery := elastic.NewBoolQuery().Should(
contentQuery,