chore (plg_search_sqlitefts): remove unused code

Author: MickaelK
Date: 2025-08-17 18:46:42 +10:00
parent 3d5f307139
commit 7af5da9622
3 changed files with 2 additions and 36 deletions

Changed file 1 of 3

@@ -45,7 +45,7 @@ var SEARCH_EXCLUSION = func() []string {
 	f.Id = "folder_exclusion"
 	f.Name = "folder_exclusion"
 	f.Type = "text"
-	f.Description = "Exclude folders during the exploration phase"
+	f.Description = "Exclude some specific folder from the crawl / index"
 	f.Placeholder = "Default: node_modules,bower_components,.cache,.npm,.git"
 	f.Default = "node_modules,bower_components,.cache,.npm,.git"
 	return f
@@ -138,7 +138,7 @@ var INDEXING_EXT = func() string {
 	f.Id = "indexer_ext"
 	f.Name = "indexer_ext"
 	f.Type = "text"
-	f.Description = "File extension we want to see indexed"
+	f.Description = "Extensions that will be handled by the full text search engine"
 	f.Placeholder = "Default: org,txt,docx,pdf,md,form"
 	f.Default = "org,txt,docx,pdf,md,form"
 	return f
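
Both settings are plain comma-separated lists (folder names to skip, file extensions to index). As a minimal sketch of how such a value can be consumed, the helper below is hypothetical, not the plugin's actual code:

package main

import (
	"fmt"
	"strings"
)

// parseExclusions is an illustrative helper (not Filestash's API): it splits
// a comma-separated setting like the folder_exclusion default into a set
// usable for O(1) membership checks while crawling.
func parseExclusions(setting string) map[string]bool {
	excluded := make(map[string]bool)
	for _, name := range strings.Split(setting, ",") {
		if name = strings.TrimSpace(name); name != "" {
			excluded[name] = true
		}
	}
	return excluded
}

func main() {
	excluded := parseExclusions("node_modules,bower_components,.cache,.npm,.git")
	fmt.Println(excluded[".git"], excluded["src"]) // true false
}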

Changed file 2 of 3

@@ -26,7 +26,6 @@ type Crawler struct {
 	Backend  IBackend
 	State    indexer.Index
 	mu       sync.Mutex
-	lastHash string
 }

 func NewCrawler(id string, b IBackend) (Crawler, error) {

Changed file 3 of 3

@@ -2,10 +2,7 @@ package plg_search_sqlitefts

 import (
 	"container/heap"
-	"encoding/base64"
-	"hash/fnv"
 	"path/filepath"
-	"strconv"
 	"strings"
 	"time"
@@ -30,36 +27,7 @@ func (this *Crawler) Discover(tx indexer.Manager) bool {
 		this.CurrentPhase = ""
 		return true
 	}
-	if len(files) == 0 {
-		return true
-	}
-	// We don't want our indexer to go wild and diverge over time. As such we need to detect those edge cases: aka
-	// recursive folder structure. Our detection is relying on a Hash of []os.FileInfo
-	hashFiles := func() string {
-		var step int = len(files) / 50
-		if step == 0 {
-			step = 1
-		}
-		hasher := fnv.New32()
-		hasher.Write([]byte(strconv.Itoa(len(files))))
-		for i := 0; i < len(files); i = i + step {
-			hasher.Write([]byte(files[i].Name()))
-		}
-		return base64.StdEncoding.EncodeToString(hasher.Sum(nil))
-	}()
-	if hashFiles == this.lastHash {
-		return true
-	}
-	this.lastHash = ""
-	for i := 0; i < this.FoldersUnknown.Len(); i++ {
-		if this.FoldersUnknown[i].Hash == hashFiles && filepath.Base(doc.Path) != filepath.Base(this.FoldersUnknown[i].Path) {
-			this.lastHash = hashFiles
-			return true
-		}
-	}
-	// Insert the newly found data within our index
 	excluded := SEARCH_EXCLUSION()
 	for i := range files {
 		f := files[i]
@@ -105,7 +73,6 @@ func (this *Crawler) Discover(tx indexer.Manager) bool {
 				Path:    p,
 				Size:    f.Size(),
 				ModTime: f.ModTime(),
-				Hash:    hashFiles,
 			})
 		}
 	} else {
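
For context, the code removed above fingerprinted each directory listing so the crawler could detect recursive folder structures and stop re-indexing them. The sketch below lifts that deleted logic into a standalone, runnable form; hashListing and the main function are illustrative, and plain names stand in for the original []os.FileInfo:

package main

import (
	"encoding/base64"
	"fmt"
	"hash/fnv"
	"strconv"
)

// hashListing reproduces the deleted guard: it fingerprints a directory
// listing by hashing its length plus a sample of at most ~50 entry names
// with 32-bit FNV-1. Two visits to an identical listing (e.g. a recursive
// folder structure) yield the same base64 string.
func hashListing(names []string) string {
	step := len(names) / 50
	if step == 0 {
		step = 1
	}
	hasher := fnv.New32()
	hasher.Write([]byte(strconv.Itoa(len(names))))
	for i := 0; i < len(names); i += step {
		hasher.Write([]byte(names[i]))
	}
	return base64.StdEncoding.EncodeToString(hasher.Sum(nil))
}

func main() {
	a := hashListing([]string{"a.txt", "b.txt"})
	b := hashListing([]string{"a.txt", "b.txt"})
	fmt.Println(a == b) // true: identical listings produce identical fingerprints
}

The lastHash field on Crawler, the Hash field on queued folders, and the base64/fnv/strconv imports existed only to carry this fingerprint between visits, which is why all three files change together in this commit.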