mirror of
https://github.com/mickael-kerjean/filestash.git
synced 2025-10-30 17:46:41 +08:00
feature (search): index content subject to search
This commit is contained in:
@ -8,7 +8,7 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
)
|
)
|
||||||
|
|
||||||
var MimeTypes map[string]string
|
var MimeTypes map[string]string = map[string]string{ "txt": "text/plain" }
|
||||||
|
|
||||||
func init() {
|
func init() {
|
||||||
path := filepath.Join(GetCurrentDir(), CONFIG_PATH + "mime.json")
|
path := filepath.Join(GetCurrentDir(), CONFIG_PATH + "mime.json")
|
||||||
|
|||||||
@ -46,13 +46,13 @@ func FileLs(ctx App, res http.ResponseWriter, req *http.Request) {
|
|||||||
SendErrorResult(res, err)
|
SendErrorResult(res, err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
model.SProc.Append(&ctx, path) // ping the search indexer
|
|
||||||
|
|
||||||
entries, err := ctx.Backend.Ls(path)
|
entries, err := ctx.Backend.Ls(path)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
SendErrorResult(res, err)
|
SendErrorResult(res, err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
go model.SProc.HintLs(&ctx, path)
|
||||||
|
|
||||||
files := make([]FileInfo, len(entries))
|
files := make([]FileInfo, len(entries))
|
||||||
etagger := fnv.New32()
|
etagger := fnv.New32()
|
||||||
@ -154,6 +154,7 @@ func FileCat(ctx App, res http.ResponseWriter, req *http.Request) {
|
|||||||
if req.Header.Get("range") != "" {
|
if req.Header.Get("range") != "" {
|
||||||
needToCreateCache = true
|
needToCreateCache = true
|
||||||
}
|
}
|
||||||
|
go model.SProc.HintLs(&ctx, filepath.Dir(path) + "/")
|
||||||
}
|
}
|
||||||
|
|
||||||
// plugin hooks
|
// plugin hooks
|
||||||
@ -296,6 +297,8 @@ func FileSave(ctx App, res http.ResponseWriter, req *http.Request) {
|
|||||||
SendErrorResult(res, NewError(err.Error(), 403))
|
SendErrorResult(res, NewError(err.Error(), 403))
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
go model.SProc.HintLs(&ctx, filepath.Dir(path) + "/")
|
||||||
|
go model.SProc.HintFile(&ctx, path)
|
||||||
SendSuccessResult(res, nil)
|
SendSuccessResult(res, nil)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -325,6 +328,9 @@ func FileMv(ctx App, res http.ResponseWriter, req *http.Request) {
|
|||||||
SendErrorResult(res, err)
|
SendErrorResult(res, err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
go model.SProc.HintRm(&ctx, filepath.Dir(from) + "/")
|
||||||
|
go model.SProc.HintLs(&ctx, filepath.Dir(to) + "/")
|
||||||
SendSuccessResult(res, nil)
|
SendSuccessResult(res, nil)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -344,6 +350,7 @@ func FileRm(ctx App, res http.ResponseWriter, req *http.Request) {
|
|||||||
SendErrorResult(res, err)
|
SendErrorResult(res, err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
model.SProc.HintRm(&ctx, path)
|
||||||
SendSuccessResult(res, nil)
|
SendSuccessResult(res, nil)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -364,6 +371,7 @@ func FileMkdir(ctx App, res http.ResponseWriter, req *http.Request) {
|
|||||||
SendErrorResult(res, err)
|
SendErrorResult(res, err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
go model.SProc.HintLs(&ctx, filepath.Dir(path) + "/")
|
||||||
SendSuccessResult(res, nil)
|
SendSuccessResult(res, nil)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -384,6 +392,7 @@ func FileTouch(ctx App, res http.ResponseWriter, req *http.Request) {
|
|||||||
SendErrorResult(res, err)
|
SendErrorResult(res, err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
go model.SProc.HintLs(&ctx, filepath.Dir(path) + "/")
|
||||||
SendSuccessResult(res, nil)
|
SendSuccessResult(res, nil)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -8,8 +8,8 @@ import (
|
|||||||
. "github.com/mickael-kerjean/filestash/server/common"
|
. "github.com/mickael-kerjean/filestash/server/common"
|
||||||
"github.com/mickael-kerjean/filestash/server/model/formater"
|
"github.com/mickael-kerjean/filestash/server/model/formater"
|
||||||
"hash/fnv"
|
"hash/fnv"
|
||||||
"io"
|
"io/ioutil"
|
||||||
"math/rand"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"regexp"
|
"regexp"
|
||||||
"strconv"
|
"strconv"
|
||||||
@ -22,6 +22,8 @@ const (
|
|||||||
PHASE_EXPLORE = "PHASE_EXPLORE"
|
PHASE_EXPLORE = "PHASE_EXPLORE"
|
||||||
PHASE_INDEXING = "PHASE_INDEXING"
|
PHASE_INDEXING = "PHASE_INDEXING"
|
||||||
PHASE_MAINTAIN = "PHASE_MAINTAIN"
|
PHASE_MAINTAIN = "PHASE_MAINTAIN"
|
||||||
|
PHASE_PAUSE = "PHASE_PAUSE"
|
||||||
|
MAX_HEAP_SIZE = 100000
|
||||||
)
|
)
|
||||||
var (
|
var (
|
||||||
SEARCH_ENABLE func() bool
|
SEARCH_ENABLE func() bool
|
||||||
@ -29,8 +31,9 @@ var (
|
|||||||
SEARCH_PROCESS_PAR func() int
|
SEARCH_PROCESS_PAR func() int
|
||||||
SEARCH_REINDEX func() int
|
SEARCH_REINDEX func() int
|
||||||
CYCLE_TIME func() int
|
CYCLE_TIME func() int
|
||||||
MAX_INDEXING_FSIZE func() int
|
|
||||||
INDEXING_EXT func() string
|
INDEXING_EXT func() string
|
||||||
|
MAX_INDEXING_FSIZE func() int
|
||||||
|
INDEXING_EXCLUSION = []string{"/node_modules/", "/bower_components/", "/.cache/", "/.npm/", "/.git/"}
|
||||||
)
|
)
|
||||||
|
|
||||||
var SProc SearchProcess = SearchProcess{
|
var SProc SearchProcess = SearchProcess{
|
||||||
@ -70,7 +73,6 @@ func init(){
|
|||||||
}
|
}
|
||||||
SEARCH_PROCESS_MAX()
|
SEARCH_PROCESS_MAX()
|
||||||
SEARCH_PROCESS_PAR = func() int {
|
SEARCH_PROCESS_PAR = func() int {
|
||||||
return 1
|
|
||||||
return Config.Get("features.search.process_par").Schema(func(f *FormElement) *FormElement {
|
return Config.Get("features.search.process_par").Schema(func(f *FormElement) *FormElement {
|
||||||
if f == nil {
|
if f == nil {
|
||||||
f = &FormElement{}
|
f = &FormElement{}
|
||||||
@ -139,13 +141,14 @@ func init(){
|
|||||||
f.Name = "indexer_ext"
|
f.Name = "indexer_ext"
|
||||||
f.Type = "string"
|
f.Type = "string"
|
||||||
f.Description = "File extension we want to see indexed"
|
f.Description = "File extension we want to see indexed"
|
||||||
f.Placeholder = "Default: org,txt,docx,pdf,md"
|
f.Placeholder = "Default: org,txt,docx,pdf,md,form"
|
||||||
f.Default = "/"
|
f.Default = "org,txt,docx,pdf,md,form"
|
||||||
return f
|
return f
|
||||||
}).String()
|
}).String()
|
||||||
}
|
}
|
||||||
INDEXING_EXT()
|
INDEXING_EXT()
|
||||||
|
|
||||||
|
|
||||||
runner := func() {
|
runner := func() {
|
||||||
for {
|
for {
|
||||||
if SEARCH_ENABLE() == false {
|
if SEARCH_ENABLE() == false {
|
||||||
@ -156,9 +159,6 @@ func init(){
|
|||||||
if sidx == nil {
|
if sidx == nil {
|
||||||
time.Sleep(5 * time.Second)
|
time.Sleep(5 * time.Second)
|
||||||
continue
|
continue
|
||||||
} else if sidx.FoldersUnknown.Len() == 0 {
|
|
||||||
time.Sleep(5 * time.Second)
|
|
||||||
continue
|
|
||||||
}
|
}
|
||||||
sidx.mu.Lock()
|
sidx.mu.Lock()
|
||||||
sidx.Execute()
|
sidx.Execute()
|
||||||
@ -174,7 +174,7 @@ func Search(app *App, path string, keyword string) []File {
|
|||||||
var files []File = make([]File, 0)
|
var files []File = make([]File, 0)
|
||||||
|
|
||||||
// extract our search indexer
|
// extract our search indexer
|
||||||
s := SProc.Append(app, path)
|
s := SProc.HintLs(app, path)
|
||||||
if s == nil {
|
if s == nil {
|
||||||
return files
|
return files
|
||||||
}
|
}
|
||||||
@ -183,7 +183,7 @@ func Search(app *App, path string, keyword string) []File {
|
|||||||
path = "/"
|
path = "/"
|
||||||
}
|
}
|
||||||
|
|
||||||
rows, err := s.db.Query(
|
rows, err := s.DB.Query(
|
||||||
"SELECT type, path, size, modTime FROM file WHERE path IN (" +
|
"SELECT type, path, size, modTime FROM file WHERE path IN (" +
|
||||||
" SELECT path FROM file_index WHERE file_index MATCH ? AND path > ? AND path < ?" +
|
" SELECT path FROM file_index WHERE file_index MATCH ? AND path > ? AND path < ?" +
|
||||||
" ORDER BY rank LIMIT 2000" +
|
" ORDER BY rank LIMIT 2000" +
|
||||||
@ -194,6 +194,7 @@ func Search(app *App, path string, keyword string) []File {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return files
|
return files
|
||||||
}
|
}
|
||||||
|
defer rows.Close()
|
||||||
for rows.Next() {
|
for rows.Next() {
|
||||||
f := File{}
|
f := File{}
|
||||||
var t string
|
var t string
|
||||||
@ -216,7 +217,7 @@ type SearchProcess struct {
|
|||||||
mu sync.Mutex
|
mu sync.Mutex
|
||||||
}
|
}
|
||||||
|
|
||||||
func(this *SearchProcess) Append(app *App, path string) *SearchIndexer {
|
func(this *SearchProcess) HintLs(app *App, path string) *SearchIndexer {
|
||||||
id := GenerateID(app)
|
id := GenerateID(app)
|
||||||
this.mu.Lock()
|
this.mu.Lock()
|
||||||
defer this.mu.Unlock()
|
defer this.mu.Unlock()
|
||||||
@ -244,15 +245,14 @@ func(this *SearchProcess) Append(app *App, path string) *SearchIndexer {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Having all indexers running in memory could be expensive => instead we're cycling a pool
|
// Having all indexers running in memory could be expensive => instead we're cycling a pool
|
||||||
search_process_max := 2//SEARCH_PROCESS_MAX()
|
search_process_max := SEARCH_PROCESS_MAX()
|
||||||
if len(this.idx) > ( search_process_max - 1) {
|
if len(this.idx) > ( search_process_max - 1) {
|
||||||
toDel := this.idx[0 : len(this.idx) - ( search_process_max - 1)]
|
toDel := this.idx[0 : len(this.idx) - ( search_process_max - 1)]
|
||||||
for i := range toDel {
|
for i := range toDel {
|
||||||
toDel[i].db.Close()
|
toDel[i].DB.Close()
|
||||||
}
|
}
|
||||||
this.idx = this.idx[len(this.idx) - ( search_process_max - 1) :]
|
this.idx = this.idx[len(this.idx) - ( search_process_max - 1) :]
|
||||||
}
|
}
|
||||||
|
|
||||||
// instantiate the new indexer
|
// instantiate the new indexer
|
||||||
s := NewSearchIndexer(id, app.Backend)
|
s := NewSearchIndexer(id, app.Backend)
|
||||||
heap.Push(&s.FoldersUnknown, &Document{
|
heap.Push(&s.FoldersUnknown, &Document{
|
||||||
@ -265,6 +265,27 @@ func(this *SearchProcess) Append(app *App, path string) *SearchIndexer {
|
|||||||
return &s
|
return &s
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func(this *SearchProcess) HintRm(app *App, path string) {
|
||||||
|
id := GenerateID(app)
|
||||||
|
for i:=len(this.idx)-1; i>=0; i-- {
|
||||||
|
if id == this.idx[i].Id {
|
||||||
|
this.idx[i].DB.Exec("DELETE FROM file WHERE path >= ? AND path < ?", path, path + "~")
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func(this *SearchProcess) HintFile(app *App, path string) {
|
||||||
|
id := GenerateID(app)
|
||||||
|
for i:=len(this.idx)-1; i>=0; i-- {
|
||||||
|
if id == this.idx[i].Id {
|
||||||
|
this.idx[i].DB.Exec("UPDATE file set indexTime = NULL WHERE path = ?", path)
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
func(this *SearchProcess) Peek() *SearchIndexer {
|
func(this *SearchProcess) Peek() *SearchIndexer {
|
||||||
if len(this.idx) == 0 {
|
if len(this.idx) == 0 {
|
||||||
return nil
|
return nil
|
||||||
@ -280,27 +301,35 @@ func(this *SearchProcess) Peek() *SearchIndexer {
|
|||||||
return s
|
return s
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func(this *SearchProcess) Reset() {
|
||||||
|
for i := range this.idx {
|
||||||
|
this.idx[i].DB.Close()
|
||||||
|
}
|
||||||
|
this.idx = make([]SearchIndexer, 0)
|
||||||
|
this.n = -1
|
||||||
|
}
|
||||||
|
|
||||||
type SearchIndexer struct {
|
type SearchIndexer struct {
|
||||||
Id string
|
Id string
|
||||||
FoldersUnknown HeapDoc
|
FoldersUnknown HeapDoc
|
||||||
FilesUnknown HeapDoc
|
CurrentPhase string
|
||||||
Backend IBackend
|
Backend IBackend
|
||||||
db *sql.DB
|
DBPath string
|
||||||
|
DB *sql.DB
|
||||||
mu sync.Mutex
|
mu sync.Mutex
|
||||||
|
lastHash string
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewSearchIndexer(id string, b IBackend) SearchIndexer {
|
func NewSearchIndexer(id string, b IBackend) SearchIndexer {
|
||||||
s := SearchIndexer {
|
s := SearchIndexer {
|
||||||
|
DBPath: filepath.Join(GetCurrentDir(), FTS_PATH, "fts_" + id + ".sql"),
|
||||||
Id: id,
|
Id: id,
|
||||||
Backend: b,
|
Backend: b,
|
||||||
FoldersUnknown: make(HeapDoc, 0, 1),
|
FoldersUnknown: make(HeapDoc, 0, 1),
|
||||||
FilesUnknown: make(HeapDoc, 0, 1),
|
|
||||||
}
|
}
|
||||||
heap.Init(&s.FoldersUnknown)
|
heap.Init(&s.FoldersUnknown)
|
||||||
heap.Init(&s.FilesUnknown)
|
|
||||||
|
|
||||||
db, err := sql.Open("sqlite3", filepath.Join(GetCurrentDir(), FTS_PATH, "fts_" + id + ".sql"))
|
db, err := sql.Open("sqlite3", s.DBPath + "?_journal_mode=wal")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
Log.Warning("search::init can't open database (%v)", err)
|
Log.Warning("search::init can't open database (%v)", err)
|
||||||
return s
|
return s
|
||||||
@ -311,6 +340,7 @@ func NewSearchIndexer(id string, b IBackend) SearchIndexer {
|
|||||||
Log.Warning("search::initschema prepare schema error(%v)", err)
|
Log.Warning("search::initschema prepare schema error(%v)", err)
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
defer stmt.Close()
|
||||||
_, err = stmt.Exec()
|
_, err = stmt.Exec()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
Log.Warning("search::initschema execute error(%v)", err)
|
Log.Warning("search::initschema execute error(%v)", err)
|
||||||
@ -318,10 +348,16 @@ func NewSearchIndexer(id string, b IBackend) SearchIndexer {
|
|||||||
}
|
}
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
if queryDB("CREATE TABLE IF NOT EXISTS file(path VARCHAR(1024) PRIMARY KEY, filename VARCHAR(64), filetype VARCHAR(16), type VARCHAR(16), size INTEGER, modTime timestamp, indexTime timestamp DEFAULT NULL);"); err != nil {
|
if queryDB("CREATE TABLE IF NOT EXISTS file(path VARCHAR(1024) PRIMARY KEY, filename VARCHAR(64), filetype VARCHAR(16), type VARCHAR(16), parent VARCHAR(1024), size INTEGER, modTime timestamp, indexTime timestamp DEFAULT NULL);"); err != nil {
|
||||||
return s
|
return s
|
||||||
}
|
}
|
||||||
if queryDB("CREATE VIRTUAL TABLE IF NOT EXISTS file_index USING fts5(path UNINDEXED, filename, filetype, content);"); err != nil {
|
if queryDB("CREATE INDEX idx_file_index_time ON file(indexTime) WHERE indexTime IS NOT NULL;"); err != nil {
|
||||||
|
return s
|
||||||
|
}
|
||||||
|
if queryDB("CREATE INDEX idx_file_parent ON file(parent);"); err != nil {
|
||||||
|
return s
|
||||||
|
}
|
||||||
|
if queryDB("CREATE VIRTUAL TABLE IF NOT EXISTS file_index USING fts5(path UNINDEXED, filename, filetype, content, tokenize = 'porter');"); err != nil {
|
||||||
return s
|
return s
|
||||||
}
|
}
|
||||||
if queryDB("CREATE TRIGGER IF NOT EXISTS after_file_insert AFTER INSERT ON file BEGIN INSERT INTO file_index (path, filename, filetype) VALUES(new.path, new.filename, new.filetype); END;"); err != nil {
|
if queryDB("CREATE TRIGGER IF NOT EXISTS after_file_insert AFTER INSERT ON file BEGIN INSERT INTO file_index (path, filename, filetype) VALUES(new.path, new.filename, new.filetype); END;"); err != nil {
|
||||||
@ -333,59 +369,65 @@ func NewSearchIndexer(id string, b IBackend) SearchIndexer {
|
|||||||
if queryDB("CREATE TRIGGER IF NOT EXISTS after_file_update_path UPDATE OF path ON file BEGIN UPDATE file_index SET path = new.path, filepath = new.filepath, filetype = new.filetype WHERE path = old.path; END;"); err != nil {
|
if queryDB("CREATE TRIGGER IF NOT EXISTS after_file_update_path UPDATE OF path ON file BEGIN UPDATE file_index SET path = new.path, filepath = new.filepath, filetype = new.filetype WHERE path = old.path; END;"); err != nil {
|
||||||
return s
|
return s
|
||||||
}
|
}
|
||||||
s.db = db
|
s.DB = db
|
||||||
return s
|
return s
|
||||||
}
|
}
|
||||||
|
|
||||||
func(this *SearchIndexer) Execute(){
|
func(this *SearchIndexer) Execute(){
|
||||||
currentPhase := func() string {
|
if this.CurrentPhase == "" {
|
||||||
if len(this.FoldersUnknown) != 0 {
|
time.Sleep(1 * time.Second)
|
||||||
return PHASE_EXPLORE
|
this.CurrentPhase = PHASE_EXPLORE
|
||||||
}
|
}
|
||||||
if len(this.FilesUnknown) != 0 {
|
|
||||||
return PHASE_INDEXING
|
cycleExecute := func(fn func(*sql.Tx) bool) {
|
||||||
}
|
|
||||||
return PHASE_MAINTAIN
|
|
||||||
}()
|
|
||||||
cycleExecute := func(fn func() bool) {
|
|
||||||
stopTime := time.Now().Add(time.Duration(CYCLE_TIME()) * time.Second)
|
stopTime := time.Now().Add(time.Duration(CYCLE_TIME()) * time.Second)
|
||||||
|
tx, err := this.DB.Begin()
|
||||||
|
if err != nil {
|
||||||
|
Log.Warning("search::index cycle_begin (%+v)", err)
|
||||||
|
time.Sleep(5 * time.Second)
|
||||||
|
}
|
||||||
for {
|
for {
|
||||||
if fn() == false {
|
if fn(tx) == false {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
if stopTime.After(time.Now()) == false {
|
if stopTime.After(time.Now()) == false {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if err = tx.Commit(); err != nil {
|
||||||
|
Log.Warning("search::index cycle_commit (%+v)", err)
|
||||||
}
|
}
|
||||||
if currentPhase == PHASE_EXPLORE {
|
}
|
||||||
|
if this.CurrentPhase == PHASE_EXPLORE {
|
||||||
cycleExecute(this.Discover)
|
cycleExecute(this.Discover)
|
||||||
return
|
return
|
||||||
} else if currentPhase == PHASE_INDEXING {
|
} else if this.CurrentPhase == PHASE_INDEXING {
|
||||||
r := rand.Intn(100)
|
|
||||||
if r < 30 {
|
|
||||||
cycleExecute(this.Bookkeeping)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
cycleExecute(this.Indexing)
|
cycleExecute(this.Indexing)
|
||||||
return
|
return
|
||||||
} else if currentPhase == PHASE_MAINTAIN {
|
} else if this.CurrentPhase == PHASE_MAINTAIN {
|
||||||
cycleExecute(this.Bookkeeping)
|
cycleExecute(this.Consolidate)
|
||||||
return
|
return
|
||||||
|
} else if this.CurrentPhase == PHASE_PAUSE {
|
||||||
|
time.Sleep(5 * time.Second)
|
||||||
|
this.CurrentPhase = ""
|
||||||
}
|
}
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
func(this *SearchIndexer) Discover() bool {
|
func(this *SearchIndexer) Discover(tx *sql.Tx) bool {
|
||||||
if this.FoldersUnknown.Len() == 0 {
|
if this.FoldersUnknown.Len() == 0 {
|
||||||
|
this.CurrentPhase = PHASE_INDEXING
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
doc := heap.Pop(&this.FoldersUnknown).(*Document)
|
var doc *Document
|
||||||
|
doc = heap.Pop(&this.FoldersUnknown).(*Document)
|
||||||
if doc == nil {
|
if doc == nil {
|
||||||
|
this.CurrentPhase = PHASE_INDEXING
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
files, err := this.Backend.Ls(doc.Path)
|
files, err := this.Backend.Ls(doc.Path)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
this.CurrentPhase = ""
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
if len(files) == 0 {
|
if len(files) == 0 {
|
||||||
@ -406,32 +448,26 @@ func(this *SearchIndexer) Discover() bool {
|
|||||||
}
|
}
|
||||||
return base64.StdEncoding.EncodeToString(hasher.Sum(nil))
|
return base64.StdEncoding.EncodeToString(hasher.Sum(nil))
|
||||||
}()
|
}()
|
||||||
|
if hashFiles == this.lastHash {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
this.lastHash = ""
|
||||||
for i:=0; i<this.FoldersUnknown.Len(); i++ {
|
for i:=0; i<this.FoldersUnknown.Len(); i++ {
|
||||||
if this.FoldersUnknown[i].Hash == hashFiles && filepath.Base(doc.Path) != filepath.Base(this.FoldersUnknown[i].Path) {
|
if this.FoldersUnknown[i].Hash == hashFiles && filepath.Base(doc.Path) != filepath.Base(this.FoldersUnknown[i].Path) {
|
||||||
|
this.lastHash = hashFiles
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Insert the newly found data within our index
|
// Insert the newly found data within our index
|
||||||
tx, _ := this.db.Begin()
|
|
||||||
tx.Exec("BEGIN EXCLUSIVE TRANSACTION;")
|
|
||||||
for i := range files {
|
for i := range files {
|
||||||
f := files[i]
|
f := files[i]
|
||||||
name := f.Name()
|
name := f.Name()
|
||||||
p := filepath.Join(doc.Path, name)
|
|
||||||
if f.IsDir() {
|
if f.IsDir() {
|
||||||
p += "/"
|
|
||||||
_, err = tx.Exec(
|
|
||||||
"INSERT INTO file(path, filename, type, size, modTime, indexTime) VALUES(?, ?, ?, ?, ?, ?)",
|
|
||||||
p,
|
|
||||||
name,
|
|
||||||
"directory",
|
|
||||||
f.Size(),
|
|
||||||
f.ModTime(),
|
|
||||||
time.Now(),
|
|
||||||
);
|
|
||||||
var performPush bool = false
|
var performPush bool = false
|
||||||
if err == nil {
|
p := filepath.Join(doc.Path, name)
|
||||||
|
p += "/"
|
||||||
|
if err = this.dbInsert(doc.Path, f, tx); err == nil {
|
||||||
performPush = true
|
performPush = true
|
||||||
} else if e, ok := err.(sqlite3.Error); ok && e.Code == sqlite3.ErrConstraint {
|
} else if e, ok := err.(sqlite3.Error); ok && e.Code == sqlite3.ErrConstraint {
|
||||||
performPush = func(path string) bool{
|
performPush = func(path string) bool{
|
||||||
@ -441,7 +477,7 @@ func(this *SearchIndexer) Discover() bool {
|
|||||||
Log.Warning("search::discovery unknown_path (%v)", err)
|
Log.Warning("search::discovery unknown_path (%v)", err)
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
tm, err := time.Parse(time.RFC3339, t);
|
tm, err := time.Parse(time.RFC3339, t)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
Log.Warning("search::discovery invalid_time (%v)", err)
|
Log.Warning("search::discovery invalid_time (%v)", err)
|
||||||
return false
|
return false
|
||||||
@ -455,6 +491,8 @@ func(this *SearchIndexer) Discover() bool {
|
|||||||
}
|
}
|
||||||
return true
|
return true
|
||||||
}(p)
|
}(p)
|
||||||
|
} else {
|
||||||
|
Log.Error("search::indexing insert_index (%v)", err)
|
||||||
}
|
}
|
||||||
if performPush == true {
|
if performPush == true {
|
||||||
heap.Push(&this.FoldersUnknown, &Document{
|
heap.Push(&this.FoldersUnknown, &Document{
|
||||||
@ -467,50 +505,74 @@ func(this *SearchIndexer) Discover() bool {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
_, err = tx.Exec(
|
if err = this.dbInsert(doc.Path, f, tx); err != nil {
|
||||||
"INSERT INTO file(path, filename, filetype, type, size, modTime) VALUES(?, ?, ?, ?, ?, ?)",
|
Log.Warning("search::insert index_error (%v)", err)
|
||||||
filepath.Join(doc.Path, name),
|
return false
|
||||||
name,
|
}
|
||||||
strings.TrimPrefix(filepath.Ext(name), "."),
|
}
|
||||||
"file",
|
}
|
||||||
f.Size(),
|
return true
|
||||||
f.ModTime(),
|
}
|
||||||
|
|
||||||
|
func(this *SearchIndexer) Indexing(tx *sql.Tx) bool {
|
||||||
|
ext := strings.Split(INDEXING_EXT(), ",")
|
||||||
|
for i:=0; i<len(ext); i++ {
|
||||||
|
ext[i] = "'" + strings.TrimSpace(ext[i]) + "'"
|
||||||
|
}
|
||||||
|
|
||||||
|
rows, err := tx.Query(
|
||||||
|
"SELECT path FROM file WHERE (" +
|
||||||
|
" type = 'file' AND size < ? AND filetype IN (" + strings.Join(ext, ",") +") AND indexTime IS NULL " +
|
||||||
|
") LIMIT 2",
|
||||||
|
MAX_INDEXING_FSIZE(),
|
||||||
)
|
)
|
||||||
}
|
|
||||||
}
|
|
||||||
err = tx.Commit()
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
Log.Warning("search::discovery transaction_error (%v)", err)
|
Log.Warning("search::insert index_query (%v)", err)
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
defer rows.Close()
|
||||||
|
i := 0
|
||||||
|
for rows.Next() {
|
||||||
|
i += 1
|
||||||
|
var path string
|
||||||
|
if err = rows.Scan(&path); err != nil {
|
||||||
|
Log.Warning("search::indexing index_scan (%v)", err)
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if err = this.updateFile(path, tx); err != nil {
|
||||||
|
Log.Warning("search::indexing index_update (%v)", err)
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if i == 0 {
|
||||||
|
this.CurrentPhase = PHASE_MAINTAIN
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
func(this *SearchIndexer) Indexing() bool {
|
func(this *SearchIndexer) updateFile(path string, tx *sql.Tx) error {
|
||||||
var path string
|
if _, err := tx.Exec("UPDATE file SET indexTime = ? WHERE path = ?", time.Now(), path); err != nil {
|
||||||
// find some file that needs to be indexed
|
return err
|
||||||
var err error
|
|
||||||
if err = this.db.QueryRow(
|
|
||||||
"SELECT path FROM file WHERE (" +
|
|
||||||
" type = 'file' AND size < 512000 AND filetype = 'txt' AND indexTime IS NULL" +
|
|
||||||
") LIMIT 1;",
|
|
||||||
).Scan(&path); err != nil {
|
|
||||||
return false
|
|
||||||
}
|
}
|
||||||
defer this.db.Exec(
|
|
||||||
"UPDATE file SET indexTime = ? WHERE path = ?",
|
|
||||||
time.Now(), path,
|
|
||||||
)
|
|
||||||
mime := GetMimeType(path)
|
|
||||||
|
|
||||||
// Index content
|
for i:=0; i<len(INDEXING_EXCLUSION); i++ {
|
||||||
var reader io.ReadCloser
|
if strings.Contains(path, INDEXING_EXCLUSION[i]) {
|
||||||
reader, err = this.Backend.Cat(path)
|
return nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
reader, err := this.Backend.Cat(path)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return false
|
if _, a := tx.Exec("DELETE FROM file WHERE path = ?", path); a != nil {
|
||||||
|
return a
|
||||||
|
}
|
||||||
|
return err
|
||||||
}
|
}
|
||||||
defer reader.Close()
|
defer reader.Close()
|
||||||
switch mime {
|
|
||||||
|
switch GetMimeType(path) {
|
||||||
case "text/plain": reader, err = formater.TxtFormater(reader)
|
case "text/plain": reader, err = formater.TxtFormater(reader)
|
||||||
case "text/org": reader, err = formater.TxtFormater(reader)
|
case "text/org": reader, err = formater.TxtFormater(reader)
|
||||||
case "text/markdown": reader, err = formater.TxtFormater(reader)
|
case "text/markdown": reader, err = formater.TxtFormater(reader)
|
||||||
@ -519,17 +581,186 @@ func(this *SearchIndexer) Indexing() bool {
|
|||||||
case "application/vnd.ms-powerpoint": reader, err = formater.OfficeFormater(reader)
|
case "application/vnd.ms-powerpoint": reader, err = formater.OfficeFormater(reader)
|
||||||
case "application/word": reader, err = formater.OfficeFormater(reader)
|
case "application/word": reader, err = formater.OfficeFormater(reader)
|
||||||
case "application/msword": reader, err = formater.OfficeFormater(reader)
|
case "application/msword": reader, err = formater.OfficeFormater(reader)
|
||||||
default: return true
|
default: return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
Log.Warning("search::indexing formater_error (%v)", err)
|
return nil
|
||||||
return true
|
}
|
||||||
|
var content []byte
|
||||||
|
if content, err = ioutil.ReadAll(reader); err != nil {
|
||||||
|
Log.Warning("search::index content_read (%v)", err)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
if _, err = tx.Exec("UPDATE file_index SET content = ? WHERE path = ?", content, path); err != nil {
|
||||||
|
Log.Warning("search::index index_update (%v)", err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func(this *SearchIndexer) updateFolder(path string, tx *sql.Tx) error {
|
||||||
|
if _, err := tx.Exec("UPDATE file SET indexTime = ? WHERE path = ?", time.Now(), path); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
for i:=0; i<len(INDEXING_EXCLUSION); i++ {
|
||||||
|
if strings.Contains(path, INDEXING_EXCLUSION[i]) {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fetch list of folders as in the remote filesystem
|
||||||
|
currFiles, err := this.Backend.Ls(path)
|
||||||
|
if err != nil {
|
||||||
|
tx.Exec("DELETE FROM file WHERE path >= ? AND path < ?", path, path + "~")
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fetch FS as appear in our search cache
|
||||||
|
rows, err := tx.Query("SELECT filename, type, size FROM file WHERE parent = ?", path)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer rows.Close()
|
||||||
|
previousFiles := make([]File, 0)
|
||||||
|
for rows.Next() {
|
||||||
|
var f File
|
||||||
|
rows.Scan(&f.FName, &f.FType, f.FSize)
|
||||||
|
previousFiles = append(previousFiles, f)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Perform the DB operation to ensure previousFiles and currFiles are in sync
|
||||||
|
// 1. Find the content that have been created and did not exist before
|
||||||
|
for i:=0; i<len(currFiles); i++ {
|
||||||
|
currFilenameAlreadyExist := false
|
||||||
|
currFilename := currFiles[i].Name()
|
||||||
|
for j:=0; j<len(previousFiles); j++ {
|
||||||
|
if currFilename == previousFiles[j].Name() {
|
||||||
|
if currFiles[i].Size() != previousFiles[j].Size() {
|
||||||
|
err = this.dbUpdate(path, currFiles[i], tx)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
currFilenameAlreadyExist = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if currFilenameAlreadyExist == false {
|
||||||
|
this.dbInsert(path, currFiles[i], tx)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// 2. Find the content that was existing before but got removed
|
||||||
|
for i:=0; i<len(previousFiles); i++ {
|
||||||
|
previousFilenameStillExist := false
|
||||||
|
previousFilename := previousFiles[i].Name()
|
||||||
|
for j:=0; j<len(currFiles); j++ {
|
||||||
|
if previousFilename == currFiles[j].Name() {
|
||||||
|
previousFilenameStillExist = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if previousFilenameStillExist == false {
|
||||||
|
this.dbDelete(path, previousFiles[i], tx)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func(this *SearchIndexer) Consolidate(tx *sql.Tx) bool {
|
||||||
|
rows, err := tx.Query(
|
||||||
|
"SELECT path, type FROM file WHERE indexTime < ? ORDER BY indexTime DESC LIMIT 5",
|
||||||
|
time.Now().Add(- time.Duration(SEARCH_REINDEX()) * time.Hour),
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
if err == sql.ErrNoRows {
|
||||||
|
this.CurrentPhase = PHASE_PAUSE
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
this.CurrentPhase = ""
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
defer rows.Close()
|
||||||
|
i := 0
|
||||||
|
for rows.Next() {
|
||||||
|
i += 1
|
||||||
|
var path string
|
||||||
|
var cType string
|
||||||
|
if err = rows.Scan(&path, &cType); err != nil {
|
||||||
|
Log.Warning("search::index db_stale (%v)", err)
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if cType == "directory" {
|
||||||
|
this.updateFolder(path, tx)
|
||||||
|
} else {
|
||||||
|
this.updateFile(path, tx)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if i == 0 {
|
||||||
|
this.CurrentPhase = PHASE_PAUSE
|
||||||
|
return false
|
||||||
}
|
}
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
func(this SearchIndexer) Bookkeeping() bool {
|
func(this *SearchIndexer) dbInsert(parent string, f os.FileInfo, tx *sql.Tx) error {
|
||||||
return false
|
var name string = f.Name()
|
||||||
|
var err error
|
||||||
|
path := filepath.Join(parent, name)
|
||||||
|
|
||||||
|
if f.IsDir() {
|
||||||
|
_, err = tx.Exec(
|
||||||
|
"INSERT INTO file(path, parent, filename, type, size, modTime, indexTime) " +
|
||||||
|
"VALUES(?, ?, ?, ?, ?, ?, ?)",
|
||||||
|
path + "/",
|
||||||
|
parent,
|
||||||
|
name,
|
||||||
|
"directory",
|
||||||
|
f.Size(),
|
||||||
|
f.ModTime(),
|
||||||
|
time.Now(),
|
||||||
|
)
|
||||||
|
} else {
|
||||||
|
_, err = tx.Exec(
|
||||||
|
"INSERT INTO file(path, parent, filename, type, size, modTime, indexTime, filetype) " +
|
||||||
|
"VALUES(?, ?, ?, ?, ?, ?, ?, ?)",
|
||||||
|
path,
|
||||||
|
parent,
|
||||||
|
name,
|
||||||
|
"file",
|
||||||
|
f.Size(),
|
||||||
|
f.ModTime(),
|
||||||
|
nil,
|
||||||
|
strings.TrimPrefix(filepath.Ext(name), "."),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
func(this *SearchIndexer) dbUpdate(parent string, f os.FileInfo, tx *sql.Tx) error {
|
||||||
|
path := filepath.Join(parent, f.Name())
|
||||||
|
if f.IsDir() {
|
||||||
|
path += "/"
|
||||||
|
}
|
||||||
|
_, err := tx.Exec(
|
||||||
|
"UPDATE file SET size = ?, modTime = ? indexTime = NULL WHERE path = ?",
|
||||||
|
f.Size(), f.ModTime(), path,
|
||||||
|
)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
func(this *SearchIndexer) dbDelete(parent string, f os.FileInfo, tx *sql.Tx) error {
|
||||||
|
path := filepath.Join(parent, f.Name())
|
||||||
|
if f.IsDir() {
|
||||||
|
path += "/"
|
||||||
|
}
|
||||||
|
_, err := tx.Exec(
|
||||||
|
"DELETE FROM file WHERE path >= ? AND path < ?",
|
||||||
|
path, path + "~",
|
||||||
|
)
|
||||||
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
type Document struct {
|
type Document struct {
|
||||||
@ -542,12 +773,16 @@ type Document struct {
|
|||||||
ModTime time.Time `json:"time"`
|
ModTime time.Time `json:"time"`
|
||||||
Size int64 `json:"size"`
|
Size int64 `json:"size"`
|
||||||
Content []byte `json:"content"`
|
Content []byte `json:"content"`
|
||||||
|
Priority int `json:"-"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// https://golang.org/pkg/container/heap/
|
// https://golang.org/pkg/container/heap/
|
||||||
type HeapDoc []*Document
|
type HeapDoc []*Document
|
||||||
func(h HeapDoc) Len() int { return len(h) }
|
func(h HeapDoc) Len() int { return len(h) }
|
||||||
func(h HeapDoc) Less(i, j int) bool {
|
func(h HeapDoc) Less(i, j int) bool {
|
||||||
|
if h[i].Priority != 0 || h[j].Priority != 0 {
|
||||||
|
return h[i].Priority < h[j].Priority
|
||||||
|
}
|
||||||
scoreA := len(strings.Split(h[i].Path, "/")) / len(strings.Split(h[i].InitialPath, "/"))
|
scoreA := len(strings.Split(h[i].Path, "/")) / len(strings.Split(h[i].InitialPath, "/"))
|
||||||
scoreB := len(strings.Split(h[j].Path, "/")) / len(strings.Split(h[j].InitialPath, "/"))
|
scoreB := len(strings.Split(h[j].Path, "/")) / len(strings.Split(h[j].InitialPath, "/"))
|
||||||
return scoreA < scoreB
|
return scoreA < scoreB
|
||||||
@ -557,7 +792,11 @@ func(h HeapDoc) Swap(i, j int) {
|
|||||||
h[i] = h[j]
|
h[i] = h[j]
|
||||||
h[j] = a
|
h[j] = a
|
||||||
}
|
}
|
||||||
func (h *HeapDoc) Push(x interface{}) { *h = append(*h, x.(*Document)) }
|
func (h *HeapDoc) Push(x interface{}) {
|
||||||
|
if h.Len() < MAX_HEAP_SIZE {
|
||||||
|
*h = append(*h, x.(*Document))
|
||||||
|
}
|
||||||
|
}
|
||||||
func (h *HeapDoc) Pop() interface{} {
|
func (h *HeapDoc) Pop() interface{} {
|
||||||
old := *h
|
old := *h
|
||||||
n := len(old)
|
n := len(old)
|
||||||
|
|||||||
Reference in New Issue
Block a user