feat: Search enhancement (#2562)

* feat: ignore AList storage on indexing

* fix: remove unused err in `walkFn`

* chore(ci): fix auto_lang trigger and run it

* feat: batch index

* feat: quit index & init index

* feat: set DocType for bleve data

* fix: build index cleanup check origin err
pull/2587/head
BoYanZh 2022-12-05 13:28:39 +08:00 committed by GitHub
parent 4e1be9bee6
commit 8c0dfe2f3d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 160 additions and 50 deletions

View File

@ -7,6 +7,7 @@ on:
paths:
- 'drivers/**'
- 'internal/bootstrap/data/setting.go'
- 'internal/conf/const.go'
workflow_dispatch:
jobs:
@ -69,4 +70,4 @@ jobs:
github_token: ${{ secrets.MY_TOKEN }}
branch: main
directory: alist-web
repository: alist-org/alist-web
repository: alist-org/alist-web

View File

@ -1,5 +1,18 @@
package bootstrap
import (
"github.com/alist-org/alist/v3/internal/search"
log "github.com/sirupsen/logrus"
)
// InitIndex loads the stored index progress and, when that record is not
// flagged as done, sets IsDone to true and writes it back. If the progress
// cannot be loaded the error is logged and nothing is written.
func InitIndex() {
	// TODO init ? Probably not.
	p, err := search.Progress()
	if err != nil {
		log.Errorf("init index error: %+v", err)
		return
	}
	if p.IsDone {
		// Already marked as done; leave the stored record untouched.
		return
	}
	p.IsDone = true
	search.WriteProgress(p)
}

View File

@ -25,7 +25,7 @@ const (
AudioTypes = "audio_types"
VideoTypes = "video_types"
ImageTypes = "image_types"
//OfficeTypes = "office_types"
// OfficeTypes = "office_types"
ProxyTypes = "proxy_types"
OfficeViewers = "office_viewers"
PdfViewers = "pdf_viewers"

View File

@ -11,6 +11,10 @@ func CreateSearchNode(node *model.SearchNode) error {
return db.Create(node).Error
}
// BatchCreateSearchNodes inserts the given search nodes through
// db.CreateInBatches with a batch size of 1000 and returns the error recorded
// on the result, if any.
func BatchCreateSearchNodes(nodes *[]model.SearchNode) error {
	const batchSize = 1000
	result := db.CreateInBatches(nodes, batchSize)
	return result.Error
}
func DeleteSearchNodesByParent(parent string) error {
return db.Where(fmt.Sprintf("%s LIKE ?",
columnName("path")), fmt.Sprintf("%s%%", parent)).

View File

@ -14,9 +14,9 @@ import (
// WalkFS will stop when current depth > `depth`. For each visited node,
// WalkFS calls walkFn. If a visited file system node is a directory and
// walkFn returns path.SkipDir, walkFS will skip traversal of this node.
func WalkFS(ctx context.Context, depth int, name string, info model.Obj, walkFn func(reqPath string, info model.Obj, err error) error) error {
func WalkFS(ctx context.Context, depth int, name string, info model.Obj, walkFn func(reqPath string, info model.Obj) error) error {
// This implementation is based on Walk's code in the standard path/path package.
walkFnErr := walkFn(name, info, nil)
walkFnErr := walkFn(name, info)
if walkFnErr != nil {
if info.IsDir() && walkFnErr == filepath.SkipDir {
return nil

View File

@ -34,3 +34,7 @@ func (p *SearchReq) Validate() error {
}
return nil
}
// Type returns the document type name "SearchNode".
//
// The receiver is a value so that the method belongs to the method set of both
// SearchNode and *SearchNode. A method declared on *SearchNode is not part of
// the method set of a SearchNode value, so a node passed by value as an
// interface would fail a `Type() string` interface assertion and its type name
// would never be reported.
func (s SearchNode) Type() string {
	return "SearchNode"
}

View File

@ -17,6 +17,15 @@ func Init(indexPath *string) (bleve.Index, error) {
if err == bleve.ErrorIndexPathDoesNotExist {
log.Infof("Creating new index...")
indexMapping := bleve.NewIndexMapping()
searchNodeMapping := bleve.NewDocumentMapping()
searchNodeMapping.AddFieldMappingsAt("is_dir", bleve.NewBooleanFieldMapping())
// TODO: appoint analyzer
parentFieldMapping := bleve.NewTextFieldMapping()
searchNodeMapping.AddFieldMappingsAt("parent", parentFieldMapping)
// TODO: appoint analyzer
nameFieldMapping := bleve.NewKeywordFieldMapping()
searchNodeMapping.AddFieldMappingsAt("name", nameFieldMapping)
indexMapping.AddDocumentMapping("SearchNode", searchNodeMapping)
fileIndex, err = bleve.New(*indexPath, indexMapping)
if err != nil {
return nil, err

View File

@ -49,6 +49,14 @@ func (b *Bleve) Index(ctx context.Context, node model.SearchNode) error {
return b.BIndex.Index(uuid.NewString(), node)
}
// BatchIndex queues every node in a single bleve batch, each under a freshly
// generated UUID, and then executes the batch against the index.
//
// If queuing any node fails, that error is returned immediately and the batch
// is not executed, so nothing from a partially built batch is written.
// ctx is not used.
func (b *Bleve) BatchIndex(ctx context.Context, nodes []model.SearchNode) error {
	batch := b.BIndex.NewBatch()
	for _, node := range nodes {
		// batch.Index reports an error; do not drop it.
		if err := batch.Index(uuid.NewString(), node); err != nil {
			return err
		}
	}
	return b.BIndex.Batch(batch)
}
func (b *Bleve) Get(ctx context.Context, parent string) ([]model.SearchNode, error) {
return nil, errs.NotSupport
}

View File

@ -14,32 +14,85 @@ import (
var (
Running = false
Quit chan struct{}
)
func BuildIndex(ctx context.Context, indexPaths, ignorePaths []string, maxDepth int, count bool) error {
var objCount uint64 = 0
Running = true
var (
err error
fi model.Obj
)
defer func() {
Running = false
now := time.Now()
eMsg := ""
if err != nil {
log.Errorf("build index error: %+v", err)
eMsg = err.Error()
} else {
log.Infof("success build index, count: %d", objCount)
storages, err := db.GetEnabledStorages()
if err != nil {
return err
}
for _, storage := range storages {
if storage.Driver == "AList V2" || storage.Driver == "AList V3" {
// TODO: request for indexing permission
ignorePaths = append(ignorePaths, storage.MountPath)
}
if count {
WriteProgress(&model.IndexProgress{
ObjCount: objCount,
IsDone: err == nil,
LastDoneTime: &now,
Error: eMsg,
})
}
var (
objCount uint64 = 0
fi model.Obj
)
Running = true
Quit = make(chan struct{}, 1)
parents := []string{}
infos := []model.Obj{}
// Background flusher: every 5 seconds it hands the collected parents/infos to
// BatchIndex, updates objCount and (when count is set) writes an intermediate
// progress record. A value received on Quit triggers one final flush, writes
// the final progress record and ends the goroutine.
// NOTE(review): parents and infos are also appended to by the walk callback;
// no synchronization is visible here — confirm whether that is intended.
go func() {
	ticker := time.NewTicker(5 * time.Second)
	for {
		select {
		case <-ticker.C:
			log.Infof("index obj count: %d", objCount)
			if len(parents) != 0 {
				log.Debugf("current index: %s", parents[len(parents)-1])
			}
			if err = BatchIndex(ctx, parents, infos); err != nil {
				log.Errorf("build index in batch error: %+v", err)
			} else {
				objCount = objCount + uint64(len(parents))
			}
			if count {
				WriteProgress(&model.IndexProgress{
					ObjCount:     objCount,
					IsDone:       false,
					LastDoneTime: nil,
				})
			}
			parents = nil
			infos = nil
		case <-Quit:
			Running = false
			ticker.Stop()
			eMsg := ""
			now := time.Now()
			// Capture the error that was current before the final flush; the
			// flush below overwrites err.
			originErr := err
			if err = BatchIndex(ctx, parents, infos); err != nil {
				log.Errorf("build index in batch error: %+v", err)
			} else {
				objCount = objCount + uint64(len(parents))
			}
			parents = nil
			infos = nil
			if originErr != nil {
				// Report originErr, not err: err now holds the result of the
				// final flush and is nil when that flush succeeded, so calling
				// err.Error() here would panic.
				log.Errorf("build index error: %+v", originErr)
				eMsg = originErr.Error()
			} else {
				log.Infof("success build index, count: %d", objCount)
			}
			if count {
				WriteProgress(&model.IndexProgress{
					ObjCount:     objCount,
					IsDone:       originErr == nil,
					LastDoneTime: &now,
					Error:        eMsg,
				})
			}
			return
		}
	}
}()
defer func() {
if Running {
Quit <- struct{}{}
}
}()
admin, err := db.GetAdmin()
@ -53,7 +106,7 @@ func BuildIndex(ctx context.Context, indexPaths, ignorePaths []string, maxDepth
})
}
for _, indexPath := range indexPaths {
walkFn := func(indexPath string, info model.Obj, err error) error {
walkFn := func(indexPath string, info model.Obj) error {
for _, avoidPath := range ignorePaths {
if indexPath == avoidPath {
return filepath.SkipDir
@ -63,23 +116,8 @@ func BuildIndex(ctx context.Context, indexPaths, ignorePaths []string, maxDepth
if indexPath == "/" {
return nil
}
err = Index(ctx, path.Dir(indexPath), info)
if err != nil {
return err
} else {
objCount++
}
if objCount%100 == 0 {
log.Infof("index obj count: %d", objCount)
log.Debugf("current success index: %s", indexPath)
if count {
WriteProgress(&model.IndexProgress{
ObjCount: objCount,
IsDone: false,
LastDoneTime: nil,
})
}
}
parents = append(parents, path.Dir(indexPath))
infos = append(infos, info)
return nil
}
fi, err = fs.Get(ctx, indexPath)

View File

@ -22,6 +22,10 @@ func (D DB) Index(ctx context.Context, node model.SearchNode) error {
return db.CreateSearchNode(&node)
}
// BatchIndex stores the given nodes by delegating to
// db.BatchCreateSearchNodes and returns its error. ctx is not used.
func (D DB) BatchIndex(ctx context.Context, nodes []model.SearchNode) error {
	err := db.BatchCreateSearchNodes(&nodes)
	return err
}
func (D DB) Get(ctx context.Context, parent string) ([]model.SearchNode, error) {
return db.GetSearchNodesByParent(parent)
}

View File

@ -1,8 +1,6 @@
package search
import (
"context"
"github.com/alist-org/alist/v3/internal/conf"
"github.com/alist-org/alist/v3/internal/db"
"github.com/alist-org/alist/v3/internal/model"
@ -11,7 +9,7 @@ import (
log "github.com/sirupsen/logrus"
)
func Progress(ctx context.Context) (*model.IndexProgress, error) {
func Progress() (*model.IndexProgress, error) {
p := setting.GetStr(conf.IndexProgress)
var progress model.IndexProgress
err := utils.Json.UnmarshalFromString(p, &progress)

View File

@ -59,6 +59,25 @@ func Index(ctx context.Context, parent string, obj model.Obj) error {
})
}
// BatchIndex converts each (parent, obj) pair into a model.SearchNode and
// passes the whole slice to the active searcher in a single call.
//
// parents[i] is the parent path recorded for objs[i]. objs is indexed by
// position in parents, so it must hold at least len(parents) elements.
//
// It returns errs.SearchNotAvailable when no searcher instance is set, and nil
// without calling the searcher when parents is empty.
func BatchIndex(ctx context.Context, parents []string, objs []model.Obj) error {
	if instance == nil {
		return errs.SearchNotAvailable
	}
	if len(parents) == 0 {
		return nil
	}
	// The final length is known up front, so allocate once instead of growing
	// the slice on every append.
	searchNodes := make([]model.SearchNode, 0, len(parents))
	for i, parent := range parents {
		obj := objs[i]
		searchNodes = append(searchNodes, model.SearchNode{
			Parent: parent,
			Name:   obj.GetName(),
			IsDir:  obj.IsDir(),
			Size:   obj.GetSize(),
		})
	}
	return instance.BatchIndex(ctx, searchNodes)
}
func init() {
db.RegisterSettingItemHook(conf.SearchIndex, func(item *model.SettingItem) error {
log.Debugf("searcher init, mode: %s", item.Value)

View File

@ -18,6 +18,8 @@ type Searcher interface {
Search(ctx context.Context, req model.SearchReq) ([]model.SearchNode, int64, error)
// Index obj with parent
Index(ctx context.Context, node model.SearchNode) error
// Index obj with parent in batches
BatchIndex(ctx context.Context, nodes []model.SearchNode) error
// Get by parent
Get(ctx context.Context, parent string) ([]model.SearchNode, error)
// Del with prefix

View File

@ -16,7 +16,7 @@ func Update(parent string, objs []model.Obj) {
}
ctx := context.Background()
// only update when index have built
progress, err := Progress(ctx)
progress, err := Progress()
if err != nil {
log.Errorf("update search index error while get progress: %+v", err)
return

View File

@ -40,8 +40,17 @@ func BuildIndex(c *gin.Context) {
common.SuccessResp(c)
}
// StopIndex handles a request to stop index building. When search.Running is
// set it sends a value on search.Quit and responds with success; otherwise it
// responds with a 400 error.
func StopIndex(c *gin.Context) {
	if search.Running {
		search.Quit <- struct{}{}
		common.SuccessResp(c)
		return
	}
	common.ErrorStrResp(c, "index is not running", 400)
}
func GetProgress(c *gin.Context) {
progress, err := search.Progress(c)
progress, err := search.Progress()
if err != nil {
common.ErrorResp(c, err, 500)
return

View File

@ -110,6 +110,7 @@ func admin(g *gin.RouterGroup) {
index := g.Group("/index")
index.POST("/build", middlewares.SearchIndex, handles.BuildIndex)
index.POST("/stop", middlewares.SearchIndex, handles.StopIndex)
index.GET("/progress", middlewares.SearchIndex, handles.GetProgress)
}