Skip to content

How to correctly use the simple analyzer when indexing. #2136

Open
@zonggit

Description

here is my code:

package main

import (
"fmt"
"log"
"os"
"strconv"
"github.com/blevesearch/bleve/v2"
"github.com/blevesearch/bleve/v2/analysis/analyzer/simple"
)
type SearchDocument struct {
Path string json:"path"
}
func main() {
index()
}
func index() {
indexPath := "test_index"
if _, err := os.Stat(indexPath); !os.IsNotExist(err) {
os.RemoveAll(indexPath)
}
mapping := bleve.NewIndexMapping()
simpleMapping := bleve.NewTextFieldMapping()
simpleMapping.Analyzer = simple.Name // use simple analyzer
simpleMapping.Store = true
simpleMapping.Index = true
simpleMapping.IncludeTermVectors = true
docMapping := bleve.NewDocumentMapping()
docMapping.AddFieldMappingsAt("Path", simpleMapping)
mapping.AddDocumentMapping("doc", docMapping)
index, err := bleve.New(indexPath, mapping)
if err != nil {
log.Fatalf("Error creating index: %v", err)
}
defer index.Close()
doc := SearchDocument{
Path: "custom/tests/test_regression.py#L13-L25 ",
}
if err := index.Index("1", doc); err != nil {
log.Fatalf("Error indexing document: %v", err)
}
analyzer := index.Mapping().AnalyzerNamed(simple.Name)
tokenStream := analyzer.Analyze([]byte("test_regression.py"))
if err != nil {
log.Fatal(err)
}
fmt.Println("tokenization result:", tokenStream)
queries := []string{"test_regression.py", "test_regression", "regression"}
for _, query := range queries {
fmt.Println("start search: " + query)
mq := bleve.NewMatchQuery(query)
mq.SetField("path")
searchRequest := bleve.NewSearchRequest(mq)
searchRequest.Fields = []string{"*"}
searchRequest.Size = 100
searchResult, err := index.Search(searchRequest)
if err != nil {
log.Fatalf("Error searching index: %v", err)
}
fmt.Println("search doc count: " + strconv.Itoa(searchResult.Hits.Len()))
for _, hit := range searchResult.Hits {
p := hit.Fields["path"].(string)
fmt.Println(p)
}
fmt.Println("=====================================")
}
}

the result is

go run .\demo.go
tokenization result: [Start: 0 End: 4 Position: 1 Token: test Type: 0 Start: 5 End: 15 Position: 2 Token: regression Type: 0 Start: 16 End: 18 Position: 3 Token: py Type: 0]
start search: test_regression.py
search doc count: 1
custom/tests/test_regression.py#L13-L25
=====================================
start search: test_regression
search doc count: 0
=====================================
start search: regression
search doc count: 0
=====================================`

Why are there no results when searching for 'regression'?

Activity

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Metadata

Labels

No labels
No labels

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions