package main import ( "bufio" "fmt" "os" "path/filepath" "strconv" "strings" "time" "github.com/blevesearch/bleve/v2" ) // Document represents a single document to be indexed. // You can add more fields if needed. type Document struct { ID string `json:"id"` Link string `json:"link"` Title string `json:"title"` Tags string `json:"tags"` Description string `json:"description"` Popularity int64 `json:"popularity"` } var ( // Global Bleve index handle bleveIndex bleve.Index ) func startPeriodicIndexing(filePath string, interval time.Duration) { go func() { for { printDebug("Refreshing index from %s", filePath) err := IndexFile(filePath) if err != nil { printErr("Failed to refresh index: %v", err) } time.Sleep(interval) } }() } // InitIndex ensures that the Bleve index is created or opened. func InitIndex() error { idx, err := bleve.Open(filepath.Join(config.DriveCache.Path, "index.bleve")) if err == bleve.ErrorIndexPathDoesNotExist { // Index doesn't exist, create a new one mapping := bleve.NewIndexMapping() // Custom mapping for the document docMapping := bleve.NewDocumentMapping() // Text fields with custom analyzers for better tokenization textFieldMapping := bleve.NewTextFieldMapping() textFieldMapping.Analyzer = "standard" // Use standard analyzer for partial and fuzzy matches docMapping.AddFieldMappingsAt("title", textFieldMapping) docMapping.AddFieldMappingsAt("description", textFieldMapping) docMapping.AddFieldMappingsAt("tags", textFieldMapping) // Numeric field for popularity popularityMapping := bleve.NewNumericFieldMapping() docMapping.AddFieldMappingsAt("popularity", popularityMapping) mapping.AddDocumentMapping("Document", docMapping) idx, err = bleve.New(filepath.Join(config.DriveCache.Path, "index.bleve"), mapping) if err != nil { return fmt.Errorf("failed to create index: %v", err) } } else if err != nil { return fmt.Errorf("failed to open index: %v", err) } bleveIndex = idx return nil } // IndexFile reads a file line-by-line and indexes each line as a document. // Each line represents a simple document. Adjust parsing as needed. func IndexFile(filePath string) error { file, err := os.Open(filePath) if err != nil { return fmt.Errorf("unable to open file for indexing: %v", err) } defer file.Close() scanner := bufio.NewScanner(file) batch := bleveIndex.NewBatch() indexedDomains := make(map[string]bool) // Track indexed domains for scanner.Scan() { line := scanner.Text() // Split the line into 5 fields: link|title|tags|description|popularity parts := strings.SplitN(line, "|", 5) if len(parts) < 5 { continue // Skip malformed lines } domain := parts[0] popularity, _ := strconv.ParseInt(parts[4], 10, 64) // Skip if the domain is already indexed if indexedDomains[domain] { continue } doc := Document{ ID: domain, // Use the domain as the unique ID Link: parts[0], Title: parts[1], Tags: parts[2], Description: parts[3], Popularity: popularity, } err := batch.Index(doc.ID, map[string]interface{}{ "title": doc.Title, "description": doc.Description, "link": doc.Link, "tags": doc.Tags, "popularity": doc.Popularity, }) if err != nil { return fmt.Errorf("failed to index document: %v", err) } indexedDomains[domain] = true // Mark the domain as indexed } // Commit the batch if err := bleveIndex.Batch(batch); err != nil { return fmt.Errorf("error committing batch: %v", err) } if err := scanner.Err(); err != nil { return fmt.Errorf("error reading file: %v", err) } printDebug("Indexed %d unique domains from %s\n", len(indexedDomains), filePath) return nil } // SearchIndex performs a full-text search on the indexed data. func SearchIndex(queryStr string, page, pageSize int) ([]Document, error) { // Create compound query exactMatch := bleve.NewMatchQuery(queryStr) // Exact match fuzzyMatch := bleve.NewFuzzyQuery(queryStr) // Fuzzy match fuzzyMatch.Fuzziness = 2 prefixMatch := bleve.NewPrefixQuery(queryStr) // Prefix match query := bleve.NewDisjunctionQuery(exactMatch, fuzzyMatch, prefixMatch) req := bleve.NewSearchRequest(query) req.Fields = []string{"title", "description", "link", "tags", "popularity"} // Pagination req.Size = pageSize req.From = (page - 1) * pageSize // Sort by popularity req.SortBy([]string{"popularity"}) res, err := bleveIndex.Search(req) if err != nil { return nil, fmt.Errorf("search error: %v", err) } var docs []Document for _, hit := range res.Hits { title := fmt.Sprintf("%v", hit.Fields["title"]) description := fmt.Sprintf("%v", hit.Fields["description"]) link := fmt.Sprintf("%v", hit.Fields["link"]) tags := fmt.Sprintf("%v", hit.Fields["tags"]) popularity := int64(0) if pop, ok := hit.Fields["popularity"].(float64); ok { popularity = int64(pop) } if link == "" || title == "" { continue } docs = append(docs, Document{ ID: hit.ID, Title: title, Description: description, Link: link, Tags: tags, Popularity: popularity, }) } return docs, nil }