package main import ( "bufio" "fmt" "net/url" "os" "path/filepath" "strconv" "strings" "time" "github.com/blevesearch/bleve/v2" "golang.org/x/net/publicsuffix" ) // Document represents a single document to be indexed. type Document struct { ID string `json:"id"` Link string `json:"link"` Title string `json:"title"` Tags string `json:"tags"` Description string `json:"description"` Popularity int64 `json:"popularity"` } var ( // Global Bleve index handle bleveIndex bleve.Index ) // startPeriodicIndexing refreshes the index from a file periodically func startPeriodicIndexing(filePath string, interval time.Duration) { go func() { for { printDebug("Refreshing index from %s", filePath) if err := IndexFile(filePath); err != nil { printErr("Failed to refresh index: %v", err) } time.Sleep(interval) } }() } // InitIndex ensures that the Bleve index is created or opened. func InitIndex() error { idx, err := bleve.Open(filepath.Join(config.DriveCache.Path, "index.bleve")) if err == bleve.ErrorIndexPathDoesNotExist { // Index doesn't exist, create a new one mapping := bleve.NewIndexMapping() docMapping := bleve.NewDocumentMapping() // Text fields titleFieldMapping := bleve.NewTextFieldMapping() titleFieldMapping.Analyzer = "standard" docMapping.AddFieldMappingsAt("title", titleFieldMapping) descFieldMapping := bleve.NewTextFieldMapping() descFieldMapping.Analyzer = "standard" docMapping.AddFieldMappingsAt("description", descFieldMapping) tagFieldMapping := bleve.NewTextFieldMapping() tagFieldMapping.Analyzer = "standard" docMapping.AddFieldMappingsAt("tags", tagFieldMapping) // Numeric field for popularity popularityMapping := bleve.NewNumericFieldMapping() docMapping.AddFieldMappingsAt("popularity", popularityMapping) mapping.AddDocumentMapping("Document", docMapping) idx, err = bleve.New(filepath.Join(config.DriveCache.Path, "index.bleve"), mapping) if err != nil { return fmt.Errorf("failed to create index: %v", err) } } else if err != nil { return fmt.Errorf("failed to open index: %v", err) } bleveIndex = idx return nil } func normalizeDomain(rawURL string) string { parsed, err := url.Parse(rawURL) if err != nil { return rawURL } domain, err := publicsuffix.EffectiveTLDPlusOne(parsed.Hostname()) if err != nil { return parsed.Hostname() // fallback } return domain } // IndexFile reads a file line-by-line and indexes each line as a document. func IndexFile(filePath string) error { file, err := os.Open(filePath) if err != nil { return fmt.Errorf("unable to open file for indexing: %v", err) } defer file.Close() scanner := bufio.NewScanner(file) batch := bleveIndex.NewBatch() // Map to track normalized domains we’ve already indexed indexedDomains := make(map[string]bool) for scanner.Scan() { line := scanner.Text() // link|title|tags|description|popularity parts := strings.SplitN(line, "|", 5) if len(parts) < 5 { continue } // Normalize domain part so duplicates share the same “key” normalized := normalizeDomain(parts[0]) popularity, _ := strconv.ParseInt(parts[4], 10, 64) if indexedDomains[normalized] { continue } doc := Document{ ID: normalized, Link: parts[0], Title: parts[1], Tags: parts[2], Description: parts[3], Popularity: popularity, } err := batch.Index(doc.ID, map[string]interface{}{ "title": doc.Title, "description": doc.Description, "link": doc.Link, "tags": doc.Tags, "popularity": doc.Popularity, }) if err != nil { return fmt.Errorf("failed to index document: %v", err) } indexedDomains[normalized] = true } if err := bleveIndex.Batch(batch); err != nil { return fmt.Errorf("error committing batch: %v", err) } if err := scanner.Err(); err != nil { return fmt.Errorf("error reading file: %v", err) } printDebug("Indexed %d unique normalized domains from %s", len(indexedDomains), filePath) return nil } // SearchIndex performs a full-text search on the indexed data. func SearchIndex(queryStr string, page, pageSize int) ([]Document, error) { exactMatch := bleve.NewMatchQuery(queryStr) // Exact match fuzzyMatch := bleve.NewFuzzyQuery(queryStr) // Fuzzy match fuzzyMatch.Fuzziness = 2 prefixMatch := bleve.NewPrefixQuery(queryStr) // Prefix match query := bleve.NewDisjunctionQuery(exactMatch, fuzzyMatch, prefixMatch) req := bleve.NewSearchRequest(query) req.Fields = []string{"title", "description", "link", "tags", "popularity"} // Pagination req.Size = pageSize req.From = (page - 1) * pageSize // Sort primarily by relevance (score), then by popularity descending req.SortBy([]string{"-_score", "-popularity"}) res, err := bleveIndex.Search(req) if err != nil { return nil, fmt.Errorf("search error: %v", err) } var docs []Document for _, hit := range res.Hits { title := fmt.Sprintf("%v", hit.Fields["title"]) description := fmt.Sprintf("%v", hit.Fields["description"]) link := fmt.Sprintf("%v", hit.Fields["link"]) tags := fmt.Sprintf("%v", hit.Fields["tags"]) popularity := int64(0) if pop, ok := hit.Fields["popularity"].(float64); ok { popularity = int64(pop) } if link == "" || title == "" { continue } docs = append(docs, Document{ ID: hit.ID, Title: title, Description: description, Link: link, Tags: tags, Popularity: popularity, }) } return docs, nil }