added website crawling and indexing crawled results

This commit is contained in:
partisan 2024-12-29 22:54:55 +01:00
parent 5b90a372a1
commit 047cccd19f
10 changed files with 819 additions and 97 deletions

View file

@ -24,15 +24,15 @@ import (
)
var (
cachingImages = make(map[string]*sync.Mutex)
cachingImagesMu sync.Mutex
// cachingSemaphore = make(chan struct{}, 100) // Limit to concurrent downloads
cachingImages = make(map[string]*sync.Mutex)
cachingImagesMu sync.Mutex
cachingSemaphore = make(chan struct{}, 100)
invalidImageIDs = make(map[string]struct{})
invalidImageIDsMu sync.Mutex
imageURLMap = make(map[string]string) // mapping from imageID_type to imageURL
imageURLMapMu sync.RWMutex // mutex for thread-safe access
imageURLMap = make(map[string]string)
imageURLMapMu sync.RWMutex
)
func cacheImage(imageURL, imageID string, isThumbnail bool) (string, bool, error) {
@ -49,7 +49,13 @@ func cacheImage(imageURL, imageID string, isThumbnail bool) (string, bool, error
filename = fmt.Sprintf("%s_full.webp", imageID)
}
cachedImagePath := filepath.Join(config.DriveCache.Path, filename)
// Make sure we store inside: config.DriveCache.Path / images
imageCacheDir := filepath.Join(config.DriveCache.Path, "images")
if err := os.MkdirAll(imageCacheDir, 0755); err != nil {
return "", false, fmt.Errorf("couldn't create images folder: %v", err)
}
cachedImagePath := filepath.Join(imageCacheDir, filename)
tempImagePath := cachedImagePath + ".tmp"
// Check if the image is already cached
@ -73,9 +79,8 @@ func cacheImage(imageURL, imageID string, isThumbnail bool) (string, bool, error
return cachedImagePath, true, nil
}
// // Limit max concurrent downloads
// cachingSemaphore <- struct{}{} // Acquire a token
// defer func() { <-cachingSemaphore }() // Release the token
cachingSemaphore <- struct{}{}
defer func() { <-cachingSemaphore }()
// Create a custom http.Client that skips SSL certificate verification
client := &http.Client{
@ -217,7 +222,8 @@ func handleImageServe(w http.ResponseWriter, r *http.Request) {
imageType = parts[1]
filename := fmt.Sprintf("%s_%s.webp", imageID, imageType)
cachedImagePath := filepath.Join(config.DriveCache.Path, filename)
// Adjust to read from config.DriveCache.Path / images
cachedImagePath := filepath.Join(config.DriveCache.Path, "images", filename)
if hasExtension && imageType == "thumb" {
// Requesting cached image (thumbnail or full)
@ -329,7 +335,7 @@ func handleImageStatus(w http.ResponseWriter, r *http.Request) {
// Check thumbnail first
for _, ext := range extensions {
thumbFilename := fmt.Sprintf("%s_thumb.%s", id, ext)
thumbPath := filepath.Join(config.DriveCache.Path, thumbFilename)
thumbPath := filepath.Join(config.DriveCache.Path, "images", thumbFilename)
if _, err := os.Stat(thumbPath); err == nil {
statusMap[id] = fmt.Sprintf("/image/%s_thumb.%s", id, ext)
@ -342,7 +348,7 @@ func handleImageStatus(w http.ResponseWriter, r *http.Request) {
if !imageReady {
for _, ext := range extensions {
fullFilename := fmt.Sprintf("%s_full.%s", id, ext)
fullPath := filepath.Join(config.DriveCache.Path, fullFilename)
fullPath := filepath.Join(config.DriveCache.Path, "images", fullFilename)
if _, err := os.Stat(fullPath); err == nil {
statusMap[id] = fmt.Sprintf("/image/%s_full.%s", id, ext)
@ -447,7 +453,9 @@ func cleanExpiredCachedImages() {
}
func cleanupCache() {
files, err := os.ReadDir(config.DriveCache.Path)
// Read from: config.DriveCache.Path / images
imageCacheDir := filepath.Join(config.DriveCache.Path, "images")
files, err := os.ReadDir(imageCacheDir)
if err != nil {
printErr("Failed to read DriveCache directory: %v", err)
return
@ -462,19 +470,17 @@ func cleanupCache() {
continue
}
filePath := filepath.Join(config.DriveCache.Path, file.Name())
filePath := filepath.Join(imageCacheDir, file.Name())
// Check for expired files based on modification time
if config.DriveCache.Duration > 0 && time.Since(info.ModTime()) > config.DriveCache.Duration {
if err := os.Remove(filePath); err == nil {
printDebug("Removed expired cache file: %s", filePath)
} else {
printErr("Failed to remove expired cache file: %s", filePath)
}
continue // Skip adding this file to the list
continue
}
// Accumulate total size and store file info for potential deletion
totalSize += uint64(info.Size())
fileInfos = append(fileInfos, info)
}
@ -491,7 +497,7 @@ func cleanupCache() {
break
}
filePath := filepath.Join(config.DriveCache.Path, info.Name())
filePath := filepath.Join(imageCacheDir, info.Name())
fileSize := uint64(info.Size())
if err := os.Remove(filePath); err == nil {

View file

@ -162,7 +162,7 @@ func (rc *ResultsCache) keyToString(key CacheKey) string {
// checkAndCleanCache removes items if memory usage exceeds the limit.
func (rc *ResultsCache) checkAndCleanCache() {
for rc.currentMemoryUsage() > config.RamCache.MaxUsageBytes {
if rc.currentMemoryUsage() > config.RamCache.MaxUsageBytes {
rc.cleanOldestItems()
}
}

View file

@ -30,6 +30,7 @@ type Config struct {
Domain string // Added
NodesEnabled bool // Added
CrawlerEnabled bool // Added
IndexerEnabled bool // Added
WebsiteEnabled bool // Added
RamCacheEnabled bool
DriveCacheEnabled bool // Added
@ -46,6 +47,7 @@ var defaultConfig = Config{
AuthCode: generateStrongRandomString(64),
NodesEnabled: false,
CrawlerEnabled: true,
IndexerEnabled: false,
WebsiteEnabled: true,
RamCacheEnabled: true,
DriveCacheEnabled: false,
@ -105,6 +107,15 @@ func createConfig() error {
config.Domain = defaultConfig.Domain
}
// printMessage("Use Indexer? (YES/no): ")
// indexerChoice, _ := reader.ReadString('\n')
// indexerChoice = strings.TrimSpace(strings.ToLower(indexerChoice))
// if indexerChoice == "no" {
// config.IndexerEnabled = false
// } else {
// config.IndexerEnabled = true
// }
// Cache settings
printMessage("Would you like to configure Cache settings (yes/NO): ")
configureCache, _ := reader.ReadString('\n')
@ -181,7 +192,7 @@ func createConfig() error {
} else {
config.DriveCache.MaxUsageBytes = parseMaxUsageDrive(driveMaxUsage, drivePath)
if config.DriveCache.MaxUsageBytes == 0 {
printWarn("Invalid DriveCache max usage, using default (1 TiB).")
printWarn("Invalid DriveCache max usage, using default.")
config.DriveCache.MaxUsageBytes = defaultConfig.DriveCache.MaxUsageBytes
}
}
@ -201,13 +212,6 @@ func createConfig() error {
printMessage("Generated connection code: %s\n", config.AuthCode)
}
// Set other default values
config.NodesEnabled = defaultConfig.NodesEnabled
config.CrawlerEnabled = defaultConfig.CrawlerEnabled
config.WebsiteEnabled = defaultConfig.WebsiteEnabled
config.LogLevel = defaultConfig.LogLevel
// Save configuration to file
saveConfig(config)
printInfo("Configuration saved successfully.")
return nil
@ -232,6 +236,7 @@ func saveConfig(config Config) {
featuresSec := cfg.Section("Features")
featuresSec.Key("Nodes").SetValue(strconv.FormatBool(config.NodesEnabled))
featuresSec.Key("Crawler").SetValue(strconv.FormatBool(config.CrawlerEnabled))
featuresSec.Key("Indexer").SetValue(strconv.FormatBool(config.IndexerEnabled))
featuresSec.Key("Website").SetValue(strconv.FormatBool(config.WebsiteEnabled))
featuresSec.Key("RamCache").SetValue(strconv.FormatBool(config.RamCacheEnabled))
featuresSec.Key("DriveCache").SetValue(strconv.FormatBool(config.DriveCacheEnabled))
@ -273,6 +278,7 @@ func loadConfig() Config {
// Features
nodesEnabled, _ := cfg.Section("Features").Key("Nodes").Bool()
crawlerEnabled, _ := cfg.Section("Features").Key("Crawler").Bool()
indexerEnabled, _ := cfg.Section("Features").Key("Indexer").Bool()
websiteEnabled, _ := cfg.Section("Features").Key("Website").Bool()
ramCacheEnabled, _ := cfg.Section("Features").Key("RamCache").Bool()
driveCacheEnabled, _ := cfg.Section("Features").Key("DriveCache").Bool()
@ -294,10 +300,11 @@ func loadConfig() Config {
Port: port,
Domain: domain,
LogLevel: logLevel,
AuthCode: authCode, // Assign AuthCode here
AuthCode: authCode,
Peers: peers,
NodesEnabled: nodesEnabled,
CrawlerEnabled: crawlerEnabled,
IndexerEnabled: indexerEnabled,
WebsiteEnabled: websiteEnabled,
RamCacheEnabled: ramCacheEnabled,
DriveCacheEnabled: driveCacheEnabled,

224
crawler.go Normal file
View file

@ -0,0 +1,224 @@
package main
import (
"bufio"
"fmt"
"net/http"
"os"
"path/filepath"
"strings"
"time"
"golang.org/x/net/html"
)
// webCrawlerInit is called during init on program start. It spawns a
// background goroutine that performs one crawl+index pass immediately,
// then repeats the refresh on a fixed 24-hour schedule.
func webCrawlerInit() {
	go func() {
		// Kick off the first refresh right away.
		runCrawlerAndIndexer()

		// Then refresh every 24h (adjust as needed).
		ticker := time.NewTicker(24 * time.Hour)
		defer ticker.Stop()
		for range ticker.C {
			runCrawlerAndIndexer()
		}
	}()
}
// runCrawlerAndIndexer reads domains.csv -> crawls -> writes to data_to_index.txt -> reindexes
func runCrawlerAndIndexer() {
// 1. Read domains.csv
domains, err := readDomainsCSV(filepath.Join(config.DriveCache.Path, "domains.csv"))
if err != nil {
printErr("Error reading domains.csv: %v", err)
return
}
// 2. Crawl each domain and write results to data_to_index.txt
outFile := filepath.Join(config.DriveCache.Path, "data_to_index.txt")
if err := crawlDomainsToFile(domains, outFile); err != nil {
printErr("Error crawling domains: %v", err)
return
}
// 3. Re-index data_to_index.txt
if err := IndexFile(outFile); err != nil {
printErr("Error indexing data_to_index.txt: %v", err)
return
}
printDebug("Crawl + index refresh completed.")
}
// readDomainsCSV returns a slice of (rank,domain) from a local CSV file
func readDomainsCSV(csvPath string) ([][2]string, error) {
f, err := os.Open(csvPath)
if err != nil {
return nil, err
}
defer f.Close()
var result [][2]string
scanner := bufio.NewScanner(f)
// Skip header line
scanner.Scan()
for scanner.Scan() {
line := scanner.Text()
// Split by commas, not tabs
fields := strings.SplitN(line, ",", 3) // Splits into up to 3 parts (rank, domain, popularity)
if len(fields) < 2 {
printDebug("Skipping malformed line: %s", line)
continue
}
// Remove quotes around fields, if present
rank := strings.Trim(fields[0], `"`)
domain := strings.Trim(fields[1], `"`)
result = append(result, [2]string{rank, domain})
}
return result, scanner.Err()
}
// crawlDomainsToFile visits each domain, extracts minimal metadata, and
// appends one line per new domain to outFile in the format:
//
//	url|title|keywords|description|rank
//
// URLs already present in outFile are skipped, so repeated runs only crawl
// newly added domains and never duplicate existing entries.
func crawlDomainsToFile(domains [][2]string, outFile string) error {
	// Load the URLs already recorded in outFile so we never write duplicates.
	existingEntries := make(map[string]bool)
	if _, err := os.Stat(outFile); err == nil { // file exists
		f, err := os.Open(outFile)
		if err != nil {
			return fmt.Errorf("unable to open %s: %v", outFile, err)
		}
		scanner := bufio.NewScanner(f)
		for scanner.Scan() {
			parts := strings.SplitN(scanner.Text(), "|", 5)
			if len(parts) >= 1 {
				existingEntries[parts[0]] = true // mark existing URL
			}
		}
		// Close promptly instead of deferring: the file is reopened below.
		f.Close()
		if err := scanner.Err(); err != nil {
			return fmt.Errorf("error reading %s: %v", outFile, err)
		}
	}

	// BUG FIX: the original opened with O_TRUNC, which erased the very
	// entries that were just read for deduplication — previously crawled
	// domains were skipped AND lost. Open in append mode instead, so old
	// entries are preserved and only new domains are added.
	file, err := os.OpenFile(outFile, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644)
	if err != nil {
		return fmt.Errorf("unable to open %s for writing: %v", outFile, err)
	}
	defer file.Close()

	for _, d := range domains {
		rank := d[0]
		domain := d[1]
		if domain == "" || existingEntries["https://"+domain] {
			continue
		}

		fullURL := "https://" + domain
		title, desc, keywords := fetchPageMetadata(fullURL)
		if title == "" {
			title = "Unknown Title"
		}
		if desc == "" {
			desc = "No Description"
		}

		// Write one pipe-separated record for this domain.
		line := fmt.Sprintf("%s|%s|%s|%s|%s\n",
			fullURL,
			sanitize(title),
			sanitize(keywords),
			sanitize(desc),
			rank,
		)
		if _, err := file.WriteString(line); err != nil {
			return err
		}
		existingEntries[fullURL] = true
	}

	return nil
}
// fetchPageMetadata does a simple GET of pageURL and extracts the page's
// <title> text, meta[name=description] content, and meta[name=keywords]
// content. On any failure (User-Agent generation, request creation, network
// error, non-2xx status, HTML parse error) it logs a warning and returns
// three empty strings — errors are never propagated to the caller.
//
// NOTE(review): the traversal visits every node, so if the document contains
// multiple <title> or matching <meta> tags, the last one in document order
// wins (each match overwrites the previous value).
func fetchPageMetadata(pageURL string) (string, string, string) {
	// Generate a User-Agent using your GetUserAgent function
	userAgent, err := GetUserAgent("crawler")
	if err != nil {
		printWarn("Failed to generate User-Agent: %v", err)
		return "", "", ""
	}

	// 15s timeout bounds the whole request, including body read.
	client := &http.Client{Timeout: 15 * time.Second}
	req, err := http.NewRequest("GET", pageURL, nil)
	if err != nil {
		printWarn("Failed to create request for %s: %v", pageURL, err)
		return "", "", ""
	}

	// Set the dynamically generated User-Agent
	req.Header.Set("User-Agent", userAgent)

	resp, err := client.Do(req)
	if err != nil {
		printWarn("Failed to GET %s: %v", pageURL, err)
		return "", "", ""
	}
	defer resp.Body.Close()

	// Handle non-200 responses. 401/403 are logged as deliberate skips
	// (site refuses crawlers); everything else non-2xx is a generic warning.
	if resp.StatusCode == 403 || resp.StatusCode == 401 {
		printWarn("Skipping %s: HTTP %d", pageURL, resp.StatusCode)
		return "", "", ""
	} else if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		printWarn("Non-200 for %s: %d", pageURL, resp.StatusCode)
		return "", "", ""
	}

	// Parse HTML (reads the whole body; html.Parse tolerates malformed markup).
	doc, err := html.Parse(resp.Body)
	if err != nil {
		printWarn("HTML parse error for %s: %v", pageURL, err)
		return "", "", ""
	}

	var title, desc, keywords string
	var f func(*html.Node)
	// Depth-first walk over the parsed tree, capturing title and meta values.
	f = func(n *html.Node) {
		// <title>: take its first text child (nil-checked for empty <title/>).
		if n.Type == html.ElementNode && n.Data == "title" && n.FirstChild != nil {
			title = n.FirstChild.Data
		}
		if n.Type == html.ElementNode && n.Data == "meta" {
			// Collect name/content attributes case-insensitively;
			// attribute order within the tag does not matter.
			var nameVal, contentVal string
			for _, attr := range n.Attr {
				switch strings.ToLower(attr.Key) {
				case "name":
					nameVal = strings.ToLower(attr.Val)
				case "content":
					contentVal = attr.Val
				}
			}
			if nameVal == "description" {
				desc = contentVal
			} else if nameVal == "keywords" {
				keywords = contentVal
			}
		}
		// Recurse into children.
		for c := n.FirstChild; c != nil; c = c.NextSibling {
			f(c)
		}
	}
	f(doc)

	return title, desc, keywords
}
// sanitize is a quick helper to remove newlines/pipes from fields, so a
// value can be embedded safely in the pipe-separated index file format.
func sanitize(input string) string {
	// Replace both forbidden characters in a single pass, then trim edges.
	cleaned := strings.NewReplacer("|", " ", "\n", " ").Replace(input)
	return strings.TrimSpace(cleaned)
}

118
get-domains-csv.go Normal file
View file

@ -0,0 +1,118 @@
package main
import (
"archive/zip"
"fmt"
"io"
"net/http"
"os"
"path/filepath"
)
// downloadAndSetupDomainsCSV ensures domains.csv exists in the drive cache.
// If it is missing, the Domcop top-10M zip is downloaded, extracted, the
// contained .csv is moved into place as domains.csv, and the zip is removed.
// Returns nil when the file already exists or the whole pipeline succeeds.
func downloadAndSetupDomainsCSV() error {
	targetFilePath := filepath.Join(config.DriveCache.Path, "domains.csv")

	// Nothing to do if domains.csv is already in place.
	if _, err := os.Stat(targetFilePath); err == nil {
		printDebug("domains.csv already exists at %s", targetFilePath)
		return nil
	}

	downloadURL := "https://www.domcop.com/files/top/top10milliondomains.csv.zip"
	zipFilePath := filepath.Join(config.DriveCache.Path, "top10milliondomains.csv.zip")

	// Download the file. Use a client with a timeout so a stalled transfer
	// cannot hang forever (http.Get has no timeout at all); the archive is
	// large, so the bound is generous.
	printDebug("Downloading file from %s", downloadURL)
	client := &http.Client{Timeout: 30 * time.Minute}
	resp, err := client.Get(downloadURL)
	if err != nil {
		return fmt.Errorf("failed to download file: %v", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("failed to download file: received status code %d", resp.StatusCode)
	}

	// Stream the body to disk. Close the zip handle explicitly rather than
	// deferring: unzipFile reopens the file below, and os.Remove of a file
	// that is still open fails on Windows.
	zipFile, err := os.Create(zipFilePath)
	if err != nil {
		return fmt.Errorf("failed to create local zip file: %v", err)
	}
	if _, err := io.Copy(zipFile, resp.Body); err != nil {
		zipFile.Close()
		return fmt.Errorf("failed to write downloaded zip file: %v", err)
	}
	if err := zipFile.Close(); err != nil {
		return fmt.Errorf("failed to close zip file: %v", err)
	}

	// Unzip the archive next to the target path.
	printDebug("Unzipping file %s", zipFilePath)
	if err := unzipFile(zipFilePath, config.DriveCache.Path); err != nil {
		return fmt.Errorf("failed to unzip file: %v", err)
	}

	// Find the extracted .csv file and rename/move it to domains.csv.
	csvFound := false
	dirEntries, err := os.ReadDir(config.DriveCache.Path)
	if err != nil {
		return fmt.Errorf("failed to read directory: %v", err)
	}
	for _, entry := range dirEntries {
		if !entry.IsDir() && filepath.Ext(entry.Name()) == ".csv" {
			csvPath := filepath.Join(config.DriveCache.Path, entry.Name())
			if err := os.Rename(csvPath, targetFilePath); err != nil {
				return fmt.Errorf("failed to move %s to %s: %v", csvPath, targetFilePath, err)
			}
			csvFound = true
			break
		}
	}
	if !csvFound {
		return fmt.Errorf("no .csv file found in the downloaded archive")
	}

	// Best-effort cleanup of the downloaded archive.
	if err := os.Remove(zipFilePath); err != nil {
		printWarn("failed to remove zip file %s: %v", zipFilePath, err)
	}

	printDebug("domains.csv successfully downloaded and placed at %s", targetFilePath)
	return nil
}
func unzipFile(zipFile, destDir string) error {
reader, err := zip.OpenReader(zipFile)
if err != nil {
return err
}
defer reader.Close()
for _, file := range reader.File {
filePath := filepath.Join(destDir, file.Name)
if file.FileInfo().IsDir() {
os.MkdirAll(filePath, os.ModePerm)
continue
}
srcFile, err := file.Open()
if err != nil {
return err
}
defer srcFile.Close()
destFile, err := os.Create(filePath)
if err != nil {
return err
}
defer destFile.Close()
if _, err := io.Copy(destFile, srcFile); err != nil {
return err
}
}
return nil
}

46
go.mod
View file

@ -1,9 +1,11 @@
module searchengine
module qgato
go 1.18
go 1.23
toolchain go1.23.4
require (
github.com/PuerkitoBio/goquery v1.9.1 // direct
github.com/PuerkitoBio/goquery v1.10.0 // direct
github.com/chai2010/webp v1.1.1
github.com/leonelquinteros/gotext v1.7.0
github.com/shirou/gopsutil v3.21.11+incompatible
@ -12,10 +14,42 @@ require (
)
require (
github.com/andybalholm/cascadia v1.3.2 // indirect
github.com/blevesearch/bleve/v2 v2.4.4
golang.org/x/net v0.33.0
)
require (
github.com/RoaringBitmap/roaring v1.9.4 // indirect
github.com/andybalholm/cascadia v1.3.3 // indirect
github.com/bits-and-blooms/bitset v1.20.0 // indirect
github.com/blevesearch/bleve_index_api v1.2.0 // indirect
github.com/blevesearch/geo v0.1.20 // indirect
github.com/blevesearch/go-faiss v1.0.24 // indirect
github.com/blevesearch/go-porterstemmer v1.0.3 // indirect
github.com/blevesearch/gtreap v0.1.1 // indirect
github.com/blevesearch/mmap-go v1.0.4 // indirect
github.com/blevesearch/scorch_segment_api/v2 v2.3.0 // indirect
github.com/blevesearch/segment v0.9.1 // indirect
github.com/blevesearch/snowballstem v0.9.0 // indirect
github.com/blevesearch/upsidedown_store_api v1.0.2 // indirect
github.com/blevesearch/vellum v1.1.0 // indirect
github.com/blevesearch/zapx/v11 v11.3.10 // indirect
github.com/blevesearch/zapx/v12 v12.3.10 // indirect
github.com/blevesearch/zapx/v13 v13.3.10 // indirect
github.com/blevesearch/zapx/v14 v14.3.10 // indirect
github.com/blevesearch/zapx/v15 v15.3.17 // indirect
github.com/blevesearch/zapx/v16 v16.1.9-0.20241217210638-a0519e7caf3b // indirect
github.com/go-ole/go-ole v1.3.0 // indirect
github.com/golang/geo v0.0.0-20230421003525-6adc56603217 // indirect
github.com/golang/protobuf v1.5.4 // indirect
github.com/golang/snappy v0.0.4 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/mschoch/smat v0.2.0 // indirect
github.com/stretchr/testify v1.9.0 // indirect
github.com/yusufpapurcu/wmi v1.2.4 // indirect
golang.org/x/net v0.30.0 // indirect
golang.org/x/sys v0.26.0 // indirect
go.etcd.io/bbolt v1.3.11 // indirect
golang.org/x/sys v0.28.0 // indirect
google.golang.org/protobuf v1.36.0 // indirect
)

123
go.sum
View file

@ -1,39 +1,121 @@
github.com/PuerkitoBio/goquery v1.9.1 h1:mTL6XjbJTZdpfL+Gwl5U2h1l9yEkJjhmlTeV9VPW7UI=
github.com/PuerkitoBio/goquery v1.9.1/go.mod h1:cW1n6TmIMDoORQU5IU/P1T3tGFunOeXEpGP2WHRwkbY=
github.com/andybalholm/cascadia v1.3.2 h1:3Xi6Dw5lHF15JtdcmAHD3i1+T8plmv7BQ/nsViSLyss=
github.com/andybalholm/cascadia v1.3.2/go.mod h1:7gtRlve5FxPPgIgX36uWBX58OdBsSS6lUvCFb+h7KvU=
github.com/PuerkitoBio/goquery v1.10.0 h1:6fiXdLuUvYs2OJSvNRqlNPoBm6YABE226xrbavY5Wv4=
github.com/PuerkitoBio/goquery v1.10.0/go.mod h1:TjZZl68Q3eGHNBA8CWaxAN7rOU1EbDz3CWuolcO5Yu4=
github.com/RoaringBitmap/roaring v1.9.4 h1:yhEIoH4YezLYT04s1nHehNO64EKFTop/wBhxv2QzDdQ=
github.com/RoaringBitmap/roaring v1.9.4/go.mod h1:6AXUsoIEzDTFFQCe1RbGA6uFONMhvejWj5rqITANK90=
github.com/andybalholm/cascadia v1.3.3 h1:AG2YHrzJIm4BZ19iwJ/DAua6Btl3IwJX+VI4kktS1LM=
github.com/andybalholm/cascadia v1.3.3/go.mod h1:xNd9bqTn98Ln4DwST8/nG+H0yuB8Hmgu1YHNnWw0GeA=
github.com/bits-and-blooms/bitset v1.12.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8=
github.com/bits-and-blooms/bitset v1.20.0 h1:2F+rfL86jE2d/bmw7OhqUg2Sj/1rURkBn3MdfoPyRVU=
github.com/bits-and-blooms/bitset v1.20.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8=
github.com/blevesearch/bleve/v2 v2.4.4 h1:RwwLGjUm54SwyyykbrZs4vc1qjzYic4ZnAnY9TwNl60=
github.com/blevesearch/bleve/v2 v2.4.4/go.mod h1:fa2Eo6DP7JR+dMFpQe+WiZXINKSunh7WBtlDGbolKXk=
github.com/blevesearch/bleve_index_api v1.2.0 h1:/DXMMWBwx/UmGKM1xDhTwDoJI5yQrG6rqRWPFcOgUVo=
github.com/blevesearch/bleve_index_api v1.2.0/go.mod h1:PbcwjIcRmjhGbkS/lJCpfgVSMROV6TRubGGAODaK1W8=
github.com/blevesearch/geo v0.1.20 h1:paaSpu2Ewh/tn5DKn/FB5SzvH0EWupxHEIwbCk/QPqM=
github.com/blevesearch/geo v0.1.20/go.mod h1:DVG2QjwHNMFmjo+ZgzrIq2sfCh6rIHzy9d9d0B59I6w=
github.com/blevesearch/go-faiss v1.0.24 h1:K79IvKjoKHdi7FdiXEsAhxpMuns0x4fM0BO93bW5jLI=
github.com/blevesearch/go-faiss v1.0.24/go.mod h1:OMGQwOaRRYxrmeNdMrXJPvVx8gBnvE5RYrr0BahNnkk=
github.com/blevesearch/go-porterstemmer v1.0.3 h1:GtmsqID0aZdCSNiY8SkuPJ12pD4jI+DdXTAn4YRcHCo=
github.com/blevesearch/go-porterstemmer v1.0.3/go.mod h1:angGc5Ht+k2xhJdZi511LtmxuEf0OVpvUUNrwmM1P7M=
github.com/blevesearch/gtreap v0.1.1 h1:2JWigFrzDMR+42WGIN/V2p0cUvn4UP3C4Q5nmaZGW8Y=
github.com/blevesearch/gtreap v0.1.1/go.mod h1:QaQyDRAT51sotthUWAH4Sj08awFSSWzgYICSZ3w0tYk=
github.com/blevesearch/mmap-go v1.0.4 h1:OVhDhT5B/M1HNPpYPBKIEJaD0F3Si+CrEKULGCDPWmc=
github.com/blevesearch/mmap-go v1.0.4/go.mod h1:EWmEAOmdAS9z/pi/+Toxu99DnsbhG1TIxUoRmJw/pSs=
github.com/blevesearch/scorch_segment_api/v2 v2.3.0 h1:vxCjbXAkkEBSb4AB3Iqgr/EJcPyYRsiGxpcvsS8E1Dw=
github.com/blevesearch/scorch_segment_api/v2 v2.3.0/go.mod h1:5y+TgXYSx+xJGaCwSlvy9G/UJBIY5wzvIkhvhBm2ATc=
github.com/blevesearch/segment v0.9.1 h1:+dThDy+Lvgj5JMxhmOVlgFfkUtZV2kw49xax4+jTfSU=
github.com/blevesearch/segment v0.9.1/go.mod h1:zN21iLm7+GnBHWTao9I+Au/7MBiL8pPFtJBJTsk6kQw=
github.com/blevesearch/snowballstem v0.9.0 h1:lMQ189YspGP6sXvZQ4WZ+MLawfV8wOmPoD/iWeNXm8s=
github.com/blevesearch/snowballstem v0.9.0/go.mod h1:PivSj3JMc8WuaFkTSRDW2SlrulNWPl4ABg1tC/hlgLs=
github.com/blevesearch/upsidedown_store_api v1.0.2 h1:U53Q6YoWEARVLd1OYNc9kvhBMGZzVrdmaozG2MfoB+A=
github.com/blevesearch/upsidedown_store_api v1.0.2/go.mod h1:M01mh3Gpfy56Ps/UXHjEO/knbqyQ1Oamg8If49gRwrQ=
github.com/blevesearch/vellum v1.1.0 h1:CinkGyIsgVlYf8Y2LUQHvdelgXr6PYuvoDIajq6yR9w=
github.com/blevesearch/vellum v1.1.0/go.mod h1:QgwWryE8ThtNPxtgWJof5ndPfx0/YMBh+W2weHKPw8Y=
github.com/blevesearch/zapx/v11 v11.3.10 h1:hvjgj9tZ9DeIqBCxKhi70TtSZYMdcFn7gDb71Xo/fvk=
github.com/blevesearch/zapx/v11 v11.3.10/go.mod h1:0+gW+FaE48fNxoVtMY5ugtNHHof/PxCqh7CnhYdnMzQ=
github.com/blevesearch/zapx/v12 v12.3.10 h1:yHfj3vXLSYmmsBleJFROXuO08mS3L1qDCdDK81jDl8s=
github.com/blevesearch/zapx/v12 v12.3.10/go.mod h1:0yeZg6JhaGxITlsS5co73aqPtM04+ycnI6D1v0mhbCs=
github.com/blevesearch/zapx/v13 v13.3.10 h1:0KY9tuxg06rXxOZHg3DwPJBjniSlqEgVpxIqMGahDE8=
github.com/blevesearch/zapx/v13 v13.3.10/go.mod h1:w2wjSDQ/WBVeEIvP0fvMJZAzDwqwIEzVPnCPrz93yAk=
github.com/blevesearch/zapx/v14 v14.3.10 h1:SG6xlsL+W6YjhX5N3aEiL/2tcWh3DO75Bnz77pSwwKU=
github.com/blevesearch/zapx/v14 v14.3.10/go.mod h1:qqyuR0u230jN1yMmE4FIAuCxmahRQEOehF78m6oTgns=
github.com/blevesearch/zapx/v15 v15.3.17 h1:NkkMI98pYLq/uHnB6YWcITrrLpCVyvZ9iP+AyfpW1Ys=
github.com/blevesearch/zapx/v15 v15.3.17/go.mod h1:vXRQzJJvlGVCdmOD5hg7t7JdjUT5DmDPhsAfjvtzIq8=
github.com/blevesearch/zapx/v16 v16.1.9-0.20241217210638-a0519e7caf3b h1:ju9Az5YgrzCeK3M1QwvZIpxYhChkXp7/L0RhDYsxXoE=
github.com/blevesearch/zapx/v16 v16.1.9-0.20241217210638-a0519e7caf3b/go.mod h1:BlrYNpOu4BvVRslmIG+rLtKhmjIaRhIbG8sb9scGTwI=
github.com/chai2010/webp v1.1.1 h1:jTRmEccAJ4MGrhFOrPMpNGIJ/eybIgwKpcACsrTEapk=
github.com/chai2010/webp v1.1.1/go.mod h1:0XVwvZWdjjdxpUEIf7b9g9VkHFnInUSYujwqTLEuldU=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0=
github.com/go-ole/go-ole v1.3.0 h1:Dt6ye7+vXGIKZ7Xtk4s6/xVdGDQynvom7xCFEdWr6uE=
github.com/go-ole/go-ole v1.3.0/go.mod h1:5LS6F96DhAwUc7C+1HLexzMXY1xGRSryjyPPKW6zv78=
github.com/golang/geo v0.0.0-20230421003525-6adc56603217 h1:HKlyj6in2JV6wVkmQ4XmG/EIm+SCYlPZ+V4GWit7Z+I=
github.com/golang/geo v0.0.0-20230421003525-6adc56603217/go.mod h1:8wI0hitZ3a1IxZfeH3/5I97CI8i5cLGsYe7xNhQGs9U=
github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM=
github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
github.com/leonelquinteros/gotext v1.7.0 h1:jcJmF4AXqyamP7vuw2MMIKs+O3jAEmvrc5JQiI8Ht/8=
github.com/leonelquinteros/gotext v1.7.0/go.mod h1:qJdoQuERPpccw7L70uoU+K/BvTfRBHYsisCQyFLXyvw=
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
github.com/mschoch/smat v0.2.0 h1:8imxQsjDm8yFEAVBe7azKmKSgzSkZXDuKkSq9374khM=
github.com/mschoch/smat v0.2.0/go.mod h1:kc9mz7DoBKqDyiRL7VZN8KvXQMWeTaVnttLRXOlotKw=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/shirou/gopsutil v3.21.11+incompatible h1:+1+c1VGhc88SSonWP6foOcLhvnKlUeu/erjjvaPEYiI=
github.com/shirou/gopsutil v3.21.11+incompatible/go.mod h1:5b4v6he4MtMOwMlS0TUMTu2PcXUg8+E1lC7eC3UO/RA=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
github.com/yusufpapurcu/wmi v1.2.4 h1:zFUKzehAFReQwLys1b/iSMl+JQGSCSjtVqQn9bBrPo0=
github.com/yusufpapurcu/wmi v1.2.4/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0=
go.etcd.io/bbolt v1.3.11 h1:yGEzV1wPz2yVCLsD8ZAiGHhHVlczyC9d1rP43/VCRJ0=
go.etcd.io/bbolt v1.3.11/go.mod h1:dksAq7YMXoljX0xu6VF5DMZGbhYYoLUalEiSySYAS4I=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/crypto v0.13.0/go.mod h1:y6Z2r+Rw4iayiXXAIxJIDAJ1zMW4yaTpebo8fPOliYc=
golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU=
golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8=
golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk=
golang.org/x/image v0.21.0 h1:c5qV36ajHpdj4Qi0GnE0jUc/yuo33OLFaa0d+crTD5s=
golang.org/x/image v0.21.0/go.mod h1:vUbsLavqK/W303ZroQQVKQ+Af3Yl6Uz1Ppu5J/cLz78=
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
golang.org/x/mod v0.15.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
golang.org/x/net v0.9.0/go.mod h1:d48xBJpPfHeWQsugry2m+kC02ZBRGRgulfHnEXEuWns=
golang.org/x/net v0.30.0 h1:AcW1SDZMkb8IpzCdQUaIq2sP4sZ4zw+55h6ynffypl4=
golang.org/x/net v0.30.0/go.mod h1:2wGyMJ5iFasEhkwi13ChkO/t1ECNC4X4eBKkVFyYFlU=
golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg=
golang.org/x/net v0.15.0/go.mod h1:idbUs1IY1+zTqbi8yxTbhexhEEk5ur9LInksu6HrEpk=
golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44=
golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM=
golang.org/x/net v0.33.0 h1:74SYHlV8BIgHIFC/LrYkOGIwL19eTYXQ5wc6TBuO36I=
golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y=
golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sync v0.10.0 h1:3NQrjDixjgGwUOCaF8w2+VYHv0Ve/vGYSbdkTa98gmQ=
golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
@ -42,23 +124,42 @@ golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBc
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo=
golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA=
golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/telemetry v0.0.0-20240228155512-f48c80bd79b2/go.mod h1:TeRTkGYfJXctD9OcfyVLyj2J3IxLnKwHJR8f4D8a3YE=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
golang.org/x/term v0.7.0/go.mod h1:P32HKFT3hSsZrRxla30E9HqToFYAQPCMs/zFMBUFqPY=
golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo=
golang.org/x/term v0.12.0/go.mod h1:owVbMEjm3cBLCHdkQu9b1opXd4ETQWc3BhuQGKgXgvU=
golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk=
golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY=
golang.org/x/term v0.27.0/go.mod h1:iMsnZpn0cago0GOrHO2+Y7u7JPn5AylBrcoWkElMTSM=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
golang.org/x/tools v0.13.0/go.mod h1:HvlwmtVNQAhOuCjW7xxvovg8wbNq7LwfXh/k7wXUl58=
golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
google.golang.org/protobuf v1.36.0 h1:mjIs9gYtt56AzC4ZaffQuh88TZurBGhIJMBZGSxNerQ=
google.golang.org/protobuf v1.36.0/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/ini.v1 v1.67.0 h1:Dgnx+6+nfE+IfzjUEISNeydPJh9AXNNsWbGP9KzCsOA=
gopkg.in/ini.v1 v1.67.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

198
indexer.go Normal file
View file

@ -0,0 +1,198 @@
package main
import (
"bufio"
"fmt"
"os"
"path/filepath"
"strconv"
"strings"
"time"
"github.com/blevesearch/bleve/v2"
)
// Document represents a single crawled page to be indexed.
// Each field mirrors one column of the pipe-separated data file
// consumed by IndexFile (link|title|tags|description|popularity).
// You can add more fields if needed.
type Document struct {
	ID          string `json:"id"`          // unique document key; the link is used as the ID
	Link        string `json:"link"`        // page URL
	Title       string `json:"title"`       // page title
	Tags        string `json:"tags"`        // free-form tag string
	Description string `json:"description"` // short page description
	Popularity  int64  `json:"popularity"`  // numeric score used for result ordering
}
var (
	// bleveIndex is the process-wide Bleve index handle. It is set once
	// by InitIndex and then read by IndexFile and SearchIndex.
	bleveIndex bleve.Index
)
// startPeriodicIndexing launches a background goroutine that re-indexes
// filePath immediately and then again after every interval. Indexing
// errors are logged and do not stop the loop. The goroutine runs for the
// lifetime of the process; there is no stop mechanism.
func startPeriodicIndexing(filePath string, interval time.Duration) {
	go func() {
		for {
			printDebug("Refreshing index from %s", filePath)
			if err := IndexFile(filePath); err != nil {
				printErr("Failed to refresh index: %v", err)
			}
			time.Sleep(interval)
		}
	}()
}
// InitIndex ensures that the Bleve index is created or opened.
func InitIndex() error {
idx, err := bleve.Open(filepath.Join(config.DriveCache.Path, "index.bleve"))
if err == bleve.ErrorIndexPathDoesNotExist {
// Index doesn't exist, create a new one
mapping := bleve.NewIndexMapping()
// Custom mapping for the document
docMapping := bleve.NewDocumentMapping()
// Text fields with custom analyzers for better tokenization
textFieldMapping := bleve.NewTextFieldMapping()
textFieldMapping.Analyzer = "standard" // Use standard analyzer for partial and fuzzy matches
docMapping.AddFieldMappingsAt("title", textFieldMapping)
docMapping.AddFieldMappingsAt("description", textFieldMapping)
docMapping.AddFieldMappingsAt("tags", textFieldMapping)
// Numeric field for popularity
popularityMapping := bleve.NewNumericFieldMapping()
docMapping.AddFieldMappingsAt("popularity", popularityMapping)
mapping.AddDocumentMapping("Document", docMapping)
idx, err = bleve.New(filepath.Join(config.DriveCache.Path, "index.bleve"), mapping)
if err != nil {
return fmt.Errorf("failed to create index: %v", err)
}
} else if err != nil {
return fmt.Errorf("failed to open index: %v", err)
}
bleveIndex = idx
return nil
}
// IndexFile reads a file line-by-line and indexes each line as a document.
// Expected line format: link|title|tags|description|popularity
// The link (parts[0]) doubles as the document ID and as the dedupe key, so
// only the first occurrence of each link in the file is indexed.
func IndexFile(filePath string) error {
	file, err := os.Open(filePath)
	if err != nil {
		return fmt.Errorf("unable to open file for indexing: %v", err)
	}
	defer file.Close()

	scanner := bufio.NewScanner(file)
	batch := bleveIndex.NewBatch()
	indexedDomains := make(map[string]bool) // links already added to this batch

	for scanner.Scan() {
		line := scanner.Text()

		// Split the line into 5 fields: link|title|tags|description|popularity.
		parts := strings.SplitN(line, "|", 5)
		if len(parts) < 5 {
			continue // skip malformed lines
		}

		domain := parts[0]
		// Skip if this link was already indexed in this pass.
		if indexedDomains[domain] {
			continue
		}

		// A malformed popularity value deliberately falls back to 0.
		popularity, _ := strconv.ParseInt(parts[4], 10, 64)

		doc := Document{
			ID:          domain, // use the link as the unique ID
			Link:        parts[0],
			Title:       parts[1],
			Tags:        parts[2],
			Description: parts[3],
			Popularity:  popularity,
		}

		if err := batch.Index(doc.ID, map[string]interface{}{
			"title":       doc.Title,
			"description": doc.Description,
			"link":        doc.Link,
			"tags":        doc.Tags,
			"popularity":  doc.Popularity,
		}); err != nil {
			return fmt.Errorf("failed to index document: %v", err)
		}
		indexedDomains[domain] = true
	}

	// Check for read errors BEFORE committing: previously a partially read
	// file was committed to the index and then reported as a failure.
	if err := scanner.Err(); err != nil {
		return fmt.Errorf("error reading file: %v", err)
	}

	// Commit the batch.
	if err := bleveIndex.Batch(batch); err != nil {
		return fmt.Errorf("error committing batch: %v", err)
	}

	printDebug("Indexed %d unique domains from %s", len(indexedDomains), filePath)
	return nil
}
// SearchIndex performs a full-text search on the indexed data, combining
// exact, fuzzy (edit distance 2) and prefix matching, and returns the
// requested page of results ordered by popularity (most popular first).
// page is 1-based.
func SearchIndex(queryStr string, page, pageSize int) ([]Document, error) {
	// Disjunction: a document matches if ANY of the three sub-queries match.
	exactMatch := bleve.NewMatchQuery(queryStr)
	fuzzyMatch := bleve.NewFuzzyQuery(queryStr)
	fuzzyMatch.Fuzziness = 2
	prefixMatch := bleve.NewPrefixQuery(queryStr)
	query := bleve.NewDisjunctionQuery(exactMatch, fuzzyMatch, prefixMatch)

	req := bleve.NewSearchRequest(query)
	req.Fields = []string{"title", "description", "link", "tags", "popularity"}

	// Pagination.
	req.Size = pageSize
	req.From = (page - 1) * pageSize

	// Sort by popularity DESCENDING; a bare "popularity" sorts ascending
	// and would surface the least popular documents first.
	req.SortBy([]string{"-popularity"})

	res, err := bleveIndex.Search(req)
	if err != nil {
		return nil, fmt.Errorf("search error: %v", err)
	}

	var docs []Document
	for _, hit := range res.Hits {
		title := fmt.Sprintf("%v", hit.Fields["title"])
		description := fmt.Sprintf("%v", hit.Fields["description"])
		link := fmt.Sprintf("%v", hit.Fields["link"])
		tags := fmt.Sprintf("%v", hit.Fields["tags"])

		// Stored numeric fields come back from bleve as float64.
		popularity := int64(0)
		if pop, ok := hit.Fields["popularity"].(float64); ok {
			popularity = int64(pop)
		}

		// Skip hits whose stored fields are missing ("%v" of nil is "<nil>").
		if link == "<nil>" || title == "<nil>" {
			continue
		}

		docs = append(docs, Document{
			ID:          hit.ID,
			Title:       title,
			Description: description,
			Link:        link,
			Tags:        tags,
			Popularity:  popularity,
		})
	}
	return docs, nil
}

25
init.go
View file

@ -3,6 +3,8 @@ package main
import (
"flag"
"os"
"path/filepath"
"time"
)
var config Config
@ -95,5 +97,28 @@ func main() {
printInfo("RAM cache is disabled.")
}
// Init indexer
if config.IndexerEnabled {
if err := downloadAndSetupDomainsCSV(); err != nil {
printErr("Failed to set up domains.csv: %v", err)
return
}
webCrawlerInit()
err := InitIndex()
if err != nil {
printErr("Failed to initialize index:", err)
}
// Start periodic indexing (every 2 minutes)
dataFilePath := filepath.Join(config.DriveCache.Path, "data_to_index.txt")
startPeriodicIndexing(dataFilePath, 2*time.Minute)
printInfo("Indexer is enabled.")
} else {
printInfo("Indexer is disabled.")
}
runServer()
}

113
text.go
View file

@ -73,14 +73,10 @@ func getTextResultsFromCacheOrFetch(cacheKey CacheKey, query, safe, lang string,
select {
case results := <-cacheChan:
if results == nil {
// Fetch only if the cache miss occurs and Crawler is enabled
if config.CrawlerEnabled {
combinedResults = fetchTextResults(query, safe, lang, page)
if len(combinedResults) > 0 {
resultsCache.Set(cacheKey, convertToSearchResults(combinedResults))
}
} else {
printInfo("Crawler disabled; skipping fetching.")
// Always attempt to fetch results on a cache miss
combinedResults = fetchTextResults(query, safe, lang, page)
if len(combinedResults) > 0 {
resultsCache.Set(cacheKey, convertToSearchResults(combinedResults))
}
} else {
textResults, _, _, _ := convertToSpecificResults(results)
@ -88,13 +84,10 @@ func getTextResultsFromCacheOrFetch(cacheKey CacheKey, query, safe, lang string,
}
case <-time.After(2 * time.Second):
printInfo("Cache check timeout")
if config.CrawlerEnabled {
combinedResults = fetchTextResults(query, safe, lang, page)
if len(combinedResults) > 0 {
resultsCache.Set(cacheKey, convertToSearchResults(combinedResults))
}
} else {
printInfo("Crawler disabled; skipping fetching.")
// Even on timeout, attempt to fetch results
combinedResults = fetchTextResults(query, safe, lang, page)
if len(combinedResults) > 0 {
resultsCache.Set(cacheKey, convertToSearchResults(combinedResults))
}
}
@ -121,54 +114,70 @@ func prefetchPage(query, safe, lang string, page int) {
func fetchTextResults(query, safe, lang string, page int) []TextSearchResult {
var results []TextSearchResult
// If Crawler is disabled, do not fetch from search engines
if !config.CrawlerEnabled {
printDebug("Crawler is disabled; skipping search engine fetching.")
return results // Return an empty list
}
printDebug("Crawler is disabled; fetching from local index.")
engineCount := len(textSearchEngines)
// Calculate the starting position based on the page number
indexedResults, err := SearchIndex(query, page, 10)
if err != nil {
printErr("Error searching the index: %v", err)
return results // Return empty results on error
}
// Determine which engine to use for the current page
engineIndex := (page - 1) % engineCount
engine := textSearchEngines[engineIndex]
// Convert indexed results to TextSearchResult format
for _, doc := range indexedResults {
results = append(results, TextSearchResult{
URL: doc.Link,
Header: doc.Title,
Description: doc.Description,
Source: doc.Tags,
})
}
// Calculate the page number for this engine
enginePage := (page-1)/engineCount + 1
// Debug print to verify engine and page number being fetched
printDebug("Fetching results for overall page %d using engine: %s (engine page %d)", page, engine.Name, enginePage)
// Fetch results from the selected engine
searchResults, _, err := engine.Func(query, safe, lang, enginePage)
if err != nil {
printWarn("Error performing search with %s: %v", engine.Name, err)
return results
} else {
results = append(results, validateResults(searchResults)...)
}
// Crawler is enabled, so use the search engines
engineCount := len(textSearchEngines)
// If no results are found with the selected engine, try the next in line
if len(results) == 0 {
for i := 1; i < engineCount; i++ {
nextEngine := textSearchEngines[(engineIndex+i)%engineCount]
enginePage = (page-1)/engineCount + 1 // Recalculate for the new engine
printInfo("No results found, trying next engine: %s (engine page %d)", nextEngine.Name, enginePage)
// Determine which engine to use for the current page
engineIndex := (page - 1) % engineCount
engine := textSearchEngines[engineIndex]
searchResults, _, err := nextEngine.Func(query, safe, lang, enginePage)
if err != nil {
printWarn("Error performing search with %s: %v", nextEngine.Name, err)
continue
}
// Calculate the page number for this engine
enginePage := (page-1)/engineCount + 1
printDebug("Fetching results for overall page %d using engine: %s (engine page %d)", page, engine.Name, enginePage)
// Fetch results from the selected engine
searchResults, _, err := engine.Func(query, safe, lang, enginePage)
if err != nil {
printWarn("Error performing search with %s: %v", engine.Name, err)
} else {
results = append(results, validateResults(searchResults)...)
if len(results) > 0 {
break
}
// If no results are found with the selected engine, try the next in line
if len(results) == 0 {
for i := 1; i < engineCount; i++ {
nextEngine := textSearchEngines[(engineIndex+i)%engineCount]
enginePage = (page-1)/engineCount + 1
printInfo("No results found, trying next engine: %s (engine page %d)", nextEngine.Name, enginePage)
searchResults, _, err := nextEngine.Func(query, safe, lang, enginePage)
if err != nil {
printWarn("Error performing search with %s: %v", nextEngine.Name, err)
continue
}
results = append(results, validateResults(searchResults)...)
if len(results) > 0 {
break
}
}
}
printInfo("Fetched %d results for overall page %d", len(results), page)
return results
}
printInfo("Fetched %d results for overall page %d", len(results), page)
return results
}
func validateResults(searchResults []SearchResult) []TextSearchResult {