From 047cccd19f7d102772508537a81485e42c4e74c3 Mon Sep 17 00:00:00 2001 From: partisan Date: Sun, 29 Dec 2024 22:54:55 +0100 Subject: [PATCH] added website crawling and indexing crawled results --- cache-images.go | 42 +++++---- cache.go | 2 +- config.go | 25 +++-- crawler.go | 224 +++++++++++++++++++++++++++++++++++++++++++++ get-domains-csv.go | 118 ++++++++++++++++++++++++ go.mod | 46 ++++++++-- go.sum | 123 ++++++++++++++++++++++--- indexer.go | 198 +++++++++++++++++++++++++++++++++++++++ init.go | 25 +++++ text.go | 113 ++++++++++++----------- 10 files changed, 819 insertions(+), 97 deletions(-) create mode 100644 crawler.go create mode 100644 get-domains-csv.go create mode 100644 indexer.go diff --git a/cache-images.go b/cache-images.go index 16d686e..4e551cd 100644 --- a/cache-images.go +++ b/cache-images.go @@ -24,15 +24,15 @@ import ( ) var ( - cachingImages = make(map[string]*sync.Mutex) - cachingImagesMu sync.Mutex - // cachingSemaphore = make(chan struct{}, 100) // Limit to concurrent downloads + cachingImages = make(map[string]*sync.Mutex) + cachingImagesMu sync.Mutex + cachingSemaphore = make(chan struct{}, 100) invalidImageIDs = make(map[string]struct{}) invalidImageIDsMu sync.Mutex - imageURLMap = make(map[string]string) // mapping from imageID_type to imageURL - imageURLMapMu sync.RWMutex // mutex for thread-safe access + imageURLMap = make(map[string]string) + imageURLMapMu sync.RWMutex ) func cacheImage(imageURL, imageID string, isThumbnail bool) (string, bool, error) { @@ -49,7 +49,13 @@ func cacheImage(imageURL, imageID string, isThumbnail bool) (string, bool, error filename = fmt.Sprintf("%s_full.webp", imageID) } - cachedImagePath := filepath.Join(config.DriveCache.Path, filename) + // Make sure we store inside: config.DriveCache.Path / images + imageCacheDir := filepath.Join(config.DriveCache.Path, "images") + if err := os.MkdirAll(imageCacheDir, 0755); err != nil { + return "", false, fmt.Errorf("couldn't create images folder: %v", err) + } + + cachedImagePath := filepath.Join(imageCacheDir, filename) tempImagePath := cachedImagePath + ".tmp" // Check if the image is already cached @@ -73,9 +79,8 @@ func cacheImage(imageURL, imageID string, isThumbnail bool) (string, bool, error return cachedImagePath, true, nil } - // // Limit max concurrent downloads - // cachingSemaphore <- struct{}{} // Acquire a token - // defer func() { <-cachingSemaphore }() // Release the token + cachingSemaphore <- struct{}{} + defer func() { <-cachingSemaphore }() // Create a custom http.Client that skips SSL certificate verification client := &http.Client{ @@ -217,7 +222,8 @@ func handleImageServe(w http.ResponseWriter, r *http.Request) { imageType = parts[1] filename := fmt.Sprintf("%s_%s.webp", imageID, imageType) - cachedImagePath := filepath.Join(config.DriveCache.Path, filename) + // Adjust to read from config.DriveCache.Path / images + cachedImagePath := filepath.Join(config.DriveCache.Path, "images", filename) if hasExtension && imageType == "thumb" { // Requesting cached image (thumbnail or full) @@ -329,7 +335,7 @@ func handleImageStatus(w http.ResponseWriter, r *http.Request) { // Check thumbnail first for _, ext := range extensions { thumbFilename := fmt.Sprintf("%s_thumb.%s", id, ext) - thumbPath := filepath.Join(config.DriveCache.Path, thumbFilename) + thumbPath := filepath.Join(config.DriveCache.Path, "images", thumbFilename) if _, err := os.Stat(thumbPath); err == nil { statusMap[id] = fmt.Sprintf("/image/%s_thumb.%s", id, ext) @@ -342,7 +348,7 @@ func handleImageStatus(w http.ResponseWriter, r *http.Request) { if !imageReady { for _, ext := range extensions { fullFilename := fmt.Sprintf("%s_full.%s", id, ext) - fullPath := filepath.Join(config.DriveCache.Path, fullFilename) + fullPath := filepath.Join(config.DriveCache.Path, "images", fullFilename) if _, err := os.Stat(fullPath); err == nil { statusMap[id] = fmt.Sprintf("/image/%s_full.%s", id, ext) @@ -447,7 +453,9 @@ func cleanExpiredCachedImages() { } func cleanupCache() { - files, err := os.ReadDir(config.DriveCache.Path) + // Read from: config.DriveCache.Path / images + imageCacheDir := filepath.Join(config.DriveCache.Path, "images") + files, err := os.ReadDir(imageCacheDir) if err != nil { printErr("Failed to read DriveCache directory: %v", err) return @@ -462,19 +470,17 @@ func cleanupCache() { continue } - filePath := filepath.Join(config.DriveCache.Path, file.Name()) + filePath := filepath.Join(imageCacheDir, file.Name()) - // Check for expired files based on modification time if config.DriveCache.Duration > 0 && time.Since(info.ModTime()) > config.DriveCache.Duration { if err := os.Remove(filePath); err == nil { printDebug("Removed expired cache file: %s", filePath) } else { printErr("Failed to remove expired cache file: %s", filePath) } - continue // Skip adding this file to the list + continue } - // Accumulate total size and store file info for potential deletion totalSize += uint64(info.Size()) fileInfos = append(fileInfos, info) } @@ -491,7 +497,7 @@ func cleanupCache() { break } - filePath := filepath.Join(config.DriveCache.Path, info.Name()) + filePath := filepath.Join(imageCacheDir, info.Name()) fileSize := uint64(info.Size()) if err := os.Remove(filePath); err == nil { diff --git a/cache.go b/cache.go index b5ad880..ac2902d 100644 --- a/cache.go +++ b/cache.go @@ -162,7 +162,7 @@ func (rc *ResultsCache) keyToString(key CacheKey) string { // checkAndCleanCache removes items if memory usage exceeds the limit. func (rc *ResultsCache) checkAndCleanCache() { - for rc.currentMemoryUsage() > config.RamCache.MaxUsageBytes { + if rc.currentMemoryUsage() > config.RamCache.MaxUsageBytes { rc.cleanOldestItems() } } diff --git a/config.go b/config.go index c3aec6b..2e5d805 100644 --- a/config.go +++ b/config.go @@ -30,6 +30,7 @@ type Config struct { Domain string // Added NodesEnabled bool // Added CrawlerEnabled bool // Added + IndexerEnabled bool // Added WebsiteEnabled bool // Added RamCacheEnabled bool DriveCacheEnabled bool // Added @@ -46,6 +47,7 @@ var defaultConfig = Config{ AuthCode: generateStrongRandomString(64), NodesEnabled: false, CrawlerEnabled: true, + IndexerEnabled: false, WebsiteEnabled: true, RamCacheEnabled: true, DriveCacheEnabled: false, @@ -105,6 +107,15 @@ func createConfig() error { config.Domain = defaultConfig.Domain } + // printMessage("Use Indexer? (YES/no): ") + // indexerChoice, _ := reader.ReadString('\n') + // indexerChoice = strings.TrimSpace(strings.ToLower(indexerChoice)) + // if indexerChoice == "no" { + // config.IndexerEnabled = false + // } else { + // config.IndexerEnabled = true + // } + // Cache settings printMessage("Would you like to configure Cache settings (yes/NO): ") configureCache, _ := reader.ReadString('\n') @@ -181,7 +192,7 @@ func createConfig() error { } else { config.DriveCache.MaxUsageBytes = parseMaxUsageDrive(driveMaxUsage, drivePath) if config.DriveCache.MaxUsageBytes == 0 { - printWarn("Invalid DriveCache max usage, using default (1 TiB).") + printWarn("Invalid DriveCache max usage, using default.") config.DriveCache.MaxUsageBytes = defaultConfig.DriveCache.MaxUsageBytes } } @@ -201,13 +212,6 @@ func createConfig() error { printMessage("Generated connection code: %s\n", config.AuthCode) } - // Set other default values - config.NodesEnabled = defaultConfig.NodesEnabled - config.CrawlerEnabled = defaultConfig.CrawlerEnabled - config.WebsiteEnabled = defaultConfig.WebsiteEnabled - config.LogLevel = defaultConfig.LogLevel - - // Save configuration to file saveConfig(config) printInfo("Configuration saved successfully.") return nil @@ -232,6 +236,7 @@ func saveConfig(config Config) { featuresSec := cfg.Section("Features") featuresSec.Key("Nodes").SetValue(strconv.FormatBool(config.NodesEnabled)) featuresSec.Key("Crawler").SetValue(strconv.FormatBool(config.CrawlerEnabled)) + featuresSec.Key("Indexer").SetValue(strconv.FormatBool(config.IndexerEnabled)) featuresSec.Key("Website").SetValue(strconv.FormatBool(config.WebsiteEnabled)) featuresSec.Key("RamCache").SetValue(strconv.FormatBool(config.RamCacheEnabled)) featuresSec.Key("DriveCache").SetValue(strconv.FormatBool(config.DriveCacheEnabled)) @@ -273,6 +278,7 @@ func loadConfig() Config { // Features nodesEnabled, _ := cfg.Section("Features").Key("Nodes").Bool() crawlerEnabled, _ := cfg.Section("Features").Key("Crawler").Bool() + indexerEnabled, _ := cfg.Section("Features").Key("Indexer").Bool() websiteEnabled, _ := cfg.Section("Features").Key("Website").Bool() ramCacheEnabled, _ := cfg.Section("Features").Key("RamCache").Bool() driveCacheEnabled, _ := cfg.Section("Features").Key("DriveCache").Bool() @@ -294,10 +300,11 @@ func loadConfig() Config { Port: port, Domain: domain, LogLevel: logLevel, - AuthCode: authCode, // Assign AuthCode here + AuthCode: authCode, Peers: peers, NodesEnabled: nodesEnabled, CrawlerEnabled: crawlerEnabled, + IndexerEnabled: indexerEnabled, WebsiteEnabled: websiteEnabled, RamCacheEnabled: ramCacheEnabled, DriveCacheEnabled: driveCacheEnabled, diff --git a/crawler.go b/crawler.go new file mode 100644 index 0000000..bbe3540 --- /dev/null +++ b/crawler.go @@ -0,0 +1,224 @@ +package main + +import ( + "bufio" + "fmt" + "net/http" + "os" + "path/filepath" + "strings" + "time" + + "golang.org/x/net/html" +) + +// webCrawlerInit is called during init on program start +func webCrawlerInit() { + go func() { + // First run immediately + runCrawlerAndIndexer() + + // Then every 24h (adjust as needed) + ticker := time.NewTicker(24 * time.Hour) + for range ticker.C { + runCrawlerAndIndexer() + } + }() +} + +// runCrawlerAndIndexer reads domains.csv -> crawls -> writes to data_to_index.txt -> reindexes +func runCrawlerAndIndexer() { + // 1. Read domains.csv + domains, err := readDomainsCSV(filepath.Join(config.DriveCache.Path, "domains.csv")) + if err != nil { + printErr("Error reading domains.csv: %v", err) + return + } + + // 2. Crawl each domain and write results to data_to_index.txt + outFile := filepath.Join(config.DriveCache.Path, "data_to_index.txt") + if err := crawlDomainsToFile(domains, outFile); err != nil { + printErr("Error crawling domains: %v", err) + return + } + + // 3. Re-index data_to_index.txt + if err := IndexFile(outFile); err != nil { + printErr("Error indexing data_to_index.txt: %v", err) + return + } + + printDebug("Crawl + index refresh completed.") +} + +// readDomainsCSV returns a slice of (rank,domain) from a local CSV file +func readDomainsCSV(csvPath string) ([][2]string, error) { + f, err := os.Open(csvPath) + if err != nil { + return nil, err + } + defer f.Close() + + var result [][2]string + scanner := bufio.NewScanner(f) + // Skip header line + scanner.Scan() + + for scanner.Scan() { + line := scanner.Text() + // Split by commas, not tabs + fields := strings.SplitN(line, ",", 3) // Splits into up to 3 parts (rank, domain, popularity) + if len(fields) < 2 { + printDebug("Skipping malformed line: %s", line) + continue + } + // Remove quotes around fields, if present + rank := strings.Trim(fields[0], `"`) + domain := strings.Trim(fields[1], `"`) + result = append(result, [2]string{rank, domain}) + } + return result, scanner.Err() +} + +// crawlDomainsToFile visits each domain, extracts minimal data, writes results to outFile +func crawlDomainsToFile(domains [][2]string, outFile string) error { + // Read existing data_to_index.txt into a map to prevent duplicates + existingEntries := make(map[string]bool) + if _, err := os.Stat(outFile); err == nil { // File exists + file, err := os.Open(outFile) + if err != nil { + return fmt.Errorf("unable to open %s: %v", outFile, err) + } + defer file.Close() + + scanner := bufio.NewScanner(file) + for scanner.Scan() { + line := scanner.Text() + parts := strings.SplitN(line, "|", 5) + if len(parts) >= 1 { + existingEntries[parts[0]] = true // Mark existing domain + } + } + } + + // Open file for writing (truncate if existing) + file, err := os.OpenFile(outFile, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644) + if err != nil { + return fmt.Errorf("unable to open %s for writing: %v", outFile, err) + } + defer file.Close() + + for _, d := range domains { + rank := d[0] + domain := d[1] + if domain == "" || existingEntries["https://"+domain] { + continue + } + + fullURL := "https://" + domain + title, desc, keywords := fetchPageMetadata(fullURL) + if title == "" { + title = "Unknown Title" + } + if desc == "" { + desc = "No Description" + } + + // Write unique domain to file + line := fmt.Sprintf("%s|%s|%s|%s|%s\n", + fullURL, + sanitize(title), + sanitize(keywords), + sanitize(desc), + rank, + ) + if _, err := file.WriteString(line); err != nil { + return err + } + + existingEntries[fullURL] = true + } + + return nil +} + +// fetchPageMetadata does a simple GET and parses , meta[name=description], meta[name=keywords] +func fetchPageMetadata(pageURL string) (string, string, string) { + // Generate a User-Agent using your GetUserAgent function + userAgent, err := GetUserAgent("crawler") + if err != nil { + printWarn("Failed to generate User-Agent: %v", err) + return "", "", "" + } + + client := &http.Client{Timeout: 15 * time.Second} + req, err := http.NewRequest("GET", pageURL, nil) + if err != nil { + printWarn("Failed to create request for %s: %v", pageURL, err) + return "", "", "" + } + + // Set the dynamically generated User-Agent + req.Header.Set("User-Agent", userAgent) + + resp, err := client.Do(req) + if err != nil { + printWarn("Failed to GET %s: %v", pageURL, err) + return "", "", "" + } + defer resp.Body.Close() + + // Handle non-200 responses + if resp.StatusCode == 403 || resp.StatusCode == 401 { + printWarn("Skipping %s: HTTP %d", pageURL, resp.StatusCode) + return "", "", "" + } else if resp.StatusCode < 200 || resp.StatusCode >= 300 { + printWarn("Non-200 for %s: %d", pageURL, resp.StatusCode) + return "", "", "" + } + + // Parse HTML + doc, err := html.Parse(resp.Body) + if err != nil { + printWarn("HTML parse error for %s: %v", pageURL, err) + return "", "", "" + } + + var title, desc, keywords string + var f func(*html.Node) + f = func(n *html.Node) { + if n.Type == html.ElementNode && n.Data == "title" && n.FirstChild != nil { + title = n.FirstChild.Data + } + if n.Type == html.ElementNode && n.Data == "meta" { + var nameVal, contentVal string + for _, attr := range n.Attr { + switch strings.ToLower(attr.Key) { + case "name": + nameVal = strings.ToLower(attr.Val) + case "content": + contentVal = attr.Val + } + } + if nameVal == "description" { + desc = contentVal + } else if nameVal == "keywords" { + keywords = contentVal + } + } + for c := n.FirstChild; c != nil; c = c.NextSibling { + f(c) + } + } + f(doc) + + return title, desc, keywords +} + +// sanitize is a quick helper to remove newlines/pipes from fields +func sanitize(input string) string { + input = strings.ReplaceAll(input, "|", " ") + input = strings.ReplaceAll(input, "\n", " ") + input = strings.TrimSpace(input) + return input +} diff --git a/get-domains-csv.go b/get-domains-csv.go new file mode 100644 index 0000000..8d931f9 --- /dev/null +++ b/get-domains-csv.go @@ -0,0 +1,118 @@ +package main + +import ( + "archive/zip" + "fmt" + "io" + "net/http" + "os" + "path/filepath" +) + +func downloadAndSetupDomainsCSV() error { + targetFilePath := filepath.Join(config.DriveCache.Path, "domains.csv") + + // Check if domains.csv already exists + if _, err := os.Stat(targetFilePath); err == nil { + printDebug("domains.csv already exists at %s", targetFilePath) + return nil + } + + downloadURL := "https://www.domcop.com/files/top/top10milliondomains.csv.zip" + zipFilePath := filepath.Join(config.DriveCache.Path, "top10milliondomains.csv.zip") + + // Download the file + printDebug("Downloading file from %s", downloadURL) + resp, err := http.Get(downloadURL) + if err != nil { + return fmt.Errorf("failed to download file: %v", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return fmt.Errorf("failed to download file: received status code %d", resp.StatusCode) + } + + // Create the zip file locally + zipFile, err := os.Create(zipFilePath) + if err != nil { + return fmt.Errorf("failed to create local zip file: %v", err) + } + defer zipFile.Close() + + _, err = io.Copy(zipFile, resp.Body) + if err != nil { + return fmt.Errorf("failed to write downloaded zip file: %v", err) + } + + // Unzip the file + printDebug("Unzipping file %s", zipFilePath) + if err := unzipFile(zipFilePath, config.DriveCache.Path); err != nil { + return fmt.Errorf("failed to unzip file: %v", err) + } + + // Find the .csv file and rename/move it to domains.csv + csvFound := false + dirEntries, err := os.ReadDir(config.DriveCache.Path) + if err != nil { + return fmt.Errorf("failed to read directory: %v", err) + } + + for _, entry := range dirEntries { + if !entry.IsDir() && filepath.Ext(entry.Name()) == ".csv" { + csvPath := filepath.Join(config.DriveCache.Path, entry.Name()) + if err := os.Rename(csvPath, targetFilePath); err != nil { + return fmt.Errorf("failed to move %s to %s: %v", csvPath, targetFilePath, err) + } + csvFound = true + break + } + } + + if !csvFound { + return fmt.Errorf("no .csv file found in the downloaded archive") + } + + // Clean up zip file + if err := os.Remove(zipFilePath); err != nil { + printWarn("failed to remove zip file %s: %v", zipFilePath, err) + } + + printDebug("domains.csv successfully downloaded and placed at %s", targetFilePath) + return nil +} + +func unzipFile(zipFile, destDir string) error { + reader, err := zip.OpenReader(zipFile) + if err != nil { + return err + } + defer reader.Close() + + for _, file := range reader.File { + filePath := filepath.Join(destDir, file.Name) + + if file.FileInfo().IsDir() { + os.MkdirAll(filePath, os.ModePerm) + continue + } + + srcFile, err := file.Open() + if err != nil { + return err + } + defer srcFile.Close() + + destFile, err := os.Create(filePath) + if err != nil { + return err + } + defer destFile.Close() + + if _, err := io.Copy(destFile, srcFile); err != nil { + return err + } + } + + return nil +} diff --git a/go.mod b/go.mod index 63599f8..6895586 100644 --- a/go.mod +++ b/go.mod @@ -1,9 +1,11 @@ -module searchengine +module qgato -go 1.18 +go 1.23 + +toolchain go1.23.4 require ( - github.com/PuerkitoBio/goquery v1.9.1 // direct + github.com/PuerkitoBio/goquery v1.10.0 // direct github.com/chai2010/webp v1.1.1 github.com/leonelquinteros/gotext v1.7.0 github.com/shirou/gopsutil v3.21.11+incompatible @@ -12,10 +14,42 @@ require ( ) require ( - github.com/andybalholm/cascadia v1.3.2 // indirect + github.com/blevesearch/bleve/v2 v2.4.4 + golang.org/x/net v0.33.0 +) + +require ( + github.com/RoaringBitmap/roaring v1.9.4 // indirect + github.com/andybalholm/cascadia v1.3.3 // indirect + github.com/bits-and-blooms/bitset v1.20.0 // indirect + github.com/blevesearch/bleve_index_api v1.2.0 // indirect + github.com/blevesearch/geo v0.1.20 // indirect + github.com/blevesearch/go-faiss v1.0.24 // indirect + github.com/blevesearch/go-porterstemmer v1.0.3 // indirect + github.com/blevesearch/gtreap v0.1.1 // indirect + github.com/blevesearch/mmap-go v1.0.4 // indirect + github.com/blevesearch/scorch_segment_api/v2 v2.3.0 // indirect + github.com/blevesearch/segment v0.9.1 // indirect + github.com/blevesearch/snowballstem v0.9.0 // indirect + github.com/blevesearch/upsidedown_store_api v1.0.2 // indirect + github.com/blevesearch/vellum v1.1.0 // indirect + github.com/blevesearch/zapx/v11 v11.3.10 // indirect + github.com/blevesearch/zapx/v12 v12.3.10 // indirect + github.com/blevesearch/zapx/v13 v13.3.10 // indirect + github.com/blevesearch/zapx/v14 v14.3.10 // indirect + github.com/blevesearch/zapx/v15 v15.3.17 // indirect + github.com/blevesearch/zapx/v16 v16.1.9-0.20241217210638-a0519e7caf3b // indirect github.com/go-ole/go-ole v1.3.0 // indirect + github.com/golang/geo v0.0.0-20230421003525-6adc56603217 // indirect + github.com/golang/protobuf v1.5.4 // indirect + github.com/golang/snappy v0.0.4 // indirect + github.com/json-iterator/go v1.1.12 // indirect + github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect + github.com/modern-go/reflect2 v1.0.2 // indirect + github.com/mschoch/smat v0.2.0 // indirect github.com/stretchr/testify v1.9.0 // indirect github.com/yusufpapurcu/wmi v1.2.4 // indirect - golang.org/x/net v0.30.0 // indirect - golang.org/x/sys v0.26.0 // indirect + go.etcd.io/bbolt v1.3.11 // indirect + golang.org/x/sys v0.28.0 // indirect + google.golang.org/protobuf v1.36.0 // indirect ) diff --git a/go.sum b/go.sum index 962a1b8..f3f643b 100644 --- a/go.sum +++ b/go.sum @@ -1,39 +1,121 @@ -github.com/PuerkitoBio/goquery v1.9.1 h1:mTL6XjbJTZdpfL+Gwl5U2h1l9yEkJjhmlTeV9VPW7UI= -github.com/PuerkitoBio/goquery v1.9.1/go.mod h1:cW1n6TmIMDoORQU5IU/P1T3tGFunOeXEpGP2WHRwkbY= -github.com/andybalholm/cascadia v1.3.2 h1:3Xi6Dw5lHF15JtdcmAHD3i1+T8plmv7BQ/nsViSLyss= -github.com/andybalholm/cascadia v1.3.2/go.mod h1:7gtRlve5FxPPgIgX36uWBX58OdBsSS6lUvCFb+h7KvU= +github.com/PuerkitoBio/goquery v1.10.0 h1:6fiXdLuUvYs2OJSvNRqlNPoBm6YABE226xrbavY5Wv4= +github.com/PuerkitoBio/goquery v1.10.0/go.mod h1:TjZZl68Q3eGHNBA8CWaxAN7rOU1EbDz3CWuolcO5Yu4= +github.com/RoaringBitmap/roaring v1.9.4 h1:yhEIoH4YezLYT04s1nHehNO64EKFTop/wBhxv2QzDdQ= +github.com/RoaringBitmap/roaring v1.9.4/go.mod h1:6AXUsoIEzDTFFQCe1RbGA6uFONMhvejWj5rqITANK90= +github.com/andybalholm/cascadia v1.3.3 h1:AG2YHrzJIm4BZ19iwJ/DAua6Btl3IwJX+VI4kktS1LM= +github.com/andybalholm/cascadia v1.3.3/go.mod h1:xNd9bqTn98Ln4DwST8/nG+H0yuB8Hmgu1YHNnWw0GeA= +github.com/bits-and-blooms/bitset v1.12.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8= +github.com/bits-and-blooms/bitset v1.20.0 h1:2F+rfL86jE2d/bmw7OhqUg2Sj/1rURkBn3MdfoPyRVU= +github.com/bits-and-blooms/bitset v1.20.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8= +github.com/blevesearch/bleve/v2 v2.4.4 h1:RwwLGjUm54SwyyykbrZs4vc1qjzYic4ZnAnY9TwNl60= +github.com/blevesearch/bleve/v2 v2.4.4/go.mod h1:fa2Eo6DP7JR+dMFpQe+WiZXINKSunh7WBtlDGbolKXk= +github.com/blevesearch/bleve_index_api v1.2.0 h1:/DXMMWBwx/UmGKM1xDhTwDoJI5yQrG6rqRWPFcOgUVo= +github.com/blevesearch/bleve_index_api v1.2.0/go.mod h1:PbcwjIcRmjhGbkS/lJCpfgVSMROV6TRubGGAODaK1W8= +github.com/blevesearch/geo v0.1.20 h1:paaSpu2Ewh/tn5DKn/FB5SzvH0EWupxHEIwbCk/QPqM= +github.com/blevesearch/geo v0.1.20/go.mod h1:DVG2QjwHNMFmjo+ZgzrIq2sfCh6rIHzy9d9d0B59I6w= +github.com/blevesearch/go-faiss v1.0.24 h1:K79IvKjoKHdi7FdiXEsAhxpMuns0x4fM0BO93bW5jLI= +github.com/blevesearch/go-faiss v1.0.24/go.mod h1:OMGQwOaRRYxrmeNdMrXJPvVx8gBnvE5RYrr0BahNnkk= +github.com/blevesearch/go-porterstemmer v1.0.3 h1:GtmsqID0aZdCSNiY8SkuPJ12pD4jI+DdXTAn4YRcHCo= +github.com/blevesearch/go-porterstemmer v1.0.3/go.mod h1:angGc5Ht+k2xhJdZi511LtmxuEf0OVpvUUNrwmM1P7M= +github.com/blevesearch/gtreap v0.1.1 h1:2JWigFrzDMR+42WGIN/V2p0cUvn4UP3C4Q5nmaZGW8Y= +github.com/blevesearch/gtreap v0.1.1/go.mod h1:QaQyDRAT51sotthUWAH4Sj08awFSSWzgYICSZ3w0tYk= +github.com/blevesearch/mmap-go v1.0.4 h1:OVhDhT5B/M1HNPpYPBKIEJaD0F3Si+CrEKULGCDPWmc= +github.com/blevesearch/mmap-go v1.0.4/go.mod h1:EWmEAOmdAS9z/pi/+Toxu99DnsbhG1TIxUoRmJw/pSs= +github.com/blevesearch/scorch_segment_api/v2 v2.3.0 h1:vxCjbXAkkEBSb4AB3Iqgr/EJcPyYRsiGxpcvsS8E1Dw= +github.com/blevesearch/scorch_segment_api/v2 v2.3.0/go.mod h1:5y+TgXYSx+xJGaCwSlvy9G/UJBIY5wzvIkhvhBm2ATc= +github.com/blevesearch/segment v0.9.1 h1:+dThDy+Lvgj5JMxhmOVlgFfkUtZV2kw49xax4+jTfSU= +github.com/blevesearch/segment v0.9.1/go.mod h1:zN21iLm7+GnBHWTao9I+Au/7MBiL8pPFtJBJTsk6kQw= +github.com/blevesearch/snowballstem v0.9.0 h1:lMQ189YspGP6sXvZQ4WZ+MLawfV8wOmPoD/iWeNXm8s= +github.com/blevesearch/snowballstem v0.9.0/go.mod h1:PivSj3JMc8WuaFkTSRDW2SlrulNWPl4ABg1tC/hlgLs= +github.com/blevesearch/upsidedown_store_api v1.0.2 h1:U53Q6YoWEARVLd1OYNc9kvhBMGZzVrdmaozG2MfoB+A= +github.com/blevesearch/upsidedown_store_api v1.0.2/go.mod h1:M01mh3Gpfy56Ps/UXHjEO/knbqyQ1Oamg8If49gRwrQ= +github.com/blevesearch/vellum v1.1.0 h1:CinkGyIsgVlYf8Y2LUQHvdelgXr6PYuvoDIajq6yR9w= +github.com/blevesearch/vellum v1.1.0/go.mod h1:QgwWryE8ThtNPxtgWJof5ndPfx0/YMBh+W2weHKPw8Y= +github.com/blevesearch/zapx/v11 v11.3.10 h1:hvjgj9tZ9DeIqBCxKhi70TtSZYMdcFn7gDb71Xo/fvk= +github.com/blevesearch/zapx/v11 v11.3.10/go.mod h1:0+gW+FaE48fNxoVtMY5ugtNHHof/PxCqh7CnhYdnMzQ= +github.com/blevesearch/zapx/v12 v12.3.10 h1:yHfj3vXLSYmmsBleJFROXuO08mS3L1qDCdDK81jDl8s= +github.com/blevesearch/zapx/v12 v12.3.10/go.mod h1:0yeZg6JhaGxITlsS5co73aqPtM04+ycnI6D1v0mhbCs= +github.com/blevesearch/zapx/v13 v13.3.10 h1:0KY9tuxg06rXxOZHg3DwPJBjniSlqEgVpxIqMGahDE8= +github.com/blevesearch/zapx/v13 v13.3.10/go.mod h1:w2wjSDQ/WBVeEIvP0fvMJZAzDwqwIEzVPnCPrz93yAk= +github.com/blevesearch/zapx/v14 v14.3.10 h1:SG6xlsL+W6YjhX5N3aEiL/2tcWh3DO75Bnz77pSwwKU= +github.com/blevesearch/zapx/v14 v14.3.10/go.mod h1:qqyuR0u230jN1yMmE4FIAuCxmahRQEOehF78m6oTgns= +github.com/blevesearch/zapx/v15 v15.3.17 h1:NkkMI98pYLq/uHnB6YWcITrrLpCVyvZ9iP+AyfpW1Ys= +github.com/blevesearch/zapx/v15 v15.3.17/go.mod h1:vXRQzJJvlGVCdmOD5hg7t7JdjUT5DmDPhsAfjvtzIq8= +github.com/blevesearch/zapx/v16 v16.1.9-0.20241217210638-a0519e7caf3b h1:ju9Az5YgrzCeK3M1QwvZIpxYhChkXp7/L0RhDYsxXoE= +github.com/blevesearch/zapx/v16 v16.1.9-0.20241217210638-a0519e7caf3b/go.mod h1:BlrYNpOu4BvVRslmIG+rLtKhmjIaRhIbG8sb9scGTwI= github.com/chai2010/webp v1.1.1 h1:jTRmEccAJ4MGrhFOrPMpNGIJ/eybIgwKpcACsrTEapk= github.com/chai2010/webp v1.1.1/go.mod h1:0XVwvZWdjjdxpUEIf7b9g9VkHFnInUSYujwqTLEuldU= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0= github.com/go-ole/go-ole v1.3.0 h1:Dt6ye7+vXGIKZ7Xtk4s6/xVdGDQynvom7xCFEdWr6uE= github.com/go-ole/go-ole v1.3.0/go.mod h1:5LS6F96DhAwUc7C+1HLexzMXY1xGRSryjyPPKW6zv78= +github.com/golang/geo v0.0.0-20230421003525-6adc56603217 h1:HKlyj6in2JV6wVkmQ4XmG/EIm+SCYlPZ+V4GWit7Z+I= +github.com/golang/geo v0.0.0-20230421003525-6adc56603217/go.mod h1:8wI0hitZ3a1IxZfeH3/5I97CI8i5cLGsYe7xNhQGs9U= +github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= +github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= +github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM= +github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= +github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/leonelquinteros/gotext v1.7.0 h1:jcJmF4AXqyamP7vuw2MMIKs+O3jAEmvrc5JQiI8Ht/8= github.com/leonelquinteros/gotext v1.7.0/go.mod h1:qJdoQuERPpccw7L70uoU+K/BvTfRBHYsisCQyFLXyvw= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= +github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/mschoch/smat v0.2.0 h1:8imxQsjDm8yFEAVBe7azKmKSgzSkZXDuKkSq9374khM= +github.com/mschoch/smat v0.2.0/go.mod h1:kc9mz7DoBKqDyiRL7VZN8KvXQMWeTaVnttLRXOlotKw= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/shirou/gopsutil v3.21.11+incompatible h1:+1+c1VGhc88SSonWP6foOcLhvnKlUeu/erjjvaPEYiI= github.com/shirou/gopsutil v3.21.11+incompatible/go.mod h1:5b4v6he4MtMOwMlS0TUMTu2PcXUg8+E1lC7eC3UO/RA= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= github.com/yusufpapurcu/wmi v1.2.4 h1:zFUKzehAFReQwLys1b/iSMl+JQGSCSjtVqQn9bBrPo0= github.com/yusufpapurcu/wmi v1.2.4/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0= +go.etcd.io/bbolt v1.3.11 h1:yGEzV1wPz2yVCLsD8ZAiGHhHVlczyC9d1rP43/VCRJ0= +go.etcd.io/bbolt v1.3.11/go.mod h1:dksAq7YMXoljX0xu6VF5DMZGbhYYoLUalEiSySYAS4I= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/crypto v0.13.0/go.mod h1:y6Z2r+Rw4iayiXXAIxJIDAJ1zMW4yaTpebo8fPOliYc= +golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU= +golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8= +golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk= golang.org/x/image v0.21.0 h1:c5qV36ajHpdj4Qi0GnE0jUc/yuo33OLFaa0d+crTD5s= golang.org/x/image v0.21.0/go.mod h1:vUbsLavqK/W303ZroQQVKQ+Af3Yl6Uz1Ppu5J/cLz78= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= +golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= +golang.org/x/mod v0.15.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= +golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= -golang.org/x/net v0.9.0/go.mod h1:d48xBJpPfHeWQsugry2m+kC02ZBRGRgulfHnEXEuWns= -golang.org/x/net v0.30.0 h1:AcW1SDZMkb8IpzCdQUaIq2sP4sZ4zw+55h6ynffypl4= -golang.org/x/net v0.30.0/go.mod h1:2wGyMJ5iFasEhkwi13ChkO/t1ECNC4X4eBKkVFyYFlU= +golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= +golang.org/x/net v0.15.0/go.mod h1:idbUs1IY1+zTqbi8yxTbhexhEEk5ur9LInksu6HrEpk= +golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44= +golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM= +golang.org/x/net v0.33.0 h1:74SYHlV8BIgHIFC/LrYkOGIwL19eTYXQ5wc6TBuO36I= +golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y= +golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.10.0 h1:3NQrjDixjgGwUOCaF8w2+VYHv0Ve/vGYSbdkTa98gmQ= +golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -42,23 +124,42 @@ golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo= -golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA= +golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/telemetry v0.0.0-20240228155512-f48c80bd79b2/go.mod h1:TeRTkGYfJXctD9OcfyVLyj2J3IxLnKwHJR8f4D8a3YE= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= -golang.org/x/term v0.7.0/go.mod h1:P32HKFT3hSsZrRxla30E9HqToFYAQPCMs/zFMBUFqPY= +golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo= +golang.org/x/term v0.12.0/go.mod h1:owVbMEjm3cBLCHdkQu9b1opXd4ETQWc3BhuQGKgXgvU= +golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk= +golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY= +golang.org/x/term v0.27.0/go.mod h1:iMsnZpn0cago0GOrHO2+Y7u7JPn5AylBrcoWkElMTSM= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= +golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= +golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= +golang.org/x/tools v0.13.0/go.mod h1:HvlwmtVNQAhOuCjW7xxvovg8wbNq7LwfXh/k7wXUl58= +golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +google.golang.org/protobuf v1.36.0 h1:mjIs9gYtt56AzC4ZaffQuh88TZurBGhIJMBZGSxNerQ= +google.golang.org/protobuf v1.36.0/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/ini.v1 v1.67.0 h1:Dgnx+6+nfE+IfzjUEISNeydPJh9AXNNsWbGP9KzCsOA= gopkg.in/ini.v1 v1.67.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/indexer.go b/indexer.go new file mode 100644 index 0000000..66bc100 --- /dev/null +++ b/indexer.go @@ -0,0 +1,198 @@ +package main + +import ( + "bufio" + "fmt" + "os" + "path/filepath" + "strconv" + "strings" + "time" + + "github.com/blevesearch/bleve/v2" +) + +// Document represents a single document to be indexed. +// You can add more fields if needed. +type Document struct { + ID string `json:"id"` + Link string `json:"link"` + Title string `json:"title"` + Tags string `json:"tags"` + Description string `json:"description"` + Popularity int64 `json:"popularity"` +} + +var ( + // Global Bleve index handle + bleveIndex bleve.Index +) + +func startPeriodicIndexing(filePath string, interval time.Duration) { + go func() { + for { + printDebug("Refreshing index from %s", filePath) + err := IndexFile(filePath) + if err != nil { + printErr("Failed to refresh index: %v", err) + } + time.Sleep(interval) + } + }() +} + +// InitIndex ensures that the Bleve index is created or opened. +func InitIndex() error { + idx, err := bleve.Open(filepath.Join(config.DriveCache.Path, "index.bleve")) + if err == bleve.ErrorIndexPathDoesNotExist { + // Index doesn't exist, create a new one + mapping := bleve.NewIndexMapping() + + // Custom mapping for the document + docMapping := bleve.NewDocumentMapping() + + // Text fields with custom analyzers for better tokenization + textFieldMapping := bleve.NewTextFieldMapping() + textFieldMapping.Analyzer = "standard" // Use standard analyzer for partial and fuzzy matches + + docMapping.AddFieldMappingsAt("title", textFieldMapping) + docMapping.AddFieldMappingsAt("description", textFieldMapping) + docMapping.AddFieldMappingsAt("tags", textFieldMapping) + + // Numeric field for popularity + popularityMapping := bleve.NewNumericFieldMapping() + docMapping.AddFieldMappingsAt("popularity", popularityMapping) + + mapping.AddDocumentMapping("Document", docMapping) + + idx, err = bleve.New(filepath.Join(config.DriveCache.Path, "index.bleve"), mapping) + if err != nil { + return fmt.Errorf("failed to create index: %v", err) + } + } else if err != nil { + return fmt.Errorf("failed to open index: %v", err) + } + + bleveIndex = idx + return nil +} + +// IndexFile reads a file line-by-line and indexes each line as a document. +// Each line represents a simple document. Adjust parsing as needed. +func IndexFile(filePath string) error { + file, err := os.Open(filePath) + if err != nil { + return fmt.Errorf("unable to open file for indexing: %v", err) + } + defer file.Close() + + scanner := bufio.NewScanner(file) + batch := bleveIndex.NewBatch() + indexedDomains := make(map[string]bool) // Track indexed domains + + for scanner.Scan() { + line := scanner.Text() + + // Split the line into 5 fields: link|title|tags|description|popularity + parts := strings.SplitN(line, "|", 5) + if len(parts) < 5 { + continue // Skip malformed lines + } + + domain := parts[0] + popularity, _ := strconv.ParseInt(parts[4], 10, 64) + + // Skip if the domain is already indexed + if indexedDomains[domain] { + continue + } + + doc := Document{ + ID: domain, // Use the domain as the unique ID + Link: parts[0], + Title: parts[1], + Tags: parts[2], + Description: parts[3], + Popularity: popularity, + } + + err := batch.Index(doc.ID, map[string]interface{}{ + "title": doc.Title, + "description": doc.Description, + "link": doc.Link, + "tags": doc.Tags, + "popularity": doc.Popularity, + }) + if err != nil { + return fmt.Errorf("failed to index document: %v", err) + } + + indexedDomains[domain] = true // Mark the domain as indexed + } + + // Commit the batch + if err := bleveIndex.Batch(batch); err != nil { + return fmt.Errorf("error committing batch: %v", err) + } + + if err := scanner.Err(); err != nil { + return fmt.Errorf("error reading file: %v", err) + } + + printDebug("Indexed %d unique domains from %s\n", len(indexedDomains), filePath) + return nil +} + +// SearchIndex performs a full-text search on the indexed data. +func SearchIndex(queryStr string, page, pageSize int) ([]Document, error) { + // Create compound query + exactMatch := bleve.NewMatchQuery(queryStr) // Exact match + fuzzyMatch := bleve.NewFuzzyQuery(queryStr) // Fuzzy match + fuzzyMatch.Fuzziness = 2 + prefixMatch := bleve.NewPrefixQuery(queryStr) // Prefix match + + query := bleve.NewDisjunctionQuery(exactMatch, fuzzyMatch, prefixMatch) + + req := bleve.NewSearchRequest(query) + req.Fields = []string{"title", "description", "link", "tags", "popularity"} + + // Pagination + req.Size = pageSize + req.From = (page - 1) * pageSize + + // Sort by popularity + req.SortBy([]string{"popularity"}) + + res, err := bleveIndex.Search(req) + if err != nil { + return nil, fmt.Errorf("search error: %v", err) + } + + var docs []Document + for _, hit := range res.Hits { + title := fmt.Sprintf("%v", hit.Fields["title"]) + description := fmt.Sprintf("%v", hit.Fields["description"]) + link := fmt.Sprintf("%v", hit.Fields["link"]) + tags := fmt.Sprintf("%v", hit.Fields["tags"]) + popularity := int64(0) + + if pop, ok := hit.Fields["popularity"].(float64); ok { + popularity = int64(pop) + } + + if link == "<nil>" || title == "<nil>" { + continue + } + + docs = append(docs, Document{ + ID: hit.ID, + Title: title, + Description: description, + Link: link, + Tags: tags, + Popularity: popularity, + }) + } + + return docs, nil +} diff --git a/init.go b/init.go index e7d4ed1..c92e656 100644 --- a/init.go +++ b/init.go @@ -3,6 +3,8 @@ package main import ( "flag" "os" + "path/filepath" + "time" ) var config Config @@ -95,5 +97,28 @@ func main() { printInfo("RAM cache is disabled.") } + // Init indexer + if config.IndexerEnabled { + if err := downloadAndSetupDomainsCSV(); err != nil { + printErr("Failed to set up domains.csv: %v", err) + return + } + + webCrawlerInit() + + err := InitIndex() + if err != nil { + printErr("Failed to initialize index:", err) + } + + // Start periodic indexing (every 2 minutes) + dataFilePath := filepath.Join(config.DriveCache.Path, "data_to_index.txt") + startPeriodicIndexing(dataFilePath, 2*time.Minute) + + printInfo("Indexer is enabled.") + } else { + printInfo("Indexer is disabled.") + } + runServer() } diff --git a/text.go b/text.go index 4744a97..d6e3212 100755 --- a/text.go +++ b/text.go @@ -73,14 +73,10 @@ func getTextResultsFromCacheOrFetch(cacheKey CacheKey, query, safe, lang string, select { case results := <-cacheChan: if results == nil { - // Fetch only if the cache miss occurs and Crawler is enabled - if config.CrawlerEnabled { - combinedResults = fetchTextResults(query, safe, lang, page) - if len(combinedResults) > 0 { - resultsCache.Set(cacheKey, convertToSearchResults(combinedResults)) - } - } else { - printInfo("Crawler disabled; skipping fetching.") + // Always attempt to fetch results on a cache miss + combinedResults = fetchTextResults(query, safe, lang, page) + if len(combinedResults) > 0 { + resultsCache.Set(cacheKey, convertToSearchResults(combinedResults)) } } else { textResults, _, _, _ := convertToSpecificResults(results) @@ -88,13 +84,10 @@ func getTextResultsFromCacheOrFetch(cacheKey CacheKey, query, safe, lang string, } case <-time.After(2 * time.Second): printInfo("Cache check timeout") - if config.CrawlerEnabled { - combinedResults = fetchTextResults(query, safe, lang, page) - if len(combinedResults) > 0 { - resultsCache.Set(cacheKey, convertToSearchResults(combinedResults)) - } - } else { - printInfo("Crawler disabled; skipping fetching.") + // Even on timeout, attempt to fetch results + combinedResults = fetchTextResults(query, safe, lang, page) + if len(combinedResults) > 0 { + resultsCache.Set(cacheKey, convertToSearchResults(combinedResults)) } } @@ -121,54 +114,70 @@ func prefetchPage(query, safe, lang string, page int) { func fetchTextResults(query, safe, lang string, page int) []TextSearchResult { var results []TextSearchResult - // If Crawler is disabled, do not fetch from search engines if !config.CrawlerEnabled { - printDebug("Crawler is disabled; skipping search engine fetching.") - return results // Return an empty list - } + printDebug("Crawler is disabled; fetching from local index.") - engineCount := len(textSearchEngines) + // Calculate the starting position based on the page number + indexedResults, err := SearchIndex(query, page, 10) + if err != nil { + printErr("Error searching the index: %v", err) + return results // Return empty results on error + } - // Determine which engine to use for the current page - engineIndex := (page - 1) % engineCount - engine := textSearchEngines[engineIndex] + // Convert indexed results to TextSearchResult format + for _, doc := range indexedResults { + results = append(results, TextSearchResult{ + URL: doc.Link, + Header: doc.Title, + Description: doc.Description, + Source: doc.Tags, + }) + } - // Calculate the page number for this engine - enginePage := (page-1)/engineCount + 1 - - // Debug print to verify engine and page number being fetched - printDebug("Fetching results for overall page %d using engine: %s (engine page %d)", page, engine.Name, enginePage) - - // Fetch results from the selected engine - searchResults, _, err := engine.Func(query, safe, lang, enginePage) - if err != nil { - printWarn("Error performing search with %s: %v", engine.Name, err) + return results } else { - results = append(results, validateResults(searchResults)...) - } + // Crawler is enabled, so use the search engines + engineCount := len(textSearchEngines) - // If no results are found with the selected engine, try the next in line - if len(results) == 0 { - for i := 1; i < engineCount; i++ { - nextEngine := textSearchEngines[(engineIndex+i)%engineCount] - enginePage = (page-1)/engineCount + 1 // Recalculate for the new engine - printInfo("No results found, trying next engine: %s (engine page %d)", nextEngine.Name, enginePage) + // Determine which engine to use for the current page + engineIndex := (page - 1) % engineCount + engine := textSearchEngines[engineIndex] - searchResults, _, err := nextEngine.Func(query, safe, lang, enginePage) - if err != nil { - printWarn("Error performing search with %s: %v", nextEngine.Name, err) - continue - } + // Calculate the page number for this engine + enginePage := (page-1)/engineCount + 1 + + printDebug("Fetching results for overall page %d using engine: %s (engine page %d)", page, engine.Name, enginePage) + + // Fetch results from the selected engine + searchResults, _, err := engine.Func(query, safe, lang, enginePage) + if err != nil { + printWarn("Error performing search with %s: %v", engine.Name, err) + } else { results = append(results, validateResults(searchResults)...) - if len(results) > 0 { - break + } + + // If no results are found with the selected engine, try the next in line + if len(results) == 0 { + for i := 1; i < engineCount; i++ { + nextEngine := textSearchEngines[(engineIndex+i)%engineCount] + enginePage = (page-1)/engineCount + 1 + printInfo("No results found, trying next engine: %s (engine page %d)", nextEngine.Name, enginePage) + + searchResults, _, err := nextEngine.Func(query, safe, lang, enginePage) + if err != nil { + printWarn("Error performing search with %s: %v", nextEngine.Name, err) + continue + } + results = append(results, validateResults(searchResults)...) + if len(results) > 0 { + break + } } } + + printInfo("Fetched %d results for overall page %d", len(results), page) + return results } - - printInfo("Fetched %d results for overall page %d", len(results), page) - - return results } func validateResults(searchResults []SearchResult) []TextSearchResult {