added website crawling and indexing crawled results
This commit is contained in:
parent
5b90a372a1
commit
047cccd19f
10 changed files with 819 additions and 97 deletions
|
@ -24,15 +24,15 @@ import (
|
|||
)
|
||||
|
||||
var (
|
||||
cachingImages = make(map[string]*sync.Mutex)
|
||||
cachingImagesMu sync.Mutex
|
||||
// cachingSemaphore = make(chan struct{}, 100) // Limit to concurrent downloads
|
||||
cachingImages = make(map[string]*sync.Mutex)
|
||||
cachingImagesMu sync.Mutex
|
||||
cachingSemaphore = make(chan struct{}, 100)
|
||||
|
||||
invalidImageIDs = make(map[string]struct{})
|
||||
invalidImageIDsMu sync.Mutex
|
||||
|
||||
imageURLMap = make(map[string]string) // mapping from imageID_type to imageURL
|
||||
imageURLMapMu sync.RWMutex // mutex for thread-safe access
|
||||
imageURLMap = make(map[string]string)
|
||||
imageURLMapMu sync.RWMutex
|
||||
)
|
||||
|
||||
func cacheImage(imageURL, imageID string, isThumbnail bool) (string, bool, error) {
|
||||
|
@ -49,7 +49,13 @@ func cacheImage(imageURL, imageID string, isThumbnail bool) (string, bool, error
|
|||
filename = fmt.Sprintf("%s_full.webp", imageID)
|
||||
}
|
||||
|
||||
cachedImagePath := filepath.Join(config.DriveCache.Path, filename)
|
||||
// Make sure we store inside: config.DriveCache.Path / images
|
||||
imageCacheDir := filepath.Join(config.DriveCache.Path, "images")
|
||||
if err := os.MkdirAll(imageCacheDir, 0755); err != nil {
|
||||
return "", false, fmt.Errorf("couldn't create images folder: %v", err)
|
||||
}
|
||||
|
||||
cachedImagePath := filepath.Join(imageCacheDir, filename)
|
||||
tempImagePath := cachedImagePath + ".tmp"
|
||||
|
||||
// Check if the image is already cached
|
||||
|
@ -73,9 +79,8 @@ func cacheImage(imageURL, imageID string, isThumbnail bool) (string, bool, error
|
|||
return cachedImagePath, true, nil
|
||||
}
|
||||
|
||||
// // Limit max concurrent downloads
|
||||
// cachingSemaphore <- struct{}{} // Acquire a token
|
||||
// defer func() { <-cachingSemaphore }() // Release the token
|
||||
cachingSemaphore <- struct{}{}
|
||||
defer func() { <-cachingSemaphore }()
|
||||
|
||||
// Create a custom http.Client that skips SSL certificate verification
|
||||
client := &http.Client{
|
||||
|
@ -217,7 +222,8 @@ func handleImageServe(w http.ResponseWriter, r *http.Request) {
|
|||
imageType = parts[1]
|
||||
|
||||
filename := fmt.Sprintf("%s_%s.webp", imageID, imageType)
|
||||
cachedImagePath := filepath.Join(config.DriveCache.Path, filename)
|
||||
// Adjust to read from config.DriveCache.Path / images
|
||||
cachedImagePath := filepath.Join(config.DriveCache.Path, "images", filename)
|
||||
|
||||
if hasExtension && imageType == "thumb" {
|
||||
// Requesting cached image (thumbnail or full)
|
||||
|
@ -329,7 +335,7 @@ func handleImageStatus(w http.ResponseWriter, r *http.Request) {
|
|||
// Check thumbnail first
|
||||
for _, ext := range extensions {
|
||||
thumbFilename := fmt.Sprintf("%s_thumb.%s", id, ext)
|
||||
thumbPath := filepath.Join(config.DriveCache.Path, thumbFilename)
|
||||
thumbPath := filepath.Join(config.DriveCache.Path, "images", thumbFilename)
|
||||
|
||||
if _, err := os.Stat(thumbPath); err == nil {
|
||||
statusMap[id] = fmt.Sprintf("/image/%s_thumb.%s", id, ext)
|
||||
|
@ -342,7 +348,7 @@ func handleImageStatus(w http.ResponseWriter, r *http.Request) {
|
|||
if !imageReady {
|
||||
for _, ext := range extensions {
|
||||
fullFilename := fmt.Sprintf("%s_full.%s", id, ext)
|
||||
fullPath := filepath.Join(config.DriveCache.Path, fullFilename)
|
||||
fullPath := filepath.Join(config.DriveCache.Path, "images", fullFilename)
|
||||
|
||||
if _, err := os.Stat(fullPath); err == nil {
|
||||
statusMap[id] = fmt.Sprintf("/image/%s_full.%s", id, ext)
|
||||
|
@ -447,7 +453,9 @@ func cleanExpiredCachedImages() {
|
|||
}
|
||||
|
||||
func cleanupCache() {
|
||||
files, err := os.ReadDir(config.DriveCache.Path)
|
||||
// Read from: config.DriveCache.Path / images
|
||||
imageCacheDir := filepath.Join(config.DriveCache.Path, "images")
|
||||
files, err := os.ReadDir(imageCacheDir)
|
||||
if err != nil {
|
||||
printErr("Failed to read DriveCache directory: %v", err)
|
||||
return
|
||||
|
@ -462,19 +470,17 @@ func cleanupCache() {
|
|||
continue
|
||||
}
|
||||
|
||||
filePath := filepath.Join(config.DriveCache.Path, file.Name())
|
||||
filePath := filepath.Join(imageCacheDir, file.Name())
|
||||
|
||||
// Check for expired files based on modification time
|
||||
if config.DriveCache.Duration > 0 && time.Since(info.ModTime()) > config.DriveCache.Duration {
|
||||
if err := os.Remove(filePath); err == nil {
|
||||
printDebug("Removed expired cache file: %s", filePath)
|
||||
} else {
|
||||
printErr("Failed to remove expired cache file: %s", filePath)
|
||||
}
|
||||
continue // Skip adding this file to the list
|
||||
continue
|
||||
}
|
||||
|
||||
// Accumulate total size and store file info for potential deletion
|
||||
totalSize += uint64(info.Size())
|
||||
fileInfos = append(fileInfos, info)
|
||||
}
|
||||
|
@ -491,7 +497,7 @@ func cleanupCache() {
|
|||
break
|
||||
}
|
||||
|
||||
filePath := filepath.Join(config.DriveCache.Path, info.Name())
|
||||
filePath := filepath.Join(imageCacheDir, info.Name())
|
||||
fileSize := uint64(info.Size())
|
||||
|
||||
if err := os.Remove(filePath); err == nil {
|
||||
|
|
2
cache.go
2
cache.go
|
@ -162,7 +162,7 @@ func (rc *ResultsCache) keyToString(key CacheKey) string {
|
|||
|
||||
// checkAndCleanCache removes items if memory usage exceeds the limit.
|
||||
func (rc *ResultsCache) checkAndCleanCache() {
|
||||
for rc.currentMemoryUsage() > config.RamCache.MaxUsageBytes {
|
||||
if rc.currentMemoryUsage() > config.RamCache.MaxUsageBytes {
|
||||
rc.cleanOldestItems()
|
||||
}
|
||||
}
|
||||
|
|
25
config.go
25
config.go
|
@ -30,6 +30,7 @@ type Config struct {
|
|||
Domain string // Added
|
||||
NodesEnabled bool // Added
|
||||
CrawlerEnabled bool // Added
|
||||
IndexerEnabled bool // Added
|
||||
WebsiteEnabled bool // Added
|
||||
RamCacheEnabled bool
|
||||
DriveCacheEnabled bool // Added
|
||||
|
@ -46,6 +47,7 @@ var defaultConfig = Config{
|
|||
AuthCode: generateStrongRandomString(64),
|
||||
NodesEnabled: false,
|
||||
CrawlerEnabled: true,
|
||||
IndexerEnabled: false,
|
||||
WebsiteEnabled: true,
|
||||
RamCacheEnabled: true,
|
||||
DriveCacheEnabled: false,
|
||||
|
@ -105,6 +107,15 @@ func createConfig() error {
|
|||
config.Domain = defaultConfig.Domain
|
||||
}
|
||||
|
||||
// printMessage("Use Indexer? (YES/no): ")
|
||||
// indexerChoice, _ := reader.ReadString('\n')
|
||||
// indexerChoice = strings.TrimSpace(strings.ToLower(indexerChoice))
|
||||
// if indexerChoice == "no" {
|
||||
// config.IndexerEnabled = false
|
||||
// } else {
|
||||
// config.IndexerEnabled = true
|
||||
// }
|
||||
|
||||
// Cache settings
|
||||
printMessage("Would you like to configure Cache settings (yes/NO): ")
|
||||
configureCache, _ := reader.ReadString('\n')
|
||||
|
@ -181,7 +192,7 @@ func createConfig() error {
|
|||
} else {
|
||||
config.DriveCache.MaxUsageBytes = parseMaxUsageDrive(driveMaxUsage, drivePath)
|
||||
if config.DriveCache.MaxUsageBytes == 0 {
|
||||
printWarn("Invalid DriveCache max usage, using default (1 TiB).")
|
||||
printWarn("Invalid DriveCache max usage, using default.")
|
||||
config.DriveCache.MaxUsageBytes = defaultConfig.DriveCache.MaxUsageBytes
|
||||
}
|
||||
}
|
||||
|
@ -201,13 +212,6 @@ func createConfig() error {
|
|||
printMessage("Generated connection code: %s\n", config.AuthCode)
|
||||
}
|
||||
|
||||
// Set other default values
|
||||
config.NodesEnabled = defaultConfig.NodesEnabled
|
||||
config.CrawlerEnabled = defaultConfig.CrawlerEnabled
|
||||
config.WebsiteEnabled = defaultConfig.WebsiteEnabled
|
||||
config.LogLevel = defaultConfig.LogLevel
|
||||
|
||||
// Save configuration to file
|
||||
saveConfig(config)
|
||||
printInfo("Configuration saved successfully.")
|
||||
return nil
|
||||
|
@ -232,6 +236,7 @@ func saveConfig(config Config) {
|
|||
featuresSec := cfg.Section("Features")
|
||||
featuresSec.Key("Nodes").SetValue(strconv.FormatBool(config.NodesEnabled))
|
||||
featuresSec.Key("Crawler").SetValue(strconv.FormatBool(config.CrawlerEnabled))
|
||||
featuresSec.Key("Indexer").SetValue(strconv.FormatBool(config.IndexerEnabled))
|
||||
featuresSec.Key("Website").SetValue(strconv.FormatBool(config.WebsiteEnabled))
|
||||
featuresSec.Key("RamCache").SetValue(strconv.FormatBool(config.RamCacheEnabled))
|
||||
featuresSec.Key("DriveCache").SetValue(strconv.FormatBool(config.DriveCacheEnabled))
|
||||
|
@ -273,6 +278,7 @@ func loadConfig() Config {
|
|||
// Features
|
||||
nodesEnabled, _ := cfg.Section("Features").Key("Nodes").Bool()
|
||||
crawlerEnabled, _ := cfg.Section("Features").Key("Crawler").Bool()
|
||||
indexerEnabled, _ := cfg.Section("Features").Key("Indexer").Bool()
|
||||
websiteEnabled, _ := cfg.Section("Features").Key("Website").Bool()
|
||||
ramCacheEnabled, _ := cfg.Section("Features").Key("RamCache").Bool()
|
||||
driveCacheEnabled, _ := cfg.Section("Features").Key("DriveCache").Bool()
|
||||
|
@ -294,10 +300,11 @@ func loadConfig() Config {
|
|||
Port: port,
|
||||
Domain: domain,
|
||||
LogLevel: logLevel,
|
||||
AuthCode: authCode, // Assign AuthCode here
|
||||
AuthCode: authCode,
|
||||
Peers: peers,
|
||||
NodesEnabled: nodesEnabled,
|
||||
CrawlerEnabled: crawlerEnabled,
|
||||
IndexerEnabled: indexerEnabled,
|
||||
WebsiteEnabled: websiteEnabled,
|
||||
RamCacheEnabled: ramCacheEnabled,
|
||||
DriveCacheEnabled: driveCacheEnabled,
|
||||
|
|
224
crawler.go
Normal file
224
crawler.go
Normal file
|
@ -0,0 +1,224 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"golang.org/x/net/html"
|
||||
)
|
||||
|
||||
// webCrawlerInit is called during init on program start
|
||||
func webCrawlerInit() {
|
||||
go func() {
|
||||
// First run immediately
|
||||
runCrawlerAndIndexer()
|
||||
|
||||
// Then every 24h (adjust as needed)
|
||||
ticker := time.NewTicker(24 * time.Hour)
|
||||
for range ticker.C {
|
||||
runCrawlerAndIndexer()
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
// runCrawlerAndIndexer reads domains.csv -> crawls -> writes to data_to_index.txt -> reindexes
|
||||
func runCrawlerAndIndexer() {
|
||||
// 1. Read domains.csv
|
||||
domains, err := readDomainsCSV(filepath.Join(config.DriveCache.Path, "domains.csv"))
|
||||
if err != nil {
|
||||
printErr("Error reading domains.csv: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
// 2. Crawl each domain and write results to data_to_index.txt
|
||||
outFile := filepath.Join(config.DriveCache.Path, "data_to_index.txt")
|
||||
if err := crawlDomainsToFile(domains, outFile); err != nil {
|
||||
printErr("Error crawling domains: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
// 3. Re-index data_to_index.txt
|
||||
if err := IndexFile(outFile); err != nil {
|
||||
printErr("Error indexing data_to_index.txt: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
printDebug("Crawl + index refresh completed.")
|
||||
}
|
||||
|
||||
// readDomainsCSV returns a slice of (rank,domain) from a local CSV file
|
||||
func readDomainsCSV(csvPath string) ([][2]string, error) {
|
||||
f, err := os.Open(csvPath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
var result [][2]string
|
||||
scanner := bufio.NewScanner(f)
|
||||
// Skip header line
|
||||
scanner.Scan()
|
||||
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
// Split by commas, not tabs
|
||||
fields := strings.SplitN(line, ",", 3) // Splits into up to 3 parts (rank, domain, popularity)
|
||||
if len(fields) < 2 {
|
||||
printDebug("Skipping malformed line: %s", line)
|
||||
continue
|
||||
}
|
||||
// Remove quotes around fields, if present
|
||||
rank := strings.Trim(fields[0], `"`)
|
||||
domain := strings.Trim(fields[1], `"`)
|
||||
result = append(result, [2]string{rank, domain})
|
||||
}
|
||||
return result, scanner.Err()
|
||||
}
|
||||
|
||||
// crawlDomainsToFile visits each domain, extracts minimal data, writes results to outFile
|
||||
func crawlDomainsToFile(domains [][2]string, outFile string) error {
|
||||
// Read existing data_to_index.txt into a map to prevent duplicates
|
||||
existingEntries := make(map[string]bool)
|
||||
if _, err := os.Stat(outFile); err == nil { // File exists
|
||||
file, err := os.Open(outFile)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to open %s: %v", outFile, err)
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
scanner := bufio.NewScanner(file)
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
parts := strings.SplitN(line, "|", 5)
|
||||
if len(parts) >= 1 {
|
||||
existingEntries[parts[0]] = true // Mark existing domain
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Open file for writing (truncate if existing)
|
||||
file, err := os.OpenFile(outFile, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to open %s for writing: %v", outFile, err)
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
for _, d := range domains {
|
||||
rank := d[0]
|
||||
domain := d[1]
|
||||
if domain == "" || existingEntries["https://"+domain] {
|
||||
continue
|
||||
}
|
||||
|
||||
fullURL := "https://" + domain
|
||||
title, desc, keywords := fetchPageMetadata(fullURL)
|
||||
if title == "" {
|
||||
title = "Unknown Title"
|
||||
}
|
||||
if desc == "" {
|
||||
desc = "No Description"
|
||||
}
|
||||
|
||||
// Write unique domain to file
|
||||
line := fmt.Sprintf("%s|%s|%s|%s|%s\n",
|
||||
fullURL,
|
||||
sanitize(title),
|
||||
sanitize(keywords),
|
||||
sanitize(desc),
|
||||
rank,
|
||||
)
|
||||
if _, err := file.WriteString(line); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
existingEntries[fullURL] = true
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// fetchPageMetadata does a simple GET and parses <title>, meta[name=description], meta[name=keywords]
|
||||
func fetchPageMetadata(pageURL string) (string, string, string) {
|
||||
// Generate a User-Agent using your GetUserAgent function
|
||||
userAgent, err := GetUserAgent("crawler")
|
||||
if err != nil {
|
||||
printWarn("Failed to generate User-Agent: %v", err)
|
||||
return "", "", ""
|
||||
}
|
||||
|
||||
client := &http.Client{Timeout: 15 * time.Second}
|
||||
req, err := http.NewRequest("GET", pageURL, nil)
|
||||
if err != nil {
|
||||
printWarn("Failed to create request for %s: %v", pageURL, err)
|
||||
return "", "", ""
|
||||
}
|
||||
|
||||
// Set the dynamically generated User-Agent
|
||||
req.Header.Set("User-Agent", userAgent)
|
||||
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
printWarn("Failed to GET %s: %v", pageURL, err)
|
||||
return "", "", ""
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
// Handle non-200 responses
|
||||
if resp.StatusCode == 403 || resp.StatusCode == 401 {
|
||||
printWarn("Skipping %s: HTTP %d", pageURL, resp.StatusCode)
|
||||
return "", "", ""
|
||||
} else if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
||||
printWarn("Non-200 for %s: %d", pageURL, resp.StatusCode)
|
||||
return "", "", ""
|
||||
}
|
||||
|
||||
// Parse HTML
|
||||
doc, err := html.Parse(resp.Body)
|
||||
if err != nil {
|
||||
printWarn("HTML parse error for %s: %v", pageURL, err)
|
||||
return "", "", ""
|
||||
}
|
||||
|
||||
var title, desc, keywords string
|
||||
var f func(*html.Node)
|
||||
f = func(n *html.Node) {
|
||||
if n.Type == html.ElementNode && n.Data == "title" && n.FirstChild != nil {
|
||||
title = n.FirstChild.Data
|
||||
}
|
||||
if n.Type == html.ElementNode && n.Data == "meta" {
|
||||
var nameVal, contentVal string
|
||||
for _, attr := range n.Attr {
|
||||
switch strings.ToLower(attr.Key) {
|
||||
case "name":
|
||||
nameVal = strings.ToLower(attr.Val)
|
||||
case "content":
|
||||
contentVal = attr.Val
|
||||
}
|
||||
}
|
||||
if nameVal == "description" {
|
||||
desc = contentVal
|
||||
} else if nameVal == "keywords" {
|
||||
keywords = contentVal
|
||||
}
|
||||
}
|
||||
for c := n.FirstChild; c != nil; c = c.NextSibling {
|
||||
f(c)
|
||||
}
|
||||
}
|
||||
f(doc)
|
||||
|
||||
return title, desc, keywords
|
||||
}
|
||||
|
||||
// sanitize is a quick helper to remove newlines/pipes from fields
|
||||
func sanitize(input string) string {
|
||||
input = strings.ReplaceAll(input, "|", " ")
|
||||
input = strings.ReplaceAll(input, "\n", " ")
|
||||
input = strings.TrimSpace(input)
|
||||
return input
|
||||
}
|
118
get-domains-csv.go
Normal file
118
get-domains-csv.go
Normal file
|
@ -0,0 +1,118 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"archive/zip"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"os"
|
||||
"path/filepath"
|
||||
)
|
||||
|
||||
func downloadAndSetupDomainsCSV() error {
|
||||
targetFilePath := filepath.Join(config.DriveCache.Path, "domains.csv")
|
||||
|
||||
// Check if domains.csv already exists
|
||||
if _, err := os.Stat(targetFilePath); err == nil {
|
||||
printDebug("domains.csv already exists at %s", targetFilePath)
|
||||
return nil
|
||||
}
|
||||
|
||||
downloadURL := "https://www.domcop.com/files/top/top10milliondomains.csv.zip"
|
||||
zipFilePath := filepath.Join(config.DriveCache.Path, "top10milliondomains.csv.zip")
|
||||
|
||||
// Download the file
|
||||
printDebug("Downloading file from %s", downloadURL)
|
||||
resp, err := http.Get(downloadURL)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to download file: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return fmt.Errorf("failed to download file: received status code %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
// Create the zip file locally
|
||||
zipFile, err := os.Create(zipFilePath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create local zip file: %v", err)
|
||||
}
|
||||
defer zipFile.Close()
|
||||
|
||||
_, err = io.Copy(zipFile, resp.Body)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to write downloaded zip file: %v", err)
|
||||
}
|
||||
|
||||
// Unzip the file
|
||||
printDebug("Unzipping file %s", zipFilePath)
|
||||
if err := unzipFile(zipFilePath, config.DriveCache.Path); err != nil {
|
||||
return fmt.Errorf("failed to unzip file: %v", err)
|
||||
}
|
||||
|
||||
// Find the .csv file and rename/move it to domains.csv
|
||||
csvFound := false
|
||||
dirEntries, err := os.ReadDir(config.DriveCache.Path)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to read directory: %v", err)
|
||||
}
|
||||
|
||||
for _, entry := range dirEntries {
|
||||
if !entry.IsDir() && filepath.Ext(entry.Name()) == ".csv" {
|
||||
csvPath := filepath.Join(config.DriveCache.Path, entry.Name())
|
||||
if err := os.Rename(csvPath, targetFilePath); err != nil {
|
||||
return fmt.Errorf("failed to move %s to %s: %v", csvPath, targetFilePath, err)
|
||||
}
|
||||
csvFound = true
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if !csvFound {
|
||||
return fmt.Errorf("no .csv file found in the downloaded archive")
|
||||
}
|
||||
|
||||
// Clean up zip file
|
||||
if err := os.Remove(zipFilePath); err != nil {
|
||||
printWarn("failed to remove zip file %s: %v", zipFilePath, err)
|
||||
}
|
||||
|
||||
printDebug("domains.csv successfully downloaded and placed at %s", targetFilePath)
|
||||
return nil
|
||||
}
|
||||
|
||||
func unzipFile(zipFile, destDir string) error {
|
||||
reader, err := zip.OpenReader(zipFile)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer reader.Close()
|
||||
|
||||
for _, file := range reader.File {
|
||||
filePath := filepath.Join(destDir, file.Name)
|
||||
|
||||
if file.FileInfo().IsDir() {
|
||||
os.MkdirAll(filePath, os.ModePerm)
|
||||
continue
|
||||
}
|
||||
|
||||
srcFile, err := file.Open()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer srcFile.Close()
|
||||
|
||||
destFile, err := os.Create(filePath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer destFile.Close()
|
||||
|
||||
if _, err := io.Copy(destFile, srcFile); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
46
go.mod
46
go.mod
|
@ -1,9 +1,11 @@
|
|||
module searchengine
|
||||
module qgato
|
||||
|
||||
go 1.18
|
||||
go 1.23
|
||||
|
||||
toolchain go1.23.4
|
||||
|
||||
require (
|
||||
github.com/PuerkitoBio/goquery v1.9.1 // direct
|
||||
github.com/PuerkitoBio/goquery v1.10.0 // direct
|
||||
github.com/chai2010/webp v1.1.1
|
||||
github.com/leonelquinteros/gotext v1.7.0
|
||||
github.com/shirou/gopsutil v3.21.11+incompatible
|
||||
|
@ -12,10 +14,42 @@ require (
|
|||
)
|
||||
|
||||
require (
|
||||
github.com/andybalholm/cascadia v1.3.2 // indirect
|
||||
github.com/blevesearch/bleve/v2 v2.4.4
|
||||
golang.org/x/net v0.33.0
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/RoaringBitmap/roaring v1.9.4 // indirect
|
||||
github.com/andybalholm/cascadia v1.3.3 // indirect
|
||||
github.com/bits-and-blooms/bitset v1.20.0 // indirect
|
||||
github.com/blevesearch/bleve_index_api v1.2.0 // indirect
|
||||
github.com/blevesearch/geo v0.1.20 // indirect
|
||||
github.com/blevesearch/go-faiss v1.0.24 // indirect
|
||||
github.com/blevesearch/go-porterstemmer v1.0.3 // indirect
|
||||
github.com/blevesearch/gtreap v0.1.1 // indirect
|
||||
github.com/blevesearch/mmap-go v1.0.4 // indirect
|
||||
github.com/blevesearch/scorch_segment_api/v2 v2.3.0 // indirect
|
||||
github.com/blevesearch/segment v0.9.1 // indirect
|
||||
github.com/blevesearch/snowballstem v0.9.0 // indirect
|
||||
github.com/blevesearch/upsidedown_store_api v1.0.2 // indirect
|
||||
github.com/blevesearch/vellum v1.1.0 // indirect
|
||||
github.com/blevesearch/zapx/v11 v11.3.10 // indirect
|
||||
github.com/blevesearch/zapx/v12 v12.3.10 // indirect
|
||||
github.com/blevesearch/zapx/v13 v13.3.10 // indirect
|
||||
github.com/blevesearch/zapx/v14 v14.3.10 // indirect
|
||||
github.com/blevesearch/zapx/v15 v15.3.17 // indirect
|
||||
github.com/blevesearch/zapx/v16 v16.1.9-0.20241217210638-a0519e7caf3b // indirect
|
||||
github.com/go-ole/go-ole v1.3.0 // indirect
|
||||
github.com/golang/geo v0.0.0-20230421003525-6adc56603217 // indirect
|
||||
github.com/golang/protobuf v1.5.4 // indirect
|
||||
github.com/golang/snappy v0.0.4 // indirect
|
||||
github.com/json-iterator/go v1.1.12 // indirect
|
||||
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
|
||||
github.com/modern-go/reflect2 v1.0.2 // indirect
|
||||
github.com/mschoch/smat v0.2.0 // indirect
|
||||
github.com/stretchr/testify v1.9.0 // indirect
|
||||
github.com/yusufpapurcu/wmi v1.2.4 // indirect
|
||||
golang.org/x/net v0.30.0 // indirect
|
||||
golang.org/x/sys v0.26.0 // indirect
|
||||
go.etcd.io/bbolt v1.3.11 // indirect
|
||||
golang.org/x/sys v0.28.0 // indirect
|
||||
google.golang.org/protobuf v1.36.0 // indirect
|
||||
)
|
||||
|
|
123
go.sum
123
go.sum
|
@ -1,39 +1,121 @@
|
|||
github.com/PuerkitoBio/goquery v1.9.1 h1:mTL6XjbJTZdpfL+Gwl5U2h1l9yEkJjhmlTeV9VPW7UI=
|
||||
github.com/PuerkitoBio/goquery v1.9.1/go.mod h1:cW1n6TmIMDoORQU5IU/P1T3tGFunOeXEpGP2WHRwkbY=
|
||||
github.com/andybalholm/cascadia v1.3.2 h1:3Xi6Dw5lHF15JtdcmAHD3i1+T8plmv7BQ/nsViSLyss=
|
||||
github.com/andybalholm/cascadia v1.3.2/go.mod h1:7gtRlve5FxPPgIgX36uWBX58OdBsSS6lUvCFb+h7KvU=
|
||||
github.com/PuerkitoBio/goquery v1.10.0 h1:6fiXdLuUvYs2OJSvNRqlNPoBm6YABE226xrbavY5Wv4=
|
||||
github.com/PuerkitoBio/goquery v1.10.0/go.mod h1:TjZZl68Q3eGHNBA8CWaxAN7rOU1EbDz3CWuolcO5Yu4=
|
||||
github.com/RoaringBitmap/roaring v1.9.4 h1:yhEIoH4YezLYT04s1nHehNO64EKFTop/wBhxv2QzDdQ=
|
||||
github.com/RoaringBitmap/roaring v1.9.4/go.mod h1:6AXUsoIEzDTFFQCe1RbGA6uFONMhvejWj5rqITANK90=
|
||||
github.com/andybalholm/cascadia v1.3.3 h1:AG2YHrzJIm4BZ19iwJ/DAua6Btl3IwJX+VI4kktS1LM=
|
||||
github.com/andybalholm/cascadia v1.3.3/go.mod h1:xNd9bqTn98Ln4DwST8/nG+H0yuB8Hmgu1YHNnWw0GeA=
|
||||
github.com/bits-and-blooms/bitset v1.12.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8=
|
||||
github.com/bits-and-blooms/bitset v1.20.0 h1:2F+rfL86jE2d/bmw7OhqUg2Sj/1rURkBn3MdfoPyRVU=
|
||||
github.com/bits-and-blooms/bitset v1.20.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8=
|
||||
github.com/blevesearch/bleve/v2 v2.4.4 h1:RwwLGjUm54SwyyykbrZs4vc1qjzYic4ZnAnY9TwNl60=
|
||||
github.com/blevesearch/bleve/v2 v2.4.4/go.mod h1:fa2Eo6DP7JR+dMFpQe+WiZXINKSunh7WBtlDGbolKXk=
|
||||
github.com/blevesearch/bleve_index_api v1.2.0 h1:/DXMMWBwx/UmGKM1xDhTwDoJI5yQrG6rqRWPFcOgUVo=
|
||||
github.com/blevesearch/bleve_index_api v1.2.0/go.mod h1:PbcwjIcRmjhGbkS/lJCpfgVSMROV6TRubGGAODaK1W8=
|
||||
github.com/blevesearch/geo v0.1.20 h1:paaSpu2Ewh/tn5DKn/FB5SzvH0EWupxHEIwbCk/QPqM=
|
||||
github.com/blevesearch/geo v0.1.20/go.mod h1:DVG2QjwHNMFmjo+ZgzrIq2sfCh6rIHzy9d9d0B59I6w=
|
||||
github.com/blevesearch/go-faiss v1.0.24 h1:K79IvKjoKHdi7FdiXEsAhxpMuns0x4fM0BO93bW5jLI=
|
||||
github.com/blevesearch/go-faiss v1.0.24/go.mod h1:OMGQwOaRRYxrmeNdMrXJPvVx8gBnvE5RYrr0BahNnkk=
|
||||
github.com/blevesearch/go-porterstemmer v1.0.3 h1:GtmsqID0aZdCSNiY8SkuPJ12pD4jI+DdXTAn4YRcHCo=
|
||||
github.com/blevesearch/go-porterstemmer v1.0.3/go.mod h1:angGc5Ht+k2xhJdZi511LtmxuEf0OVpvUUNrwmM1P7M=
|
||||
github.com/blevesearch/gtreap v0.1.1 h1:2JWigFrzDMR+42WGIN/V2p0cUvn4UP3C4Q5nmaZGW8Y=
|
||||
github.com/blevesearch/gtreap v0.1.1/go.mod h1:QaQyDRAT51sotthUWAH4Sj08awFSSWzgYICSZ3w0tYk=
|
||||
github.com/blevesearch/mmap-go v1.0.4 h1:OVhDhT5B/M1HNPpYPBKIEJaD0F3Si+CrEKULGCDPWmc=
|
||||
github.com/blevesearch/mmap-go v1.0.4/go.mod h1:EWmEAOmdAS9z/pi/+Toxu99DnsbhG1TIxUoRmJw/pSs=
|
||||
github.com/blevesearch/scorch_segment_api/v2 v2.3.0 h1:vxCjbXAkkEBSb4AB3Iqgr/EJcPyYRsiGxpcvsS8E1Dw=
|
||||
github.com/blevesearch/scorch_segment_api/v2 v2.3.0/go.mod h1:5y+TgXYSx+xJGaCwSlvy9G/UJBIY5wzvIkhvhBm2ATc=
|
||||
github.com/blevesearch/segment v0.9.1 h1:+dThDy+Lvgj5JMxhmOVlgFfkUtZV2kw49xax4+jTfSU=
|
||||
github.com/blevesearch/segment v0.9.1/go.mod h1:zN21iLm7+GnBHWTao9I+Au/7MBiL8pPFtJBJTsk6kQw=
|
||||
github.com/blevesearch/snowballstem v0.9.0 h1:lMQ189YspGP6sXvZQ4WZ+MLawfV8wOmPoD/iWeNXm8s=
|
||||
github.com/blevesearch/snowballstem v0.9.0/go.mod h1:PivSj3JMc8WuaFkTSRDW2SlrulNWPl4ABg1tC/hlgLs=
|
||||
github.com/blevesearch/upsidedown_store_api v1.0.2 h1:U53Q6YoWEARVLd1OYNc9kvhBMGZzVrdmaozG2MfoB+A=
|
||||
github.com/blevesearch/upsidedown_store_api v1.0.2/go.mod h1:M01mh3Gpfy56Ps/UXHjEO/knbqyQ1Oamg8If49gRwrQ=
|
||||
github.com/blevesearch/vellum v1.1.0 h1:CinkGyIsgVlYf8Y2LUQHvdelgXr6PYuvoDIajq6yR9w=
|
||||
github.com/blevesearch/vellum v1.1.0/go.mod h1:QgwWryE8ThtNPxtgWJof5ndPfx0/YMBh+W2weHKPw8Y=
|
||||
github.com/blevesearch/zapx/v11 v11.3.10 h1:hvjgj9tZ9DeIqBCxKhi70TtSZYMdcFn7gDb71Xo/fvk=
|
||||
github.com/blevesearch/zapx/v11 v11.3.10/go.mod h1:0+gW+FaE48fNxoVtMY5ugtNHHof/PxCqh7CnhYdnMzQ=
|
||||
github.com/blevesearch/zapx/v12 v12.3.10 h1:yHfj3vXLSYmmsBleJFROXuO08mS3L1qDCdDK81jDl8s=
|
||||
github.com/blevesearch/zapx/v12 v12.3.10/go.mod h1:0yeZg6JhaGxITlsS5co73aqPtM04+ycnI6D1v0mhbCs=
|
||||
github.com/blevesearch/zapx/v13 v13.3.10 h1:0KY9tuxg06rXxOZHg3DwPJBjniSlqEgVpxIqMGahDE8=
|
||||
github.com/blevesearch/zapx/v13 v13.3.10/go.mod h1:w2wjSDQ/WBVeEIvP0fvMJZAzDwqwIEzVPnCPrz93yAk=
|
||||
github.com/blevesearch/zapx/v14 v14.3.10 h1:SG6xlsL+W6YjhX5N3aEiL/2tcWh3DO75Bnz77pSwwKU=
|
||||
github.com/blevesearch/zapx/v14 v14.3.10/go.mod h1:qqyuR0u230jN1yMmE4FIAuCxmahRQEOehF78m6oTgns=
|
||||
github.com/blevesearch/zapx/v15 v15.3.17 h1:NkkMI98pYLq/uHnB6YWcITrrLpCVyvZ9iP+AyfpW1Ys=
|
||||
github.com/blevesearch/zapx/v15 v15.3.17/go.mod h1:vXRQzJJvlGVCdmOD5hg7t7JdjUT5DmDPhsAfjvtzIq8=
|
||||
github.com/blevesearch/zapx/v16 v16.1.9-0.20241217210638-a0519e7caf3b h1:ju9Az5YgrzCeK3M1QwvZIpxYhChkXp7/L0RhDYsxXoE=
|
||||
github.com/blevesearch/zapx/v16 v16.1.9-0.20241217210638-a0519e7caf3b/go.mod h1:BlrYNpOu4BvVRslmIG+rLtKhmjIaRhIbG8sb9scGTwI=
|
||||
github.com/chai2010/webp v1.1.1 h1:jTRmEccAJ4MGrhFOrPMpNGIJ/eybIgwKpcACsrTEapk=
|
||||
github.com/chai2010/webp v1.1.1/go.mod h1:0XVwvZWdjjdxpUEIf7b9g9VkHFnInUSYujwqTLEuldU=
|
||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0=
|
||||
github.com/go-ole/go-ole v1.3.0 h1:Dt6ye7+vXGIKZ7Xtk4s6/xVdGDQynvom7xCFEdWr6uE=
|
||||
github.com/go-ole/go-ole v1.3.0/go.mod h1:5LS6F96DhAwUc7C+1HLexzMXY1xGRSryjyPPKW6zv78=
|
||||
github.com/golang/geo v0.0.0-20230421003525-6adc56603217 h1:HKlyj6in2JV6wVkmQ4XmG/EIm+SCYlPZ+V4GWit7Z+I=
|
||||
github.com/golang/geo v0.0.0-20230421003525-6adc56603217/go.mod h1:8wI0hitZ3a1IxZfeH3/5I97CI8i5cLGsYe7xNhQGs9U=
|
||||
github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
|
||||
github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
|
||||
github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM=
|
||||
github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
|
||||
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
|
||||
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
|
||||
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
|
||||
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
|
||||
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
|
||||
github.com/leonelquinteros/gotext v1.7.0 h1:jcJmF4AXqyamP7vuw2MMIKs+O3jAEmvrc5JQiI8Ht/8=
|
||||
github.com/leonelquinteros/gotext v1.7.0/go.mod h1:qJdoQuERPpccw7L70uoU+K/BvTfRBHYsisCQyFLXyvw=
|
||||
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
|
||||
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
|
||||
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
|
||||
github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
|
||||
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
|
||||
github.com/mschoch/smat v0.2.0 h1:8imxQsjDm8yFEAVBe7azKmKSgzSkZXDuKkSq9374khM=
|
||||
github.com/mschoch/smat v0.2.0/go.mod h1:kc9mz7DoBKqDyiRL7VZN8KvXQMWeTaVnttLRXOlotKw=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/shirou/gopsutil v3.21.11+incompatible h1:+1+c1VGhc88SSonWP6foOcLhvnKlUeu/erjjvaPEYiI=
|
||||
github.com/shirou/gopsutil v3.21.11+incompatible/go.mod h1:5b4v6he4MtMOwMlS0TUMTu2PcXUg8+E1lC7eC3UO/RA=
|
||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
|
||||
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
|
||||
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
|
||||
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
|
||||
github.com/yusufpapurcu/wmi v1.2.4 h1:zFUKzehAFReQwLys1b/iSMl+JQGSCSjtVqQn9bBrPo0=
|
||||
github.com/yusufpapurcu/wmi v1.2.4/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0=
|
||||
go.etcd.io/bbolt v1.3.11 h1:yGEzV1wPz2yVCLsD8ZAiGHhHVlczyC9d1rP43/VCRJ0=
|
||||
go.etcd.io/bbolt v1.3.11/go.mod h1:dksAq7YMXoljX0xu6VF5DMZGbhYYoLUalEiSySYAS4I=
|
||||
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
||||
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
|
||||
golang.org/x/crypto v0.13.0/go.mod h1:y6Z2r+Rw4iayiXXAIxJIDAJ1zMW4yaTpebo8fPOliYc=
|
||||
golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU=
|
||||
golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8=
|
||||
golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk=
|
||||
golang.org/x/image v0.21.0 h1:c5qV36ajHpdj4Qi0GnE0jUc/yuo33OLFaa0d+crTD5s=
|
||||
golang.org/x/image v0.21.0/go.mod h1:vUbsLavqK/W303ZroQQVKQ+Af3Yl6Uz1Ppu5J/cLz78=
|
||||
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
|
||||
golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
|
||||
golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
|
||||
golang.org/x/mod v0.15.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
|
||||
golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
|
||||
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
|
||||
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
|
||||
golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
|
||||
golang.org/x/net v0.9.0/go.mod h1:d48xBJpPfHeWQsugry2m+kC02ZBRGRgulfHnEXEuWns=
|
||||
golang.org/x/net v0.30.0 h1:AcW1SDZMkb8IpzCdQUaIq2sP4sZ4zw+55h6ynffypl4=
|
||||
golang.org/x/net v0.30.0/go.mod h1:2wGyMJ5iFasEhkwi13ChkO/t1ECNC4X4eBKkVFyYFlU=
|
||||
golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg=
|
||||
golang.org/x/net v0.15.0/go.mod h1:idbUs1IY1+zTqbi8yxTbhexhEEk5ur9LInksu6HrEpk=
|
||||
golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44=
|
||||
golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM=
|
||||
golang.org/x/net v0.33.0 h1:74SYHlV8BIgHIFC/LrYkOGIwL19eTYXQ5wc6TBuO36I=
|
||||
golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4=
|
||||
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y=
|
||||
golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
|
||||
golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
|
||||
golang.org/x/sync v0.10.0 h1:3NQrjDixjgGwUOCaF8w2+VYHv0Ve/vGYSbdkTa98gmQ=
|
||||
golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
|
||||
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
|
@ -42,23 +124,42 @@ golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBc
|
|||
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo=
|
||||
golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||
golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||
golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||
golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA=
|
||||
golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||
golang.org/x/telemetry v0.0.0-20240228155512-f48c80bd79b2/go.mod h1:TeRTkGYfJXctD9OcfyVLyj2J3IxLnKwHJR8f4D8a3YE=
|
||||
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
|
||||
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
|
||||
golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
|
||||
golang.org/x/term v0.7.0/go.mod h1:P32HKFT3hSsZrRxla30E9HqToFYAQPCMs/zFMBUFqPY=
|
||||
golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo=
|
||||
golang.org/x/term v0.12.0/go.mod h1:owVbMEjm3cBLCHdkQu9b1opXd4ETQWc3BhuQGKgXgvU=
|
||||
golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk=
|
||||
golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY=
|
||||
golang.org/x/term v0.27.0/go.mod h1:iMsnZpn0cago0GOrHO2+Y7u7JPn5AylBrcoWkElMTSM=
|
||||
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
|
||||
golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
|
||||
golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
|
||||
golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
|
||||
golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
|
||||
golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
|
||||
golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ=
|
||||
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
|
||||
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
|
||||
golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
|
||||
golang.org/x/tools v0.13.0/go.mod h1:HvlwmtVNQAhOuCjW7xxvovg8wbNq7LwfXh/k7wXUl58=
|
||||
golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk=
|
||||
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
google.golang.org/protobuf v1.36.0 h1:mjIs9gYtt56AzC4ZaffQuh88TZurBGhIJMBZGSxNerQ=
|
||||
google.golang.org/protobuf v1.36.0/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/ini.v1 v1.67.0 h1:Dgnx+6+nfE+IfzjUEISNeydPJh9AXNNsWbGP9KzCsOA=
|
||||
gopkg.in/ini.v1 v1.67.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k=
|
||||
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
|
|
198
indexer.go
Normal file
198
indexer.go
Normal file
|
@ -0,0 +1,198 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/blevesearch/bleve/v2"
|
||||
)
|
||||
|
||||
// Document represents a single crawled page stored in the Bleve index.
// Additional fields can be added as the index schema grows.
type Document struct {
	ID          string `json:"id"`          // unique key; currently the page's domain
	Link        string `json:"link"`        // canonical URL of the page
	Title       string `json:"title"`       // page title
	Tags        string `json:"tags"`        // free-form tag string
	Description string `json:"description"` // short page description
	Popularity  int64  `json:"popularity"`  // relative popularity score used for sorting
}
|
||||
|
||||
var (
|
||||
// Global Bleve index handle
|
||||
bleveIndex bleve.Index
|
||||
)
|
||||
|
||||
func startPeriodicIndexing(filePath string, interval time.Duration) {
|
||||
go func() {
|
||||
for {
|
||||
printDebug("Refreshing index from %s", filePath)
|
||||
err := IndexFile(filePath)
|
||||
if err != nil {
|
||||
printErr("Failed to refresh index: %v", err)
|
||||
}
|
||||
time.Sleep(interval)
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
// InitIndex ensures that the Bleve index is created or opened.
|
||||
func InitIndex() error {
|
||||
idx, err := bleve.Open(filepath.Join(config.DriveCache.Path, "index.bleve"))
|
||||
if err == bleve.ErrorIndexPathDoesNotExist {
|
||||
// Index doesn't exist, create a new one
|
||||
mapping := bleve.NewIndexMapping()
|
||||
|
||||
// Custom mapping for the document
|
||||
docMapping := bleve.NewDocumentMapping()
|
||||
|
||||
// Text fields with custom analyzers for better tokenization
|
||||
textFieldMapping := bleve.NewTextFieldMapping()
|
||||
textFieldMapping.Analyzer = "standard" // Use standard analyzer for partial and fuzzy matches
|
||||
|
||||
docMapping.AddFieldMappingsAt("title", textFieldMapping)
|
||||
docMapping.AddFieldMappingsAt("description", textFieldMapping)
|
||||
docMapping.AddFieldMappingsAt("tags", textFieldMapping)
|
||||
|
||||
// Numeric field for popularity
|
||||
popularityMapping := bleve.NewNumericFieldMapping()
|
||||
docMapping.AddFieldMappingsAt("popularity", popularityMapping)
|
||||
|
||||
mapping.AddDocumentMapping("Document", docMapping)
|
||||
|
||||
idx, err = bleve.New(filepath.Join(config.DriveCache.Path, "index.bleve"), mapping)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create index: %v", err)
|
||||
}
|
||||
} else if err != nil {
|
||||
return fmt.Errorf("failed to open index: %v", err)
|
||||
}
|
||||
|
||||
bleveIndex = idx
|
||||
return nil
|
||||
}
|
||||
|
||||
// IndexFile reads a file line-by-line and indexes each line as a document.
|
||||
// Each line represents a simple document. Adjust parsing as needed.
|
||||
func IndexFile(filePath string) error {
|
||||
file, err := os.Open(filePath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to open file for indexing: %v", err)
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
scanner := bufio.NewScanner(file)
|
||||
batch := bleveIndex.NewBatch()
|
||||
indexedDomains := make(map[string]bool) // Track indexed domains
|
||||
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
|
||||
// Split the line into 5 fields: link|title|tags|description|popularity
|
||||
parts := strings.SplitN(line, "|", 5)
|
||||
if len(parts) < 5 {
|
||||
continue // Skip malformed lines
|
||||
}
|
||||
|
||||
domain := parts[0]
|
||||
popularity, _ := strconv.ParseInt(parts[4], 10, 64)
|
||||
|
||||
// Skip if the domain is already indexed
|
||||
if indexedDomains[domain] {
|
||||
continue
|
||||
}
|
||||
|
||||
doc := Document{
|
||||
ID: domain, // Use the domain as the unique ID
|
||||
Link: parts[0],
|
||||
Title: parts[1],
|
||||
Tags: parts[2],
|
||||
Description: parts[3],
|
||||
Popularity: popularity,
|
||||
}
|
||||
|
||||
err := batch.Index(doc.ID, map[string]interface{}{
|
||||
"title": doc.Title,
|
||||
"description": doc.Description,
|
||||
"link": doc.Link,
|
||||
"tags": doc.Tags,
|
||||
"popularity": doc.Popularity,
|
||||
})
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to index document: %v", err)
|
||||
}
|
||||
|
||||
indexedDomains[domain] = true // Mark the domain as indexed
|
||||
}
|
||||
|
||||
// Commit the batch
|
||||
if err := bleveIndex.Batch(batch); err != nil {
|
||||
return fmt.Errorf("error committing batch: %v", err)
|
||||
}
|
||||
|
||||
if err := scanner.Err(); err != nil {
|
||||
return fmt.Errorf("error reading file: %v", err)
|
||||
}
|
||||
|
||||
printDebug("Indexed %d unique domains from %s\n", len(indexedDomains), filePath)
|
||||
return nil
|
||||
}
|
||||
|
||||
// SearchIndex performs a full-text search on the indexed data.
|
||||
func SearchIndex(queryStr string, page, pageSize int) ([]Document, error) {
|
||||
// Create compound query
|
||||
exactMatch := bleve.NewMatchQuery(queryStr) // Exact match
|
||||
fuzzyMatch := bleve.NewFuzzyQuery(queryStr) // Fuzzy match
|
||||
fuzzyMatch.Fuzziness = 2
|
||||
prefixMatch := bleve.NewPrefixQuery(queryStr) // Prefix match
|
||||
|
||||
query := bleve.NewDisjunctionQuery(exactMatch, fuzzyMatch, prefixMatch)
|
||||
|
||||
req := bleve.NewSearchRequest(query)
|
||||
req.Fields = []string{"title", "description", "link", "tags", "popularity"}
|
||||
|
||||
// Pagination
|
||||
req.Size = pageSize
|
||||
req.From = (page - 1) * pageSize
|
||||
|
||||
// Sort by popularity
|
||||
req.SortBy([]string{"popularity"})
|
||||
|
||||
res, err := bleveIndex.Search(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("search error: %v", err)
|
||||
}
|
||||
|
||||
var docs []Document
|
||||
for _, hit := range res.Hits {
|
||||
title := fmt.Sprintf("%v", hit.Fields["title"])
|
||||
description := fmt.Sprintf("%v", hit.Fields["description"])
|
||||
link := fmt.Sprintf("%v", hit.Fields["link"])
|
||||
tags := fmt.Sprintf("%v", hit.Fields["tags"])
|
||||
popularity := int64(0)
|
||||
|
||||
if pop, ok := hit.Fields["popularity"].(float64); ok {
|
||||
popularity = int64(pop)
|
||||
}
|
||||
|
||||
if link == "<nil>" || title == "<nil>" {
|
||||
continue
|
||||
}
|
||||
|
||||
docs = append(docs, Document{
|
||||
ID: hit.ID,
|
||||
Title: title,
|
||||
Description: description,
|
||||
Link: link,
|
||||
Tags: tags,
|
||||
Popularity: popularity,
|
||||
})
|
||||
}
|
||||
|
||||
return docs, nil
|
||||
}
|
25
init.go
25
init.go
|
@ -3,6 +3,8 @@ package main
|
|||
import (
|
||||
"flag"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"time"
|
||||
)
|
||||
|
||||
var config Config
|
||||
|
@ -95,5 +97,28 @@ func main() {
|
|||
printInfo("RAM cache is disabled.")
|
||||
}
|
||||
|
||||
// Init indexer
|
||||
if config.IndexerEnabled {
|
||||
if err := downloadAndSetupDomainsCSV(); err != nil {
|
||||
printErr("Failed to set up domains.csv: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
webCrawlerInit()
|
||||
|
||||
err := InitIndex()
|
||||
if err != nil {
|
||||
printErr("Failed to initialize index:", err)
|
||||
}
|
||||
|
||||
// Start periodic indexing (every 2 minutes)
|
||||
dataFilePath := filepath.Join(config.DriveCache.Path, "data_to_index.txt")
|
||||
startPeriodicIndexing(dataFilePath, 2*time.Minute)
|
||||
|
||||
printInfo("Indexer is enabled.")
|
||||
} else {
|
||||
printInfo("Indexer is disabled.")
|
||||
}
|
||||
|
||||
runServer()
|
||||
}
|
||||
|
|
113
text.go
113
text.go
|
@ -73,14 +73,10 @@ func getTextResultsFromCacheOrFetch(cacheKey CacheKey, query, safe, lang string,
|
|||
select {
|
||||
case results := <-cacheChan:
|
||||
if results == nil {
|
||||
// Fetch only if the cache miss occurs and Crawler is enabled
|
||||
if config.CrawlerEnabled {
|
||||
combinedResults = fetchTextResults(query, safe, lang, page)
|
||||
if len(combinedResults) > 0 {
|
||||
resultsCache.Set(cacheKey, convertToSearchResults(combinedResults))
|
||||
}
|
||||
} else {
|
||||
printInfo("Crawler disabled; skipping fetching.")
|
||||
// Always attempt to fetch results on a cache miss
|
||||
combinedResults = fetchTextResults(query, safe, lang, page)
|
||||
if len(combinedResults) > 0 {
|
||||
resultsCache.Set(cacheKey, convertToSearchResults(combinedResults))
|
||||
}
|
||||
} else {
|
||||
textResults, _, _, _ := convertToSpecificResults(results)
|
||||
|
@ -88,13 +84,10 @@ func getTextResultsFromCacheOrFetch(cacheKey CacheKey, query, safe, lang string,
|
|||
}
|
||||
case <-time.After(2 * time.Second):
|
||||
printInfo("Cache check timeout")
|
||||
if config.CrawlerEnabled {
|
||||
combinedResults = fetchTextResults(query, safe, lang, page)
|
||||
if len(combinedResults) > 0 {
|
||||
resultsCache.Set(cacheKey, convertToSearchResults(combinedResults))
|
||||
}
|
||||
} else {
|
||||
printInfo("Crawler disabled; skipping fetching.")
|
||||
// Even on timeout, attempt to fetch results
|
||||
combinedResults = fetchTextResults(query, safe, lang, page)
|
||||
if len(combinedResults) > 0 {
|
||||
resultsCache.Set(cacheKey, convertToSearchResults(combinedResults))
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -121,54 +114,70 @@ func prefetchPage(query, safe, lang string, page int) {
|
|||
func fetchTextResults(query, safe, lang string, page int) []TextSearchResult {
|
||||
var results []TextSearchResult
|
||||
|
||||
// If Crawler is disabled, do not fetch from search engines
|
||||
if !config.CrawlerEnabled {
|
||||
printDebug("Crawler is disabled; skipping search engine fetching.")
|
||||
return results // Return an empty list
|
||||
}
|
||||
printDebug("Crawler is disabled; fetching from local index.")
|
||||
|
||||
engineCount := len(textSearchEngines)
|
||||
// Calculate the starting position based on the page number
|
||||
indexedResults, err := SearchIndex(query, page, 10)
|
||||
if err != nil {
|
||||
printErr("Error searching the index: %v", err)
|
||||
return results // Return empty results on error
|
||||
}
|
||||
|
||||
// Determine which engine to use for the current page
|
||||
engineIndex := (page - 1) % engineCount
|
||||
engine := textSearchEngines[engineIndex]
|
||||
// Convert indexed results to TextSearchResult format
|
||||
for _, doc := range indexedResults {
|
||||
results = append(results, TextSearchResult{
|
||||
URL: doc.Link,
|
||||
Header: doc.Title,
|
||||
Description: doc.Description,
|
||||
Source: doc.Tags,
|
||||
})
|
||||
}
|
||||
|
||||
// Calculate the page number for this engine
|
||||
enginePage := (page-1)/engineCount + 1
|
||||
|
||||
// Debug print to verify engine and page number being fetched
|
||||
printDebug("Fetching results for overall page %d using engine: %s (engine page %d)", page, engine.Name, enginePage)
|
||||
|
||||
// Fetch results from the selected engine
|
||||
searchResults, _, err := engine.Func(query, safe, lang, enginePage)
|
||||
if err != nil {
|
||||
printWarn("Error performing search with %s: %v", engine.Name, err)
|
||||
return results
|
||||
} else {
|
||||
results = append(results, validateResults(searchResults)...)
|
||||
}
|
||||
// Crawler is enabled, so use the search engines
|
||||
engineCount := len(textSearchEngines)
|
||||
|
||||
// If no results are found with the selected engine, try the next in line
|
||||
if len(results) == 0 {
|
||||
for i := 1; i < engineCount; i++ {
|
||||
nextEngine := textSearchEngines[(engineIndex+i)%engineCount]
|
||||
enginePage = (page-1)/engineCount + 1 // Recalculate for the new engine
|
||||
printInfo("No results found, trying next engine: %s (engine page %d)", nextEngine.Name, enginePage)
|
||||
// Determine which engine to use for the current page
|
||||
engineIndex := (page - 1) % engineCount
|
||||
engine := textSearchEngines[engineIndex]
|
||||
|
||||
searchResults, _, err := nextEngine.Func(query, safe, lang, enginePage)
|
||||
if err != nil {
|
||||
printWarn("Error performing search with %s: %v", nextEngine.Name, err)
|
||||
continue
|
||||
}
|
||||
// Calculate the page number for this engine
|
||||
enginePage := (page-1)/engineCount + 1
|
||||
|
||||
printDebug("Fetching results for overall page %d using engine: %s (engine page %d)", page, engine.Name, enginePage)
|
||||
|
||||
// Fetch results from the selected engine
|
||||
searchResults, _, err := engine.Func(query, safe, lang, enginePage)
|
||||
if err != nil {
|
||||
printWarn("Error performing search with %s: %v", engine.Name, err)
|
||||
} else {
|
||||
results = append(results, validateResults(searchResults)...)
|
||||
if len(results) > 0 {
|
||||
break
|
||||
}
|
||||
|
||||
// If no results are found with the selected engine, try the next in line
|
||||
if len(results) == 0 {
|
||||
for i := 1; i < engineCount; i++ {
|
||||
nextEngine := textSearchEngines[(engineIndex+i)%engineCount]
|
||||
enginePage = (page-1)/engineCount + 1
|
||||
printInfo("No results found, trying next engine: %s (engine page %d)", nextEngine.Name, enginePage)
|
||||
|
||||
searchResults, _, err := nextEngine.Func(query, safe, lang, enginePage)
|
||||
if err != nil {
|
||||
printWarn("Error performing search with %s: %v", nextEngine.Name, err)
|
||||
continue
|
||||
}
|
||||
results = append(results, validateResults(searchResults)...)
|
||||
if len(results) > 0 {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
printInfo("Fetched %d results for overall page %d", len(results), page)
|
||||
return results
|
||||
}
|
||||
|
||||
printInfo("Fetched %d results for overall page %d", len(results), page)
|
||||
|
||||
return results
|
||||
}
|
||||
|
||||
func validateResults(searchResults []SearchResult) []TextSearchResult {
|
||||
|
|
Loading…
Add table
Reference in a new issue