From 047cccd19f7d102772508537a81485e42c4e74c3 Mon Sep 17 00:00:00 2001 From: partisan Date: Sun, 29 Dec 2024 22:54:55 +0100 Subject: [PATCH 1/9] added website crawling and indexing crawled results --- cache-images.go | 42 +++++---- cache.go | 2 +- config.go | 25 +++-- crawler.go | 224 +++++++++++++++++++++++++++++++++++++++++++++ get-domains-csv.go | 118 ++++++++++++++++++++++++ go.mod | 46 ++++++++-- go.sum | 123 ++++++++++++++++++++++--- indexer.go | 198 +++++++++++++++++++++++++++++++++++++++ init.go | 25 +++++ text.go | 113 ++++++++++++----------- 10 files changed, 819 insertions(+), 97 deletions(-) create mode 100644 crawler.go create mode 100644 get-domains-csv.go create mode 100644 indexer.go diff --git a/cache-images.go b/cache-images.go index 16d686e..4e551cd 100644 --- a/cache-images.go +++ b/cache-images.go @@ -24,15 +24,15 @@ import ( ) var ( - cachingImages = make(map[string]*sync.Mutex) - cachingImagesMu sync.Mutex - // cachingSemaphore = make(chan struct{}, 100) // Limit to concurrent downloads + cachingImages = make(map[string]*sync.Mutex) + cachingImagesMu sync.Mutex + cachingSemaphore = make(chan struct{}, 100) invalidImageIDs = make(map[string]struct{}) invalidImageIDsMu sync.Mutex - imageURLMap = make(map[string]string) // mapping from imageID_type to imageURL - imageURLMapMu sync.RWMutex // mutex for thread-safe access + imageURLMap = make(map[string]string) + imageURLMapMu sync.RWMutex ) func cacheImage(imageURL, imageID string, isThumbnail bool) (string, bool, error) { @@ -49,7 +49,13 @@ func cacheImage(imageURL, imageID string, isThumbnail bool) (string, bool, error filename = fmt.Sprintf("%s_full.webp", imageID) } - cachedImagePath := filepath.Join(config.DriveCache.Path, filename) + // Make sure we store inside: config.DriveCache.Path / images + imageCacheDir := filepath.Join(config.DriveCache.Path, "images") + if err := os.MkdirAll(imageCacheDir, 0755); err != nil { + return "", false, fmt.Errorf("couldn't create images folder: %v", err) + } + + cachedImagePath := filepath.Join(imageCacheDir, filename) tempImagePath := cachedImagePath + ".tmp" // Check if the image is already cached @@ -73,9 +79,8 @@ func cacheImage(imageURL, imageID string, isThumbnail bool) (string, bool, error return cachedImagePath, true, nil } - // // Limit max concurrent downloads - // cachingSemaphore <- struct{}{} // Acquire a token - // defer func() { <-cachingSemaphore }() // Release the token + cachingSemaphore <- struct{}{} + defer func() { <-cachingSemaphore }() // Create a custom http.Client that skips SSL certificate verification client := &http.Client{ @@ -217,7 +222,8 @@ func handleImageServe(w http.ResponseWriter, r *http.Request) { imageType = parts[1] filename := fmt.Sprintf("%s_%s.webp", imageID, imageType) - cachedImagePath := filepath.Join(config.DriveCache.Path, filename) + // Adjust to read from config.DriveCache.Path / images + cachedImagePath := filepath.Join(config.DriveCache.Path, "images", filename) if hasExtension && imageType == "thumb" { // Requesting cached image (thumbnail or full) @@ -329,7 +335,7 @@ func handleImageStatus(w http.ResponseWriter, r *http.Request) { // Check thumbnail first for _, ext := range extensions { thumbFilename := fmt.Sprintf("%s_thumb.%s", id, ext) - thumbPath := filepath.Join(config.DriveCache.Path, thumbFilename) + thumbPath := filepath.Join(config.DriveCache.Path, "images", thumbFilename) if _, err := os.Stat(thumbPath); err == nil { statusMap[id] = fmt.Sprintf("/image/%s_thumb.%s", id, ext) @@ -342,7 +348,7 @@ func 
handleImageStatus(w http.ResponseWriter, r *http.Request) { if !imageReady { for _, ext := range extensions { fullFilename := fmt.Sprintf("%s_full.%s", id, ext) - fullPath := filepath.Join(config.DriveCache.Path, fullFilename) + fullPath := filepath.Join(config.DriveCache.Path, "images", fullFilename) if _, err := os.Stat(fullPath); err == nil { statusMap[id] = fmt.Sprintf("/image/%s_full.%s", id, ext) @@ -447,7 +453,9 @@ func cleanExpiredCachedImages() { } func cleanupCache() { - files, err := os.ReadDir(config.DriveCache.Path) + // Read from: config.DriveCache.Path / images + imageCacheDir := filepath.Join(config.DriveCache.Path, "images") + files, err := os.ReadDir(imageCacheDir) if err != nil { printErr("Failed to read DriveCache directory: %v", err) return @@ -462,19 +470,17 @@ func cleanupCache() { continue } - filePath := filepath.Join(config.DriveCache.Path, file.Name()) + filePath := filepath.Join(imageCacheDir, file.Name()) - // Check for expired files based on modification time if config.DriveCache.Duration > 0 && time.Since(info.ModTime()) > config.DriveCache.Duration { if err := os.Remove(filePath); err == nil { printDebug("Removed expired cache file: %s", filePath) } else { printErr("Failed to remove expired cache file: %s", filePath) } - continue // Skip adding this file to the list + continue } - // Accumulate total size and store file info for potential deletion totalSize += uint64(info.Size()) fileInfos = append(fileInfos, info) } @@ -491,7 +497,7 @@ func cleanupCache() { break } - filePath := filepath.Join(config.DriveCache.Path, info.Name()) + filePath := filepath.Join(imageCacheDir, info.Name()) fileSize := uint64(info.Size()) if err := os.Remove(filePath); err == nil { diff --git a/cache.go b/cache.go index b5ad880..ac2902d 100644 --- a/cache.go +++ b/cache.go @@ -162,7 +162,7 @@ func (rc *ResultsCache) keyToString(key CacheKey) string { // checkAndCleanCache removes items if memory usage exceeds the limit. func (rc *ResultsCache) checkAndCleanCache() { - for rc.currentMemoryUsage() > config.RamCache.MaxUsageBytes { + if rc.currentMemoryUsage() > config.RamCache.MaxUsageBytes { rc.cleanOldestItems() } } diff --git a/config.go b/config.go index c3aec6b..2e5d805 100644 --- a/config.go +++ b/config.go @@ -30,6 +30,7 @@ type Config struct { Domain string // Added NodesEnabled bool // Added CrawlerEnabled bool // Added + IndexerEnabled bool // Added WebsiteEnabled bool // Added RamCacheEnabled bool DriveCacheEnabled bool // Added @@ -46,6 +47,7 @@ var defaultConfig = Config{ AuthCode: generateStrongRandomString(64), NodesEnabled: false, CrawlerEnabled: true, + IndexerEnabled: false, WebsiteEnabled: true, RamCacheEnabled: true, DriveCacheEnabled: false, @@ -105,6 +107,15 @@ func createConfig() error { config.Domain = defaultConfig.Domain } + // printMessage("Use Indexer? 
(YES/no): ") + // indexerChoice, _ := reader.ReadString('\n') + // indexerChoice = strings.TrimSpace(strings.ToLower(indexerChoice)) + // if indexerChoice == "no" { + // config.IndexerEnabled = false + // } else { + // config.IndexerEnabled = true + // } + // Cache settings printMessage("Would you like to configure Cache settings (yes/NO): ") configureCache, _ := reader.ReadString('\n') @@ -181,7 +192,7 @@ func createConfig() error { } else { config.DriveCache.MaxUsageBytes = parseMaxUsageDrive(driveMaxUsage, drivePath) if config.DriveCache.MaxUsageBytes == 0 { - printWarn("Invalid DriveCache max usage, using default (1 TiB).") + printWarn("Invalid DriveCache max usage, using default.") config.DriveCache.MaxUsageBytes = defaultConfig.DriveCache.MaxUsageBytes } } @@ -201,13 +212,6 @@ func createConfig() error { printMessage("Generated connection code: %s\n", config.AuthCode) } - // Set other default values - config.NodesEnabled = defaultConfig.NodesEnabled - config.CrawlerEnabled = defaultConfig.CrawlerEnabled - config.WebsiteEnabled = defaultConfig.WebsiteEnabled - config.LogLevel = defaultConfig.LogLevel - - // Save configuration to file saveConfig(config) printInfo("Configuration saved successfully.") return nil @@ -232,6 +236,7 @@ func saveConfig(config Config) { featuresSec := cfg.Section("Features") featuresSec.Key("Nodes").SetValue(strconv.FormatBool(config.NodesEnabled)) featuresSec.Key("Crawler").SetValue(strconv.FormatBool(config.CrawlerEnabled)) + featuresSec.Key("Indexer").SetValue(strconv.FormatBool(config.IndexerEnabled)) featuresSec.Key("Website").SetValue(strconv.FormatBool(config.WebsiteEnabled)) featuresSec.Key("RamCache").SetValue(strconv.FormatBool(config.RamCacheEnabled)) featuresSec.Key("DriveCache").SetValue(strconv.FormatBool(config.DriveCacheEnabled)) @@ -273,6 +278,7 @@ func loadConfig() Config { // Features nodesEnabled, _ := cfg.Section("Features").Key("Nodes").Bool() crawlerEnabled, _ := cfg.Section("Features").Key("Crawler").Bool() + indexerEnabled, _ := cfg.Section("Features").Key("Indexer").Bool() websiteEnabled, _ := cfg.Section("Features").Key("Website").Bool() ramCacheEnabled, _ := cfg.Section("Features").Key("RamCache").Bool() driveCacheEnabled, _ := cfg.Section("Features").Key("DriveCache").Bool() @@ -294,10 +300,11 @@ func loadConfig() Config { Port: port, Domain: domain, LogLevel: logLevel, - AuthCode: authCode, // Assign AuthCode here + AuthCode: authCode, Peers: peers, NodesEnabled: nodesEnabled, CrawlerEnabled: crawlerEnabled, + IndexerEnabled: indexerEnabled, WebsiteEnabled: websiteEnabled, RamCacheEnabled: ramCacheEnabled, DriveCacheEnabled: driveCacheEnabled, diff --git a/crawler.go b/crawler.go new file mode 100644 index 0000000..bbe3540 --- /dev/null +++ b/crawler.go @@ -0,0 +1,224 @@ +package main + +import ( + "bufio" + "fmt" + "net/http" + "os" + "path/filepath" + "strings" + "time" + + "golang.org/x/net/html" +) + +// webCrawlerInit is called during init on program start +func webCrawlerInit() { + go func() { + // First run immediately + runCrawlerAndIndexer() + + // Then every 24h (adjust as needed) + ticker := time.NewTicker(24 * time.Hour) + for range ticker.C { + runCrawlerAndIndexer() + } + }() +} + +// runCrawlerAndIndexer reads domains.csv -> crawls -> writes to data_to_index.txt -> reindexes +func runCrawlerAndIndexer() { + // 1. Read domains.csv + domains, err := readDomainsCSV(filepath.Join(config.DriveCache.Path, "domains.csv")) + if err != nil { + printErr("Error reading domains.csv: %v", err) + return + } + + // 2. 
Crawl each domain and write results to data_to_index.txt + outFile := filepath.Join(config.DriveCache.Path, "data_to_index.txt") + if err := crawlDomainsToFile(domains, outFile); err != nil { + printErr("Error crawling domains: %v", err) + return + } + + // 3. Re-index data_to_index.txt + if err := IndexFile(outFile); err != nil { + printErr("Error indexing data_to_index.txt: %v", err) + return + } + + printDebug("Crawl + index refresh completed.") +} + +// readDomainsCSV returns a slice of (rank,domain) from a local CSV file +func readDomainsCSV(csvPath string) ([][2]string, error) { + f, err := os.Open(csvPath) + if err != nil { + return nil, err + } + defer f.Close() + + var result [][2]string + scanner := bufio.NewScanner(f) + // Skip header line + scanner.Scan() + + for scanner.Scan() { + line := scanner.Text() + // Split by commas, not tabs + fields := strings.SplitN(line, ",", 3) // Splits into up to 3 parts (rank, domain, popularity) + if len(fields) < 2 { + printDebug("Skipping malformed line: %s", line) + continue + } + // Remove quotes around fields, if present + rank := strings.Trim(fields[0], `"`) + domain := strings.Trim(fields[1], `"`) + result = append(result, [2]string{rank, domain}) + } + return result, scanner.Err() +} + +// crawlDomainsToFile visits each domain, extracts minimal data, writes results to outFile +func crawlDomainsToFile(domains [][2]string, outFile string) error { + // Read existing data_to_index.txt into a map to prevent duplicates + existingEntries := make(map[string]bool) + if _, err := os.Stat(outFile); err == nil { // File exists + file, err := os.Open(outFile) + if err != nil { + return fmt.Errorf("unable to open %s: %v", outFile, err) + } + defer file.Close() + + scanner := bufio.NewScanner(file) + for scanner.Scan() { + line := scanner.Text() + parts := strings.SplitN(line, "|", 5) + if len(parts) >= 1 { + existingEntries[parts[0]] = true // Mark existing domain + } + } + } + + // Open file for writing (truncate if existing) + file, err := os.OpenFile(outFile, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644) + if err != nil { + return fmt.Errorf("unable to open %s for writing: %v", outFile, err) + } + defer file.Close() + + for _, d := range domains { + rank := d[0] + domain := d[1] + if domain == "" || existingEntries["https://"+domain] { + continue + } + + fullURL := "https://" + domain + title, desc, keywords := fetchPageMetadata(fullURL) + if title == "" { + title = "Unknown Title" + } + if desc == "" { + desc = "No Description" + } + + // Write unique domain to file + line := fmt.Sprintf("%s|%s|%s|%s|%s\n", + fullURL, + sanitize(title), + sanitize(keywords), + sanitize(desc), + rank, + ) + if _, err := file.WriteString(line); err != nil { + return err + } + + existingEntries[fullURL] = true + } + + return nil +} + +// fetchPageMetadata does a simple GET and parses , meta[name=description], meta[name=keywords] +func fetchPageMetadata(pageURL string) (string, string, string) { + // Generate a User-Agent using your GetUserAgent function + userAgent, err := GetUserAgent("crawler") + if err != nil { + printWarn("Failed to generate User-Agent: %v", err) + return "", "", "" + } + + client := &http.Client{Timeout: 15 * time.Second} + req, err := http.NewRequest("GET", pageURL, nil) + if err != nil { + printWarn("Failed to create request for %s: %v", pageURL, err) + return "", "", "" + } + + // Set the dynamically generated User-Agent + req.Header.Set("User-Agent", userAgent) + + resp, err := client.Do(req) + if err != nil { + printWarn("Failed to GET %s: 
%v", pageURL, err) + return "", "", "" + } + defer resp.Body.Close() + + // Handle non-200 responses + if resp.StatusCode == 403 || resp.StatusCode == 401 { + printWarn("Skipping %s: HTTP %d", pageURL, resp.StatusCode) + return "", "", "" + } else if resp.StatusCode < 200 || resp.StatusCode >= 300 { + printWarn("Non-200 for %s: %d", pageURL, resp.StatusCode) + return "", "", "" + } + + // Parse HTML + doc, err := html.Parse(resp.Body) + if err != nil { + printWarn("HTML parse error for %s: %v", pageURL, err) + return "", "", "" + } + + var title, desc, keywords string + var f func(*html.Node) + f = func(n *html.Node) { + if n.Type == html.ElementNode && n.Data == "title" && n.FirstChild != nil { + title = n.FirstChild.Data + } + if n.Type == html.ElementNode && n.Data == "meta" { + var nameVal, contentVal string + for _, attr := range n.Attr { + switch strings.ToLower(attr.Key) { + case "name": + nameVal = strings.ToLower(attr.Val) + case "content": + contentVal = attr.Val + } + } + if nameVal == "description" { + desc = contentVal + } else if nameVal == "keywords" { + keywords = contentVal + } + } + for c := n.FirstChild; c != nil; c = c.NextSibling { + f(c) + } + } + f(doc) + + return title, desc, keywords +} + +// sanitize is a quick helper to remove newlines/pipes from fields +func sanitize(input string) string { + input = strings.ReplaceAll(input, "|", " ") + input = strings.ReplaceAll(input, "\n", " ") + input = strings.TrimSpace(input) + return input +} diff --git a/get-domains-csv.go b/get-domains-csv.go new file mode 100644 index 0000000..8d931f9 --- /dev/null +++ b/get-domains-csv.go @@ -0,0 +1,118 @@ +package main + +import ( + "archive/zip" + "fmt" + "io" + "net/http" + "os" + "path/filepath" +) + +func downloadAndSetupDomainsCSV() error { + targetFilePath := filepath.Join(config.DriveCache.Path, "domains.csv") + + // Check if domains.csv already exists + if _, err := os.Stat(targetFilePath); err == nil { + printDebug("domains.csv already exists at %s", targetFilePath) + return nil + } + + downloadURL := "https://www.domcop.com/files/top/top10milliondomains.csv.zip" + zipFilePath := filepath.Join(config.DriveCache.Path, "top10milliondomains.csv.zip") + + // Download the file + printDebug("Downloading file from %s", downloadURL) + resp, err := http.Get(downloadURL) + if err != nil { + return fmt.Errorf("failed to download file: %v", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return fmt.Errorf("failed to download file: received status code %d", resp.StatusCode) + } + + // Create the zip file locally + zipFile, err := os.Create(zipFilePath) + if err != nil { + return fmt.Errorf("failed to create local zip file: %v", err) + } + defer zipFile.Close() + + _, err = io.Copy(zipFile, resp.Body) + if err != nil { + return fmt.Errorf("failed to write downloaded zip file: %v", err) + } + + // Unzip the file + printDebug("Unzipping file %s", zipFilePath) + if err := unzipFile(zipFilePath, config.DriveCache.Path); err != nil { + return fmt.Errorf("failed to unzip file: %v", err) + } + + // Find the .csv file and rename/move it to domains.csv + csvFound := false + dirEntries, err := os.ReadDir(config.DriveCache.Path) + if err != nil { + return fmt.Errorf("failed to read directory: %v", err) + } + + for _, entry := range dirEntries { + if !entry.IsDir() && filepath.Ext(entry.Name()) == ".csv" { + csvPath := filepath.Join(config.DriveCache.Path, entry.Name()) + if err := os.Rename(csvPath, targetFilePath); err != nil { + return fmt.Errorf("failed to move %s 
to %s: %v", csvPath, targetFilePath, err) + } + csvFound = true + break + } + } + + if !csvFound { + return fmt.Errorf("no .csv file found in the downloaded archive") + } + + // Clean up zip file + if err := os.Remove(zipFilePath); err != nil { + printWarn("failed to remove zip file %s: %v", zipFilePath, err) + } + + printDebug("domains.csv successfully downloaded and placed at %s", targetFilePath) + return nil +} + +func unzipFile(zipFile, destDir string) error { + reader, err := zip.OpenReader(zipFile) + if err != nil { + return err + } + defer reader.Close() + + for _, file := range reader.File { + filePath := filepath.Join(destDir, file.Name) + + if file.FileInfo().IsDir() { + os.MkdirAll(filePath, os.ModePerm) + continue + } + + srcFile, err := file.Open() + if err != nil { + return err + } + defer srcFile.Close() + + destFile, err := os.Create(filePath) + if err != nil { + return err + } + defer destFile.Close() + + if _, err := io.Copy(destFile, srcFile); err != nil { + return err + } + } + + return nil +} diff --git a/go.mod b/go.mod index 63599f8..6895586 100644 --- a/go.mod +++ b/go.mod @@ -1,9 +1,11 @@ -module searchengine +module qgato -go 1.18 +go 1.23 + +toolchain go1.23.4 require ( - github.com/PuerkitoBio/goquery v1.9.1 // direct + github.com/PuerkitoBio/goquery v1.10.0 // direct github.com/chai2010/webp v1.1.1 github.com/leonelquinteros/gotext v1.7.0 github.com/shirou/gopsutil v3.21.11+incompatible @@ -12,10 +14,42 @@ require ( ) require ( - github.com/andybalholm/cascadia v1.3.2 // indirect + github.com/blevesearch/bleve/v2 v2.4.4 + golang.org/x/net v0.33.0 +) + +require ( + github.com/RoaringBitmap/roaring v1.9.4 // indirect + github.com/andybalholm/cascadia v1.3.3 // indirect + github.com/bits-and-blooms/bitset v1.20.0 // indirect + github.com/blevesearch/bleve_index_api v1.2.0 // indirect + github.com/blevesearch/geo v0.1.20 // indirect + github.com/blevesearch/go-faiss v1.0.24 // indirect + github.com/blevesearch/go-porterstemmer v1.0.3 // indirect + github.com/blevesearch/gtreap v0.1.1 // indirect + github.com/blevesearch/mmap-go v1.0.4 // indirect + github.com/blevesearch/scorch_segment_api/v2 v2.3.0 // indirect + github.com/blevesearch/segment v0.9.1 // indirect + github.com/blevesearch/snowballstem v0.9.0 // indirect + github.com/blevesearch/upsidedown_store_api v1.0.2 // indirect + github.com/blevesearch/vellum v1.1.0 // indirect + github.com/blevesearch/zapx/v11 v11.3.10 // indirect + github.com/blevesearch/zapx/v12 v12.3.10 // indirect + github.com/blevesearch/zapx/v13 v13.3.10 // indirect + github.com/blevesearch/zapx/v14 v14.3.10 // indirect + github.com/blevesearch/zapx/v15 v15.3.17 // indirect + github.com/blevesearch/zapx/v16 v16.1.9-0.20241217210638-a0519e7caf3b // indirect github.com/go-ole/go-ole v1.3.0 // indirect + github.com/golang/geo v0.0.0-20230421003525-6adc56603217 // indirect + github.com/golang/protobuf v1.5.4 // indirect + github.com/golang/snappy v0.0.4 // indirect + github.com/json-iterator/go v1.1.12 // indirect + github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect + github.com/modern-go/reflect2 v1.0.2 // indirect + github.com/mschoch/smat v0.2.0 // indirect github.com/stretchr/testify v1.9.0 // indirect github.com/yusufpapurcu/wmi v1.2.4 // indirect - golang.org/x/net v0.30.0 // indirect - golang.org/x/sys v0.26.0 // indirect + go.etcd.io/bbolt v1.3.11 // indirect + golang.org/x/sys v0.28.0 // indirect + google.golang.org/protobuf v1.36.0 // indirect ) diff --git a/go.sum b/go.sum index 962a1b8..f3f643b 100644 
--- a/go.sum +++ b/go.sum @@ -1,39 +1,121 @@ -github.com/PuerkitoBio/goquery v1.9.1 h1:mTL6XjbJTZdpfL+Gwl5U2h1l9yEkJjhmlTeV9VPW7UI= -github.com/PuerkitoBio/goquery v1.9.1/go.mod h1:cW1n6TmIMDoORQU5IU/P1T3tGFunOeXEpGP2WHRwkbY= -github.com/andybalholm/cascadia v1.3.2 h1:3Xi6Dw5lHF15JtdcmAHD3i1+T8plmv7BQ/nsViSLyss= -github.com/andybalholm/cascadia v1.3.2/go.mod h1:7gtRlve5FxPPgIgX36uWBX58OdBsSS6lUvCFb+h7KvU= +github.com/PuerkitoBio/goquery v1.10.0 h1:6fiXdLuUvYs2OJSvNRqlNPoBm6YABE226xrbavY5Wv4= +github.com/PuerkitoBio/goquery v1.10.0/go.mod h1:TjZZl68Q3eGHNBA8CWaxAN7rOU1EbDz3CWuolcO5Yu4= +github.com/RoaringBitmap/roaring v1.9.4 h1:yhEIoH4YezLYT04s1nHehNO64EKFTop/wBhxv2QzDdQ= +github.com/RoaringBitmap/roaring v1.9.4/go.mod h1:6AXUsoIEzDTFFQCe1RbGA6uFONMhvejWj5rqITANK90= +github.com/andybalholm/cascadia v1.3.3 h1:AG2YHrzJIm4BZ19iwJ/DAua6Btl3IwJX+VI4kktS1LM= +github.com/andybalholm/cascadia v1.3.3/go.mod h1:xNd9bqTn98Ln4DwST8/nG+H0yuB8Hmgu1YHNnWw0GeA= +github.com/bits-and-blooms/bitset v1.12.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8= +github.com/bits-and-blooms/bitset v1.20.0 h1:2F+rfL86jE2d/bmw7OhqUg2Sj/1rURkBn3MdfoPyRVU= +github.com/bits-and-blooms/bitset v1.20.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8= +github.com/blevesearch/bleve/v2 v2.4.4 h1:RwwLGjUm54SwyyykbrZs4vc1qjzYic4ZnAnY9TwNl60= +github.com/blevesearch/bleve/v2 v2.4.4/go.mod h1:fa2Eo6DP7JR+dMFpQe+WiZXINKSunh7WBtlDGbolKXk= +github.com/blevesearch/bleve_index_api v1.2.0 h1:/DXMMWBwx/UmGKM1xDhTwDoJI5yQrG6rqRWPFcOgUVo= +github.com/blevesearch/bleve_index_api v1.2.0/go.mod h1:PbcwjIcRmjhGbkS/lJCpfgVSMROV6TRubGGAODaK1W8= +github.com/blevesearch/geo v0.1.20 h1:paaSpu2Ewh/tn5DKn/FB5SzvH0EWupxHEIwbCk/QPqM= +github.com/blevesearch/geo v0.1.20/go.mod h1:DVG2QjwHNMFmjo+ZgzrIq2sfCh6rIHzy9d9d0B59I6w= +github.com/blevesearch/go-faiss v1.0.24 h1:K79IvKjoKHdi7FdiXEsAhxpMuns0x4fM0BO93bW5jLI= +github.com/blevesearch/go-faiss v1.0.24/go.mod h1:OMGQwOaRRYxrmeNdMrXJPvVx8gBnvE5RYrr0BahNnkk= +github.com/blevesearch/go-porterstemmer v1.0.3 h1:GtmsqID0aZdCSNiY8SkuPJ12pD4jI+DdXTAn4YRcHCo= +github.com/blevesearch/go-porterstemmer v1.0.3/go.mod h1:angGc5Ht+k2xhJdZi511LtmxuEf0OVpvUUNrwmM1P7M= +github.com/blevesearch/gtreap v0.1.1 h1:2JWigFrzDMR+42WGIN/V2p0cUvn4UP3C4Q5nmaZGW8Y= +github.com/blevesearch/gtreap v0.1.1/go.mod h1:QaQyDRAT51sotthUWAH4Sj08awFSSWzgYICSZ3w0tYk= +github.com/blevesearch/mmap-go v1.0.4 h1:OVhDhT5B/M1HNPpYPBKIEJaD0F3Si+CrEKULGCDPWmc= +github.com/blevesearch/mmap-go v1.0.4/go.mod h1:EWmEAOmdAS9z/pi/+Toxu99DnsbhG1TIxUoRmJw/pSs= +github.com/blevesearch/scorch_segment_api/v2 v2.3.0 h1:vxCjbXAkkEBSb4AB3Iqgr/EJcPyYRsiGxpcvsS8E1Dw= +github.com/blevesearch/scorch_segment_api/v2 v2.3.0/go.mod h1:5y+TgXYSx+xJGaCwSlvy9G/UJBIY5wzvIkhvhBm2ATc= +github.com/blevesearch/segment v0.9.1 h1:+dThDy+Lvgj5JMxhmOVlgFfkUtZV2kw49xax4+jTfSU= +github.com/blevesearch/segment v0.9.1/go.mod h1:zN21iLm7+GnBHWTao9I+Au/7MBiL8pPFtJBJTsk6kQw= +github.com/blevesearch/snowballstem v0.9.0 h1:lMQ189YspGP6sXvZQ4WZ+MLawfV8wOmPoD/iWeNXm8s= +github.com/blevesearch/snowballstem v0.9.0/go.mod h1:PivSj3JMc8WuaFkTSRDW2SlrulNWPl4ABg1tC/hlgLs= +github.com/blevesearch/upsidedown_store_api v1.0.2 h1:U53Q6YoWEARVLd1OYNc9kvhBMGZzVrdmaozG2MfoB+A= +github.com/blevesearch/upsidedown_store_api v1.0.2/go.mod h1:M01mh3Gpfy56Ps/UXHjEO/knbqyQ1Oamg8If49gRwrQ= +github.com/blevesearch/vellum v1.1.0 h1:CinkGyIsgVlYf8Y2LUQHvdelgXr6PYuvoDIajq6yR9w= +github.com/blevesearch/vellum v1.1.0/go.mod h1:QgwWryE8ThtNPxtgWJof5ndPfx0/YMBh+W2weHKPw8Y= +github.com/blevesearch/zapx/v11 v11.3.10 
h1:hvjgj9tZ9DeIqBCxKhi70TtSZYMdcFn7gDb71Xo/fvk= +github.com/blevesearch/zapx/v11 v11.3.10/go.mod h1:0+gW+FaE48fNxoVtMY5ugtNHHof/PxCqh7CnhYdnMzQ= +github.com/blevesearch/zapx/v12 v12.3.10 h1:yHfj3vXLSYmmsBleJFROXuO08mS3L1qDCdDK81jDl8s= +github.com/blevesearch/zapx/v12 v12.3.10/go.mod h1:0yeZg6JhaGxITlsS5co73aqPtM04+ycnI6D1v0mhbCs= +github.com/blevesearch/zapx/v13 v13.3.10 h1:0KY9tuxg06rXxOZHg3DwPJBjniSlqEgVpxIqMGahDE8= +github.com/blevesearch/zapx/v13 v13.3.10/go.mod h1:w2wjSDQ/WBVeEIvP0fvMJZAzDwqwIEzVPnCPrz93yAk= +github.com/blevesearch/zapx/v14 v14.3.10 h1:SG6xlsL+W6YjhX5N3aEiL/2tcWh3DO75Bnz77pSwwKU= +github.com/blevesearch/zapx/v14 v14.3.10/go.mod h1:qqyuR0u230jN1yMmE4FIAuCxmahRQEOehF78m6oTgns= +github.com/blevesearch/zapx/v15 v15.3.17 h1:NkkMI98pYLq/uHnB6YWcITrrLpCVyvZ9iP+AyfpW1Ys= +github.com/blevesearch/zapx/v15 v15.3.17/go.mod h1:vXRQzJJvlGVCdmOD5hg7t7JdjUT5DmDPhsAfjvtzIq8= +github.com/blevesearch/zapx/v16 v16.1.9-0.20241217210638-a0519e7caf3b h1:ju9Az5YgrzCeK3M1QwvZIpxYhChkXp7/L0RhDYsxXoE= +github.com/blevesearch/zapx/v16 v16.1.9-0.20241217210638-a0519e7caf3b/go.mod h1:BlrYNpOu4BvVRslmIG+rLtKhmjIaRhIbG8sb9scGTwI= github.com/chai2010/webp v1.1.1 h1:jTRmEccAJ4MGrhFOrPMpNGIJ/eybIgwKpcACsrTEapk= github.com/chai2010/webp v1.1.1/go.mod h1:0XVwvZWdjjdxpUEIf7b9g9VkHFnInUSYujwqTLEuldU= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0= github.com/go-ole/go-ole v1.3.0 h1:Dt6ye7+vXGIKZ7Xtk4s6/xVdGDQynvom7xCFEdWr6uE= github.com/go-ole/go-ole v1.3.0/go.mod h1:5LS6F96DhAwUc7C+1HLexzMXY1xGRSryjyPPKW6zv78= +github.com/golang/geo v0.0.0-20230421003525-6adc56603217 h1:HKlyj6in2JV6wVkmQ4XmG/EIm+SCYlPZ+V4GWit7Z+I= +github.com/golang/geo v0.0.0-20230421003525-6adc56603217/go.mod h1:8wI0hitZ3a1IxZfeH3/5I97CI8i5cLGsYe7xNhQGs9U= +github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= +github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= +github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM= +github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= +github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/leonelquinteros/gotext v1.7.0 h1:jcJmF4AXqyamP7vuw2MMIKs+O3jAEmvrc5JQiI8Ht/8= github.com/leonelquinteros/gotext v1.7.0/go.mod h1:qJdoQuERPpccw7L70uoU+K/BvTfRBHYsisCQyFLXyvw= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= +github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= 
+github.com/mschoch/smat v0.2.0 h1:8imxQsjDm8yFEAVBe7azKmKSgzSkZXDuKkSq9374khM= +github.com/mschoch/smat v0.2.0/go.mod h1:kc9mz7DoBKqDyiRL7VZN8KvXQMWeTaVnttLRXOlotKw= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/shirou/gopsutil v3.21.11+incompatible h1:+1+c1VGhc88SSonWP6foOcLhvnKlUeu/erjjvaPEYiI= github.com/shirou/gopsutil v3.21.11+incompatible/go.mod h1:5b4v6he4MtMOwMlS0TUMTu2PcXUg8+E1lC7eC3UO/RA= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= github.com/yusufpapurcu/wmi v1.2.4 h1:zFUKzehAFReQwLys1b/iSMl+JQGSCSjtVqQn9bBrPo0= github.com/yusufpapurcu/wmi v1.2.4/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0= +go.etcd.io/bbolt v1.3.11 h1:yGEzV1wPz2yVCLsD8ZAiGHhHVlczyC9d1rP43/VCRJ0= +go.etcd.io/bbolt v1.3.11/go.mod h1:dksAq7YMXoljX0xu6VF5DMZGbhYYoLUalEiSySYAS4I= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/crypto v0.13.0/go.mod h1:y6Z2r+Rw4iayiXXAIxJIDAJ1zMW4yaTpebo8fPOliYc= +golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU= +golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8= +golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk= golang.org/x/image v0.21.0 h1:c5qV36ajHpdj4Qi0GnE0jUc/yuo33OLFaa0d+crTD5s= golang.org/x/image v0.21.0/go.mod h1:vUbsLavqK/W303ZroQQVKQ+Af3Yl6Uz1Ppu5J/cLz78= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= +golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= +golang.org/x/mod v0.15.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= +golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= -golang.org/x/net v0.9.0/go.mod h1:d48xBJpPfHeWQsugry2m+kC02ZBRGRgulfHnEXEuWns= -golang.org/x/net v0.30.0 h1:AcW1SDZMkb8IpzCdQUaIq2sP4sZ4zw+55h6ynffypl4= -golang.org/x/net v0.30.0/go.mod h1:2wGyMJ5iFasEhkwi13ChkO/t1ECNC4X4eBKkVFyYFlU= +golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= +golang.org/x/net v0.15.0/go.mod h1:idbUs1IY1+zTqbi8yxTbhexhEEk5ur9LInksu6HrEpk= +golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44= +golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM= +golang.org/x/net v0.33.0 
h1:74SYHlV8BIgHIFC/LrYkOGIwL19eTYXQ5wc6TBuO36I= +golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y= +golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.10.0 h1:3NQrjDixjgGwUOCaF8w2+VYHv0Ve/vGYSbdkTa98gmQ= +golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -42,23 +124,42 @@ golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo= -golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA= +golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/telemetry v0.0.0-20240228155512-f48c80bd79b2/go.mod h1:TeRTkGYfJXctD9OcfyVLyj2J3IxLnKwHJR8f4D8a3YE= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= -golang.org/x/term v0.7.0/go.mod h1:P32HKFT3hSsZrRxla30E9HqToFYAQPCMs/zFMBUFqPY= +golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo= +golang.org/x/term v0.12.0/go.mod h1:owVbMEjm3cBLCHdkQu9b1opXd4ETQWc3BhuQGKgXgvU= +golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk= +golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY= +golang.org/x/term v0.27.0/go.mod h1:iMsnZpn0cago0GOrHO2+Y7u7JPn5AylBrcoWkElMTSM= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= +golang.org/x/text 
v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= +golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= +golang.org/x/tools v0.13.0/go.mod h1:HvlwmtVNQAhOuCjW7xxvovg8wbNq7LwfXh/k7wXUl58= +golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +google.golang.org/protobuf v1.36.0 h1:mjIs9gYtt56AzC4ZaffQuh88TZurBGhIJMBZGSxNerQ= +google.golang.org/protobuf v1.36.0/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/ini.v1 v1.67.0 h1:Dgnx+6+nfE+IfzjUEISNeydPJh9AXNNsWbGP9KzCsOA= gopkg.in/ini.v1 v1.67.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/indexer.go b/indexer.go new file mode 100644 index 0000000..66bc100 --- /dev/null +++ b/indexer.go @@ -0,0 +1,198 @@ +package main + +import ( + "bufio" + "fmt" + "os" + "path/filepath" + "strconv" + "strings" + "time" + + "github.com/blevesearch/bleve/v2" +) + +// Document represents a single document to be indexed. +// You can add more fields if needed. +type Document struct { + ID string `json:"id"` + Link string `json:"link"` + Title string `json:"title"` + Tags string `json:"tags"` + Description string `json:"description"` + Popularity int64 `json:"popularity"` +} + +var ( + // Global Bleve index handle + bleveIndex bleve.Index +) + +func startPeriodicIndexing(filePath string, interval time.Duration) { + go func() { + for { + printDebug("Refreshing index from %s", filePath) + err := IndexFile(filePath) + if err != nil { + printErr("Failed to refresh index: %v", err) + } + time.Sleep(interval) + } + }() +} + +// InitIndex ensures that the Bleve index is created or opened. 
+func InitIndex() error { + idx, err := bleve.Open(filepath.Join(config.DriveCache.Path, "index.bleve")) + if err == bleve.ErrorIndexPathDoesNotExist { + // Index doesn't exist, create a new one + mapping := bleve.NewIndexMapping() + + // Custom mapping for the document + docMapping := bleve.NewDocumentMapping() + + // Text fields with custom analyzers for better tokenization + textFieldMapping := bleve.NewTextFieldMapping() + textFieldMapping.Analyzer = "standard" // Use standard analyzer for partial and fuzzy matches + + docMapping.AddFieldMappingsAt("title", textFieldMapping) + docMapping.AddFieldMappingsAt("description", textFieldMapping) + docMapping.AddFieldMappingsAt("tags", textFieldMapping) + + // Numeric field for popularity + popularityMapping := bleve.NewNumericFieldMapping() + docMapping.AddFieldMappingsAt("popularity", popularityMapping) + + mapping.AddDocumentMapping("Document", docMapping) + + idx, err = bleve.New(filepath.Join(config.DriveCache.Path, "index.bleve"), mapping) + if err != nil { + return fmt.Errorf("failed to create index: %v", err) + } + } else if err != nil { + return fmt.Errorf("failed to open index: %v", err) + } + + bleveIndex = idx + return nil +} + +// IndexFile reads a file line-by-line and indexes each line as a document. +// Each line represents a simple document. Adjust parsing as needed. +func IndexFile(filePath string) error { + file, err := os.Open(filePath) + if err != nil { + return fmt.Errorf("unable to open file for indexing: %v", err) + } + defer file.Close() + + scanner := bufio.NewScanner(file) + batch := bleveIndex.NewBatch() + indexedDomains := make(map[string]bool) // Track indexed domains + + for scanner.Scan() { + line := scanner.Text() + + // Split the line into 5 fields: link|title|tags|description|popularity + parts := strings.SplitN(line, "|", 5) + if len(parts) < 5 { + continue // Skip malformed lines + } + + domain := parts[0] + popularity, _ := strconv.ParseInt(parts[4], 10, 64) + + // Skip if the domain is already indexed + if indexedDomains[domain] { + continue + } + + doc := Document{ + ID: domain, // Use the domain as the unique ID + Link: parts[0], + Title: parts[1], + Tags: parts[2], + Description: parts[3], + Popularity: popularity, + } + + err := batch.Index(doc.ID, map[string]interface{}{ + "title": doc.Title, + "description": doc.Description, + "link": doc.Link, + "tags": doc.Tags, + "popularity": doc.Popularity, + }) + if err != nil { + return fmt.Errorf("failed to index document: %v", err) + } + + indexedDomains[domain] = true // Mark the domain as indexed + } + + // Commit the batch + if err := bleveIndex.Batch(batch); err != nil { + return fmt.Errorf("error committing batch: %v", err) + } + + if err := scanner.Err(); err != nil { + return fmt.Errorf("error reading file: %v", err) + } + + printDebug("Indexed %d unique domains from %s\n", len(indexedDomains), filePath) + return nil +} + +// SearchIndex performs a full-text search on the indexed data. 
+func SearchIndex(queryStr string, page, pageSize int) ([]Document, error) { + // Create compound query + exactMatch := bleve.NewMatchQuery(queryStr) // Exact match + fuzzyMatch := bleve.NewFuzzyQuery(queryStr) // Fuzzy match + fuzzyMatch.Fuzziness = 2 + prefixMatch := bleve.NewPrefixQuery(queryStr) // Prefix match + + query := bleve.NewDisjunctionQuery(exactMatch, fuzzyMatch, prefixMatch) + + req := bleve.NewSearchRequest(query) + req.Fields = []string{"title", "description", "link", "tags", "popularity"} + + // Pagination + req.Size = pageSize + req.From = (page - 1) * pageSize + + // Sort by popularity + req.SortBy([]string{"popularity"}) + + res, err := bleveIndex.Search(req) + if err != nil { + return nil, fmt.Errorf("search error: %v", err) + } + + var docs []Document + for _, hit := range res.Hits { + title := fmt.Sprintf("%v", hit.Fields["title"]) + description := fmt.Sprintf("%v", hit.Fields["description"]) + link := fmt.Sprintf("%v", hit.Fields["link"]) + tags := fmt.Sprintf("%v", hit.Fields["tags"]) + popularity := int64(0) + + if pop, ok := hit.Fields["popularity"].(float64); ok { + popularity = int64(pop) + } + + if link == "<nil>" || title == "<nil>" { + continue + } + + docs = append(docs, Document{ + ID: hit.ID, + Title: title, + Description: description, + Link: link, + Tags: tags, + Popularity: popularity, + }) + } + + return docs, nil +} diff --git a/init.go b/init.go index e7d4ed1..c92e656 100644 --- a/init.go +++ b/init.go @@ -3,6 +3,8 @@ package main import ( "flag" "os" + "path/filepath" + "time" ) var config Config @@ -95,5 +97,28 @@ func main() { printInfo("RAM cache is disabled.") } + // Init indexer + if config.IndexerEnabled { + if err := downloadAndSetupDomainsCSV(); err != nil { + printErr("Failed to set up domains.csv: %v", err) + return + } + + webCrawlerInit() + + err := InitIndex() + if err != nil { + printErr("Failed to initialize index:", err) + } + + // Start periodic indexing (every 2 minutes) + dataFilePath := filepath.Join(config.DriveCache.Path, "data_to_index.txt") + startPeriodicIndexing(dataFilePath, 2*time.Minute) + + printInfo("Indexer is enabled.") + } else { + printInfo("Indexer is disabled.") + } + runServer() } diff --git a/text.go b/text.go index 4744a97..d6e3212 100755 --- a/text.go +++ b/text.go @@ -73,14 +73,10 @@ func getTextResultsFromCacheOrFetch(cacheKey CacheKey, query, safe, lang string, select { case results := <-cacheChan: if results == nil { - // Fetch only if the cache miss occurs and Crawler is enabled - if config.CrawlerEnabled { - combinedResults = fetchTextResults(query, safe, lang, page) - if len(combinedResults) > 0 { - resultsCache.Set(cacheKey, convertToSearchResults(combinedResults)) - } - } else { - printInfo("Crawler disabled; skipping fetching.") + // Always attempt to fetch results on a cache miss + combinedResults = fetchTextResults(query, safe, lang, page) + if len(combinedResults) > 0 { + resultsCache.Set(cacheKey, convertToSearchResults(combinedResults)) } } else { textResults, _, _, _ := convertToSpecificResults(results) @@ -88,13 +84,10 @@ func getTextResultsFromCacheOrFetch(cacheKey CacheKey, query, safe, lang string, } case <-time.After(2 * time.Second): printInfo("Cache check timeout") - if config.CrawlerEnabled { - combinedResults = fetchTextResults(query, safe, lang, page) - if len(combinedResults) > 0 { - resultsCache.Set(cacheKey, convertToSearchResults(combinedResults)) - } - } else { - printInfo("Crawler disabled; skipping fetching.") + // Even on timeout, attempt to fetch results + combinedResults 
= fetchTextResults(query, safe, lang, page) + if len(combinedResults) > 0 { + resultsCache.Set(cacheKey, convertToSearchResults(combinedResults)) } } @@ -121,54 +114,70 @@ func prefetchPage(query, safe, lang string, page int) { func fetchTextResults(query, safe, lang string, page int) []TextSearchResult { var results []TextSearchResult - // If Crawler is disabled, do not fetch from search engines if !config.CrawlerEnabled { - printDebug("Crawler is disabled; skipping search engine fetching.") - return results // Return an empty list - } + printDebug("Crawler is disabled; fetching from local index.") - engineCount := len(textSearchEngines) + // Calculate the starting position based on the page number + indexedResults, err := SearchIndex(query, page, 10) + if err != nil { + printErr("Error searching the index: %v", err) + return results // Return empty results on error + } - // Determine which engine to use for the current page - engineIndex := (page - 1) % engineCount - engine := textSearchEngines[engineIndex] + // Convert indexed results to TextSearchResult format + for _, doc := range indexedResults { + results = append(results, TextSearchResult{ + URL: doc.Link, + Header: doc.Title, + Description: doc.Description, + Source: doc.Tags, + }) + } - // Calculate the page number for this engine - enginePage := (page-1)/engineCount + 1 - - // Debug print to verify engine and page number being fetched - printDebug("Fetching results for overall page %d using engine: %s (engine page %d)", page, engine.Name, enginePage) - - // Fetch results from the selected engine - searchResults, _, err := engine.Func(query, safe, lang, enginePage) - if err != nil { - printWarn("Error performing search with %s: %v", engine.Name, err) + return results } else { - results = append(results, validateResults(searchResults)...) - } + // Crawler is enabled, so use the search engines + engineCount := len(textSearchEngines) - // If no results are found with the selected engine, try the next in line - if len(results) == 0 { - for i := 1; i < engineCount; i++ { - nextEngine := textSearchEngines[(engineIndex+i)%engineCount] - enginePage = (page-1)/engineCount + 1 // Recalculate for the new engine - printInfo("No results found, trying next engine: %s (engine page %d)", nextEngine.Name, enginePage) + // Determine which engine to use for the current page + engineIndex := (page - 1) % engineCount + engine := textSearchEngines[engineIndex] - searchResults, _, err := nextEngine.Func(query, safe, lang, enginePage) - if err != nil { - printWarn("Error performing search with %s: %v", nextEngine.Name, err) - continue - } + // Calculate the page number for this engine + enginePage := (page-1)/engineCount + 1 + + printDebug("Fetching results for overall page %d using engine: %s (engine page %d)", page, engine.Name, enginePage) + + // Fetch results from the selected engine + searchResults, _, err := engine.Func(query, safe, lang, enginePage) + if err != nil { + printWarn("Error performing search with %s: %v", engine.Name, err) + } else { results = append(results, validateResults(searchResults)...) 
-			if len(results) > 0 {
-				break
+		}
+
+		// If no results are found with the selected engine, try the next in line
+		if len(results) == 0 {
+			for i := 1; i < engineCount; i++ {
+				nextEngine := textSearchEngines[(engineIndex+i)%engineCount]
+				enginePage = (page-1)/engineCount + 1
+				printInfo("No results found, trying next engine: %s (engine page %d)", nextEngine.Name, enginePage)
+
+				searchResults, _, err := nextEngine.Func(query, safe, lang, enginePage)
+				if err != nil {
+					printWarn("Error performing search with %s: %v", nextEngine.Name, err)
+					continue
+				}
+				results = append(results, validateResults(searchResults)...)
+				if len(results) > 0 {
+					break
+				}
 			}
 		}
+
+		printInfo("Fetched %d results for overall page %d", len(results), page)
+		return results
 	}
-
-	printInfo("Fetched %d results for overall page %d", len(results), page)
-
-	return results
 }
 
 func validateResults(searchResults []SearchResult) []TextSearchResult {

From 13e1d6119b2260a762237c1db7a6f528d484bd73 Mon Sep 17 00:00:00 2001
From: partisan <none@noone.no>
Date: Mon, 30 Dec 2024 17:19:20 +0100
Subject: [PATCH 2/9] added more config values for indexing + fixed value handling when it's missing in config file

---
 config.go  | 157 +++++++++++++++++++++++++++++++++++------------
 crawler.go |  84 +++++++++++++++-------------
 indexer.go |   6 +-
 3 files changed, 153 insertions(+), 94 deletions(-)

diff --git a/config.go b/config.go
index 2e5d805..4ea4eb2 100644
--- a/config.go
+++ b/config.go
@@ -23,35 +23,43 @@ type CacheConfig struct {
 }
 
 type Config struct {
-	Port              int    // Added
-	AuthCode          string // Added
-	PeerID            string // Added
-	Peers             []string
-	Domain            string // Added
-	NodesEnabled      bool   // Added
-	CrawlerEnabled    bool   // Added
-	IndexerEnabled    bool   // Added
-	WebsiteEnabled    bool   // Added
-	RamCacheEnabled   bool
-	DriveCacheEnabled bool // Added
-	LogLevel          int  // Added
+	Port                 int           // Added
+	AuthCode             string        // Added
+	PeerID               string        // Added
+	Peers                []string
+	Domain               string        // Added
+	NodesEnabled         bool          // Added
+	CrawlerEnabled       bool          // Added
+	IndexerEnabled       bool          // Added
+	WebsiteEnabled       bool          // Added
+	RamCacheEnabled      bool
+	DriveCacheEnabled    bool          // Added
+	LogLevel             int           // Added
+	ConcurrentCrawlers   int           // Number of concurrent crawlers
+	CrawlingInterval     time.Duration // Refresh crawled results in...
+ MaxPagesPerDomain int // Max pages to crawl per domain + IndexRefreshInterval time.Duration // Interval for periodic index refresh (e.g., "10m") DriveCache CacheConfig RamCache CacheConfig } var defaultConfig = Config{ - Port: 5000, - Domain: "localhost", - Peers: []string{}, - AuthCode: generateStrongRandomString(64), - NodesEnabled: false, - CrawlerEnabled: true, - IndexerEnabled: false, - WebsiteEnabled: true, - RamCacheEnabled: true, - DriveCacheEnabled: false, - LogLevel: 1, + Port: 5000, + Domain: "localhost", + Peers: []string{}, + AuthCode: generateStrongRandomString(64), + NodesEnabled: false, + CrawlerEnabled: true, + IndexerEnabled: false, + WebsiteEnabled: true, + RamCacheEnabled: true, + DriveCacheEnabled: false, + ConcurrentCrawlers: 5, + CrawlingInterval: 24 * time.Hour, + MaxPagesPerDomain: 10, + IndexRefreshInterval: 2 * time.Minute, + LogLevel: 1, DriveCache: CacheConfig{ Duration: 48 * time.Hour, // Added Path: "./cache", // Added @@ -238,8 +246,13 @@ func saveConfig(config Config) { featuresSec.Key("Crawler").SetValue(strconv.FormatBool(config.CrawlerEnabled)) featuresSec.Key("Indexer").SetValue(strconv.FormatBool(config.IndexerEnabled)) featuresSec.Key("Website").SetValue(strconv.FormatBool(config.WebsiteEnabled)) - featuresSec.Key("RamCache").SetValue(strconv.FormatBool(config.RamCacheEnabled)) - featuresSec.Key("DriveCache").SetValue(strconv.FormatBool(config.DriveCacheEnabled)) + + // Indexer section + indexerSec := cfg.Section("Indexer") + indexerSec.Key("ConcurrentCrawlers").SetValue(strconv.Itoa(config.ConcurrentCrawlers)) + indexerSec.Key("CrawlingInterval").SetValue(config.CrawlingInterval.String()) + indexerSec.Key("MaxPagesPerDomain").SetValue(strconv.Itoa(config.MaxPagesPerDomain)) + indexerSec.Key("IndexRefreshInterval").SetValue(config.IndexRefreshInterval.String()) // DriveCache section driveSec := cfg.Section("DriveCache") @@ -266,53 +279,61 @@ func loadConfig() Config { } // Server - port, _ := cfg.Section("Server").Key("Port").Int() - domain := cfg.Section("Server").Key("Domain").String() - logLevel, _ := cfg.Section("Server").Key("LogLevel").Int() + port := getConfigValue(cfg.Section("Server").Key("Port"), defaultConfig.Port, strconv.Atoi) + domain := getConfigValueString(cfg.Section("Server").Key("Domain"), defaultConfig.Domain) + logLevel := getConfigValue(cfg.Section("Server").Key("LogLevel"), defaultConfig.LogLevel, strconv.Atoi) // Peers - authCode := cfg.Section("Peers").Key("AuthCode").String() - peersStr := cfg.Section("Peers").Key("Peers").String() - peers := strings.Split(peersStr, ",") + authCode := getConfigValueString(cfg.Section("Peers").Key("AuthCode"), defaultConfig.AuthCode) + peers := strings.Split(getConfigValueString(cfg.Section("Peers").Key("Peers"), ""), ",") // Features - nodesEnabled, _ := cfg.Section("Features").Key("Nodes").Bool() - crawlerEnabled, _ := cfg.Section("Features").Key("Crawler").Bool() - indexerEnabled, _ := cfg.Section("Features").Key("Indexer").Bool() - websiteEnabled, _ := cfg.Section("Features").Key("Website").Bool() - ramCacheEnabled, _ := cfg.Section("Features").Key("RamCache").Bool() - driveCacheEnabled, _ := cfg.Section("Features").Key("DriveCache").Bool() + nodesEnabled := getConfigValueBool(cfg.Section("Features").Key("Nodes"), defaultConfig.NodesEnabled) + crawlerEnabled := getConfigValueBool(cfg.Section("Features").Key("Crawler"), defaultConfig.CrawlerEnabled) + indexerEnabled := getConfigValueBool(cfg.Section("Features").Key("Indexer"), defaultConfig.IndexerEnabled) + websiteEnabled := 
getConfigValueBool(cfg.Section("Features").Key("Website"), defaultConfig.WebsiteEnabled) + ramCacheEnabled := getConfigValueBool(cfg.Section("Features").Key("RamCache"), defaultConfig.RamCacheEnabled) + driveCacheEnabled := getConfigValueBool(cfg.Section("Features").Key("DriveCache"), defaultConfig.DriveCacheEnabled) + + // Indexing + concurrentCrawlers := getConfigValue(cfg.Section("Indexer").Key("ConcurrentCrawlers"), defaultConfig.ConcurrentCrawlers, strconv.Atoi) + crawlingInterval := getConfigValue(cfg.Section("Indexer").Key("CrawlingInterval"), defaultConfig.CrawlingInterval, time.ParseDuration) + maxPagesPerDomain := getConfigValue(cfg.Section("Indexer").Key("MaxPagesPerDomain"), defaultConfig.MaxPagesPerDomain, strconv.Atoi) + indexRefreshInterval := getConfigValue(cfg.Section("Indexer").Key("IndexRefreshInterval"), defaultConfig.IndexRefreshInterval, time.ParseDuration) // DriveCache - driveDuration, _ := time.ParseDuration(cfg.Section("DriveCache").Key("Duration").String()) - drivePath := cfg.Section("DriveCache").Key("Path").String() - driveMaxUsage := parseMaxUsageDrive(cfg.Section("DriveCache").Key("MaxUsage").String(), drivePath) + driveDuration := getConfigValue(cfg.Section("DriveCache").Key("Duration"), defaultConfig.DriveCache.Duration, time.ParseDuration) + drivePath := getConfigValueString(cfg.Section("DriveCache").Key("Path"), defaultConfig.DriveCache.Path) + driveMaxUsage := parseMaxUsageDrive(getConfigValueString(cfg.Section("DriveCache").Key("MaxUsage"), formatMaxUsage(defaultConfig.DriveCache.MaxUsageBytes)), drivePath) // maxConcurrentDownloads, _ := cfg.Section("DriveCache").Key("MaxConcurrentDownloads.Thumbnail").Int() // if maxConcurrentDownloads == 0 { // maxConcurrentDownloads = defaultConfig.DriveCache.MaxConcurrentThumbnailDownloads // } // RamCache - ramDuration, _ := time.ParseDuration(cfg.Section("RamCache").Key("Duration").String()) - ramMaxUsage := parseMaxUsageRam(cfg.Section("RamCache").Key("MaxUsage").String()) + ramDuration := getConfigValue(cfg.Section("RamCache").Key("Duration"), defaultConfig.RamCache.Duration, time.ParseDuration) + ramMaxUsage := parseMaxUsageRam(getConfigValueString(cfg.Section("RamCache").Key("MaxUsage"), formatMaxUsage(defaultConfig.RamCache.MaxUsageBytes))) return Config{ - Port: port, - Domain: domain, - LogLevel: logLevel, - AuthCode: authCode, - Peers: peers, - NodesEnabled: nodesEnabled, - CrawlerEnabled: crawlerEnabled, - IndexerEnabled: indexerEnabled, - WebsiteEnabled: websiteEnabled, - RamCacheEnabled: ramCacheEnabled, - DriveCacheEnabled: driveCacheEnabled, + Port: port, + Domain: domain, + LogLevel: logLevel, + AuthCode: authCode, + Peers: peers, + NodesEnabled: nodesEnabled, + CrawlerEnabled: crawlerEnabled, + IndexerEnabled: indexerEnabled, + WebsiteEnabled: websiteEnabled, + RamCacheEnabled: ramCacheEnabled, + DriveCacheEnabled: driveCacheEnabled, + ConcurrentCrawlers: concurrentCrawlers, + CrawlingInterval: crawlingInterval, + MaxPagesPerDomain: maxPagesPerDomain, + IndexRefreshInterval: indexRefreshInterval, DriveCache: CacheConfig{ Duration: driveDuration, MaxUsageBytes: driveMaxUsage, Path: drivePath, - // MaxConcurrentThumbnailDownloads: maxConcurrentDownloads, }, RamCache: CacheConfig{ Duration: ramDuration, @@ -321,6 +342,34 @@ func loadConfig() Config { } } +// getConfigValue retrieves a configuration value or returns a default value from defaultConfig. 
+func getConfigValue[T any](key *ini.Key, defaultValue T, parseFunc func(string) (T, error)) T { + if key == nil || key.String() == "" { + return defaultValue + } + value, err := parseFunc(key.String()) + if err != nil { + return defaultValue + } + return value +} + +// getConfigValueString retrieves a string value or falls back to the default. +func getConfigValueString(key *ini.Key, defaultValue string) string { + if key == nil || key.String() == "" { + return defaultValue + } + return key.String() +} + +// getConfigValueBool retrieves a boolean value or falls back to the default. +func getConfigValueBool(key *ini.Key, defaultValue bool) bool { + if key == nil || key.String() == "" { + return defaultValue + } + return key.MustBool(defaultValue) +} + // Helper to parse MaxUsage string into bytes func parseMaxUsageRam(value string) uint64 { const GiB = 1024 * 1024 * 1024 diff --git a/crawler.go b/crawler.go index bbe3540..fbb5b5e 100644 --- a/crawler.go +++ b/crawler.go @@ -7,6 +7,7 @@ import ( "os" "path/filepath" "strings" + "sync" "time" "golang.org/x/net/html" @@ -18,8 +19,8 @@ func webCrawlerInit() { // First run immediately runCrawlerAndIndexer() - // Then every 24h (adjust as needed) - ticker := time.NewTicker(24 * time.Hour) + // Then run periodically based on CrawlingInterval + ticker := time.NewTicker(config.CrawlingInterval) for range ticker.C { runCrawlerAndIndexer() } @@ -37,16 +38,13 @@ func runCrawlerAndIndexer() { // 2. Crawl each domain and write results to data_to_index.txt outFile := filepath.Join(config.DriveCache.Path, "data_to_index.txt") - if err := crawlDomainsToFile(domains, outFile); err != nil { + if err := crawlDomainsToFile(domains, outFile, config.MaxPagesPerDomain, config.ConcurrentCrawlers); err != nil { printErr("Error crawling domains: %v", err) return } - // 3. Re-index data_to_index.txt - if err := IndexFile(outFile); err != nil { - printErr("Error indexing data_to_index.txt: %v", err) - return - } + // 3. 
Re-index data_to_index.txt periodically based on IndexRefreshInterval + startPeriodicIndexing(outFile, config.IndexRefreshInterval) printDebug("Crawl + index refresh completed.") } @@ -81,10 +79,11 @@ func readDomainsCSV(csvPath string) ([][2]string, error) { } // crawlDomainsToFile visits each domain, extracts minimal data, writes results to outFile -func crawlDomainsToFile(domains [][2]string, outFile string) error { - // Read existing data_to_index.txt into a map to prevent duplicates +func crawlDomainsToFile(domains [][2]string, outFile string, maxPages int, concurrentCrawlers int) error { existingEntries := make(map[string]bool) - if _, err := os.Stat(outFile); err == nil { // File exists + var mu sync.Mutex // Mutex to protect access to the map + + if _, err := os.Stat(outFile); err == nil { file, err := os.Open(outFile) if err != nil { return fmt.Errorf("unable to open %s: %v", outFile, err) @@ -96,7 +95,7 @@ func crawlDomainsToFile(domains [][2]string, outFile string) error { line := scanner.Text() parts := strings.SplitN(line, "|", 5) if len(parts) >= 1 { - existingEntries[parts[0]] = true // Mark existing domain + existingEntries[parts[0]] = true } } } @@ -108,37 +107,48 @@ func crawlDomainsToFile(domains [][2]string, outFile string) error { } defer file.Close() + semaphore := make(chan struct{}, concurrentCrawlers) + var wg sync.WaitGroup + for _, d := range domains { - rank := d[0] - domain := d[1] - if domain == "" || existingEntries["https://"+domain] { - continue - } + wg.Add(1) + semaphore <- struct{}{} + go func(domain [2]string) { + defer wg.Done() + defer func() { <-semaphore }() - fullURL := "https://" + domain - title, desc, keywords := fetchPageMetadata(fullURL) - if title == "" { - title = "Unknown Title" - } - if desc == "" { - desc = "No Description" - } + rank := domain[0] + domainName := domain[1] + fullURL := "https://" + domainName - // Write unique domain to file - line := fmt.Sprintf("%s|%s|%s|%s|%s\n", - fullURL, - sanitize(title), - sanitize(keywords), - sanitize(desc), - rank, - ) - if _, err := file.WriteString(line); err != nil { - return err - } + mu.Lock() + if domainName == "" || existingEntries[fullURL] { + mu.Unlock() + return + } + existingEntries[fullURL] = true + mu.Unlock() - existingEntries[fullURL] = true + title, desc, keywords := fetchPageMetadata(fullURL) + if title == "" { + title = "Unknown Title" + } + if desc == "" { + desc = "No Description" + } + + line := fmt.Sprintf("%s|%s|%s|%s|%s\n", + fullURL, + sanitize(title), + sanitize(keywords), + sanitize(desc), + rank, + ) + file.WriteString(line) + }(d) } + wg.Wait() return nil } diff --git a/indexer.go b/indexer.go index 66bc100..7963fc1 100644 --- a/indexer.go +++ b/indexer.go @@ -28,12 +28,12 @@ var ( bleveIndex bleve.Index ) +// startPeriodicIndexing refreshes the index from a file periodically func startPeriodicIndexing(filePath string, interval time.Duration) { go func() { for { printDebug("Refreshing index from %s", filePath) - err := IndexFile(filePath) - if err != nil { + if err := IndexFile(filePath); err != nil { printErr("Failed to refresh index: %v", err) } time.Sleep(interval) @@ -139,7 +139,7 @@ func IndexFile(filePath string) error { return fmt.Errorf("error reading file: %v", err) } - printDebug("Indexed %d unique domains from %s\n", len(indexedDomains), filePath) + printDebug("Indexed %d unique domains from %s", len(indexedDomains), filePath) return nil } From a9a6948a44254008b03c7b1fd869c370e7541f36 Mon Sep 17 00:00:00 2001 From: partisan <none@noone.no> Date: Tue, 31 
Dec 2024 02:44:14 +0100 Subject: [PATCH 3/9] updated indexing & user agent generator --- agent.go | 43 ++++++++++++++++++++++++++------------ indexer.go | 60 ++++++++++++++++++++++++++++++++++-------------------- init.go | 5 +++++ 3 files changed, 73 insertions(+), 35 deletions(-) diff --git a/agent.go b/agent.go index 296b4e4..6333102 100755 --- a/agent.go +++ b/agent.go @@ -3,7 +3,7 @@ package main import ( "encoding/json" "fmt" - "io/ioutil" + "io" "math/rand" "net/http" "sort" @@ -40,13 +40,33 @@ var ( func fetchLatestBrowserVersions() (BrowserData, error) { url := "https://raw.githubusercontent.com/Fyrd/caniuse/master/fulldata-json/data-2.0.json" - resp, err := http.Get(url) + // // Optional: skip TLS verification to avoid certificate errors + // transport := &http.Transport{ + // TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, + // } + + // Increase the HTTP client timeout + client := &http.Client{ + Timeout: 30 * time.Second, + // Transport: transport, + } + + // Build the request manually to set headers + req, err := http.NewRequest("GET", url, nil) + if err != nil { + return BrowserData{}, err + } + // Custom user agent and English language preference + req.Header.Set("User-Agent", "MyCustomAgent/1.0 (compatible; +https://example.com)") + req.Header.Set("Accept-Language", "en-US,en;q=0.9") + + resp, err := client.Do(req) if err != nil { return BrowserData{}, err } defer resp.Body.Close() - body, err := ioutil.ReadAll(resp.Body) + body, err := io.ReadAll(resp.Body) if err != nil { return BrowserData{}, err } @@ -109,7 +129,7 @@ func randomUserAgent() (string, error) { return "", err } - rand.Seed(time.Now().UnixNano()) + rand := rand.New(rand.NewSource(time.Now().UnixNano())) // Simulated browser usage statistics (in percentages) usageStats := map[string]float64{ @@ -161,6 +181,7 @@ func randomUserAgent() (string, error) { } } + // Fallback to the last version if none matched if version == "" { version = versions[len(versions)-1].Version } @@ -240,11 +261,11 @@ func updateUserAgentVersion(userAgent string, newVersions BrowserData) string { browserType = "Firefox" } - // Get the latest version for the browser type + // Get the latest version for that browser var latestVersion string - if browserType == "Firefox" { + if browserType == "Firefox" && len(newVersions.Firefox) > 0 { latestVersion = newVersions.Firefox[0].Version - } else if browserType == "Chromium" { + } else if browserType == "Chromium" && len(newVersions.Chromium) > 0 { latestVersion = newVersions.Chromium[0].Version } @@ -252,7 +273,7 @@ func updateUserAgentVersion(userAgent string, newVersions BrowserData) string { return generateUserAgent(browserType, latestVersion) } -func periodicUpdate() { +func periodicAgentUpdate() { for { // Sleep for a random interval between 1 and 2 days time.Sleep(time.Duration(24+rand.Intn(24)) * time.Hour) @@ -309,12 +330,8 @@ func GetNewUserAgent(cacheKey string) (string, error) { return userAgent, nil } -func init() { - go periodicUpdate() -} - // func main() { -// go periodicUpdate() // not needed here +// go periodicAgentUpdate() // not needed here // cacheKey := "image-search" // userAgent, err := GetUserAgent(cacheKey) diff --git a/indexer.go b/indexer.go index 7963fc1..306c28d 100644 --- a/indexer.go +++ b/indexer.go @@ -3,6 +3,7 @@ package main import ( "bufio" "fmt" + "net/url" "os" "path/filepath" "strconv" @@ -10,10 +11,10 @@ import ( "time" "github.com/blevesearch/bleve/v2" + "golang.org/x/net/publicsuffix" ) // Document represents a single document to be indexed. 
-// You can add more fields if needed. type Document struct { ID string `json:"id"` Link string `json:"link"` @@ -48,16 +49,20 @@ func InitIndex() error { // Index doesn't exist, create a new one mapping := bleve.NewIndexMapping() - // Custom mapping for the document docMapping := bleve.NewDocumentMapping() - // Text fields with custom analyzers for better tokenization - textFieldMapping := bleve.NewTextFieldMapping() - textFieldMapping.Analyzer = "standard" // Use standard analyzer for partial and fuzzy matches + // Text fields + titleFieldMapping := bleve.NewTextFieldMapping() + titleFieldMapping.Analyzer = "standard" + docMapping.AddFieldMappingsAt("title", titleFieldMapping) - docMapping.AddFieldMappingsAt("title", textFieldMapping) - docMapping.AddFieldMappingsAt("description", textFieldMapping) - docMapping.AddFieldMappingsAt("tags", textFieldMapping) + descFieldMapping := bleve.NewTextFieldMapping() + descFieldMapping.Analyzer = "standard" + docMapping.AddFieldMappingsAt("description", descFieldMapping) + + tagFieldMapping := bleve.NewTextFieldMapping() + tagFieldMapping.Analyzer = "standard" + docMapping.AddFieldMappingsAt("tags", tagFieldMapping) // Numeric field for popularity popularityMapping := bleve.NewNumericFieldMapping() @@ -77,8 +82,19 @@ func InitIndex() error { return nil } +func normalizeDomain(rawURL string) string { + parsed, err := url.Parse(rawURL) + if err != nil { + return rawURL + } + domain, err := publicsuffix.EffectiveTLDPlusOne(parsed.Hostname()) + if err != nil { + return parsed.Hostname() // fallback + } + return domain +} + // IndexFile reads a file line-by-line and indexes each line as a document. -// Each line represents a simple document. Adjust parsing as needed. func IndexFile(filePath string) error { file, err := os.Open(filePath) if err != nil { @@ -88,27 +104,29 @@ func IndexFile(filePath string) error { scanner := bufio.NewScanner(file) batch := bleveIndex.NewBatch() - indexedDomains := make(map[string]bool) // Track indexed domains + + // Map to track normalized domains we’ve already indexed + indexedDomains := make(map[string]bool) for scanner.Scan() { line := scanner.Text() - // Split the line into 5 fields: link|title|tags|description|popularity + // link|title|tags|description|popularity parts := strings.SplitN(line, "|", 5) if len(parts) < 5 { - continue // Skip malformed lines + continue } - domain := parts[0] + // Normalize domain part so duplicates share the same “key” + normalized := normalizeDomain(parts[0]) popularity, _ := strconv.ParseInt(parts[4], 10, 64) - // Skip if the domain is already indexed - if indexedDomains[domain] { + if indexedDomains[normalized] { continue } doc := Document{ - ID: domain, // Use the domain as the unique ID + ID: normalized, Link: parts[0], Title: parts[1], Tags: parts[2], @@ -127,10 +145,9 @@ func IndexFile(filePath string) error { return fmt.Errorf("failed to index document: %v", err) } - indexedDomains[domain] = true // Mark the domain as indexed + indexedDomains[normalized] = true } - // Commit the batch if err := bleveIndex.Batch(batch); err != nil { return fmt.Errorf("error committing batch: %v", err) } @@ -139,13 +156,12 @@ func IndexFile(filePath string) error { return fmt.Errorf("error reading file: %v", err) } - printDebug("Indexed %d unique domains from %s", len(indexedDomains), filePath) + printDebug("Indexed %d unique normalized domains from %s", len(indexedDomains), filePath) return nil } // SearchIndex performs a full-text search on the indexed data. 
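// A hypothetical caller-side sketch (assuming InitIndex has already been called;
// the query string is illustrative only):
//
//	docs, err := SearchIndex("self hosted search", 1, 10)
//	if err == nil {
//		for _, d := range docs {
//			fmt.Println(d.Link, d.Title)
//		}
//	}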
func SearchIndex(queryStr string, page, pageSize int) ([]Document, error) { - // Create compound query exactMatch := bleve.NewMatchQuery(queryStr) // Exact match fuzzyMatch := bleve.NewFuzzyQuery(queryStr) // Fuzzy match fuzzyMatch.Fuzziness = 2 @@ -160,8 +176,8 @@ func SearchIndex(queryStr string, page, pageSize int) ([]Document, error) { req.Size = pageSize req.From = (page - 1) * pageSize - // Sort by popularity - req.SortBy([]string{"popularity"}) + // Sort primarily by relevance (score), then by popularity descending + req.SortBy([]string{"-_score", "-popularity"}) res, err := bleveIndex.Search(req) if err != nil { diff --git a/init.go b/init.go index c92e656..7a6dba2 100644 --- a/init.go +++ b/init.go @@ -61,6 +61,11 @@ func main() { } config.PeerID = hostID + // Initiate Browser Agent updater + if config.CrawlerEnabled || config.IndexerEnabled { + go periodicAgentUpdate() + } + InitializeLanguage("en") // Initialize language before generating OpenSearch generateOpenSearchXML(config) From 3494457336f4d8b0f5138b0057717d7b8af81c9d Mon Sep 17 00:00:00 2001 From: partisan <none@noone.no> Date: Wed, 1 Jan 2025 13:49:16 +0100 Subject: [PATCH 4/9] improved crawler data extraction --- crawler-extraction.go | 204 ++++++++++++++++++++++++++++++++++++++++++ crawler.go | 100 ++------------------- go.mod | 5 ++ go.sum | 14 +++ 4 files changed, 231 insertions(+), 92 deletions(-) create mode 100644 crawler-extraction.go diff --git a/crawler-extraction.go b/crawler-extraction.go new file mode 100644 index 0000000..1594bef --- /dev/null +++ b/crawler-extraction.go @@ -0,0 +1,204 @@ +package main + +import ( + "net/http" + "net/url" + "strings" + "time" + + "github.com/go-shiori/go-readability" + "golang.org/x/net/html" +) + +// fetchPageMetadata tries extracting title/description/keywords from standard HTML, +// OG, Twitter, then falls back to go-readability if needed. If after all that we +// still have no title or no description, we return ("", "", "") so the caller +// can skip saving it. +// +// 1. <title>, <meta name="description"/>, <meta name="keywords"/> +// 2. <meta property="og:title">, <meta property="og:description"> +// 3. <meta name="twitter:title">, <meta name="twitter:description"> +// 4. go-readability fallback (if title or description is still missing) +// 5. Basic heuristic to detect “wrong” content from readability (e.g. raw HTML or “readability-page-1”). 
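+//
+// Hypothetical caller-side sketch (mirrors how crawler.go uses this function;
+// the URL is illustrative only):
+//
+//	title, desc, keywords := fetchPageMetadata("https://example.com")
+//	if title == "" || desc == "" {
+//		// skip this page rather than indexing placeholder values
+//	}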
+func fetchPageMetadata(pageURL string) (string, string, string) { + userAgent, err := GetUserAgent("crawler") + if err != nil { + printDebug("Failed to generate User-Agent: %v", err) + return "", "", "" + } + + client := &http.Client{Timeout: 15 * time.Second} + req, err := http.NewRequest("GET", pageURL, nil) + if err != nil { + printDebug("Failed to create request for %s: %v", pageURL, err) + return "", "", "" + } + + // Force English content when possible + req.Header.Set("User-Agent", userAgent) + req.Header.Set("Accept-Language", "en-US,en;q=0.9") + + resp, err := client.Do(req) + if err != nil { + printDebug("Failed to GET %s: %v", pageURL, err) + return "", "", "" + } + defer resp.Body.Close() + + // Skip non-2xx + if resp.StatusCode < 200 || resp.StatusCode >= 300 { + printDebug("Skipping %s due to HTTP status: %d", pageURL, resp.StatusCode) + return "", "", "" + } + + // First pass: standard HTML parse + doc, err := html.Parse(resp.Body) + if err != nil { + printDebug("HTML parse error for %s: %v", pageURL, err) + return "", "", "" + } + + var ( + title, desc, keywords string + ogTitle, ogDesc string + twTitle, twDesc string + foundTitle, foundDesc bool + ) + + var walk func(*html.Node) + walk = func(n *html.Node) { + if n.Type == html.ElementNode { + switch strings.ToLower(n.Data) { + case "title": + if n.FirstChild != nil { + title = n.FirstChild.Data + foundTitle = true + } + case "meta": + var metaName, metaProperty, contentVal string + for _, attr := range n.Attr { + switch strings.ToLower(attr.Key) { + case "name": + metaName = strings.ToLower(attr.Val) + case "property": + metaProperty = strings.ToLower(attr.Val) + case "content": + contentVal = attr.Val + } + } + + // Standard meta tags + switch metaName { + case "description": + desc = contentVal + foundDesc = true + case "keywords": + keywords = contentVal + case "twitter:title": + twTitle = contentVal + case "twitter:description": + twDesc = contentVal + } + + // Open Graph tags + switch metaProperty { + case "og:title": + ogTitle = contentVal + case "og:description": + ogDesc = contentVal + } + } + } + for c := n.FirstChild; c != nil; c = c.NextSibling { + walk(c) + } + } + walk(doc) + + // Fallback to OG or Twitter if <title>/description are missing + if !foundTitle { + if ogTitle != "" { + title = ogTitle + } else if twTitle != "" { + title = twTitle + } + } + if !foundDesc { + if ogDesc != "" { + desc = ogDesc + } else if twDesc != "" { + desc = twDesc + } + } + + // If still missing title or desc, fallback to go-readability + if title == "" || desc == "" { + parsedURL, parseErr := url.Parse(pageURL) + if parseErr != nil { + printDebug("Failed to parse URL %s: %v", pageURL, parseErr) + // We must skip if we can't parse the URL for readability + return "", "", "" + } + + readResp, readErr := client.Get(pageURL) + if readErr == nil && readResp.StatusCode >= 200 && readResp.StatusCode < 300 { + defer readResp.Body.Close() + + article, rdErr := readability.FromReader(readResp.Body, parsedURL) + if rdErr == nil { + // If we still have no title, try from readability + if title == "" && article.Title != "" { + title = article.Title + } + // If we still have no description, try article.Excerpt + if desc == "" && article.Excerpt != "" { + desc = article.Excerpt + } else if desc == "" && len(article.Content) > 0 { + // If excerpt is empty, use a snippet from article.Content + snippet := article.Content + if len(snippet) > 200 { + snippet = snippet[:200] + "..." 
+ } + desc = snippet + } + } else { + printDebug("go-readability failed for %s: %v", pageURL, rdErr) + } + } + } + + // Heuristic: discard obviously incorrect HTML-y strings or placeholders + if looksLikeRawHTML(title) { + title = "" + } + if looksLikeRawHTML(desc) { + desc = "" + } + + // If after all that we have no title or description, skip + if title == "" || desc == "" { + return "", "", "" + } + + return sanitize(title), sanitize(desc), sanitize(keywords) +} + +// looksLikeRawHTML is a simple heuristic to check for leftover HTML or +// go-readability noise (e.g., "readability-page-1"). +func looksLikeRawHTML(text string) bool { + textLower := strings.ToLower(text) + if strings.Contains(textLower, "readability-page") { + return true + } + if strings.Count(textLower, "<div") > 0 || strings.Count(textLower, "<p") > 2 { + return true + } + return false +} + +// sanitize removes pipes and newlines so they don't break our output format. +func sanitize(input string) string { + input = strings.ReplaceAll(input, "|", " ") + input = strings.ReplaceAll(input, "\n", " ") + return strings.TrimSpace(input) +} diff --git a/crawler.go b/crawler.go index fbb5b5e..2a934f6 100644 --- a/crawler.go +++ b/crawler.go @@ -3,14 +3,11 @@ package main import ( "bufio" "fmt" - "net/http" "os" "path/filepath" "strings" "sync" "time" - - "golang.org/x/net/html" ) // webCrawlerInit is called during init on program start @@ -130,18 +127,18 @@ func crawlDomainsToFile(domains [][2]string, outFile string, maxPages int, concu mu.Unlock() title, desc, keywords := fetchPageMetadata(fullURL) - if title == "" { - title = "Unknown Title" - } - if desc == "" { - desc = "No Description" + + // Skip saving if title or description is missing + if title == "" || desc == "" { + printDebug("Skipping %s: missing title or description", fullURL) + return } line := fmt.Sprintf("%s|%s|%s|%s|%s\n", fullURL, - sanitize(title), - sanitize(keywords), - sanitize(desc), + title, + keywords, + desc, rank, ) file.WriteString(line) @@ -151,84 +148,3 @@ func crawlDomainsToFile(domains [][2]string, outFile string, maxPages int, concu wg.Wait() return nil } - -// fetchPageMetadata does a simple GET and parses <title>, meta[name=description], meta[name=keywords] -func fetchPageMetadata(pageURL string) (string, string, string) { - // Generate a User-Agent using your GetUserAgent function - userAgent, err := GetUserAgent("crawler") - if err != nil { - printWarn("Failed to generate User-Agent: %v", err) - return "", "", "" - } - - client := &http.Client{Timeout: 15 * time.Second} - req, err := http.NewRequest("GET", pageURL, nil) - if err != nil { - printWarn("Failed to create request for %s: %v", pageURL, err) - return "", "", "" - } - - // Set the dynamically generated User-Agent - req.Header.Set("User-Agent", userAgent) - - resp, err := client.Do(req) - if err != nil { - printWarn("Failed to GET %s: %v", pageURL, err) - return "", "", "" - } - defer resp.Body.Close() - - // Handle non-200 responses - if resp.StatusCode == 403 || resp.StatusCode == 401 { - printWarn("Skipping %s: HTTP %d", pageURL, resp.StatusCode) - return "", "", "" - } else if resp.StatusCode < 200 || resp.StatusCode >= 300 { - printWarn("Non-200 for %s: %d", pageURL, resp.StatusCode) - return "", "", "" - } - - // Parse HTML - doc, err := html.Parse(resp.Body) - if err != nil { - printWarn("HTML parse error for %s: %v", pageURL, err) - return "", "", "" - } - - var title, desc, keywords string - var f func(*html.Node) - f = func(n *html.Node) { - if n.Type == html.ElementNode 
&& n.Data == "title" && n.FirstChild != nil { - title = n.FirstChild.Data - } - if n.Type == html.ElementNode && n.Data == "meta" { - var nameVal, contentVal string - for _, attr := range n.Attr { - switch strings.ToLower(attr.Key) { - case "name": - nameVal = strings.ToLower(attr.Val) - case "content": - contentVal = attr.Val - } - } - if nameVal == "description" { - desc = contentVal - } else if nameVal == "keywords" { - keywords = contentVal - } - } - for c := n.FirstChild; c != nil; c = c.NextSibling { - f(c) - } - } - f(doc) - - return title, desc, keywords -} - -// sanitize is a quick helper to remove newlines/pipes from fields -func sanitize(input string) string { - input = strings.ReplaceAll(input, "|", " ") - input = strings.ReplaceAll(input, "\n", " ") - input = strings.TrimSpace(input) - return input -} diff --git a/go.mod b/go.mod index 6895586..a293a75 100644 --- a/go.mod +++ b/go.mod @@ -15,12 +15,14 @@ require ( require ( github.com/blevesearch/bleve/v2 v2.4.4 + github.com/go-shiori/go-readability v0.0.0-20241012063810-92284fa8a71f golang.org/x/net v0.33.0 ) require ( github.com/RoaringBitmap/roaring v1.9.4 // indirect github.com/andybalholm/cascadia v1.3.3 // indirect + github.com/araddon/dateparse v0.0.0-20210429162001-6b43995a97de // indirect github.com/bits-and-blooms/bitset v1.20.0 // indirect github.com/blevesearch/bleve_index_api v1.2.0 // indirect github.com/blevesearch/geo v0.1.20 // indirect @@ -40,6 +42,8 @@ require ( github.com/blevesearch/zapx/v15 v15.3.17 // indirect github.com/blevesearch/zapx/v16 v16.1.9-0.20241217210638-a0519e7caf3b // indirect github.com/go-ole/go-ole v1.3.0 // indirect + github.com/go-shiori/dom v0.0.0-20230515143342-73569d674e1c // indirect + github.com/gogs/chardet v0.0.0-20211120154057-b7413eaefb8f // indirect github.com/golang/geo v0.0.0-20230421003525-6adc56603217 // indirect github.com/golang/protobuf v1.5.4 // indirect github.com/golang/snappy v0.0.4 // indirect @@ -51,5 +55,6 @@ require ( github.com/yusufpapurcu/wmi v1.2.4 // indirect go.etcd.io/bbolt v1.3.11 // indirect golang.org/x/sys v0.28.0 // indirect + golang.org/x/text v0.21.0 // indirect google.golang.org/protobuf v1.36.0 // indirect ) diff --git a/go.sum b/go.sum index f3f643b..59414b4 100644 --- a/go.sum +++ b/go.sum @@ -4,6 +4,8 @@ github.com/RoaringBitmap/roaring v1.9.4 h1:yhEIoH4YezLYT04s1nHehNO64EKFTop/wBhxv github.com/RoaringBitmap/roaring v1.9.4/go.mod h1:6AXUsoIEzDTFFQCe1RbGA6uFONMhvejWj5rqITANK90= github.com/andybalholm/cascadia v1.3.3 h1:AG2YHrzJIm4BZ19iwJ/DAua6Btl3IwJX+VI4kktS1LM= github.com/andybalholm/cascadia v1.3.3/go.mod h1:xNd9bqTn98Ln4DwST8/nG+H0yuB8Hmgu1YHNnWw0GeA= +github.com/araddon/dateparse v0.0.0-20210429162001-6b43995a97de h1:FxWPpzIjnTlhPwqqXc4/vE0f7GvRjuAsbW+HOIe8KnA= +github.com/araddon/dateparse v0.0.0-20210429162001-6b43995a97de/go.mod h1:DCaWoUhZrYW9p1lxo/cm8EmUOOzAPSEZNGF2DK1dJgw= github.com/bits-and-blooms/bitset v1.12.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8= github.com/bits-and-blooms/bitset v1.20.0 h1:2F+rfL86jE2d/bmw7OhqUg2Sj/1rURkBn3MdfoPyRVU= github.com/bits-and-blooms/bitset v1.20.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8= @@ -51,6 +53,12 @@ github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSs github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0= github.com/go-ole/go-ole v1.3.0 h1:Dt6ye7+vXGIKZ7Xtk4s6/xVdGDQynvom7xCFEdWr6uE= github.com/go-ole/go-ole v1.3.0/go.mod h1:5LS6F96DhAwUc7C+1HLexzMXY1xGRSryjyPPKW6zv78= +github.com/go-shiori/dom 
v0.0.0-20230515143342-73569d674e1c h1:wpkoddUomPfHiOziHZixGO5ZBS73cKqVzZipfrLmO1w= +github.com/go-shiori/dom v0.0.0-20230515143342-73569d674e1c/go.mod h1:oVDCh3qjJMLVUSILBRwrm+Bc6RNXGZYtoh9xdvf1ffM= +github.com/go-shiori/go-readability v0.0.0-20241012063810-92284fa8a71f h1:cypj7SJh+47G9J3VCPdMzT3uWcXWAWDJA54ErTfOigI= +github.com/go-shiori/go-readability v0.0.0-20241012063810-92284fa8a71f/go.mod h1:YWa00ashoPZMAOElrSn4E1cJErhDVU6PWAll4Hxzn+w= +github.com/gogs/chardet v0.0.0-20211120154057-b7413eaefb8f h1:3BSP1Tbs2djlpprl7wCLuiqMaUh5SJkkzI2gDs+FgLs= +github.com/gogs/chardet v0.0.0-20211120154057-b7413eaefb8f/go.mod h1:Pcatq5tYkCW2Q6yrR2VRHlbHpZ/R4/7qyL1TCF7vl14= github.com/golang/geo v0.0.0-20230421003525-6adc56603217 h1:HKlyj6in2JV6wVkmQ4XmG/EIm+SCYlPZ+V4GWit7Z+I= github.com/golang/geo v0.0.0-20230421003525-6adc56603217/go.mod h1:8wI0hitZ3a1IxZfeH3/5I97CI8i5cLGsYe7xNhQGs9U= github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= @@ -64,6 +72,7 @@ github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnr github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/leonelquinteros/gotext v1.7.0 h1:jcJmF4AXqyamP7vuw2MMIKs+O3jAEmvrc5JQiI8Ht/8= github.com/leonelquinteros/gotext v1.7.0/go.mod h1:qJdoQuERPpccw7L70uoU+K/BvTfRBHYsisCQyFLXyvw= +github.com/mattn/go-runewidth v0.0.10/go.mod h1:RAqKPSqVFrSLVXbA8x7dzmKdmGzieGRCM46jaSJTDAk= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= @@ -73,6 +82,10 @@ github.com/mschoch/smat v0.2.0 h1:8imxQsjDm8yFEAVBe7azKmKSgzSkZXDuKkSq9374khM= github.com/mschoch/smat v0.2.0/go.mod h1:kc9mz7DoBKqDyiRL7VZN8KvXQMWeTaVnttLRXOlotKw= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/rivo/uniseg v0.1.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= +github.com/scylladb/termtables v0.0.0-20191203121021-c4c0b6d42ff4/go.mod h1:C1a7PQSMz9NShzorzCiG2fk9+xuCgLkPeCvMHYR2OWg= +github.com/sergi/go-diff v1.1.0 h1:we8PVUC3FE2uYfodKH/nBHMSetSfHDR6scGdBi+erh0= +github.com/sergi/go-diff v1.1.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM= github.com/shirou/gopsutil v3.21.11+incompatible h1:+1+c1VGhc88SSonWP6foOcLhvnKlUeu/erjjvaPEYiI= github.com/shirou/gopsutil v3.21.11+incompatible/go.mod h1:5b4v6he4MtMOwMlS0TUMTu2PcXUg8+E1lC7eC3UO/RA= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= @@ -147,6 +160,7 @@ golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo= golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= From 
c71808aa1e116c8c9238e620f255ee5d8ba3f4bb Mon Sep 17 00:00:00 2001 From: partisan <none@noone.no> Date: Wed, 1 Jan 2025 14:50:12 +0100 Subject: [PATCH 5/9] improved crawler data extraction (added chromedp) --- .gitignore | 3 +- config.go | 101 +++++++++++----------- crawler-extraction.go | 196 ++++++++++++++++++++++++++---------------- crawler.go | 146 ++++++++++++++++++++++--------- go.mod | 8 ++ go.sum | 17 ++++ 6 files changed, 305 insertions(+), 166 deletions(-) diff --git a/.gitignore b/.gitignore index 118b838..5f5aeab 100644 --- a/.gitignore +++ b/.gitignore @@ -5,4 +5,5 @@ image_cache/ cache/ *.min.js *.min.css -qgato \ No newline at end of file +qgato +test.py \ No newline at end of file diff --git a/config.go b/config.go index 4ea4eb2..bdd9ccc 100644 --- a/config.go +++ b/config.go @@ -23,43 +23,45 @@ type CacheConfig struct { } type Config struct { - Port int // Added - AuthCode string // Added - PeerID string // Added - Peers []string - Domain string // Added - NodesEnabled bool // Added - CrawlerEnabled bool // Added - IndexerEnabled bool // Added - WebsiteEnabled bool // Added - RamCacheEnabled bool - DriveCacheEnabled bool // Added - LogLevel int // Added - ConcurrentCrawlers int // Number of concurrent crawlers - CrawlingInterval time.Duration // Refres crawled results in... - MaxPagesPerDomain int // Max pages to crawl per domain - IndexRefreshInterval time.Duration // Interval for periodic index refresh (e.g., "10m") + Port int // Added + AuthCode string // Added + PeerID string // Added + Peers []string + Domain string // Added + NodesEnabled bool // Added + CrawlerEnabled bool // Added + IndexerEnabled bool // Added + WebsiteEnabled bool // Added + RamCacheEnabled bool + DriveCacheEnabled bool // Added + LogLevel int // Added + ConcurrentStandardCrawlers int + ConcurrentChromeCrawlers int + CrawlingInterval time.Duration // Refres crawled results in... 
+ MaxPagesPerDomain int // Max pages to crawl per domain + IndexRefreshInterval time.Duration // Interval for periodic index refresh (e.g., "10m") DriveCache CacheConfig RamCache CacheConfig } var defaultConfig = Config{ - Port: 5000, - Domain: "localhost", - Peers: []string{}, - AuthCode: generateStrongRandomString(64), - NodesEnabled: false, - CrawlerEnabled: true, - IndexerEnabled: false, - WebsiteEnabled: true, - RamCacheEnabled: true, - DriveCacheEnabled: false, - ConcurrentCrawlers: 5, - CrawlingInterval: 24 * time.Hour, - MaxPagesPerDomain: 10, - IndexRefreshInterval: 2 * time.Minute, - LogLevel: 1, + Port: 5000, + Domain: "localhost", + Peers: []string{}, + AuthCode: generateStrongRandomString(64), + NodesEnabled: false, + CrawlerEnabled: true, + IndexerEnabled: false, + WebsiteEnabled: true, + RamCacheEnabled: true, + DriveCacheEnabled: false, + ConcurrentStandardCrawlers: 12, + ConcurrentChromeCrawlers: 4, + CrawlingInterval: 24 * time.Hour, + MaxPagesPerDomain: 10, + IndexRefreshInterval: 2 * time.Minute, + LogLevel: 1, DriveCache: CacheConfig{ Duration: 48 * time.Hour, // Added Path: "./cache", // Added @@ -249,7 +251,8 @@ func saveConfig(config Config) { // Indexer section indexerSec := cfg.Section("Indexer") - indexerSec.Key("ConcurrentCrawlers").SetValue(strconv.Itoa(config.ConcurrentCrawlers)) + indexerSec.Key("ConcurrentStandardCrawlers").SetValue(strconv.Itoa(config.ConcurrentStandardCrawlers)) + indexerSec.Key("ConcurrentChromeCrawlers").SetValue(strconv.Itoa(config.ConcurrentChromeCrawlers)) indexerSec.Key("CrawlingInterval").SetValue(config.CrawlingInterval.String()) indexerSec.Key("MaxPagesPerDomain").SetValue(strconv.Itoa(config.MaxPagesPerDomain)) indexerSec.Key("IndexRefreshInterval").SetValue(config.IndexRefreshInterval.String()) @@ -296,7 +299,8 @@ func loadConfig() Config { driveCacheEnabled := getConfigValueBool(cfg.Section("Features").Key("DriveCache"), defaultConfig.DriveCacheEnabled) // Indexing - concurrentCrawlers := getConfigValue(cfg.Section("Indexer").Key("ConcurrentCrawlers"), defaultConfig.ConcurrentCrawlers, strconv.Atoi) + concurrentStandardCrawlers := getConfigValue(cfg.Section("Indexer").Key("ConcurrentStandardCrawlers"), defaultConfig.ConcurrentStandardCrawlers, strconv.Atoi) + concurrentChromeCrawlers := getConfigValue(cfg.Section("Indexer").Key("ConcurrentChromeCrawlers"), defaultConfig.ConcurrentChromeCrawlers, strconv.Atoi) crawlingInterval := getConfigValue(cfg.Section("Indexer").Key("CrawlingInterval"), defaultConfig.CrawlingInterval, time.ParseDuration) maxPagesPerDomain := getConfigValue(cfg.Section("Indexer").Key("MaxPagesPerDomain"), defaultConfig.MaxPagesPerDomain, strconv.Atoi) indexRefreshInterval := getConfigValue(cfg.Section("Indexer").Key("IndexRefreshInterval"), defaultConfig.IndexRefreshInterval, time.ParseDuration) @@ -315,21 +319,22 @@ func loadConfig() Config { ramMaxUsage := parseMaxUsageRam(getConfigValueString(cfg.Section("RamCache").Key("MaxUsage"), formatMaxUsage(defaultConfig.RamCache.MaxUsageBytes))) return Config{ - Port: port, - Domain: domain, - LogLevel: logLevel, - AuthCode: authCode, - Peers: peers, - NodesEnabled: nodesEnabled, - CrawlerEnabled: crawlerEnabled, - IndexerEnabled: indexerEnabled, - WebsiteEnabled: websiteEnabled, - RamCacheEnabled: ramCacheEnabled, - DriveCacheEnabled: driveCacheEnabled, - ConcurrentCrawlers: concurrentCrawlers, - CrawlingInterval: crawlingInterval, - MaxPagesPerDomain: maxPagesPerDomain, - IndexRefreshInterval: indexRefreshInterval, + Port: port, + Domain: domain, + LogLevel:
logLevel, + AuthCode: authCode, + Peers: peers, + NodesEnabled: nodesEnabled, + CrawlerEnabled: crawlerEnabled, + IndexerEnabled: indexerEnabled, + WebsiteEnabled: websiteEnabled, + RamCacheEnabled: ramCacheEnabled, + DriveCacheEnabled: driveCacheEnabled, + ConcurrentStandardCrawlers: concurrentStandardCrawlers, + ConcurrentChromeCrawlers: concurrentChromeCrawlers, + CrawlingInterval: crawlingInterval, + MaxPagesPerDomain: maxPagesPerDomain, + IndexRefreshInterval: indexRefreshInterval, DriveCache: CacheConfig{ Duration: driveDuration, MaxUsageBytes: driveMaxUsage, diff --git a/crawler-extraction.go b/crawler-extraction.go index 1594bef..4ce8b9d 100644 --- a/crawler-extraction.go +++ b/crawler-extraction.go @@ -1,69 +1,99 @@ package main import ( + "context" "net/http" "net/url" "strings" "time" + "github.com/chromedp/cdproto/emulation" + "github.com/chromedp/chromedp" "github.com/go-shiori/go-readability" "golang.org/x/net/html" ) -// fetchPageMetadata tries extracting title/description/keywords from standard HTML, -// OG, Twitter, then falls back to go-readability if needed. If after all that we -// still have no title or no description, we return ("", "", "") so the caller -// can skip saving it. -// -// 1. <title>, <meta name="description"/>, <meta name="keywords"/> -// 2. <meta property="og:title">, <meta property="og:description"> -// 3. <meta name="twitter:title">, <meta name="twitter:description"> -// 4. go-readability fallback (if title or description is still missing) -// 5. Basic heuristic to detect “wrong” content from readability (e.g. raw HTML or “readability-page-1”). -func fetchPageMetadata(pageURL string) (string, string, string) { - userAgent, err := GetUserAgent("crawler") +// fetchPageMetadataStandard tries standard HTML parse + go-readability only. +func fetchPageMetadataStandard(pageURL, userAgent string) (string, string, string) { + // 1. Standard HTML parse + title, desc, keywords := extractStandard(pageURL, userAgent) + + // 2. Fallback: go-readability + if title == "" || desc == "" { + title, desc, keywords = fallbackReadability(pageURL, userAgent, title, desc, keywords) + } + + // If still empty, return ("", "", "") + if title == "" || desc == "" { + return "", "", "" + } + return sanitize(title), sanitize(desc), sanitize(keywords) +} + +// fetchPageMetadataChrome uses Chromedp to handle JavaScript-rendered pages. +func fetchPageMetadataChrome(pageURL, userAgent string) (string, string, string) { + // Create context + ctx, cancel := chromedp.NewContext(context.Background()) + defer cancel() + + var renderedHTML string + err := chromedp.Run(ctx, + emulation.SetUserAgentOverride(userAgent).WithAcceptLanguage("en-US,en;q=0.9"), + chromedp.Navigate(pageURL), + chromedp.Sleep(2*time.Second), // Let JS run a bit + chromedp.OuterHTML("html", &renderedHTML), + ) if err != nil { - printDebug("Failed to generate User-Agent: %v", err) + printDebug("chromedp error for %s: %v", pageURL, err) return "", "", "" } + doc, err := html.Parse(strings.NewReader(renderedHTML)) + if err != nil { + printDebug("chromedp parse error for %s: %v", pageURL, err) + return "", "", "" + } + + return extractParsedDOM(doc) +} + +// extractStandard does the normal HTML parse with OG, Twitter, etc. 
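+// (Note: this path does a plain HTTP GET, so pages that only render their
+// content via JavaScript will usually come back with an empty title/description
+// here and are expected to fall through to fetchPageMetadataChrome; see the
+// standard/Chrome hand-off in crawler.go.)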
+func extractStandard(pageURL, userAgent string) (title, desc, keywords string) { client := &http.Client{Timeout: 15 * time.Second} req, err := http.NewRequest("GET", pageURL, nil) if err != nil { printDebug("Failed to create request for %s: %v", pageURL, err) - return "", "", "" + return } - - // Force English content when possible req.Header.Set("User-Agent", userAgent) req.Header.Set("Accept-Language", "en-US,en;q=0.9") resp, err := client.Do(req) if err != nil { printDebug("Failed to GET %s: %v", pageURL, err) - return "", "", "" + return } defer resp.Body.Close() - // Skip non-2xx if resp.StatusCode < 200 || resp.StatusCode >= 300 { printDebug("Skipping %s due to HTTP status: %d", pageURL, resp.StatusCode) - return "", "", "" + return } - // First pass: standard HTML parse doc, err := html.Parse(resp.Body) if err != nil { printDebug("HTML parse error for %s: %v", pageURL, err) - return "", "", "" + return } - var ( - title, desc, keywords string - ogTitle, ogDesc string - twTitle, twDesc string - foundTitle, foundDesc bool - ) + return extractParsedDOM(doc) +} + +// extractParsedDOM uses the same logic to parse <title>, meta, OG, Twitter. +func extractParsedDOM(doc *html.Node) (title, desc, keywords string) { + var ogTitle, ogDesc string + var twTitle, twDesc string + var foundTitle, foundDesc bool var walk func(*html.Node) walk = func(n *html.Node) { @@ -87,7 +117,6 @@ func fetchPageMetadata(pageURL string) (string, string, string) { } } - // Standard meta tags switch metaName { case "description": desc = contentVal @@ -100,7 +129,6 @@ func fetchPageMetadata(pageURL string) (string, string, string) { twDesc = contentVal } - // Open Graph tags switch metaProperty { case "og:title": ogTitle = contentVal @@ -115,7 +143,7 @@ func fetchPageMetadata(pageURL string) (string, string, string) { } walk(doc) - // Fallback to OG or Twitter if <title>/description are missing + // fallback to OG/Twitter if missing if !foundTitle { if ogTitle != "" { title = ogTitle @@ -131,43 +159,7 @@ func fetchPageMetadata(pageURL string) (string, string, string) { } } - // If still missing title or desc, fallback to go-readability - if title == "" || desc == "" { - parsedURL, parseErr := url.Parse(pageURL) - if parseErr != nil { - printDebug("Failed to parse URL %s: %v", pageURL, parseErr) - // We must skip if we can't parse the URL for readability - return "", "", "" - } - - readResp, readErr := client.Get(pageURL) - if readErr == nil && readResp.StatusCode >= 200 && readResp.StatusCode < 300 { - defer readResp.Body.Close() - - article, rdErr := readability.FromReader(readResp.Body, parsedURL) - if rdErr == nil { - // If we still have no title, try from readability - if title == "" && article.Title != "" { - title = article.Title - } - // If we still have no description, try article.Excerpt - if desc == "" && article.Excerpt != "" { - desc = article.Excerpt - } else if desc == "" && len(article.Content) > 0 { - // If excerpt is empty, use a snippet from article.Content - snippet := article.Content - if len(snippet) > 200 { - snippet = snippet[:200] + "..." 
- } - desc = snippet - } - } else { - printDebug("go-readability failed for %s: %v", pageURL, rdErr) - } - } - } - - // Heuristic: discard obviously incorrect HTML-y strings or placeholders + // Heuristic check if looksLikeRawHTML(title) { title = "" } @@ -175,16 +167,68 @@ func fetchPageMetadata(pageURL string) (string, string, string) { desc = "" } - // If after all that we have no title or description, skip - if title == "" || desc == "" { - return "", "", "" - } - - return sanitize(title), sanitize(desc), sanitize(keywords) + return title, desc, keywords } -// looksLikeRawHTML is a simple heuristic to check for leftover HTML or -// go-readability noise (e.g., "readability-page-1"). +// fallbackReadability tries go-readability if title/desc is missing. +func fallbackReadability(pageURL, userAgent, title, desc, keywords string) (string, string, string) { + if title != "" && desc != "" { + return title, desc, keywords + } + + client := &http.Client{Timeout: 15 * time.Second} + readReq, err := http.NewRequest("GET", pageURL, nil) + if err != nil { + printDebug("Failed to create fallbackReadability request: %v", err) + return title, desc, keywords + } + readReq.Header.Set("User-Agent", userAgent) + readReq.Header.Set("Accept-Language", "en-US,en;q=0.9") + + readResp, err := client.Do(readReq) + if err != nil || readResp.StatusCode < 200 || readResp.StatusCode >= 300 { + if err != nil { + printDebug("go-readability GET error for %s: %v", pageURL, err) + } + if readResp != nil { + readResp.Body.Close() + } + return title, desc, keywords + } + defer readResp.Body.Close() + + parsedURL, parseErr := url.Parse(pageURL) + if parseErr != nil { + printDebug("Failed to parse URL: %v", parseErr) + return title, desc, keywords + } + + article, rdErr := readability.FromReader(readResp.Body, parsedURL) + if rdErr != nil { + printDebug("go-readability error for %s: %v", pageURL, rdErr) + return title, desc, keywords + } + + if title == "" && article.Title != "" && !looksLikeRawHTML(article.Title) { + title = article.Title + } + if desc == "" { + if article.Excerpt != "" && !looksLikeRawHTML(article.Excerpt) { + desc = article.Excerpt + } else if len(article.Content) > 0 { + snippet := article.Content + if len(snippet) > 200 { + snippet = snippet[:200] + "..." + } + if !looksLikeRawHTML(snippet) { + desc = snippet + } + } + } + return title, desc, keywords +} + +// looksLikeRawHTML is a simple heuristic check for leftover or invalid HTML text func looksLikeRawHTML(text string) bool { textLower := strings.ToLower(text) if strings.Contains(textLower, "readability-page") { @@ -196,7 +240,7 @@ func looksLikeRawHTML(text string) bool { return false } -// sanitize removes pipes and newlines so they don't break our output format. +// sanitize removes pipes/newlines so they don't break our output format. func sanitize(input string) string { input = strings.ReplaceAll(input, "|", " ") input = strings.ReplaceAll(input, "\n", " ") diff --git a/crawler.go b/crawler.go index 2a934f6..45dc76f 100644 --- a/crawler.go +++ b/crawler.go @@ -35,7 +35,7 @@ func runCrawlerAndIndexer() { // 2. 
Crawl each domain and write results to data_to_index.txt outFile := filepath.Join(config.DriveCache.Path, "data_to_index.txt") - if err := crawlDomainsToFile(domains, outFile, config.MaxPagesPerDomain, config.ConcurrentCrawlers); err != nil { + if err := crawlDomainsToFile(domains, outFile, config.MaxPagesPerDomain); err != nil { printErr("Error crawling domains: %v", err) return } @@ -75,18 +75,20 @@ func readDomainsCSV(csvPath string) ([][2]string, error) { return result, scanner.Err() } -// crawlDomainsToFile visits each domain, extracts minimal data, writes results to outFile -func crawlDomainsToFile(domains [][2]string, outFile string, maxPages int, concurrentCrawlers int) error { +// crawlDomainsToFile does an async pipeline: +// 1. "standard" goroutines read from standardCh -> attempt standard extraction -> if fails, push to chromeCh +// 2. "chrome" goroutines read from chromeCh -> attempt chromedp extraction -> if fails, skip +func crawlDomainsToFile(domains [][2]string, outFile string, maxPages int) error { existingEntries := make(map[string]bool) - var mu sync.Mutex // Mutex to protect access to the map + var mu sync.Mutex // For existingEntries + file writes + // read existing entries from outFile if it exists if _, err := os.Stat(outFile); err == nil { file, err := os.Open(outFile) if err != nil { return fmt.Errorf("unable to open %s: %v", outFile, err) } defer file.Close() - scanner := bufio.NewScanner(file) for scanner.Scan() { line := scanner.Text() @@ -104,47 +106,109 @@ func crawlDomainsToFile(domains [][2]string, outFile string, maxPages int, concu } defer file.Close() - semaphore := make(chan struct{}, concurrentCrawlers) - var wg sync.WaitGroup + // Prepare channels + standardCh := make(chan [2]string, 1000) // buffered channels help avoid blocking + chromeCh := make(chan [2]string, 1000) - for _, d := range domains { - wg.Add(1) - semaphore <- struct{}{} - go func(domain [2]string) { - defer wg.Done() - defer func() { <-semaphore }() + // 1) Spawn standard workers + var wgStandard sync.WaitGroup + for i := 0; i < config.ConcurrentStandardCrawlers; i++ { + wgStandard.Add(1) + go func() { + defer wgStandard.Done() + for dom := range standardCh { + rank := dom[0] + domainName := dom[1] + fullURL := "https://" + domainName - rank := domain[0] - domainName := domain[1] - fullURL := "https://" + domainName - - mu.Lock() - if domainName == "" || existingEntries[fullURL] { + // Mark domain existing so we don't re-crawl duplicates + mu.Lock() + if domainName == "" || existingEntries[fullURL] { + mu.Unlock() + continue + } + existingEntries[fullURL] = true + mu.Unlock() + + // get a standard user agent + userAgent, _ := GetUserAgent("crawler-std") + title, desc, keywords := fetchPageMetadataStandard(fullURL, userAgent) + + if title == "" || desc == "" { + // push to chromeCh + chromeCh <- dom + continue + } + + // write to file + line := fmt.Sprintf("%s|%s|%s|%s|%s\n", + fullURL, title, keywords, desc, rank) + + mu.Lock() + file.WriteString(line) mu.Unlock() - return } - existingEntries[fullURL] = true - mu.Unlock() - - title, desc, keywords := fetchPageMetadata(fullURL) - - // Skip saving if title or description is missing - if title == "" || desc == "" { - printDebug("Skipping %s: missing title or description", fullURL) - return - } - - line := fmt.Sprintf("%s|%s|%s|%s|%s\n", - fullURL, - title, - keywords, - desc, - rank, - ) - file.WriteString(line) - }(d) + }() } - wg.Wait() + // 2) Spawn chrome workers + var wgChrome sync.WaitGroup + for i := 0; i < 
config.ConcurrentChromeCrawlers; i++ { + wgChrome.Add(1) + go func() { + defer wgChrome.Done() + for dom := range chromeCh { + rank := dom[0] + domainName := dom[1] + fullURL := "https://" + domainName + + // Mark domain existing if not already + mu.Lock() + if domainName == "" || existingEntries[fullURL] { + mu.Unlock() + continue + } + existingEntries[fullURL] = true + mu.Unlock() + + // get a chrome user agent + userAgent, _ := GetUserAgent("crawler-chrome") + title, desc, keywords := fetchPageMetadataChrome(fullURL, userAgent) + + if title == "" || desc == "" { + printWarn("Skipping (Chrome) %s: missing title/desc", fullURL) + continue + } + + // write to file + line := fmt.Sprintf("%s|%s|%s|%s|%s\n", + fullURL, title, keywords, desc, rank) + + mu.Lock() + file.WriteString(line) + mu.Unlock() + } + }() + } + + // Feed domains into standardCh + go func() { + for _, dom := range domains { + // optionally, if maxPages is relevant, you can track how many have been processed + standardCh <- dom + } + // close the standardCh once all are queued + close(standardCh) + }() + + // Wait for standard workers to finish, then close chromeCh + go func() { + wgStandard.Wait() + close(chromeCh) + }() + + // Wait for chrome workers to finish + wgChrome.Wait() + return nil } diff --git a/go.mod b/go.mod index a293a75..c8200d3 100644 --- a/go.mod +++ b/go.mod @@ -41,13 +41,21 @@ require ( github.com/blevesearch/zapx/v14 v14.3.10 // indirect github.com/blevesearch/zapx/v15 v15.3.17 // indirect github.com/blevesearch/zapx/v16 v16.1.9-0.20241217210638-a0519e7caf3b // indirect + github.com/chromedp/cdproto v0.0.0-20241022234722-4d5d5faf59fb // indirect + github.com/chromedp/chromedp v0.11.2 // indirect + github.com/chromedp/sysutil v1.1.0 // indirect github.com/go-ole/go-ole v1.3.0 // indirect github.com/go-shiori/dom v0.0.0-20230515143342-73569d674e1c // indirect + github.com/gobwas/httphead v0.1.0 // indirect + github.com/gobwas/pool v0.2.1 // indirect + github.com/gobwas/ws v1.4.0 // indirect github.com/gogs/chardet v0.0.0-20211120154057-b7413eaefb8f // indirect github.com/golang/geo v0.0.0-20230421003525-6adc56603217 // indirect github.com/golang/protobuf v1.5.4 // indirect github.com/golang/snappy v0.0.4 // indirect + github.com/josharian/intern v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect + github.com/mailru/easyjson v0.7.7 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect github.com/mschoch/smat v0.2.0 // indirect diff --git a/go.sum b/go.sum index 59414b4..148146f 100644 --- a/go.sum +++ b/go.sum @@ -47,6 +47,12 @@ github.com/blevesearch/zapx/v16 v16.1.9-0.20241217210638-a0519e7caf3b h1:ju9Az5Y github.com/blevesearch/zapx/v16 v16.1.9-0.20241217210638-a0519e7caf3b/go.mod h1:BlrYNpOu4BvVRslmIG+rLtKhmjIaRhIbG8sb9scGTwI= github.com/chai2010/webp v1.1.1 h1:jTRmEccAJ4MGrhFOrPMpNGIJ/eybIgwKpcACsrTEapk= github.com/chai2010/webp v1.1.1/go.mod h1:0XVwvZWdjjdxpUEIf7b9g9VkHFnInUSYujwqTLEuldU= +github.com/chromedp/cdproto v0.0.0-20241022234722-4d5d5faf59fb h1:noKVm2SsG4v0Yd0lHNtFYc9EUxIVvrr4kJ6hM8wvIYU= +github.com/chromedp/cdproto v0.0.0-20241022234722-4d5d5faf59fb/go.mod h1:4XqMl3iIW08jtieURWL6Tt5924w21pxirC6th662XUM= +github.com/chromedp/chromedp v0.11.2 h1:ZRHTh7DjbNTlfIv3NFTbB7eVeu5XCNkgrpcGSpn2oX0= +github.com/chromedp/chromedp v0.11.2/go.mod h1:lr8dFRLKsdTTWb75C/Ttol2vnBKOSnt0BW8R9Xaupi8= +github.com/chromedp/sysutil v1.1.0 h1:PUFNv5EcprjqXZD9nJb9b/c9ibAbxiYo4exNWZyipwM= 
+github.com/chromedp/sysutil v1.1.0/go.mod h1:WiThHUdltqCNKGc4gaU50XgYjwjYIhKWoHGPTUfWTJ8= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -57,6 +63,12 @@ github.com/go-shiori/dom v0.0.0-20230515143342-73569d674e1c h1:wpkoddUomPfHiOziH github.com/go-shiori/dom v0.0.0-20230515143342-73569d674e1c/go.mod h1:oVDCh3qjJMLVUSILBRwrm+Bc6RNXGZYtoh9xdvf1ffM= github.com/go-shiori/go-readability v0.0.0-20241012063810-92284fa8a71f h1:cypj7SJh+47G9J3VCPdMzT3uWcXWAWDJA54ErTfOigI= github.com/go-shiori/go-readability v0.0.0-20241012063810-92284fa8a71f/go.mod h1:YWa00ashoPZMAOElrSn4E1cJErhDVU6PWAll4Hxzn+w= +github.com/gobwas/httphead v0.1.0 h1:exrUm0f4YX0L7EBwZHuCF4GDp8aJfVeBrlLQrs6NqWU= +github.com/gobwas/httphead v0.1.0/go.mod h1:O/RXo79gxV8G+RqlR/otEwx4Q36zl9rqC5u12GKvMCM= +github.com/gobwas/pool v0.2.1 h1:xfeeEhW7pwmX8nuLVlqbzVc7udMDrwetjEv+TZIz1og= +github.com/gobwas/pool v0.2.1/go.mod h1:q8bcK0KcYlCgd9e7WYLm9LpyS+YeLd8JVDW6WezmKEw= +github.com/gobwas/ws v1.4.0 h1:CTaoG1tojrh4ucGPcoJFiAQUAsEWekEWvLy7GsVNqGs= +github.com/gobwas/ws v1.4.0/go.mod h1:G3gNqMNtPppf5XUz7O4shetPpcZ1VJ7zt18dlUeakrc= github.com/gogs/chardet v0.0.0-20211120154057-b7413eaefb8f h1:3BSP1Tbs2djlpprl7wCLuiqMaUh5SJkkzI2gDs+FgLs= github.com/gogs/chardet v0.0.0-20211120154057-b7413eaefb8f/go.mod h1:Pcatq5tYkCW2Q6yrR2VRHlbHpZ/R4/7qyL1TCF7vl14= github.com/golang/geo v0.0.0-20230421003525-6adc56603217 h1:HKlyj6in2JV6wVkmQ4XmG/EIm+SCYlPZ+V4GWit7Z+I= @@ -68,10 +80,14 @@ github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEW github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= +github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/leonelquinteros/gotext v1.7.0 h1:jcJmF4AXqyamP7vuw2MMIKs+O3jAEmvrc5JQiI8Ht/8= github.com/leonelquinteros/gotext v1.7.0/go.mod h1:qJdoQuERPpccw7L70uoU+K/BvTfRBHYsisCQyFLXyvw= +github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= +github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= github.com/mattn/go-runewidth v0.0.10/go.mod h1:RAqKPSqVFrSLVXbA8x7dzmKdmGzieGRCM46jaSJTDAk= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= @@ -137,6 +153,7 @@ golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 
golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= From 918e1823dfc6a20127d18a453ab6cbb734cc2190 Mon Sep 17 00:00:00 2001 From: partisan <none@noone.no> Date: Wed, 1 Jan 2025 23:48:47 +0100 Subject: [PATCH 6/9] added visited sites functionality to crawler --- README.md | 34 +++++++-------- crawler-visited.go | 106 +++++++++++++++++++++++++++++++++++++++++++++ crawler.go | 93 ++++++++++++++++++++------------------- go.mod | 4 +- go.sum | 4 ++ 5 files changed, 178 insertions(+), 63 deletions(-) create mode 100644 crawler-visited.go diff --git a/README.md b/README.md index 23e8bf5..5ad3337 100644 --- a/README.md +++ b/README.md @@ -7,30 +7,30 @@ </p> <p align="center"> -A self-hosted private <a href="https://en.wikipedia.org/wiki/Metasearch_engine">metasearch engine</a> that aims to be more resource-efficient than its competition. +A self-hosted private search engine designed to be scalable and more resource-efficient than its competitors. </p> # Bare in mind that this project is still WIP -## Comparison to other search engines +## Comparison to other open-source search engines -| Feature | Whoogle [1] | Araa-Search | LibreY | 4get | SearchXNG | *QGato* | -| :------------------------- | ------------------ | ------------------------- | ------------------------ | ------------------------ | ------------------------- | ---------------------------------------------------- | -| Works without JavaScript | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| Music search | ❓ | ❌ | ❌ | ✅ | ✅ | ✅ | -| Torrent search | ❌ | ✅ | ✅ | ❌ | ✅ | ✅ | -| API | ❌ | ❓ [2] | ✅ | ✅ | ✅ | ✅ | -| Scalable | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | -| Not Resource Hungry | ❓ Moderate | ❌ Very resource hungry | ❌ Moderate 200-400mb~ | ❌ Moderate 200-400mb~ | ❌ Moderate 200-300MiB~ | ✅ about 15-20MiB at idle, 17-22MiB when searching | -| Result caching | ❌ | ❌ | ❓ | ❓ | ❓ | ✅ | -| Dynamic Page Loading | ❓ Not specified | ❌ | ❌ | ❌ | ✅ | ✅ | -| User themable | ❌ | ✅ | ❌ | ❌ | ✅[3] | ✅ | -| Unusual logo choice | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | +| Feature | Whoogle [1] | Araa-Search | LibreY | 4get | SearchXNG | *QGato* | +| :------------------------- | ------------- | ------------------------- | ------------------------ | ------------------------ | ------------------------- | --------------------------------------- | +| Works without JavaScript | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| Music search | ❌ | ❌ | ❌ | ✅ | ✅ | ✅ | +| Torrent search | ❌ | ✅ | ✅ | ❌ | ✅ | ✅ | +| API | ❌ | ❌ [2] | ✅ | ✅ | ✅ | ✅ | +| Scalable | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | +| Not Resource Hungry | ❓ Moderate | ❌ Very resource hungry | ❌ Moderate 200-400mb~ | ❌ Moderate 200-400mb~ | ❌ Moderate 200-300MiB~ | ✅ about 15-30MiB even when searching | +| Result caching | ❓ | ❓ | ❓ | ❓ | ❓ | ✅ | +| Dynamic Page Loading | ❌ | ❌ | ❌ | ❌ | ✅ | ✅ | +| User themable | ❌ | ✅ | ❌ | ❌ | ❓[3] | ✅ | +| Unusual logo choice | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | [1]: I was not able to check this since their site does not work, same for the community instances. -[2]: In the project repo they specify that it has API, but It looks like they are no loger supporting it. Or just removed "API" button and documentation, since I was not able to find it anymore. +[2]: In the project repo they specify that it has API, but It looks like they are no longer supporting it. Or just removed "API" button and documentation, since I was not able to find it anymore. 
[3]: It is called 'User Themable' because you want to give the user freedom of choice for their theme, not by hard-setting one theme in the backend and calling it themable. @@ -48,7 +48,7 @@ A self-hosted private <a href="https://en.wikipedia.org/wiki/Metasearch_engine"> ### For Self-Hosting - **Self-hosted option** - Run on your own server for even more privacy. -- **Lightweight** - Low memory footprint (15-22MiB) even during searches. +- **Lightweight** - Low memory footprint (15-30MiB) even during searches. - **Decentralized** - No single point of failure. - **Results caching in RAM** - Faster response times through caching. - **Configurable** - Tweak features via `config.ini`. @@ -67,7 +67,7 @@ A self-hosted private <a href="https://en.wikipedia.org/wiki/Metasearch_engine"> ### Prerequisites -- Go (version 1.18 or higher recommended) +- Go (version 1.23 or higher recommended) - Git (unexpected) - Access to the internet for fetching results (even more unexpected) diff --git a/crawler-visited.go b/crawler-visited.go new file mode 100644 index 0000000..bfa1af9 --- /dev/null +++ b/crawler-visited.go @@ -0,0 +1,106 @@ +package main + +import ( + "bufio" + "fmt" + "os" + "sync" +) + +// VisitedStore handles deduplicating visited URLs with a map and a periodic flush to disk. +type VisitedStore struct { + mu sync.Mutex + visited map[string]bool + toFlush []string + + filePath string + batchSize int // how many new URLs we batch before flushing +} + +// NewVisitedStore creates or loads the visited URLs from filePath. +func NewVisitedStore(filePath string, batchSize int) (*VisitedStore, error) { + store := &VisitedStore{ + visited: make(map[string]bool), + filePath: filePath, + batchSize: batchSize, + } + + // Attempt to load existing visited URLs (if file exists). + if _, err := os.Stat(filePath); err == nil { + if err := store.loadFromFile(); err != nil { + return nil, fmt.Errorf("loadFromFile error: %w", err) + } + } + return store, nil +} + +// loadFromFile loads visited URLs from the store’s file. One URL per line. +func (s *VisitedStore) loadFromFile() error { + f, err := os.Open(s.filePath) + if err != nil { + return err + } + defer f.Close() + + scanner := bufio.NewScanner(f) + for scanner.Scan() { + url := scanner.Text() + s.visited[url] = true + } + return scanner.Err() +} + +// AlreadyVisited returns true if the URL is in the store. +func (s *VisitedStore) AlreadyVisited(url string) bool { + s.mu.Lock() + defer s.mu.Unlock() + return s.visited[url] +} + +// MarkVisited adds the URL to the store if not already present, and triggers a flush if batchSize is reached. +func (s *VisitedStore) MarkVisited(url string) (added bool, err error) { + s.mu.Lock() + defer s.mu.Unlock() + + if s.visited[url] { + return false, nil + } + // Mark in memory + s.visited[url] = true + s.toFlush = append(s.toFlush, url) + + // Flush if we have enough new URLs + if len(s.toFlush) >= s.batchSize { + if err := s.flushToFileUnlocked(); err != nil { + return false, err + } + } + return true, nil +} + +// Flush everything in s.toFlush to file, then clear the buffer. +func (s *VisitedStore) Flush() error { + s.mu.Lock() + defer s.mu.Unlock() + return s.flushToFileUnlocked() +} + +// flushToFileUnlocked writes s.toFlush lines to the store file, then clears s.toFlush. 
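+// The caller is expected to hold s.mu; both MarkVisited and Flush invoke it with
+// the lock already held.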
+func (s *VisitedStore) flushToFileUnlocked() error { + if len(s.toFlush) == 0 { + return nil + } + f, err := os.OpenFile(s.filePath, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0644) + if err != nil { + return err + } + defer f.Close() + + for _, url := range s.toFlush { + if _, err := fmt.Fprintln(f, url); err != nil { + return err + } + } + s.toFlush = nil + return nil +} diff --git a/crawler.go b/crawler.go index 45dc76f..3ddc36b 100644 --- a/crawler.go +++ b/crawler.go @@ -10,13 +10,24 @@ import ( "time" ) +// Create a global or config-level visited store +var visitedStore *VisitedStore + // webCrawlerInit is called during init on program start func webCrawlerInit() { + // Initialize the store with, say, batchSize=50 + store, err := NewVisitedStore(filepath.Join(config.DriveCache.Path, "visited-urls.txt"), 50) + if err != nil { + printErr("Failed to initialize visited store: %v", err) + } + visitedStore = store + + // Start the periodic crawler go func() { // First run immediately runCrawlerAndIndexer() - // Then run periodically based on CrawlingInterval + // Then run periodically ticker := time.NewTicker(config.CrawlingInterval) for range ticker.C { runCrawlerAndIndexer() @@ -79,25 +90,8 @@ func readDomainsCSV(csvPath string) ([][2]string, error) { // 1. "standard" goroutines read from standardCh -> attempt standard extraction -> if fails, push to chromeCh // 2. "chrome" goroutines read from chromeCh -> attempt chromedp extraction -> if fails, skip func crawlDomainsToFile(domains [][2]string, outFile string, maxPages int) error { - existingEntries := make(map[string]bool) - var mu sync.Mutex // For existingEntries + file writes - // read existing entries from outFile if it exists - if _, err := os.Stat(outFile); err == nil { - file, err := os.Open(outFile) - if err != nil { - return fmt.Errorf("unable to open %s: %v", outFile, err) - } - defer file.Close() - scanner := bufio.NewScanner(file) - for scanner.Scan() { - line := scanner.Text() - parts := strings.SplitN(line, "|", 5) - if len(parts) >= 1 { - existingEntries[parts[0]] = true - } - } - } + var mu sync.Mutex // Open file for writing (truncate if existing) file, err := os.OpenFile(outFile, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644) @@ -119,33 +113,38 @@ func crawlDomainsToFile(domains [][2]string, outFile string, maxPages int) error for dom := range standardCh { rank := dom[0] domainName := dom[1] - fullURL := "https://" + domainName - - // Mark domain existing so we don't re-crawl duplicates - mu.Lock() - if domainName == "" || existingEntries[fullURL] { - mu.Unlock() + if domainName == "" { continue } - existingEntries[fullURL] = true - mu.Unlock() + fullURL := "https://" + domainName - // get a standard user agent + // 1. Check if we've already visited this URL + added, err := visitedStore.MarkVisited(fullURL) + if err != nil { + printErr("MarkVisited error for %s: %v", fullURL, err) + continue + } + if !added { + // Already visited + continue + } + + // 2. Standard extraction userAgent, _ := GetUserAgent("crawler-std") title, desc, keywords := fetchPageMetadataStandard(fullURL, userAgent) + // If missing, push to Chrome queue if title == "" || desc == "" { - // push to chromeCh chromeCh <- dom continue } - // write to file + // 3. 
Write to file line := fmt.Sprintf("%s|%s|%s|%s|%s\n", fullURL, title, keywords, desc, rank) mu.Lock() - file.WriteString(line) + _, _ = file.WriteString(line) mu.Unlock() } }() @@ -160,32 +159,32 @@ func crawlDomainsToFile(domains [][2]string, outFile string, maxPages int) error for dom := range chromeCh { rank := dom[0] domainName := dom[1] - fullURL := "https://" + domainName - - // Mark domain existing if not already - mu.Lock() - if domainName == "" || existingEntries[fullURL] { - mu.Unlock() + if domainName == "" { continue } - existingEntries[fullURL] = true - mu.Unlock() + fullURL := "https://" + domainName - // get a chrome user agent + // We already marked it visited in the standard pass + // but you may re-check if you prefer: + // + // added, err := visitedStore.MarkVisited(fullURL) + // if err != nil { ... } + // if !added { continue } + + // 3. Chromedp fallback extraction userAgent, _ := GetUserAgent("crawler-chrome") title, desc, keywords := fetchPageMetadataChrome(fullURL, userAgent) - if title == "" || desc == "" { printWarn("Skipping (Chrome) %s: missing title/desc", fullURL) continue } - // write to file + // 4. Write to file line := fmt.Sprintf("%s|%s|%s|%s|%s\n", fullURL, title, keywords, desc, rank) mu.Lock() - file.WriteString(line) + _, _ = file.WriteString(line) mu.Unlock() } }() @@ -194,7 +193,6 @@ func crawlDomainsToFile(domains [][2]string, outFile string, maxPages int) error // Feed domains into standardCh go func() { for _, dom := range domains { - // optionally, if maxPages is relevant, you can track how many have been processed standardCh <- dom } // close the standardCh once all are queued @@ -210,5 +208,12 @@ func crawlDomainsToFile(domains [][2]string, outFile string, maxPages int) error // Wait for chrome workers to finish wgChrome.Wait() + // Optionally flush the visited store once more + if visitedStore != nil { + if err := visitedStore.Flush(); err != nil { + printErr("visitedStore flush error: %v", err) + } + } + return nil } diff --git a/go.mod b/go.mod index c8200d3..f7d89ad 100644 --- a/go.mod +++ b/go.mod @@ -15,6 +15,8 @@ require ( require ( github.com/blevesearch/bleve/v2 v2.4.4 + github.com/chromedp/cdproto v0.0.0-20241022234722-4d5d5faf59fb + github.com/chromedp/chromedp v0.11.2 github.com/go-shiori/go-readability v0.0.0-20241012063810-92284fa8a71f golang.org/x/net v0.33.0 ) @@ -41,8 +43,6 @@ require ( github.com/blevesearch/zapx/v14 v14.3.10 // indirect github.com/blevesearch/zapx/v15 v15.3.17 // indirect github.com/blevesearch/zapx/v16 v16.1.9-0.20241217210638-a0519e7caf3b // indirect - github.com/chromedp/cdproto v0.0.0-20241022234722-4d5d5faf59fb // indirect - github.com/chromedp/chromedp v0.11.2 // indirect github.com/chromedp/sysutil v1.1.0 // indirect github.com/go-ole/go-ole v1.3.0 // indirect github.com/go-shiori/dom v0.0.0-20230515143342-73569d674e1c // indirect diff --git a/go.sum b/go.sum index 148146f..66cede6 100644 --- a/go.sum +++ b/go.sum @@ -84,6 +84,8 @@ github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8Hm github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= +github.com/ledongthuc/pdf v0.0.0-20220302134840-0c2507a12d80 h1:6Yzfa6GP0rIo/kULo2bwGEkFvCePZ3qHDDTC3/J9Swo= +github.com/ledongthuc/pdf v0.0.0-20220302134840-0c2507a12d80/go.mod h1:imJHygn/1yfhB7XSJJKlFZKl/J+dCPAknuiaGOshXAs= 
github.com/leonelquinteros/gotext v1.7.0 h1:jcJmF4AXqyamP7vuw2MMIKs+O3jAEmvrc5JQiI8Ht/8= github.com/leonelquinteros/gotext v1.7.0/go.mod h1:qJdoQuERPpccw7L70uoU+K/BvTfRBHYsisCQyFLXyvw= github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= @@ -96,6 +98,8 @@ github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9G github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= github.com/mschoch/smat v0.2.0 h1:8imxQsjDm8yFEAVBe7azKmKSgzSkZXDuKkSq9374khM= github.com/mschoch/smat v0.2.0/go.mod h1:kc9mz7DoBKqDyiRL7VZN8KvXQMWeTaVnttLRXOlotKw= +github.com/orisano/pixelmatch v0.0.0-20220722002657-fb0b55479cde h1:x0TT0RDC7UhAVbbWWBzr41ElhJx5tXPWkIHA2HWPRuw= +github.com/orisano/pixelmatch v0.0.0-20220722002657-fb0b55479cde/go.mod h1:nZgzbfBr3hhjoZnS66nKrHmduYNpc34ny7RK4z5/HM0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/rivo/uniseg v0.1.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= From 61266c461ade647fa24b898f0b10430ae09eb56b Mon Sep 17 00:00:00 2001 From: partisan <none@noone.no> Date: Thu, 2 Jan 2025 12:55:44 +0100 Subject: [PATCH 7/9] changed indexing buffer to save to ram not to file --- config.go | 10 ++--- crawler.go | 74 +++++++++++++++------------------ indexer.go | 120 +++++++++++++++++++++++++++++++++++++++++++++++++---- init.go | 13 +++--- 4 files changed, 155 insertions(+), 62 deletions(-) diff --git a/config.go b/config.go index bdd9ccc..18d83cf 100644 --- a/config.go +++ b/config.go @@ -39,7 +39,7 @@ type Config struct { ConcurrentChromeCrawlers int CrawlingInterval time.Duration // Refres crawled results in... MaxPagesPerDomain int // Max pages to crawl per domain - IndexRefreshInterval time.Duration // Interval for periodic index refresh (e.g., "10m") + IndexBatchSize int DriveCache CacheConfig RamCache CacheConfig @@ -60,7 +60,7 @@ var defaultConfig = Config{ ConcurrentChromeCrawlers: 4, CrawlingInterval: 24 * time.Hour, MaxPagesPerDomain: 10, - IndexRefreshInterval: 2 * time.Minute, + IndexBatchSize: 50, LogLevel: 1, DriveCache: CacheConfig{ Duration: 48 * time.Hour, // Added @@ -255,7 +255,7 @@ func saveConfig(config Config) { indexerSec.Key("ConcurrentChromeCrawlers").SetValue(strconv.Itoa(config.ConcurrentStandardCrawlers)) indexerSec.Key("CrawlingInterval").SetValue(config.CrawlingInterval.String()) indexerSec.Key("MaxPagesPerDomain").SetValue(strconv.Itoa(config.MaxPagesPerDomain)) - indexerSec.Key("IndexRefreshInterval").SetValue(config.IndexRefreshInterval.String()) + indexerSec.Key("IndexBatchSize").SetValue(strconv.Itoa(config.IndexBatchSize)) // DriveCache section driveSec := cfg.Section("DriveCache") @@ -303,7 +303,7 @@ func loadConfig() Config { concurrentChromeCrawlers := getConfigValue(cfg.Section("Indexer").Key("ConcurrentChromeCrawlers"), defaultConfig.ConcurrentChromeCrawlers, strconv.Atoi) crawlingInterval := getConfigValue(cfg.Section("Indexer").Key("CrawlingInterval"), defaultConfig.CrawlingInterval, time.ParseDuration) maxPagesPerDomain := getConfigValue(cfg.Section("Indexer").Key("MaxPagesPerDomain"), defaultConfig.MaxPagesPerDomain, strconv.Atoi) - indexRefreshInterval := getConfigValue(cfg.Section("Indexer").Key("IndexRefreshInterval"), defaultConfig.IndexRefreshInterval, time.ParseDuration) + indexBatchSize := getConfigValue(cfg.Section("Indexer").Key("IndexBatchSize"), defaultConfig.IndexBatchSize, strconv.Atoi) // 
DriveCache driveDuration := getConfigValue(cfg.Section("DriveCache").Key("Duration"), defaultConfig.DriveCache.Duration, time.ParseDuration) @@ -334,7 +334,7 @@ func loadConfig() Config { ConcurrentChromeCrawlers: concurrentChromeCrawlers, CrawlingInterval: crawlingInterval, MaxPagesPerDomain: maxPagesPerDomain, - IndexRefreshInterval: indexRefreshInterval, + IndexBatchSize: indexBatchSize, DriveCache: CacheConfig{ Duration: driveDuration, MaxUsageBytes: driveMaxUsage, diff --git a/crawler.go b/crawler.go index 3ddc36b..afa7f9e 100644 --- a/crawler.go +++ b/crawler.go @@ -2,7 +2,6 @@ package main import ( "bufio" - "fmt" "os" "path/filepath" "strings" @@ -45,14 +44,20 @@ func runCrawlerAndIndexer() { } // 2. Crawl each domain and write results to data_to_index.txt - outFile := filepath.Join(config.DriveCache.Path, "data_to_index.txt") - if err := crawlDomainsToFile(domains, outFile, config.MaxPagesPerDomain); err != nil { + if err := crawlDomainsToFile(domains, config.MaxPagesPerDomain); err != nil { printErr("Error crawling domains: %v", err) return } - // 3. Re-index data_to_index.txt periodically based on IndexRefreshInterval - startPeriodicIndexing(outFile, config.IndexRefreshInterval) + // After finishing crawling, flush any pending visited-urls + if visitedStore != nil { + if err := visitedStore.Flush(); err != nil { + printErr("Failed to flush visitedStore: %v", err) + } + } + + // 3. Re-index data_to_index.txt based on IndexRefreshInterval + //startPeriodicIndexing(outFile, config.IndexRefreshInterval) printDebug("Crawl + index refresh completed.") } @@ -89,16 +94,10 @@ func readDomainsCSV(csvPath string) ([][2]string, error) { // crawlDomainsToFile does an async pipeline: // 1. "standard" goroutines read from standardCh -> attempt standard extraction -> if fails, push to chromeCh // 2. "chrome" goroutines read from chromeCh -> attempt chromedp extraction -> if fails, skip -func crawlDomainsToFile(domains [][2]string, outFile string, maxPages int) error { - - var mu sync.Mutex - - // Open file for writing (truncate if existing) - file, err := os.OpenFile(outFile, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644) - if err != nil { - return fmt.Errorf("unable to open %s for writing: %v", outFile, err) - } - defer file.Close() +// +// Now, instead of writing to a file, we directly index each result into Bleve via indexDocImmediately(...). +func crawlDomainsToFile(domains [][2]string, maxPages int) error { + var mu sync.Mutex // Used if needed to protect shared data. (Here mainly for visitedStore.) // Prepare channels standardCh := make(chan [2]string, 1000) // buffered channels help avoid blocking @@ -110,6 +109,7 @@ func crawlDomainsToFile(domains [][2]string, outFile string, maxPages int) error wgStandard.Add(1) go func() { defer wgStandard.Done() + for dom := range standardCh { rank := dom[0] domainName := dom[1] @@ -118,14 +118,17 @@ func crawlDomainsToFile(domains [][2]string, outFile string, maxPages int) error } fullURL := "https://" + domainName - // 1. Check if we've already visited this URL + // Mark visited so we don't re-crawl duplicates + mu.Lock() added, err := visitedStore.MarkVisited(fullURL) + mu.Unlock() + if err != nil { printErr("MarkVisited error for %s: %v", fullURL, err) continue } if !added { - // Already visited + // Already visited, skip continue } @@ -139,13 +142,11 @@ func crawlDomainsToFile(domains [][2]string, outFile string, maxPages int) error continue } - // 3. 
Write to file - line := fmt.Sprintf("%s|%s|%s|%s|%s\n", - fullURL, title, keywords, desc, rank) - - mu.Lock() - _, _ = file.WriteString(line) - mu.Unlock() + // 3. Directly index + err = indexDocImmediately(fullURL, title, keywords, desc, rank) + if err != nil { + printErr("Index error for %s: %v", fullURL, err) + } } }() } @@ -156,6 +157,7 @@ func crawlDomainsToFile(domains [][2]string, outFile string, maxPages int) error wgChrome.Add(1) go func() { defer wgChrome.Done() + for dom := range chromeCh { rank := dom[0] domainName := dom[1] @@ -164,28 +166,19 @@ func crawlDomainsToFile(domains [][2]string, outFile string, maxPages int) error } fullURL := "https://" + domainName - // We already marked it visited in the standard pass - // but you may re-check if you prefer: - // - // added, err := visitedStore.MarkVisited(fullURL) - // if err != nil { ... } - // if !added { continue } - // 3. Chromedp fallback extraction userAgent, _ := GetUserAgent("crawler-chrome") title, desc, keywords := fetchPageMetadataChrome(fullURL, userAgent) if title == "" || desc == "" { - printWarn("Skipping (Chrome) %s: missing title/desc", fullURL) + printWarn("Skipping %s: unable to get title/desc data", fullURL) continue } - // 4. Write to file - line := fmt.Sprintf("%s|%s|%s|%s|%s\n", - fullURL, title, keywords, desc, rank) - - mu.Lock() - _, _ = file.WriteString(line) - mu.Unlock() + // 4. Directly index the doc + err := indexDocImmediately(fullURL, title, keywords, desc, rank) + if err != nil { + printErr("Index error for %s: %v", fullURL, err) + } } }() } @@ -195,7 +188,6 @@ func crawlDomainsToFile(domains [][2]string, outFile string, maxPages int) error for _, dom := range domains { standardCh <- dom } - // close the standardCh once all are queued close(standardCh) }() @@ -208,7 +200,7 @@ func crawlDomainsToFile(domains [][2]string, outFile string, maxPages int) error // Wait for chrome workers to finish wgChrome.Wait() - // Optionally flush the visited store once more + // Flush visitedStore if visitedStore != nil { if err := visitedStore.Flush(); err != nil { printErr("visitedStore flush error: %v", err) diff --git a/indexer.go b/indexer.go index 306c28d..73ca9e3 100644 --- a/indexer.go +++ b/indexer.go @@ -8,6 +8,7 @@ import ( "path/filepath" "strconv" "strings" + "sync" "time" "github.com/blevesearch/bleve/v2" @@ -26,22 +27,123 @@ type Document struct { var ( // Global Bleve index handle - bleveIndex bleve.Index + bleveIndex bleve.Index + docBuffer []Document + docBufferMu sync.Mutex ) -// startPeriodicIndexing refreshes the index from a file periodically -func startPeriodicIndexing(filePath string, interval time.Duration) { +// // startPeriodicIndexing refreshes the index from a file periodically +// func startPeriodicIndexing(filePath string, interval time.Duration) { +// go func() { +// for { +// printDebug("Refreshing index from %s", filePath) +// if err := IndexFile(filePath); err != nil { +// printErr("Failed to refresh index: %v", err) +// } +// time.Sleep(interval) +// } +// }() +// } + +// indexDocImmediately indexes a single document into the Bleve index. 
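+// Crawler workers call this once per page instead of appending to data_to_index.txt; rank is parsed into the
+// popularity score and the normalized link (see normalizeDomain) is used as the document ID.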
+func indexDocImmediately(link, title, tags, desc, rank string) error { + pop, _ := strconv.ParseInt(rank, 10, 64) + normalized := normalizeDomain(link) + + doc := Document{ + ID: normalized, + Link: link, + Title: title, + Tags: tags, + Description: desc, + Popularity: pop, + } + + // Insert directly into the Bleve index + err := bleveIndex.Index(doc.ID, map[string]interface{}{ + "title": doc.Title, + "description": doc.Description, + "link": doc.Link, + "tags": doc.Tags, + "popularity": doc.Popularity, + }) + if err != nil { + return fmt.Errorf("failed to index doc %s: %v", link, err) + } + return nil +} + +// StartBatchIndexing spawns a goroutine that flushes the buffer every interval. +func StartBatchIndexing() { go func() { - for { - printDebug("Refreshing index from %s", filePath) - if err := IndexFile(filePath); err != nil { - printErr("Failed to refresh index: %v", err) - } - time.Sleep(interval) + ticker := time.NewTicker(config.IndexRefreshInterval) + defer ticker.Stop() + + for range ticker.C { + flushDocBuffer() } }() } +func flushDocBuffer() { + docBufferMu.Lock() + defer docBufferMu.Unlock() + + if len(docBuffer) == 0 { + return + } + + batch := bleveIndex.NewBatch() + for _, doc := range docBuffer { + err := batch.Index(doc.ID, map[string]interface{}{ + "title": doc.Title, + "description": doc.Description, + "link": doc.Link, + "tags": doc.Tags, + "popularity": doc.Popularity, + }) + if err != nil { + printErr("batch index error for %s: %v", doc.Link, err) + } + } + // Attempt to commit the batch + if err := bleveIndex.Batch(batch); err != nil { + printErr("error committing batch: %v", err) + } + + // Clear the buffer + docBuffer = docBuffer[:0] +} + +// indexDocBatch queues a single document into memory, which gets flushed by the ticker. +func indexDocBatch(link, title, tags, desc, rank string) error { + pop, _ := strconv.ParseInt(rank, 10, 64) + normalized := normalizeDomain(link) + + doc := Document{ + ID: normalized, + Link: link, + Title: title, + Tags: tags, + Description: desc, + Popularity: pop, + } + + docBufferMu.Lock() + docBuffer = append(docBuffer, doc) + + // Optional: if we exceed config.IndexBatchSize, flush immediately + if len(docBuffer) >= config.IndexBatchSize { + go func() { + // flush in a separate goroutine to avoid blocking + flushDocBuffer() + }() + } + docBufferMu.Unlock() + + return nil +} + // InitIndex ensures that the Bleve index is created or opened. 
func InitIndex() error { idx, err := bleve.Open(filepath.Join(config.DriveCache.Path, "index.bleve")) diff --git a/init.go b/init.go index 7a6dba2..666d93a 100644 --- a/init.go +++ b/init.go @@ -3,8 +3,6 @@ package main import ( "flag" "os" - "path/filepath" - "time" ) var config Config @@ -109,16 +107,17 @@ func main() { return } - webCrawlerInit() - err := InitIndex() if err != nil { printErr("Failed to initialize index:", err) } - // Start periodic indexing (every 2 minutes) - dataFilePath := filepath.Join(config.DriveCache.Path, "data_to_index.txt") - startPeriodicIndexing(dataFilePath, 2*time.Minute) + webCrawlerInit() + + // No longer needed as crawled data are indexed imidietly + // // Start periodic indexing (every 2 minutes) + // dataFilePath := filepath.Join(config.DriveCache.Path, "data_to_index.txt") + // startPeriodicIndexing(dataFilePath, 2*time.Minute) printInfo("Indexer is enabled.") } else { From 5ae97da6d086556a86bcaca4a36885735c9fd58b Mon Sep 17 00:00:00 2001 From: partisan <none@noone.no> Date: Sun, 5 Jan 2025 19:23:53 +0100 Subject: [PATCH 8/9] added privacy policy page and about section, improved dir check, fixed crash when idexer is disabled --- crawler.go | 5 +- indexer.go | 26 +++-- init.go | 18 ++- main.go | 45 ++++++++ static/css/style-imageviewer.css | 18 +-- static/css/style-menu.css | 104 +++++++++++++++++ static/css/style-privacy.css | 95 ++++++++++++++++ static/js/imageviewer.js | 2 +- static/js/minimenu.js | 9 ++ templates/files.html | 23 +++- templates/forums.html | 23 +++- templates/images.html | 23 +++- templates/map.html | 23 +++- templates/privacy.html | 133 ++++++++++++++++++++++ templates/search.html | 23 +++- templates/text.html | 23 +++- templates/videos.html | 23 +++- user-settings.go | 189 ++++++++++++++++++++----------- 18 files changed, 698 insertions(+), 107 deletions(-) create mode 100644 static/css/style-privacy.css create mode 100644 templates/privacy.html diff --git a/crawler.go b/crawler.go index afa7f9e..8caa073 100644 --- a/crawler.go +++ b/crawler.go @@ -14,8 +14,7 @@ var visitedStore *VisitedStore // webCrawlerInit is called during init on program start func webCrawlerInit() { - // Initialize the store with, say, batchSize=50 - store, err := NewVisitedStore(filepath.Join(config.DriveCache.Path, "visited-urls.txt"), 50) + store, err := NewVisitedStore(filepath.Join(config.DriveCache.Path, "visited-urls.txt"), config.IndexBatchSize) if err != nil { printErr("Failed to initialize visited store: %v", err) } @@ -170,7 +169,7 @@ func crawlDomainsToFile(domains [][2]string, maxPages int) error { userAgent, _ := GetUserAgent("crawler-chrome") title, desc, keywords := fetchPageMetadataChrome(fullURL, userAgent) if title == "" || desc == "" { - printWarn("Skipping %s: unable to get title/desc data", fullURL) + printDebug("Skipping %s: unable to get title/desc data", fullURL) // Here is print for all domains that fail to be crawled continue } diff --git a/indexer.go b/indexer.go index 73ca9e3..c8cf6fe 100644 --- a/indexer.go +++ b/indexer.go @@ -9,7 +9,6 @@ import ( "strconv" "strings" "sync" - "time" "github.com/blevesearch/bleve/v2" "golang.org/x/net/publicsuffix" @@ -73,17 +72,17 @@ func indexDocImmediately(link, title, tags, desc, rank string) error { return nil } -// StartBatchIndexing spawns a goroutine that flushes the buffer every interval. 
-func StartBatchIndexing() { - go func() { - ticker := time.NewTicker(config.IndexRefreshInterval) - defer ticker.Stop() +// // StartBatchIndexing spawns a goroutine that flushes the buffer every interval. +// func StartBatchIndexing() { +// go func() { +// ticker := time.NewTicker(config.IndexRefreshInterval) +// defer ticker.Stop() - for range ticker.C { - flushDocBuffer() - } - }() -} +// for range ticker.C { +// flushDocBuffer() +// } +// }() +// } func flushDocBuffer() { docBufferMu.Lock() @@ -264,6 +263,11 @@ func IndexFile(filePath string) error { // SearchIndex performs a full-text search on the indexed data. func SearchIndex(queryStr string, page, pageSize int) ([]Document, error) { + // Check if the indexer is enabled + if !config.IndexerEnabled { + return nil, fmt.Errorf("indexer is disabled") + } + exactMatch := bleve.NewMatchQuery(queryStr) // Exact match fuzzyMatch := bleve.NewFuzzyQuery(queryStr) // Fuzzy match fuzzyMatch.Fuzziness = 2 diff --git a/init.go b/init.go index 666d93a..bf0d220 100644 --- a/init.go +++ b/init.go @@ -3,6 +3,7 @@ package main import ( "flag" "os" + "path/filepath" ) var config Config @@ -77,9 +78,18 @@ func main() { // Check if the cache directory exists when caching is enabled if config.DriveCacheEnabled { - if _, err := os.Stat(config.DriveCache.Path); os.IsNotExist(err) { - printErr("Error: Drive cache is enabled, but cache directory '%s' does not exist.\n", config.DriveCache.Path) - os.Exit(1) // Exit with a non-zero status to indicate an error + cacheDir := config.DriveCache.Path + imagesDir := filepath.Join(cacheDir, "images") + + // Check if the directory already exists + if _, err := os.Stat(imagesDir); os.IsNotExist(err) { + // Try to create the directory since it doesn't exist + if err := os.MkdirAll(imagesDir, os.ModePerm); err != nil { + printErr("Error: Failed to create cache or images directory '%s': %v", imagesDir, err) + os.Exit(1) // Exit with a non-zero status to indicate an error + } + // Print a warning if the directory had to be created + printWarn("Warning: Created missing directory '%s'.", imagesDir) } } @@ -109,7 +119,7 @@ func main() { err := InitIndex() if err != nil { - printErr("Failed to initialize index:", err) + printErr("Failed to initialize index: %v", err) } webCrawlerInit() diff --git a/main.go b/main.go index cc6b8c3..12c2381 100755 --- a/main.go +++ b/main.go @@ -221,6 +221,7 @@ func runServer() { http.HandleFunc("/save-settings", handleSaveSettings) http.HandleFunc("/image/", handleImageServe) http.HandleFunc("/image_status", handleImageStatus) + http.HandleFunc("/privacy", handlePrivacyPage) http.HandleFunc("/opensearch.xml", func(w http.ResponseWriter, r *http.Request) { w.Header().Set("Content-Type", "application/opensearchdescription+xml") http.ServeFile(w, r, "static/opensearch.xml") @@ -235,6 +236,7 @@ func runServer() { http.HandleFunc("/save-settings", handleWebsiteDisabled) http.HandleFunc("/image/", handleWebsiteDisabled) http.HandleFunc("/image_status", handleWebsiteDisabled) + http.HandleFunc("/privacy", handleWebsiteDisabled) http.HandleFunc("/opensearch.xml", handleWebsiteDisabled) printInfo("Website functionality disabled.") } @@ -252,3 +254,46 @@ func handleWebsiteDisabled(w http.ResponseWriter, r *http.Request) { w.WriteHeader(http.StatusServiceUnavailable) _, _ = w.Write([]byte("The website functionality is currently disabled.")) } + +func handlePrivacyPage(w http.ResponseWriter, r *http.Request) { + settings := loadUserSettings(w, r) + iconPathSVG, iconPathPNG := GetIconPath() + + // Define 
the data structure for the template + data := struct { + Theme string + IconPathSVG string + IconPathPNG string + IsThemeDark bool + CookieRows []CookieRow + CurrentLang string + Safe string + LanguageOptions []LanguageOption + }{ + Theme: settings.Theme, + IconPathSVG: iconPathSVG, + IconPathPNG: iconPathPNG, + IsThemeDark: settings.IsThemeDark, + CookieRows: generateCookieTable(r), + CurrentLang: settings.SiteLanguage, + Safe: settings.SafeSearch, + LanguageOptions: languageOptions, + } + + // Parse the template + tmpl, err := template.New("privacy.html").ParseFiles("templates/privacy.html") + if err != nil { + log.Printf("Error parsing template: %v", err) + http.Error(w, "Internal Server Error", http.StatusInternalServerError) + return + } + + // Set the response content type + w.Header().Set("Content-Type", "text/html; charset=utf-8") + + // Execute the template + if err := tmpl.Execute(w, data); err != nil { + log.Printf("Error executing template: %v", err) + http.Error(w, "Internal Server Error", http.StatusInternalServerError) + } +} diff --git a/static/css/style-imageviewer.css b/static/css/style-imageviewer.css index 4c0696f..ac6874a 100644 --- a/static/css/style-imageviewer.css +++ b/static/css/style-imageviewer.css @@ -60,13 +60,6 @@ gap: 5px; /* Add spacing between buttons */ } -.image-view-close .btn-nostyle { - background-color: inherit; - border: none; - padding: 0px; - cursor: pointer; -} - #viewer-close-button, #viewer-prev-button, #viewer-next-button { @@ -128,6 +121,7 @@ .full-size:hover, .proxy-size:hover { + transition: all 0.3s ease; text-decoration: underline; } @@ -136,15 +130,6 @@ visibility: visible; } -/* Button No Style */ -.btn-nostyle { - background-color: inherit; - border: none; - padding: 0px; - width: fit-content; - cursor: pointer; -} - /* Image Navigation Icons */ .image-close, .image-next, @@ -163,6 +148,7 @@ .image-close:hover, .image-next:hover, .image-before:hover { + transition: all 0.3s ease; background-color: var(--image-select); } diff --git a/static/css/style-menu.css b/static/css/style-menu.css index d85810b..95be6cf 100644 --- a/static/css/style-menu.css +++ b/static/css/style-menu.css @@ -1,3 +1,5 @@ +/* ------------------ Mini-Menu Styles ------------------ */ + .settings-search-div-search { right: 20px; top: 25px; @@ -140,4 +142,106 @@ margin-right: 0; border-radius: 0; } +} + +/* ------------------ About QGato Modal Styles ------------------ */ + +#aboutQGatoModal { + display: none; + position: fixed; + /* Center modal */ + top: 50%; + left: 50%; + transform: translate(-50%, -50%); + + /* Keep it on top */ + z-index: 999; + + /* Match mini-menu background style */ + background-color: var(--html-bg); + box-shadow: 0 1px 3px rgba(0, 0, 0, 0.12), 0 1px 2px rgba(0, 0, 0, 0.24); + border: 1px solid var(--border); + border-radius: 12px; + + /* Spacing & sizing */ + padding: 32px; + max-width: 600px; /* Increased width */ + max-height: 80vh; /* Optional: restrict height to 80% of viewport */ + overflow-y: auto; /* Enable scrolling if content exceeds height */ + color: var(--font-fg); +} + +#aboutQGatoModal #close-button { + position: absolute; + top: 12px; + right: 12px; /* Moved close button to top-right */ +} + +#aboutQGatoModal .modal-content { + text-align: center; + margin-top: 20px; /* Adjusted spacing */ +} + +/* Logo */ +#aboutQGatoModal .modal-content img { + width: 100px; /* Increased logo size */ + margin-bottom: 16px; +} + +/* Headings, paragraphs, etc. 
*/ +#aboutQGatoModal .modal-content h2 { + font-size: 2rem; /* Larger heading */ + margin: 8px 0; +} + +#aboutQGatoModal .modal-content p { + font-size: 1.1rem; /* Larger paragraph text */ + margin: 12px 0; +} + +/* Container for the Source Code / Privacy Policy buttons */ +#aboutQGatoModal .button-container { + margin-top: 16px; + display: flex; + justify-content: center; + gap: 16px; +} + +/* Match mini-menu button style as closely as possible */ +#aboutQGatoModal .button-container button { + background-color: var(--button); + color: var(--font-fg); + border: 1px solid var(--border); + border-radius: 6px; + padding: 12px 16px; /* Larger button padding */ + font-size: 1rem; /* Larger button text */ + cursor: pointer; + transition: border 0.3s ease, background-color 0.3s ease, color 0.3s ease; +} + +#aboutQGatoModal .button-container button:hover { + border: 1px solid var(--font-fg); +} + +/* Close Button Style */ +.cloase-btn { + font-size: 1.5rem; /* Larger close button */ + color: var(--search-button); + border-radius: 50%; + padding: 8px; +} + +.cloase-btn:hover { + transition: all 0.3s ease; + background-color: var(--image-select); +} + +/* ------------------ Common Button No Style ------------------ */ + +.btn-nostyle { + background-color: inherit; + border: none; + padding: 0px; + width: fit-content; + cursor: pointer; } \ No newline at end of file diff --git a/static/css/style-privacy.css b/static/css/style-privacy.css new file mode 100644 index 0000000..5cfef4b --- /dev/null +++ b/static/css/style-privacy.css @@ -0,0 +1,95 @@ +/* Main content wrapper */ +.privacy-content-wrapper { + max-width: 800px; + margin: 80px auto 40px auto; + padding: 0 20px; +} + +/* Header section */ +.privacy-header { + text-align: center; + margin-bottom: 30px; +} + +.privacy-header h1 { + font-size: 2rem; + margin: 0; + color: var(--font-fg); +} + +.privacy-header p { + color: var(--fg); + margin-top: 10px; + font-size: 1.1rem; +} + +/* Section headings */ +.privacy-section h2 { + font-size: 1.5rem; + margin-bottom: 8px; + color: var(--font-fg); + border-bottom: 1px solid var(--border); + padding-bottom: 4px; +} + +/* Section text */ +.privacy-section p { + font-size: 1rem; + line-height: 1.6; + margin-bottom: 20px; + color: var(--fg); +} + +/* Footer */ +.privacy-footer { + text-align: center; + padding: 10px 0; + border-top: 1px solid var(--border); + color: var(--fg); + background-color: var(--html-bg); +} + +/* Links */ +.privacy-section a { + color: var(--link); + text-decoration: none; +} + +.privacy-section a:hover { + text-decoration: underline; +} + +/* Table styling */ +.cookie-table { + width: 100%; + margin: 20px auto; + border-collapse: collapse; + text-align: left; + font-size: 1rem; + color: var(--fg); + background-color: var(--html-bg); + border: 1px solid var(--border); +} + +.cookie-table th, +.cookie-table td { + padding: 12px 15px; + border: 1px solid var(--border); +} + +.cookie-table th { + background-color: var(--search-bg); + color: var(--font-fg); + text-align: center; + font-weight: bold; +} + +.cookie-table tr:nth-child(even) { + background-color: var(--snip-background); +} + +/* Center the table within its section */ +.privacy-section .cookie-table { + margin-left: auto; + margin-right: auto; +} diff --git a/static/js/imageviewer.js b/static/js/imageviewer.js index a68f0e2..4bd667f 100644 --- a/static/js/imageviewer.js +++ b/static/js/imageviewer.js @@ -13,7 +13,7 @@ document.addEventListener('DOMContentLoaded', function() { // Set the innerHTML of viewerOverlay 
viewerOverlay.innerHTML = ` <div id="image-viewer" class="image_view image_hide"> - <div class="image-view-close"> + <div class="btn-nostyle"> <button class="btn-nostyle" id="viewer-prev-button"> <div class="material-icons-round icon_visibility clickable image-before"></div> <!-- navigate_before --> </button> diff --git a/static/js/minimenu.js b/static/js/minimenu.js index c1c8a39..4044edb 100644 --- a/static/js/minimenu.js +++ b/static/js/minimenu.js @@ -44,4 +44,13 @@ document.addEventListener('DOMContentLoaded', function () { document.getElementById('languageSelect').addEventListener('change', function () { updateSettings('lang', this.value); }); + + // Show/Hide About QGato + document.getElementById('aboutQGatoBtn').addEventListener('click', function() { + document.getElementById('aboutQGatoModal').style.display = 'block'; + }); + + document.getElementById('close-button').addEventListener('click', function() { + document.getElementById('aboutQGatoModal').style.display = 'none'; + }); }); \ No newline at end of file diff --git a/templates/files.html b/templates/files.html index 0d9c7c4..a47bf4e 100755 --- a/templates/files.html +++ b/templates/files.html @@ -43,7 +43,7 @@ <option value="{{.Code}}" {{if eq .Code $.CurrentLang}}selected{{end}}>{{.Name}}</option> {{end}} </select> - <!-- <button id="settingsButton" onclick="window.location.href='/about'">About QGato</button> --> + <button id="aboutQGatoBtn">About QGato</button> </div> </div> </div> @@ -53,6 +53,27 @@ </div> </noscript> + <!-- Popup Modal for QGato --> + <div id="aboutQGatoModal"> + <!-- Close Button --> + <button class="btn-nostyle" id="close-button"> + <div class="material-icons-round icon_visibility clickable cloase-btn"></div> + </button> + + <div class="modal-content"> + <img + src="/static/images/icon.svg" + alt="QGato" + > + <h2>QGato</h2> + <p>A open-source private search engine.</p> + <div class="button-container"> + <button onclick="window.location.href='https://weforge.xyz/Spitfire/Search'">Source Code</button> + <button onclick="window.location.href='/privacy'">Privacy policy</button> + </div> + </div> + </div> + <form action="/search" id="prev-next-form" class="results-search-container" method="GET" autocomplete="off"> <h1 class="logomobile"> <div class="logo-container" herf="/"> diff --git a/templates/forums.html b/templates/forums.html index f5d91f8..7b9d6dd 100755 --- a/templates/forums.html +++ b/templates/forums.html @@ -43,7 +43,7 @@ <option value="{{.Code}}" {{if eq .Code $.CurrentLang}}selected{{end}}>{{.Name}}</option> {{end}} </select> - <!-- <button id="settingsButton" onclick="window.location.href='/about'">About QGato</button> --> + <button id="aboutQGatoBtn">About QGato</button> </div> </div> </div> @@ -53,6 +53,27 @@ </div> </noscript> + <!-- Popup Modal for QGato --> + <div id="aboutQGatoModal"> + <!-- Close Button --> + <button class="btn-nostyle" id="close-button"> + <div class="material-icons-round icon_visibility clickable cloase-btn"></div> + </button> + + <div class="modal-content"> + <img + src="/static/images/icon.svg" + alt="QGato" + > + <h2>QGato</h2> + <p>A open-source private search engine.</p> + <div class="button-container"> + <button onclick="window.location.href='https://weforge.xyz/Spitfire/Search'">Source Code</button> + <button onclick="window.location.href='/privacy'">Privacy policy</button> + </div> + </div> + </div> + <form action="/search" id="prev-next-form" class="results-search-container" method="GET" autocomplete="off"> <h1 class="logomobile"> <div 
class="logo-container" herf="/"> diff --git a/templates/images.html b/templates/images.html index cfdcdea..1bb91b7 100755 --- a/templates/images.html +++ b/templates/images.html @@ -52,7 +52,7 @@ <option value="{{.Code}}" {{if eq .Code $.CurrentLang}}selected{{end}}>{{.Name}}</option> {{end}} </select> - <!-- <button id="settingsButton" onclick="window.location.href='/about'">About QGato</button> --> + <button id="aboutQGatoBtn">About QGato</button> </div> </div> </div> @@ -61,6 +61,27 @@ <a href="/settings" class="material-icons-round clickable settings-icon-link settings-icon-link-search"></a> </div> </noscript> + + <!-- Popup Modal for QGato --> + <div id="aboutQGatoModal"> + <!-- Close Button --> + <button class="btn-nostyle" id="close-button"> + <div class="material-icons-round icon_visibility clickable cloase-btn"></div> + </button> + + <div class="modal-content"> + <img + src="/static/images/icon.svg" + alt="QGato" + > + <h2>QGato</h2> + <p>A open-source private search engine.</p> + <div class="button-container"> + <button onclick="window.location.href='https://weforge.xyz/Spitfire/Search'">Source Code</button> + <button onclick="window.location.href='/privacy'">Privacy policy</button> + </div> + </div> + </div> <form action="/search" id="prev-next-form" class="results-search-container" method="GET" autocomplete="off"> <h1 class="logomobile"> diff --git a/templates/map.html b/templates/map.html index b75f915..054f910 100644 --- a/templates/map.html +++ b/templates/map.html @@ -58,7 +58,7 @@ <option value="{{.Code}}" {{if eq .Code $.CurrentLang}}selected{{end}}>{{.Name}}</option> {{end}} </select> - <!-- <button id="settingsButton" onclick="window.location.href='/about'">About QGato</button> --> + <button id="aboutQGatoBtn">About QGato</button> </div> </div> </div> @@ -68,6 +68,27 @@ </div> </noscript> + <!-- Popup Modal for QGato --> + <div id="aboutQGatoModal"> + <!-- Close Button --> + <button class="btn-nostyle" id="close-button"> + <div class="material-icons-round icon_visibility clickable cloase-btn"></div> + </button> + + <div class="modal-content"> + <img + src="/static/images/icon.svg" + alt="QGato" + > + <h2>QGato</h2> + <p>A open-source private search engine.</p> + <div class="button-container"> + <button onclick="window.location.href='https://weforge.xyz/Spitfire/Search'">Source Code</button> + <button onclick="window.location.href='/privacy'">Privacy policy</button> + </div> + </div> + </div> + <form action="/search" id="prev-next-form" class="results-search-container" method="GET" autocomplete="off"> <h1 class="logomobile"> <div class="logo-container" herf="/"> diff --git a/templates/privacy.html b/templates/privacy.html new file mode 100644 index 0000000..ca55401 --- /dev/null +++ b/templates/privacy.html @@ -0,0 +1,133 @@ +<!DOCTYPE html> +<html lang="en"> +<head> + <meta charset="UTF-8"> + <meta name="viewport" content="width=device-width, initial-scale=1.0"> + <title>Privacy Policy + + + + + + + + + + + + + + + +
[Body of templates/privacy.html — the HTML markup was stripped during extraction; only the text content below is recoverable. The surviving fragments show the standard settings menu ("Settings", "Current theme: {{.Theme}}", "Dark Theme" / "Light Theme") followed by the policy sections:]

+    Privacy Policy
+    Your privacy is important to us. This page outlines our practices.
+
+    Introduction
+    This website is a Free and Open Source Software (FOSS) project licensed under the AGPL-3.0 license. The project is committed to providing a private and secure experience for all users.
+
+    Data Collection
+    Our servers do not collect any user data, including IP addresses, browsing history, or any other identifiable information. We respect your privacy and ensure that no user information is logged or stored on our servers.
+
+    Cookies Used
+    Our cookies are not used to track users or sell user data; they are only used to save your settings.
+    The following cookies are used by this site:

[A cookie table rendered with {{ range .CookieRows }} follows, listing each cookie's Name, Value, Description, and Expiration.]
[Closing markup of templates/privacy.html lost in extraction.]

[The hunks for templates/search.html, templates/text.html, and templates/videos.html were also stripped of their HTML. Per the diffstat and the surviving hunk headers, each of these templates receives the same change already shown above for files.html, forums.html, images.html, and map.html: the commented-out settings button is replaced with the "About QGato" button, and the "About QGato" popup modal linking to the source code and the /privacy page is added.]
diff --git a/user-settings.go b/user-settings.go index a18478d..a872f11 100755 --- a/user-settings.go +++ b/user-settings.go @@ -18,44 +18,44 @@ func loadUserSettings(w http.ResponseWriter, r *http.Request) UserSettings { var settings UserSettings saveRequired := false - // Load theme - if cookie, err := r.Cookie("theme"); err == nil { - settings.Theme = cookie.Value - } else { - settings.Theme = "dark" - saveRequired = true - } - - // Determine if the selected theme is dark - settings.IsThemeDark = settings.Theme == "dark" || settings.Theme == "night" || settings.Theme == "black" || settings.Theme == "latte" - - // Load site language - if cookie, err := r.Cookie("site_language"); err == nil { - settings.SiteLanguage = cookie.Value - } else { - // If no site language is set, use Accept-Language or default to "en" - acceptLang := r.Header.Get("Accept-Language") - if acceptLang != "" { - settings.SiteLanguage = normalizeLangCode(strings.Split(acceptLang, ",")[0]) + for _, cd := range AllCookies { + // Attempt to read the cookie + if cookie, err := r.Cookie(cd.Name); err == nil { + // Use SetValue to update the correct UserSettings field + cd.SetValue(&settings, cookie.Value) } else { - settings.SiteLanguage = "en" // Default language + // If cookie is missing and you want a default value, set it here + switch cd.Name { + case "theme": + // Default theme to "dark" if missing + cd.SetValue(&settings, "dark") + saveRequired = true + case "site_language": + // Fallback to Accept-Language or "en" + acceptLang := r.Header.Get("Accept-Language") + if acceptLang != "" { + cd.SetValue(&settings, normalizeLangCode(acceptLang)) + } else { + cd.SetValue(&settings, "en") + } + saveRequired = true + case "safe": + // Default safe to "" + cd.SetValue(&settings, "") + saveRequired = true + // etc. 
for other cookies if needed + } } - saveRequired = true } - // Load search language (can be empty) - if cookie, err := r.Cookie("search_language"); err == nil { - settings.SearchLanguage = cookie.Value - } - - // Load safe search - if cookie, err := r.Cookie("safe"); err == nil { - settings.SafeSearch = cookie.Value - } else { - settings.SafeSearch = "" - saveRequired = true - } + // If theme was set, update IsThemeDark just to be sure + // Alternatively do it inside SetValue for "theme" + settings.IsThemeDark = settings.Theme == "dark" || + settings.Theme == "night" || + settings.Theme == "black" || + settings.Theme == "latte" + // Save any new default cookies that might have been triggered if saveRequired { saveUserSettings(w, settings) } @@ -66,38 +66,16 @@ func loadUserSettings(w http.ResponseWriter, r *http.Request) UserSettings { func saveUserSettings(w http.ResponseWriter, settings UserSettings) { expiration := time.Now().Add(90 * 24 * time.Hour) - http.SetCookie(w, &http.Cookie{ - Name: "theme", - Value: settings.Theme, - Path: "/", - Expires: expiration, - Secure: true, - SameSite: http.SameSiteStrictMode, - }) - http.SetCookie(w, &http.Cookie{ - Name: "site_language", - Value: settings.SiteLanguage, - Path: "/", - Expires: expiration, - Secure: true, - SameSite: http.SameSiteStrictMode, - }) - http.SetCookie(w, &http.Cookie{ - Name: "search_language", - Value: settings.SearchLanguage, - Path: "/", - Expires: expiration, - Secure: true, - SameSite: http.SameSiteStrictMode, - }) - http.SetCookie(w, &http.Cookie{ - Name: "safe", - Value: settings.SafeSearch, - Path: "/", - Expires: expiration, - Secure: true, - SameSite: http.SameSiteStrictMode, - }) + for _, cd := range AllCookies { + http.SetCookie(w, &http.Cookie{ + Name: cd.Name, + Value: cd.GetValue(settings), + Path: "/", + Expires: expiration, + Secure: true, + SameSite: http.SameSiteStrictMode, + }) + } printDebug("settings saved: %v", settings) } @@ -193,3 +171,84 @@ func isValidLangCode(lang string) bool { } return false } + +// CookieDefinition describes how a single cookie is handled +type CookieDefinition struct { + Name string + // GetValue extracts the corresponding field from UserSettings + GetValue func(UserSettings) string + // SetValue updates the corresponding field in UserSettings + SetValue func(*UserSettings, string) + // Description used in privacy table or docs + Description string +} + +// AllCookies defines every cookie we handle in a single slice. +// Add or remove entries here, and the rest updates automatically. 
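+// loadUserSettings, saveUserSettings, and generateCookieTable all range over this slice, so adding a new
+// cookie only requires an entry here.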
+var AllCookies = []CookieDefinition{ + { + Name: "theme", + Description: "Stores the selected theme (dark, light, etc.)", + GetValue: func(s UserSettings) string { + return s.Theme + }, + SetValue: func(s *UserSettings, val string) { + s.Theme = val + s.IsThemeDark = (val == "dark" || val == "night" || val == "black" || val == "latte") + }, + }, + { + Name: "site_language", + Description: "Stores the preferred site language.", + GetValue: func(s UserSettings) string { + return s.SiteLanguage + }, + SetValue: func(s *UserSettings, val string) { + s.SiteLanguage = val + }, + }, + { + Name: "search_language", + Description: "Stores the preferred language for search results.", + GetValue: func(s UserSettings) string { + return s.SearchLanguage + }, + SetValue: func(s *UserSettings, val string) { + s.SearchLanguage = val + }, + }, + { + Name: "safe", + Description: "Stores the Safe Search setting.", + GetValue: func(s UserSettings) string { + return s.SafeSearch + }, + SetValue: func(s *UserSettings, val string) { + s.SafeSearch = val + }, + }, +} + +type CookieRow struct { + Name string + Value string + Description string + Expiration string +} + +func generateCookieTable(r *http.Request) []CookieRow { + var rows []CookieRow + for _, cd := range AllCookies { + value := "[Not Set]" + if cookie, err := r.Cookie(cd.Name); err == nil { + value = cookie.Value + } + rows = append(rows, CookieRow{ + Name: cd.Name, + Value: value, + Description: cd.Description, + Expiration: "90 days", + }) + } + return rows +} From 87000358933f8f91f6c3109785fbab0485a19d41 Mon Sep 17 00:00:00 2001 From: partisan Date: Sun, 5 Jan 2025 20:27:13 +0100 Subject: [PATCH 9/9] fixed 'no more results' text --- files.go | 2 +- forums.go | 3 ++- static/css/style.css | 7 +++++-- templates/files.html | 8 +++++--- templates/forums.html | 7 +++++-- templates/images.html | 7 +++++-- templates/text.html | 7 +++++-- templates/videos.html | 11 ++++++++--- video.go | 1 + 9 files changed, 37 insertions(+), 16 deletions(-) diff --git a/files.go b/files.go index 1755143..d0c1ff1 100755 --- a/files.go +++ b/files.go @@ -56,7 +56,7 @@ func handleFileSearch(w http.ResponseWriter, settings UserSettings, query string "Category": "all", "Sort": "seed", "Page": page, - "HasPrevPage": page > 1, + "HasPrevPage": page >= 1, "HasNextPage": len(combinedResults) > 0, "LanguageOptions": languageOptions, "CurrentLang": settings.SearchLanguage, diff --git a/forums.go b/forums.go index 973c070..bd57e55 100755 --- a/forums.go +++ b/forums.go @@ -118,7 +118,8 @@ func handleForumsSearch(w http.ResponseWriter, settings UserSettings, query stri "Page": page, "Fetched": fmt.Sprintf("%.2f %s", elapsedTime.Seconds(), Translate("seconds")), // Time for fetching results "HasPrevPage": page > 1, - "HasNextPage": len(results) == 25, // Assuming 25 results per page + "HasNextPage": len(results) >= 25, + "NoResults": len(results) == 0, "LanguageOptions": languageOptions, "CurrentLang": settings.SearchLanguage, "Theme": settings.Theme, diff --git a/static/css/style.css b/static/css/style.css index 32ad383..e4b1cd6 100644 --- a/static/css/style.css +++ b/static/css/style.css @@ -1158,17 +1158,20 @@ p { border-radius: 8px; position: relative; border: 1px solid var(--snip-border); - margin-left: 175px; color: var(--fg); width: 530px; padding: 15px; margin-bottom: 627px; - margin-top: -20px; + margin-top: 20px; font-size: 14px; line-height: 1.58; letter-spacing: normal; } +.no-results-found-offset { + margin-left: 175px; +} + /* MAP */ .message { diff --git 
a/templates/files.html b/templates/files.html index a47bf4e..ff35355 100755 --- a/templates/files.html +++ b/templates/files.html @@ -173,12 +173,14 @@ {{ end }}
- {{ else }} -
+ {{else if .NoResults}} +
{{ translate "no_results_found" .Query }}
{{ translate "suggest_rephrase" }}
- {{ end }} + {{else}} +
{{ translate "no_more_results" }}
+ {{end}}