From 5e6fc730381710795fde88c01bcb28a96dc41e97 Mon Sep 17 00:00:00 2001 From: partisan Date: Fri, 21 Feb 2025 19:47:55 +0100 Subject: [PATCH] added config values to configure enabled meta-search engines --- config.go | 59 ++++++++++++++++++++++++++++ files.go | 33 ++++++++++++---- images.go | 23 ++++++++--- init.go | 8 ++++ text-librex.go | 104 ++++++++++++++++++++++++------------------------- text.go | 26 +++++++++---- video.go | 19 +++------ 7 files changed, 184 insertions(+), 88 deletions(-) diff --git a/config.go b/config.go index 3bb4eb7..30afcf3 100644 --- a/config.go +++ b/config.go @@ -22,6 +22,13 @@ type CacheConfig struct { Path string } +type MetaSearchConfig struct { + Text []string + Image []string + Files []string + Video []string +} + type Config struct { Port int // Added AuthCode string // Added @@ -47,6 +54,9 @@ type Config struct { CrawlingInterval time.Duration // Refres crawled results in... MaxPagesPerDomain int // Max pages to crawl per domain IndexBatchSize int + LibreXInstances []string + + MetaSearch MetaSearchConfig DriveCache CacheConfig RamCache CacheConfig @@ -75,6 +85,33 @@ var defaultConfig = Config{ MaxPagesPerDomain: 10, IndexBatchSize: 50, LogLevel: 1, + LibreXInstances: []string{"librex.antopie.org"}, + MetaSearch: MetaSearchConfig{ + // For Text search (skip SearXNG and LibreX by default, as that would be mega stupid) + Text: []string{"Google", "Brave", "DuckDuckGo"}, + + // For Image search + Image: []string{"Qwant", "Bing", "DeviantArt"}, + + // For Files search + Files: []string{"TorrentGalaxy", "ThePirateBay"}, + + // For Video (piped instances) + Video: []string{ + "api.piped.yt", + "pipedapi.moomoo.me", + "pipedapi.darkness.services", + "pipedapi.kavin.rocks", + "piped-api.hostux.net", + "pipedapi.syncpundit.io", + "piped-api.cfe.re", + "pipedapi.in.projectsegfau.lt", + "piapi.ggtyler.dev", + "piped-api.codespace.cz", + "pipedapi.coldforge.xyz", + "pipedapi.osphost.fi", + }, + }, DriveCache: CacheConfig{ Duration: 48 * 
time.Hour, // Added Path: "./cache", // Added @@ -271,6 +308,14 @@ func saveConfig(config Config) { proxiesSec.Key("CrawlerProxyStrict").SetValue(strconv.FormatBool(config.CrawlerProxyStrict)) proxiesSec.Key("CrawlerProxies").SetValue(strings.Join(config.CrawlerProxies, ",")) + // MetaSearch section + metaSec := cfg.Section("MetaSearches") + metaSec.Key("LibreXInstances").SetValue(strings.Join(config.LibreXInstances, ",")) + metaSec.Key("Text").SetValue(strings.Join(config.MetaSearch.Text, ",")) + metaSec.Key("Image").SetValue(strings.Join(config.MetaSearch.Image, ",")) + metaSec.Key("Files").SetValue(strings.Join(config.MetaSearch.Files, ",")) + metaSec.Key("Video").SetValue(strings.Join(config.MetaSearch.Video, ",")) + // Indexer section indexerSec := cfg.Section("Indexer") indexerSec.Key("ConcurrentStandardCrawlers").SetValue(strconv.Itoa(config.ConcurrentStandardCrawlers)) @@ -328,6 +373,13 @@ func loadConfig() Config { crawlerProxyStrict := getConfigValueBool(cfg.Section("Proxies").Key("CrawlerProxyStrict"), defaultConfig.CrawlerProxyStrict) crawlerProxies := strings.Split(getConfigValueString(cfg.Section("Proxies").Key("CrawlerProxies"), ""), ",") + // MetaSearch: every key is read from the "MetaSearches" section, the same one saveConfig writes + searchXInstances := strings.Split(getConfigValueString(cfg.Section("MetaSearches").Key("LibreXInstances"), strings.Join(defaultConfig.LibreXInstances, ",")), ",") + textList := strings.Split(getConfigValueString(cfg.Section("MetaSearches").Key("Text"), strings.Join(defaultConfig.MetaSearch.Text, ",")), ",") + imageList := strings.Split(getConfigValueString(cfg.Section("MetaSearches").Key("Image"), strings.Join(defaultConfig.MetaSearch.Image, ",")), ",") + filesList := strings.Split(getConfigValueString(cfg.Section("MetaSearches").Key("Files"), strings.Join(defaultConfig.MetaSearch.Files, ",")), ",") + videoList := strings.Split(getConfigValueString(cfg.Section("MetaSearches").Key("Video"), strings.Join(defaultConfig.MetaSearch.Video, ",")), ",") + // Indexing concurrentStandardCrawlers := 
getConfigValue(cfg.Section("Indexer").Key("ConcurrentStandardCrawlers"), defaultConfig.ConcurrentStandardCrawlers, strconv.Atoi) concurrentChromeCrawlers := getConfigValue(cfg.Section("Indexer").Key("ConcurrentChromeCrawlers"), defaultConfig.ConcurrentChromeCrawlers, strconv.Atoi) @@ -371,6 +423,13 @@ func loadConfig() Config { CrawlingInterval: crawlingInterval, MaxPagesPerDomain: maxPagesPerDomain, IndexBatchSize: indexBatchSize, + LibreXInstances: searchXInstances, + MetaSearch: MetaSearchConfig{ + Text: textList, + Image: imageList, + Files: filesList, + Video: videoList, + }, DriveCache: CacheConfig{ Duration: driveDuration, MaxUsageBytes: driveMaxUsage, diff --git a/files.go b/files.go index f585749..d2b4837 100755 --- a/files.go +++ b/files.go @@ -30,11 +30,25 @@ var ( var fileResultsChan = make(chan []TorrentResult) -func init() { - torrentGalaxy = NewTorrentGalaxy() - // nyaa = NewNyaa() - thePirateBay = NewThePirateBay() - // rutor = NewRutor() +func initFileEngines() { + + torrentGalaxy = nil + thePirateBay = nil + // nyaa = nil + // rutor = nil + + for _, engineName := range config.MetaSearch.Files { + switch engineName { + case "TorrentGalaxy": + torrentGalaxy = NewTorrentGalaxy() + case "ThePirateBay": + thePirateBay = NewThePirateBay() + // case "Nyaa": + // nyaa = NewNyaa() + // case "Rutor": + // rutor = NewRutor() + } + } } func handleFileSearch(w http.ResponseWriter, settings UserSettings, query string, page int) { @@ -123,7 +137,7 @@ func fetchFileResults(query, safe, lang string, page int) []TorrentResult { } sites := []TorrentSite{torrentGalaxy, nyaa, thePirateBay, rutor} - results := []TorrentResult{} + var results []TorrentResult for _, site := range sites { if site == nil { @@ -140,9 +154,12 @@ func fetchFileResults(query, safe, lang string, page int) []TorrentResult { } } + // If no results, try from other nodes if len(results) == 0 { - printWarn("No file results found for query: %s, trying other nodes", query) - results = 
tryOtherNodesForFileSearch(query, safe, lang, page, []string{hostID}) + if config.NodesEnabled { + printWarn("No file results found for query: %s, trying other nodes", query) + results = tryOtherNodesForFileSearch(query, safe, lang, page, []string{hostID}) + } } return results diff --git a/images.go b/images.go index 6365f3a..52d2e67 100755 --- a/images.go +++ b/images.go @@ -10,12 +10,23 @@ import ( var imageSearchEngines []SearchEngine -func init() { - imageSearchEngines = []SearchEngine{ - {Name: "Qwant", Func: wrapImageSearchFunc(PerformQwantImageSearch)}, - {Name: "Bing", Func: wrapImageSearchFunc(PerformBingImageSearch)}, - {Name: "DeviantArt", Func: wrapImageSearchFunc(PerformDeviantArtImageSearch)}, - //{Name: "Imgur", Func: wrapImageSearchFunc(PerformImgurImageSearch), Weight: 4}, // Image proxy not working +var allImageSearchEngines = []SearchEngine{ + {Name: "Qwant", Func: wrapImageSearchFunc(PerformQwantImageSearch)}, + {Name: "Bing", Func: wrapImageSearchFunc(PerformBingImageSearch)}, + {Name: "DeviantArt", Func: wrapImageSearchFunc(PerformDeviantArtImageSearch)}, + // {Name: "Imgur", Func: wrapImageSearchFunc(PerformImgurImageSearch), Weight: 4}, // example +} + +func initImageEngines() { + imageSearchEngines = nil + + for _, engineName := range config.MetaSearch.Image { + for _, candidate := range allImageSearchEngines { + if candidate.Name == engineName { + imageSearchEngines = append(imageSearchEngines, candidate) + break + } + } } } diff --git a/init.go b/init.go index f8dddd8..4625fb0 100644 --- a/init.go +++ b/init.go @@ -69,6 +69,14 @@ func main() { go periodicAgentUpdate() } + // Load List of Meta Search Engines + if config.MetaSearchEnabled { + initTextEngines() + initImageEngines() + initFileEngines() + initPipedInstances() + } + InitializeLanguage("en") // Initialize language before generating OpenSearch generateOpenSearchXML(config) diff --git a/text-librex.go b/text-librex.go index 9d96129..c9d25eb 100644 --- a/text-librex.go +++ 
b/text-librex.go @@ -3,14 +3,11 @@ package main import ( "encoding/json" "fmt" - "log" "net/http" "net/url" "time" ) -const LIBREX_DOMAIN = "librex.antopie.org" - type LibreXResult struct { Title string `json:"title"` URL string `json:"url"` @@ -20,13 +17,10 @@ type LibreXResult struct { type LibreXResponse []LibreXResult func PerformLibreXTextSearch(query, safe, lang string, page int) ([]TextSearchResult, time.Duration, error) { - startTime := time.Now() // Start the timer + startTime := time.Now() - // LibreX/Y uses offset instead of page that starts at 0 - page-- - page = page * 10 - - searchURL := fmt.Sprintf("https://%s/api.php?q=%s&p=%d&t=0", LIBREX_DOMAIN, url.QueryEscape(query), page) + // LibreX uses offset instead of page (starting at 0) + pageOffset := (page - 1) * 10 // Generate User-Agent userAgent, err := GetUserAgent("librex-text-search") @@ -34,58 +28,62 @@ func PerformLibreXTextSearch(query, safe, lang string, page int) ([]TextSearchRe return nil, 0, err } - req, err := http.NewRequest("GET", searchURL, nil) - if err != nil { - return nil, 0, err - } - req.Header.Set("User-Agent", userAgent) + var allResults []TextSearchResult - // Perform the request using the appropriate client - var resp *http.Response - if config.MetaProxyEnabled && config.MetaProxyStrict && metaProxyClient != nil { - resp, err = metaProxyClient.Do(req) - } else { - client := &http.Client{} - resp, err = client.Do(req) - } + for _, domain := range config.LibreXInstances { + searchURL := fmt.Sprintf("https://%s/api.php?q=%s&p=%d&t=0", + domain, + url.QueryEscape(query), + pageOffset, + ) - if err != nil { - return nil, 0, logError("error making request to LibreX", err) - } - defer resp.Body.Close() + req, err := http.NewRequest("GET", searchURL, nil) + if err != nil { + printWarn("failed to create request for domain %s: %v", domain, err) + continue + } + req.Header.Set("User-Agent", userAgent) - if resp.StatusCode != http.StatusOK { - return nil, 0, logError("unexpected status 
code", fmt.Errorf("%d", resp.StatusCode)) - } + // Respect MetaProxy if enabled and strict + var resp *http.Response + if config.MetaProxyEnabled && config.MetaProxyStrict && metaProxyClient != nil { + resp, err = metaProxyClient.Do(req) + } else { + client := &http.Client{} + resp, err = client.Do(req) + } + if err != nil { + printWarn("error requesting domain %s: %v", domain, err) + continue + } + defer resp.Body.Close() - var librexResp LibreXResponse - if err := json.NewDecoder(resp.Body).Decode(&librexResp); err != nil { - return nil, 0, logError("error decoding LibreX response", err) - } - - var results []TextSearchResult - for _, item := range librexResp { - result := TextSearchResult{ - URL: item.URL, - Header: item.Title, - Description: item.Description, - Source: "LibreX", + if resp.StatusCode != http.StatusOK { + printWarn("unexpected status code from %s: %d", domain, resp.StatusCode) + continue } - results = append(results, result) + var librexResp LibreXResponse + if err := json.NewDecoder(resp.Body).Decode(&librexResp); err != nil { + printWarn("error decoding response from %s: %v", domain, err) + continue + } + + // Accumulate results from this instance + for _, item := range librexResp { + allResults = append(allResults, TextSearchResult{ + URL: item.URL, + Header: item.Title, + Description: item.Description, + Source: "LibreX", + }) + } } - duration := time.Since(startTime) // Calculate the duration - - if len(results) == 0 { - return nil, duration, fmt.Errorf("no results found") + duration := time.Since(startTime) + if len(allResults) == 0 { + return nil, duration, fmt.Errorf("no results found from any LibreX instance") } - return results, duration, nil -} - -// This is just stupid it will probably lead to printing error twice -func logError(message string, err error) error { - log.Printf("%s: %v", message, err) - return fmt.Errorf("%s: %w", message, err) + return allResults, duration, nil } diff --git a/text.go b/text.go index fb4a892..76600d4 
100755 --- a/text.go +++ b/text.go @@ -8,13 +8,25 @@ import ( var textSearchEngines []SearchEngine -func init() { - textSearchEngines = []SearchEngine{ - {Name: "Google", Func: wrapTextSearchFunc(PerformGoogleTextSearch)}, - {Name: "LibreX", Func: wrapTextSearchFunc(PerformLibreXTextSearch)}, - {Name: "Brave", Func: wrapTextSearchFunc(PerformBraveTextSearch)}, - {Name: "DuckDuckGo", Func: wrapTextSearchFunc(PerformDuckDuckGoTextSearch)}, - // {Name: "SearXNG", Func: wrapTextSearchFunc(PerformSearXTextSearch)}, // Always says StatusCode: 429 +var allTextSearchEngines = []SearchEngine{ + {Name: "Google", Func: wrapTextSearchFunc(PerformGoogleTextSearch)}, + {Name: "LibreX", Func: wrapTextSearchFunc(PerformLibreXTextSearch)}, + {Name: "Brave", Func: wrapTextSearchFunc(PerformBraveTextSearch)}, + {Name: "DuckDuckGo", Func: wrapTextSearchFunc(PerformDuckDuckGoTextSearch)}, + // {Name: "SearXNG", Func: wrapTextSearchFunc(PerformSearXTextSearch)}, // example +} + +func initTextEngines() { + // Rebuild textSearchEngines from config.MetaSearch.Text; names missing from allTextSearchEngines are skipped + textSearchEngines = nil // or make([]SearchEngine, 0) + + for _, engineName := range config.MetaSearch.Text { + for _, candidate := range allTextSearchEngines { + if candidate.Name == engineName { + textSearchEngines = append(textSearchEngines, candidate) + break + } + } } } diff --git a/video.go b/video.go index a2fa3fb..193773c 100644 --- a/video.go +++ b/video.go @@ -12,25 +12,16 @@ import ( const retryDuration = 12 * time.Hour // Retry duration for unresponding piped instances var ( - pipedInstances = []string{ - "api.piped.yt", - "pipedapi.moomoo.me", - "pipedapi.darkness.services", - "pipedapi.kavin.rocks", - "piped-api.hostux.net", - "pipedapi.syncpundit.io", - "piped-api.cfe.re", - "pipedapi.in.projectsegfau.lt", - "piapi.ggtyler.dev", - "piped-api.codespace.cz", - "pipedapi.coldforge.xyz", - "pipedapi.osphost.fi", - } + pipedInstances = []string{} disabledInstances = make(map[string]bool) mu 
sync.Mutex videoResultsChan = make(chan []VideoResult) // Channel to receive video results from other nodes ) +func initPipedInstances() { + pipedInstances = config.MetaSearch.Video +} + // VideoAPIResponse matches the structure of the JSON response from the Piped API type VideoAPIResponse struct { Items []struct {