diff --git a/README.md b/README.md index 5ad3337..9ad86f3 100644 --- a/README.md +++ b/README.md @@ -47,11 +47,11 @@ A self-hosted private search engine designed to be scalable and more resource-ef ### For Self-Hosting -- **Self-hosted option** - Run on your own server for even more privacy. +- **[Easy to Set Up](https://weforge.xyz/Spitfire/Search#running-the-qgato)** - Quick and straightforward setup process for anyone. - **Lightweight** - Low memory footprint (15-30MiB) even during searches. - **Decentralized** - No single point of failure. - **Results caching in RAM** - Faster response times through caching. -- **Configurable** - Tweak features via `config.ini`. +- **[Configurable](https://weforge.xyz/Spitfire/Search/wiki/Configuration)** - Fully customizable via the `config.ini` file. - **Flexible media support** - Images optionally stored on HDD/SSD for caching and improved response time. ### Results Sources @@ -73,30 +73,20 @@ A self-hosted private search engine designed to be scalable and more resource-ef ### Running the QGato -Linux: - ```bash git clone https://weforge.xyz/Spitfire/Search.git cd Search -chmod +x ./run.sh -./run.sh -``` - -Windows: - -```powershell -git clone https://weforge.xyz/Spitfire/Search.git -cd Search -.\run.bat +go run . ``` *It's that easy!* ### Configuring -Configuration is done via the ``config.ini`` file. -On first start, you will be guided through the basic setup. -More advanced setup and all options will be listed here later, as this is still being updated. +- Configuration is done via the `config.ini` file. +- On first start, you will be guided through the basic setup. +- For more advanced configuration options, visit the [Wiki Configuration Page](https://weforge.xyz/Spitfire/Search/wiki/Configuration). + ## License diff --git a/config.go b/config.go index 18d83cf..3bb4eb7 100644 --- a/config.go +++ b/config.go @@ -23,18 +23,25 @@ type CacheConfig struct { } type Config struct { - Port int // Added - AuthCode string // Added - PeerID string // Added - Peers []string - Domain string // Added - NodesEnabled bool // Added - CrawlerEnabled bool // Added - IndexerEnabled bool // Added - WebsiteEnabled bool // Added - RamCacheEnabled bool - DriveCacheEnabled bool // Added - LogLevel int // Added + Port int // Added + AuthCode string // Added + PeerID string // Added + Peers []string + Domain string // Added + NodesEnabled bool // Added + MetaSearchEnabled bool // Added + IndexerEnabled bool // Added + WebsiteEnabled bool // Added + RamCacheEnabled bool + DriveCacheEnabled bool // Added + MetaProxyEnabled bool // Added + MetaProxyStrict bool // Added + MetaProxies []string // Added + CrawlerProxyEnabled bool // Added + CrawlerProxyStrict bool // Added + CrawlerProxies []string // Added + // Maybe add Proxy support for Image Extraction? + LogLevel int // Added ConcurrentStandardCrawlers int ConcurrentChromeCrawlers int CrawlingInterval time.Duration // Refresh crawled results in...
@@ -51,11 +58,17 @@ var defaultConfig = Config{ Peers: []string{}, AuthCode: generateStrongRandomString(64), NodesEnabled: false, - CrawlerEnabled: true, + MetaSearchEnabled: true, IndexerEnabled: false, WebsiteEnabled: true, RamCacheEnabled: true, DriveCacheEnabled: false, + MetaProxyEnabled: false, + MetaProxyStrict: true, + MetaProxies: []string{}, + CrawlerProxyEnabled: false, + CrawlerProxyStrict: true, + CrawlerProxies: []string{}, ConcurrentStandardCrawlers: 12, ConcurrentChromeCrawlers: 4, CrawlingInterval: 24 * time.Hour, @@ -245,14 +258,23 @@ func saveConfig(config Config) { // Features section featuresSec := cfg.Section("Features") featuresSec.Key("Nodes").SetValue(strconv.FormatBool(config.NodesEnabled)) - featuresSec.Key("Crawler").SetValue(strconv.FormatBool(config.CrawlerEnabled)) + featuresSec.Key("Crawler").SetValue(strconv.FormatBool(config.MetaSearchEnabled)) featuresSec.Key("Indexer").SetValue(strconv.FormatBool(config.IndexerEnabled)) featuresSec.Key("Website").SetValue(strconv.FormatBool(config.WebsiteEnabled)) + featuresSec.Key("MetaProxy").SetValue(strconv.FormatBool(config.MetaProxyEnabled)) + featuresSec.Key("CrawlerProxy").SetValue(strconv.FormatBool(config.CrawlerProxyEnabled)) + + // Proxies section + proxiesSec := cfg.Section("Proxies") + proxiesSec.Key("MetaProxyStrict").SetValue(strconv.FormatBool(config.MetaProxyStrict)) + proxiesSec.Key("MetaProxies").SetValue(strings.Join(config.MetaProxies, ",")) + proxiesSec.Key("CrawlerProxyStrict").SetValue(strconv.FormatBool(config.CrawlerProxyStrict)) + proxiesSec.Key("CrawlerProxies").SetValue(strings.Join(config.CrawlerProxies, ",")) // Indexer section indexerSec := cfg.Section("Indexer") indexerSec.Key("ConcurrentStandardCrawlers").SetValue(strconv.Itoa(config.ConcurrentStandardCrawlers)) - indexerSec.Key("ConcurrentChromeCrawlers").SetValue(strconv.Itoa(config.ConcurrentStandardCrawlers)) + indexerSec.Key("ConcurrentChromeCrawlers").SetValue(strconv.Itoa(config.ConcurrentChromeCrawlers)) indexerSec.Key("CrawlingInterval").SetValue(config.CrawlingInterval.String()) indexerSec.Key("MaxPagesPerDomain").SetValue(strconv.Itoa(config.MaxPagesPerDomain)) indexerSec.Key("IndexBatchSize").SetValue(strconv.Itoa(config.IndexBatchSize)) @@ -292,11 +314,19 @@ func loadConfig() Config { // Features nodesEnabled := getConfigValueBool(cfg.Section("Features").Key("Nodes"), defaultConfig.NodesEnabled) - crawlerEnabled := getConfigValueBool(cfg.Section("Features").Key("Crawler"), defaultConfig.CrawlerEnabled) + metaSearchEnabled := getConfigValueBool(cfg.Section("Features").Key("Crawler"), defaultConfig.MetaSearchEnabled) indexerEnabled := getConfigValueBool(cfg.Section("Features").Key("Indexer"), defaultConfig.IndexerEnabled) websiteEnabled := getConfigValueBool(cfg.Section("Features").Key("Website"), defaultConfig.WebsiteEnabled) ramCacheEnabled := getConfigValueBool(cfg.Section("Features").Key("RamCache"), defaultConfig.RamCacheEnabled) driveCacheEnabled := getConfigValueBool(cfg.Section("Features").Key("DriveCache"), defaultConfig.DriveCacheEnabled) + metaProxyEnabled := getConfigValueBool(cfg.Section("Features").Key("MetaProxy"), defaultConfig.MetaProxyEnabled) + crawlerProxyEnabled := getConfigValueBool(cfg.Section("Features").Key("CrawlerProxy"), defaultConfig.CrawlerProxyEnabled) + + // Proxies + metaProxyStrict := getConfigValueBool(cfg.Section("Proxies").Key("MetaProxyStrict"), defaultConfig.MetaProxyStrict) + metaProxies := strings.Split(getConfigValueString(cfg.Section("Proxies").Key("MetaProxies"), ""), ",") + 
crawlerProxyStrict := getConfigValueBool(cfg.Section("Proxies").Key("CrawlerProxyStrict"), defaultConfig.CrawlerProxyStrict) + crawlerProxies := strings.Split(getConfigValueString(cfg.Section("Proxies").Key("CrawlerProxies"), ""), ",") // Indexing concurrentStandardCrawlers := getConfigValue(cfg.Section("Indexer").Key("ConcurrentStandardCrawlers"), defaultConfig.ConcurrentStandardCrawlers, strconv.Atoi) @@ -325,11 +355,17 @@ func loadConfig() Config { AuthCode: authCode, Peers: peers, NodesEnabled: nodesEnabled, - CrawlerEnabled: crawlerEnabled, + MetaSearchEnabled: metaSearchEnabled, IndexerEnabled: indexerEnabled, WebsiteEnabled: websiteEnabled, RamCacheEnabled: ramCacheEnabled, DriveCacheEnabled: driveCacheEnabled, + MetaProxyEnabled: metaProxyEnabled, + MetaProxyStrict: metaProxyStrict, + MetaProxies: metaProxies, + CrawlerProxyEnabled: crawlerProxyEnabled, + CrawlerProxyStrict: crawlerProxyStrict, + CrawlerProxies: crawlerProxies, ConcurrentStandardCrawlers: concurrentStandardCrawlers, ConcurrentChromeCrawlers: concurrentChromeCrawlers, CrawlingInterval: crawlingInterval, diff --git a/crawler-extraction.go b/crawler-extraction.go index 4ce8b9d..d0dd06e 100644 --- a/crawler-extraction.go +++ b/crawler-extraction.go @@ -32,8 +32,12 @@ func fetchPageMetadataStandard(pageURL, userAgent string) (string, string, strin // fetchPageMetadataChrome uses Chromedp to handle JavaScript-rendered pages. func fetchPageMetadataChrome(pageURL, userAgent string) (string, string, string) { - // Create context - ctx, cancel := chromedp.NewContext(context.Background()) + // Create a custom allocator context for Chromedp with proxy support if enabled + allocCtx, cancelAlloc := chromedp.NewExecAllocator(context.Background(), configureChromeOptions()...) + defer cancelAlloc() + + // Create a browser context + ctx, cancel := chromedp.NewContext(allocCtx) defer cancel() var renderedHTML string @@ -57,6 +61,32 @@ func fetchPageMetadataChrome(pageURL, userAgent string) (string, string, string) return extractParsedDOM(doc) } +// configureChromeOptions sets up Chrome options and proxy if CrawlerProxy is enabled. +func configureChromeOptions() []chromedp.ExecAllocatorOption { + options := chromedp.DefaultExecAllocatorOptions[:] + + if config.CrawlerProxyEnabled && crawlerProxyClient != nil { + // Retrieve proxy settings from CrawlerProxy + proxy := crawlerProxyClient.GetProxy() // Ensure a `GetProxy` method is implemented for your proxy client + if proxy != "" { + options = append(options, chromedp.ProxyServer(proxy)) + printDebug("Using CrawlerProxy for Chromedp: %s", proxy) + } else { + printWarn("CrawlerProxy is enabled but no valid proxy is available") + } + } + + // // Add additional Chrome + // options = append(options, + // chromedp.Flag("headless", true), + // chromedp.Flag("disable-gpu", true), + // chromedp.Flag("no-sandbox", true), + // chromedp.Flag("disable-setuid-sandbox", true), + // ) + + return options +} + // extractStandard does the normal HTML parse with OG, Twitter, etc. 
func extractStandard(pageURL, userAgent string) (title, desc, keywords string) { client := &http.Client{Timeout: 15 * time.Second} @@ -68,7 +98,13 @@ func extractStandard(pageURL, userAgent string) (title, desc, keywords string) { req.Header.Set("User-Agent", userAgent) req.Header.Set("Accept-Language", "en-US,en;q=0.9") - resp, err := client.Do(req) + // Use CrawlerProxy if enabled + var resp *http.Response + if config.CrawlerProxyEnabled && crawlerProxyClient != nil { + resp, err = crawlerProxyClient.Do(req) + } else { + resp, err = client.Do(req) + } if err != nil { printDebug("Failed to GET %s: %v", pageURL, err) return @@ -185,7 +221,13 @@ func fallbackReadability(pageURL, userAgent, title, desc, keywords string) (stri readReq.Header.Set("User-Agent", userAgent) readReq.Header.Set("Accept-Language", "en-US,en;q=0.9") - readResp, err := client.Do(readReq) + // Use CrawlerProxy if enabled + var readResp *http.Response + if config.CrawlerProxyEnabled && crawlerProxyClient != nil { + readResp, err = crawlerProxyClient.Do(readReq) + } else { + readResp, err = client.Do(readReq) + } if err != nil || readResp.StatusCode < 200 || readResp.StatusCode >= 300 { if err != nil { printDebug("go-readability GET error for %s: %v", pageURL, err) diff --git a/files-thepiratebay.go b/files-thepiratebay.go index b98ee27..df9484a 100644 --- a/files-thepiratebay.go +++ b/files-thepiratebay.go @@ -57,31 +57,40 @@ func (t *ThePirateBay) Search(query string, category string) ([]TorrentResult, e return []TorrentResult{}, nil } - url := fmt.Sprintf("https://%s/q.php?q=%s&cat=%s", PIRATEBAY_DOMAIN, url.QueryEscape(query), categoryCode) + searchURL := fmt.Sprintf("https://%s/q.php?q=%s&cat=%s", PIRATEBAY_DOMAIN, url.QueryEscape(query), categoryCode) // User Agent generation userAgent, err := GetUserAgent("files-tpb") if err != nil { - fmt.Println("Error:", err) - return nil, err + return nil, fmt.Errorf("error generating User-Agent: %w", err) } - req, err := http.NewRequest("GET", url, nil) + req, err := http.NewRequest("GET", searchURL, nil) if err != nil { - return nil, err + return nil, fmt.Errorf("error creating request: %w", err) } req.Header.Set("User-Agent", userAgent) - client := &http.Client{} - response, err := client.Do(req) - if err != nil { - return nil, err + // Perform the request using MetaProxy if enabled + var resp *http.Response + if config.MetaProxyEnabled && metaProxyClient != nil { + resp, err = metaProxyClient.Do(req) + } else { + client := &http.Client{} + resp, err = client.Do(req) + } + if err != nil { + return nil, fmt.Errorf("error making request to The Pirate Bay: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode) } - defer response.Body.Close() var torrentData []map[string]interface{} - if err := json.NewDecoder(response.Body).Decode(&torrentData); err != nil { - return nil, err + if err := json.NewDecoder(resp.Body).Decode(&torrentData); err != nil { + return nil, fmt.Errorf("error decoding response JSON: %w", err) } var results []TorrentResult diff --git a/files-torrentgalaxy.go b/files-torrentgalaxy.go index 51f51ca..08c2fa0 100644 --- a/files-torrentgalaxy.go +++ b/files-torrentgalaxy.go @@ -62,18 +62,23 @@ func (tg *TorrentGalaxy) Search(query string, category string) ([]TorrentResult, // User Agent generation userAgent, err := GetUserAgent("files-torrentgalaxy") if err != nil { - fmt.Println("Error:", err) - return nil, err + return nil, fmt.Errorf("error generating 
User-Agent: %w", err) } req, err := http.NewRequest("GET", searchURL, nil) if err != nil { - return nil, err + return nil, fmt.Errorf("error creating request: %w", err) } req.Header.Set("User-Agent", userAgent) - client := &http.Client{} - resp, err := client.Do(req) + // Perform the request using MetaProxy if enabled + var resp *http.Response + if config.MetaProxyEnabled && metaProxyClient != nil { + resp, err = metaProxyClient.Do(req) + } else { + client := &http.Client{} + resp, err = client.Do(req) + } if err != nil { return nil, fmt.Errorf("error making request to TorrentGalaxy: %w", err) } diff --git a/files.go b/files.go index d0c1ff1..f585749 100755 --- a/files.go +++ b/files.go @@ -88,7 +88,7 @@ func getFileResultsFromCacheOrFetch(cacheKey CacheKey, query, safe, lang string, case results := <-cacheChan: if results == nil { // Fetch only if the cache miss occurs and Crawler is enabled - if config.CrawlerEnabled { + if config.MetaSearchEnabled { combinedResults = fetchFileResults(query, safe, lang, page) if len(combinedResults) > 0 { resultsCache.Set(cacheKey, convertToSearchResults(combinedResults)) @@ -102,7 +102,7 @@ func getFileResultsFromCacheOrFetch(cacheKey CacheKey, query, safe, lang string, } case <-time.After(2 * time.Second): printDebug("Cache check timeout") - if config.CrawlerEnabled { + if config.MetaSearchEnabled { combinedResults = fetchFileResults(query, safe, lang, page) if len(combinedResults) > 0 { resultsCache.Set(cacheKey, convertToSearchResults(combinedResults)) @@ -117,7 +117,7 @@ func getFileResultsFromCacheOrFetch(cacheKey CacheKey, query, safe, lang string, func fetchFileResults(query, safe, lang string, page int) []TorrentResult { // If Crawler is disabled, skip fetching from torrent sites - if !config.CrawlerEnabled { + if !config.MetaSearchEnabled { printInfo("Crawler is disabled; skipping torrent site fetching.") return []TorrentResult{} } diff --git a/forums.go b/forums.go index bd57e55..3388f75 100755 --- a/forums.go +++ b/forums.go @@ -10,7 +10,7 @@ import ( ) func PerformRedditSearch(query string, safe string, page int) ([]ForumSearchResult, error) { - if !config.CrawlerEnabled { + if !config.MetaSearchEnabled { printDebug("Crawler is disabled; skipping forum search.") return []ForumSearchResult{}, nil } @@ -150,7 +150,7 @@ func getForumResultsFromCacheOrFetch(cacheKey CacheKey, query, safe, lang string case results := <-cacheChan: if results == nil { // Fetch only if the cache miss occurs and Crawler is enabled - if config.CrawlerEnabled { + if config.MetaSearchEnabled { combinedResults = fetchForumResults(query, safe, lang, page) if len(combinedResults) > 0 { resultsCache.Set(cacheKey, convertToSearchResults(combinedResults)) @@ -164,7 +164,7 @@ func getForumResultsFromCacheOrFetch(cacheKey CacheKey, query, safe, lang string } case <-time.After(2 * time.Second): printDebug("Cache check timeout") - if config.CrawlerEnabled { + if config.MetaSearchEnabled { combinedResults = fetchForumResults(query, safe, lang, page) if len(combinedResults) > 0 { resultsCache.Set(cacheKey, convertToSearchResults(combinedResults)) diff --git a/images-bing.go b/images-bing.go index b6a6aa6..8acc194 100644 --- a/images-bing.go +++ b/images-bing.go @@ -18,8 +18,27 @@ func PerformBingImageSearch(query, safe, lang string, page int) ([]ImageSearchRe // Build the search URL searchURL := buildBingSearchURL(query, page) - // Make the HTTP request - resp, err := http.Get(searchURL) + // Create the HTTP request + req, err := http.NewRequest("GET", searchURL, nil) + if err != 
nil { + return nil, 0, fmt.Errorf("creating request: %v", err) + } + + // Set User-Agent + ImageUserAgent, err := GetUserAgent("Image-Search-Bing") + if err != nil { + return nil, 0, fmt.Errorf("generating User-Agent: %v", err) + } + req.Header.Set("User-Agent", ImageUserAgent) + + // Use MetaProxy if enabled + var resp *http.Response + if config.MetaProxyEnabled && metaProxyClient != nil { + resp, err = metaProxyClient.Do(req) + } else { + client := &http.Client{Timeout: 10 * time.Second} + resp, err = client.Do(req) + } if err != nil { return nil, 0, fmt.Errorf("making request: %v", err) } diff --git a/images-deviantart.go b/images-deviantart.go index 3077640..97901d1 100644 --- a/images-deviantart.go +++ b/images-deviantart.go @@ -87,15 +87,21 @@ func PerformDeviantArtImageSearch(query, safe, lang string, page int) ([]ImageSe return nil, 0, err } - // Make the HTTP request with User-Agent header - client := &http.Client{} + // Create the HTTP request req, err := http.NewRequest("GET", searchURL, nil) if err != nil { return nil, 0, fmt.Errorf("creating request: %v", err) } req.Header.Set("User-Agent", DeviantArtImageUserAgent) - resp, err := client.Do(req) + // Perform the request using MetaProxy if enabled + var resp *http.Response + if config.MetaProxyEnabled && metaProxyClient != nil { + resp, err = metaProxyClient.Do(req) + } else { + client := &http.Client{} + resp, err = client.Do(req) + } if err != nil { return nil, 0, fmt.Errorf("making request: %v", err) } @@ -182,7 +188,7 @@ func PerformDeviantArtImageSearch(query, safe, lang string, page int) ([]ImageSe duration := time.Since(startTime) - // Check if the number of results is one or less + // Check if the number of results is zero if len(results) == 0 { return nil, duration, fmt.Errorf("no images found") } diff --git a/images-imgur.go b/images-imgur.go index 641f645..a53ae00 100644 --- a/images-imgur.go +++ b/images-imgur.go @@ -18,7 +18,27 @@ func PerformImgurImageSearch(query, safe, lang string, page int) ([]ImageSearchR var results []ImageSearchResult searchURL := buildImgurSearchURL(query, page) - resp, err := http.Get(searchURL) + // Create the HTTP request + req, err := http.NewRequest("GET", searchURL, nil) + if err != nil { + return nil, 0, fmt.Errorf("creating request: %v", err) + } + + // Get the User-Agent string + imgurUserAgent, err := GetUserAgent("Image-Search-Imgur") + if err != nil { + return nil, 0, fmt.Errorf("getting user-agent: %v", err) + } + req.Header.Set("User-Agent", imgurUserAgent) + + // Perform the HTTP request with MetaProxy if enabled + var resp *http.Response + if config.MetaProxyEnabled && metaProxyClient != nil { + resp, err = metaProxyClient.Do(req) + } else { + client := &http.Client{} + resp, err = client.Do(req) + } if err != nil { return nil, 0, fmt.Errorf("making request: %v", err) } @@ -28,6 +48,7 @@ func PerformImgurImageSearch(query, safe, lang string, page int) ([]ImageSearchR return nil, 0, fmt.Errorf("unexpected status code: %d", resp.StatusCode) } + // Parse the HTML document doc, err := goquery.NewDocumentFromReader(resp.Body) if err != nil { return nil, 0, fmt.Errorf("loading HTML document: %v", err) @@ -76,12 +97,35 @@ func PerformImgurImageSearch(query, safe, lang string, page int) ([]ImageSearchR duration := time.Since(startTime) // Calculate the duration + if len(results) == 0 { + return nil, duration, fmt.Errorf("no images found") + } + return results, duration, nil } // scrapeImageFromImgurPage scrapes the image source from the Imgur page func 
scrapeImageFromImgurPage(pageURL string) string { - resp, err := http.Get(pageURL) + req, err := http.NewRequest("GET", pageURL, nil) + if err != nil { + fmt.Printf("Error creating request for page: %v\n", err) + return "" + } + + // Get the User-Agent string + imgurUserAgent, err := GetUserAgent("Image-Search-Imgur") + if err == nil { + req.Header.Set("User-Agent", imgurUserAgent) + } + + // Perform the request using MetaProxy if enabled + var resp *http.Response + if config.MetaProxyEnabled && metaProxyClient != nil { + resp, err = metaProxyClient.Do(req) + } else { + client := &http.Client{} + resp, err = client.Do(req) + } if err != nil { fmt.Printf("Error fetching page: %v\n", err) return "" diff --git a/images-quant.go b/images-quant.go index d85d0f9..619ac35 100644 --- a/images-quant.go +++ b/images-quant.go @@ -97,7 +97,7 @@ func PerformQwantImageSearch(query, safe, lang string, page int) ([]ImageSearchR // Ensure count + offset is within acceptable limits if offset+resultsPerPage > 250 { - return nil, 0, fmt.Errorf("count + offset must be lower than 250 for quant") + return nil, 0, fmt.Errorf("count + offset must be lower than 250 for Qwant") } if safe == "" { @@ -113,21 +113,27 @@ func PerformQwantImageSearch(query, safe, lang string, page int) ([]ImageSearchR offset, safe) - client := &http.Client{Timeout: 10 * time.Second} - + // Create the HTTP request req, err := http.NewRequest("GET", apiURL, nil) if err != nil { return nil, 0, fmt.Errorf("creating request: %v", err) } + // Get the User-Agent string ImageUserAgent, err := GetUserAgent("Image-Search-Quant") if err != nil { - return nil, 0, err + return nil, 0, fmt.Errorf("getting user-agent: %v", err) } + req.Header.Set("User-Agent", ImageUserAgent) - req.Header.Set("User-Agent", ImageUserAgent) // Quant seems to not like some specific User-Agent strings - - resp, err := client.Do(req) + // Perform the request with MetaProxy if enabled + var resp *http.Response + if config.MetaProxyEnabled && metaProxyClient != nil { + resp, err = metaProxyClient.Do(req) + } else { + client := &http.Client{Timeout: 10 * time.Second} + resp, err = client.Do(req) + } if err != nil { return nil, 0, fmt.Errorf("making request: %v", err) } @@ -137,11 +143,13 @@ func PerformQwantImageSearch(query, safe, lang string, page int) ([]ImageSearchR return nil, 0, fmt.Errorf("unexpected status code: %d", resp.StatusCode) } + // Parse the API response var apiResp QwantAPIResponse if err := json.NewDecoder(resp.Body).Decode(&apiResp); err != nil { return nil, 0, fmt.Errorf("decoding response: %v", err) } + // Process the results var wg sync.WaitGroup results := make([]ImageSearchResult, len(apiResp.Data.Result.Items)) @@ -174,5 +182,9 @@ func PerformQwantImageSearch(query, safe, lang string, page int) ([]ImageSearchR duration := time.Since(startTime) // Calculate the duration + if len(results) == 0 { + return nil, duration, fmt.Errorf("no images found") + } + return results, duration, nil } diff --git a/images.go b/images.go index a044013..6365f3a 100755 --- a/images.go +++ b/images.go @@ -86,7 +86,7 @@ func getImageResultsFromCacheOrFetch(cacheKey CacheKey, query, safe, lang string select { case results := <-cacheChan: if results == nil { - if config.CrawlerEnabled { + if config.MetaSearchEnabled { combinedResults = fetchImageResults(query, safe, lang, page, synchronous) if len(combinedResults) > 0 { combinedResults = filterValidImages(combinedResults) @@ -101,7 +101,7 @@ func getImageResultsFromCacheOrFetch(cacheKey CacheKey, query, safe, lang string } 
case <-time.After(2 * time.Second): printDebug("Cache check timeout") - if config.CrawlerEnabled { + if config.MetaSearchEnabled { combinedResults = fetchImageResults(query, safe, lang, page, synchronous) if len(combinedResults) > 0 { combinedResults = filterValidImages(combinedResults) @@ -118,8 +118,8 @@ func getImageResultsFromCacheOrFetch(cacheKey CacheKey, query, safe, lang string func fetchImageResults(query, safe, lang string, page int, synchronous bool) []ImageSearchResult { var results []ImageSearchResult - // Check if CrawlerEnabled is false - if !config.CrawlerEnabled { + // Check if MetaSearchEnabled is false + if !config.MetaSearchEnabled { printDebug("Crawler is disabled; skipping image search engine fetching.") return results } diff --git a/init.go b/init.go index bf0d220..f8dddd8 100644 --- a/init.go +++ b/init.go @@ -60,8 +60,12 @@ func main() { } config.PeerID = hostID + if config.CrawlerProxyEnabled || config.MetaProxyEnabled { + InitProxies() + } + // Initiate Browser Agent updater - if config.CrawlerEnabled || config.IndexerEnabled { + if config.MetaSearchEnabled || config.IndexerEnabled { go periodicAgentUpdate() } diff --git a/proxy.go b/proxy.go new file mode 100644 index 0000000..bb1d20d --- /dev/null +++ b/proxy.go @@ -0,0 +1,169 @@ +package main + +import ( + "fmt" + "net/http" + "strings" + "sync" + "time" + + "golang.org/x/net/proxy" +) + +// ProxyConfig holds configuration for a single proxy. +type ProxyConfig struct { + Address string + Username string + Password string +} + +// ProxyClient provides an HTTP client pool for proxies. +type ProxyClient struct { + clients []*http.Client + lock sync.Mutex + index int +} + +// Package-level proxy clients +var ( + metaProxyClient *ProxyClient + crawlerProxyClient *ProxyClient +) + +// NewProxyClientPool creates a pool of HTTP clients with proxies. +func NewProxyClientPool(proxies []ProxyConfig, timeout time.Duration) (*ProxyClient, error) { + if len(proxies) == 0 { + return nil, fmt.Errorf("no proxies provided") + } + + clients := make([]*http.Client, len(proxies)) + + for i, proxyConfig := range proxies { + var auth *proxy.Auth + if proxyConfig.Username != "" || proxyConfig.Password != "" { + auth = &proxy.Auth{ + User: proxyConfig.Username, + Password: proxyConfig.Password, + } + } + + dialer, err := proxy.SOCKS5("tcp", proxyConfig.Address, auth, proxy.Direct) + if err != nil { + return nil, fmt.Errorf("failed to create SOCKS5 dialer for %s: %w", proxyConfig.Address, err) + } + + transport := &http.Transport{Dial: dialer.Dial} + clients[i] = &http.Client{ + Transport: transport, + Timeout: timeout, + } + } + + return &ProxyClient{clients: clients}, nil +} + +// Do sends an HTTP request using the next proxy in the pool. 
+func (p *ProxyClient) Do(req *http.Request) (*http.Response, error) { + p.lock.Lock() + client := p.clients[p.index] + p.index = (p.index + 1) % len(p.clients) + p.lock.Unlock() + return client.Do(req) +} + +func (p *ProxyClient) GetProxy() string { + p.lock.Lock() + defer p.lock.Unlock() + + if len(p.clients) == 0 { + return "" + } + + // Round-robin proxy retrieval + client := p.clients[p.index] + p.index = (p.index + 1) % len(p.clients) + + // Assume each client has a proxy string saved + // Example implementation depends on how your proxies are configured + proxyTransport, ok := client.Transport.(*http.Transport) + if ok && proxyTransport.Proxy != nil { + proxyURL, _ := proxyTransport.Proxy(nil) + if proxyURL != nil { + return proxyURL.String() + } + } + + return "" +} + +// ParseProxies parses the proxy strings in the format ADDRESS:PORT or ADDRESS:PORT:USER:PASSWORD. +func ParseProxies(proxyStrings []string) []ProxyConfig { + var proxies []ProxyConfig + for _, proxy := range proxyStrings { + parts := strings.Split(proxy, ":") + if len(parts) == 2 { // ADDRESS:PORT + proxies = append(proxies, ProxyConfig{ + Address: fmt.Sprintf("%s:%s", parts[0], parts[1]), + }) + } else if len(parts) == 4 { // ADDRESS:PORT:USER:PASSWORD + proxies = append(proxies, ProxyConfig{ + Address: fmt.Sprintf("%s:%s", parts[0], parts[1]), + Username: parts[2], + Password: parts[3], + }) + } else { + fmt.Printf("Invalid proxy format: %s\n", proxy) + } + } + return proxies +} + +// InitProxies initializes the proxy clients for Meta and Crawler proxies. +func InitProxies() { + // Initialize Meta Proxy Client + if config.MetaProxyEnabled { + metaProxies := ParseProxies(config.MetaProxies) + client, err := NewProxyClientPool(metaProxies, 30*time.Second) + if err != nil { + if config.MetaProxyStrict { + panic(fmt.Sprintf("Failed to initialize Meta proxies: %v", err)) + } + fmt.Printf("Warning: Meta proxy initialization failed: %v\n", err) + } + metaProxyClient = client + } + + // Initialize Crawler Proxy Client + if config.CrawlerProxyEnabled { + crawlerProxies := ParseProxies(config.CrawlerProxies) + client, err := NewProxyClientPool(crawlerProxies, 30*time.Second) + if err != nil { + if config.CrawlerProxyStrict { + panic(fmt.Sprintf("Failed to initialize Crawler proxies: %v", err)) + } + fmt.Printf("Warning: Crawler proxy initialization failed: %v\n", err) + } + crawlerProxyClient = client + } +} + +// func main() { +// config := loadConfig() + +// // Initialize proxies if enabled +// if config.CrawlerProxyEnabled || config.MetaProxyEnabled { +// InitProxies() +// } + +// // Example usage +// if metaProxyClient != nil { +// req, _ := http.NewRequest("GET", "https://example.com", nil) +// resp, err := metaProxyClient.Do(req) +// if err != nil { +// fmt.Printf("Error using MetaProxyClient: %v\n", err) +// } else { +// fmt.Printf("Meta Proxy Response Status: %s\n", resp.Status) +// resp.Body.Close() +// } +// } +// } diff --git a/text-brave.go b/text-brave.go index b8dc86a..624f51c 100644 --- a/text-brave.go +++ b/text-brave.go @@ -33,9 +33,16 @@ func PerformBraveTextSearch(query, safe, lang string, offset int) ([]TextSearchR } req.Header.Set("User-Agent", TextUserAgent) - // Perform the HTTP request - client := &http.Client{} - resp, err := client.Do(req) + var resp *http.Response + + // Determine whether to use a proxy client or a default client + if config.MetaProxyEnabled && metaProxyClient != nil { + resp, err = metaProxyClient.Do(req) + } else { + client := &http.Client{} + resp, err = client.Do(req) + } + if 
err != nil { printWarn("Error performing request: %v", err) return nil, 0, fmt.Errorf("performing request: %v", err) diff --git a/text-duckduckgo.go b/text-duckduckgo.go index b4033c1..dd6cf38 100644 --- a/text-duckduckgo.go +++ b/text-duckduckgo.go @@ -16,21 +16,38 @@ func PerformDuckDuckGoTextSearch(query, safe, lang string, page int) ([]TextSear var results []TextSearchResult searchURL := buildDuckDuckGoSearchURL(query, page) - resp, err := http.Get(searchURL) + // Create a request + req, err := http.NewRequest("GET", searchURL, nil) + if err != nil { + return nil, 0, fmt.Errorf("creating request: %v", err) + } + + // Use proxy client if MetaProxy is enabled + var resp *http.Response + if config.MetaProxyEnabled && metaProxyClient != nil { + resp, err = metaProxyClient.Do(req) + } else { + client := &http.Client{} + resp, err = client.Do(req) + } + if err != nil { return nil, 0, fmt.Errorf("making request: %v", err) } defer resp.Body.Close() + // Check for HTTP status code if resp.StatusCode != http.StatusOK { return nil, 0, fmt.Errorf("unexpected status code: %d", resp.StatusCode) } + // Parse HTML response doc, err := goquery.NewDocumentFromReader(resp.Body) if err != nil { return nil, 0, fmt.Errorf("loading HTML document: %v", err) } + // Extract results from the page doc.Find(".result__body").Each(func(i int, s *goquery.Selection) { header := s.Find(".result__a").Text() description := s.Find(".result__snippet").Text() diff --git a/text-google.go b/text-google.go index a706aff..ce397ca 100644 --- a/text-google.go +++ b/text-google.go @@ -16,37 +16,48 @@ func PerformGoogleTextSearch(query, safe, lang string, page int) ([]TextSearchRe startTime := time.Now() // Start the timer - client := &http.Client{} + // Build the search URL searchURL := buildSearchURL(query, safe, lang, page, resultsPerPage) + // Create a new request req, err := http.NewRequest("GET", searchURL, nil) if err != nil { return nil, 0, fmt.Errorf("failed to create request: %v", err) } - // User Agent generation - TextUserAgent, err := GetUserAgent("Text-Search") + // Generate and set User-Agent header + TextUserAgent, err := GetUserAgent("Google-Text-Search") if err != nil { return nil, 0, err } - req.Header.Set("User-Agent", TextUserAgent) - resp, err := client.Do(req) + // Perform the request using proxy if MetaProxy is enabled + var resp *http.Response + if config.MetaProxyEnabled && metaProxyClient != nil { + resp, err = metaProxyClient.Do(req) + } else { + client := &http.Client{} + resp, err = client.Do(req) + } + if err != nil { return nil, 0, fmt.Errorf("making request: %v", err) } defer resp.Body.Close() + // Check for HTTP status code if resp.StatusCode != http.StatusOK { return nil, 0, fmt.Errorf("unexpected status code: %d", resp.StatusCode) } + // Parse the HTML response doc, err := goquery.NewDocumentFromReader(resp.Body) if err != nil { return nil, 0, fmt.Errorf("loading HTML document: %v", err) } + // Extract search results results = parseResults(doc) duration := time.Since(startTime) // Calculate the duration diff --git a/text-librex.go b/text-librex.go index 55eeb91..fe4374d 100644 --- a/text-librex.go +++ b/text-librex.go @@ -28,7 +28,7 @@ func PerformLibreXTextSearch(query, safe, lang string, page int) ([]TextSearchRe searchURL := fmt.Sprintf("https://%s/api.php?q=%s&p=%d&t=0", LIBREX_DOMAIN, url.QueryEscape(query), page) - // User Agent generation + // Generate User-Agent userAgent, err := GetUserAgent("librex-text-search") if err != nil { return nil, 0, err @@ -40,8 +40,15 @@ func 
PerformLibreXTextSearch(query, safe, lang string, page int) ([]TextSearchRe } req.Header.Set("User-Agent", userAgent) - client := &http.Client{} - resp, err := client.Do(req) + // Perform the request using the appropriate client + var resp *http.Response + if config.MetaProxyEnabled && metaProxyClient != nil { + resp, err = metaProxyClient.Do(req) + } else { + client := &http.Client{} + resp, err = client.Do(req) + } + if err != nil { return nil, 0, logError("error making request to LibreX", err) } @@ -77,7 +84,7 @@ func PerformLibreXTextSearch(query, safe, lang string, page int) ([]TextSearchRe return results, duration, nil } -// This is just stupid it will probbably lead to printing error twice +// This is just stupid it will probably lead to printing error twice func logError(message string, err error) error { log.Printf("%s: %v", message, err) return fmt.Errorf("%s: %w", message, err) diff --git a/text-quant.go b/text-quant.go index de8b03a..5594d29 100644 --- a/text-quant.go +++ b/text-quant.go @@ -46,8 +46,6 @@ func PerformQwantTextSearch(query, safe, lang string) ([]TextSearchResult, error lang, offset) - client := &http.Client{Timeout: 10 * time.Second} - req, err := http.NewRequest("GET", apiURL, nil) if err != nil { return nil, fmt.Errorf("creating request: %v", err) @@ -55,7 +53,15 @@ func PerformQwantTextSearch(query, safe, lang string) ([]TextSearchResult, error req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36") - resp, err := client.Do(req) + // Perform the request using the appropriate client + var resp *http.Response + if config.MetaProxyEnabled && metaProxyClient != nil { + resp, err = metaProxyClient.Do(req) + } else { + client := &http.Client{Timeout: 10 * time.Second} + resp, err = client.Do(req) + } + if err != nil { return nil, fmt.Errorf("making request: %v", err) } diff --git a/text-searchxng.go b/text-searchxng.go index 8fd13fe..08cabbf 100644 --- a/text-searchxng.go +++ b/text-searchxng.go @@ -39,7 +39,6 @@ const searxInstancesURL = "https://searx.space/data/instances.json" // FetchInstances fetches available SearX instances from the registry. 
func fetchInstances() ([]Instance, error) { - client := &http.Client{Timeout: 10 * time.Second} req, err := http.NewRequest("GET", searxInstancesURL, nil) if err != nil { return nil, fmt.Errorf("creating request: %v", err) @@ -51,7 +50,14 @@ func fetchInstances() ([]Instance, error) { } req.Header.Set("User-Agent", XNGUserAgent) - resp, err := client.Do(req) + var resp *http.Response + if config.MetaProxyEnabled && config.MetaProxyStrict && metaProxyClient != nil { + resp, err = metaProxyClient.Do(req) + } else { + client := &http.Client{Timeout: 10 * time.Second} + resp, err = client.Do(req) + } + if err != nil { return nil, fmt.Errorf("performing request: %v", err) } @@ -191,7 +197,6 @@ func PerformSearXTextSearch(query, categories, language string, page int) ([]Tex searchURL := fmt.Sprintf("%s/search?q=%s&categories=%s&language=%s&safe_search=%s&page=%d&format=json", instance.URL, url.QueryEscape(query), categories, language, safe, page) - client := &http.Client{Timeout: 10 * time.Second} req, err := http.NewRequest("GET", searchURL, nil) if err != nil { return nil, 0, fmt.Errorf("creating request: %v", err) @@ -203,7 +208,13 @@ func PerformSearXTextSearch(query, categories, language string, page int) ([]Tex } req.Header.Set("User-Agent", XNGUserAgent) - resp, err := client.Do(req) + var resp *http.Response + if config.MetaProxyEnabled && metaProxyClient != nil { + resp, err = metaProxyClient.Do(req) + } else { + client := &http.Client{Timeout: 10 * time.Second} + resp, err = client.Do(req) + } if err != nil { return nil, 0, fmt.Errorf("performing request: %v", err) } diff --git a/text.go b/text.go index 07ec09a..fb4a892 100755 --- a/text.go +++ b/text.go @@ -98,7 +98,7 @@ func prefetchPage(query, safe, lang string, page int) { cacheKey := CacheKey{Query: query, Page: page, Safe: safe == "active", Lang: lang, Type: "text"} if _, exists := resultsCache.Get(cacheKey); !exists { printInfo("Page %d not cached, caching now...", page) - if config.CrawlerEnabled { + if config.MetaSearchEnabled { pageResults := fetchTextResults(query, safe, lang, page) if len(pageResults) > 0 { resultsCache.Set(cacheKey, convertToSearchResults(pageResults)) @@ -114,7 +114,7 @@ func prefetchPage(query, safe, lang string, page int) { func fetchTextResults(query, safe, lang string, page int) []TextSearchResult { var results []TextSearchResult - if !config.CrawlerEnabled { + if !config.MetaSearchEnabled { printDebug("Crawler is disabled; fetching from local index.") // Calculate the starting position based on the page number diff --git a/video.go b/video.go index 3120367..a2fa3fb 100644 --- a/video.go +++ b/video.go @@ -151,7 +151,7 @@ func handleVideoSearch(w http.ResponseWriter, settings UserSettings, query strin start := time.Now() var results []VideoResult - if config.CrawlerEnabled { + if config.MetaSearchEnabled { results = fetchVideoResults(query, settings.SafeSearch, settings.SearchLanguage, page) } @@ -184,12 +184,12 @@ func handleVideoSearch(w http.ResponseWriter, settings UserSettings, query strin func fetchVideoResults(query, safe, lang string, page int) []VideoResult { // Check if the crawler is enabled - if !config.CrawlerEnabled { + if !config.MetaSearchEnabled { printDebug("Crawler is disabled; skipping video search.") return []VideoResult{} } - // Proceed with Piped API request if CrawlerEnabled + // Proceed with Piped API request if MetaSearchEnabled apiResp, err := makeHTMLRequest(query, safe, lang, page) if err != nil { printWarn("Error fetching video results: %v", err)
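
For reference, a minimal sketch of the `config.ini` sections that the new proxy support reads and writes. The key names mirror those used in `saveConfig`/`loadConfig` above (`MetaProxy` and `CrawlerProxy` under `[Features]`, the strict flags and comma-separated proxy lists under `[Proxies]`); the proxy addresses below are hypothetical placeholders, and `ParseProxies` expects SOCKS5 endpoints in `ADDRESS:PORT` or `ADDRESS:PORT:USER:PASSWORD` form. The exact layout of a file generated by the application may differ.

```ini
[Features]
MetaProxy = true
CrawlerProxy = false

[Proxies]
MetaProxyStrict = true
MetaProxies = 127.0.0.1:9050,proxy.example.org:1080:user:pass
CrawlerProxyStrict = true
CrawlerProxies =
```

With `MetaProxyStrict` (or `CrawlerProxyStrict`) enabled, `InitProxies` panics if the corresponding pool cannot be built; otherwise it only logs a warning, and requests fall back to direct HTTP clients because every call site checks `metaProxyClient != nil` / `crawlerProxyClient != nil` before using the pool.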