added ProxyRetry to config and fixed ProxyStrict
Some checks failed
Run Integration Tests / test (push) Failing after 50s

This commit is contained in:
partisan 2025-02-22 22:36:54 +01:00
parent ab707a91e8
commit 35e657bccd
17 changed files with 224 additions and 186 deletions

View file

@ -43,10 +43,12 @@ type Config struct {
DriveCacheEnabled bool DriveCacheEnabled bool
MetaProxyEnabled bool MetaProxyEnabled bool
MetaProxyStrict bool MetaProxyStrict bool
MetaProxyRetry int
MetaProxies []string MetaProxies []string
CrawlerProxyEnabled bool CrawlerProxyEnabled bool
CrawlerProxyStrict bool CrawlerProxyStrict bool
CrawlerProxies []string CrawlerProxies []string
CrawlerProxyRetry int
// Maybe add Proxy support for Image Extraction? // Maybe add Proxy support for Image Extraction?
LogLevel int LogLevel int
ConcurrentStandardCrawlers int ConcurrentStandardCrawlers int
@ -76,9 +78,11 @@ var defaultConfig = Config{
MetaProxyEnabled: false, MetaProxyEnabled: false,
MetaProxyStrict: true, MetaProxyStrict: true,
MetaProxies: []string{}, MetaProxies: []string{},
MetaProxyRetry: 3,
CrawlerProxyEnabled: false, CrawlerProxyEnabled: false,
CrawlerProxyStrict: true, CrawlerProxyStrict: true,
CrawlerProxies: []string{}, CrawlerProxies: []string{},
CrawlerProxyRetry: 1,
ConcurrentStandardCrawlers: 12, ConcurrentStandardCrawlers: 12,
ConcurrentChromeCrawlers: 4, ConcurrentChromeCrawlers: 4,
CrawlingInterval: 24 * time.Hour, CrawlingInterval: 24 * time.Hour,
@ -307,6 +311,8 @@ func saveConfig(config Config) {
proxiesSec.Key("MetaProxies").SetValue(strings.Join(config.MetaProxies, ",")) proxiesSec.Key("MetaProxies").SetValue(strings.Join(config.MetaProxies, ","))
proxiesSec.Key("CrawlerProxyStrict").SetValue(strconv.FormatBool(config.CrawlerProxyStrict)) proxiesSec.Key("CrawlerProxyStrict").SetValue(strconv.FormatBool(config.CrawlerProxyStrict))
proxiesSec.Key("CrawlerProxies").SetValue(strings.Join(config.CrawlerProxies, ",")) proxiesSec.Key("CrawlerProxies").SetValue(strings.Join(config.CrawlerProxies, ","))
proxiesSec.Key("MetaProxyRetry").SetValue(strconv.Itoa(config.MetaProxyRetry))
proxiesSec.Key("CrawlerProxyRetry").SetValue(strconv.Itoa(config.CrawlerProxyRetry))
// MetaSearch section // MetaSearch section
metaSec := cfg.Section("MetaSearches") metaSec := cfg.Section("MetaSearches")
@ -372,6 +378,8 @@ func loadConfig() Config {
metaProxies := strings.Split(getConfigValueString(cfg.Section("Proxies").Key("MetaProxies"), ""), ",") metaProxies := strings.Split(getConfigValueString(cfg.Section("Proxies").Key("MetaProxies"), ""), ",")
crawlerProxyStrict := getConfigValueBool(cfg.Section("Proxies").Key("CrawlerProxyStrict"), defaultConfig.CrawlerProxyStrict) crawlerProxyStrict := getConfigValueBool(cfg.Section("Proxies").Key("CrawlerProxyStrict"), defaultConfig.CrawlerProxyStrict)
crawlerProxies := strings.Split(getConfigValueString(cfg.Section("Proxies").Key("CrawlerProxies"), ""), ",") crawlerProxies := strings.Split(getConfigValueString(cfg.Section("Proxies").Key("CrawlerProxies"), ""), ",")
metaProxyRetry := getConfigValue(cfg.Section("Proxies").Key("MetaProxyRetry"), defaultConfig.MetaProxyRetry, strconv.Atoi)
crawlerProxyRetry := getConfigValue(cfg.Section("Proxies").Key("CrawlerProxyRetry"), defaultConfig.CrawlerProxyRetry, strconv.Atoi)
// MetaSearch // MetaSearch
searchXInstances := strings.Split(getConfigValueString(cfg.Section("MetaSearches").Key("LibreXInstances"), strings.Join(defaultConfig.LibreXInstances, ",")), ",") searchXInstances := strings.Split(getConfigValueString(cfg.Section("MetaSearches").Key("LibreXInstances"), strings.Join(defaultConfig.LibreXInstances, ",")), ",")
@ -415,9 +423,11 @@ func loadConfig() Config {
MetaProxyEnabled: metaProxyEnabled, MetaProxyEnabled: metaProxyEnabled,
MetaProxyStrict: metaProxyStrict, MetaProxyStrict: metaProxyStrict,
MetaProxies: metaProxies, MetaProxies: metaProxies,
MetaProxyRetry: metaProxyRetry,
CrawlerProxyEnabled: crawlerProxyEnabled, CrawlerProxyEnabled: crawlerProxyEnabled,
CrawlerProxyStrict: crawlerProxyStrict, CrawlerProxyStrict: crawlerProxyStrict,
CrawlerProxies: crawlerProxies, CrawlerProxies: crawlerProxies,
CrawlerProxyRetry: crawlerProxyRetry,
ConcurrentStandardCrawlers: concurrentStandardCrawlers, ConcurrentStandardCrawlers: concurrentStandardCrawlers,
ConcurrentChromeCrawlers: concurrentChromeCrawlers, ConcurrentChromeCrawlers: concurrentChromeCrawlers,
CrawlingInterval: crawlingInterval, CrawlingInterval: crawlingInterval,

View file

@ -65,6 +65,7 @@ func fetchPageMetadataChrome(pageURL, userAgent string) (string, string, string)
func configureChromeOptions() []chromedp.ExecAllocatorOption { func configureChromeOptions() []chromedp.ExecAllocatorOption {
options := chromedp.DefaultExecAllocatorOptions[:] options := chromedp.DefaultExecAllocatorOptions[:]
// This code is not using config.CrawlerProxyRetry
if config.CrawlerProxyEnabled && crawlerProxyClient != nil { if config.CrawlerProxyEnabled && crawlerProxyClient != nil {
// Retrieve proxy settings from CrawlerProxy // Retrieve proxy settings from CrawlerProxy
proxy := crawlerProxyClient.GetProxy() // Ensure a `GetProxy` method is implemented for your proxy client proxy := crawlerProxyClient.GetProxy() // Ensure a `GetProxy` method is implemented for your proxy client
@ -89,7 +90,7 @@ func configureChromeOptions() []chromedp.ExecAllocatorOption {
// extractStandard does the normal HTML parse with OG, Twitter, etc. // extractStandard does the normal HTML parse with OG, Twitter, etc.
func extractStandard(pageURL, userAgent string) (title, desc, keywords string) { func extractStandard(pageURL, userAgent string) (title, desc, keywords string) {
client := &http.Client{Timeout: 15 * time.Second}
req, err := http.NewRequest("GET", pageURL, nil) req, err := http.NewRequest("GET", pageURL, nil)
if err != nil { if err != nil {
printDebug("Failed to create request for %s: %v", pageURL, err) printDebug("Failed to create request for %s: %v", pageURL, err)
@ -99,12 +100,7 @@ func extractStandard(pageURL, userAgent string) (title, desc, keywords string) {
req.Header.Set("Accept-Language", "en-US,en;q=0.9") req.Header.Set("Accept-Language", "en-US,en;q=0.9")
// Use CrawlerProxy if enabled // Use CrawlerProxy if enabled
var resp *http.Response resp, err := DoCrawlerProxyRequest(req)
if config.CrawlerProxyEnabled && crawlerProxyClient != nil {
resp, err = crawlerProxyClient.Do(req)
} else {
resp, err = client.Do(req)
}
if err != nil { if err != nil {
printDebug("Failed to GET %s: %v", pageURL, err) printDebug("Failed to GET %s: %v", pageURL, err)
return return
@ -212,7 +208,6 @@ func fallbackReadability(pageURL, userAgent, title, desc, keywords string) (stri
return title, desc, keywords return title, desc, keywords
} }
client := &http.Client{Timeout: 15 * time.Second}
readReq, err := http.NewRequest("GET", pageURL, nil) readReq, err := http.NewRequest("GET", pageURL, nil)
if err != nil { if err != nil {
printDebug("Failed to create fallbackReadability request: %v", err) printDebug("Failed to create fallbackReadability request: %v", err)
@ -222,19 +217,15 @@ func fallbackReadability(pageURL, userAgent, title, desc, keywords string) (stri
readReq.Header.Set("Accept-Language", "en-US,en;q=0.9") readReq.Header.Set("Accept-Language", "en-US,en;q=0.9")
// Use CrawlerProxy if enabled // Use CrawlerProxy if enabled
var readResp *http.Response readResp, err := DoCrawlerProxyRequest(readReq)
if config.CrawlerProxyEnabled && crawlerProxyClient != nil { if err != nil {
readResp, err = crawlerProxyClient.Do(readReq) printDebug("go-readability GET error for %s: %v", pageURL, err)
} else { return title, desc, keywords
readResp, err = client.Do(readReq)
} }
if err != nil || readResp.StatusCode < 200 || readResp.StatusCode >= 300 {
if err != nil { if readResp.StatusCode < 200 || readResp.StatusCode >= 300 {
printDebug("go-readability GET error for %s: %v", pageURL, err) printDebug("go-readability GET returned status %d for %s", readResp.StatusCode, pageURL)
} readResp.Body.Close() // Safely close body
if readResp != nil {
readResp.Body.Close()
}
return title, desc, keywords return title, desc, keywords
} }
defer readResp.Body.Close() defer readResp.Body.Close()

View file

@ -72,13 +72,7 @@ func (t *ThePirateBay) Search(query string, category string) ([]TorrentResult, e
req.Header.Set("User-Agent", userAgent) req.Header.Set("User-Agent", userAgent)
// Perform the request using MetaProxy if enabled // Perform the request using MetaProxy if enabled
var resp *http.Response resp, err := DoMetaProxyRequest(req)
if config.MetaProxyEnabled && metaProxyClient != nil {
resp, err = metaProxyClient.Do(req)
} else {
client := &http.Client{}
resp, err = client.Do(req)
}
if err != nil { if err != nil {
return nil, fmt.Errorf("error making request to The Pirate Bay: %w", err) return nil, fmt.Errorf("error making request to The Pirate Bay: %w", err)
} }

View file

@ -72,13 +72,7 @@ func (tg *TorrentGalaxy) Search(query string, category string) ([]TorrentResult,
req.Header.Set("User-Agent", userAgent) req.Header.Set("User-Agent", userAgent)
// Perform the request using MetaProxy if enabled // Perform the request using MetaProxy if enabled
var resp *http.Response resp, err := DoMetaProxyRequest(req)
if config.MetaProxyEnabled && metaProxyClient != nil {
resp, err = metaProxyClient.Do(req)
} else {
client := &http.Client{}
resp, err = client.Do(req)
}
if err != nil { if err != nil {
return nil, fmt.Errorf("error making request to TorrentGalaxy: %w", err) return nil, fmt.Errorf("error making request to TorrentGalaxy: %w", err)
} }

View file

@ -3,7 +3,6 @@ package main
import ( import (
"encoding/json" "encoding/json"
"fmt" "fmt"
"math"
"net/http" "net/http"
"net/url" "net/url"
"time" "time"
@ -16,41 +15,45 @@ func PerformRedditSearch(query string, safe string, page int) ([]ForumSearchResu
} }
const ( const (
pageSize = 25 pageSize = 25
baseURL = "https://www.reddit.com" baseURL = "https://www.reddit.com"
maxRetries = 5
initialBackoff = 2 * time.Second
) )
var results []ForumSearchResult var results []ForumSearchResult
offset := page * pageSize
searchURL := fmt.Sprintf("%s/search.json?q=%s&limit=%d&start=%d",
baseURL,
url.QueryEscape(query),
pageSize,
offset,
)
searchURL := fmt.Sprintf("%s/search.json?q=%s&limit=%d&start=%d", baseURL, url.QueryEscape(query), pageSize, page*pageSize) // Create request
var resp *http.Response req, err := http.NewRequest("GET", searchURL, nil)
var err error if err != nil {
return nil, fmt.Errorf("creating request: %v", err)
// Retry logic with exponential backoff
for i := 0; i <= maxRetries; i++ {
resp, err = http.Get(searchURL)
if err != nil {
return nil, fmt.Errorf("making request: %v", err)
}
if resp.StatusCode != http.StatusTooManyRequests {
break
}
// Wait for some time before retrying
backoff := time.Duration(math.Pow(2, float64(i))) * initialBackoff
time.Sleep(backoff)
} }
// Set User-Agent
userAgent, uaErr := GetUserAgent("Reddit-Forum-Search")
if uaErr != nil {
return nil, fmt.Errorf("getting user agent: %v", uaErr)
}
req.Header.Set("User-Agent", userAgent)
// Make request using MetaProxy logic
resp, err := DoMetaProxyRequest(req)
if err != nil { if err != nil {
return nil, fmt.Errorf("making request: %v", err) return nil, fmt.Errorf("making request: %v", err)
} }
defer resp.Body.Close() defer resp.Body.Close()
// Validate response status
if resp.StatusCode != http.StatusOK { if resp.StatusCode != http.StatusOK {
return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode) return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
} }
// Parse JSON response
var searchResults map[string]interface{} var searchResults map[string]interface{}
if err := json.NewDecoder(resp.Body).Decode(&searchResults); err != nil { if err := json.NewDecoder(resp.Body).Decode(&searchResults); err != nil {
return nil, fmt.Errorf("decoding response: %v", err) return nil, fmt.Errorf("decoding response: %v", err)
@ -66,9 +69,9 @@ func PerformRedditSearch(query string, safe string, page int) ([]ForumSearchResu
return nil, fmt.Errorf("no children field in data") return nil, fmt.Errorf("no children field in data")
} }
// Extract search results
for _, post := range posts { for _, post := range posts {
postData := post.(map[string]interface{})["data"].(map[string]interface{}) postData := post.(map[string]interface{})["data"].(map[string]interface{})
if safe == "active" && postData["over_18"].(bool) { if safe == "active" && postData["over_18"].(bool) {
continue continue
} }
@ -78,6 +81,7 @@ func PerformRedditSearch(query string, safe string, page int) ([]ForumSearchResu
if len(description) > 500 { if len(description) > 500 {
description = description[:500] + "..." description = description[:500] + "..."
} }
publishedDate := time.Unix(int64(postData["created_utc"].(float64)), 0) publishedDate := time.Unix(int64(postData["created_utc"].(float64)), 0)
permalink := postData["permalink"].(string) permalink := postData["permalink"].(string)
resultURL := fmt.Sprintf("%s%s", baseURL, permalink) resultURL := fmt.Sprintf("%s%s", baseURL, permalink)

View file

@ -32,13 +32,7 @@ func PerformBingImageSearch(query, safe, lang string, page int) ([]ImageSearchRe
req.Header.Set("User-Agent", ImageUserAgent) req.Header.Set("User-Agent", ImageUserAgent)
// Use MetaProxy if enabled // Use MetaProxy if enabled
var resp *http.Response resp, err := DoMetaProxyRequest(req)
if config.MetaProxyEnabled && metaProxyClient != nil {
resp, err = metaProxyClient.Do(req)
} else {
client := &http.Client{Timeout: 10 * time.Second}
resp, err = client.Do(req)
}
if err != nil { if err != nil {
return nil, 0, fmt.Errorf("making request: %v", err) return nil, 0, fmt.Errorf("making request: %v", err)
} }

View file

@ -95,13 +95,7 @@ func PerformDeviantArtImageSearch(query, safe, lang string, page int) ([]ImageSe
req.Header.Set("User-Agent", DeviantArtImageUserAgent) req.Header.Set("User-Agent", DeviantArtImageUserAgent)
// Perform the request using MetaProxy if enabled // Perform the request using MetaProxy if enabled
var resp *http.Response resp, err := DoMetaProxyRequest(req)
if config.MetaProxyEnabled && metaProxyClient != nil {
resp, err = metaProxyClient.Do(req)
} else {
client := &http.Client{}
resp, err = client.Do(req)
}
if err != nil { if err != nil {
return nil, 0, fmt.Errorf("making request: %v", err) return nil, 0, fmt.Errorf("making request: %v", err)
} }

View file

@ -32,13 +32,7 @@ func PerformImgurImageSearch(query, safe, lang string, page int) ([]ImageSearchR
req.Header.Set("User-Agent", imgurUserAgent) req.Header.Set("User-Agent", imgurUserAgent)
// Perform the HTTP request with MetaProxy if enabled // Perform the HTTP request with MetaProxy if enabled
var resp *http.Response resp, err := DoMetaProxyRequest(req)
if config.MetaProxyEnabled && metaProxyClient != nil {
resp, err = metaProxyClient.Do(req)
} else {
client := &http.Client{}
resp, err = client.Do(req)
}
if err != nil { if err != nil {
return nil, 0, fmt.Errorf("making request: %v", err) return nil, 0, fmt.Errorf("making request: %v", err)
} }

View file

@ -127,13 +127,7 @@ func PerformQwantImageSearch(query, safe, lang string, page int) ([]ImageSearchR
req.Header.Set("User-Agent", ImageUserAgent) req.Header.Set("User-Agent", ImageUserAgent)
// Perform the request with MetaProxy if enabled // Perform the request with MetaProxy if enabled
var resp *http.Response resp, err := DoMetaProxyRequest(req)
if config.MetaProxyEnabled && metaProxyClient != nil {
resp, err = metaProxyClient.Do(req)
} else {
client := &http.Client{Timeout: 10 * time.Second}
resp, err = client.Do(req)
}
if err != nil { if err != nil {
return nil, 0, fmt.Errorf("making request: %v", err) return nil, 0, fmt.Errorf("making request: %v", err)
} }

129
proxy.go
View file

@ -30,7 +30,7 @@ var (
crawlerProxyClient *ProxyClient crawlerProxyClient *ProxyClient
) )
// NewProxyClientPool creates a pool of HTTP clients with proxies. // NewProxyClientPool creates a pool of HTTP clients with SOCKS5 proxies.
func NewProxyClientPool(proxies []ProxyConfig, timeout time.Duration) (*ProxyClient, error) { func NewProxyClientPool(proxies []ProxyConfig, timeout time.Duration) (*ProxyClient, error) {
if len(proxies) == 0 { if len(proxies) == 0 {
return nil, fmt.Errorf("no proxies provided") return nil, fmt.Errorf("no proxies provided")
@ -38,18 +38,17 @@ func NewProxyClientPool(proxies []ProxyConfig, timeout time.Duration) (*ProxyCli
clients := make([]*http.Client, len(proxies)) clients := make([]*http.Client, len(proxies))
for i, proxyConfig := range proxies { for i, pc := range proxies {
var auth *proxy.Auth var auth *proxy.Auth
if proxyConfig.Username != "" || proxyConfig.Password != "" { if pc.Username != "" || pc.Password != "" {
auth = &proxy.Auth{ auth = &proxy.Auth{
User: proxyConfig.Username, User: pc.Username,
Password: proxyConfig.Password, Password: pc.Password,
} }
} }
dialer, err := proxy.SOCKS5("tcp", pc.Address, auth, proxy.Direct)
dialer, err := proxy.SOCKS5("tcp", proxyConfig.Address, auth, proxy.Direct)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to create SOCKS5 dialer for %s: %w", proxyConfig.Address, err) return nil, fmt.Errorf("failed to create SOCKS5 dialer for %s: %w", pc.Address, err)
} }
transport := &http.Transport{Dial: dialer.Dial} transport := &http.Transport{Dial: dialer.Dial}
@ -99,20 +98,21 @@ func (p *ProxyClient) GetProxy() string {
// ParseProxies parses the proxy strings in the format ADDRESS:PORT or ADDRESS:PORT:USER:PASSWORD. // ParseProxies parses the proxy strings in the format ADDRESS:PORT or ADDRESS:PORT:USER:PASSWORD.
func ParseProxies(proxyStrings []string) []ProxyConfig { func ParseProxies(proxyStrings []string) []ProxyConfig {
var proxies []ProxyConfig var proxies []ProxyConfig
for _, proxy := range proxyStrings { for _, proxyStr := range proxyStrings {
parts := strings.Split(proxy, ":") parts := strings.Split(proxyStr, ":")
if len(parts) == 2 { // ADDRESS:PORT switch len(parts) {
case 2: // ADDRESS:PORT
proxies = append(proxies, ProxyConfig{ proxies = append(proxies, ProxyConfig{
Address: fmt.Sprintf("%s:%s", parts[0], parts[1]), Address: fmt.Sprintf("%s:%s", parts[0], parts[1]),
}) })
} else if len(parts) == 4 { // ADDRESS:PORT:USER:PASSWORD case 4: // ADDRESS:PORT:USER:PASSWORD
proxies = append(proxies, ProxyConfig{ proxies = append(proxies, ProxyConfig{
Address: fmt.Sprintf("%s:%s", parts[0], parts[1]), Address: fmt.Sprintf("%s:%s", parts[0], parts[1]),
Username: parts[2], Username: parts[2],
Password: parts[3], Password: parts[3],
}) })
} else { default:
fmt.Printf("Invalid proxy format: %s\n", proxy) fmt.Printf("Invalid proxy format: %s\n", proxyStr)
} }
} }
return proxies return proxies
@ -147,6 +147,107 @@ func InitProxies() {
} }
} }
// Doer is an interface so we can accept *http.Client or *ProxyClient for requests.
// Both types provide Do(*http.Request), so request helpers below can treat a
// direct client and a proxy pool interchangeably.
type Doer interface {
Do(*http.Request) (*http.Response, error)
}
// DoProxyRequest handles "try direct, then proxy if needed," with retries if a proxy is used.
//
//   - strict: if true, always try the proxy first when it is enabled; without a
//     usable proxy, fall back to a single direct attempt
//   - enabled: whether this type of proxy is turned on
//   - retryCount: how many times to retry through the proxy pool
//   - proxyClient: the pool of proxy connections
//
// The body of every unsuccessful response is closed before returning an error,
// so failed attempts do not leak connections.
func DoProxyRequest(req *http.Request, strict bool, enabled bool, retryCount int, proxyClient *ProxyClient) (*http.Response, error) {
	// 1) Non-strict mode: try one direct request first.
	if !strict {
		resp, err := tryRequestOnce(req, http.DefaultClient)
		if isSuccessful(resp, err) {
			return resp, nil
		}
		directErr := attemptError(resp, err)
		closeBody(resp)
		// Direct attempt failed: retry through the proxy when one is configured.
		if enabled && proxyClient != nil {
			resp, err = tryRequestWithRetry(req, proxyClient, retryCount)
			if isSuccessful(resp, err) {
				return resp, nil
			}
			proxyErr := attemptError(resp, err)
			closeBody(resp)
			return nil, fmt.Errorf("failed after direct & proxy attempts: %v", proxyErr)
		}
		return nil, fmt.Errorf("request failed direct, no valid proxy: %v", directErr)
	}

	// 2) Strict mode: when the proxy is enabled, use it (with retries) exclusively.
	if enabled && proxyClient != nil {
		resp, err := tryRequestWithRetry(req, proxyClient, retryCount)
		if isSuccessful(resp, err) {
			return resp, nil
		}
		proxyErr := attemptError(resp, err)
		closeBody(resp)
		return nil, fmt.Errorf("failed after %d proxy attempts: %v", retryCount, proxyErr)
	}

	// Strict mode but no proxy available: fall back to a single direct attempt.
	resp, err := tryRequestOnce(req, http.DefaultClient)
	if isSuccessful(resp, err) {
		return resp, nil
	}
	directErr := attemptError(resp, err)
	closeBody(resp)
	return nil, fmt.Errorf("direct request failed in strict mode, no proxy: %v", directErr)
}

// attemptError explains why an attempt was unsuccessful: the transport error
// if there was one, otherwise the unexpected (non-2xx) HTTP status. Without
// this, formatting a nil err with %v would print "<nil>" and hide the cause.
func attemptError(resp *http.Response, err error) error {
	if err != nil {
		return err
	}
	if resp != nil {
		return fmt.Errorf("unexpected status %d", resp.StatusCode)
	}
	return fmt.Errorf("no response received")
}

// closeBody closes the body of a response that is being discarded; it is
// safe to call with a nil response.
func closeBody(resp *http.Response) {
	if resp != nil {
		resp.Body.Close()
	}
}
// Helper wrappers around DoProxyRequest().

// DoMetaProxyRequest performs req using the meta-search proxy settings:
// strict mode, enabled flag, retry count, and the meta proxy pool.
func DoMetaProxyRequest(req *http.Request) (*http.Response, error) {
	return DoProxyRequest(req, config.MetaProxyStrict, config.MetaProxyEnabled, config.MetaProxyRetry, metaProxyClient)
}
// DoCrawlerProxyRequest performs req using the crawler proxy settings:
// strict mode, enabled flag, retry count, and the crawler proxy pool.
func DoCrawlerProxyRequest(req *http.Request) (*http.Response, error) {
	return DoProxyRequest(
		req,
		config.CrawlerProxyStrict,
		config.CrawlerProxyEnabled,
		config.CrawlerProxyRetry,
		// Bug fix: this previously passed metaProxyClient, silently routing
		// crawler traffic through the meta-search proxy pool.
		crawlerProxyClient,
	)
}
// tryRequestWithRetry issues req up to "retries" times through client,
// pausing 200ms between attempts. It returns the first successful (2xx)
// response; otherwise the last response and error seen are returned.
func tryRequestWithRetry(req *http.Request, client Doer, retries int) (*http.Response, error) {
	var (
		lastResp *http.Response
		lastErr  error
	)
	for attempt := 1; attempt <= retries; attempt++ {
		// Discard the body of the previous failed attempt before retrying.
		if lastResp != nil {
			lastResp.Body.Close()
		}
		printDebug("Attempt %d of %d with proxy/client...", attempt, retries)
		lastResp, lastErr = tryRequestOnce(req, client)
		if isSuccessful(lastResp, lastErr) {
			return lastResp, nil
		}
		time.Sleep(200 * time.Millisecond)
	}
	return lastResp, lastErr
}
// tryRequestOnce sends req exactly once via client, falling back to
// http.DefaultClient when client is nil.
func tryRequestOnce(req *http.Request, client Doer) (*http.Response, error) {
	if client == nil {
		return http.DefaultClient.Do(req)
	}
	return client.Do(req)
}
// isSuccessful checks if err==nil & resp != nil & resp.StatusCode in [200..299].
func isSuccessful(resp *http.Response, err error) bool {
if err != nil || resp == nil {
return false
}
return resp.StatusCode >= 200 && resp.StatusCode < 300
}
// func main() { // func main() {
// config := loadConfig() // config := loadConfig()

View file

@ -25,27 +25,18 @@ func PerformBraveTextSearch(query, safe, lang string, offset int) ([]TextSearchR
return nil, 0, fmt.Errorf("creating request: %v", err) return nil, 0, fmt.Errorf("creating request: %v", err)
} }
// Set headers including User-Agent TextUserAgent, err := GetUserAgent("Text-Search-Brave")
TextUserAgent, err := GetUserAgent("Text-Search")
if err != nil { if err != nil {
printWarn("Error generating User-Agent: %v", err) printWarn("Error generating User-Agent: %v", err)
return nil, 0, err return nil, 0, err
} }
req.Header.Set("User-Agent", TextUserAgent) req.Header.Set("User-Agent", TextUserAgent)
var resp *http.Response // Single call to DoMetaProxyRequest:
resp, err := DoMetaProxyRequest(req)
// Determine whether to use a proxy client or a default client
if config.MetaProxyEnabled && metaProxyClient != nil {
resp, err = metaProxyClient.Do(req)
} else {
client := &http.Client{}
resp, err = client.Do(req)
}
if err != nil { if err != nil {
printWarn("Error performing request: %v", err) printWarn("Error performing request: %v", err)
return nil, 0, fmt.Errorf("performing request: %v", err) return nil, 0, fmt.Errorf("performing meta-request: %v", err)
} }
defer resp.Body.Close() defer resp.Body.Close()

View file

@ -22,17 +22,15 @@ func PerformDuckDuckGoTextSearch(query, safe, lang string, page int) ([]TextSear
return nil, 0, fmt.Errorf("creating request: %v", err) return nil, 0, fmt.Errorf("creating request: %v", err)
} }
// Use proxy client if MetaProxy is enabled userAgent, err := GetUserAgent("duckduck-text-search")
var resp *http.Response
if config.MetaProxyEnabled && metaProxyClient != nil {
resp, err = metaProxyClient.Do(req)
} else {
client := &http.Client{}
resp, err = client.Do(req)
}
if err != nil { if err != nil {
return nil, 0, fmt.Errorf("making request: %v", err) return nil, 0, err
}
req.Header.Set("User-Agent", userAgent)
resp, err := DoMetaProxyRequest(req)
if err != nil {
return nil, 0, fmt.Errorf("failed to do meta-request: %v", err)
} }
defer resp.Body.Close() defer resp.Body.Close()

View file

@ -11,56 +11,46 @@ import (
) )
func PerformGoogleTextSearch(query, safe, lang string, page int) ([]TextSearchResult, time.Duration, error) { func PerformGoogleTextSearch(query, safe, lang string, page int) ([]TextSearchResult, time.Duration, error) {
startTime := time.Now()
const resultsPerPage = 10 const resultsPerPage = 10
var results []TextSearchResult
startTime := time.Now() // Start the timer // 1) Build the search URL
// Build the search URL
searchURL := buildSearchURL(query, safe, lang, page, resultsPerPage) searchURL := buildSearchURL(query, safe, lang, page, resultsPerPage)
// Create a new request // 2) Create a new request
req, err := http.NewRequest("GET", searchURL, nil) req, err := http.NewRequest("GET", searchURL, nil)
if err != nil { if err != nil {
return nil, 0, fmt.Errorf("failed to create request: %v", err) return nil, 0, fmt.Errorf("failed to create request: %v", err)
} }
// Generate and set User-Agent header // 3) Generate and set a User-Agent header
TextUserAgent, err := GetUserAgent("Google-Text-Search") userAgent, err := GetUserAgent("Google-Text-Search")
if err != nil { if err != nil {
return nil, 0, err return nil, 0, err
} }
req.Header.Set("User-Agent", TextUserAgent) req.Header.Set("User-Agent", userAgent)
// Perform the request using proxy if MetaProxy is enabled
var resp *http.Response
if config.MetaProxyEnabled && metaProxyClient != nil {
resp, err = metaProxyClient.Do(req)
} else {
client := &http.Client{}
resp, err = client.Do(req)
}
// 4) Use the meta-proxy wrapper
resp, err := DoMetaProxyRequest(req)
if err != nil { if err != nil {
return nil, 0, fmt.Errorf("making request: %v", err) return nil, 0, fmt.Errorf("failed to do meta-request: %v", err)
} }
defer resp.Body.Close() defer resp.Body.Close()
// Check for HTTP status code // 5) Check HTTP status
if resp.StatusCode != http.StatusOK { if resp.StatusCode != http.StatusOK {
return nil, 0, fmt.Errorf("unexpected status code: %d", resp.StatusCode) return nil, 0, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
} }
// Parse the HTML response // 6) Parse the HTML response
doc, err := goquery.NewDocumentFromReader(resp.Body) doc, err := goquery.NewDocumentFromReader(resp.Body)
if err != nil { if err != nil {
return nil, 0, fmt.Errorf("loading HTML document: %v", err) return nil, 0, fmt.Errorf("loading HTML document: %v", err)
} }
results := parseResults(doc)
// Extract search results // 7) Calculate duration
results = parseResults(doc) duration := time.Since(startTime)
duration := time.Since(startTime) // Calculate the duration
if len(results) == 0 { if len(results) == 0 {
printDebug("No results found from Google Search") printDebug("No results found from Google Search")

View file

@ -44,17 +44,9 @@ func PerformLibreXTextSearch(query, safe, lang string, page int) ([]TextSearchRe
} }
req.Header.Set("User-Agent", userAgent) req.Header.Set("User-Agent", userAgent)
// Respect MetaProxy if enabled and strict resp, err := DoMetaProxyRequest(req)
var resp *http.Response
if config.MetaProxyEnabled && config.MetaProxyStrict && metaProxyClient != nil {
resp, err = metaProxyClient.Do(req)
} else {
client := &http.Client{}
resp, err = client.Do(req)
}
if err != nil { if err != nil {
printWarn("error requesting domain %s: %v", domain, err) return nil, 0, fmt.Errorf("failed to do meta-request: %v", err)
continue
} }
defer resp.Body.Close() defer resp.Body.Close()

View file

@ -26,9 +26,11 @@ type QwantTextAPIResponse struct {
} }
// PerformQwantTextSearch contacts the Qwant API and returns a slice of TextSearchResult // PerformQwantTextSearch contacts the Qwant API and returns a slice of TextSearchResult
func PerformQwantTextSearch(query, safe, lang string) ([]TextSearchResult, error) { func PerformQwantTextSearch(query, safe, lang string, page int) ([]TextSearchResult, time.Duration, error) {
startTime := time.Now()
const resultsPerPage = 10 const resultsPerPage = 10
const offset = 0 offset := (page - 1) * resultsPerPage
// Ensure safe search is disabled by default if not specified // Ensure safe search is disabled by default if not specified
if safe == "" { if safe == "" {
@ -40,45 +42,44 @@ func PerformQwantTextSearch(query, safe, lang string) ([]TextSearchResult, error
lang = "en_CA" lang = "en_CA"
} }
apiURL := fmt.Sprintf("https://api.qwant.com/v3/search/web?q=%s&count=%d&locale=%s&offset=%d&device=desktop", apiURL := fmt.Sprintf(
"https://api.qwant.com/v3/search/web?q=%s&count=%d&locale=%s&offset=%d&device=desktop",
url.QueryEscape(query), url.QueryEscape(query),
resultsPerPage, resultsPerPage,
lang, lang,
offset) offset,
)
req, err := http.NewRequest("GET", apiURL, nil) req, err := http.NewRequest("GET", apiURL, nil)
if err != nil { if err != nil {
return nil, fmt.Errorf("creating request: %v", err) // Return three values: nil for the slice, 0 for duration, error for the third.
} return nil, 0, fmt.Errorf("creating request: %v", err)
req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36")
// Perform the request using the appropriate client
var resp *http.Response
if config.MetaProxyEnabled && metaProxyClient != nil {
resp, err = metaProxyClient.Do(req)
} else {
client := &http.Client{Timeout: 10 * time.Second}
resp, err = client.Do(req)
} }
userAgent, err := GetUserAgent("Quant-Text-Search")
if err != nil { if err != nil {
return nil, fmt.Errorf("making request: %v", err) return nil, 0, err
}
req.Header.Set("User-Agent", userAgent)
resp, err := DoMetaProxyRequest(req)
if err != nil {
return nil, 0, fmt.Errorf("failed to do meta-request: %v", err)
} }
defer resp.Body.Close() defer resp.Body.Close()
if resp.StatusCode != http.StatusOK { if resp.StatusCode != http.StatusOK {
return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode) return nil, 0, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
} }
var apiResp QwantTextAPIResponse var apiResp QwantTextAPIResponse
if err := json.NewDecoder(resp.Body).Decode(&apiResp); err != nil { if err := json.NewDecoder(resp.Body).Decode(&apiResp); err != nil {
return nil, fmt.Errorf("decoding response: %v", err) return nil, 0, fmt.Errorf("decoding response: %v", err)
} }
// Extracting results from the nested JSON structure // Extracting results from the nested JSON structure
if len(apiResp.Data.Result.Items.Mainline) == 0 { if len(apiResp.Data.Result.Items.Mainline) == 0 {
return nil, fmt.Errorf("no search results found") return nil, 0, fmt.Errorf("no search results found")
} }
var results []TextSearchResult var results []TextSearchResult
@ -92,7 +93,8 @@ func PerformQwantTextSearch(query, safe, lang string) ([]TextSearchResult, error
}) })
} }
return results, nil duration := time.Since(startTime)
return results, duration, nil
} }
// cleanQwantURL extracts the main part of the URL, removing tracking information // cleanQwantURL extracts the main part of the URL, removing tracking information

View file

@ -50,16 +50,9 @@ func fetchInstances() ([]Instance, error) {
} }
req.Header.Set("User-Agent", XNGUserAgent) req.Header.Set("User-Agent", XNGUserAgent)
var resp *http.Response resp, err := DoMetaProxyRequest(req)
if config.MetaProxyEnabled && config.MetaProxyStrict && metaProxyClient != nil {
resp, err = metaProxyClient.Do(req)
} else {
client := &http.Client{Timeout: 10 * time.Second}
resp, err = client.Do(req)
}
if err != nil { if err != nil {
return nil, fmt.Errorf("performing request: %v", err) return nil, fmt.Errorf("failed to do meta-request: %v", err)
} }
defer resp.Body.Close() defer resp.Body.Close()

View file

@ -13,6 +13,7 @@ var allTextSearchEngines = []SearchEngine{
{Name: "LibreX", Func: wrapTextSearchFunc(PerformLibreXTextSearch)}, {Name: "LibreX", Func: wrapTextSearchFunc(PerformLibreXTextSearch)},
{Name: "Brave", Func: wrapTextSearchFunc(PerformBraveTextSearch)}, {Name: "Brave", Func: wrapTextSearchFunc(PerformBraveTextSearch)},
{Name: "DuckDuckGo", Func: wrapTextSearchFunc(PerformDuckDuckGoTextSearch)}, {Name: "DuckDuckGo", Func: wrapTextSearchFunc(PerformDuckDuckGoTextSearch)},
{Name: "Quant", Func: wrapTextSearchFunc(PerformQwantTextSearch)}, // Broken !
//{Name: "SearXNG", Func: wrapTextSearchFunc(PerformSearXTextSearch)}, // bruh //{Name: "SearXNG", Func: wrapTextSearchFunc(PerformSearXTextSearch)}, // bruh
} }
@ -123,6 +124,7 @@ func prefetchPage(query, safe, lang string, page int) {
} }
} }
// The logic in this function is rotating search engines instead of running them in order as noted in the wiki
func fetchTextResults(query, safe, lang string, page int) []TextSearchResult { func fetchTextResults(query, safe, lang string, page int) []TextSearchResult {
var results []TextSearchResult var results []TextSearchResult