Added ProxyRetry to config and fixed ProxyStrict
Some checks failed
Run Integration Tests / test (push) Failing after 50s
Some checks failed
Run Integration Tests / test (push) Failing after 50s
This commit is contained in:
parent
ab707a91e8
commit
35e657bccd
17 changed files with 224 additions and 186 deletions
10
config.go
10
config.go
|
@ -43,10 +43,12 @@ type Config struct {
|
|||
DriveCacheEnabled bool
|
||||
MetaProxyEnabled bool
|
||||
MetaProxyStrict bool
|
||||
MetaProxyRetry int
|
||||
MetaProxies []string
|
||||
CrawlerProxyEnabled bool
|
||||
CrawlerProxyStrict bool
|
||||
CrawlerProxies []string
|
||||
CrawlerProxyRetry int
|
||||
// Maybe add Proxy support for Image Extraction?
|
||||
LogLevel int
|
||||
ConcurrentStandardCrawlers int
|
||||
|
@ -76,9 +78,11 @@ var defaultConfig = Config{
|
|||
MetaProxyEnabled: false,
|
||||
MetaProxyStrict: true,
|
||||
MetaProxies: []string{},
|
||||
MetaProxyRetry: 3,
|
||||
CrawlerProxyEnabled: false,
|
||||
CrawlerProxyStrict: true,
|
||||
CrawlerProxies: []string{},
|
||||
CrawlerProxyRetry: 1,
|
||||
ConcurrentStandardCrawlers: 12,
|
||||
ConcurrentChromeCrawlers: 4,
|
||||
CrawlingInterval: 24 * time.Hour,
|
||||
|
@ -307,6 +311,8 @@ func saveConfig(config Config) {
|
|||
proxiesSec.Key("MetaProxies").SetValue(strings.Join(config.MetaProxies, ","))
|
||||
proxiesSec.Key("CrawlerProxyStrict").SetValue(strconv.FormatBool(config.CrawlerProxyStrict))
|
||||
proxiesSec.Key("CrawlerProxies").SetValue(strings.Join(config.CrawlerProxies, ","))
|
||||
proxiesSec.Key("MetaProxyRetry").SetValue(strconv.Itoa(config.MetaProxyRetry))
|
||||
proxiesSec.Key("CrawlerProxyRetry").SetValue(strconv.Itoa(config.CrawlerProxyRetry))
|
||||
|
||||
// MetaSearch section
|
||||
metaSec := cfg.Section("MetaSearches")
|
||||
|
@ -372,6 +378,8 @@ func loadConfig() Config {
|
|||
metaProxies := strings.Split(getConfigValueString(cfg.Section("Proxies").Key("MetaProxies"), ""), ",")
|
||||
crawlerProxyStrict := getConfigValueBool(cfg.Section("Proxies").Key("CrawlerProxyStrict"), defaultConfig.CrawlerProxyStrict)
|
||||
crawlerProxies := strings.Split(getConfigValueString(cfg.Section("Proxies").Key("CrawlerProxies"), ""), ",")
|
||||
metaProxyRetry := getConfigValue(cfg.Section("Proxies").Key("MetaProxyRetry"), defaultConfig.MetaProxyRetry, strconv.Atoi)
|
||||
crawlerProxyRetry := getConfigValue(cfg.Section("Proxies").Key("CrawlerProxyRetry"), defaultConfig.CrawlerProxyRetry, strconv.Atoi)
|
||||
|
||||
// MetaSearch
|
||||
searchXInstances := strings.Split(getConfigValueString(cfg.Section("MetaSearches").Key("LibreXInstances"), strings.Join(defaultConfig.LibreXInstances, ",")), ",")
|
||||
|
@ -415,9 +423,11 @@ func loadConfig() Config {
|
|||
MetaProxyEnabled: metaProxyEnabled,
|
||||
MetaProxyStrict: metaProxyStrict,
|
||||
MetaProxies: metaProxies,
|
||||
MetaProxyRetry: metaProxyRetry,
|
||||
CrawlerProxyEnabled: crawlerProxyEnabled,
|
||||
CrawlerProxyStrict: crawlerProxyStrict,
|
||||
CrawlerProxies: crawlerProxies,
|
||||
CrawlerProxyRetry: crawlerProxyRetry,
|
||||
ConcurrentStandardCrawlers: concurrentStandardCrawlers,
|
||||
ConcurrentChromeCrawlers: concurrentChromeCrawlers,
|
||||
CrawlingInterval: crawlingInterval,
|
||||
|
|
|
@ -65,6 +65,7 @@ func fetchPageMetadataChrome(pageURL, userAgent string) (string, string, string)
|
|||
func configureChromeOptions() []chromedp.ExecAllocatorOption {
|
||||
options := chromedp.DefaultExecAllocatorOptions[:]
|
||||
|
||||
// This code is not using config.CrawlerProxyRetry
|
||||
if config.CrawlerProxyEnabled && crawlerProxyClient != nil {
|
||||
// Retrieve proxy settings from CrawlerProxy
|
||||
proxy := crawlerProxyClient.GetProxy() // Ensure a `GetProxy` method is implemented for your proxy client
|
||||
|
@ -89,7 +90,7 @@ func configureChromeOptions() []chromedp.ExecAllocatorOption {
|
|||
|
||||
// extractStandard does the normal HTML parse with OG, Twitter, etc.
|
||||
func extractStandard(pageURL, userAgent string) (title, desc, keywords string) {
|
||||
client := &http.Client{Timeout: 15 * time.Second}
|
||||
|
||||
req, err := http.NewRequest("GET", pageURL, nil)
|
||||
if err != nil {
|
||||
printDebug("Failed to create request for %s: %v", pageURL, err)
|
||||
|
@ -99,12 +100,7 @@ func extractStandard(pageURL, userAgent string) (title, desc, keywords string) {
|
|||
req.Header.Set("Accept-Language", "en-US,en;q=0.9")
|
||||
|
||||
// Use CrawlerProxy if enabled
|
||||
var resp *http.Response
|
||||
if config.CrawlerProxyEnabled && crawlerProxyClient != nil {
|
||||
resp, err = crawlerProxyClient.Do(req)
|
||||
} else {
|
||||
resp, err = client.Do(req)
|
||||
}
|
||||
resp, err := DoCrawlerProxyRequest(req)
|
||||
if err != nil {
|
||||
printDebug("Failed to GET %s: %v", pageURL, err)
|
||||
return
|
||||
|
@ -212,7 +208,6 @@ func fallbackReadability(pageURL, userAgent, title, desc, keywords string) (stri
|
|||
return title, desc, keywords
|
||||
}
|
||||
|
||||
client := &http.Client{Timeout: 15 * time.Second}
|
||||
readReq, err := http.NewRequest("GET", pageURL, nil)
|
||||
if err != nil {
|
||||
printDebug("Failed to create fallbackReadability request: %v", err)
|
||||
|
@ -222,19 +217,15 @@ func fallbackReadability(pageURL, userAgent, title, desc, keywords string) (stri
|
|||
readReq.Header.Set("Accept-Language", "en-US,en;q=0.9")
|
||||
|
||||
// Use CrawlerProxy if enabled
|
||||
var readResp *http.Response
|
||||
if config.CrawlerProxyEnabled && crawlerProxyClient != nil {
|
||||
readResp, err = crawlerProxyClient.Do(readReq)
|
||||
} else {
|
||||
readResp, err = client.Do(readReq)
|
||||
readResp, err := DoCrawlerProxyRequest(readReq)
|
||||
if err != nil {
|
||||
printDebug("go-readability GET error for %s: %v", pageURL, err)
|
||||
return title, desc, keywords
|
||||
}
|
||||
if err != nil || readResp.StatusCode < 200 || readResp.StatusCode >= 300 {
|
||||
if err != nil {
|
||||
printDebug("go-readability GET error for %s: %v", pageURL, err)
|
||||
}
|
||||
if readResp != nil {
|
||||
readResp.Body.Close()
|
||||
}
|
||||
|
||||
if readResp.StatusCode < 200 || readResp.StatusCode >= 300 {
|
||||
printDebug("go-readability GET returned status %d for %s", readResp.StatusCode, pageURL)
|
||||
readResp.Body.Close() // Safely close body
|
||||
return title, desc, keywords
|
||||
}
|
||||
defer readResp.Body.Close()
|
||||
|
|
|
@ -72,13 +72,7 @@ func (t *ThePirateBay) Search(query string, category string) ([]TorrentResult, e
|
|||
req.Header.Set("User-Agent", userAgent)
|
||||
|
||||
// Perform the request using MetaProxy if enabled
|
||||
var resp *http.Response
|
||||
if config.MetaProxyEnabled && metaProxyClient != nil {
|
||||
resp, err = metaProxyClient.Do(req)
|
||||
} else {
|
||||
client := &http.Client{}
|
||||
resp, err = client.Do(req)
|
||||
}
|
||||
resp, err := DoMetaProxyRequest(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error making request to The Pirate Bay: %w", err)
|
||||
}
|
||||
|
|
|
@ -72,13 +72,7 @@ func (tg *TorrentGalaxy) Search(query string, category string) ([]TorrentResult,
|
|||
req.Header.Set("User-Agent", userAgent)
|
||||
|
||||
// Perform the request using MetaProxy if enabled
|
||||
var resp *http.Response
|
||||
if config.MetaProxyEnabled && metaProxyClient != nil {
|
||||
resp, err = metaProxyClient.Do(req)
|
||||
} else {
|
||||
client := &http.Client{}
|
||||
resp, err = client.Do(req)
|
||||
}
|
||||
resp, err := DoMetaProxyRequest(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error making request to TorrentGalaxy: %w", err)
|
||||
}
|
||||
|
|
50
forums.go
50
forums.go
|
@ -3,7 +3,6 @@ package main
|
|||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"math"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"time"
|
||||
|
@ -16,41 +15,45 @@ func PerformRedditSearch(query string, safe string, page int) ([]ForumSearchResu
|
|||
}
|
||||
|
||||
const (
|
||||
pageSize = 25
|
||||
baseURL = "https://www.reddit.com"
|
||||
maxRetries = 5
|
||||
initialBackoff = 2 * time.Second
|
||||
pageSize = 25
|
||||
baseURL = "https://www.reddit.com"
|
||||
)
|
||||
|
||||
var results []ForumSearchResult
|
||||
offset := page * pageSize
|
||||
searchURL := fmt.Sprintf("%s/search.json?q=%s&limit=%d&start=%d",
|
||||
baseURL,
|
||||
url.QueryEscape(query),
|
||||
pageSize,
|
||||
offset,
|
||||
)
|
||||
|
||||
searchURL := fmt.Sprintf("%s/search.json?q=%s&limit=%d&start=%d", baseURL, url.QueryEscape(query), pageSize, page*pageSize)
|
||||
var resp *http.Response
|
||||
var err error
|
||||
|
||||
// Retry logic with exponential backoff
|
||||
for i := 0; i <= maxRetries; i++ {
|
||||
resp, err = http.Get(searchURL)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("making request: %v", err)
|
||||
}
|
||||
if resp.StatusCode != http.StatusTooManyRequests {
|
||||
break
|
||||
}
|
||||
|
||||
// Wait for some time before retrying
|
||||
backoff := time.Duration(math.Pow(2, float64(i))) * initialBackoff
|
||||
time.Sleep(backoff)
|
||||
// Create request
|
||||
req, err := http.NewRequest("GET", searchURL, nil)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("creating request: %v", err)
|
||||
}
|
||||
|
||||
// Set User-Agent
|
||||
userAgent, uaErr := GetUserAgent("Reddit-Forum-Search")
|
||||
if uaErr != nil {
|
||||
return nil, fmt.Errorf("getting user agent: %v", uaErr)
|
||||
}
|
||||
req.Header.Set("User-Agent", userAgent)
|
||||
|
||||
// Make request using MetaProxy logic
|
||||
resp, err := DoMetaProxyRequest(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("making request: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
// Validate response status
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
// Parse JSON response
|
||||
var searchResults map[string]interface{}
|
||||
if err := json.NewDecoder(resp.Body).Decode(&searchResults); err != nil {
|
||||
return nil, fmt.Errorf("decoding response: %v", err)
|
||||
|
@ -66,9 +69,9 @@ func PerformRedditSearch(query string, safe string, page int) ([]ForumSearchResu
|
|||
return nil, fmt.Errorf("no children field in data")
|
||||
}
|
||||
|
||||
// Extract search results
|
||||
for _, post := range posts {
|
||||
postData := post.(map[string]interface{})["data"].(map[string]interface{})
|
||||
|
||||
if safe == "active" && postData["over_18"].(bool) {
|
||||
continue
|
||||
}
|
||||
|
@ -78,6 +81,7 @@ func PerformRedditSearch(query string, safe string, page int) ([]ForumSearchResu
|
|||
if len(description) > 500 {
|
||||
description = description[:500] + "..."
|
||||
}
|
||||
|
||||
publishedDate := time.Unix(int64(postData["created_utc"].(float64)), 0)
|
||||
permalink := postData["permalink"].(string)
|
||||
resultURL := fmt.Sprintf("%s%s", baseURL, permalink)
|
||||
|
|
|
@ -32,13 +32,7 @@ func PerformBingImageSearch(query, safe, lang string, page int) ([]ImageSearchRe
|
|||
req.Header.Set("User-Agent", ImageUserAgent)
|
||||
|
||||
// Use MetaProxy if enabled
|
||||
var resp *http.Response
|
||||
if config.MetaProxyEnabled && metaProxyClient != nil {
|
||||
resp, err = metaProxyClient.Do(req)
|
||||
} else {
|
||||
client := &http.Client{Timeout: 10 * time.Second}
|
||||
resp, err = client.Do(req)
|
||||
}
|
||||
resp, err := DoMetaProxyRequest(req)
|
||||
if err != nil {
|
||||
return nil, 0, fmt.Errorf("making request: %v", err)
|
||||
}
|
||||
|
|
|
@ -95,13 +95,7 @@ func PerformDeviantArtImageSearch(query, safe, lang string, page int) ([]ImageSe
|
|||
req.Header.Set("User-Agent", DeviantArtImageUserAgent)
|
||||
|
||||
// Perform the request using MetaProxy if enabled
|
||||
var resp *http.Response
|
||||
if config.MetaProxyEnabled && metaProxyClient != nil {
|
||||
resp, err = metaProxyClient.Do(req)
|
||||
} else {
|
||||
client := &http.Client{}
|
||||
resp, err = client.Do(req)
|
||||
}
|
||||
resp, err := DoMetaProxyRequest(req)
|
||||
if err != nil {
|
||||
return nil, 0, fmt.Errorf("making request: %v", err)
|
||||
}
|
||||
|
|
|
@ -32,13 +32,7 @@ func PerformImgurImageSearch(query, safe, lang string, page int) ([]ImageSearchR
|
|||
req.Header.Set("User-Agent", imgurUserAgent)
|
||||
|
||||
// Perform the HTTP request with MetaProxy if enabled
|
||||
var resp *http.Response
|
||||
if config.MetaProxyEnabled && metaProxyClient != nil {
|
||||
resp, err = metaProxyClient.Do(req)
|
||||
} else {
|
||||
client := &http.Client{}
|
||||
resp, err = client.Do(req)
|
||||
}
|
||||
resp, err := DoMetaProxyRequest(req)
|
||||
if err != nil {
|
||||
return nil, 0, fmt.Errorf("making request: %v", err)
|
||||
}
|
||||
|
|
|
@ -127,13 +127,7 @@ func PerformQwantImageSearch(query, safe, lang string, page int) ([]ImageSearchR
|
|||
req.Header.Set("User-Agent", ImageUserAgent)
|
||||
|
||||
// Perform the request with MetaProxy if enabled
|
||||
var resp *http.Response
|
||||
if config.MetaProxyEnabled && metaProxyClient != nil {
|
||||
resp, err = metaProxyClient.Do(req)
|
||||
} else {
|
||||
client := &http.Client{Timeout: 10 * time.Second}
|
||||
resp, err = client.Do(req)
|
||||
}
|
||||
resp, err := DoMetaProxyRequest(req)
|
||||
if err != nil {
|
||||
return nil, 0, fmt.Errorf("making request: %v", err)
|
||||
}
|
||||
|
|
129
proxy.go
129
proxy.go
|
@ -30,7 +30,7 @@ var (
|
|||
crawlerProxyClient *ProxyClient
|
||||
)
|
||||
|
||||
// NewProxyClientPool creates a pool of HTTP clients with proxies.
|
||||
// NewProxyClientPool creates a pool of HTTP clients with SOCKS5 proxies.
|
||||
func NewProxyClientPool(proxies []ProxyConfig, timeout time.Duration) (*ProxyClient, error) {
|
||||
if len(proxies) == 0 {
|
||||
return nil, fmt.Errorf("no proxies provided")
|
||||
|
@ -38,18 +38,17 @@ func NewProxyClientPool(proxies []ProxyConfig, timeout time.Duration) (*ProxyCli
|
|||
|
||||
clients := make([]*http.Client, len(proxies))
|
||||
|
||||
for i, proxyConfig := range proxies {
|
||||
for i, pc := range proxies {
|
||||
var auth *proxy.Auth
|
||||
if proxyConfig.Username != "" || proxyConfig.Password != "" {
|
||||
if pc.Username != "" || pc.Password != "" {
|
||||
auth = &proxy.Auth{
|
||||
User: proxyConfig.Username,
|
||||
Password: proxyConfig.Password,
|
||||
User: pc.Username,
|
||||
Password: pc.Password,
|
||||
}
|
||||
}
|
||||
|
||||
dialer, err := proxy.SOCKS5("tcp", proxyConfig.Address, auth, proxy.Direct)
|
||||
dialer, err := proxy.SOCKS5("tcp", pc.Address, auth, proxy.Direct)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create SOCKS5 dialer for %s: %w", proxyConfig.Address, err)
|
||||
return nil, fmt.Errorf("failed to create SOCKS5 dialer for %s: %w", pc.Address, err)
|
||||
}
|
||||
|
||||
transport := &http.Transport{Dial: dialer.Dial}
|
||||
|
@ -99,20 +98,21 @@ func (p *ProxyClient) GetProxy() string {
|
|||
// ParseProxies parses the proxy strings in the format ADDRESS:PORT or ADDRESS:PORT:USER:PASSWORD.
|
||||
func ParseProxies(proxyStrings []string) []ProxyConfig {
|
||||
var proxies []ProxyConfig
|
||||
for _, proxy := range proxyStrings {
|
||||
parts := strings.Split(proxy, ":")
|
||||
if len(parts) == 2 { // ADDRESS:PORT
|
||||
for _, proxyStr := range proxyStrings {
|
||||
parts := strings.Split(proxyStr, ":")
|
||||
switch len(parts) {
|
||||
case 2: // ADDRESS:PORT
|
||||
proxies = append(proxies, ProxyConfig{
|
||||
Address: fmt.Sprintf("%s:%s", parts[0], parts[1]),
|
||||
})
|
||||
} else if len(parts) == 4 { // ADDRESS:PORT:USER:PASSWORD
|
||||
case 4: // ADDRESS:PORT:USER:PASSWORD
|
||||
proxies = append(proxies, ProxyConfig{
|
||||
Address: fmt.Sprintf("%s:%s", parts[0], parts[1]),
|
||||
Username: parts[2],
|
||||
Password: parts[3],
|
||||
})
|
||||
} else {
|
||||
fmt.Printf("Invalid proxy format: %s\n", proxy)
|
||||
default:
|
||||
fmt.Printf("Invalid proxy format: %s\n", proxyStr)
|
||||
}
|
||||
}
|
||||
return proxies
|
||||
|
@ -147,6 +147,107 @@ func InitProxies() {
|
|||
}
|
||||
}
|
||||
|
||||
// Doer is an interface so we can accept *http.Client or *ProxyClient for requests.
|
||||
type Doer interface {
|
||||
Do(*http.Request) (*http.Response, error)
|
||||
}
|
||||
|
||||
// DoProxyRequest handles “try direct, then proxy if needed,” with retries if proxy is used.
|
||||
//
|
||||
// - strict: if true, always try proxy first if enabled; if not available, do one direct attempt
|
||||
// - enabled: whether this type of proxy is turned on
|
||||
// - retryCount: how many times to retry with the proxy
|
||||
// - proxyClient: the pool of proxy connections
|
||||
func DoProxyRequest(req *http.Request, strict bool, enabled bool, retryCount int, proxyClient *ProxyClient) (*http.Response, error) {
|
||||
// 1) If !strict => try direct once first
|
||||
if !strict {
|
||||
resp, err := tryRequestOnce(req, http.DefaultClient)
|
||||
if isSuccessful(resp, err) {
|
||||
return resp, nil
|
||||
}
|
||||
// If direct fails => if proxy is enabled, retry
|
||||
if enabled && proxyClient != nil {
|
||||
resp, err = tryRequestWithRetry(req, proxyClient, retryCount)
|
||||
if isSuccessful(resp, err) {
|
||||
return resp, nil
|
||||
}
|
||||
return nil, fmt.Errorf("failed after direct & proxy attempts: %v", err)
|
||||
}
|
||||
return nil, fmt.Errorf("request failed direct, no valid proxy: %v", err)
|
||||
}
|
||||
|
||||
// 2) If strict => if proxy is enabled, try it up to “retryCount”
|
||||
if enabled && proxyClient != nil {
|
||||
resp, err := tryRequestWithRetry(req, proxyClient, retryCount)
|
||||
if isSuccessful(resp, err) {
|
||||
return resp, nil
|
||||
}
|
||||
return nil, fmt.Errorf("failed after %d proxy attempts: %v", retryCount, err)
|
||||
}
|
||||
|
||||
// If strict but no proxy => direct once
|
||||
resp, err := tryRequestOnce(req, http.DefaultClient)
|
||||
if isSuccessful(resp, err) {
|
||||
return resp, nil
|
||||
}
|
||||
return nil, fmt.Errorf("direct request failed in strict mode, no proxy: %v", err)
|
||||
}
|
||||
|
||||
// Helper Wrapper functions for DoProxyRequest()
|
||||
func DoMetaProxyRequest(req *http.Request) (*http.Response, error) {
|
||||
return DoProxyRequest(
|
||||
req,
|
||||
config.MetaProxyStrict,
|
||||
config.MetaProxyEnabled,
|
||||
config.MetaProxyRetry,
|
||||
metaProxyClient,
|
||||
)
|
||||
}
|
||||
func DoCrawlerProxyRequest(req *http.Request) (*http.Response, error) {
|
||||
return DoProxyRequest(
|
||||
req,
|
||||
config.CrawlerProxyStrict,
|
||||
config.CrawlerProxyEnabled,
|
||||
config.CrawlerProxyRetry,
|
||||
metaProxyClient,
|
||||
)
|
||||
}
|
||||
|
||||
// tryRequestWithRetry tries the request up to "retries" times, waiting 200ms between attempts.
|
||||
func tryRequestWithRetry(req *http.Request, client Doer, retries int) (*http.Response, error) {
|
||||
var resp *http.Response
|
||||
var err error
|
||||
for i := 1; i <= retries; i++ {
|
||||
if resp != nil {
|
||||
resp.Body.Close()
|
||||
}
|
||||
printDebug("Attempt %d of %d with proxy/client...", i, retries)
|
||||
resp, err = tryRequestOnce(req, client)
|
||||
if isSuccessful(resp, err) {
|
||||
return resp, nil
|
||||
}
|
||||
time.Sleep(200 * time.Millisecond)
|
||||
}
|
||||
return resp, err
|
||||
}
|
||||
|
||||
// tryRequestOnce sends a single request with the given client. If client is nil, uses default client.
|
||||
func tryRequestOnce(req *http.Request, client Doer) (*http.Response, error) {
|
||||
if client == nil {
|
||||
client = http.DefaultClient
|
||||
}
|
||||
resp, err := client.Do(req)
|
||||
return resp, err
|
||||
}
|
||||
|
||||
// isSuccessful checks if err==nil & resp != nil & resp.StatusCode in [200..299].
|
||||
func isSuccessful(resp *http.Response, err error) bool {
|
||||
if err != nil || resp == nil {
|
||||
return false
|
||||
}
|
||||
return resp.StatusCode >= 200 && resp.StatusCode < 300
|
||||
}
|
||||
|
||||
// func main() {
|
||||
// config := loadConfig()
|
||||
|
||||
|
|
|
@ -25,27 +25,18 @@ func PerformBraveTextSearch(query, safe, lang string, offset int) ([]TextSearchR
|
|||
return nil, 0, fmt.Errorf("creating request: %v", err)
|
||||
}
|
||||
|
||||
// Set headers including User-Agent
|
||||
TextUserAgent, err := GetUserAgent("Text-Search")
|
||||
TextUserAgent, err := GetUserAgent("Text-Search-Brave")
|
||||
if err != nil {
|
||||
printWarn("Error generating User-Agent: %v", err)
|
||||
return nil, 0, err
|
||||
}
|
||||
req.Header.Set("User-Agent", TextUserAgent)
|
||||
|
||||
var resp *http.Response
|
||||
|
||||
// Determine whether to use a proxy client or a default client
|
||||
if config.MetaProxyEnabled && metaProxyClient != nil {
|
||||
resp, err = metaProxyClient.Do(req)
|
||||
} else {
|
||||
client := &http.Client{}
|
||||
resp, err = client.Do(req)
|
||||
}
|
||||
|
||||
// Single call to DoMetaProxyRequest:
|
||||
resp, err := DoMetaProxyRequest(req)
|
||||
if err != nil {
|
||||
printWarn("Error performing request: %v", err)
|
||||
return nil, 0, fmt.Errorf("performing request: %v", err)
|
||||
return nil, 0, fmt.Errorf("performing meta-request: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
|
|
|
@ -22,17 +22,15 @@ func PerformDuckDuckGoTextSearch(query, safe, lang string, page int) ([]TextSear
|
|||
return nil, 0, fmt.Errorf("creating request: %v", err)
|
||||
}
|
||||
|
||||
// Use proxy client if MetaProxy is enabled
|
||||
var resp *http.Response
|
||||
if config.MetaProxyEnabled && metaProxyClient != nil {
|
||||
resp, err = metaProxyClient.Do(req)
|
||||
} else {
|
||||
client := &http.Client{}
|
||||
resp, err = client.Do(req)
|
||||
}
|
||||
|
||||
userAgent, err := GetUserAgent("duckduck-text-search")
|
||||
if err != nil {
|
||||
return nil, 0, fmt.Errorf("making request: %v", err)
|
||||
return nil, 0, err
|
||||
}
|
||||
req.Header.Set("User-Agent", userAgent)
|
||||
|
||||
resp, err := DoMetaProxyRequest(req)
|
||||
if err != nil {
|
||||
return nil, 0, fmt.Errorf("failed to do meta-request: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
|
|
|
@ -11,56 +11,46 @@ import (
|
|||
)
|
||||
|
||||
func PerformGoogleTextSearch(query, safe, lang string, page int) ([]TextSearchResult, time.Duration, error) {
|
||||
startTime := time.Now()
|
||||
const resultsPerPage = 10
|
||||
var results []TextSearchResult
|
||||
|
||||
startTime := time.Now() // Start the timer
|
||||
|
||||
// Build the search URL
|
||||
// 1) Build the search URL
|
||||
searchURL := buildSearchURL(query, safe, lang, page, resultsPerPage)
|
||||
|
||||
// Create a new request
|
||||
// 2) Create a new request
|
||||
req, err := http.NewRequest("GET", searchURL, nil)
|
||||
if err != nil {
|
||||
return nil, 0, fmt.Errorf("failed to create request: %v", err)
|
||||
}
|
||||
|
||||
// Generate and set User-Agent header
|
||||
TextUserAgent, err := GetUserAgent("Google-Text-Search")
|
||||
// 3) Generate and set a User-Agent header
|
||||
userAgent, err := GetUserAgent("Google-Text-Search")
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
req.Header.Set("User-Agent", TextUserAgent)
|
||||
|
||||
// Perform the request using proxy if MetaProxy is enabled
|
||||
var resp *http.Response
|
||||
if config.MetaProxyEnabled && metaProxyClient != nil {
|
||||
resp, err = metaProxyClient.Do(req)
|
||||
} else {
|
||||
client := &http.Client{}
|
||||
resp, err = client.Do(req)
|
||||
}
|
||||
req.Header.Set("User-Agent", userAgent)
|
||||
|
||||
// 4) Use the meta-proxy wrapper
|
||||
resp, err := DoMetaProxyRequest(req)
|
||||
if err != nil {
|
||||
return nil, 0, fmt.Errorf("making request: %v", err)
|
||||
return nil, 0, fmt.Errorf("failed to do meta-request: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
// Check for HTTP status code
|
||||
// 5) Check HTTP status
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return nil, 0, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
// Parse the HTML response
|
||||
// 6) Parse the HTML response
|
||||
doc, err := goquery.NewDocumentFromReader(resp.Body)
|
||||
if err != nil {
|
||||
return nil, 0, fmt.Errorf("loading HTML document: %v", err)
|
||||
}
|
||||
results := parseResults(doc)
|
||||
|
||||
// Extract search results
|
||||
results = parseResults(doc)
|
||||
|
||||
duration := time.Since(startTime) // Calculate the duration
|
||||
// 7) Calculate duration
|
||||
duration := time.Since(startTime)
|
||||
|
||||
if len(results) == 0 {
|
||||
printDebug("No results found from Google Search")
|
||||
|
|
|
@ -44,17 +44,9 @@ func PerformLibreXTextSearch(query, safe, lang string, page int) ([]TextSearchRe
|
|||
}
|
||||
req.Header.Set("User-Agent", userAgent)
|
||||
|
||||
// Respect MetaProxy if enabled and strict
|
||||
var resp *http.Response
|
||||
if config.MetaProxyEnabled && config.MetaProxyStrict && metaProxyClient != nil {
|
||||
resp, err = metaProxyClient.Do(req)
|
||||
} else {
|
||||
client := &http.Client{}
|
||||
resp, err = client.Do(req)
|
||||
}
|
||||
resp, err := DoMetaProxyRequest(req)
|
||||
if err != nil {
|
||||
printWarn("error requesting domain %s: %v", domain, err)
|
||||
continue
|
||||
return nil, 0, fmt.Errorf("failed to do meta-request: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
|
|
|
@ -26,9 +26,11 @@ type QwantTextAPIResponse struct {
|
|||
}
|
||||
|
||||
// PerformQwantTextSearch contacts the Qwant API and returns a slice of TextSearchResult
|
||||
func PerformQwantTextSearch(query, safe, lang string) ([]TextSearchResult, error) {
|
||||
func PerformQwantTextSearch(query, safe, lang string, page int) ([]TextSearchResult, time.Duration, error) {
|
||||
startTime := time.Now()
|
||||
|
||||
const resultsPerPage = 10
|
||||
const offset = 0
|
||||
offset := (page - 1) * resultsPerPage
|
||||
|
||||
// Ensure safe search is disabled by default if not specified
|
||||
if safe == "" {
|
||||
|
@ -40,45 +42,44 @@ func PerformQwantTextSearch(query, safe, lang string) ([]TextSearchResult, error
|
|||
lang = "en_CA"
|
||||
}
|
||||
|
||||
apiURL := fmt.Sprintf("https://api.qwant.com/v3/search/web?q=%s&count=%d&locale=%s&offset=%d&device=desktop",
|
||||
apiURL := fmt.Sprintf(
|
||||
"https://api.qwant.com/v3/search/web?q=%s&count=%d&locale=%s&offset=%d&device=desktop",
|
||||
url.QueryEscape(query),
|
||||
resultsPerPage,
|
||||
lang,
|
||||
offset)
|
||||
offset,
|
||||
)
|
||||
|
||||
req, err := http.NewRequest("GET", apiURL, nil)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("creating request: %v", err)
|
||||
}
|
||||
|
||||
req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36")
|
||||
|
||||
// Perform the request using the appropriate client
|
||||
var resp *http.Response
|
||||
if config.MetaProxyEnabled && metaProxyClient != nil {
|
||||
resp, err = metaProxyClient.Do(req)
|
||||
} else {
|
||||
client := &http.Client{Timeout: 10 * time.Second}
|
||||
resp, err = client.Do(req)
|
||||
// Return three values: nil for the slice, 0 for duration, error for the third.
|
||||
return nil, 0, fmt.Errorf("creating request: %v", err)
|
||||
}
|
||||
|
||||
userAgent, err := GetUserAgent("Quant-Text-Search")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("making request: %v", err)
|
||||
return nil, 0, err
|
||||
}
|
||||
req.Header.Set("User-Agent", userAgent)
|
||||
|
||||
resp, err := DoMetaProxyRequest(req)
|
||||
if err != nil {
|
||||
return nil, 0, fmt.Errorf("failed to do meta-request: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
|
||||
return nil, 0, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
var apiResp QwantTextAPIResponse
|
||||
if err := json.NewDecoder(resp.Body).Decode(&apiResp); err != nil {
|
||||
return nil, fmt.Errorf("decoding response: %v", err)
|
||||
return nil, 0, fmt.Errorf("decoding response: %v", err)
|
||||
}
|
||||
|
||||
// Extracting results from the nested JSON structure
|
||||
if len(apiResp.Data.Result.Items.Mainline) == 0 {
|
||||
return nil, fmt.Errorf("no search results found")
|
||||
return nil, 0, fmt.Errorf("no search results found")
|
||||
}
|
||||
|
||||
var results []TextSearchResult
|
||||
|
@ -92,7 +93,8 @@ func PerformQwantTextSearch(query, safe, lang string) ([]TextSearchResult, error
|
|||
})
|
||||
}
|
||||
|
||||
return results, nil
|
||||
duration := time.Since(startTime)
|
||||
return results, duration, nil
|
||||
}
|
||||
|
||||
// cleanQwantURL extracts the main part of the URL, removing tracking information
|
||||
|
|
|
@ -50,16 +50,9 @@ func fetchInstances() ([]Instance, error) {
|
|||
}
|
||||
req.Header.Set("User-Agent", XNGUserAgent)
|
||||
|
||||
var resp *http.Response
|
||||
if config.MetaProxyEnabled && config.MetaProxyStrict && metaProxyClient != nil {
|
||||
resp, err = metaProxyClient.Do(req)
|
||||
} else {
|
||||
client := &http.Client{Timeout: 10 * time.Second}
|
||||
resp, err = client.Do(req)
|
||||
}
|
||||
|
||||
resp, err := DoMetaProxyRequest(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("performing request: %v", err)
|
||||
return nil, fmt.Errorf("failed to do meta-request: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
|
|
2
text.go
2
text.go
|
@ -13,6 +13,7 @@ var allTextSearchEngines = []SearchEngine{
|
|||
{Name: "LibreX", Func: wrapTextSearchFunc(PerformLibreXTextSearch)},
|
||||
{Name: "Brave", Func: wrapTextSearchFunc(PerformBraveTextSearch)},
|
||||
{Name: "DuckDuckGo", Func: wrapTextSearchFunc(PerformDuckDuckGoTextSearch)},
|
||||
{Name: "Quant", Func: wrapTextSearchFunc(PerformQwantTextSearch)}, // Broken !
|
||||
//{Name: "SearXNG", Func: wrapTextSearchFunc(PerformSearXTextSearch)}, // bruh
|
||||
}
|
||||
|
||||
|
@ -123,6 +124,7 @@ func prefetchPage(query, safe, lang string, page int) {
|
|||
}
|
||||
}
|
||||
|
||||
// The logic in this function is rotating search engines instead of running them in order as noted in the wiki
|
||||
func fetchTextResults(query, safe, lang string, page int) []TextSearchResult {
|
||||
var results []TextSearchResult
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue