added ProxyRetry to config and fixed ProxyStrict

This commit is contained in:
partisan 2025-02-22 22:36:54 +01:00
parent ab707a91e8
commit 35e657bccd
17 changed files with 224 additions and 186 deletions

View file

@@ -65,6 +65,7 @@ func fetchPageMetadataChrome(pageURL, userAgent string) (string, string, string)
func configureChromeOptions() []chromedp.ExecAllocatorOption {
options := chromedp.DefaultExecAllocatorOptions[:]
// This code is not using config.CrawlerProxyRetry
if config.CrawlerProxyEnabled && crawlerProxyClient != nil {
// Retrieve proxy settings from CrawlerProxy
proxy := crawlerProxyClient.GetProxy() // Ensure a `GetProxy` method is implemented for your proxy client
@@ -89,7 +90,7 @@ func configureChromeOptions() []chromedp.ExecAllocatorOption {
// extractStandard does the normal HTML parse with OG, Twitter, etc.
func extractStandard(pageURL, userAgent string) (title, desc, keywords string) {
client := &http.Client{Timeout: 15 * time.Second}
req, err := http.NewRequest("GET", pageURL, nil)
if err != nil {
printDebug("Failed to create request for %s: %v", pageURL, err)
@@ -99,12 +100,7 @@ func extractStandard(pageURL, userAgent string) (title, desc, keywords string) {
req.Header.Set("Accept-Language", "en-US,en;q=0.9")
// Use CrawlerProxy if enabled
var resp *http.Response
if config.CrawlerProxyEnabled && crawlerProxyClient != nil {
resp, err = crawlerProxyClient.Do(req)
} else {
resp, err = client.Do(req)
}
resp, err := DoCrawlerProxyRequest(req)
if err != nil {
printDebug("Failed to GET %s: %v", pageURL, err)
return
@@ -212,7 +208,6 @@ func fallbackReadability(pageURL, userAgent, title, desc, keywords string) (stri
return title, desc, keywords
}
client := &http.Client{Timeout: 15 * time.Second}
readReq, err := http.NewRequest("GET", pageURL, nil)
if err != nil {
printDebug("Failed to create fallbackReadability request: %v", err)
@@ -222,19 +217,15 @@ func fallbackReadability(pageURL, userAgent, title, desc, keywords string) (stri
readReq.Header.Set("Accept-Language", "en-US,en;q=0.9")
// Use CrawlerProxy if enabled
var readResp *http.Response
if config.CrawlerProxyEnabled && crawlerProxyClient != nil {
readResp, err = crawlerProxyClient.Do(readReq)
} else {
readResp, err = client.Do(readReq)
readResp, err := DoCrawlerProxyRequest(readReq)
if err != nil {
printDebug("go-readability GET error for %s: %v", pageURL, err)
return title, desc, keywords
}
if err != nil || readResp.StatusCode < 200 || readResp.StatusCode >= 300 {
if err != nil {
printDebug("go-readability GET error for %s: %v", pageURL, err)
}
if readResp != nil {
readResp.Body.Close()
}
if readResp.StatusCode < 200 || readResp.StatusCode >= 300 {
printDebug("go-readability GET returned status %d for %s", readResp.StatusCode, pageURL)
readResp.Body.Close() // Safely close body
return title, desc, keywords
}
defer readResp.Body.Close()