parent
ab707a91e8
commit
35e657bccd
17 changed files with 224 additions and 186 deletions
|
@ -65,6 +65,7 @@ func fetchPageMetadataChrome(pageURL, userAgent string) (string, string, string)
|
|||
func configureChromeOptions() []chromedp.ExecAllocatorOption {
|
||||
options := chromedp.DefaultExecAllocatorOptions[:]
|
||||
|
||||
// This code is not using config.CrawlerProxyRetry
|
||||
if config.CrawlerProxyEnabled && crawlerProxyClient != nil {
|
||||
// Retrieve proxy settings from CrawlerProxy
|
||||
proxy := crawlerProxyClient.GetProxy() // Ensure a `GetProxy` method is implemented for your proxy client
|
||||
|
@ -89,7 +90,7 @@ func configureChromeOptions() []chromedp.ExecAllocatorOption {
|
|||
|
||||
// extractStandard does the normal HTML parse with OG, Twitter, etc.
|
||||
func extractStandard(pageURL, userAgent string) (title, desc, keywords string) {
|
||||
client := &http.Client{Timeout: 15 * time.Second}
|
||||
|
||||
req, err := http.NewRequest("GET", pageURL, nil)
|
||||
if err != nil {
|
||||
printDebug("Failed to create request for %s: %v", pageURL, err)
|
||||
|
@ -99,12 +100,7 @@ func extractStandard(pageURL, userAgent string) (title, desc, keywords string) {
|
|||
req.Header.Set("Accept-Language", "en-US,en;q=0.9")
|
||||
|
||||
// Use CrawlerProxy if enabled
|
||||
var resp *http.Response
|
||||
if config.CrawlerProxyEnabled && crawlerProxyClient != nil {
|
||||
resp, err = crawlerProxyClient.Do(req)
|
||||
} else {
|
||||
resp, err = client.Do(req)
|
||||
}
|
||||
resp, err := DoCrawlerProxyRequest(req)
|
||||
if err != nil {
|
||||
printDebug("Failed to GET %s: %v", pageURL, err)
|
||||
return
|
||||
|
@ -212,7 +208,6 @@ func fallbackReadability(pageURL, userAgent, title, desc, keywords string) (stri
|
|||
return title, desc, keywords
|
||||
}
|
||||
|
||||
client := &http.Client{Timeout: 15 * time.Second}
|
||||
readReq, err := http.NewRequest("GET", pageURL, nil)
|
||||
if err != nil {
|
||||
printDebug("Failed to create fallbackReadability request: %v", err)
|
||||
|
@ -222,19 +217,15 @@ func fallbackReadability(pageURL, userAgent, title, desc, keywords string) (stri
|
|||
readReq.Header.Set("Accept-Language", "en-US,en;q=0.9")
|
||||
|
||||
// Use CrawlerProxy if enabled
|
||||
var readResp *http.Response
|
||||
if config.CrawlerProxyEnabled && crawlerProxyClient != nil {
|
||||
readResp, err = crawlerProxyClient.Do(readReq)
|
||||
} else {
|
||||
readResp, err = client.Do(readReq)
|
||||
readResp, err := DoCrawlerProxyRequest(readReq)
|
||||
if err != nil {
|
||||
printDebug("go-readability GET error for %s: %v", pageURL, err)
|
||||
return title, desc, keywords
|
||||
}
|
||||
if err != nil || readResp.StatusCode < 200 || readResp.StatusCode >= 300 {
|
||||
if err != nil {
|
||||
printDebug("go-readability GET error for %s: %v", pageURL, err)
|
||||
}
|
||||
if readResp != nil {
|
||||
readResp.Body.Close()
|
||||
}
|
||||
|
||||
if readResp.StatusCode < 200 || readResp.StatusCode >= 300 {
|
||||
printDebug("go-readability GET returned status %d for %s", readResp.StatusCode, pageURL)
|
||||
readResp.Body.Close() // Safely close body
|
||||
return title, desc, keywords
|
||||
}
|
||||
defer readResp.Body.Close()
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue