Search/text-google.go
partisan 35e657bccd
Some checks failed
Run Integration Tests / test (push) Failing after 50s
added ProxyRetry to config and fixed ProxyStrict
2025-02-22 22:36:54 +01:00

132 lines
3.1 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package main
import (
"fmt"
"net/http"
"net/url"
"strings"
"time"
"github.com/PuerkitoBio/goquery"
)
// PerformGoogleTextSearch fetches one page of Google text-search results for
// query and returns the parsed results together with the elapsed time.
//
// safe, lang and page are forwarded to buildSearchURL, using a fixed page size
// of 10 results. The request carries a User-Agent obtained from GetUserAgent
// and is sent through DoMetaProxyRequest.
//
// On any failure (request construction, User-Agent lookup, transport error,
// non-200 status, HTML parse error) it returns nil results, a zero duration
// and a non-nil error. Underlying errors are wrapped with %w so callers can
// inspect them with errors.Is / errors.As. An empty result list is not an
// error; it is only reported through printDebug.
func PerformGoogleTextSearch(query, safe, lang string, page int) ([]TextSearchResult, time.Duration, error) {
	startTime := time.Now()
	const resultsPerPage = 10

	// 1) Build the search URL
	searchURL := buildSearchURL(query, safe, lang, page, resultsPerPage)

	// 2) Create a new request
	req, err := http.NewRequest("GET", searchURL, nil)
	if err != nil {
		return nil, 0, fmt.Errorf("failed to create request: %w", err)
	}

	// 3) Generate and set a User-Agent header
	userAgent, err := GetUserAgent("Google-Text-Search")
	if err != nil {
		return nil, 0, err
	}
	req.Header.Set("User-Agent", userAgent)

	// 4) Use the meta-proxy wrapper
	resp, err := DoMetaProxyRequest(req)
	if err != nil {
		return nil, 0, fmt.Errorf("failed to do meta-request: %w", err)
	}
	defer resp.Body.Close()

	// 5) Check HTTP status
	if resp.StatusCode != http.StatusOK {
		return nil, 0, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
	}

	// 6) Parse the HTML response
	doc, err := goquery.NewDocumentFromReader(resp.Body)
	if err != nil {
		return nil, 0, fmt.Errorf("loading HTML document: %w", err)
	}
	results := parseResults(doc)

	// 7) Calculate duration (covers URL building, the request and parsing)
	duration := time.Since(startTime)

	if len(results) == 0 {
		printDebug("No results found from Google Search")
	}

	return results, duration, nil
}
// buildSearchURL assembles the Google search URL for one page of results.
//
// Parameters:
//   - query: the search terms; URL-escaped before being placed in "q".
//   - safe: "active" yields "&safe=active"; any other value yields "&safe=off".
//   - lang: when non-empty it is URL-escaped and sent both as "lr=lang_<lang>"
//     and as the "gl" value; when empty, "lr" is omitted and "gl=us" is used.
//   - page: 1-based page number. Values below 1 are treated as page 1 so the
//     "start" offset can never be negative.
//   - resultsPerPage: multiplier used to turn the page number into "start".
//
// The finished URL is passed to printDebug and returned.
func buildSearchURL(query, safe, lang string, page, resultsPerPage int) string {
	safeParam := "&safe=off"
	if safe == "active" {
		safeParam = "&safe=active"
	}

	// Default geolocation; overridden below when a language is supplied.
	langParam := ""
	glParam := "&gl=us"
	if lang != "" {
		// Escape lang so characters such as '&' or '=' cannot inject extra
		// query parameters. Plain language codes are unaffected by escaping.
		escapedLang := url.QueryEscape(lang)
		langParam = "&lr=lang_" + escapedLang
		// Use lang as the geolocation
		glParam = "&gl=" + escapedLang
	}

	// Clamp to the first page: page 0 or a negative page would otherwise
	// produce a negative start offset.
	if page < 1 {
		page = 1
	}
	startIndex := (page - 1) * resultsPerPage

	// Appended to every request.
	// NOTE(review): presumably selects Google's plain "Web" results view — confirm.
	udmParam := "&udm=14"

	// Build the URL string
	baseURL := "https://www.google.com/search?q=%s%s%s%s&start=%d%s"
	fullURL := fmt.Sprintf(baseURL, url.QueryEscape(query), safeParam, langParam, glParam, startIndex, udmParam)

	// Debug print
	printDebug("%s", fullURL)

	return fullURL
}
// func getRandomGeoLocation() (string, string) {
// countries := []string{"us", "ca", "gb", "fr", "de", "au", "in", "jp", "br", "za"}
// randomCountry := countries[rand.Intn(len(countries))]
// glParam := "&gl=" + randomCountry
// uuleParam := ""
// return glParam, uuleParam
// }
// parseResults extracts search results from a parsed Google results page.
//
// Every element matching ".yuRUbf" is treated as one result: the href of the
// anchor inside it becomes the URL and the text of the anchor's <h3> becomes
// the header. The description is taken from the ".VwiC3b" element at the same
// index in document order. Entries whose anchor has no href are skipped (and
// reported through printDebug). Returns nil when nothing matched.
//
// NOTE(review): descriptions are paired with results purely by index, so a
// result without a ".VwiC3b" snippet would shift every later description —
// confirm this matches the markup being scraped.
func parseResults(doc *goquery.Document) []TextSearchResult {
	var results []TextSearchResult

	// The description selection does not depend on the current result, so
	// query it once instead of re-scanning the whole document per result.
	descriptions := doc.Find(".VwiC3b")

	doc.Find(".yuRUbf").Each(func(i int, s *goquery.Selection) {
		link := s.Find("a")
		href, exists := link.Attr("href")
		if !exists {
			printDebug("No href attribute found for result %d\n", i)
			return
		}

		header := link.Find("h3").Text()
		// NOTE(review): trimming an empty suffix is a no-op as written; the
		// literal may originally have held a special character — verify
		// against the repository copy before changing it.
		header = strings.TrimSpace(strings.TrimSuffix(header, ""))

		description := ""
		descSelection := descriptions.Eq(i)
		if descSelection.Length() > 0 {
			description = descSelection.Text()
		}

		result := TextSearchResult{
			URL:         href,
			Header:      header,
			Description: description,
		}
		results = append(results, result)
	})

	return results
}