2024-04-15 08:35:17 +02:00
|
|
|
|
package main
|
|
|
|
|
|
|
|
|
|
import (
|
2024-05-19 22:57:23 +02:00
|
|
|
|
"fmt"
|
2024-06-14 09:07:07 +02:00
|
|
|
|
"net/http"
|
2024-04-15 08:35:17 +02:00
|
|
|
|
"net/url"
|
|
|
|
|
"strings"
|
2024-06-14 17:56:20 +02:00
|
|
|
|
"time"
|
2024-04-15 08:35:17 +02:00
|
|
|
|
|
|
|
|
|
"github.com/PuerkitoBio/goquery"
|
|
|
|
|
)
|
|
|
|
|
|
2024-06-14 17:56:20 +02:00
|
|
|
|
func PerformGoogleTextSearch(query, safe, lang string, page int) ([]TextSearchResult, time.Duration, error) {
|
2024-06-14 09:07:07 +02:00
|
|
|
|
const resultsPerPage = 10
|
2024-06-09 21:44:49 +02:00
|
|
|
|
var results []TextSearchResult
|
2024-04-15 08:35:17 +02:00
|
|
|
|
|
2024-06-14 17:56:20 +02:00
|
|
|
|
startTime := time.Now() // Start the timer
|
|
|
|
|
|
2024-06-14 09:07:07 +02:00
|
|
|
|
client := &http.Client{}
|
|
|
|
|
searchURL := buildSearchURL(query, safe, lang, page, resultsPerPage)
|
|
|
|
|
|
|
|
|
|
req, err := http.NewRequest("GET", searchURL, nil)
|
2024-04-15 08:35:17 +02:00
|
|
|
|
if err != nil {
|
2024-06-14 17:56:20 +02:00
|
|
|
|
return nil, 0, fmt.Errorf("failed to create request: %v", err)
|
2024-04-15 08:35:17 +02:00
|
|
|
|
}
|
|
|
|
|
|
2024-06-14 09:07:07 +02:00
|
|
|
|
// User Agent generation
|
|
|
|
|
TextUserAgent, err := GetUserAgent("Text-Search")
|
2024-06-09 21:44:49 +02:00
|
|
|
|
if err != nil {
|
2024-06-14 17:56:20 +02:00
|
|
|
|
return nil, 0, err
|
2024-06-14 09:07:07 +02:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
req.Header.Set("User-Agent", TextUserAgent)
|
|
|
|
|
|
|
|
|
|
resp, err := client.Do(req)
|
|
|
|
|
if err != nil {
|
2024-06-14 17:56:20 +02:00
|
|
|
|
return nil, 0, fmt.Errorf("making request: %v", err)
|
2024-06-14 09:07:07 +02:00
|
|
|
|
}
|
|
|
|
|
defer resp.Body.Close()
|
|
|
|
|
|
|
|
|
|
if resp.StatusCode != http.StatusOK {
|
2024-06-14 17:56:20 +02:00
|
|
|
|
return nil, 0, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
|
2024-06-14 09:07:07 +02:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
doc, err := goquery.NewDocumentFromReader(resp.Body)
|
|
|
|
|
if err != nil {
|
2024-06-14 17:56:20 +02:00
|
|
|
|
return nil, 0, fmt.Errorf("loading HTML document: %v", err)
|
2024-06-14 09:07:07 +02:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
results = parseResults(doc)
|
|
|
|
|
|
2024-06-14 17:56:20 +02:00
|
|
|
|
duration := time.Since(startTime) // Calculate the duration
|
|
|
|
|
|
2024-06-14 09:07:07 +02:00
|
|
|
|
if len(results) == 0 {
|
2024-08-10 13:27:23 +02:00
|
|
|
|
printDebug("No results found from Google Search")
|
2024-05-21 21:22:36 +02:00
|
|
|
|
}
|
|
|
|
|
|
2024-06-14 17:56:20 +02:00
|
|
|
|
return results, duration, nil
|
2024-05-21 21:22:36 +02:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func buildSearchURL(query, safe, lang string, page, resultsPerPage int) string {
|
|
|
|
|
safeParam := "&safe=off"
|
|
|
|
|
if safe == "active" {
|
|
|
|
|
safeParam = "&safe=active"
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
langParam := ""
|
2024-09-11 12:49:06 +02:00
|
|
|
|
var glParam, uuleParam string
|
|
|
|
|
|
2024-05-21 21:22:36 +02:00
|
|
|
|
if lang != "" {
|
2024-09-11 12:49:06 +02:00
|
|
|
|
// Use lang as the geolocation
|
2024-08-29 21:21:36 +02:00
|
|
|
|
langParam = "&lr=lang_" + lang
|
2024-09-11 12:49:06 +02:00
|
|
|
|
glParam = "&gl=" + lang
|
|
|
|
|
uuleParam = ""
|
|
|
|
|
} else {
|
|
|
|
|
// Use random geolocation
|
2024-10-09 18:50:04 +02:00
|
|
|
|
glParam, uuleParam = "us", "us"
|
2024-05-21 21:22:36 +02:00
|
|
|
|
}
|
|
|
|
|
|
2024-06-14 09:07:07 +02:00
|
|
|
|
startIndex := (page - 1) * resultsPerPage
|
2024-09-27 13:16:36 +02:00
|
|
|
|
|
|
|
|
|
printDebug(fmt.Sprintf("https://www.google.com/search?q=%s%s%s%s%s&start=%d",
|
|
|
|
|
url.QueryEscape(query), safeParam, langParam, glParam, uuleParam, startIndex))
|
|
|
|
|
|
2024-08-12 17:02:17 +02:00
|
|
|
|
return fmt.Sprintf("https://www.google.com/search?q=%s%s%s%s%s&start=%d",
|
|
|
|
|
url.QueryEscape(query), safeParam, langParam, glParam, uuleParam, startIndex)
|
|
|
|
|
}
|
|
|
|
|
|
2024-10-09 18:50:04 +02:00
|
|
|
|
// func getRandomGeoLocation() (string, string) {
|
|
|
|
|
// countries := []string{"us", "ca", "gb", "fr", "de", "au", "in", "jp", "br", "za"}
|
|
|
|
|
// randomCountry := countries[rand.Intn(len(countries))]
|
2024-08-12 17:02:17 +02:00
|
|
|
|
|
2024-10-09 18:50:04 +02:00
|
|
|
|
// glParam := "&gl=" + randomCountry
|
|
|
|
|
// uuleParam := ""
|
2024-08-12 17:02:17 +02:00
|
|
|
|
|
2024-10-09 18:50:04 +02:00
|
|
|
|
// return glParam, uuleParam
|
|
|
|
|
// }
|
2024-05-21 21:22:36 +02:00
|
|
|
|
|
2024-06-14 09:07:07 +02:00
|
|
|
|
func parseResults(doc *goquery.Document) []TextSearchResult {
|
2024-05-21 21:22:36 +02:00
|
|
|
|
var results []TextSearchResult
|
|
|
|
|
|
2024-04-15 08:35:17 +02:00
|
|
|
|
doc.Find(".yuRUbf").Each(func(i int, s *goquery.Selection) {
|
|
|
|
|
link := s.Find("a")
|
2024-05-19 22:57:23 +02:00
|
|
|
|
href, exists := link.Attr("href")
|
|
|
|
|
if !exists {
|
2024-08-10 13:27:23 +02:00
|
|
|
|
printDebug("No href attribute found for result %d\n", i)
|
2024-05-19 22:57:23 +02:00
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
2024-04-15 08:35:17 +02:00
|
|
|
|
header := link.Find("h3").Text()
|
|
|
|
|
header = strings.TrimSpace(strings.TrimSuffix(header, "›"))
|
|
|
|
|
|
|
|
|
|
description := ""
|
2024-05-21 21:22:36 +02:00
|
|
|
|
descSelection := doc.Find(".VwiC3b").Eq(i)
|
2024-04-15 08:35:17 +02:00
|
|
|
|
if descSelection.Length() > 0 {
|
|
|
|
|
description = descSelection.Text()
|
|
|
|
|
}
|
|
|
|
|
|
2024-05-18 01:59:29 +02:00
|
|
|
|
result := TextSearchResult{
|
2024-04-15 08:35:17 +02:00
|
|
|
|
URL: href,
|
|
|
|
|
Header: header,
|
|
|
|
|
Description: description,
|
2024-05-18 01:59:29 +02:00
|
|
|
|
}
|
|
|
|
|
results = append(results, result)
|
2024-04-15 08:35:17 +02:00
|
|
|
|
})
|
|
|
|
|
|
2024-06-14 09:07:07 +02:00
|
|
|
|
return results
|
2024-04-15 08:35:17 +02:00
|
|
|
|
}
|