Search/text-google.go

128 lines
3 KiB
Go
Raw Permalink Normal View History

2024-04-15 08:35:17 +02:00
package main
import (
"fmt"
2024-06-14 09:07:07 +02:00
"net/http"
2024-04-15 08:35:17 +02:00
"net/url"
"strings"
"time"
2024-04-15 08:35:17 +02:00
"github.com/PuerkitoBio/goquery"
)
func PerformGoogleTextSearch(query, safe, lang string, page int) ([]TextSearchResult, time.Duration, error) {
2024-06-14 09:07:07 +02:00
const resultsPerPage = 10
2024-06-09 21:44:49 +02:00
var results []TextSearchResult
2024-04-15 08:35:17 +02:00
startTime := time.Now() // Start the timer
2024-06-14 09:07:07 +02:00
client := &http.Client{}
searchURL := buildSearchURL(query, safe, lang, page, resultsPerPage)
req, err := http.NewRequest("GET", searchURL, nil)
2024-04-15 08:35:17 +02:00
if err != nil {
return nil, 0, fmt.Errorf("failed to create request: %v", err)
2024-04-15 08:35:17 +02:00
}
2024-06-14 09:07:07 +02:00
// User Agent generation
TextUserAgent, err := GetUserAgent("Text-Search")
2024-06-09 21:44:49 +02:00
if err != nil {
return nil, 0, err
2024-06-14 09:07:07 +02:00
}
req.Header.Set("User-Agent", TextUserAgent)
resp, err := client.Do(req)
if err != nil {
return nil, 0, fmt.Errorf("making request: %v", err)
2024-06-14 09:07:07 +02:00
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return nil, 0, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
2024-06-14 09:07:07 +02:00
}
doc, err := goquery.NewDocumentFromReader(resp.Body)
if err != nil {
return nil, 0, fmt.Errorf("loading HTML document: %v", err)
2024-06-14 09:07:07 +02:00
}
results = parseResults(doc)
duration := time.Since(startTime) // Calculate the duration
2024-06-14 09:07:07 +02:00
if len(results) == 0 {
2024-08-10 13:27:23 +02:00
printDebug("No results found from Google Search")
2024-05-21 21:22:36 +02:00
}
return results, duration, nil
2024-05-21 21:22:36 +02:00
}
func buildSearchURL(query, safe, lang string, page, resultsPerPage int) string {
safeParam := "&safe=off"
if safe == "active" {
safeParam = "&safe=active"
}
langParam := ""
2024-10-09 21:03:53 +02:00
var glParam string
2024-05-21 21:22:36 +02:00
if lang != "" {
// Use lang as the geolocation
langParam = "&lr=lang_" + lang
glParam = "&gl=" + lang
} else {
2024-10-09 21:03:53 +02:00
// Use default geolocation
glParam = "&gl=us"
2024-05-21 21:22:36 +02:00
}
2024-06-14 09:07:07 +02:00
startIndex := (page - 1) * resultsPerPage
2024-10-09 21:03:53 +02:00
printDebug(fmt.Sprintf("https://www.google.com/search?q=%s%s%s%s&start=%d",
url.QueryEscape(query), safeParam, langParam, glParam, startIndex))
2024-10-09 21:03:53 +02:00
return fmt.Sprintf("https://www.google.com/search?q=%s%s%s%s&start=%d",
url.QueryEscape(query), safeParam, langParam, glParam, startIndex)
2024-08-12 17:02:17 +02:00
}
// func getRandomGeoLocation() (string, string) {
// countries := []string{"us", "ca", "gb", "fr", "de", "au", "in", "jp", "br", "za"}
// randomCountry := countries[rand.Intn(len(countries))]
2024-08-12 17:02:17 +02:00
// glParam := "&gl=" + randomCountry
// uuleParam := ""
2024-08-12 17:02:17 +02:00
// return glParam, uuleParam
// }
2024-05-21 21:22:36 +02:00
2024-06-14 09:07:07 +02:00
func parseResults(doc *goquery.Document) []TextSearchResult {
2024-05-21 21:22:36 +02:00
var results []TextSearchResult
2024-04-15 08:35:17 +02:00
doc.Find(".yuRUbf").Each(func(i int, s *goquery.Selection) {
link := s.Find("a")
href, exists := link.Attr("href")
if !exists {
2024-08-10 13:27:23 +02:00
printDebug("No href attribute found for result %d\n", i)
return
}
2024-04-15 08:35:17 +02:00
header := link.Find("h3").Text()
header = strings.TrimSpace(strings.TrimSuffix(header, ""))
description := ""
2024-05-21 21:22:36 +02:00
descSelection := doc.Find(".VwiC3b").Eq(i)
2024-04-15 08:35:17 +02:00
if descSelection.Length() > 0 {
description = descSelection.Text()
}
2024-05-18 01:59:29 +02:00
result := TextSearchResult{
2024-04-15 08:35:17 +02:00
URL: href,
Header: header,
Description: description,
2024-05-18 01:59:29 +02:00
}
results = append(results, result)
2024-04-15 08:35:17 +02:00
})
2024-06-14 09:07:07 +02:00
return results
2024-04-15 08:35:17 +02:00
}