2024-04-15 08:35:17 +02:00
|
|
|
|
package main
|
|
|
|
|
|
|
|
|
|
import (
|
2024-06-09 12:43:46 +02:00
|
|
|
|
"context"
|
2024-05-19 22:57:23 +02:00
|
|
|
|
"fmt"
|
2024-04-15 08:35:17 +02:00
|
|
|
|
"net/url"
|
|
|
|
|
"strings"
|
2024-06-09 12:43:46 +02:00
|
|
|
|
"time"
|
2024-04-15 08:35:17 +02:00
|
|
|
|
|
|
|
|
|
"github.com/PuerkitoBio/goquery"
|
2024-06-09 12:43:46 +02:00
|
|
|
|
"github.com/chromedp/chromedp"
|
2024-04-15 08:35:17 +02:00
|
|
|
|
)
|
|
|
|
|
|
2024-06-09 21:44:49 +02:00
|
|
|
|
func PerformGoogleTextSearch(query, safe, lang string, page int) ([]TextSearchResult, error) {
|
|
|
|
|
opts := append(chromedp.DefaultExecAllocatorOptions[:],
|
|
|
|
|
chromedp.DisableGPU,
|
|
|
|
|
chromedp.NoDefaultBrowserCheck,
|
|
|
|
|
chromedp.NoFirstRun,
|
|
|
|
|
chromedp.Flag("disable-javascript", true),
|
|
|
|
|
)
|
|
|
|
|
ctx, cancel := chromedp.NewExecAllocator(context.Background(), opts...)
|
2024-06-09 12:43:46 +02:00
|
|
|
|
defer cancel()
|
2024-06-02 12:05:25 +02:00
|
|
|
|
|
2024-06-09 21:44:49 +02:00
|
|
|
|
ctx, cancel = chromedp.NewContext(ctx)
|
|
|
|
|
defer cancel()
|
2024-06-02 12:05:25 +02:00
|
|
|
|
|
2024-06-09 21:44:49 +02:00
|
|
|
|
var results []TextSearchResult
|
2024-04-15 08:35:17 +02:00
|
|
|
|
|
2024-06-09 21:44:49 +02:00
|
|
|
|
searchURL := buildSearchURL(query, safe, lang, page, 10)
|
|
|
|
|
var pageSource string
|
2024-06-09 12:43:46 +02:00
|
|
|
|
err := chromedp.Run(ctx,
|
|
|
|
|
chromedp.Navigate(searchURL),
|
2024-06-09 21:44:49 +02:00
|
|
|
|
chromedp.Sleep(2*time.Second),
|
|
|
|
|
chromedp.OuterHTML("html", &pageSource),
|
2024-06-09 12:43:46 +02:00
|
|
|
|
)
|
2024-04-15 08:35:17 +02:00
|
|
|
|
if err != nil {
|
2024-06-09 21:44:49 +02:00
|
|
|
|
return nil, fmt.Errorf("failed to retrieve page source: %v", err)
|
2024-04-15 08:35:17 +02:00
|
|
|
|
}
|
|
|
|
|
|
2024-06-09 21:44:49 +02:00
|
|
|
|
newResults, err := parseResults(pageSource)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return nil, fmt.Errorf("error parsing results: %v", err)
|
2024-05-21 21:22:36 +02:00
|
|
|
|
}
|
2024-06-09 21:44:49 +02:00
|
|
|
|
results = append(results, newResults...)
|
2024-05-21 21:22:36 +02:00
|
|
|
|
|
|
|
|
|
return results, nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// buildSearchURL assembles the Google search URL for one page of results.
//
// Parameters:
//   - query: the search text; it is query-escaped before being placed in "q".
//   - safe: "active" selects "&safe=active"; any other value yields "&safe=off".
//   - lang: when non-empty, it is sent query-escaped as the "lr" parameter;
//     when empty, no "lr" parameter is emitted.
//   - page: 1-based page number. Values below 1 are treated as page 1 so the
//     "start" offset is never negative.
//   - resultsPerPage: multiplier used to turn the page number into "start".
//
// The parameters always appear in the order q, safe, lr, start.
func buildSearchURL(query, safe, lang string, page, resultsPerPage int) string {
	safeParam := "&safe=off"
	if safe == "active" {
		safeParam = "&safe=active"
	}

	langParam := ""
	if lang != "" {
		// Escape lang so that characters such as '&' or '=' cannot smuggle
		// extra parameters into the request.
		langParam = "&lr=" + url.QueryEscape(lang)
	}

	if page < 1 {
		page = 1
	}
	start := (page - 1) * resultsPerPage

	return fmt.Sprintf("https://www.google.com/search?q=%s%s%s&start=%d",
		url.QueryEscape(query), safeParam, langParam, start)
}
|
|
|
|
|
|
2024-06-09 12:43:46 +02:00
|
|
|
|
func parseResults(pageSource string) ([]TextSearchResult, error) {
|
2024-05-21 21:22:36 +02:00
|
|
|
|
var results []TextSearchResult
|
|
|
|
|
|
2024-06-09 12:43:46 +02:00
|
|
|
|
doc, err := goquery.NewDocumentFromReader(strings.NewReader(pageSource))
|
|
|
|
|
if err != nil {
|
|
|
|
|
return nil, fmt.Errorf("loading HTML document: %v", err)
|
|
|
|
|
}
|
|
|
|
|
|
2024-04-15 08:35:17 +02:00
|
|
|
|
doc.Find(".yuRUbf").Each(func(i int, s *goquery.Selection) {
|
|
|
|
|
link := s.Find("a")
|
2024-05-19 22:57:23 +02:00
|
|
|
|
href, exists := link.Attr("href")
|
|
|
|
|
if !exists {
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
2024-04-15 08:35:17 +02:00
|
|
|
|
header := link.Find("h3").Text()
|
|
|
|
|
header = strings.TrimSpace(strings.TrimSuffix(header, "›"))
|
|
|
|
|
|
|
|
|
|
description := ""
|
2024-05-21 21:22:36 +02:00
|
|
|
|
descSelection := doc.Find(".VwiC3b").Eq(i)
|
2024-04-15 08:35:17 +02:00
|
|
|
|
if descSelection.Length() > 0 {
|
|
|
|
|
description = descSelection.Text()
|
|
|
|
|
}
|
|
|
|
|
|
2024-05-18 01:59:29 +02:00
|
|
|
|
result := TextSearchResult{
|
2024-04-15 08:35:17 +02:00
|
|
|
|
URL: href,
|
|
|
|
|
Header: header,
|
|
|
|
|
Description: description,
|
2024-05-18 01:59:29 +02:00
|
|
|
|
}
|
|
|
|
|
results = append(results, result)
|
2024-04-15 08:35:17 +02:00
|
|
|
|
})
|
|
|
|
|
|
2024-06-09 12:43:46 +02:00
|
|
|
|
return results, nil
|
2024-04-15 08:35:17 +02:00
|
|
|
|
}
|