116 lines
2.7 KiB
Go
116 lines
2.7 KiB
Go
package main
|
||
|
||
import (
|
||
"context"
|
||
"fmt"
|
||
"net/url"
|
||
"strings"
|
||
"time"
|
||
|
||
"github.com/PuerkitoBio/goquery"
|
||
"github.com/chromedp/chromedp"
|
||
)
|
||
|
||
// type TextSearchResult struct {
|
||
// URL string
|
||
// Header string
|
||
// Description string
|
||
// }
|
||
|
||
// func main() {
|
||
// // Example usage
|
||
// results, err := PerformGoogleTextSearch("golang", "off", "lang_en", 2)
|
||
// if err != nil {
|
||
// log.Fatalf("Error performing search: %v", err)
|
||
// }
|
||
|
||
// for _, result := range results {
|
||
// fmt.Printf("URL: %s\nHeader: %s\nDescription: %s\n", result.URL, result.Header, result.Description)
|
||
// }
|
||
// }
|
||
|
||
func PerformGoogleTextSearch(query, safe, lang string, numPages int) ([]TextSearchResult, error) {
|
||
ctx, cancel := chromedp.NewContext(context.Background())
|
||
defer cancel()
|
||
|
||
var results []TextSearchResult
|
||
|
||
searchURL := buildSearchURL(query, safe, lang, 1, 10)
|
||
|
||
err := chromedp.Run(ctx,
|
||
chromedp.Navigate(searchURL),
|
||
)
|
||
|
||
if err != nil {
|
||
return nil, fmt.Errorf("failed to navigate to search URL: %v", err)
|
||
}
|
||
|
||
for page := 1; page <= numPages; page++ {
|
||
var pageSource string
|
||
err := chromedp.Run(ctx,
|
||
chromedp.Sleep(2*time.Second),
|
||
chromedp.OuterHTML("html", &pageSource),
|
||
chromedp.Evaluate(`window.scrollTo(0, document.body.scrollHeight);`, nil),
|
||
)
|
||
if err != nil {
|
||
return nil, fmt.Errorf("failed to retrieve page source: %v", err)
|
||
}
|
||
|
||
newResults, err := parseResults(pageSource)
|
||
if err != nil {
|
||
return nil, fmt.Errorf("error parsing results: %v", err)
|
||
}
|
||
results = append(results, newResults...)
|
||
}
|
||
|
||
return results, nil
|
||
}
|
||
|
||
// buildSearchURL returns the Google search URL for query.
//
//   - safe: "active" enables SafeSearch; any other value emits safe=off.
//   - lang: when non-empty, sent as the lr parameter (query-escaped).
//   - page: 1-based page number; values above 1 add a start offset of
//     (page-1) * page size.
//   - resultsPerPage: requested page size; non-positive values fall back to
//     the default of 10.
//
// Parameters that equal the first-page defaults are omitted, so a call with
// page 1 and 10 results per page produces just q, safe and (optionally) lr.
func buildSearchURL(query, safe, lang string, page, resultsPerPage int) string {
	// NOTE(review): assumes Google serves 10 results per page when num is
	// absent; this matches the value the caller in this file passes.
	const defaultResultsPerPage = 10

	safeParam := "&safe=off"
	if safe == "active" {
		safeParam = "&safe=active"
	}

	langParam := ""
	if lang != "" {
		// Escape the value so characters such as '&' or '|' cannot break out
		// of the lr parameter.
		langParam = "&lr=" + url.QueryEscape(lang)
	}

	perPage := resultsPerPage
	if perPage <= 0 {
		perPage = defaultResultsPerPage
	}

	pagingParam := ""
	if perPage != defaultResultsPerPage {
		pagingParam += fmt.Sprintf("&num=%d", perPage)
	}
	if page > 1 {
		pagingParam += fmt.Sprintf("&start=%d", (page-1)*perPage)
	}

	return fmt.Sprintf("https://www.google.com/search?q=%s%s%s%s",
		url.QueryEscape(query), safeParam, langParam, pagingParam)
}
|
||
|
||
func parseResults(pageSource string) ([]TextSearchResult, error) {
|
||
var results []TextSearchResult
|
||
|
||
doc, err := goquery.NewDocumentFromReader(strings.NewReader(pageSource))
|
||
if err != nil {
|
||
return nil, fmt.Errorf("loading HTML document: %v", err)
|
||
}
|
||
|
||
doc.Find(".yuRUbf").Each(func(i int, s *goquery.Selection) {
|
||
link := s.Find("a")
|
||
href, exists := link.Attr("href")
|
||
if !exists {
|
||
return
|
||
}
|
||
|
||
header := link.Find("h3").Text()
|
||
header = strings.TrimSpace(strings.TrimSuffix(header, "›"))
|
||
|
||
description := ""
|
||
descSelection := doc.Find(".VwiC3b").Eq(i)
|
||
if descSelection.Length() > 0 {
|
||
description = descSelection.Text()
|
||
}
|
||
|
||
result := TextSearchResult{
|
||
URL: href,
|
||
Header: header,
|
||
Description: description,
|
||
}
|
||
results = append(results, result)
|
||
})
|
||
|
||
return results, nil
|
||
}
|