Fix pagination in Google text search: request the given page via the start offset

This commit is contained in:
partisan 2024-06-09 21:44:49 +02:00
parent 6885983576
commit a86b370f69
5 changed files with 99 additions and 157 deletions

View file

@ -11,57 +11,37 @@ import (
"github.com/chromedp/chromedp"
)
// type TextSearchResult struct {
// URL string
// Header string
// Description string
// }
// PerformGoogleTextSearch drives a chromedp browser to a Google search URL
// built by buildSearchURL, reads the page's outer HTML, and returns whatever
// parseResults extracts from it. On any chromedp or parsing failure it returns
// nil results and a new error whose message embeds the cause via %v (so the
// cause cannot be unwrapped with errors.Is / errors.As).
//
// NOTE(review): this span is a diff hunk shown without +/- markers, so lines of
// two revisions are interleaved (two signatures, two searchURL declarations,
// two consecutive error returns). Judging by the hunk header (57 lines before,
// 37 after), the `page int` variant looks like the new revision and the
// `numPages int` loop like the removed one — confirm against the repository.
func PerformGoogleTextSearch(query, safe, lang string, page int) ([]TextSearchResult, error) {
// Start from chromedp's default allocator options and append extra browser flags.
opts := append(chromedp.DefaultExecAllocatorOptions[:],
chromedp.DisableGPU,
chromedp.NoDefaultBrowserCheck,
chromedp.NoFirstRun,
chromedp.Flag("disable-javascript", true),
)
// The allocator context created here is the parent passed to chromedp.NewContext below.
ctx, cancel := chromedp.NewExecAllocator(context.Background(), opts...)
defer cancel()
// func main() {
// // Example usage
// results, err := PerformGoogleTextSearch("golang", "off", "lang_en", 2)
// if err != nil {
// log.Fatalf("Error performing search: %v", err)
// }
// for _, result := range results {
// fmt.Printf("URL: %s\nHeader: %s\nDescription: %s\n", result.URL, result.Header, result.Description)
// }
// }
// Variant of the signature that takes a page count (numPages) instead of a single page.
func PerformGoogleTextSearch(query, safe, lang string, numPages int) ([]TextSearchResult, error) {
// Variant that creates the browser context directly from context.Background().
ctx, cancel := chromedp.NewContext(context.Background())
// Variant that derives the browser context from the existing ctx, reusing the
// ctx and cancel variables.
ctx, cancel = chromedp.NewContext(ctx)
defer cancel()
var results []TextSearchResult
// Variant with the page argument fixed at 1.
searchURL := buildSearchURL(query, safe, lang, 1, 10)
// Variant that forwards the caller's page; 10 is passed as resultsPerPage.
searchURL := buildSearchURL(query, safe, lang, page, 10)
var pageSource string
err := chromedp.Run(ctx,
chromedp.Navigate(searchURL),
// Fixed 2-second pause before the HTML is read — presumably to let the page
// finish loading; TODO confirm.
chromedp.Sleep(2*time.Second),
chromedp.OuterHTML("html", &pageSource),
)
if err != nil {
return nil, fmt.Errorf("failed to navigate to search URL: %v", err)
return nil, fmt.Errorf("failed to retrieve page source: %v", err)
}
// Loop variant: reads the current page source numPages times without navigating
// again; each pass scrolls to the bottom of the document after capturing the HTML.
for page := 1; page <= numPages; page++ {
var pageSource string
err := chromedp.Run(ctx,
chromedp.Sleep(2*time.Second),
chromedp.OuterHTML("html", &pageSource),
chromedp.Evaluate(`window.scrollTo(0, document.body.scrollHeight);`, nil),
)
if err != nil {
return nil, fmt.Errorf("failed to retrieve page source: %v", err)
}
newResults, err := parseResults(pageSource)
if err != nil {
return nil, fmt.Errorf("error parsing results: %v", err)
}
results = append(results, newResults...)
// Single-page variant: parse the captured HTML once and append the entries.
newResults, err := parseResults(pageSource)
if err != nil {
return nil, fmt.Errorf("error parsing results: %v", err)
}
results = append(results, newResults...)
return results, nil
}
@ -77,7 +57,9 @@ func buildSearchURL(query, safe, lang string, page, resultsPerPage int) string {
langParam = "&lr=" + lang
}
return fmt.Sprintf("https://www.google.com/search?q=%s%s%s", url.QueryEscape(query), safeParam, langParam)
startParam := fmt.Sprintf("&start=%d", (page-1)*resultsPerPage)
return fmt.Sprintf("https://www.google.com/search?q=%s%s%s%s", url.QueryEscape(query), safeParam, langParam, startParam)
}
func parseResults(pageSource string) ([]TextSearchResult, error) {