Fix pagination of Google text search results
This commit is contained in:
parent
6885983576
commit
a86b370f69
5 changed files with 99 additions and 157 deletions
|
@ -11,57 +11,37 @@ import (
|
|||
"github.com/chromedp/chromedp"
|
||||
)
|
||||
|
||||
// type TextSearchResult struct {
|
||||
// URL string
|
||||
// Header string
|
||||
// Description string
|
||||
// }
|
||||
func PerformGoogleTextSearch(query, safe, lang string, page int) ([]TextSearchResult, error) {
|
||||
opts := append(chromedp.DefaultExecAllocatorOptions[:],
|
||||
chromedp.DisableGPU,
|
||||
chromedp.NoDefaultBrowserCheck,
|
||||
chromedp.NoFirstRun,
|
||||
chromedp.Flag("disable-javascript", true),
|
||||
)
|
||||
ctx, cancel := chromedp.NewExecAllocator(context.Background(), opts...)
|
||||
defer cancel()
|
||||
|
||||
// func main() {
|
||||
// // Example usage
|
||||
// results, err := PerformGoogleTextSearch("golang", "off", "lang_en", 2)
|
||||
// if err != nil {
|
||||
// log.Fatalf("Error performing search: %v", err)
|
||||
// }
|
||||
|
||||
// for _, result := range results {
|
||||
// fmt.Printf("URL: %s\nHeader: %s\nDescription: %s\n", result.URL, result.Header, result.Description)
|
||||
// }
|
||||
// }
|
||||
|
||||
func PerformGoogleTextSearch(query, safe, lang string, numPages int) ([]TextSearchResult, error) {
|
||||
ctx, cancel := chromedp.NewContext(context.Background())
|
||||
ctx, cancel = chromedp.NewContext(ctx)
|
||||
defer cancel()
|
||||
|
||||
var results []TextSearchResult
|
||||
|
||||
searchURL := buildSearchURL(query, safe, lang, 1, 10)
|
||||
|
||||
searchURL := buildSearchURL(query, safe, lang, page, 10)
|
||||
var pageSource string
|
||||
err := chromedp.Run(ctx,
|
||||
chromedp.Navigate(searchURL),
|
||||
chromedp.Sleep(2*time.Second),
|
||||
chromedp.OuterHTML("html", &pageSource),
|
||||
)
|
||||
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to navigate to search URL: %v", err)
|
||||
return nil, fmt.Errorf("failed to retrieve page source: %v", err)
|
||||
}
|
||||
|
||||
for page := 1; page <= numPages; page++ {
|
||||
var pageSource string
|
||||
err := chromedp.Run(ctx,
|
||||
chromedp.Sleep(2*time.Second),
|
||||
chromedp.OuterHTML("html", &pageSource),
|
||||
chromedp.Evaluate(`window.scrollTo(0, document.body.scrollHeight);`, nil),
|
||||
)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to retrieve page source: %v", err)
|
||||
}
|
||||
|
||||
newResults, err := parseResults(pageSource)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error parsing results: %v", err)
|
||||
}
|
||||
results = append(results, newResults...)
|
||||
newResults, err := parseResults(pageSource)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error parsing results: %v", err)
|
||||
}
|
||||
results = append(results, newResults...)
|
||||
|
||||
return results, nil
|
||||
}
|
||||
|
@ -77,7 +57,9 @@ func buildSearchURL(query, safe, lang string, page, resultsPerPage int) string {
|
|||
langParam = "&lr=" + lang
|
||||
}
|
||||
|
||||
return fmt.Sprintf("https://www.google.com/search?q=%s%s%s", url.QueryEscape(query), safeParam, langParam)
|
||||
startParam := fmt.Sprintf("&start=%d", (page-1)*resultsPerPage)
|
||||
|
||||
return fmt.Sprintf("https://www.google.com/search?q=%s%s%s%s", url.QueryEscape(query), safeParam, langParam, startParam)
|
||||
}
|
||||
|
||||
func parseResults(pageSource string) ([]TextSearchResult, error) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue