wip, revert changes
This commit is contained in:
parent
7d1d2cba67
commit
6885983576
7 changed files with 103 additions and 144 deletions
105
text-google.go
105
text-google.go
|
@ -1,61 +1,66 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"log"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
"github.com/chromedp/chromedp"
|
||||
)
|
||||
|
||||
func PerformGoogleTextSearch(query, safe, lang string, page int) ([]TextSearchResult, error) {
|
||||
const resultsPerPage = 10
|
||||
// type TextSearchResult struct {
|
||||
// URL string
|
||||
// Header string
|
||||
// Description string
|
||||
// }
|
||||
|
||||
// func main() {
|
||||
// // Example usage
|
||||
// results, err := PerformGoogleTextSearch("golang", "off", "lang_en", 2)
|
||||
// if err != nil {
|
||||
// log.Fatalf("Error performing search: %v", err)
|
||||
// }
|
||||
|
||||
// for _, result := range results {
|
||||
// fmt.Printf("URL: %s\nHeader: %s\nDescription: %s\n", result.URL, result.Header, result.Description)
|
||||
// }
|
||||
// }
|
||||
|
||||
func PerformGoogleTextSearch(query, safe, lang string, numPages int) ([]TextSearchResult, error) {
|
||||
ctx, cancel := chromedp.NewContext(context.Background())
|
||||
defer cancel()
|
||||
|
||||
var results []TextSearchResult
|
||||
|
||||
client := &http.Client{}
|
||||
searchURL := buildSearchURL(query, safe, lang, page, resultsPerPage)
|
||||
searchURL := buildSearchURL(query, safe, lang, 1, 10)
|
||||
|
||||
err := chromedp.Run(ctx,
|
||||
chromedp.Navigate(searchURL),
|
||||
)
|
||||
|
||||
req, err := http.NewRequest("GET", searchURL, nil)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create request: %v", err)
|
||||
return nil, fmt.Errorf("failed to navigate to search URL: %v", err)
|
||||
}
|
||||
|
||||
// User Agent generation
|
||||
TextUserAgent, err := GetUserAgent("Text-Search")
|
||||
if err != nil {
|
||||
fmt.Println("Error:", err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if debugMode {
|
||||
fmt.Println("Generated User Agent (text):", TextUserAgent)
|
||||
}
|
||||
|
||||
req.Header.Set("User-Agent", TextUserAgent)
|
||||
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("making request: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
doc, err := goquery.NewDocumentFromReader(resp.Body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("loading HTML document: %v", err)
|
||||
}
|
||||
|
||||
results = parseResults(doc)
|
||||
|
||||
if len(results) == 0 {
|
||||
if debugMode {
|
||||
log.Println("No results found from Google")
|
||||
for page := 1; page <= numPages; page++ {
|
||||
var pageSource string
|
||||
err := chromedp.Run(ctx,
|
||||
chromedp.Sleep(2*time.Second),
|
||||
chromedp.OuterHTML("html", &pageSource),
|
||||
chromedp.Evaluate(`window.scrollTo(0, document.body.scrollHeight);`, nil),
|
||||
)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to retrieve page source: %v", err)
|
||||
}
|
||||
|
||||
newResults, err := parseResults(pageSource)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error parsing results: %v", err)
|
||||
}
|
||||
results = append(results, newResults...)
|
||||
}
|
||||
|
||||
return results, nil
|
||||
|
@ -72,20 +77,21 @@ func buildSearchURL(query, safe, lang string, page, resultsPerPage int) string {
|
|||
langParam = "&lr=" + lang
|
||||
}
|
||||
|
||||
startIndex := (page - 1) * resultsPerPage
|
||||
return fmt.Sprintf("https://www.google.com/search?q=%s%s%s&udm=14&start=%d", url.QueryEscape(query), safeParam, langParam, startIndex)
|
||||
return fmt.Sprintf("https://www.google.com/search?q=%s%s%s", url.QueryEscape(query), safeParam, langParam)
|
||||
}
|
||||
|
||||
func parseResults(doc *goquery.Document) []TextSearchResult {
|
||||
func parseResults(pageSource string) ([]TextSearchResult, error) {
|
||||
var results []TextSearchResult
|
||||
|
||||
doc, err := goquery.NewDocumentFromReader(strings.NewReader(pageSource))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("loading HTML document: %v", err)
|
||||
}
|
||||
|
||||
doc.Find(".yuRUbf").Each(func(i int, s *goquery.Selection) {
|
||||
link := s.Find("a")
|
||||
href, exists := link.Attr("href")
|
||||
if !exists {
|
||||
if debugMode {
|
||||
log.Printf("No href attribute found for result %d\n", i)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
|
@ -104,10 +110,7 @@ func parseResults(doc *goquery.Document) []TextSearchResult {
|
|||
Description: description,
|
||||
}
|
||||
results = append(results, result)
|
||||
if debugMode {
|
||||
log.Printf("Google result: %+v\n", result)
|
||||
}
|
||||
})
|
||||
|
||||
return results
|
||||
return results, nil
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue