revert "text-google.go" changes
This commit is contained in:
parent
b29bedc522
commit
abce95724a
1 changed file with 51 additions and 36 deletions
|
@ -1,47 +1,62 @@
|
||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"log"
|
||||||
|
"net/http"
|
||||||
"net/url"
|
"net/url"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
|
||||||
|
|
||||||
"github.com/PuerkitoBio/goquery"
|
"github.com/PuerkitoBio/goquery"
|
||||||
"github.com/chromedp/chromedp"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func PerformGoogleTextSearch(query, safe, lang string, page int) ([]TextSearchResult, error) {
|
func PerformGoogleTextSearch(query, safe, lang string, page int) ([]TextSearchResult, error) {
|
||||||
opts := append(chromedp.DefaultExecAllocatorOptions[:],
|
const resultsPerPage = 10
|
||||||
chromedp.DisableGPU,
|
|
||||||
chromedp.NoDefaultBrowserCheck,
|
|
||||||
chromedp.NoFirstRun,
|
|
||||||
chromedp.Flag("disable-javascript", true),
|
|
||||||
)
|
|
||||||
ctx, cancel := chromedp.NewExecAllocator(context.Background(), opts...)
|
|
||||||
defer cancel()
|
|
||||||
|
|
||||||
ctx, cancel = chromedp.NewContext(ctx)
|
|
||||||
defer cancel()
|
|
||||||
|
|
||||||
var results []TextSearchResult
|
var results []TextSearchResult
|
||||||
|
|
||||||
searchURL := buildSearchURL(query, safe, lang, page, 10)
|
client := &http.Client{}
|
||||||
var pageSource string
|
searchURL := buildSearchURL(query, safe, lang, page, resultsPerPage)
|
||||||
err := chromedp.Run(ctx,
|
|
||||||
chromedp.Navigate(searchURL),
|
req, err := http.NewRequest("GET", searchURL, nil)
|
||||||
chromedp.Sleep(2*time.Second),
|
|
||||||
chromedp.OuterHTML("html", &pageSource),
|
|
||||||
)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to retrieve page source: %v", err)
|
return nil, fmt.Errorf("failed to create request: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
newResults, err := parseResults(pageSource)
|
// User Agent generation
|
||||||
|
TextUserAgent, err := GetUserAgent("Text-Search")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("error parsing results: %v", err)
|
fmt.Println("Error:", err)
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if debugMode {
|
||||||
|
fmt.Println("Generated User Agent (text):", TextUserAgent)
|
||||||
|
}
|
||||||
|
|
||||||
|
req.Header.Set("User-Agent", TextUserAgent)
|
||||||
|
|
||||||
|
resp, err := client.Do(req)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("making request: %v", err)
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
|
||||||
|
if resp.StatusCode != http.StatusOK {
|
||||||
|
return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
|
||||||
|
}
|
||||||
|
|
||||||
|
doc, err := goquery.NewDocumentFromReader(resp.Body)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("loading HTML document: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
results = parseResults(doc)
|
||||||
|
|
||||||
|
if len(results) == 0 {
|
||||||
|
if debugMode {
|
||||||
|
log.Println("No results found from Google")
|
||||||
|
}
|
||||||
}
|
}
|
||||||
results = append(results, newResults...)
|
|
||||||
|
|
||||||
return results, nil
|
return results, nil
|
||||||
}
|
}
|
||||||
|
@ -57,23 +72,20 @@ func buildSearchURL(query, safe, lang string, page, resultsPerPage int) string {
|
||||||
langParam = "&lr=" + lang
|
langParam = "&lr=" + lang
|
||||||
}
|
}
|
||||||
|
|
||||||
startParam := fmt.Sprintf("&start=%d", (page-1)*resultsPerPage)
|
startIndex := (page - 1) * resultsPerPage
|
||||||
|
return fmt.Sprintf("https://www.google.com/search?q=%s%s%s&udm=14&start=%d", url.QueryEscape(query), safeParam, langParam, startIndex)
|
||||||
return fmt.Sprintf("https://www.google.com/search?q=%s%s%s%s", url.QueryEscape(query), safeParam, langParam, startParam)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func parseResults(pageSource string) ([]TextSearchResult, error) {
|
func parseResults(doc *goquery.Document) []TextSearchResult {
|
||||||
var results []TextSearchResult
|
var results []TextSearchResult
|
||||||
|
|
||||||
doc, err := goquery.NewDocumentFromReader(strings.NewReader(pageSource))
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("loading HTML document: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
doc.Find(".yuRUbf").Each(func(i int, s *goquery.Selection) {
|
doc.Find(".yuRUbf").Each(func(i int, s *goquery.Selection) {
|
||||||
link := s.Find("a")
|
link := s.Find("a")
|
||||||
href, exists := link.Attr("href")
|
href, exists := link.Attr("href")
|
||||||
if !exists {
|
if !exists {
|
||||||
|
if debugMode {
|
||||||
|
log.Printf("No href attribute found for result %d\n", i)
|
||||||
|
}
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -92,7 +104,10 @@ func parseResults(pageSource string) ([]TextSearchResult, error) {
|
||||||
Description: description,
|
Description: description,
|
||||||
}
|
}
|
||||||
results = append(results, result)
|
results = append(results, result)
|
||||||
|
if debugMode {
|
||||||
|
log.Printf("Google result: %+v\n", result)
|
||||||
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
return results, nil
|
return results
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Reference in a new issue