fix for pages
This commit is contained in:
parent
6885983576
commit
a86b370f69
5 changed files with 99 additions and 157 deletions
2
run.sh
2
run.sh
|
@ -1,3 +1,3 @@
|
||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|
||||||
go run main.go images.go imageproxy.go video.go map.go text.go text-searchxng.go text-librex.go text-google.go cache.go forums.go files.go files-torrentgalaxy.go files-thepiratebay.go agent.go --debug
|
go run main.go images.go imageproxy.go video.go map.go text.go text-searchxng.go text-librex.go text-google.go cache.go forums.go files.go files-torrentgalaxy.go files-thepiratebay.go agent.go
|
|
@ -11,9 +11,9 @@ import (
|
||||||
"github.com/PuerkitoBio/goquery"
|
"github.com/PuerkitoBio/goquery"
|
||||||
)
|
)
|
||||||
|
|
||||||
func PerformDuckDuckGoTextSearch(query, safe, lang string) ([]TextSearchResult, error) {
|
func PerformDuckDuckGoTextSearch(query, safe, lang string, page int) ([]TextSearchResult, error) {
|
||||||
var results []TextSearchResult
|
var results []TextSearchResult
|
||||||
searchURL := fmt.Sprintf("https://duckduckgo.com/html/?q=%s", url.QueryEscape(query))
|
searchURL := buildDuckDuckGoSearchURL(query, page)
|
||||||
|
|
||||||
resp, err := http.Get(searchURL)
|
resp, err := http.Get(searchURL)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -56,3 +56,11 @@ func PerformDuckDuckGoTextSearch(query, safe, lang string) ([]TextSearchResult,
|
||||||
|
|
||||||
return results, nil
|
return results, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func buildDuckDuckGoSearchURL(query string, page int) string {
|
||||||
|
startParam := ""
|
||||||
|
if page > 1 {
|
||||||
|
startParam = fmt.Sprintf("&s=%d", (page-1)*10)
|
||||||
|
}
|
||||||
|
return fmt.Sprintf("https://duckduckgo.com/html/?q=%s%s", url.QueryEscape(query), startParam)
|
||||||
|
}
|
|
@ -11,57 +11,37 @@ import (
|
||||||
"github.com/chromedp/chromedp"
|
"github.com/chromedp/chromedp"
|
||||||
)
|
)
|
||||||
|
|
||||||
// type TextSearchResult struct {
|
func PerformGoogleTextSearch(query, safe, lang string, page int) ([]TextSearchResult, error) {
|
||||||
// URL string
|
opts := append(chromedp.DefaultExecAllocatorOptions[:],
|
||||||
// Header string
|
chromedp.DisableGPU,
|
||||||
// Description string
|
chromedp.NoDefaultBrowserCheck,
|
||||||
// }
|
chromedp.NoFirstRun,
|
||||||
|
chromedp.Flag("disable-javascript", true),
|
||||||
|
)
|
||||||
|
ctx, cancel := chromedp.NewExecAllocator(context.Background(), opts...)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
// func main() {
|
ctx, cancel = chromedp.NewContext(ctx)
|
||||||
// // Example usage
|
|
||||||
// results, err := PerformGoogleTextSearch("golang", "off", "lang_en", 2)
|
|
||||||
// if err != nil {
|
|
||||||
// log.Fatalf("Error performing search: %v", err)
|
|
||||||
// }
|
|
||||||
|
|
||||||
// for _, result := range results {
|
|
||||||
// fmt.Printf("URL: %s\nHeader: %s\nDescription: %s\n", result.URL, result.Header, result.Description)
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
|
|
||||||
func PerformGoogleTextSearch(query, safe, lang string, numPages int) ([]TextSearchResult, error) {
|
|
||||||
ctx, cancel := chromedp.NewContext(context.Background())
|
|
||||||
defer cancel()
|
defer cancel()
|
||||||
|
|
||||||
var results []TextSearchResult
|
var results []TextSearchResult
|
||||||
|
|
||||||
searchURL := buildSearchURL(query, safe, lang, 1, 10)
|
searchURL := buildSearchURL(query, safe, lang, page, 10)
|
||||||
|
var pageSource string
|
||||||
err := chromedp.Run(ctx,
|
err := chromedp.Run(ctx,
|
||||||
chromedp.Navigate(searchURL),
|
chromedp.Navigate(searchURL),
|
||||||
|
chromedp.Sleep(2*time.Second),
|
||||||
|
chromedp.OuterHTML("html", &pageSource),
|
||||||
)
|
)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to navigate to search URL: %v", err)
|
return nil, fmt.Errorf("failed to retrieve page source: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
for page := 1; page <= numPages; page++ {
|
newResults, err := parseResults(pageSource)
|
||||||
var pageSource string
|
if err != nil {
|
||||||
err := chromedp.Run(ctx,
|
return nil, fmt.Errorf("error parsing results: %v", err)
|
||||||
chromedp.Sleep(2*time.Second),
|
|
||||||
chromedp.OuterHTML("html", &pageSource),
|
|
||||||
chromedp.Evaluate(`window.scrollTo(0, document.body.scrollHeight);`, nil),
|
|
||||||
)
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("failed to retrieve page source: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
newResults, err := parseResults(pageSource)
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("error parsing results: %v", err)
|
|
||||||
}
|
|
||||||
results = append(results, newResults...)
|
|
||||||
}
|
}
|
||||||
|
results = append(results, newResults...)
|
||||||
|
|
||||||
return results, nil
|
return results, nil
|
||||||
}
|
}
|
||||||
|
@ -77,7 +57,9 @@ func buildSearchURL(query, safe, lang string, page, resultsPerPage int) string {
|
||||||
langParam = "&lr=" + lang
|
langParam = "&lr=" + lang
|
||||||
}
|
}
|
||||||
|
|
||||||
return fmt.Sprintf("https://www.google.com/search?q=%s%s%s", url.QueryEscape(query), safeParam, langParam)
|
startParam := fmt.Sprintf("&start=%d", (page-1)*resultsPerPage)
|
||||||
|
|
||||||
|
return fmt.Sprintf("https://www.google.com/search?q=%s%s%s%s", url.QueryEscape(query), safeParam, langParam, startParam)
|
||||||
}
|
}
|
||||||
|
|
||||||
func parseResults(pageSource string) ([]TextSearchResult, error) {
|
func parseResults(pageSource string) ([]TextSearchResult, error) {
|
||||||
|
|
|
@ -20,7 +20,7 @@ type LibreXResponse []LibreXResult
|
||||||
|
|
||||||
func PerformLibreXTextSearch(query, safe, lang string, page int) ([]TextSearchResult, error) {
|
func PerformLibreXTextSearch(query, safe, lang string, page int) ([]TextSearchResult, error) {
|
||||||
// LibreX uses page starting from 0
|
// LibreX uses page starting from 0
|
||||||
searchURL := fmt.Sprintf("https://%s/api.php?q=%s&p=%d&t=0", LIBREX_DOMAIN, url.QueryEscape(query), page-1)
|
searchURL := fmt.Sprintf("https://%s/api.php?q=%s&p=%d&t=0", LIBREX_DOMAIN, url.QueryEscape(query), page)
|
||||||
|
|
||||||
// User Agent generation
|
// User Agent generation
|
||||||
userAgent, err := GetUserAgent("librex-text-search")
|
userAgent, err := GetUserAgent("librex-text-search")
|
||||||
|
@ -63,10 +63,6 @@ func PerformLibreXTextSearch(query, safe, lang string, page int) ([]TextSearchRe
|
||||||
Source: "LibreX",
|
Source: "LibreX",
|
||||||
}
|
}
|
||||||
|
|
||||||
if debugMode {
|
|
||||||
log.Printf("LibreX result: %+v\n", result)
|
|
||||||
}
|
|
||||||
|
|
||||||
results = append(results, result)
|
results = append(results, result)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
174
text.go
174
text.go
|
@ -1,44 +1,59 @@
|
||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"flag"
|
|
||||||
"fmt"
|
"fmt"
|
||||||
"html/template"
|
"html/template"
|
||||||
"log"
|
"log"
|
||||||
|
"math/rand"
|
||||||
"net/http"
|
"net/http"
|
||||||
"sort"
|
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
debugMode bool
|
debugMode bool
|
||||||
|
searchEngines []searchEngine
|
||||||
|
searchEngineLock sync.Mutex
|
||||||
)
|
)
|
||||||
|
|
||||||
|
type searchEngine struct {
|
||||||
|
Name string
|
||||||
|
Func func(string, string, string, int) ([]TextSearchResult, error)
|
||||||
|
Weight int
|
||||||
|
}
|
||||||
|
|
||||||
func init() {
|
func init() {
|
||||||
flag.BoolVar(&debugMode, "debug", false, "enable debug mode")
|
debugMode = false
|
||||||
flag.Parse()
|
|
||||||
|
searchEngines = []searchEngine{
|
||||||
|
{Name: "Google", Func: PerformGoogleTextSearch, Weight: 1},
|
||||||
|
{Name: "LibreX", Func: PerformLibreXTextSearch, Weight: 2},
|
||||||
|
// {Name: "DuckDuckGo", Func: PerformDuckDuckGoTextSearch, Weight: 3}, // DuckDuckGo timeouts too fast and search results are trash
|
||||||
|
// {Name: "SearXNG", Func: PerformSearXNGTextSearch, Weight: 2}, // Uncomment when implemented
|
||||||
|
}
|
||||||
|
|
||||||
|
rand.Seed(time.Now().UnixNano())
|
||||||
}
|
}
|
||||||
|
|
||||||
func HandleTextSearch(w http.ResponseWriter, query, safe, lang string, page int) {
|
func HandleTextSearch(w http.ResponseWriter, query, safe, lang string, page int) {
|
||||||
startTime := time.Now()
|
startTime := time.Now()
|
||||||
const resultsPerPage = 10
|
|
||||||
|
|
||||||
cacheKey := CacheKey{Query: query, Page: page, Safe: safe == "true", Lang: lang, Type: "text"}
|
cacheKey := CacheKey{Query: query, Page: page, Safe: safe == "true", Lang: lang, Type: "text"}
|
||||||
combinedResults := getTextResultsFromCacheOrFetch(cacheKey, query, safe, lang, page, resultsPerPage)
|
combinedResults := getTextResultsFromCacheOrFetch(cacheKey, query, safe, lang, page)
|
||||||
|
|
||||||
hasPrevPage := page > 1
|
hasPrevPage := page > 1
|
||||||
hasNextPage := len(combinedResults) == resultsPerPage
|
hasNextPage := len(combinedResults) > 0
|
||||||
|
|
||||||
displayResults(w, combinedResults, query, lang, time.Since(startTime).Seconds(), page, hasPrevPage, hasNextPage)
|
displayResults(w, combinedResults, query, lang, time.Since(startTime).Seconds(), page, hasPrevPage, hasNextPage)
|
||||||
|
|
||||||
// Always check and cache the next page if not enough results
|
// Prefetch next and previous pages
|
||||||
if hasNextPage {
|
go prefetchPage(query, safe, lang, page+1)
|
||||||
go cacheNextPageIfNotCached(query, safe, lang, page+1, resultsPerPage)
|
if hasPrevPage {
|
||||||
|
go prefetchPage(query, safe, lang, page-1)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func getTextResultsFromCacheOrFetch(cacheKey CacheKey, query, safe, lang string, page, resultsPerPage int) []TextSearchResult {
|
func getTextResultsFromCacheOrFetch(cacheKey CacheKey, query, safe, lang string, page int) []TextSearchResult {
|
||||||
cacheChan := make(chan []SearchResult)
|
cacheChan := make(chan []SearchResult)
|
||||||
var combinedResults []TextSearchResult
|
var combinedResults []TextSearchResult
|
||||||
|
|
||||||
|
@ -56,7 +71,7 @@ func getTextResultsFromCacheOrFetch(cacheKey CacheKey, query, safe, lang string,
|
||||||
select {
|
select {
|
||||||
case results := <-cacheChan:
|
case results := <-cacheChan:
|
||||||
if results == nil {
|
if results == nil {
|
||||||
combinedResults = fetchTextResultsUntilFull(query, safe, lang, page, resultsPerPage)
|
combinedResults = fetchTextResults(query, safe, lang, page)
|
||||||
resultsCache.Set(cacheKey, convertToSearchResults(combinedResults))
|
resultsCache.Set(cacheKey, convertToSearchResults(combinedResults))
|
||||||
} else {
|
} else {
|
||||||
textResults, _, _ := convertToSpecificResults(results)
|
textResults, _, _ := convertToSpecificResults(results)
|
||||||
|
@ -64,129 +79,70 @@ func getTextResultsFromCacheOrFetch(cacheKey CacheKey, query, safe, lang string,
|
||||||
}
|
}
|
||||||
case <-time.After(2 * time.Second):
|
case <-time.After(2 * time.Second):
|
||||||
log.Println("Cache check timeout")
|
log.Println("Cache check timeout")
|
||||||
combinedResults = fetchTextResultsUntilFull(query, safe, lang, page, resultsPerPage)
|
combinedResults = fetchTextResults(query, safe, lang, page)
|
||||||
resultsCache.Set(cacheKey, convertToSearchResults(combinedResults))
|
resultsCache.Set(cacheKey, convertToSearchResults(combinedResults))
|
||||||
}
|
}
|
||||||
|
|
||||||
return combinedResults
|
return combinedResults
|
||||||
}
|
}
|
||||||
|
|
||||||
func cacheNextPageIfNotCached(query, safe, lang string, page, resultsPerPage int) {
|
func prefetchPage(query, safe, lang string, page int) {
|
||||||
cacheKey := CacheKey{Query: query, Page: page, Safe: safe == "true", Lang: lang, Type: "text"}
|
cacheKey := CacheKey{Query: query, Page: page, Safe: safe == "true", Lang: lang, Type: "text"}
|
||||||
if _, exists := resultsCache.Get(cacheKey); !exists {
|
if _, exists := resultsCache.Get(cacheKey); !exists {
|
||||||
log.Printf("Next page %d not cached, caching now...", page)
|
log.Printf("Page %d not cached, caching now...", page)
|
||||||
nextPageResults := fetchTextResultsUntilFull(query, safe, lang, page, resultsPerPage)
|
pageResults := fetchTextResults(query, safe, lang, page)
|
||||||
resultsCache.Set(cacheKey, convertToSearchResults(nextPageResults))
|
resultsCache.Set(cacheKey, convertToSearchResults(pageResults))
|
||||||
} else {
|
} else {
|
||||||
log.Printf("Next page %d already cached", page)
|
log.Printf("Page %d already cached", page)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func fetchTextResultsUntilFull(query, safe, lang string, targetPage, resultsPerPage int) []TextSearchResult {
|
func fetchTextResults(query, safe, lang string, page int) []TextSearchResult {
|
||||||
var combinedResults []TextSearchResult
|
engine := selectSearchEngine()
|
||||||
currentPage := 1
|
log.Printf("Using search engine: %s", engine.Name)
|
||||||
resultsNeeded := targetPage * resultsPerPage
|
|
||||||
|
|
||||||
for len(combinedResults) < resultsNeeded {
|
results, err := engine.Func(query, safe, lang, page)
|
||||||
cacheKey := CacheKey{Query: query, Page: targetPage, Safe: safe == "true", Lang: lang, Type: "text"}
|
if err != nil {
|
||||||
cachedResults, exists := resultsCache.Get(cacheKey)
|
log.Printf("Error performing search with %s: %v", engine.Name, err)
|
||||||
if exists {
|
return nil
|
||||||
textResults, _, _ := convertToSpecificResults(cachedResults)
|
|
||||||
combinedResults = append(combinedResults, textResults...)
|
|
||||||
} else {
|
|
||||||
results := fetchAndCacheTextResults(query, safe, lang, currentPage, resultsPerPage)
|
|
||||||
if len(results) == 0 {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
combinedResults = append(combinedResults, results...)
|
|
||||||
resultsCache.Set(cacheKey, convertToSearchResults(results))
|
|
||||||
}
|
|
||||||
|
|
||||||
currentPage++
|
|
||||||
|
|
||||||
// Stop fetching if we have enough results for the target page and the next page
|
|
||||||
if len(combinedResults) >= resultsNeeded+resultsPerPage {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
startIndex := (targetPage - 1) * resultsPerPage
|
return results
|
||||||
endIndex := startIndex + resultsPerPage
|
|
||||||
|
|
||||||
if startIndex >= len(combinedResults) {
|
|
||||||
return []TextSearchResult{}
|
|
||||||
}
|
|
||||||
if endIndex > len(combinedResults) {
|
|
||||||
endIndex = len(combinedResults)
|
|
||||||
}
|
|
||||||
|
|
||||||
return combinedResults[startIndex:endIndex]
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func fetchAndCacheTextResults(query, safe, lang string, page, resultsPerPage int) []TextSearchResult {
|
func selectSearchEngine() searchEngine {
|
||||||
var combinedResults []TextSearchResult
|
searchEngineLock.Lock()
|
||||||
var wg sync.WaitGroup
|
defer searchEngineLock.Unlock()
|
||||||
var mu sync.Mutex
|
|
||||||
|
|
||||||
resultsChan := make(chan []TextSearchResult)
|
totalWeight := 0
|
||||||
|
for _, engine := range searchEngines {
|
||||||
searchFuncs := []struct {
|
totalWeight += engine.Weight
|
||||||
Func func(string, string, string, int) ([]TextSearchResult, error)
|
|
||||||
Source string
|
|
||||||
}{
|
|
||||||
{PerformGoogleTextSearch, "Google"},
|
|
||||||
// {PerformLibreXTextSearch, "LibreX"},
|
|
||||||
// {PerformSearXNGTextSearch, "SearXNG"},
|
|
||||||
}
|
}
|
||||||
|
|
||||||
wg.Add(len(searchFuncs))
|
randValue := rand.Intn(totalWeight)
|
||||||
|
for _, engine := range searchEngines {
|
||||||
for _, searchFunc := range searchFuncs {
|
if randValue < engine.Weight {
|
||||||
go func(searchFunc func(string, string, string, int) ([]TextSearchResult, error), source string) {
|
// Adjust weights for load balancing
|
||||||
defer wg.Done()
|
for i := range searchEngines {
|
||||||
results, err := searchFunc(query, safe, lang, page)
|
if searchEngines[i].Name == engine.Name {
|
||||||
if err == nil {
|
searchEngines[i].Weight = max(1, searchEngines[i].Weight-1)
|
||||||
for i := range results {
|
} else {
|
||||||
results[i].Source = source
|
searchEngines[i].Weight++
|
||||||
}
|
}
|
||||||
resultsChan <- results
|
|
||||||
} else {
|
|
||||||
log.Printf("Error performing search from %s: %v", source, err)
|
|
||||||
}
|
}
|
||||||
}(searchFunc.Func, searchFunc.Source)
|
return engine
|
||||||
|
}
|
||||||
|
randValue -= engine.Weight
|
||||||
}
|
}
|
||||||
|
|
||||||
go func() {
|
return searchEngines[0] // fallback to the first engine
|
||||||
wg.Wait()
|
|
||||||
close(resultsChan)
|
|
||||||
}()
|
|
||||||
|
|
||||||
for results := range resultsChan {
|
|
||||||
mu.Lock()
|
|
||||||
combinedResults = append(combinedResults, results...)
|
|
||||||
mu.Unlock()
|
|
||||||
}
|
|
||||||
|
|
||||||
sort.SliceStable(combinedResults, func(i, j int) bool {
|
|
||||||
return sourceOrder(combinedResults[i].Source) < sourceOrder(combinedResults[j].Source)
|
|
||||||
})
|
|
||||||
|
|
||||||
log.Printf("Fetched %d results for page %d", len(combinedResults), page)
|
|
||||||
|
|
||||||
return combinedResults
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func sourceOrder(source string) int {
|
func max(a, b int) int {
|
||||||
switch source {
|
if a > b {
|
||||||
case "Google":
|
return a
|
||||||
return 1
|
|
||||||
case "LibreX":
|
|
||||||
return 2
|
|
||||||
case "SearchXNG":
|
|
||||||
return 3
|
|
||||||
default:
|
|
||||||
return 4
|
|
||||||
}
|
}
|
||||||
|
return b
|
||||||
}
|
}
|
||||||
|
|
||||||
func displayResults(w http.ResponseWriter, results []TextSearchResult, query, lang string, elapsed float64, page int, hasPrevPage, hasNextPage bool) {
|
func displayResults(w http.ResponseWriter, results []TextSearchResult, query, lang string, elapsed float64, page int, hasPrevPage, hasNextPage bool) {
|
||||||
|
|
Loading…
Add table
Reference in a new issue