automatic reputation for search engines

This commit is contained in:
partisan 2024-06-14 17:56:20 +02:00
parent dd9ed4cc53
commit e3d568f6cb
9 changed files with 198 additions and 126 deletions

View file

@ -6,28 +6,31 @@ import (
"net/url" "net/url"
"strconv" "strconv"
"strings" "strings"
"time"
"github.com/PuerkitoBio/goquery" "github.com/PuerkitoBio/goquery"
) )
// PerformImgurImageSearch performs an image search on Imgur and returns the results // PerformImgurImageSearch performs an image search on Imgur and returns the results
func PerformImgurImageSearch(query, safe, lang string, page int) ([]ImageSearchResult, error) { func PerformImgurImageSearch(query, safe, lang string, page int) ([]ImageSearchResult, time.Duration, error) {
startTime := time.Now() // Start the timer
var results []ImageSearchResult var results []ImageSearchResult
searchURL := buildImgurSearchURL(query, page) searchURL := buildImgurSearchURL(query, page)
resp, err := http.Get(searchURL) resp, err := http.Get(searchURL)
if err != nil { if err != nil {
return nil, fmt.Errorf("making request: %v", err) return nil, 0, fmt.Errorf("making request: %v", err)
} }
defer resp.Body.Close() defer resp.Body.Close()
if resp.StatusCode != http.StatusOK { if resp.StatusCode != http.StatusOK {
return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode) return nil, 0, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
} }
doc, err := goquery.NewDocumentFromReader(resp.Body) doc, err := goquery.NewDocumentFromReader(resp.Body)
if err != nil { if err != nil {
return nil, fmt.Errorf("loading HTML document: %v", err) return nil, 0, fmt.Errorf("loading HTML document: %v", err)
} }
doc.Find("div.cards div.post").Each(func(i int, s *goquery.Selection) { doc.Find("div.cards div.post").Each(func(i int, s *goquery.Selection) {
@ -72,7 +75,9 @@ func PerformImgurImageSearch(query, safe, lang string, page int) ([]ImageSearchR
}) })
}) })
return results, nil duration := time.Since(startTime) // Calculate the duration
return results, duration, nil
} }
// scrapeImageFromImgurPage scrapes the image source from the Imgur page // scrapeImageFromImgurPage scrapes the image source from the Imgur page
@ -130,12 +135,13 @@ func buildImgurSearchURL(query string, page int) string {
} }
// func main() { // func main() {
// results, err := PerformImgurImageSearch("cats", "true", "en", 1) // results, duration, err := PerformImgurImageSearch("cats", "true", "en", 1)
// if err != nil { // if err != nil {
// fmt.Println("Error:", err) // fmt.Println("Error:", err)
// return // return
// } // }
// fmt.Printf("Search took: %v\n", duration)
// for _, result := range results { // for _, result := range results {
// fmt.Printf("Title: %s\nSource: %s\nMedia: %s\nThumbnail: %s\nThumbProxy: %s\nWidth: %d\nHeight: %d\n\n", // fmt.Printf("Title: %s\nSource: %s\nMedia: %s\nThumbnail: %s\nThumbProxy: %s\nWidth: %d\nHeight: %d\n\n",
// result.Title, result.Source, result.Media, result.Thumbnail, result.ThumbProxy, result.Width, result.Height) // result.Title, result.Source, result.Media, result.Thumbnail, result.ThumbProxy, result.Width, result.Height)

View file

@ -25,7 +25,9 @@ type QwantAPIResponse struct {
} }
// PerformQwantImageSearch performs an image search on Qwant and returns the results. // PerformQwantImageSearch performs an image search on Qwant and returns the results.
func PerformQwantImageSearch(query, safe, lang string, page int) ([]ImageSearchResult, error) { func PerformQwantImageSearch(query, safe, lang string, page int) ([]ImageSearchResult, time.Duration, error) {
startTime := time.Now() // Start the timer
const resultsPerPage = 50 const resultsPerPage = 50
var offset int var offset int
if page <= 1 { if page <= 1 {
@ -53,29 +55,29 @@ func PerformQwantImageSearch(query, safe, lang string, page int) ([]ImageSearchR
req, err := http.NewRequest("GET", apiURL, nil) req, err := http.NewRequest("GET", apiURL, nil)
if err != nil { if err != nil {
return nil, fmt.Errorf("creating request: %v", err) return nil, 0, fmt.Errorf("creating request: %v", err)
} }
ImageUserAgent, err := GetUserAgent("Image-Search") ImageUserAgent, err := GetUserAgent("Image-Search")
if err != nil { if err != nil {
return nil, err return nil, 0, err
} }
req.Header.Set("User-Agent", ImageUserAgent) req.Header.Set("User-Agent", ImageUserAgent)
resp, err := client.Do(req) resp, err := client.Do(req)
if err != nil { if err != nil {
return nil, fmt.Errorf("making request: %v", err) return nil, 0, fmt.Errorf("making request: %v", err)
} }
defer resp.Body.Close() defer resp.Body.Close()
if resp.StatusCode != http.StatusOK { if resp.StatusCode != http.StatusOK {
return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode) return nil, 0, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
} }
var apiResp QwantAPIResponse var apiResp QwantAPIResponse
if err := json.NewDecoder(resp.Body).Decode(&apiResp); err != nil { if err := json.NewDecoder(resp.Body).Decode(&apiResp); err != nil {
return nil, fmt.Errorf("decoding response: %v", err) return nil, 0, fmt.Errorf("decoding response: %v", err)
} }
var results []ImageSearchResult var results []ImageSearchResult
@ -91,5 +93,7 @@ func PerformQwantImageSearch(query, safe, lang string, page int) ([]ImageSearchR
}) })
} }
return results, nil duration := time.Since(startTime) // Calculate the duration
return results, duration, nil
} }

View file

@ -4,30 +4,17 @@ import (
"fmt" "fmt"
"html/template" "html/template"
"log" "log"
"math/rand"
"net/http" "net/http"
"sync"
"time" "time"
) )
var ( var imageSearchEngines []SearchEngine
imageEngines []imageEngine
imageEngineLock sync.Mutex
)
type imageEngine struct {
Name string
Func func(string, string, string, int) ([]ImageSearchResult, error)
Weight int
}
func init() { func init() {
imageEngines = []imageEngine{ imageSearchEngines = []SearchEngine{
{Name: "Qwant", Func: PerformQwantImageSearch, Weight: 1}, {Name: "Qwant", Func: wrapImageSearchFunc(PerformQwantImageSearch), Weight: 1},
{Name: "Imgur", Func: PerformImgurImageSearch, Weight: 2}, {Name: "Imgur", Func: wrapImageSearchFunc(PerformImgurImageSearch), Weight: 2},
} }
rand.Seed(time.Now().UnixNano())
} }
func handleImageSearch(w http.ResponseWriter, query, safe, lang string, page int) { func handleImageSearch(w http.ResponseWriter, query, safe, lang string, page int) {
@ -111,17 +98,24 @@ func getImageResultsFromCacheOrFetch(cacheKey CacheKey, query, safe, lang string
func fetchImageResults(query, safe, lang string, page int) []ImageSearchResult { func fetchImageResults(query, safe, lang string, page int) []ImageSearchResult {
var results []ImageSearchResult var results []ImageSearchResult
var err error var err error
var duration time.Duration
for attempts := 0; attempts < len(imageEngines); attempts++ { for attempts := 0; attempts < len(imageSearchEngines); attempts++ {
engine := selectImageEngine() engine := selectSearchEngine(imageSearchEngines)
log.Printf("Using image search engine: %s", engine.Name) log.Printf("Using image search engine: %s", engine.Name)
results, err = engine.Func(query, safe, lang, page) var searchResults []SearchResult
searchResults, duration, err = engine.Func(query, safe, lang, page)
updateEngineMetrics(&engine, duration, err == nil)
if err != nil { if err != nil {
log.Printf("Error performing image search with %s: %v", engine.Name, err) log.Printf("Error performing image search with %s: %v", engine.Name, err)
continue continue
} }
for _, result := range searchResults {
results = append(results, result.(ImageSearchResult))
}
if len(results) > 0 { if len(results) > 0 {
break break
} }
@ -130,30 +124,16 @@ func fetchImageResults(query, safe, lang string, page int) []ImageSearchResult {
return results return results
} }
func selectImageEngine() imageEngine { func wrapImageSearchFunc(f func(string, string, string, int) ([]ImageSearchResult, time.Duration, error)) func(string, string, string, int) ([]SearchResult, time.Duration, error) {
imageEngineLock.Lock() return func(query, safe, lang string, page int) ([]SearchResult, time.Duration, error) {
defer imageEngineLock.Unlock() imageResults, duration, err := f(query, safe, lang, page)
if err != nil {
totalWeight := 0 return nil, duration, err
for _, engine := range imageEngines {
totalWeight += engine.Weight
}
randValue := rand.Intn(totalWeight)
for _, engine := range imageEngines {
if randValue < engine.Weight {
// Adjust weights for load balancing
for i := range imageEngines {
if imageEngines[i].Name == engine.Name {
imageEngines[i].Weight = max(1, imageEngines[i].Weight-1)
} else {
imageEngines[i].Weight++
}
}
return engine
} }
randValue -= engine.Weight searchResults := make([]SearchResult, len(imageResults))
for i, result := range imageResults {
searchResults[i] = result
}
return searchResults, duration, nil
} }
return imageEngines[0] // fallback to the first engine
} }

2
run.sh
View file

@ -1,3 +1,3 @@
#!/bin/bash #!/bin/bash
go run main.go common.go init.go open-search.go images.go imageproxy.go images-quant.go images-imgur.go video.go map.go text.go text-searchxng.go text-librex.go text-google.go cache.go forums.go files.go files-torrentgalaxy.go files-thepiratebay.go agent.go go run main.go common.go search-engine.go init.go open-search.go images.go imageproxy.go images-quant.go images-imgur.go video.go map.go text.go text-searchxng.go text-duckduckgo.go text-librex.go text-google.go cache.go forums.go files.go files-torrentgalaxy.go files-thepiratebay.go agent.go

90
search-engine.go Normal file
View file

@ -0,0 +1,90 @@
package main
import (
"math/rand"
"sync"
"time"
)
// searchEngineLock serializes the weight recalculation in selectSearchEngine
// and the counter updates in updateEngineMetrics.
var (
searchEngineLock sync.Mutex
)
// SearchEngine describes one search backend together with the request
// counters that calculateReputation turns into a selection weight.
type SearchEngine struct {
// Name identifies the engine in log output.
Name string
// Func runs the search; it is invoked as Func(query, safe, lang, page) and
// returns the results, the elapsed time, and an error.
Func func(string, string, string, int) ([]SearchResult, time.Duration, error)
// Weight is the selection weight. It is overwritten with the result of
// calculateReputation by selectSearchEngine and updateEngineMetrics.
Weight int
// TotalRequests counts every call recorded through updateEngineMetrics.
TotalRequests int
// TotalTime is the sum of the response times recorded through updateEngineMetrics.
TotalTime time.Duration
// SuccessfulSearches counts recorded calls flagged as successful.
SuccessfulSearches int
// FailedSearches counts recorded calls flagged as unsuccessful.
FailedSearches int
}
// init seeds the package-level math/rand source from the current wall-clock
// time (nanosecond resolution) before any engine selection takes place.
func init() {
	seed := time.Now().UnixNano()
	rand.Seed(seed)
}
// selectSearchEngine picks one engine from engines by weighted random choice.
//
// While holding searchEngineLock, every entry's Weight is first overwritten in
// place with the value calculateReputation derives from that entry's recorded
// metrics. An engine is then drawn with probability proportional to its
// weight; entries whose weight is zero or negative are never drawn by the
// weighted pass.
//
// Edge cases:
//   - An empty slice yields the zero SearchEngine (whose Func is nil) instead
//     of panicking.
//   - If no entry has a positive weight, one entry is drawn uniformly at
//     random, because rand.Intn panics when its argument is <= 0.
//
// NOTE(review): the engine is returned by value. Metrics recorded on the
// returned copy (e.g. updateEngineMetrics(&engine, ...)) are not written back
// into engines, so they do not influence later selections. Callers that want
// the reputation to accumulate must update the slice element itself.
func selectSearchEngine(engines []SearchEngine) SearchEngine {
	if len(engines) == 0 {
		return SearchEngine{}
	}

	searchEngineLock.Lock()
	defer searchEngineLock.Unlock()

	// Refresh the weights and sum only the positive ones; a non-positive
	// weight must not shrink the total or steal probability mass.
	totalWeight := 0
	for i := range engines {
		engines[i].Weight = calculateReputation(engines[i])
		if engines[i].Weight > 0 {
			totalWeight += engines[i].Weight
		}
	}

	// Nothing is eligible for the weighted draw: pick uniformly rather than
	// handing a non-positive bound to rand.Intn.
	if totalWeight <= 0 {
		return engines[rand.Intn(len(engines))]
	}

	// Walk the cumulative weight ranges until the drawn value falls inside one.
	randValue := rand.Intn(totalWeight)
	for i := range engines {
		weight := engines[i].Weight
		if weight <= 0 {
			continue
		}
		if randValue < weight {
			return engines[i]
		}
		randValue -= weight
	}

	return engines[0] // unreachable in practice; kept as a safe fallback
}
// updateEngineMetrics records the outcome of one search on engine.
//
// Under searchEngineLock it adds responseTime to the engine's accumulated
// time, bumps the request counter and either the success or the failure
// counter depending on success, and finally refreshes engine.Weight from
// calculateReputation. Only the struct that engine points to is modified.
func updateEngineMetrics(engine *SearchEngine, responseTime time.Duration, success bool) {
	searchEngineLock.Lock()
	defer searchEngineLock.Unlock()

	engine.TotalRequests += 1
	engine.TotalTime = engine.TotalTime + responseTime

	if !success {
		engine.FailedSearches += 1
	} else {
		engine.SuccessfulSearches += 1
	}

	// Recompute the weight from the freshly updated counters.
	updated := *engine
	engine.Weight = calculateReputation(updated)
}
// calculateReputation converts an engine's recorded metrics into an integer
// selection weight.
//
// The score is (referenceTime / average response time) * success rate * 10,
// truncated to an int. An engine answering in exactly one second with a 100%
// success rate therefore scores 10; faster engines score higher, slower or
// less reliable ones lower.
//
// Special cases:
//   - No recorded requests: returns the default weight of 10.
//   - Average response time of zero (for example when only failed requests,
//     which report a zero duration, were recorded): the speed factor is
//     treated as 1 instead of dividing by zero, which would otherwise produce
//     NaN or +Inf and an implementation-defined int conversion.
//   - The result is clamped to [1, 1<<20]. The lower bound keeps a failing
//     engine selectable so it can recover, and keeps the sum of weights
//     positive; the upper bound prevents overflow when converting to int.
func calculateReputation(engine SearchEngine) int {
	const (
		referenceTime = time.Second // response time that maps to a speed factor of 1
		defaultWeight = 10          // weight for engines without any history
		scale         = 10          // spreads scores so truncation to int keeps resolution
		minWeight     = 1
		maxWeight     = 1 << 20
	)

	if engine.TotalRequests <= 0 {
		return defaultWeight
	}

	requests := float64(engine.TotalRequests)

	// Average response time in seconds and fraction of successful requests.
	avgResponseTime := engine.TotalTime.Seconds() / requests
	successRate := float64(engine.SuccessfulSearches) / requests

	// Guard the division: without timing data, neither reward nor penalize speed.
	speed := 1.0
	if avgResponseTime > 0 {
		speed = referenceTime.Seconds() / avgResponseTime
	}

	reputation := speed * successRate * scale

	switch {
	case !(reputation >= minWeight): // negated form also catches NaN
		return minWeight
	case reputation > maxWeight:
		return maxWeight
	}
	return int(reputation)
}

View file

@ -1,4 +1,3 @@
// text-duckduckgo.go
package main package main
import ( import (
@ -7,27 +6,30 @@ import (
"net/http" "net/http"
"net/url" "net/url"
"strings" "strings"
"time"
"github.com/PuerkitoBio/goquery" "github.com/PuerkitoBio/goquery"
) )
func PerformDuckDuckGoTextSearch(query, safe, lang string, page int) ([]TextSearchResult, error) { func PerformDuckDuckGoTextSearch(query, safe, lang string, page int) ([]TextSearchResult, time.Duration, error) {
startTime := time.Now() // Start the timer
var results []TextSearchResult var results []TextSearchResult
searchURL := buildDuckDuckGoSearchURL(query, page) searchURL := buildDuckDuckGoSearchURL(query, page)
resp, err := http.Get(searchURL) resp, err := http.Get(searchURL)
if err != nil { if err != nil {
return nil, fmt.Errorf("making request: %v", err) return nil, 0, fmt.Errorf("making request: %v", err)
} }
defer resp.Body.Close() defer resp.Body.Close()
if resp.StatusCode != http.StatusOK { if resp.StatusCode != http.StatusOK {
return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode) return nil, 0, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
} }
doc, err := goquery.NewDocumentFromReader(resp.Body) doc, err := goquery.NewDocumentFromReader(resp.Body)
if err != nil { if err != nil {
return nil, fmt.Errorf("loading HTML document: %v", err) return nil, 0, fmt.Errorf("loading HTML document: %v", err)
} }
doc.Find(".result__body").Each(func(i int, s *goquery.Selection) { doc.Find(".result__body").Each(func(i int, s *goquery.Selection) {
@ -54,7 +56,9 @@ func PerformDuckDuckGoTextSearch(query, safe, lang string, page int) ([]TextSear
} }
}) })
return results, nil duration := time.Since(startTime) // Calculate the duration
return results, duration, nil
} }
func buildDuckDuckGoSearchURL(query string, page int) string { func buildDuckDuckGoSearchURL(query string, page int) string {

View file

@ -6,27 +6,29 @@ import (
"net/http" "net/http"
"net/url" "net/url"
"strings" "strings"
"time"
"github.com/PuerkitoBio/goquery" "github.com/PuerkitoBio/goquery"
) )
func PerformGoogleTextSearch(query, safe, lang string, page int) ([]TextSearchResult, error) { func PerformGoogleTextSearch(query, safe, lang string, page int) ([]TextSearchResult, time.Duration, error) {
const resultsPerPage = 10 const resultsPerPage = 10
var results []TextSearchResult var results []TextSearchResult
startTime := time.Now() // Start the timer
client := &http.Client{} client := &http.Client{}
searchURL := buildSearchURL(query, safe, lang, page, resultsPerPage) searchURL := buildSearchURL(query, safe, lang, page, resultsPerPage)
req, err := http.NewRequest("GET", searchURL, nil) req, err := http.NewRequest("GET", searchURL, nil)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to create request: %v", err) return nil, 0, fmt.Errorf("failed to create request: %v", err)
} }
// User Agent generation // User Agent generation
TextUserAgent, err := GetUserAgent("Text-Search") TextUserAgent, err := GetUserAgent("Text-Search")
if err != nil { if err != nil {
fmt.Println("Error:", err) return nil, 0, err
return nil, err
} }
if debugMode { if debugMode {
@ -37,28 +39,30 @@ func PerformGoogleTextSearch(query, safe, lang string, page int) ([]TextSearchRe
resp, err := client.Do(req) resp, err := client.Do(req)
if err != nil { if err != nil {
return nil, fmt.Errorf("making request: %v", err) return nil, 0, fmt.Errorf("making request: %v", err)
} }
defer resp.Body.Close() defer resp.Body.Close()
if resp.StatusCode != http.StatusOK { if resp.StatusCode != http.StatusOK {
return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode) return nil, 0, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
} }
doc, err := goquery.NewDocumentFromReader(resp.Body) doc, err := goquery.NewDocumentFromReader(resp.Body)
if err != nil { if err != nil {
return nil, fmt.Errorf("loading HTML document: %v", err) return nil, 0, fmt.Errorf("loading HTML document: %v", err)
} }
results = parseResults(doc) results = parseResults(doc)
duration := time.Since(startTime) // Calculate the duration
if len(results) == 0 { if len(results) == 0 {
if debugMode { if debugMode {
log.Println("No results found from Google") log.Println("No results found from Google")
} }
} }
return results, nil return results, duration, nil
} }
func buildSearchURL(query, safe, lang string, page, resultsPerPage int) string { func buildSearchURL(query, safe, lang string, page, resultsPerPage int) string {

View file

@ -6,6 +6,7 @@ import (
"log" "log"
"net/http" "net/http"
"net/url" "net/url"
"time"
) )
const LIBREX_DOMAIN = "librex.antopie.org" const LIBREX_DOMAIN = "librex.antopie.org"
@ -18,7 +19,8 @@ type LibreXResult struct {
type LibreXResponse []LibreXResult type LibreXResponse []LibreXResult
func PerformLibreXTextSearch(query, safe, lang string, page int) ([]TextSearchResult, error) { func PerformLibreXTextSearch(query, safe, lang string, page int) ([]TextSearchResult, time.Duration, error) {
startTime := time.Now() // Start the timer
// LibreX/Y uses offset instead of page that starts at 0 // LibreX/Y uses offset instead of page that starts at 0
page-- page--
@ -29,7 +31,7 @@ func PerformLibreXTextSearch(query, safe, lang string, page int) ([]TextSearchRe
// User Agent generation // User Agent generation
userAgent, err := GetUserAgent("librex-text-search") userAgent, err := GetUserAgent("librex-text-search")
if err != nil { if err != nil {
return nil, err return nil, 0, err
} }
if debugMode { if debugMode {
@ -38,24 +40,24 @@ func PerformLibreXTextSearch(query, safe, lang string, page int) ([]TextSearchRe
req, err := http.NewRequest("GET", searchURL, nil) req, err := http.NewRequest("GET", searchURL, nil)
if err != nil { if err != nil {
return nil, err return nil, 0, err
} }
req.Header.Set("User-Agent", userAgent) req.Header.Set("User-Agent", userAgent)
client := &http.Client{} client := &http.Client{}
resp, err := client.Do(req) resp, err := client.Do(req)
if err != nil { if err != nil {
return nil, logError("error making request to LibreX", err) return nil, 0, logError("error making request to LibreX", err)
} }
defer resp.Body.Close() defer resp.Body.Close()
if resp.StatusCode != http.StatusOK { if resp.StatusCode != http.StatusOK {
return nil, logError("unexpected status code", fmt.Errorf("%d", resp.StatusCode)) return nil, 0, logError("unexpected status code", fmt.Errorf("%d", resp.StatusCode))
} }
var librexResp LibreXResponse var librexResp LibreXResponse
if err := json.NewDecoder(resp.Body).Decode(&librexResp); err != nil { if err := json.NewDecoder(resp.Body).Decode(&librexResp); err != nil {
return nil, logError("error decoding LibreX response", err) return nil, 0, logError("error decoding LibreX response", err)
} }
var results []TextSearchResult var results []TextSearchResult
@ -70,7 +72,9 @@ func PerformLibreXTextSearch(query, safe, lang string, page int) ([]TextSearchRe
results = append(results, result) results = append(results, result)
} }
return results, nil duration := time.Since(startTime) // Calculate the duration
return results, duration, nil
} }
func logError(message string, err error) error { func logError(message string, err error) error {

72
text.go
View file

@ -4,32 +4,19 @@ import (
"fmt" "fmt"
"html/template" "html/template"
"log" "log"
"math/rand"
"net/http" "net/http"
"sync"
"time" "time"
) )
var ( var textSearchEngines []SearchEngine
searchEngines []searchEngine
searchEngineLock sync.Mutex
)
type searchEngine struct {
Name string
Func func(string, string, string, int) ([]TextSearchResult, error)
Weight int
}
func init() { func init() {
searchEngines = []searchEngine{ textSearchEngines = []SearchEngine{
{Name: "Google", Func: PerformGoogleTextSearch, Weight: 1}, {Name: "Google", Func: wrapTextSearchFunc(PerformGoogleTextSearch), Weight: 1},
{Name: "LibreX", Func: PerformLibreXTextSearch, Weight: 2}, {Name: "LibreX", Func: wrapTextSearchFunc(PerformLibreXTextSearch), Weight: 2},
// {Name: "DuckDuckGo", Func: PerformDuckDuckGoTextSearch, Weight: 3}, // DuckDuckGo timeouts too fast and search results are trash {Name: "DuckDuckGo", Func: wrapTextSearchFunc(PerformDuckDuckGoTextSearch), Weight: 3}, // DuckDuckGo timeouts too fast and search results are trash
// {Name: "SearXNG", Func: PerformSearXNGTextSearch, Weight: 2}, // Uncomment when implemented // {Name: "SearXNG", Func: wrapTextSearchFunc(PerformSearXNGTextSearch), Weight: 2}, // Uncomment when implemented
} }
rand.Seed(time.Now().UnixNano())
} }
func HandleTextSearch(w http.ResponseWriter, query, safe, lang string, page int) { func HandleTextSearch(w http.ResponseWriter, query, safe, lang string, page int) {
@ -103,17 +90,24 @@ func prefetchPage(query, safe, lang string, page int) {
func fetchTextResults(query, safe, lang string, page int) []TextSearchResult { func fetchTextResults(query, safe, lang string, page int) []TextSearchResult {
var results []TextSearchResult var results []TextSearchResult
var err error var err error
var duration time.Duration
for attempts := 0; attempts < len(searchEngines); attempts++ { for attempts := 0; attempts < len(textSearchEngines); attempts++ {
engine := selectSearchEngine() engine := selectSearchEngine(textSearchEngines)
log.Printf("Using search engine: %s", engine.Name) log.Printf("Using search engine: %s", engine.Name)
results, err = engine.Func(query, safe, lang, page) var searchResults []SearchResult
searchResults, duration, err = engine.Func(query, safe, lang, page)
updateEngineMetrics(&engine, duration, err == nil)
if err != nil { if err != nil {
log.Printf("Error performing search with %s: %v", engine.Name, err) log.Printf("Error performing search with %s: %v", engine.Name, err)
continue continue
} }
for _, result := range searchResults {
results = append(results, result.(TextSearchResult))
}
if len(results) > 0 { if len(results) > 0 {
break break
} }
@ -122,32 +116,18 @@ func fetchTextResults(query, safe, lang string, page int) []TextSearchResult {
return results return results
} }
func selectSearchEngine() searchEngine { func wrapTextSearchFunc(f func(string, string, string, int) ([]TextSearchResult, time.Duration, error)) func(string, string, string, int) ([]SearchResult, time.Duration, error) {
searchEngineLock.Lock() return func(query, safe, lang string, page int) ([]SearchResult, time.Duration, error) {
defer searchEngineLock.Unlock() textResults, duration, err := f(query, safe, lang, page)
if err != nil {
totalWeight := 0 return nil, duration, err
for _, engine := range searchEngines {
totalWeight += engine.Weight
}
randValue := rand.Intn(totalWeight)
for _, engine := range searchEngines {
if randValue < engine.Weight {
// Adjust weights for load balancing
for i := range searchEngines {
if searchEngines[i].Name == engine.Name {
searchEngines[i].Weight = max(1, searchEngines[i].Weight-1)
} else {
searchEngines[i].Weight++
}
}
return engine
} }
randValue -= engine.Weight searchResults := make([]SearchResult, len(textResults))
for i, result := range textResults {
searchResults[i] = result
}
return searchResults, duration, nil
} }
return searchEngines[0] // fallback to the first engine
} }
func displayResults(w http.ResponseWriter, results []TextSearchResult, query, lang string, elapsed float64, page int, hasPrevPage, hasNextPage bool) { func displayResults(w http.ResponseWriter, results []TextSearchResult, query, lang string, elapsed float64, page int, hasPrevPage, hasNextPage bool) {