From e3d568f6cbb9b36af063f31895305bb6d78068d4 Mon Sep 17 00:00:00 2001 From: partisan Date: Fri, 14 Jun 2024 17:56:20 +0200 Subject: [PATCH] automatic reputation for search engines --- images-imgur.go | 18 ++++++---- images-quant.go | 18 ++++++---- images.go | 68 +++++++++++++---------------------- run.sh | 2 +- search-engine.go | 90 ++++++++++++++++++++++++++++++++++++++++++++++ text-duckduckgo.go | 18 ++++++---- text-google.go | 20 ++++++----- text-librex.go | 18 ++++++---- text.go | 72 ++++++++++++++----------------------- 9 files changed, 198 insertions(+), 126 deletions(-) create mode 100644 search-engine.go diff --git a/images-imgur.go b/images-imgur.go index 2e76879..ede8d10 100644 --- a/images-imgur.go +++ b/images-imgur.go @@ -6,28 +6,31 @@ import ( "net/url" "strconv" "strings" + "time" "github.com/PuerkitoBio/goquery" ) // PerformImgurImageSearch performs an image search on Imgur and returns the results -func PerformImgurImageSearch(query, safe, lang string, page int) ([]ImageSearchResult, error) { +func PerformImgurImageSearch(query, safe, lang string, page int) ([]ImageSearchResult, time.Duration, error) { + startTime := time.Now() // Start the timer + var results []ImageSearchResult searchURL := buildImgurSearchURL(query, page) resp, err := http.Get(searchURL) if err != nil { - return nil, fmt.Errorf("making request: %v", err) + return nil, 0, fmt.Errorf("making request: %v", err) } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { - return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode) + return nil, 0, fmt.Errorf("unexpected status code: %d", resp.StatusCode) } doc, err := goquery.NewDocumentFromReader(resp.Body) if err != nil { - return nil, fmt.Errorf("loading HTML document: %v", err) + return nil, 0, fmt.Errorf("loading HTML document: %v", err) } doc.Find("div.cards div.post").Each(func(i int, s *goquery.Selection) { @@ -72,7 +75,9 @@ func PerformImgurImageSearch(query, safe, lang string, page int) ([]ImageSearchR }) }) - return results, nil + duration := time.Since(startTime) // Calculate the duration + + return results, duration, nil } // scrapeImageFromImgurPage scrapes the image source from the Imgur page @@ -130,12 +135,13 @@ func buildImgurSearchURL(query string, page int) string { } // func main() { -// results, err := PerformImgurImageSearch("cats", "true", "en", 1) +// results, duration, err := PerformImgurImageSearch("cats", "true", "en", 1) // if err != nil { // fmt.Println("Error:", err) // return // } +// fmt.Printf("Search took: %v\n", duration) // for _, result := range results { // fmt.Printf("Title: %s\nSource: %s\nMedia: %s\nThumbnail: %s\nThumbProxy: %s\nWidth: %d\nHeight: %d\n\n", // result.Title, result.Source, result.Media, result.Thumbnail, result.ThumbProxy, result.Width, result.Height) diff --git a/images-quant.go b/images-quant.go index fa799f8..d9a9770 100644 --- a/images-quant.go +++ b/images-quant.go @@ -25,7 +25,9 @@ type QwantAPIResponse struct { } // PerformQwantImageSearch performs an image search on Qwant and returns the results. -func PerformQwantImageSearch(query, safe, lang string, page int) ([]ImageSearchResult, error) { +func PerformQwantImageSearch(query, safe, lang string, page int) ([]ImageSearchResult, time.Duration, error) { + startTime := time.Now() // Start the timer + const resultsPerPage = 50 var offset int if page <= 1 { @@ -53,29 +55,29 @@ func PerformQwantImageSearch(query, safe, lang string, page int) ([]ImageSearchR req, err := http.NewRequest("GET", apiURL, nil) if err != nil { - return nil, fmt.Errorf("creating request: %v", err) + return nil, 0, fmt.Errorf("creating request: %v", err) } ImageUserAgent, err := GetUserAgent("Image-Search") if err != nil { - return nil, err + return nil, 0, err } req.Header.Set("User-Agent", ImageUserAgent) resp, err := client.Do(req) if err != nil { - return nil, fmt.Errorf("making request: %v", err) + return nil, 0, fmt.Errorf("making request: %v", err) } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { - return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode) + return nil, 0, fmt.Errorf("unexpected status code: %d", resp.StatusCode) } var apiResp QwantAPIResponse if err := json.NewDecoder(resp.Body).Decode(&apiResp); err != nil { - return nil, fmt.Errorf("decoding response: %v", err) + return nil, 0, fmt.Errorf("decoding response: %v", err) } var results []ImageSearchResult @@ -91,5 +93,7 @@ func PerformQwantImageSearch(query, safe, lang string, page int) ([]ImageSearchR }) } - return results, nil + duration := time.Since(startTime) // Calculate the duration + + return results, duration, nil } diff --git a/images.go b/images.go index 9d9ef55..942a9f1 100644 --- a/images.go +++ b/images.go @@ -4,30 +4,17 @@ import ( "fmt" "html/template" "log" - "math/rand" "net/http" - "sync" "time" ) -var ( - imageEngines []imageEngine - imageEngineLock sync.Mutex -) - -type imageEngine struct { - Name string - Func func(string, string, string, int) ([]ImageSearchResult, error) - Weight int -} +var imageSearchEngines []SearchEngine func init() { - imageEngines = []imageEngine{ - {Name: "Qwant", Func: PerformQwantImageSearch, Weight: 1}, - {Name: "Imgur", Func: PerformImgurImageSearch, Weight: 2}, + imageSearchEngines = []SearchEngine{ + {Name: "Qwant", Func: wrapImageSearchFunc(PerformQwantImageSearch), Weight: 1}, + {Name: "Imgur", Func: wrapImageSearchFunc(PerformImgurImageSearch), Weight: 2}, } - - rand.Seed(time.Now().UnixNano()) } func handleImageSearch(w http.ResponseWriter, query, safe, lang string, page int) { @@ -111,17 +98,24 @@ func getImageResultsFromCacheOrFetch(cacheKey CacheKey, query, safe, lang string func fetchImageResults(query, safe, lang string, page int) []ImageSearchResult { var results []ImageSearchResult var err error + var duration time.Duration - for attempts := 0; attempts < len(imageEngines); attempts++ { - engine := selectImageEngine() + for attempts := 0; attempts < len(imageSearchEngines); attempts++ { + engine := selectSearchEngine(imageSearchEngines) log.Printf("Using image search engine: %s", engine.Name) - results, err = engine.Func(query, safe, lang, page) + var searchResults []SearchResult + searchResults, duration, err = engine.Func(query, safe, lang, page) + updateEngineMetrics(&engine, duration, err == nil) if err != nil { log.Printf("Error performing image search with %s: %v", engine.Name, err) continue } + for _, result := range searchResults { + results = append(results, result.(ImageSearchResult)) + } + if len(results) > 0 { break } @@ -130,30 +124,16 @@ func fetchImageResults(query, safe, lang string, page int) []ImageSearchResult { return results } -func selectImageEngine() imageEngine { - imageEngineLock.Lock() - defer imageEngineLock.Unlock() - - totalWeight := 0 - for _, engine := range imageEngines { - totalWeight += engine.Weight - } - - randValue := rand.Intn(totalWeight) - for _, engine := range imageEngines { - if randValue < engine.Weight { - // Adjust weights for load balancing - for i := range imageEngines { - if imageEngines[i].Name == engine.Name { - imageEngines[i].Weight = max(1, imageEngines[i].Weight-1) - } else { - imageEngines[i].Weight++ - } - } - return engine +func wrapImageSearchFunc(f func(string, string, string, int) ([]ImageSearchResult, time.Duration, error)) func(string, string, string, int) ([]SearchResult, time.Duration, error) { + return func(query, safe, lang string, page int) ([]SearchResult, time.Duration, error) { + imageResults, duration, err := f(query, safe, lang, page) + if err != nil { + return nil, duration, err } - randValue -= engine.Weight + searchResults := make([]SearchResult, len(imageResults)) + for i, result := range imageResults { + searchResults[i] = result + } + return searchResults, duration, nil } - - return imageEngines[0] // fallback to the first engine } diff --git a/run.sh b/run.sh index a845d9f..c09b727 100755 --- a/run.sh +++ b/run.sh @@ -1,3 +1,3 @@ #!/bin/bash -go run main.go common.go init.go open-search.go images.go imageproxy.go images-quant.go images-imgur.go video.go map.go text.go text-searchxng.go text-librex.go text-google.go cache.go forums.go files.go files-torrentgalaxy.go files-thepiratebay.go agent.go \ No newline at end of file +go run main.go common.go search-engine.go init.go open-search.go images.go imageproxy.go images-quant.go images-imgur.go video.go map.go text.go text-searchxng.go text-duckduckgo.go text-librex.go text-google.go cache.go forums.go files.go files-torrentgalaxy.go files-thepiratebay.go agent.go \ No newline at end of file diff --git a/search-engine.go b/search-engine.go new file mode 100644 index 0000000..36dde9a --- /dev/null +++ b/search-engine.go @@ -0,0 +1,90 @@ +package main + +import ( + "math/rand" + "sync" + "time" +) + +var ( + searchEngineLock sync.Mutex +) + +// SearchEngine struct now includes metrics for calculating reputation. +type SearchEngine struct { + Name string + Func func(string, string, string, int) ([]SearchResult, time.Duration, error) + Weight int + TotalRequests int + TotalTime time.Duration + SuccessfulSearches int + FailedSearches int +} + +// init function seeds the random number generator. +func init() { + rand.Seed(time.Now().UnixNano()) +} + +// Selects a search engine based on weighted random selection with dynamic weighting. +func selectSearchEngine(engines []SearchEngine) SearchEngine { + searchEngineLock.Lock() + defer searchEngineLock.Unlock() + + // Recalculate weights based on average response time and success rate. + for i := range engines { + engines[i].Weight = calculateReputation(engines[i]) + } + + totalWeight := 0 + for _, engine := range engines { + totalWeight += engine.Weight + } + + randValue := rand.Intn(totalWeight) + for _, engine := range engines { + if randValue < engine.Weight { + return engine + } + randValue -= engine.Weight + } + + return engines[0] // fallback to the first engine +} + +// Updates the engine's performance metrics. +func updateEngineMetrics(engine *SearchEngine, responseTime time.Duration, success bool) { + searchEngineLock.Lock() + defer searchEngineLock.Unlock() + + engine.TotalRequests++ + engine.TotalTime += responseTime + if success { + engine.SuccessfulSearches++ + } else { + engine.FailedSearches++ + } + engine.Weight = calculateReputation(*engine) +} + +// Calculates the reputation of the search engine based on average response time and success rate. +func calculateReputation(engine SearchEngine) int { + const referenceTime = time.Second // 1 second reference time in nanoseconds (1000 ms) + + if engine.TotalRequests == 0 { + return 10 // Default weight for new engines + } + + // Calculate average response time in seconds. + avgResponseTime := engine.TotalTime.Seconds() / float64(engine.TotalRequests) + + // Calculate success rate. + successRate := float64(engine.SuccessfulSearches) / float64(engine.TotalRequests) + + // Combine response time and success rate into a single reputation score. + // The formula can be adjusted to weigh response time and success rate differently. + reputation := (referenceTime.Seconds() / avgResponseTime) * successRate + + // Scale reputation for better interpretability (e.g., multiply by 10) + return int(reputation * 10) +} diff --git a/text-duckduckgo.go b/text-duckduckgo.go index 56d098f..a05d9c9 100644 --- a/text-duckduckgo.go +++ b/text-duckduckgo.go @@ -1,4 +1,3 @@ -// text-duckduckgo.go package main import ( @@ -7,27 +6,30 @@ import ( "net/http" "net/url" "strings" + "time" "github.com/PuerkitoBio/goquery" ) -func PerformDuckDuckGoTextSearch(query, safe, lang string, page int) ([]TextSearchResult, error) { +func PerformDuckDuckGoTextSearch(query, safe, lang string, page int) ([]TextSearchResult, time.Duration, error) { + startTime := time.Now() // Start the timer + var results []TextSearchResult searchURL := buildDuckDuckGoSearchURL(query, page) resp, err := http.Get(searchURL) if err != nil { - return nil, fmt.Errorf("making request: %v", err) + return nil, 0, fmt.Errorf("making request: %v", err) } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { - return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode) + return nil, 0, fmt.Errorf("unexpected status code: %d", resp.StatusCode) } doc, err := goquery.NewDocumentFromReader(resp.Body) if err != nil { - return nil, fmt.Errorf("loading HTML document: %v", err) + return nil, 0, fmt.Errorf("loading HTML document: %v", err) } doc.Find(".result__body").Each(func(i int, s *goquery.Selection) { @@ -54,7 +56,9 @@ func PerformDuckDuckGoTextSearch(query, safe, lang string, page int) ([]TextSear } }) - return results, nil + duration := time.Since(startTime) // Calculate the duration + + return results, duration, nil } func buildDuckDuckGoSearchURL(query string, page int) string { @@ -63,4 +67,4 @@ func buildDuckDuckGoSearchURL(query string, page int) string { startParam = fmt.Sprintf("&s=%d", (page-1)*10) } return fmt.Sprintf("https://duckduckgo.com/html/?q=%s%s", url.QueryEscape(query), startParam) -} \ No newline at end of file +} diff --git a/text-google.go b/text-google.go index 9c338cc..19c01d7 100644 --- a/text-google.go +++ b/text-google.go @@ -6,27 +6,29 @@ import ( "net/http" "net/url" "strings" + "time" "github.com/PuerkitoBio/goquery" ) -func PerformGoogleTextSearch(query, safe, lang string, page int) ([]TextSearchResult, error) { +func PerformGoogleTextSearch(query, safe, lang string, page int) ([]TextSearchResult, time.Duration, error) { const resultsPerPage = 10 var results []TextSearchResult + startTime := time.Now() // Start the timer + client := &http.Client{} searchURL := buildSearchURL(query, safe, lang, page, resultsPerPage) req, err := http.NewRequest("GET", searchURL, nil) if err != nil { - return nil, fmt.Errorf("failed to create request: %v", err) + return nil, 0, fmt.Errorf("failed to create request: %v", err) } // User Agent generation TextUserAgent, err := GetUserAgent("Text-Search") if err != nil { - fmt.Println("Error:", err) - return nil, err + return nil, 0, err } if debugMode { @@ -37,28 +39,30 @@ func PerformGoogleTextSearch(query, safe, lang string, page int) ([]TextSearchRe resp, err := client.Do(req) if err != nil { - return nil, fmt.Errorf("making request: %v", err) + return nil, 0, fmt.Errorf("making request: %v", err) } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { - return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode) + return nil, 0, fmt.Errorf("unexpected status code: %d", resp.StatusCode) } doc, err := goquery.NewDocumentFromReader(resp.Body) if err != nil { - return nil, fmt.Errorf("loading HTML document: %v", err) + return nil, 0, fmt.Errorf("loading HTML document: %v", err) } results = parseResults(doc) + duration := time.Since(startTime) // Calculate the duration + if len(results) == 0 { if debugMode { log.Println("No results found from Google") } } - return results, nil + return results, duration, nil } func buildSearchURL(query, safe, lang string, page, resultsPerPage int) string { diff --git a/text-librex.go b/text-librex.go index 15dddd4..fe428ac 100644 --- a/text-librex.go +++ b/text-librex.go @@ -6,6 +6,7 @@ import ( "log" "net/http" "net/url" + "time" ) const LIBREX_DOMAIN = "librex.antopie.org" @@ -18,7 +19,8 @@ type LibreXResult struct { type LibreXResponse []LibreXResult -func PerformLibreXTextSearch(query, safe, lang string, page int) ([]TextSearchResult, error) { +func PerformLibreXTextSearch(query, safe, lang string, page int) ([]TextSearchResult, time.Duration, error) { + startTime := time.Now() // Start the timer // LibreX/Y uses offset instead of page that starts at 0 page-- @@ -29,7 +31,7 @@ func PerformLibreXTextSearch(query, safe, lang string, page int) ([]TextSearchRe // User Agent generation userAgent, err := GetUserAgent("librex-text-search") if err != nil { - return nil, err + return nil, 0, err } if debugMode { @@ -38,24 +40,24 @@ func PerformLibreXTextSearch(query, safe, lang string, page int) ([]TextSearchRe req, err := http.NewRequest("GET", searchURL, nil) if err != nil { - return nil, err + return nil, 0, err } req.Header.Set("User-Agent", userAgent) client := &http.Client{} resp, err := client.Do(req) if err != nil { - return nil, logError("error making request to LibreX", err) + return nil, 0, logError("error making request to LibreX", err) } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { - return nil, logError("unexpected status code", fmt.Errorf("%d", resp.StatusCode)) + return nil, 0, logError("unexpected status code", fmt.Errorf("%d", resp.StatusCode)) } var librexResp LibreXResponse if err := json.NewDecoder(resp.Body).Decode(&librexResp); err != nil { - return nil, logError("error decoding LibreX response", err) + return nil, 0, logError("error decoding LibreX response", err) } var results []TextSearchResult @@ -70,7 +72,9 @@ func PerformLibreXTextSearch(query, safe, lang string, page int) ([]TextSearchRe results = append(results, result) } - return results, nil + duration := time.Since(startTime) // Calculate the duration + + return results, duration, nil } func logError(message string, err error) error { diff --git a/text.go b/text.go index 68c1033..31c2f5d 100644 --- a/text.go +++ b/text.go @@ -4,32 +4,19 @@ import ( "fmt" "html/template" "log" - "math/rand" "net/http" - "sync" "time" ) -var ( - searchEngines []searchEngine - searchEngineLock sync.Mutex -) - -type searchEngine struct { - Name string - Func func(string, string, string, int) ([]TextSearchResult, error) - Weight int -} +var textSearchEngines []SearchEngine func init() { - searchEngines = []searchEngine{ - {Name: "Google", Func: PerformGoogleTextSearch, Weight: 1}, - {Name: "LibreX", Func: PerformLibreXTextSearch, Weight: 2}, - // {Name: "DuckDuckGo", Func: PerformDuckDuckGoTextSearch, Weight: 3}, // DuckDuckGo timeouts too fast and search results are trash - // {Name: "SearXNG", Func: PerformSearXNGTextSearch, Weight: 2}, // Uncomment when implemented + textSearchEngines = []SearchEngine{ + {Name: "Google", Func: wrapTextSearchFunc(PerformGoogleTextSearch), Weight: 1}, + {Name: "LibreX", Func: wrapTextSearchFunc(PerformLibreXTextSearch), Weight: 2}, + {Name: "DuckDuckGo", Func: wrapTextSearchFunc(PerformDuckDuckGoTextSearch), Weight: 3}, // DuckDuckGo timeouts too fast and search results are trash + // {Name: "SearXNG", Func: wrapTextSearchFunc(PerformSearXNGTextSearch), Weight: 2}, // Uncomment when implemented } - - rand.Seed(time.Now().UnixNano()) } func HandleTextSearch(w http.ResponseWriter, query, safe, lang string, page int) { @@ -103,17 +90,24 @@ func prefetchPage(query, safe, lang string, page int) { func fetchTextResults(query, safe, lang string, page int) []TextSearchResult { var results []TextSearchResult var err error + var duration time.Duration - for attempts := 0; attempts < len(searchEngines); attempts++ { - engine := selectSearchEngine() + for attempts := 0; attempts < len(textSearchEngines); attempts++ { + engine := selectSearchEngine(textSearchEngines) log.Printf("Using search engine: %s", engine.Name) - results, err = engine.Func(query, safe, lang, page) + var searchResults []SearchResult + searchResults, duration, err = engine.Func(query, safe, lang, page) + updateEngineMetrics(&engine, duration, err == nil) if err != nil { log.Printf("Error performing search with %s: %v", engine.Name, err) continue } + for _, result := range searchResults { + results = append(results, result.(TextSearchResult)) + } + if len(results) > 0 { break } @@ -122,32 +116,18 @@ func fetchTextResults(query, safe, lang string, page int) []TextSearchResult { return results } -func selectSearchEngine() searchEngine { - searchEngineLock.Lock() - defer searchEngineLock.Unlock() - - totalWeight := 0 - for _, engine := range searchEngines { - totalWeight += engine.Weight - } - - randValue := rand.Intn(totalWeight) - for _, engine := range searchEngines { - if randValue < engine.Weight { - // Adjust weights for load balancing - for i := range searchEngines { - if searchEngines[i].Name == engine.Name { - searchEngines[i].Weight = max(1, searchEngines[i].Weight-1) - } else { - searchEngines[i].Weight++ - } - } - return engine +func wrapTextSearchFunc(f func(string, string, string, int) ([]TextSearchResult, time.Duration, error)) func(string, string, string, int) ([]SearchResult, time.Duration, error) { + return func(query, safe, lang string, page int) ([]SearchResult, time.Duration, error) { + textResults, duration, err := f(query, safe, lang, page) + if err != nil { + return nil, duration, err } - randValue -= engine.Weight + searchResults := make([]SearchResult, len(textResults)) + for i, result := range textResults { + searchResults[i] = result + } + return searchResults, duration, nil } - - return searchEngines[0] // fallback to the first engine } func displayResults(w http.ResponseWriter, results []TextSearchResult, query, lang string, elapsed float64, page int, hasPrevPage, hasNextPage bool) {