Mark self-crawling as experimental, clean up unused features
Some checks failed
Run Integration Tests / test (push) Failing after 1m15s
This commit is contained in:
parent
ca87df5df1
commit
49cb7bb94a
27 changed files with 1731 additions and 832 deletions
272
text-extra.go
Executable file
@@ -0,0 +1,272 @@
//go:build experimental
// +build experimental

package main

import (
    "fmt"
    "net/http"
    "os"
    "path/filepath"
    "time"
)

var textSearchEngines []SearchEngine

var allTextSearchEngines = []SearchEngine{
    {Name: "Google", Func: wrapTextSearchFunc(PerformGoogleTextSearch)},
    {Name: "LibreX", Func: wrapTextSearchFunc(PerformLibreXTextSearch)},
    {Name: "Brave", Func: wrapTextSearchFunc(PerformBraveTextSearch)},
    {Name: "DuckDuckGo", Func: wrapTextSearchFunc(PerformDuckDuckGoTextSearch)},
    {Name: "Quant", Func: wrapTextSearchFunc(PerformQwantTextSearch)}, // Broken!
    //{Name: "SearXNG", Func: wrapTextSearchFunc(PerformSearXTextSearch)}, // bruh
}

// initTextEngines rebuilds textSearchEngines (the final, globally declared
// slice) from the engine names listed in config.MetaSearch.Text.
func initTextEngines() {
    textSearchEngines = nil // reset; equivalent to make([]SearchEngine, 0)

    for _, engineName := range config.MetaSearch.Text {
        for _, candidate := range allTextSearchEngines {
            if candidate.Name == engineName {
                textSearchEngines = append(textSearchEngines, candidate)
                break
            }
        }
    }
}
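
// Illustrative note (editorial, not part of the original file): with a config
// such as MetaSearch.Text = ["DuckDuckGo", "Brave"], the loop above yields
// textSearchEngines = [DuckDuckGo, Brave], so DuckDuckGo serves overall page 1
// and Brave page 2 (see the rotation in fetchTextResults below). Names that
// match no entry in allTextSearchEngines are silently skipped.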

func HandleTextSearch(w http.ResponseWriter, settings UserSettings, query string, page int) {
    startTime := time.Now()

    cacheKey := CacheKey{
        Query: query,
        Page:  page,
        Safe:  settings.SafeSearch == "active",
        Lang:  settings.SearchLanguage,
        Type:  "text",
    }
    combinedResults := getTextResultsFromCacheOrFetch(cacheKey, query, settings.SafeSearch, settings.SearchLanguage, page)

    hasPrevPage := page > 1

    // Prefetch next and previous pages asynchronously
    go prefetchPage(query, settings.SafeSearch, settings.SearchLanguage, page+1)
    if hasPrevPage {
        go prefetchPage(query, settings.SafeSearch, settings.SearchLanguage, page-1)
    }

    elapsedTime := time.Since(startTime)

    // Simplified result structure without waiting for favicons
    type DecoratedResult struct {
        TextSearchResult
        PrettyLink LinkParts
        FaviconID  string // Just the ID; the URL is generated client-side
    }

    var decoratedResults []DecoratedResult
    for _, r := range combinedResults {
        if r.URL == "" {
            continue
        }

        prettyLink := FormatLinkHTML(r.URL)
        faviconID := faviconIDFromURL(prettyLink.RootURL)

        decoratedResults = append(decoratedResults, DecoratedResult{
            TextSearchResult: r,
            PrettyLink:       prettyLink,
            FaviconID:        faviconID,
        })

        // Start async favicon fetch if not already cached
        go ensureFaviconIsCached(faviconID, prettyLink.RootURL)
    }

    data := map[string]interface{}{
        "Results":          decoratedResults,
        "Query":            query,
        "Fetched":          FormatElapsedTime(elapsedTime),
        "Page":             page,
        "HasPrevPage":      hasPrevPage,
        "HasNextPage":      len(combinedResults) >= 50,
        "NoResults":        len(combinedResults) == 0,
        "LanguageOptions":  languageOptions,
        "CurrentLang":      settings.SearchLanguage,
        "Theme":            settings.Theme,
        "Safe":             settings.SafeSearch,
        "IsThemeDark":      settings.IsThemeDark,
        "Trans":            Translate,
        "HardCacheEnabled": config.DriveCacheEnabled,
    }

    renderTemplate(w, "text.html", data)
}
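
// Editorial note: "HasNextPage" above is a heuristic; a page holding 50 or
// more merged results is assumed to have a successor, since the engine
// functions report no total result count.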

func ensureFaviconIsCached(faviconID, rootURL string) {
    // Check whether it already exists in the cache
    filename := fmt.Sprintf("%s_icon.webp", faviconID)
    cachedPath := filepath.Join(config.DriveCache.Path, "images", filename)

    if _, err := os.Stat(cachedPath); err == nil {
        return // Already cached
    }

    // Not cached; initiate the download
    getFaviconProxyURL("", rootURL) // This triggers the async download
}
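
// Example of the path mapping above (editorial, with a hypothetical ID):
// faviconID "abc123" is checked at <DriveCache.Path>/images/abc123_icon.webp;
// a successful os.Stat is treated as "already cached".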

func getTextResultsFromCacheOrFetch(cacheKey CacheKey, query, safe, lang string, page int) []TextSearchResult {
    // Buffered so the lookup goroutine can always send its result and exit,
    // even when the 2-second timeout below has already fired.
    cacheChan := make(chan []SearchResult, 1)
    var combinedResults []TextSearchResult

    go func() {
        results, exists := resultsCache.Get(cacheKey)
        if exists {
            printDebug("Cache hit")
            cacheChan <- results
        } else {
            printDebug("Cache miss")
            cacheChan <- nil
        }
    }()

    select {
    case results := <-cacheChan:
        if results == nil {
            // Always attempt to fetch results on a cache miss
            combinedResults = fetchTextResults(query, safe, lang, page)
            if len(combinedResults) > 0 {
                resultsCache.Set(cacheKey, convertToSearchResults(combinedResults))
            }
        } else {
            textResults, _, _, _, _ := convertToSpecificResults(results)
            combinedResults = textResults
        }
    case <-time.After(2 * time.Second):
        printInfo("Cache check timeout")
        // Even on timeout, attempt to fetch results
        combinedResults = fetchTextResults(query, safe, lang, page)
        if len(combinedResults) > 0 {
            resultsCache.Set(cacheKey, convertToSearchResults(combinedResults))
        }
    }

    return combinedResults
}
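
// The three flows above, summarized (editorial):
//   cache hit     -> cached results are converted back to []TextSearchResult
//   cache miss    -> fetchTextResults runs and non-empty results are cached
//   2-second wait -> treated like a miss: fetchTextResults runs anyway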

func prefetchPage(query, safe, lang string, page int) {
    cacheKey := CacheKey{Query: query, Page: page, Safe: safe == "active", Lang: lang, Type: "text"}
    if _, exists := resultsCache.Get(cacheKey); !exists {
        printInfo("Page %d not cached, caching now...", page)
        if config.MetaSearchEnabled {
            pageResults := fetchTextResults(query, safe, lang, page)
            if len(pageResults) > 0 {
                resultsCache.Set(cacheKey, convertToSearchResults(pageResults))
            }
        } else {
            printInfo("Crawler disabled; skipping prefetch for page %d", page)
        }
    } else {
        printInfo("Page %d already cached", page)
    }
}

// Note: this function rotates search engines across pages instead of running
// them in order, as noted in the wiki.
func fetchTextResults(query, safe, lang string, page int) []TextSearchResult {
    var results []TextSearchResult

    if !config.MetaSearchEnabled {
        printDebug("Crawler is disabled; fetching from local index.")

        // Fetch a page of 10 results from the local index
        indexedResults, err := SearchIndex(query, page, 10)
        if err != nil {
            printErr("Error searching the index: %v", err)
            return results // Return empty results on error
        }

        // Convert indexed results to the TextSearchResult format
        for _, doc := range indexedResults {
            results = append(results, TextSearchResult{
                URL:         doc.Link,
                Header:      doc.Title,
                Description: doc.Description,
                Source:      doc.Tags,
            })
        }

        return results
    } else {
        // Crawler is enabled, so use the search engines
        engineCount := len(textSearchEngines)
        if engineCount == 0 {
            printWarn("No text search engines are configured")
            return results
        }

        // Determine which engine to use for the current page
        engineIndex := (page - 1) % engineCount
        engine := textSearchEngines[engineIndex]

        // Calculate the page number to request from this engine
        enginePage := (page-1)/engineCount + 1
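
        // Worked example (editorial), assuming five engines are enabled:
        //   page 1 -> engineIndex 0 (engine 1), enginePage 1
        //   page 5 -> engineIndex 4 (engine 5), enginePage 1
        //   page 6 -> engineIndex 0 (engine 1), enginePage 2
        // i.e. each engine is asked for its own page ceil(page/engineCount).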

        printDebug("Fetching results for overall page %d using engine: %s (engine page %d)", page, engine.Name, enginePage)

        // Fetch results from the selected engine
        searchResults, _, err := engine.Func(query, safe, lang, enginePage)
        if err != nil {
            printWarn("Error performing search with %s: %v", engine.Name, err)
        } else {
            results = append(results, validateResults(searchResults)...)
        }

        // If no results are found with the selected engine, try the next in line
        if len(results) == 0 {
            for i := 1; i < engineCount; i++ {
                nextEngine := textSearchEngines[(engineIndex+i)%engineCount]
                enginePage = (page-1)/engineCount + 1
                printInfo("No results found, trying next engine: %s (engine page %d)", nextEngine.Name, enginePage)

                searchResults, _, err := nextEngine.Func(query, safe, lang, enginePage)
                if err != nil {
                    printWarn("Error performing search with %s: %v", nextEngine.Name, err)
                    continue
                }
                results = append(results, validateResults(searchResults)...)
                if len(results) > 0 {
                    break
                }
            }
        }

        printInfo("Fetched %d results for overall page %d", len(results), page)
        return results
    }
}

func validateResults(searchResults []SearchResult) []TextSearchResult {
    var validResults []TextSearchResult

    // Remove anything that is missing a URL or Header
    for _, result := range searchResults {
        textResult, ok := result.(TextSearchResult)
        if !ok {
            continue // skip results of unexpected types instead of panicking
        }
        if textResult.URL != "" && textResult.Header != "" {
            validResults = append(validResults, textResult)
        }
    }

    return validResults
}

func wrapTextSearchFunc(f func(string, string, string, int) ([]TextSearchResult, time.Duration, error)) func(string, string, string, int) ([]SearchResult, time.Duration, error) {
    return func(query, safe, lang string, page int) ([]SearchResult, time.Duration, error) {
        textResults, duration, err := f(query, safe, lang, page)
        if err != nil {
            return nil, duration, err
        }
        searchResults := make([]SearchResult, len(textResults))
        for i, result := range textResults {
            searchResults[i] = result
        }
        return searchResults, duration, nil
    }
}
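
// Illustrative usage (editorial): any backend with the shape
//   func(query, safe, lang string, page int) ([]TextSearchResult, time.Duration, error)
// can be listed in allTextSearchEngines via wrapTextSearchFunc; the wrapper
// only lifts the concrete []TextSearchResult into the generic []SearchResult.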