added config values to configure enabled meta-search engines

This commit is contained in:
partisan 2025-02-21 19:47:55 +01:00
parent dc4a3a4bec
commit 5e6fc73038
7 changed files with 184 additions and 88 deletions

View file

@ -22,6 +22,13 @@ type CacheConfig struct {
Path string
}
// MetaSearchConfig lists which upstream engines/instances are enabled for
// each meta-search category. Entries are matched by name when the engine
// lists are (re)initialized.
type MetaSearchConfig struct {
Text []string // engine names used for text search (e.g. "Google", "Brave", "DuckDuckGo")
Image []string // engine names used for image search (e.g. "Qwant", "Bing", "DeviantArt")
Files []string // engine names used for torrent/file search (e.g. "TorrentGalaxy", "ThePirateBay")
Video []string // Piped API instance hostnames used for video search
}
type Config struct {
Port int // Added
AuthCode string // Added
@ -47,6 +54,9 @@ type Config struct {
CrawlingInterval time.Duration // Refresh crawled results in...
MaxPagesPerDomain int // Max pages to crawl per domain
IndexBatchSize int
LibreXInstances []string
MetaSearch MetaSearchConfig
DriveCache CacheConfig
RamCache CacheConfig
@ -75,6 +85,33 @@ var defaultConfig = Config{
MaxPagesPerDomain: 10,
IndexBatchSize: 50,
LogLevel: 1,
LibreXInstances: []string{"librex.antopie.org"},
MetaSearch: MetaSearchConfig{
// For Text search (SearXNG and LibreX are skipped by default, as they are themselves meta-search engines)
Text: []string{"Google", "Brave", "DuckDuckGo"},
// For Image search
Image: []string{"Qwant", "Bing", "DeviantArt"},
// For Files search
Files: []string{"TorrentGalaxy", "ThePirateBay"},
// For Video (piped instances)
Video: []string{
"api.piped.yt",
"pipedapi.moomoo.me",
"pipedapi.darkness.services",
"pipedapi.kavin.rocks",
"piped-api.hostux.net",
"pipedapi.syncpundit.io",
"piped-api.cfe.re",
"pipedapi.in.projectsegfau.lt",
"piapi.ggtyler.dev",
"piped-api.codespace.cz",
"pipedapi.coldforge.xyz",
"pipedapi.osphost.fi",
},
},
DriveCache: CacheConfig{
Duration: 48 * time.Hour, // Added
Path: "./cache", // Added
@ -271,6 +308,14 @@ func saveConfig(config Config) {
proxiesSec.Key("CrawlerProxyStrict").SetValue(strconv.FormatBool(config.CrawlerProxyStrict))
proxiesSec.Key("CrawlerProxies").SetValue(strings.Join(config.CrawlerProxies, ","))
// MetaSearch section
metaSec := cfg.Section("MetaSearches")
metaSec.Key("LibreXInstances").SetValue(strings.Join(config.LibreXInstances, ","))
metaSec.Key("Text").SetValue(strings.Join(config.MetaSearch.Text, ","))
metaSec.Key("Image").SetValue(strings.Join(config.MetaSearch.Image, ","))
metaSec.Key("Files").SetValue(strings.Join(config.MetaSearch.Files, ","))
metaSec.Key("Video").SetValue(strings.Join(config.MetaSearch.Video, ","))
// Indexer section
indexerSec := cfg.Section("Indexer")
indexerSec.Key("ConcurrentStandardCrawlers").SetValue(strconv.Itoa(config.ConcurrentStandardCrawlers))
@ -328,6 +373,13 @@ func loadConfig() Config {
crawlerProxyStrict := getConfigValueBool(cfg.Section("Proxies").Key("CrawlerProxyStrict"), defaultConfig.CrawlerProxyStrict)
crawlerProxies := strings.Split(getConfigValueString(cfg.Section("Proxies").Key("CrawlerProxies"), ""), ",")
// MetaSearch
searchXInstances := strings.Split(getConfigValueString(cfg.Section("MetaSearches").Key("LibreXInstances"), strings.Join(defaultConfig.LibreXInstances, ",")), ",")
textList := strings.Split(getConfigValueString(cfg.Section("MetaSearch").Key("Text"), strings.Join(defaultConfig.MetaSearch.Text, ",")), ",")
imageList := strings.Split(getConfigValueString(cfg.Section("MetaSearch").Key("Image"), strings.Join(defaultConfig.MetaSearch.Image, ",")), ",")
filesList := strings.Split(getConfigValueString(cfg.Section("MetaSearch").Key("Files"), strings.Join(defaultConfig.MetaSearch.Files, ",")), ",")
videoList := strings.Split(getConfigValueString(cfg.Section("MetaSearch").Key("Video"), strings.Join(defaultConfig.MetaSearch.Video, ",")), ",")
// Indexing
concurrentStandardCrawlers := getConfigValue(cfg.Section("Indexer").Key("ConcurrentStandardCrawlers"), defaultConfig.ConcurrentStandardCrawlers, strconv.Atoi)
concurrentChromeCrawlers := getConfigValue(cfg.Section("Indexer").Key("ConcurrentChromeCrawlers"), defaultConfig.ConcurrentChromeCrawlers, strconv.Atoi)
@ -371,6 +423,13 @@ func loadConfig() Config {
CrawlingInterval: crawlingInterval,
MaxPagesPerDomain: maxPagesPerDomain,
IndexBatchSize: indexBatchSize,
LibreXInstances: searchXInstances,
MetaSearch: MetaSearchConfig{
Text: textList,
Image: imageList,
Files: filesList,
Video: videoList,
},
DriveCache: CacheConfig{
Duration: driveDuration,
MaxUsageBytes: driveMaxUsage,

View file

@ -30,12 +30,26 @@ var (
var fileResultsChan = make(chan []TorrentResult)
func init() {
func initFileEngines() {
torrentGalaxy = nil
thePirateBay = nil
// nyaa = nil
// rutor = nil
for _, engineName := range config.MetaSearch.Files {
switch engineName {
case "TorrentGalaxy":
torrentGalaxy = NewTorrentGalaxy()
// nyaa = NewNyaa()
case "ThePirateBay":
thePirateBay = NewThePirateBay()
// case "Nyaa":
// nyaa = NewNyaa()
// case "Rutor":
// rutor = NewRutor()
}
}
}
func handleFileSearch(w http.ResponseWriter, settings UserSettings, query string, page int) {
startTime := time.Now()
@ -123,7 +137,7 @@ func fetchFileResults(query, safe, lang string, page int) []TorrentResult {
}
sites := []TorrentSite{torrentGalaxy, nyaa, thePirateBay, rutor}
results := []TorrentResult{}
var results []TorrentResult
for _, site := range sites {
if site == nil {
@ -140,10 +154,13 @@ func fetchFileResults(query, safe, lang string, page int) []TorrentResult {
}
}
// If no results, try from other nodes
if len(results) == 0 {
if config.NodesEnabled {
printWarn("No file results found for query: %s, trying other nodes", query)
results = tryOtherNodesForFileSearch(query, safe, lang, page, []string{hostID})
}
}
return results
}

View file

@ -10,12 +10,23 @@ import (
var imageSearchEngines []SearchEngine
func init() {
imageSearchEngines = []SearchEngine{
var allImageSearchEngines = []SearchEngine{
{Name: "Qwant", Func: wrapImageSearchFunc(PerformQwantImageSearch)},
{Name: "Bing", Func: wrapImageSearchFunc(PerformBingImageSearch)},
{Name: "DeviantArt", Func: wrapImageSearchFunc(PerformDeviantArtImageSearch)},
//{Name: "Imgur", Func: wrapImageSearchFunc(PerformImgurImageSearch), Weight: 4}, // Image proxy not working
// {Name: "Imgur", Func: wrapImageSearchFunc(PerformImgurImageSearch), Weight: 4}, // example
}
// initImageEngines rebuilds the active image search engine list from
// config.MetaSearch.Image, preserving the configured order. Safe to call
// again after a config reload.
func initImageEngines() {
	imageSearchEngines = nil

	for _, engineName := range config.MetaSearch.Image {
		found := false
		for _, candidate := range allImageSearchEngines {
			if candidate.Name == engineName {
				imageSearchEngines = append(imageSearchEngines, candidate)
				found = true
				break
			}
		}
		if !found {
			// A typo in the config would otherwise silently disable an engine.
			printWarn("unknown image search engine in config: %s", engineName)
		}
	}
}

View file

@ -69,6 +69,14 @@ func main() {
go periodicAgentUpdate()
}
// Load List of Meta Search Engines
if config.MetaSearchEnabled {
initTextEngines()
initImageEngines()
initFileEngines()
initPipedInstances()
}
InitializeLanguage("en") // Initialize language before generating OpenSearch
generateOpenSearchXML(config)

View file

@ -3,14 +3,11 @@ package main
import (
"encoding/json"
"fmt"
"log"
"net/http"
"net/url"
"time"
)
const LIBREX_DOMAIN = "librex.antopie.org"
type LibreXResult struct {
Title string `json:"title"`
URL string `json:"url"`
@ -20,13 +17,10 @@ type LibreXResult struct {
type LibreXResponse []LibreXResult
func PerformLibreXTextSearch(query, safe, lang string, page int) ([]TextSearchResult, time.Duration, error) {
startTime := time.Now() // Start the timer
startTime := time.Now()
// LibreX/Y uses offset instead of page that starts at 0
page--
page = page * 10
searchURL := fmt.Sprintf("https://%s/api.php?q=%s&p=%d&t=0", LIBREX_DOMAIN, url.QueryEscape(query), page)
// LibreX uses offset instead of page (starting at 0)
pageOffset := (page - 1) * 10
// Generate User-Agent
userAgent, err := GetUserAgent("librex-text-search")
@ -34,13 +28,23 @@ func PerformLibreXTextSearch(query, safe, lang string, page int) ([]TextSearchRe
return nil, 0, err
}
var allResults []TextSearchResult
for _, domain := range config.LibreXInstances {
searchURL := fmt.Sprintf("https://%s/api.php?q=%s&p=%d&t=0",
domain,
url.QueryEscape(query),
pageOffset,
)
req, err := http.NewRequest("GET", searchURL, nil)
if err != nil {
return nil, 0, err
printWarn("failed to create request for domain %s: %v", domain, err)
continue
}
req.Header.Set("User-Agent", userAgent)
// Perform the request using the appropriate client
// Respect MetaProxy if enabled and strict
var resp *http.Response
if config.MetaProxyEnabled && config.MetaProxyStrict && metaProxyClient != nil {
resp, err = metaProxyClient.Do(req)
@ -48,44 +52,38 @@ func PerformLibreXTextSearch(query, safe, lang string, page int) ([]TextSearchRe
client := &http.Client{}
resp, err = client.Do(req)
}
if err != nil {
return nil, 0, logError("error making request to LibreX", err)
printWarn("error requesting domain %s: %v", domain, err)
continue
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return nil, 0, logError("unexpected status code", fmt.Errorf("%d", resp.StatusCode))
printWarn("unexpected status code from %s: %d", domain, resp.StatusCode)
continue
}
var librexResp LibreXResponse
if err := json.NewDecoder(resp.Body).Decode(&librexResp); err != nil {
return nil, 0, logError("error decoding LibreX response", err)
printWarn("error decoding response from %s: %v", domain, err)
continue
}
var results []TextSearchResult
// Accumulate results from this instance
for _, item := range librexResp {
result := TextSearchResult{
allResults = append(allResults, TextSearchResult{
URL: item.URL,
Header: item.Title,
Description: item.Description,
Source: "LibreX",
})
}
}
results = append(results, result)
duration := time.Since(startTime)
if len(allResults) == 0 {
return nil, duration, fmt.Errorf("no results found from any LibreX instance")
}
duration := time.Since(startTime) // Calculate the duration
if len(results) == 0 {
return nil, duration, fmt.Errorf("no results found")
}
return results, duration, nil
}
// This is just stupid it will probably lead to printing error twice
func logError(message string, err error) error {
log.Printf("%s: %v", message, err)
return fmt.Errorf("%s: %w", message, err)
return allResults, duration, nil
}

24
text.go
View file

@ -8,13 +8,25 @@ import (
var textSearchEngines []SearchEngine
func init() {
textSearchEngines = []SearchEngine{
{Name: "Google", Func: wrapTextSearchFunc(PerformGoogleTextSearch)},
var allTextSearchEngines = []SearchEngine{
//{Name: "Google", Func: wrapTextSearchFunc(PerformGoogleTextSearch)},
{Name: "LibreX", Func: wrapTextSearchFunc(PerformLibreXTextSearch)},
{Name: "Brave", Func: wrapTextSearchFunc(PerformBraveTextSearch)},
{Name: "DuckDuckGo", Func: wrapTextSearchFunc(PerformDuckDuckGoTextSearch)},
// {Name: "SearXNG", Func: wrapTextSearchFunc(PerformSearXTextSearch)}, // Always says StatusCode: 429
// {Name: "Brave", Func: wrapTextSearchFunc(PerformBraveTextSearch)},
// {Name: "DuckDuckGo", Func: wrapTextSearchFunc(PerformDuckDuckGoTextSearch)},
// {Name: "SearXNG", Func: wrapTextSearchFunc(PerformSearXTextSearch)}, // example
}
// initTextEngines rebuilds the active text search engine list from
// config.MetaSearch.Text, preserving the configured order. Safe to call
// again after a config reload.
func initTextEngines() {
	textSearchEngines = nil

	for _, engineName := range config.MetaSearch.Text {
		found := false
		for _, candidate := range allTextSearchEngines {
			if candidate.Name == engineName {
				textSearchEngines = append(textSearchEngines, candidate)
				found = true
				break
			}
		}
		if !found {
			// A typo in the config would otherwise silently disable an engine.
			printWarn("unknown text search engine in config: %s", engineName)
		}
	}
}

View file

@ -12,25 +12,16 @@ import (
const retryDuration = 12 * time.Hour // Retry duration for unresponding piped instances
var (
pipedInstances = []string{
"api.piped.yt",
"pipedapi.moomoo.me",
"pipedapi.darkness.services",
"pipedapi.kavin.rocks",
"piped-api.hostux.net",
"pipedapi.syncpundit.io",
"piped-api.cfe.re",
"pipedapi.in.projectsegfau.lt",
"piapi.ggtyler.dev",
"piped-api.codespace.cz",
"pipedapi.coldforge.xyz",
"pipedapi.osphost.fi",
}
pipedInstances = []string{}
disabledInstances = make(map[string]bool)
mu sync.Mutex
videoResultsChan = make(chan []VideoResult) // Channel to receive video results from other nodes
)
func initPipedInstances() {
pipedInstances = config.MetaSearch.Video
}
// VideoAPIResponse matches the structure of the JSON response from the Piped API
type VideoAPIResponse struct {
Items []struct {