added config values to configure enabled meta-search engines
This commit is contained in:
parent
dc4a3a4bec
commit
5e6fc73038
7 changed files with 184 additions and 88 deletions
59
config.go
59
config.go
|
@ -22,6 +22,13 @@ type CacheConfig struct {
|
|||
Path string
|
||||
}
|
||||
|
||||
type MetaSearchConfig struct {
|
||||
Text []string
|
||||
Image []string
|
||||
Files []string
|
||||
Video []string
|
||||
}
|
||||
|
||||
type Config struct {
|
||||
Port int // Added
|
||||
AuthCode string // Added
|
||||
|
@ -47,6 +54,9 @@ type Config struct {
|
|||
CrawlingInterval time.Duration // Refresh crawled results in...
|
||||
MaxPagesPerDomain int // Max pages to crawl per domain
|
||||
IndexBatchSize int
|
||||
LibreXInstances []string
|
||||
|
||||
MetaSearch MetaSearchConfig
|
||||
|
||||
DriveCache CacheConfig
|
||||
RamCache CacheConfig
|
||||
|
@ -75,6 +85,33 @@ var defaultConfig = Config{
|
|||
MaxPagesPerDomain: 10,
|
||||
IndexBatchSize: 50,
|
||||
LogLevel: 1,
|
||||
LibreXInstances: []string{"librex.antopie.org"},
|
||||
MetaSearch: MetaSearchConfig{
|
||||
// For Text search (skip SearXNG and LibreX by default, as that would be mega stupid)
|
||||
Text: []string{"Google", "Brave", "DuckDuckGo"},
|
||||
|
||||
// For Image search
|
||||
Image: []string{"Qwant", "Bing", "DeviantArt"},
|
||||
|
||||
// For Files search
|
||||
Files: []string{"TorrentGalaxy", "ThePirateBay"},
|
||||
|
||||
// For Video (piped instances)
|
||||
Video: []string{
|
||||
"api.piped.yt",
|
||||
"pipedapi.moomoo.me",
|
||||
"pipedapi.darkness.services",
|
||||
"pipedapi.kavin.rocks",
|
||||
"piped-api.hostux.net",
|
||||
"pipedapi.syncpundit.io",
|
||||
"piped-api.cfe.re",
|
||||
"pipedapi.in.projectsegfau.lt",
|
||||
"piapi.ggtyler.dev",
|
||||
"piped-api.codespace.cz",
|
||||
"pipedapi.coldforge.xyz",
|
||||
"pipedapi.osphost.fi",
|
||||
},
|
||||
},
|
||||
DriveCache: CacheConfig{
|
||||
Duration: 48 * time.Hour, // Added
|
||||
Path: "./cache", // Added
|
||||
|
@ -271,6 +308,14 @@ func saveConfig(config Config) {
|
|||
proxiesSec.Key("CrawlerProxyStrict").SetValue(strconv.FormatBool(config.CrawlerProxyStrict))
|
||||
proxiesSec.Key("CrawlerProxies").SetValue(strings.Join(config.CrawlerProxies, ","))
|
||||
|
||||
// MetaSearch section
|
||||
metaSec := cfg.Section("MetaSearches")
|
||||
metaSec.Key("LibreXInstances").SetValue(strings.Join(config.LibreXInstances, ","))
|
||||
metaSec.Key("Text").SetValue(strings.Join(config.MetaSearch.Text, ","))
|
||||
metaSec.Key("Image").SetValue(strings.Join(config.MetaSearch.Image, ","))
|
||||
metaSec.Key("Files").SetValue(strings.Join(config.MetaSearch.Files, ","))
|
||||
metaSec.Key("Video").SetValue(strings.Join(config.MetaSearch.Video, ","))
|
||||
|
||||
// Indexer section
|
||||
indexerSec := cfg.Section("Indexer")
|
||||
indexerSec.Key("ConcurrentStandardCrawlers").SetValue(strconv.Itoa(config.ConcurrentStandardCrawlers))
|
||||
|
@ -328,6 +373,13 @@ func loadConfig() Config {
|
|||
crawlerProxyStrict := getConfigValueBool(cfg.Section("Proxies").Key("CrawlerProxyStrict"), defaultConfig.CrawlerProxyStrict)
|
||||
crawlerProxies := strings.Split(getConfigValueString(cfg.Section("Proxies").Key("CrawlerProxies"), ""), ",")
|
||||
|
||||
// MetaSearch
|
||||
searchXInstances := strings.Split(getConfigValueString(cfg.Section("MetaSearches").Key("LibreXInstances"), strings.Join(defaultConfig.LibreXInstances, ",")), ",")
|
||||
textList := strings.Split(getConfigValueString(cfg.Section("MetaSearch").Key("Text"), strings.Join(defaultConfig.MetaSearch.Text, ",")), ",")
|
||||
imageList := strings.Split(getConfigValueString(cfg.Section("MetaSearch").Key("Image"), strings.Join(defaultConfig.MetaSearch.Image, ",")), ",")
|
||||
filesList := strings.Split(getConfigValueString(cfg.Section("MetaSearch").Key("Files"), strings.Join(defaultConfig.MetaSearch.Files, ",")), ",")
|
||||
videoList := strings.Split(getConfigValueString(cfg.Section("MetaSearch").Key("Video"), strings.Join(defaultConfig.MetaSearch.Video, ",")), ",")
|
||||
|
||||
// Indexing
|
||||
concurrentStandardCrawlers := getConfigValue(cfg.Section("Indexer").Key("ConcurrentStandardCrawlers"), defaultConfig.ConcurrentStandardCrawlers, strconv.Atoi)
|
||||
concurrentChromeCrawlers := getConfigValue(cfg.Section("Indexer").Key("ConcurrentChromeCrawlers"), defaultConfig.ConcurrentChromeCrawlers, strconv.Atoi)
|
||||
|
@ -371,6 +423,13 @@ func loadConfig() Config {
|
|||
CrawlingInterval: crawlingInterval,
|
||||
MaxPagesPerDomain: maxPagesPerDomain,
|
||||
IndexBatchSize: indexBatchSize,
|
||||
LibreXInstances: searchXInstances,
|
||||
MetaSearch: MetaSearchConfig{
|
||||
Text: textList,
|
||||
Image: imageList,
|
||||
Files: filesList,
|
||||
Video: videoList,
|
||||
},
|
||||
DriveCache: CacheConfig{
|
||||
Duration: driveDuration,
|
||||
MaxUsageBytes: driveMaxUsage,
|
||||
|
|
23
files.go
23
files.go
|
@ -30,12 +30,26 @@ var (
|
|||
|
||||
var fileResultsChan = make(chan []TorrentResult)
|
||||
|
||||
func init() {
|
||||
func initFileEngines() {
|
||||
|
||||
torrentGalaxy = nil
|
||||
thePirateBay = nil
|
||||
// nyaa = nil
|
||||
// rutor = nil
|
||||
|
||||
for _, engineName := range config.MetaSearch.Files {
|
||||
switch engineName {
|
||||
case "TorrentGalaxy":
|
||||
torrentGalaxy = NewTorrentGalaxy()
|
||||
// nyaa = NewNyaa()
|
||||
case "ThePirateBay":
|
||||
thePirateBay = NewThePirateBay()
|
||||
// case "Nyaa":
|
||||
// nyaa = NewNyaa()
|
||||
// case "Rutor":
|
||||
// rutor = NewRutor()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func handleFileSearch(w http.ResponseWriter, settings UserSettings, query string, page int) {
|
||||
startTime := time.Now()
|
||||
|
@ -123,7 +137,7 @@ func fetchFileResults(query, safe, lang string, page int) []TorrentResult {
|
|||
}
|
||||
|
||||
sites := []TorrentSite{torrentGalaxy, nyaa, thePirateBay, rutor}
|
||||
results := []TorrentResult{}
|
||||
var results []TorrentResult
|
||||
|
||||
for _, site := range sites {
|
||||
if site == nil {
|
||||
|
@ -140,10 +154,13 @@ func fetchFileResults(query, safe, lang string, page int) []TorrentResult {
|
|||
}
|
||||
}
|
||||
|
||||
// If no results, try from other nodes
|
||||
if len(results) == 0 {
|
||||
if config.NodesEnabled {
|
||||
printWarn("No file results found for query: %s, trying other nodes", query)
|
||||
results = tryOtherNodesForFileSearch(query, safe, lang, page, []string{hostID})
|
||||
}
|
||||
}
|
||||
|
||||
return results
|
||||
}
|
||||
|
|
17
images.go
17
images.go
|
@ -10,12 +10,23 @@ import (
|
|||
|
||||
var imageSearchEngines []SearchEngine
|
||||
|
||||
func init() {
|
||||
imageSearchEngines = []SearchEngine{
|
||||
var allImageSearchEngines = []SearchEngine{
|
||||
{Name: "Qwant", Func: wrapImageSearchFunc(PerformQwantImageSearch)},
|
||||
{Name: "Bing", Func: wrapImageSearchFunc(PerformBingImageSearch)},
|
||||
{Name: "DeviantArt", Func: wrapImageSearchFunc(PerformDeviantArtImageSearch)},
|
||||
//{Name: "Imgur", Func: wrapImageSearchFunc(PerformImgurImageSearch), Weight: 4}, // Image proxy not working
|
||||
// {Name: "Imgur", Func: wrapImageSearchFunc(PerformImgurImageSearch), Weight: 4}, // example
|
||||
}
|
||||
|
||||
func initImageEngines() {
|
||||
imageSearchEngines = nil
|
||||
|
||||
for _, engineName := range config.MetaSearch.Image {
|
||||
for _, candidate := range allImageSearchEngines {
|
||||
if candidate.Name == engineName {
|
||||
imageSearchEngines = append(imageSearchEngines, candidate)
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
8
init.go
8
init.go
|
@ -69,6 +69,14 @@ func main() {
|
|||
go periodicAgentUpdate()
|
||||
}
|
||||
|
||||
// Load List of Meta Search Engines
|
||||
if config.MetaSearchEnabled {
|
||||
initTextEngines()
|
||||
initImageEngines()
|
||||
initFileEngines()
|
||||
initPipedInstances()
|
||||
}
|
||||
|
||||
InitializeLanguage("en") // Initialize language before generating OpenSearch
|
||||
generateOpenSearchXML(config)
|
||||
|
||||
|
|
|
@ -3,14 +3,11 @@ package main
|
|||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"log"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"time"
|
||||
)
|
||||
|
||||
const LIBREX_DOMAIN = "librex.antopie.org"
|
||||
|
||||
type LibreXResult struct {
|
||||
Title string `json:"title"`
|
||||
URL string `json:"url"`
|
||||
|
@ -20,13 +17,10 @@ type LibreXResult struct {
|
|||
type LibreXResponse []LibreXResult
|
||||
|
||||
func PerformLibreXTextSearch(query, safe, lang string, page int) ([]TextSearchResult, time.Duration, error) {
|
||||
startTime := time.Now() // Start the timer
|
||||
startTime := time.Now()
|
||||
|
||||
// LibreX/Y uses offset instead of page that starts at 0
|
||||
page--
|
||||
page = page * 10
|
||||
|
||||
searchURL := fmt.Sprintf("https://%s/api.php?q=%s&p=%d&t=0", LIBREX_DOMAIN, url.QueryEscape(query), page)
|
||||
// LibreX uses offset instead of page (starting at 0)
|
||||
pageOffset := (page - 1) * 10
|
||||
|
||||
// Generate User-Agent
|
||||
userAgent, err := GetUserAgent("librex-text-search")
|
||||
|
@ -34,13 +28,23 @@ func PerformLibreXTextSearch(query, safe, lang string, page int) ([]TextSearchRe
|
|||
return nil, 0, err
|
||||
}
|
||||
|
||||
var allResults []TextSearchResult
|
||||
|
||||
for _, domain := range config.LibreXInstances {
|
||||
searchURL := fmt.Sprintf("https://%s/api.php?q=%s&p=%d&t=0",
|
||||
domain,
|
||||
url.QueryEscape(query),
|
||||
pageOffset,
|
||||
)
|
||||
|
||||
req, err := http.NewRequest("GET", searchURL, nil)
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
printWarn("failed to create request for domain %s: %v", domain, err)
|
||||
continue
|
||||
}
|
||||
req.Header.Set("User-Agent", userAgent)
|
||||
|
||||
// Perform the request using the appropriate client
|
||||
// Respect MetaProxy if enabled and strict
|
||||
var resp *http.Response
|
||||
if config.MetaProxyEnabled && config.MetaProxyStrict && metaProxyClient != nil {
|
||||
resp, err = metaProxyClient.Do(req)
|
||||
|
@ -48,44 +52,38 @@ func PerformLibreXTextSearch(query, safe, lang string, page int) ([]TextSearchRe
|
|||
client := &http.Client{}
|
||||
resp, err = client.Do(req)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return nil, 0, logError("error making request to LibreX", err)
|
||||
printWarn("error requesting domain %s: %v", domain, err)
|
||||
continue
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return nil, 0, logError("unexpected status code", fmt.Errorf("%d", resp.StatusCode))
|
||||
printWarn("unexpected status code from %s: %d", domain, resp.StatusCode)
|
||||
continue
|
||||
}
|
||||
|
||||
var librexResp LibreXResponse
|
||||
if err := json.NewDecoder(resp.Body).Decode(&librexResp); err != nil {
|
||||
return nil, 0, logError("error decoding LibreX response", err)
|
||||
printWarn("error decoding response from %s: %v", domain, err)
|
||||
continue
|
||||
}
|
||||
|
||||
var results []TextSearchResult
|
||||
// Accumulate results from this instance
|
||||
for _, item := range librexResp {
|
||||
result := TextSearchResult{
|
||||
allResults = append(allResults, TextSearchResult{
|
||||
URL: item.URL,
|
||||
Header: item.Title,
|
||||
Description: item.Description,
|
||||
Source: "LibreX",
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
results = append(results, result)
|
||||
duration := time.Since(startTime)
|
||||
if len(allResults) == 0 {
|
||||
return nil, duration, fmt.Errorf("no results found from any LibreX instance")
|
||||
}
|
||||
|
||||
duration := time.Since(startTime) // Calculate the duration
|
||||
|
||||
if len(results) == 0 {
|
||||
return nil, duration, fmt.Errorf("no results found")
|
||||
}
|
||||
|
||||
return results, duration, nil
|
||||
}
|
||||
|
||||
// This is just stupid it will probably lead to printing error twice
|
||||
func logError(message string, err error) error {
|
||||
log.Printf("%s: %v", message, err)
|
||||
return fmt.Errorf("%s: %w", message, err)
|
||||
return allResults, duration, nil
|
||||
}
|
||||
|
|
24
text.go
24
text.go
|
@ -8,13 +8,25 @@ import (
|
|||
|
||||
var textSearchEngines []SearchEngine
|
||||
|
||||
func init() {
|
||||
textSearchEngines = []SearchEngine{
|
||||
{Name: "Google", Func: wrapTextSearchFunc(PerformGoogleTextSearch)},
|
||||
var allTextSearchEngines = []SearchEngine{
|
||||
//{Name: "Google", Func: wrapTextSearchFunc(PerformGoogleTextSearch)},
|
||||
{Name: "LibreX", Func: wrapTextSearchFunc(PerformLibreXTextSearch)},
|
||||
{Name: "Brave", Func: wrapTextSearchFunc(PerformBraveTextSearch)},
|
||||
{Name: "DuckDuckGo", Func: wrapTextSearchFunc(PerformDuckDuckGoTextSearch)},
|
||||
// {Name: "SearXNG", Func: wrapTextSearchFunc(PerformSearXTextSearch)}, // Always says StatusCode: 429
|
||||
// {Name: "Brave", Func: wrapTextSearchFunc(PerformBraveTextSearch)},
|
||||
// {Name: "DuckDuckGo", Func: wrapTextSearchFunc(PerformDuckDuckGoTextSearch)},
|
||||
// {Name: "SearXNG", Func: wrapTextSearchFunc(PerformSearXTextSearch)}, // example
|
||||
}
|
||||
|
||||
func initTextEngines() {
|
||||
// textSearchEngines is your final slice (already declared globally)
|
||||
textSearchEngines = nil // or make([]SearchEngine, 0)
|
||||
|
||||
for _, engineName := range config.MetaSearch.Text {
|
||||
for _, candidate := range allTextSearchEngines {
|
||||
if candidate.Name == engineName {
|
||||
textSearchEngines = append(textSearchEngines, candidate)
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
19
video.go
19
video.go
|
@ -12,25 +12,16 @@ import (
|
|||
const retryDuration = 12 * time.Hour // Retry duration for unresponsive piped instances
|
||||
|
||||
var (
|
||||
pipedInstances = []string{
|
||||
"api.piped.yt",
|
||||
"pipedapi.moomoo.me",
|
||||
"pipedapi.darkness.services",
|
||||
"pipedapi.kavin.rocks",
|
||||
"piped-api.hostux.net",
|
||||
"pipedapi.syncpundit.io",
|
||||
"piped-api.cfe.re",
|
||||
"pipedapi.in.projectsegfau.lt",
|
||||
"piapi.ggtyler.dev",
|
||||
"piped-api.codespace.cz",
|
||||
"pipedapi.coldforge.xyz",
|
||||
"pipedapi.osphost.fi",
|
||||
}
|
||||
pipedInstances = []string{}
|
||||
disabledInstances = make(map[string]bool)
|
||||
mu sync.Mutex
|
||||
videoResultsChan = make(chan []VideoResult) // Channel to receive video results from other nodes
|
||||
)
|
||||
|
||||
func initPipedInstances() {
|
||||
pipedInstances = config.MetaSearch.Video
|
||||
}
|
||||
|
||||
// VideoAPIResponse matches the structure of the JSON response from the Piped API
|
||||
type VideoAPIResponse struct {
|
||||
Items []struct {
|
||||
|
|
Loading…
Add table
Reference in a new issue