Added SOCKS5 proxy support
All checks were successful
Run Integration Tests / test (push) Successful in 33s

partisan 2025-01-12 16:46:52 +01:00
parent 234f1dd3be
commit 614ce8903e
22 changed files with 501 additions and 106 deletions

View file

@@ -47,11 +47,11 @@ A self-hosted private search engine designed to be scalable and more resource-ef
### For Self-Hosting
- **Self-hosted option** - Run on your own server for even more privacy.
- **[Easy to Set Up](https://weforge.xyz/Spitfire/Search#running-the-qgato)** - Quick and straightforward setup process for anyone.
- **Lightweight** - Low memory footprint (15-30MiB) even during searches.
- **Decentralized** - No single point of failure.
- **Results caching in RAM** - Faster response times through caching.
- **Configurable** - Tweak features via `config.ini`.
- **[Configurable](https://weforge.xyz/Spitfire/Search/wiki/Configuration)** - Fully customizable via the `config.ini` file.
- **Flexible media support** - Images optionally stored on HDD/SSD for caching and improved response time.
### Results Sources
@@ -73,30 +73,20 @@ A self-hosted private search engine designed to be scalable and more resource-ef
### Running the QGato
Linux:
```bash
git clone https://weforge.xyz/Spitfire/Search.git
cd Search
chmod +x ./run.sh
./run.sh
```
Windows:
```powershell
git clone https://weforge.xyz/Spitfire/Search.git
cd Search
.\run.bat
go run .
```
*It's that easy!*
### Configuring
Configuration is done via the ``config.ini`` file.
On first start, you will be guided through the basic setup.
More advanced setup and all options will be listed here later, as this is still being updated.
- Configuration is done via the `config.ini` file.
- On first start, you will be guided through the basic setup.
- For more advanced configuration options, visit the [Wiki Configuration Page](https://weforge.xyz/Spitfire/Search/wiki/Configuration).
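For illustration, a minimal `config.ini` enabling the new proxy options might look like the sketch below; the section and key names follow `saveConfig` in this commit, and the addresses are placeholders in the `ADDRESS:PORT` or `ADDRESS:PORT:USER:PASSWORD` form accepted by `ParseProxies`:
```ini
[Features]
Crawler = true
MetaProxy = true
CrawlerProxy = false

[Proxies]
MetaProxyStrict = true
MetaProxies = 127.0.0.1:9050,10.0.0.2:1080:user:pass
CrawlerProxyStrict = true
CrawlerProxies =
```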
## License

View file

@@ -23,18 +23,25 @@ type CacheConfig struct {
}
type Config struct {
Port int // Added
AuthCode string // Added
PeerID string // Added
Peers []string
Domain string // Added
NodesEnabled bool // Added
CrawlerEnabled bool // Added
IndexerEnabled bool // Added
WebsiteEnabled bool // Added
RamCacheEnabled bool
DriveCacheEnabled bool // Added
LogLevel int // Added
Port int // Added
AuthCode string // Added
PeerID string // Added
Peers []string
Domain string // Added
NodesEnabled bool // Added
MetaSearchEnabled bool // Added
IndexerEnabled bool // Added
WebsiteEnabled bool // Added
RamCacheEnabled bool
DriveCacheEnabled bool // Added
MetaProxyEnabled bool // Added
MetaProxyStrict bool // Added
MetaProxies []string // Added
CrawlerProxyEnabled bool // Added
CrawlerProxyStrict bool // Added
CrawlerProxies []string // Added
// Maybe add Proxy support for Image Extraction?
LogLevel int // Added
ConcurrentStandardCrawlers int
ConcurrentChromeCrawlers int
CrawlingInterval time.Duration // Refresh crawled results in...
@@ -51,11 +58,17 @@ var defaultConfig = Config{
Peers: []string{},
AuthCode: generateStrongRandomString(64),
NodesEnabled: false,
CrawlerEnabled: true,
MetaSearchEnabled: true,
IndexerEnabled: false,
WebsiteEnabled: true,
RamCacheEnabled: true,
DriveCacheEnabled: false,
MetaProxyEnabled: false,
MetaProxyStrict: true,
MetaProxies: []string{},
CrawlerProxyEnabled: false,
CrawlerProxyStrict: true,
CrawlerProxies: []string{},
ConcurrentStandardCrawlers: 12,
ConcurrentChromeCrawlers: 4,
CrawlingInterval: 24 * time.Hour,
@@ -245,14 +258,23 @@ func saveConfig(config Config) {
// Features section
featuresSec := cfg.Section("Features")
featuresSec.Key("Nodes").SetValue(strconv.FormatBool(config.NodesEnabled))
featuresSec.Key("Crawler").SetValue(strconv.FormatBool(config.CrawlerEnabled))
featuresSec.Key("Crawler").SetValue(strconv.FormatBool(config.MetaSearchEnabled))
featuresSec.Key("Indexer").SetValue(strconv.FormatBool(config.IndexerEnabled))
featuresSec.Key("Website").SetValue(strconv.FormatBool(config.WebsiteEnabled))
featuresSec.Key("MetaProxy").SetValue(strconv.FormatBool(config.MetaProxyEnabled))
featuresSec.Key("CrawlerProxy").SetValue(strconv.FormatBool(config.CrawlerProxyEnabled))
// Proxies section
proxiesSec := cfg.Section("Proxies")
proxiesSec.Key("MetaProxyStrict").SetValue(strconv.FormatBool(config.MetaProxyStrict))
proxiesSec.Key("MetaProxies").SetValue(strings.Join(config.MetaProxies, ","))
proxiesSec.Key("CrawlerProxyStrict").SetValue(strconv.FormatBool(config.CrawlerProxyStrict))
proxiesSec.Key("CrawlerProxies").SetValue(strings.Join(config.CrawlerProxies, ","))
// Indexer section
indexerSec := cfg.Section("Indexer")
indexerSec.Key("ConcurrentStandardCrawlers").SetValue(strconv.Itoa(config.ConcurrentStandardCrawlers))
indexerSec.Key("ConcurrentChromeCrawlers").SetValue(strconv.Itoa(config.ConcurrentStandardCrawlers))
indexerSec.Key("ConcurrentChromeCrawlers").SetValue(strconv.Itoa(config.ConcurrentChromeCrawlers))
indexerSec.Key("CrawlingInterval").SetValue(config.CrawlingInterval.String())
indexerSec.Key("MaxPagesPerDomain").SetValue(strconv.Itoa(config.MaxPagesPerDomain))
indexerSec.Key("IndexBatchSize").SetValue(strconv.Itoa(config.IndexBatchSize))
@@ -292,11 +314,19 @@ func loadConfig() Config {
// Features
nodesEnabled := getConfigValueBool(cfg.Section("Features").Key("Nodes"), defaultConfig.NodesEnabled)
crawlerEnabled := getConfigValueBool(cfg.Section("Features").Key("Crawler"), defaultConfig.CrawlerEnabled)
metaSearchEnabled := getConfigValueBool(cfg.Section("Features").Key("Crawler"), defaultConfig.MetaSearchEnabled)
indexerEnabled := getConfigValueBool(cfg.Section("Features").Key("Indexer"), defaultConfig.IndexerEnabled)
websiteEnabled := getConfigValueBool(cfg.Section("Features").Key("Website"), defaultConfig.WebsiteEnabled)
ramCacheEnabled := getConfigValueBool(cfg.Section("Features").Key("RamCache"), defaultConfig.RamCacheEnabled)
driveCacheEnabled := getConfigValueBool(cfg.Section("Features").Key("DriveCache"), defaultConfig.DriveCacheEnabled)
metaProxyEnabled := getConfigValueBool(cfg.Section("Features").Key("MetaProxy"), defaultConfig.MetaProxyEnabled)
crawlerProxyEnabled := getConfigValueBool(cfg.Section("Features").Key("CrawlerProxy"), defaultConfig.CrawlerProxyEnabled)
// Proxies
metaProxyStrict := getConfigValueBool(cfg.Section("Proxies").Key("MetaProxyStrict"), defaultConfig.MetaProxyStrict)
metaProxies := strings.Split(getConfigValueString(cfg.Section("Proxies").Key("MetaProxies"), ""), ",")
crawlerProxyStrict := getConfigValueBool(cfg.Section("Proxies").Key("CrawlerProxyStrict"), defaultConfig.CrawlerProxyStrict)
crawlerProxies := strings.Split(getConfigValueString(cfg.Section("Proxies").Key("CrawlerProxies"), ""), ",")
// Indexing
concurrentStandardCrawlers := getConfigValue(cfg.Section("Indexer").Key("ConcurrentStandardCrawlers"), defaultConfig.ConcurrentStandardCrawlers, strconv.Atoi)
@@ -325,11 +355,17 @@ func loadConfig() Config {
AuthCode: authCode,
Peers: peers,
NodesEnabled: nodesEnabled,
CrawlerEnabled: crawlerEnabled,
MetaSearchEnabled: metaSearchEnabled,
IndexerEnabled: indexerEnabled,
WebsiteEnabled: websiteEnabled,
RamCacheEnabled: ramCacheEnabled,
DriveCacheEnabled: driveCacheEnabled,
MetaProxyEnabled: metaProxyEnabled,
MetaProxyStrict: metaProxyStrict,
MetaProxies: metaProxies,
CrawlerProxyEnabled: crawlerProxyEnabled,
CrawlerProxyStrict: crawlerProxyStrict,
CrawlerProxies: crawlerProxies,
ConcurrentStandardCrawlers: concurrentStandardCrawlers,
ConcurrentChromeCrawlers: concurrentChromeCrawlers,
CrawlingInterval: crawlingInterval,

View file

@@ -32,8 +32,12 @@ func fetchPageMetadataStandard(pageURL, userAgent string) (string, string, strin
// fetchPageMetadataChrome uses Chromedp to handle JavaScript-rendered pages.
func fetchPageMetadataChrome(pageURL, userAgent string) (string, string, string) {
// Create context
ctx, cancel := chromedp.NewContext(context.Background())
// Create a custom allocator context for Chromedp with proxy support if enabled
allocCtx, cancelAlloc := chromedp.NewExecAllocator(context.Background(), configureChromeOptions()...)
defer cancelAlloc()
// Create a browser context
ctx, cancel := chromedp.NewContext(allocCtx)
defer cancel()
var renderedHTML string
@@ -57,6 +61,32 @@ func fetchPageMetadataChrome(pageURL, userAgent string) (string, string, string)
return extractParsedDOM(doc)
}
// configureChromeOptions sets up Chrome options and proxy if CrawlerProxy is enabled.
func configureChromeOptions() []chromedp.ExecAllocatorOption {
options := chromedp.DefaultExecAllocatorOptions[:]
if config.CrawlerProxyEnabled && crawlerProxyClient != nil {
// Retrieve proxy settings from CrawlerProxy
proxy := crawlerProxyClient.GetProxy() // next proxy address (socks5://host:port) from the pool, round-robin
if proxy != "" {
options = append(options, chromedp.ProxyServer(proxy))
printDebug("Using CrawlerProxy for Chromedp: %s", proxy)
} else {
printWarn("CrawlerProxy is enabled but no valid proxy is available")
}
}
// // Add additional Chrome options if needed:
// options = append(options,
// chromedp.Flag("headless", true),
// chromedp.Flag("disable-gpu", true),
// chromedp.Flag("no-sandbox", true),
// chromedp.Flag("disable-setuid-sandbox", true),
// )
return options
}
// extractStandard does the normal HTML parse with OG, Twitter, etc.
func extractStandard(pageURL, userAgent string) (title, desc, keywords string) {
client := &http.Client{Timeout: 15 * time.Second}
@@ -68,7 +98,13 @@ func extractStandard(pageURL, userAgent string) (title, desc, keywords string) {
req.Header.Set("User-Agent", userAgent)
req.Header.Set("Accept-Language", "en-US,en;q=0.9")
resp, err := client.Do(req)
// Use CrawlerProxy if enabled
var resp *http.Response
if config.CrawlerProxyEnabled && crawlerProxyClient != nil {
resp, err = crawlerProxyClient.Do(req)
} else {
resp, err = client.Do(req)
}
if err != nil {
printDebug("Failed to GET %s: %v", pageURL, err)
return
@@ -185,7 +221,13 @@ func fallbackReadability(pageURL, userAgent, title, desc, keywords string) (stri
readReq.Header.Set("User-Agent", userAgent)
readReq.Header.Set("Accept-Language", "en-US,en;q=0.9")
readResp, err := client.Do(readReq)
// Use CrawlerProxy if enabled
var readResp *http.Response
if config.CrawlerProxyEnabled && crawlerProxyClient != nil {
readResp, err = crawlerProxyClient.Do(readReq)
} else {
readResp, err = client.Do(readReq)
}
if err != nil || readResp.StatusCode < 200 || readResp.StatusCode >= 300 {
if err != nil {
printDebug("go-readability GET error for %s: %v", pageURL, err)

View file

@@ -57,31 +57,40 @@ func (t *ThePirateBay) Search(query string, category string) ([]TorrentResult, e
return []TorrentResult{}, nil
}
url := fmt.Sprintf("https://%s/q.php?q=%s&cat=%s", PIRATEBAY_DOMAIN, url.QueryEscape(query), categoryCode)
searchURL := fmt.Sprintf("https://%s/q.php?q=%s&cat=%s", PIRATEBAY_DOMAIN, url.QueryEscape(query), categoryCode)
// User Agent generation
userAgent, err := GetUserAgent("files-tpb")
if err != nil {
fmt.Println("Error:", err)
return nil, err
return nil, fmt.Errorf("error generating User-Agent: %w", err)
}
req, err := http.NewRequest("GET", url, nil)
req, err := http.NewRequest("GET", searchURL, nil)
if err != nil {
return nil, err
return nil, fmt.Errorf("error creating request: %w", err)
}
req.Header.Set("User-Agent", userAgent)
client := &http.Client{}
response, err := client.Do(req)
if err != nil {
return nil, err
// Perform the request using MetaProxy if enabled
var resp *http.Response
if config.MetaProxyEnabled && metaProxyClient != nil {
resp, err = metaProxyClient.Do(req)
} else {
client := &http.Client{}
resp, err = client.Do(req)
}
if err != nil {
return nil, fmt.Errorf("error making request to The Pirate Bay: %w", err)
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
}
defer response.Body.Close()
var torrentData []map[string]interface{}
if err := json.NewDecoder(response.Body).Decode(&torrentData); err != nil {
return nil, err
if err := json.NewDecoder(resp.Body).Decode(&torrentData); err != nil {
return nil, fmt.Errorf("error decoding response JSON: %w", err)
}
var results []TorrentResult
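The proxy-or-direct branch above is repeated in every engine this commit touches. A small helper could centralize the pattern; the sketch below assumes the package-level `config` and `metaProxyClient` from `proxy.go`, and the name `doMetaSearchRequest` is hypothetical, not part of the commit:
```go
// doMetaSearchRequest routes a request through the MetaProxy pool when it is
// enabled and initialized, falling back to a plain HTTP client otherwise.
// (Hypothetical helper; not part of this commit.)
func doMetaSearchRequest(req *http.Request) (*http.Response, error) {
	if config.MetaProxyEnabled && metaProxyClient != nil {
		return metaProxyClient.Do(req)
	}
	client := &http.Client{Timeout: 10 * time.Second}
	return client.Do(req)
}
```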

View file

@@ -62,18 +62,23 @@ func (tg *TorrentGalaxy) Search(query string, category string) ([]TorrentResult,
// User Agent generation
userAgent, err := GetUserAgent("files-torrentgalaxy")
if err != nil {
fmt.Println("Error:", err)
return nil, err
return nil, fmt.Errorf("error generating User-Agent: %w", err)
}
req, err := http.NewRequest("GET", searchURL, nil)
if err != nil {
return nil, err
return nil, fmt.Errorf("error creating request: %w", err)
}
req.Header.Set("User-Agent", userAgent)
client := &http.Client{}
resp, err := client.Do(req)
// Perform the request using MetaProxy if enabled
var resp *http.Response
if config.MetaProxyEnabled && metaProxyClient != nil {
resp, err = metaProxyClient.Do(req)
} else {
client := &http.Client{}
resp, err = client.Do(req)
}
if err != nil {
return nil, fmt.Errorf("error making request to TorrentGalaxy: %w", err)
}

View file

@@ -88,7 +88,7 @@ func getFileResultsFromCacheOrFetch(cacheKey CacheKey, query, safe, lang string,
case results := <-cacheChan:
if results == nil {
// Fetch only if the cache miss occurs and Crawler is enabled
if config.CrawlerEnabled {
if config.MetaSearchEnabled {
combinedResults = fetchFileResults(query, safe, lang, page)
if len(combinedResults) > 0 {
resultsCache.Set(cacheKey, convertToSearchResults(combinedResults))
@@ -102,7 +102,7 @@ func getFileResultsFromCacheOrFetch(cacheKey CacheKey, query, safe, lang string,
}
case <-time.After(2 * time.Second):
printDebug("Cache check timeout")
if config.CrawlerEnabled {
if config.MetaSearchEnabled {
combinedResults = fetchFileResults(query, safe, lang, page)
if len(combinedResults) > 0 {
resultsCache.Set(cacheKey, convertToSearchResults(combinedResults))
@@ -117,7 +117,7 @@ func getFileResultsFromCacheOrFetch(cacheKey CacheKey, query, safe, lang string,
func fetchFileResults(query, safe, lang string, page int) []TorrentResult {
// If Crawler is disabled, skip fetching from torrent sites
if !config.CrawlerEnabled {
if !config.MetaSearchEnabled {
printInfo("Crawler is disabled; skipping torrent site fetching.")
return []TorrentResult{}
}

View file

@@ -10,7 +10,7 @@ import (
)
func PerformRedditSearch(query string, safe string, page int) ([]ForumSearchResult, error) {
if !config.CrawlerEnabled {
if !config.MetaSearchEnabled {
printDebug("Crawler is disabled; skipping forum search.")
return []ForumSearchResult{}, nil
}
@@ -150,7 +150,7 @@ func getForumResultsFromCacheOrFetch(cacheKey CacheKey, query, safe, lang string
case results := <-cacheChan:
if results == nil {
// Fetch only if the cache miss occurs and Crawler is enabled
if config.CrawlerEnabled {
if config.MetaSearchEnabled {
combinedResults = fetchForumResults(query, safe, lang, page)
if len(combinedResults) > 0 {
resultsCache.Set(cacheKey, convertToSearchResults(combinedResults))
@@ -164,7 +164,7 @@ func getForumResultsFromCacheOrFetch(cacheKey CacheKey, query, safe, lang string
}
case <-time.After(2 * time.Second):
printDebug("Cache check timeout")
if config.CrawlerEnabled {
if config.MetaSearchEnabled {
combinedResults = fetchForumResults(query, safe, lang, page)
if len(combinedResults) > 0 {
resultsCache.Set(cacheKey, convertToSearchResults(combinedResults))

View file

@@ -18,8 +18,27 @@ func PerformBingImageSearch(query, safe, lang string, page int) ([]ImageSearchRe
// Build the search URL
searchURL := buildBingSearchURL(query, page)
// Make the HTTP request
resp, err := http.Get(searchURL)
// Create the HTTP request
req, err := http.NewRequest("GET", searchURL, nil)
if err != nil {
return nil, 0, fmt.Errorf("creating request: %v", err)
}
// Set User-Agent
ImageUserAgent, err := GetUserAgent("Image-Search-Bing")
if err != nil {
return nil, 0, fmt.Errorf("generating User-Agent: %v", err)
}
req.Header.Set("User-Agent", ImageUserAgent)
// Use MetaProxy if enabled
var resp *http.Response
if config.MetaProxyEnabled && metaProxyClient != nil {
resp, err = metaProxyClient.Do(req)
} else {
client := &http.Client{Timeout: 10 * time.Second}
resp, err = client.Do(req)
}
if err != nil {
return nil, 0, fmt.Errorf("making request: %v", err)
}

View file

@@ -87,15 +87,21 @@ func PerformDeviantArtImageSearch(query, safe, lang string, page int) ([]ImageSe
return nil, 0, err
}
// Make the HTTP request with User-Agent header
client := &http.Client{}
// Create the HTTP request
req, err := http.NewRequest("GET", searchURL, nil)
if err != nil {
return nil, 0, fmt.Errorf("creating request: %v", err)
}
req.Header.Set("User-Agent", DeviantArtImageUserAgent)
resp, err := client.Do(req)
// Perform the request using MetaProxy if enabled
var resp *http.Response
if config.MetaProxyEnabled && metaProxyClient != nil {
resp, err = metaProxyClient.Do(req)
} else {
client := &http.Client{}
resp, err = client.Do(req)
}
if err != nil {
return nil, 0, fmt.Errorf("making request: %v", err)
}
@@ -182,7 +188,7 @@ func PerformDeviantArtImageSearch(query, safe, lang string, page int) ([]ImageSe
duration := time.Since(startTime)
// Check if the number of results is one or less
// Check if the number of results is zero
if len(results) == 0 {
return nil, duration, fmt.Errorf("no images found")
}

View file

@@ -18,7 +18,27 @@ func PerformImgurImageSearch(query, safe, lang string, page int) ([]ImageSearchR
var results []ImageSearchResult
searchURL := buildImgurSearchURL(query, page)
resp, err := http.Get(searchURL)
// Create the HTTP request
req, err := http.NewRequest("GET", searchURL, nil)
if err != nil {
return nil, 0, fmt.Errorf("creating request: %v", err)
}
// Get the User-Agent string
imgurUserAgent, err := GetUserAgent("Image-Search-Imgur")
if err != nil {
return nil, 0, fmt.Errorf("getting user-agent: %v", err)
}
req.Header.Set("User-Agent", imgurUserAgent)
// Perform the HTTP request with MetaProxy if enabled
var resp *http.Response
if config.MetaProxyEnabled && metaProxyClient != nil {
resp, err = metaProxyClient.Do(req)
} else {
client := &http.Client{}
resp, err = client.Do(req)
}
if err != nil {
return nil, 0, fmt.Errorf("making request: %v", err)
}
@@ -28,6 +48,7 @@ func PerformImgurImageSearch(query, safe, lang string, page int) ([]ImageSearchR
return nil, 0, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
}
// Parse the HTML document
doc, err := goquery.NewDocumentFromReader(resp.Body)
if err != nil {
return nil, 0, fmt.Errorf("loading HTML document: %v", err)
@@ -76,12 +97,35 @@ func PerformImgurImageSearch(query, safe, lang string, page int) ([]ImageSearchR
duration := time.Since(startTime) // Calculate the duration
if len(results) == 0 {
return nil, duration, fmt.Errorf("no images found")
}
return results, duration, nil
}
// scrapeImageFromImgurPage scrapes the image source from the Imgur page
func scrapeImageFromImgurPage(pageURL string) string {
resp, err := http.Get(pageURL)
req, err := http.NewRequest("GET", pageURL, nil)
if err != nil {
fmt.Printf("Error creating request for page: %v\n", err)
return ""
}
// Get the User-Agent string
imgurUserAgent, err := GetUserAgent("Image-Search-Imgur")
if err == nil {
req.Header.Set("User-Agent", imgurUserAgent)
}
// Perform the request using MetaProxy if enabled
var resp *http.Response
if config.MetaProxyEnabled && metaProxyClient != nil {
resp, err = metaProxyClient.Do(req)
} else {
client := &http.Client{}
resp, err = client.Do(req)
}
if err != nil {
fmt.Printf("Error fetching page: %v\n", err)
return ""

View file

@@ -97,7 +97,7 @@ func PerformQwantImageSearch(query, safe, lang string, page int) ([]ImageSearchR
// Ensure count + offset is within acceptable limits
if offset+resultsPerPage > 250 {
return nil, 0, fmt.Errorf("count + offset must be lower than 250 for quant")
return nil, 0, fmt.Errorf("count + offset must be lower than 250 for Qwant")
}
if safe == "" {
@@ -113,21 +113,27 @@ func PerformQwantImageSearch(query, safe, lang string, page int) ([]ImageSearchR
offset,
safe)
client := &http.Client{Timeout: 10 * time.Second}
// Create the HTTP request
req, err := http.NewRequest("GET", apiURL, nil)
if err != nil {
return nil, 0, fmt.Errorf("creating request: %v", err)
}
// Get the User-Agent string
ImageUserAgent, err := GetUserAgent("Image-Search-Quant")
if err != nil {
return nil, 0, err
return nil, 0, fmt.Errorf("getting user-agent: %v", err)
}
req.Header.Set("User-Agent", ImageUserAgent)
req.Header.Set("User-Agent", ImageUserAgent) // Quant seems to not like some specific User-Agent strings
resp, err := client.Do(req)
// Perform the request with MetaProxy if enabled
var resp *http.Response
if config.MetaProxyEnabled && metaProxyClient != nil {
resp, err = metaProxyClient.Do(req)
} else {
client := &http.Client{Timeout: 10 * time.Second}
resp, err = client.Do(req)
}
if err != nil {
return nil, 0, fmt.Errorf("making request: %v", err)
}
@@ -137,11 +143,13 @@ func PerformQwantImageSearch(query, safe, lang string, page int) ([]ImageSearchR
return nil, 0, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
}
// Parse the API response
var apiResp QwantAPIResponse
if err := json.NewDecoder(resp.Body).Decode(&apiResp); err != nil {
return nil, 0, fmt.Errorf("decoding response: %v", err)
}
// Process the results
var wg sync.WaitGroup
results := make([]ImageSearchResult, len(apiResp.Data.Result.Items))
@@ -174,5 +182,9 @@ func PerformQwantImageSearch(query, safe, lang string, page int) ([]ImageSearchR
duration := time.Since(startTime) // Calculate the duration
if len(results) == 0 {
return nil, duration, fmt.Errorf("no images found")
}
return results, duration, nil
}

View file

@@ -86,7 +86,7 @@ func getImageResultsFromCacheOrFetch(cacheKey CacheKey, query, safe, lang string
select {
case results := <-cacheChan:
if results == nil {
if config.CrawlerEnabled {
if config.MetaSearchEnabled {
combinedResults = fetchImageResults(query, safe, lang, page, synchronous)
if len(combinedResults) > 0 {
combinedResults = filterValidImages(combinedResults)
@@ -101,7 +101,7 @@ func getImageResultsFromCacheOrFetch(cacheKey CacheKey, query, safe, lang string
}
case <-time.After(2 * time.Second):
printDebug("Cache check timeout")
if config.CrawlerEnabled {
if config.MetaSearchEnabled {
combinedResults = fetchImageResults(query, safe, lang, page, synchronous)
if len(combinedResults) > 0 {
combinedResults = filterValidImages(combinedResults)
@@ -118,8 +118,8 @@ func getImageResultsFromCacheOrFetch(cacheKey CacheKey, query, safe, lang string
func fetchImageResults(query, safe, lang string, page int, synchronous bool) []ImageSearchResult {
var results []ImageSearchResult
// Check if CrawlerEnabled is false
if !config.CrawlerEnabled {
// Check if MetaSearchEnabled is false
if !config.MetaSearchEnabled {
printDebug("Crawler is disabled; skipping image search engine fetching.")
return results
}

View file

@@ -60,8 +60,12 @@ func main() {
}
config.PeerID = hostID
if config.CrawlerProxyEnabled || config.MetaProxyEnabled {
InitProxies()
}
// Initiate Browser Agent updater
if config.CrawlerEnabled || config.IndexerEnabled {
if config.MetaSearchEnabled || config.IndexerEnabled {
go periodicAgentUpdate()
}

proxy.go (new file, 169 lines)
View file

@@ -0,0 +1,169 @@
package main
import (
"fmt"
"net/http"
"strings"
"sync"
"time"
"golang.org/x/net/proxy"
)
// ProxyConfig holds configuration for a single proxy.
type ProxyConfig struct {
Address string
Username string
Password string
}
// ProxyClient provides an HTTP client pool for proxies.
type ProxyClient struct {
clients []*http.Client
addresses []string // proxy address per client; reported by GetProxy
lock sync.Mutex
index int
}
// Package-level proxy clients
var (
metaProxyClient *ProxyClient
crawlerProxyClient *ProxyClient
)
// NewProxyClientPool creates a pool of HTTP clients with proxies.
func NewProxyClientPool(proxies []ProxyConfig, timeout time.Duration) (*ProxyClient, error) {
if len(proxies) == 0 {
return nil, fmt.Errorf("no proxies provided")
}
clients := make([]*http.Client, len(proxies))
addresses := make([]string, len(proxies))
for i, proxyConfig := range proxies {
var auth *proxy.Auth
if proxyConfig.Username != "" || proxyConfig.Password != "" {
auth = &proxy.Auth{
User: proxyConfig.Username,
Password: proxyConfig.Password,
}
}
dialer, err := proxy.SOCKS5("tcp", proxyConfig.Address, auth, proxy.Direct)
if err != nil {
return nil, fmt.Errorf("failed to create SOCKS5 dialer for %s: %w", proxyConfig.Address, err)
}
transport := &http.Transport{Dial: dialer.Dial}
clients[i] = &http.Client{
Transport: transport,
Timeout: timeout,
}
addresses[i] = proxyConfig.Address
}
return &ProxyClient{clients: clients, addresses: addresses}, nil
}
// Do sends an HTTP request using the next proxy in the pool.
func (p *ProxyClient) Do(req *http.Request) (*http.Response, error) {
p.lock.Lock()
client := p.clients[p.index]
p.index = (p.index + 1) % len(p.clients)
p.lock.Unlock()
return client.Do(req)
}
// GetProxy returns the next proxy address in round-robin order. The address
// is stored at pool creation: SOCKS5 clients use a custom dialer, so
// Transport.Proxy is never set and cannot be read back from the transport.
func (p *ProxyClient) GetProxy() string {
p.lock.Lock()
defer p.lock.Unlock()
if len(p.addresses) == 0 {
return ""
}
addr := p.addresses[p.index]
p.index = (p.index + 1) % len(p.addresses)
// Chrome's --proxy-server treats schemeless addresses as HTTP proxies, so
// include the scheme. Note that credentials cannot be passed to Chrome here.
return "socks5://" + addr
}
// ParseProxies parses the proxy strings in the format ADDRESS:PORT or ADDRESS:PORT:USER:PASSWORD.
func ParseProxies(proxyStrings []string) []ProxyConfig {
var proxies []ProxyConfig
for _, proxyStr := range proxyStrings { // proxyStr avoids shadowing the imported proxy package
parts := strings.Split(proxyStr, ":")
if len(parts) == 2 { // ADDRESS:PORT
proxies = append(proxies, ProxyConfig{
Address: fmt.Sprintf("%s:%s", parts[0], parts[1]),
})
} else if len(parts) == 4 { // ADDRESS:PORT:USER:PASSWORD
proxies = append(proxies, ProxyConfig{
Address: fmt.Sprintf("%s:%s", parts[0], parts[1]),
Username: parts[2],
Password: parts[3],
})
} else {
printWarn("Invalid proxy format: %s", proxyStr)
}
}
return proxies
}
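Putting `ParseProxies` and the pool together, intended usage looks roughly like this (the addresses are placeholders):
```go
// Build a pool from configured strings, then round-robin requests across it.
proxies := ParseProxies([]string{"127.0.0.1:9050", "10.0.0.2:1080:user:pass"})
pool, err := NewProxyClientPool(proxies, 30*time.Second)
if err != nil {
	printWarn("Proxy pool unavailable: %v", err)
} else {
	req, _ := http.NewRequest("GET", "https://example.com", nil)
	if resp, err := pool.Do(req); err == nil { // each call rotates to the next proxy
		defer resp.Body.Close()
	}
}
```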
// InitProxies initializes the proxy clients for Meta and Crawler proxies.
func InitProxies() {
// Initialize Meta Proxy Client
if config.MetaProxyEnabled {
metaProxies := ParseProxies(config.MetaProxies)
client, err := NewProxyClientPool(metaProxies, 30*time.Second)
if err != nil {
if config.MetaProxyStrict {
panic(fmt.Sprintf("Failed to initialize Meta proxies: %v", err))
}
fmt.Printf("Warning: Meta proxy initialization failed: %v\n", err)
}
metaProxyClient = client
}
// Initialize Crawler Proxy Client
if config.CrawlerProxyEnabled {
crawlerProxies := ParseProxies(config.CrawlerProxies)
client, err := NewProxyClientPool(crawlerProxies, 30*time.Second)
if err != nil {
if config.CrawlerProxyStrict {
panic(fmt.Sprintf("Failed to initialize Crawler proxies: %v", err))
}
fmt.Printf("Warning: Crawler proxy initialization failed: %v\n", err)
}
crawlerProxyClient = client
}
}
// func main() {
// config := loadConfig()
// // Initialize proxies if enabled
// if config.CrawlerProxyEnabled || config.MetaProxyEnabled {
// InitProxies()
// }
// // Example usage
// if metaProxyClient != nil {
// req, _ := http.NewRequest("GET", "https://example.com", nil)
// resp, err := metaProxyClient.Do(req)
// if err != nil {
// fmt.Printf("Error using MetaProxyClient: %v\n", err)
// } else {
// fmt.Printf("Meta Proxy Response Status: %s\n", resp.Status)
// resp.Body.Close()
// }
// }
// }

View file

@@ -33,9 +33,16 @@ func PerformBraveTextSearch(query, safe, lang string, offset int) ([]TextSearchR
}
req.Header.Set("User-Agent", TextUserAgent)
// Perform the HTTP request
client := &http.Client{}
resp, err := client.Do(req)
var resp *http.Response
// Determine whether to use a proxy client or a default client
if config.MetaProxyEnabled && metaProxyClient != nil {
resp, err = metaProxyClient.Do(req)
} else {
client := &http.Client{}
resp, err = client.Do(req)
}
if err != nil {
printWarn("Error performing request: %v", err)
return nil, 0, fmt.Errorf("performing request: %v", err)

View file

@@ -16,21 +16,38 @@ func PerformDuckDuckGoTextSearch(query, safe, lang string, page int) ([]TextSear
var results []TextSearchResult
searchURL := buildDuckDuckGoSearchURL(query, page)
resp, err := http.Get(searchURL)
// Create a request
req, err := http.NewRequest("GET", searchURL, nil)
if err != nil {
return nil, 0, fmt.Errorf("creating request: %v", err)
}
// Use proxy client if MetaProxy is enabled
var resp *http.Response
if config.MetaProxyEnabled && metaProxyClient != nil {
resp, err = metaProxyClient.Do(req)
} else {
client := &http.Client{}
resp, err = client.Do(req)
}
if err != nil {
return nil, 0, fmt.Errorf("making request: %v", err)
}
defer resp.Body.Close()
// Check for HTTP status code
if resp.StatusCode != http.StatusOK {
return nil, 0, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
}
// Parse HTML response
doc, err := goquery.NewDocumentFromReader(resp.Body)
if err != nil {
return nil, 0, fmt.Errorf("loading HTML document: %v", err)
}
// Extract results from the page
doc.Find(".result__body").Each(func(i int, s *goquery.Selection) {
header := s.Find(".result__a").Text()
description := s.Find(".result__snippet").Text()

View file

@@ -16,37 +16,48 @@ func PerformGoogleTextSearch(query, safe, lang string, page int) ([]TextSearchRe
startTime := time.Now() // Start the timer
client := &http.Client{}
// Build the search URL
searchURL := buildSearchURL(query, safe, lang, page, resultsPerPage)
// Create a new request
req, err := http.NewRequest("GET", searchURL, nil)
if err != nil {
return nil, 0, fmt.Errorf("failed to create request: %v", err)
}
// User Agent generation
TextUserAgent, err := GetUserAgent("Text-Search")
// Generate and set User-Agent header
TextUserAgent, err := GetUserAgent("Google-Text-Search")
if err != nil {
return nil, 0, err
}
req.Header.Set("User-Agent", TextUserAgent)
resp, err := client.Do(req)
// Perform the request using proxy if MetaProxy is enabled
var resp *http.Response
if config.MetaProxyEnabled && metaProxyClient != nil {
resp, err = metaProxyClient.Do(req)
} else {
client := &http.Client{}
resp, err = client.Do(req)
}
if err != nil {
return nil, 0, fmt.Errorf("making request: %v", err)
}
defer resp.Body.Close()
// Check for HTTP status code
if resp.StatusCode != http.StatusOK {
return nil, 0, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
}
// Parse the HTML response
doc, err := goquery.NewDocumentFromReader(resp.Body)
if err != nil {
return nil, 0, fmt.Errorf("loading HTML document: %v", err)
}
// Extract search results
results = parseResults(doc)
duration := time.Since(startTime) // Calculate the duration

View file

@@ -28,7 +28,7 @@ func PerformLibreXTextSearch(query, safe, lang string, page int) ([]TextSearchRe
searchURL := fmt.Sprintf("https://%s/api.php?q=%s&p=%d&t=0", LIBREX_DOMAIN, url.QueryEscape(query), page)
// User Agent generation
// Generate User-Agent
userAgent, err := GetUserAgent("librex-text-search")
if err != nil {
return nil, 0, err
@@ -40,8 +40,15 @@ func PerformLibreXTextSearch(query, safe, lang string, page int) ([]TextSearchRe
}
req.Header.Set("User-Agent", userAgent)
client := &http.Client{}
resp, err := client.Do(req)
// Perform the request using the appropriate client
var resp *http.Response
if config.MetaProxyEnabled && metaProxyClient != nil {
resp, err = metaProxyClient.Do(req)
} else {
client := &http.Client{}
resp, err = client.Do(req)
}
if err != nil {
return nil, 0, logError("error making request to LibreX", err)
}
@@ -77,7 +84,7 @@ func PerformLibreXTextSearch(query, safe, lang string, page int) ([]TextSearchRe
return results, duration, nil
}
// This is just stupid it will probbably lead to printing error twice
// This is just stupid it will probably lead to printing error twice
func logError(message string, err error) error {
log.Printf("%s: %v", message, err)
return fmt.Errorf("%s: %w", message, err)

View file

@@ -46,8 +46,6 @@ func PerformQwantTextSearch(query, safe, lang string) ([]TextSearchResult, error
lang,
offset)
client := &http.Client{Timeout: 10 * time.Second}
req, err := http.NewRequest("GET", apiURL, nil)
if err != nil {
return nil, fmt.Errorf("creating request: %v", err)
@@ -55,7 +53,15 @@ func PerformQwantTextSearch(query, safe, lang string) ([]TextSearchResult, error
req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36")
resp, err := client.Do(req)
// Perform the request using the appropriate client
var resp *http.Response
if config.MetaProxyEnabled && metaProxyClient != nil {
resp, err = metaProxyClient.Do(req)
} else {
client := &http.Client{Timeout: 10 * time.Second}
resp, err = client.Do(req)
}
if err != nil {
return nil, fmt.Errorf("making request: %v", err)
}

View file

@@ -39,7 +39,6 @@ const searxInstancesURL = "https://searx.space/data/instances.json"
// FetchInstances fetches available SearX instances from the registry.
func fetchInstances() ([]Instance, error) {
client := &http.Client{Timeout: 10 * time.Second}
req, err := http.NewRequest("GET", searxInstancesURL, nil)
if err != nil {
return nil, fmt.Errorf("creating request: %v", err)
@@ -51,7 +50,14 @@ }
}
req.Header.Set("User-Agent", XNGUserAgent)
resp, err := client.Do(req)
var resp *http.Response
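// Note: unlike the other engines in this commit, the instance-registry fetch
// goes through the MetaProxy pool only when MetaProxyStrict is also set.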
if config.MetaProxyEnabled && config.MetaProxyStrict && metaProxyClient != nil {
resp, err = metaProxyClient.Do(req)
} else {
client := &http.Client{Timeout: 10 * time.Second}
resp, err = client.Do(req)
}
if err != nil {
return nil, fmt.Errorf("performing request: %v", err)
}
@@ -191,7 +197,6 @@ func PerformSearXTextSearch(query, categories, language string, page int) ([]Tex
searchURL := fmt.Sprintf("%s/search?q=%s&categories=%s&language=%s&safe_search=%s&page=%d&format=json",
instance.URL, url.QueryEscape(query), categories, language, safe, page)
client := &http.Client{Timeout: 10 * time.Second}
req, err := http.NewRequest("GET", searchURL, nil)
if err != nil {
return nil, 0, fmt.Errorf("creating request: %v", err)
@@ -203,7 +208,13 @@ }
}
req.Header.Set("User-Agent", XNGUserAgent)
resp, err := client.Do(req)
var resp *http.Response
if config.MetaProxyEnabled && metaProxyClient != nil {
resp, err = metaProxyClient.Do(req)
} else {
client := &http.Client{Timeout: 10 * time.Second}
resp, err = client.Do(req)
}
if err != nil {
return nil, 0, fmt.Errorf("performing request: %v", err)
}

View file

@@ -98,7 +98,7 @@ func prefetchPage(query, safe, lang string, page int) {
cacheKey := CacheKey{Query: query, Page: page, Safe: safe == "active", Lang: lang, Type: "text"}
if _, exists := resultsCache.Get(cacheKey); !exists {
printInfo("Page %d not cached, caching now...", page)
if config.CrawlerEnabled {
if config.MetaSearchEnabled {
pageResults := fetchTextResults(query, safe, lang, page)
if len(pageResults) > 0 {
resultsCache.Set(cacheKey, convertToSearchResults(pageResults))
@@ -114,7 +114,7 @@ func prefetchPage(query, safe, lang string, page int) {
func fetchTextResults(query, safe, lang string, page int) []TextSearchResult {
var results []TextSearchResult
if !config.CrawlerEnabled {
if !config.MetaSearchEnabled {
printDebug("Crawler is disabled; fetching from local index.")
// Calculate the starting position based on the page number

View file

@@ -151,7 +151,7 @@ func handleVideoSearch(w http.ResponseWriter, settings UserSettings, query strin
start := time.Now()
var results []VideoResult
if config.CrawlerEnabled {
if config.MetaSearchEnabled {
results = fetchVideoResults(query, settings.SafeSearch, settings.SearchLanguage, page)
}
@@ -184,12 +184,12 @@ func handleVideoSearch(w http.ResponseWriter, settings UserSettings, query strin
func fetchVideoResults(query, safe, lang string, page int) []VideoResult {
// Check if the crawler is enabled
if !config.CrawlerEnabled {
if !config.MetaSearchEnabled {
printDebug("Crawler is disabled; skipping video search.")
return []VideoResult{}
}
// Proceed with Piped API request if CrawlerEnabled
// Proceed with Piped API request if MetaSearchEnabled
apiResp, err := makeHTMLRequest(query, safe, lang, page)
if err != nil {
printWarn("Error fetching video results: %v", err)