updated SearXNG search
All checks were successful
Run Integration Tests / test (push) Successful in 34s

This commit is contained in:
partisan 2025-01-11 23:06:48 +01:00
parent 24c7a09479
commit 234f1dd3be
2 changed files with 173 additions and 26 deletions

View file

@ -5,34 +5,55 @@ import (
"fmt" "fmt"
"io" "io"
"net/http" "net/http"
"net/url"
"strings"
"time" "time"
) )
type Instance struct { type Instance struct {
URL string `json:"url"` URL string `json:"-"` // Populated from map key
Status int `json:"status"` Analytics bool `json:"analytics"`
SSLGrade string `json:"ssl_grade"` Comments []string `json:"comments"`
AlternativeUrls map[string]interface{} `json:"alternativeUrls"`
Main bool `json:"main"`
NetworkType string `json:"network_type"`
HTTP struct {
StatusCode int `json:"status_code"`
Error string `json:"error"`
} `json:"http"`
Version string `json:"version"`
Grade string `json:"grade"`
GradeURL string `json:"gradeUrl"`
Generator string `json:"generator"`
ContactURL FlexibleType `json:"contact_url"` // Custom type
DocsURL string `json:"docs_url"`
}
// FlexibleType holds a JSON value that may be either a string or a boolean.
// IsString tells which of StringValue and BoolValue carries the decoded value.
type FlexibleType struct {
	StringValue string
	BoolValue   bool
	IsString    bool
}
// searxInstancesURL is the registry document listing public SearX instances.
const searxInstancesURL = "https://searx.space/data/instances.json"
// fetchInstances fetches the available SearX instances from the searx.space registry.
func fetchInstances() ([]Instance, error) { func fetchInstances() ([]Instance, error) {
client := &http.Client{Timeout: 10 * time.Second} client := &http.Client{Timeout: 10 * time.Second}
req, err := http.NewRequest("GET", searxInstancesURL, nil) req, err := http.NewRequest("GET", searxInstancesURL, nil)
if err != nil { if err != nil {
return nil, err return nil, fmt.Errorf("creating request: %v", err)
} }
XNGUserAgent, err := GetUserAgent("Text-Search-XNG") XNGUserAgent, err := GetUserAgent("Text-Search-XNG")
if err != nil { if err != nil {
return nil, err return nil, fmt.Errorf("generating User-Agent: %v", err)
} }
req.Header.Set("User-Agent", XNGUserAgent) req.Header.Set("User-Agent", XNGUserAgent)
resp, err := client.Do(req) resp, err := client.Do(req)
if err != nil { if err != nil {
return nil, err return nil, fmt.Errorf("performing request: %v", err)
} }
defer resp.Body.Close() defer resp.Body.Close()
@ -42,42 +63,103 @@ func fetchInstances() ([]Instance, error) {
body, err := io.ReadAll(resp.Body) body, err := io.ReadAll(resp.Body)
if err != nil { if err != nil {
return nil, err return nil, fmt.Errorf("reading response body: %v", err)
} }
var instances []Instance // Root structure of the JSON response
err = json.Unmarshal(body, &instances) var root struct {
Instances map[string]Instance `json:"instances"`
}
// Unmarshal JSON into the root structure
err = json.Unmarshal(body, &root)
if err != nil { if err != nil {
return nil, err return nil, fmt.Errorf("parsing response JSON: %v", err)
}
// Collect instances into a slice
var instances []Instance
for url, instance := range root.Instances {
instance.URL = url // Assign the URL from the map key
instances = append(instances, instance)
} }
return instances, nil return instances, nil
} }
// UnmarshalJSON decodes a JSON value that is either a string or a boolean.
//
// The string interpretation is attempted first; when it succeeds the text is
// stored in StringValue and IsString is set. Otherwise the boolean
// interpretation is attempted and stored in BoolValue with IsString cleared.
// Input that fits neither form yields an error quoting the raw bytes.
func (f *FlexibleType) UnmarshalJSON(data []byte) error {
	var text string
	if json.Unmarshal(data, &text) == nil {
		f.StringValue, f.IsString = text, true
		return nil
	}

	var flag bool
	if json.Unmarshal(data, &flag) == nil {
		f.BoolValue, f.IsString = flag, false
		return nil
	}

	// Neither a string nor a bool.
	return fmt.Errorf("invalid FlexibleType: %s", string(data))
}
// String renders the held value as text: StringValue when IsString is set,
// otherwise BoolValue formatted as "true" or "false".
func (f FlexibleType) String() string {
	if !f.IsString {
		return fmt.Sprint(f.BoolValue)
	}
	return f.StringValue
}
// ValidateInstance checks if a SearX instance is valid by performing a test query.
func validateInstance(instance Instance) bool { func validateInstance(instance Instance) bool {
client := &http.Client{Timeout: 10 * time.Second} // Skip .onion instances
req, err := http.NewRequest("GET", fmt.Sprintf("%s/search?q=test&categories=general&language=en&safe_search=1&page=1&format=json", instance.URL), nil) if strings.Contains(instance.URL, ".onion") {
if err != nil { printDebug("Skipping .onion instance: %s", instance.URL)
printWarn("Error creating request for URL: %s, Error: %v", instance.URL, err)
return false return false
} }
req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")
client := &http.Client{
Timeout: 10 * time.Second,
}
testURL := fmt.Sprintf("%s/search?q=test&categories=general&language=en&safe_search=1&page=1&format=json", instance.URL)
req, err := http.NewRequest("GET", testURL, nil)
if err != nil {
printDebug("Error creating SearchXNG request for instance validation: %v", err)
return false
}
XNGUserAgent, err := GetUserAgent("Text-Search-XNG")
if err != nil {
printWarn("Error generating User-Agent: %v", err)
return false
}
req.Header.Set("User-Agent", XNGUserAgent)
resp, err := client.Do(req) resp, err := client.Do(req)
if err != nil { if err != nil {
printWarn("Error performing request for URL: %s, Error: %v", instance.URL, err) printDebug("Error performing request for SearchXNG instance validation: %v", err)
return false return false
} }
defer resp.Body.Close() defer resp.Body.Close()
if resp.StatusCode != http.StatusOK { if resp.StatusCode != http.StatusOK {
printWarn("Instance validation failed for URL: %s, StatusCode: %d", instance.URL, resp.StatusCode) printDebug("SearchXNG Instance validation failed. StatusCode: %d", resp.StatusCode)
return false return false
} }
// Successful validation
return true return true
} }
// getValidInstance fetches and validates SearX instances, returning a valid one.
func getValidInstance() (*Instance, error) { func getValidInstance() (*Instance, error) {
instances, err := fetchInstances() instances, err := fetchInstances()
if err != nil { if err != nil {
@ -93,11 +175,76 @@ func getValidInstance() (*Instance, error) {
return nil, fmt.Errorf("no valid SearX instances found") return nil, fmt.Errorf("no valid SearX instances found")
} }
// func main() { // PerformSearXTextSearch performs a text search using a SearX instance.
// instance, err := getValidInstance() func PerformSearXTextSearch(query, categories, language string, page int) ([]TextSearchResult, time.Duration, error) {
// if err != nil { // Default value for "safe" search
// log.Fatalf("Failed to get a valid SearX instance: %v", err) safe := "1"
// }
// log.Printf("Selected SearX instance: %s", instance.URL) startTime := time.Now() // Start the timer
// } var results []TextSearchResult
instance, err := getValidInstance()
if err != nil {
return nil, 0, fmt.Errorf("failed to get a valid SearX instance: %w", err)
}
searchURL := fmt.Sprintf("%s/search?q=%s&categories=%s&language=%s&safe_search=%s&page=%d&format=json",
instance.URL, url.QueryEscape(query), categories, language, safe, page)
client := &http.Client{Timeout: 10 * time.Second}
req, err := http.NewRequest("GET", searchURL, nil)
if err != nil {
return nil, 0, fmt.Errorf("creating request: %v", err)
}
XNGUserAgent, err := GetUserAgent("Text-Search-XNG")
if err != nil {
return nil, 0, fmt.Errorf("generating User-Agent: %v", err)
}
req.Header.Set("User-Agent", XNGUserAgent)
resp, err := client.Do(req)
if err != nil {
return nil, 0, fmt.Errorf("performing request: %v", err)
}
defer resp.Body.Close()
body, err := io.ReadAll(resp.Body)
if err != nil {
return nil, 0, fmt.Errorf("reading response body: %v", err)
}
// Parse the JSON response to extract search results
var response map[string]interface{}
err = json.Unmarshal(body, &response)
if err != nil {
return nil, 0, fmt.Errorf("parsing response JSON: %v", err)
}
// Extract search results
if items, ok := response["results"].([]interface{}); ok {
for _, item := range items {
if result, ok := item.(map[string]interface{}); ok {
title := strings.TrimSpace(fmt.Sprintf("%v", result["title"]))
url := strings.TrimSpace(fmt.Sprintf("%v", result["url"]))
description := strings.TrimSpace(fmt.Sprintf("%v", result["content"]))
results = append(results, TextSearchResult{
Header: title,
URL: url,
Description: description,
})
}
}
}
duration := time.Since(startTime) // Calculate the duration
if len(results) == 0 {
printDebug("No results found for query: %s", query)
return nil, duration, fmt.Errorf("no results found")
}
printDebug("Search completed successfully for query: %s, found %d results", query, len(results))
return results, duration, nil
}

View file

@ -14,7 +14,7 @@ func init() {
{Name: "LibreX", Func: wrapTextSearchFunc(PerformLibreXTextSearch)}, {Name: "LibreX", Func: wrapTextSearchFunc(PerformLibreXTextSearch)},
{Name: "Brave", Func: wrapTextSearchFunc(PerformBraveTextSearch)}, {Name: "Brave", Func: wrapTextSearchFunc(PerformBraveTextSearch)},
{Name: "DuckDuckGo", Func: wrapTextSearchFunc(PerformDuckDuckGoTextSearch)}, {Name: "DuckDuckGo", Func: wrapTextSearchFunc(PerformDuckDuckGoTextSearch)},
// {Name: "SearXNG", Func: wrapTextSearchFunc(PerformSearXNGTextSearch), Weight: 2}, // Uncomment when implemented // {Name: "SearXNG", Func: wrapTextSearchFunc(PerformSearXTextSearch)}, // Always says StatusCode: 429
} }
} }