updated SearXNG search
All checks were successful
Run Integration Tests / test (push) Successful in 34s
All checks were successful
Run Integration Tests / test (push) Successful in 34s
This commit is contained in:
parent
24c7a09479
commit
234f1dd3be
2 changed files with 173 additions and 26 deletions
|
@ -5,34 +5,55 @@ import (
|
|||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
type Instance struct {
|
||||
URL string `json:"url"`
|
||||
Status int `json:"status"`
|
||||
SSLGrade string `json:"ssl_grade"`
|
||||
URL string `json:"-"` // Populated from map key
|
||||
Analytics bool `json:"analytics"`
|
||||
Comments []string `json:"comments"`
|
||||
AlternativeUrls map[string]interface{} `json:"alternativeUrls"`
|
||||
Main bool `json:"main"`
|
||||
NetworkType string `json:"network_type"`
|
||||
HTTP struct {
|
||||
StatusCode int `json:"status_code"`
|
||||
Error string `json:"error"`
|
||||
} `json:"http"`
|
||||
Version string `json:"version"`
|
||||
Grade string `json:"grade"`
|
||||
GradeURL string `json:"gradeUrl"`
|
||||
Generator string `json:"generator"`
|
||||
ContactURL FlexibleType `json:"contact_url"` // Custom type
|
||||
DocsURL string `json:"docs_url"`
|
||||
}
|
||||
|
||||
// FlexibleType holds a JSON value that may be either a string or a boolean.
//
// IsString records which of the two value fields is meaningful: StringValue
// when it is true, BoolValue when it is false.
type FlexibleType struct {
	StringValue string // Set when the JSON value was a string.
	BoolValue   bool   // Set when the JSON value was a boolean.
	IsString    bool   // Discriminator: true means StringValue is the value.
}
|
||||
|
||||
// searxInstancesURL is the address of the JSON instance registry that
// fetchInstances requests.
const searxInstancesURL = "https://searx.space/data/instances.json"
|
||||
|
||||
// fetchInstances fetches available SearX instances from the registry.
|
||||
func fetchInstances() ([]Instance, error) {
|
||||
client := &http.Client{Timeout: 10 * time.Second}
|
||||
req, err := http.NewRequest("GET", searxInstancesURL, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return nil, fmt.Errorf("creating request: %v", err)
|
||||
}
|
||||
|
||||
XNGUserAgent, err := GetUserAgent("Text-Search-XNG")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return nil, fmt.Errorf("generating User-Agent: %v", err)
|
||||
}
|
||||
|
||||
req.Header.Set("User-Agent", XNGUserAgent)
|
||||
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return nil, fmt.Errorf("performing request: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
|
@ -42,42 +63,103 @@ func fetchInstances() ([]Instance, error) {
|
|||
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return nil, fmt.Errorf("reading response body: %v", err)
|
||||
}
|
||||
|
||||
var instances []Instance
|
||||
err = json.Unmarshal(body, &instances)
|
||||
// Root structure of the JSON response
|
||||
var root struct {
|
||||
Instances map[string]Instance `json:"instances"`
|
||||
}
|
||||
|
||||
// Unmarshal JSON into the root structure
|
||||
err = json.Unmarshal(body, &root)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return nil, fmt.Errorf("parsing response JSON: %v", err)
|
||||
}
|
||||
|
||||
// Collect instances into a slice
|
||||
var instances []Instance
|
||||
for url, instance := range root.Instances {
|
||||
instance.URL = url // Assign the URL from the map key
|
||||
instances = append(instances, instance)
|
||||
}
|
||||
|
||||
return instances, nil
|
||||
}
|
||||
|
||||
// UnmarshalJSON implements custom unmarshalling for FlexibleType.
|
||||
func (f *FlexibleType) UnmarshalJSON(data []byte) error {
|
||||
// Try to unmarshal as a string
|
||||
var str string
|
||||
if err := json.Unmarshal(data, &str); err == nil {
|
||||
f.StringValue = str
|
||||
f.IsString = true
|
||||
return nil
|
||||
}
|
||||
|
||||
// Try to unmarshal as a bool
|
||||
var b bool
|
||||
if err := json.Unmarshal(data, &b); err == nil {
|
||||
f.BoolValue = b
|
||||
f.IsString = false
|
||||
return nil
|
||||
}
|
||||
|
||||
// Return an error if neither works
|
||||
return fmt.Errorf("invalid FlexibleType: %s", string(data))
|
||||
}
|
||||
|
||||
// String returns the string representation of FlexibleType.
|
||||
func (f FlexibleType) String() string {
|
||||
if f.IsString {
|
||||
return f.StringValue
|
||||
}
|
||||
return fmt.Sprintf("%v", f.BoolValue)
|
||||
}
|
||||
|
||||
// ValidateInstance checks if a SearX instance is valid by performing a test query.
|
||||
func validateInstance(instance Instance) bool {
|
||||
client := &http.Client{Timeout: 10 * time.Second}
|
||||
req, err := http.NewRequest("GET", fmt.Sprintf("%s/search?q=test&categories=general&language=en&safe_search=1&page=1&format=json", instance.URL), nil)
|
||||
if err != nil {
|
||||
printWarn("Error creating request for URL: %s, Error: %v", instance.URL, err)
|
||||
// Skip .onion instances
|
||||
if strings.Contains(instance.URL, ".onion") {
|
||||
printDebug("Skipping .onion instance: %s", instance.URL)
|
||||
return false
|
||||
}
|
||||
req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")
|
||||
|
||||
client := &http.Client{
|
||||
Timeout: 10 * time.Second,
|
||||
}
|
||||
|
||||
testURL := fmt.Sprintf("%s/search?q=test&categories=general&language=en&safe_search=1&page=1&format=json", instance.URL)
|
||||
req, err := http.NewRequest("GET", testURL, nil)
|
||||
if err != nil {
|
||||
printDebug("Error creating SearchXNG request for instance validation: %v", err)
|
||||
return false
|
||||
}
|
||||
|
||||
XNGUserAgent, err := GetUserAgent("Text-Search-XNG")
|
||||
if err != nil {
|
||||
printWarn("Error generating User-Agent: %v", err)
|
||||
return false
|
||||
}
|
||||
req.Header.Set("User-Agent", XNGUserAgent)
|
||||
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
printWarn("Error performing request for URL: %s, Error: %v", instance.URL, err)
|
||||
printDebug("Error performing request for SearchXNG instance validation: %v", err)
|
||||
return false
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
printWarn("Instance validation failed for URL: %s, StatusCode: %d", instance.URL, resp.StatusCode)
|
||||
printDebug("SearchXNG Instance validation failed. StatusCode: %d", resp.StatusCode)
|
||||
return false
|
||||
}
|
||||
|
||||
// Successful validation
|
||||
return true
|
||||
}
|
||||
|
||||
// getValidInstance fetches and validates SearX instances, returning a valid one.
|
||||
func getValidInstance() (*Instance, error) {
|
||||
instances, err := fetchInstances()
|
||||
if err != nil {
|
||||
|
@ -93,11 +175,76 @@ func getValidInstance() (*Instance, error) {
|
|||
return nil, fmt.Errorf("no valid SearX instances found")
|
||||
}
|
||||
|
||||
// func main() {
|
||||
// instance, err := getValidInstance()
|
||||
// if err != nil {
|
||||
// log.Fatalf("Failed to get a valid SearX instance: %v", err)
|
||||
// }
|
||||
// PerformSearXTextSearch performs a text search using a SearX instance.
|
||||
func PerformSearXTextSearch(query, categories, language string, page int) ([]TextSearchResult, time.Duration, error) {
|
||||
// Default value for "safe" search
|
||||
safe := "1"
|
||||
|
||||
// log.Printf("Selected SearX instance: %s", instance.URL)
|
||||
// }
|
||||
startTime := time.Now() // Start the timer
|
||||
var results []TextSearchResult
|
||||
|
||||
instance, err := getValidInstance()
|
||||
if err != nil {
|
||||
return nil, 0, fmt.Errorf("failed to get a valid SearX instance: %w", err)
|
||||
}
|
||||
|
||||
searchURL := fmt.Sprintf("%s/search?q=%s&categories=%s&language=%s&safe_search=%s&page=%d&format=json",
|
||||
instance.URL, url.QueryEscape(query), categories, language, safe, page)
|
||||
|
||||
client := &http.Client{Timeout: 10 * time.Second}
|
||||
req, err := http.NewRequest("GET", searchURL, nil)
|
||||
if err != nil {
|
||||
return nil, 0, fmt.Errorf("creating request: %v", err)
|
||||
}
|
||||
|
||||
XNGUserAgent, err := GetUserAgent("Text-Search-XNG")
|
||||
if err != nil {
|
||||
return nil, 0, fmt.Errorf("generating User-Agent: %v", err)
|
||||
}
|
||||
req.Header.Set("User-Agent", XNGUserAgent)
|
||||
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
return nil, 0, fmt.Errorf("performing request: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return nil, 0, fmt.Errorf("reading response body: %v", err)
|
||||
}
|
||||
|
||||
// Parse the JSON response to extract search results
|
||||
var response map[string]interface{}
|
||||
err = json.Unmarshal(body, &response)
|
||||
if err != nil {
|
||||
return nil, 0, fmt.Errorf("parsing response JSON: %v", err)
|
||||
}
|
||||
|
||||
// Extract search results
|
||||
if items, ok := response["results"].([]interface{}); ok {
|
||||
for _, item := range items {
|
||||
if result, ok := item.(map[string]interface{}); ok {
|
||||
title := strings.TrimSpace(fmt.Sprintf("%v", result["title"]))
|
||||
url := strings.TrimSpace(fmt.Sprintf("%v", result["url"]))
|
||||
description := strings.TrimSpace(fmt.Sprintf("%v", result["content"]))
|
||||
|
||||
results = append(results, TextSearchResult{
|
||||
Header: title,
|
||||
URL: url,
|
||||
Description: description,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
duration := time.Since(startTime) // Calculate the duration
|
||||
|
||||
if len(results) == 0 {
|
||||
printDebug("No results found for query: %s", query)
|
||||
return nil, duration, fmt.Errorf("no results found")
|
||||
}
|
||||
|
||||
printDebug("Search completed successfully for query: %s, found %d results", query, len(results))
|
||||
return results, duration, nil
|
||||
}
|
||||
|
|
2
text.go
2
text.go
|
@ -14,7 +14,7 @@ func init() {
|
|||
{Name: "LibreX", Func: wrapTextSearchFunc(PerformLibreXTextSearch)},
|
||||
{Name: "Brave", Func: wrapTextSearchFunc(PerformBraveTextSearch)},
|
||||
{Name: "DuckDuckGo", Func: wrapTextSearchFunc(PerformDuckDuckGoTextSearch)},
|
||||
// {Name: "SearXNG", Func: wrapTextSearchFunc(PerformSearXNGTextSearch), Weight: 2}, // Uncomment when implemented
|
||||
// {Name: "SearXNG", Func: wrapTextSearchFunc(PerformSearXTextSearch)}, // Always says StatusCode: 429
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue