Qwant is still blocking me but this might help you
Some checks failed
Run Integration Tests / test (push) Failing after 42s

This commit is contained in:
partisan 2025-07-04 22:45:06 +02:00
parent a132ca7fd8
commit 89264b0f87
6 changed files with 214 additions and 115 deletions

View file

@ -1,8 +1,11 @@
package main package main
import ( import (
"bytes"
"compress/gzip"
"encoding/json" "encoding/json"
"fmt" "fmt"
"io"
"net/http" "net/http"
"net/url" "net/url"
"sync" "sync"
@ -120,7 +123,7 @@ func PerformQwantImageSearch(query, safe, lang string, page int) ([]ImageSearchR
} }
// Get the User-Agent string // Get the User-Agent string
ImageUserAgent, err := GetUserAgent("Image-Search-Quant") ImageUserAgent, err := GetUserAgent("Image-Search-Qwant")
if err != nil { if err != nil {
return nil, 0, fmt.Errorf("getting user-agent: %v", err) return nil, 0, fmt.Errorf("getting user-agent: %v", err)
} }
@ -137,12 +140,34 @@ func PerformQwantImageSearch(query, safe, lang string, page int) ([]ImageSearchR
return nil, 0, fmt.Errorf("unexpected status code: %d", resp.StatusCode) return nil, 0, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
} }
// Parse the API response // Read and (if gzip) decompress body
var apiResp QwantAPIResponse var bodyReader io.ReadCloser = resp.Body
if err := json.NewDecoder(resp.Body).Decode(&apiResp); err != nil { if resp.Header.Get("Content-Encoding") == "gzip" {
return nil, 0, fmt.Errorf("decoding response: %v", err) gr, err := gzip.NewReader(resp.Body)
if err != nil {
printDebug("Failed to init gzip reader: %v", err)
} else {
bodyReader = gr
defer gr.Close()
}
} }
var apiResp QwantAPIResponse
bodyBytes, err := io.ReadAll(bodyReader)
if err != nil {
printDebug("Failed to read response body: %v", err)
} else {
printDebug("Qwant response body:\n%s", string(bodyBytes))
}
// Decode JSON from bodyBytes
if err := json.Unmarshal(bodyBytes, &apiResp); err != nil {
return nil, 0, fmt.Errorf("failed to decode JSON: %v\nRaw:\n%s", err, string(bodyBytes))
}
// Optional: recreate body for reuse
resp.Body = io.NopCloser(bytes.NewReader(bodyBytes))
// Process the results // Process the results
var wg sync.WaitGroup var wg sync.WaitGroup
results := make([]ImageSearchResult, len(apiResp.Data.Result.Items)) results := make([]ImageSearchResult, len(apiResp.Data.Result.Items))

View file

@ -1,7 +1,10 @@
package main package main
import ( import (
"bytes"
"compress/gzip"
"fmt" "fmt"
"io"
"net/http" "net/http"
"strings" "strings"
"sync" "sync"
@ -217,17 +220,55 @@ func DoCrawlerProxyRequest(req *http.Request) (*http.Response, error) {
func tryRequestWithRetry(req *http.Request, client Doer, retries int) (*http.Response, error) { func tryRequestWithRetry(req *http.Request, client Doer, retries int) (*http.Response, error) {
var resp *http.Response var resp *http.Response
var err error var err error
for i := 1; i <= retries; i++ { for i := 1; i <= retries; i++ {
if resp != nil { if resp != nil && resp.Body != nil {
resp.Body.Close() resp.Body.Close()
} }
printDebug("Attempt %d of %d with proxy/client...", i, retries) printDebug("Attempt %d of %d with proxy/client...", i, retries)
resp, err = tryRequestOnce(req, client) resp, err = tryRequestOnce(req, client)
if err != nil {
printDebug("Request error: %v", err)
}
if resp == nil {
printDebug("No response received (nil)")
time.Sleep(200 * time.Millisecond)
continue
}
// Try to read and print the body
var bodyReader io.ReadCloser = resp.Body
if resp.Header.Get("Content-Encoding") == "gzip" {
gr, gzErr := gzip.NewReader(resp.Body)
if gzErr != nil {
printDebug("Failed to init gzip reader: %v", gzErr)
bodyReader = resp.Body
} else {
bodyReader = gr
defer gr.Close()
}
}
bodyBytes, readErr := io.ReadAll(bodyReader)
if readErr != nil {
printDebug("Failed to read body: %v", readErr)
} else {
printDebug("Response status: %d\n---\n%s\n---", resp.StatusCode, string(bodyBytes))
}
// Reset body for possible reuse
resp.Body = io.NopCloser(bytes.NewReader(bodyBytes))
if isSuccessful(resp, err) { if isSuccessful(resp, err) {
return resp, nil return resp, nil
} }
time.Sleep(200 * time.Millisecond) time.Sleep(200 * time.Millisecond)
} }
return resp, err return resp, err
} }

View file

@ -18,7 +18,7 @@ var allTextSearchEngines = []SearchEngine{
{Name: "LibreX", Func: wrapTextSearchFunc(PerformLibreXTextSearch)}, {Name: "LibreX", Func: wrapTextSearchFunc(PerformLibreXTextSearch)},
{Name: "Brave", Func: wrapTextSearchFunc(PerformBraveTextSearch)}, {Name: "Brave", Func: wrapTextSearchFunc(PerformBraveTextSearch)},
{Name: "DuckDuckGo", Func: wrapTextSearchFunc(PerformDuckDuckGoTextSearch)}, {Name: "DuckDuckGo", Func: wrapTextSearchFunc(PerformDuckDuckGoTextSearch)},
{Name: "Quant", Func: wrapTextSearchFunc(PerformQwantTextSearch)}, // Broken ! {Name: "Qwant", Func: wrapTextSearchFunc(PerformQwantTextSearch)}, // Broken !
//{Name: "SearXNG", Func: wrapTextSearchFunc(PerformSearXTextSearch)}, // bruh //{Name: "SearXNG", Func: wrapTextSearchFunc(PerformSearXTextSearch)}, // bruh
} }

View file

@ -1,107 +0,0 @@
package main
import (
"encoding/json"
"fmt"
"net/http"
"net/url"
"time"
)
// QwantTextAPIResponse represents the JSON response structure from Qwant API
type QwantTextAPIResponse struct {
Data struct {
Result struct {
Items struct {
Mainline []struct {
Items []struct {
URL string `json:"url"`
Title string `json:"title"`
Description string `json:"desc"`
} `json:"items"`
} `json:"mainline"`
} `json:"items"`
} `json:"result"`
} `json:"data"`
}
// PerformQwantTextSearch contacts the Qwant API and returns a slice of TextSearchResult
func PerformQwantTextSearch(query, safe, lang string, page int) ([]TextSearchResult, time.Duration, error) {
startTime := time.Now()
const resultsPerPage = 10
offset := (page - 1) * resultsPerPage
// Ensure safe search is disabled by default if not specified
if safe == "" {
safe = "0"
}
// Default to English Canada locale if not specified
if lang == "" {
lang = "en_CA"
}
apiURL := fmt.Sprintf(
"https://api.qwant.com/v3/search/web?q=%s&count=%d&locale=%s&offset=%d&device=desktop",
url.QueryEscape(query),
resultsPerPage,
lang,
offset,
)
req, err := http.NewRequest("GET", apiURL, nil)
if err != nil {
// Return three values: nil for the slice, 0 for duration, error for the third.
return nil, 0, fmt.Errorf("creating request: %v", err)
}
userAgent, err := GetUserAgent("Quant-Text-Search")
if err != nil {
return nil, 0, err
}
req.Header.Set("User-Agent", userAgent)
resp, err := DoMetaProxyRequest(req)
if err != nil {
return nil, 0, fmt.Errorf("failed to do meta-request: %v", err)
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return nil, 0, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
}
var apiResp QwantTextAPIResponse
if err := json.NewDecoder(resp.Body).Decode(&apiResp); err != nil {
return nil, 0, fmt.Errorf("decoding response: %v", err)
}
// Extracting results from the nested JSON structure
if len(apiResp.Data.Result.Items.Mainline) == 0 {
return nil, 0, fmt.Errorf("no search results found")
}
var results []TextSearchResult
for _, item := range apiResp.Data.Result.Items.Mainline[0].Items {
cleanURL := cleanQwantURL(item.URL)
results = append(results, TextSearchResult{
URL: cleanURL,
Header: item.Title,
Description: item.Description,
Source: "Qwant",
})
}
duration := time.Since(startTime)
return results, duration, nil
}
// cleanQwantURL extracts the main part of the URL, removing tracking information
func cleanQwantURL(rawURL string) string {
u, err := url.Parse(rawURL)
if err != nil {
return rawURL
}
return u.Scheme + "://" + u.Host + u.Path
}

140
text-qwant.go Normal file
View file

@ -0,0 +1,140 @@
package main
import (
"compress/gzip"
"encoding/json"
"fmt"
"io"
"net/http"
"net/url"
"strings"
"time"
)
// QwantTextAPIResponse represents the JSON response structure from Qwant API.
// Only the fields this package consumes are modeled; the real payload carries
// more data that json.Unmarshal silently ignores.
type QwantTextAPIResponse struct {
	Data struct {
		Result struct {
			Items struct {
				// Mainline groups result sections; the first group holds
				// the organic web results consumed by PerformQwantTextSearch.
				Mainline []struct {
					Items []struct {
						URL         string `json:"url"`
						Title       string `json:"title"`
						Description string `json:"desc"` // note: wire name is "desc", not "description"
					} `json:"items"`
				} `json:"mainline"`
			} `json:"items"`
		} `json:"result"`
	} `json:"data"`
}
// PerformQwantTextSearch contacts the Qwant API and returns a slice of
// TextSearchResult along with the elapsed request duration.
//
// Parameters:
//   - query: the search terms (URL-encoded here, pass raw text).
//   - safe:  Qwant "safesearch" level; defaults to "0" (off) when empty.
//   - lang:  Qwant locale; defaults to "en_CA" when empty.
//   - page:  1-based page index, mapped to a 10-result offset.
//
// Returns an error for transport failures, non-200 statuses, CAPTCHA
// blocks (Qwant error_code 27), JSON parse failures, or empty result sets.
func PerformQwantTextSearch(query, safe, lang string, page int) ([]TextSearchResult, time.Duration, error) {
	startTime := time.Now()

	const resultsPerPage = 10
	offset := (page - 1) * resultsPerPage

	// Safe search is disabled and locale falls back to English (Canada)
	// when the caller does not specify them.
	if safe == "" {
		safe = "0"
	}
	if lang == "" {
		lang = "en_CA"
	}

	// Build the request URL with properly escaped parameters.
	params := url.Values{}
	params.Set("q", query)
	params.Set("count", fmt.Sprint(resultsPerPage))
	params.Set("locale", lang)
	params.Set("offset", fmt.Sprint(offset))
	params.Set("device", "desktop")
	params.Set("safesearch", safe)
	fullURL := "https://api.qwant.com/v3/search/web?" + params.Encode()

	req, err := http.NewRequest("GET", fullURL, nil)
	if err != nil {
		return nil, 0, fmt.Errorf("creating request: %w", err)
	}

	ua, err := GetUserAgent("Qwant-Text-Search")
	if err != nil {
		return nil, 0, fmt.Errorf("user-agent error: %w", err)
	}

	// Browser-like headers; Accept-Encoding requests gzip explicitly, so the
	// response body may arrive compressed and must be decompressed below.
	req.Header.Set("User-Agent", ua)
	req.Header.Set("Accept", "application/json, text/plain, */*")
	req.Header.Set("Accept-Language", "en-US,en;q=0.5")
	req.Header.Set("Accept-Encoding", "gzip")
	req.Header.Set("DNT", "1")
	req.Header.Set("Connection", "keep-alive")
	req.Header.Set("Origin", "https://www.qwant.com")
	req.Header.Set("Referer", "https://www.qwant.com/")
	req.Header.Set("Sec-Fetch-Dest", "empty")
	req.Header.Set("Sec-Fetch-Mode", "cors")
	req.Header.Set("Sec-Fetch-Site", "same-site")
	req.Header.Set("TE", "trailers")

	resp, err := DoMetaProxyRequest(req)
	if err != nil {
		return nil, 0, fmt.Errorf("failed to do meta-request: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, 0, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
	}

	// Read the body, transparently decompressing when the server gzipped it.
	// BUG FIX: the previous version closed the gzip reader without ever
	// reading from it, so `body` stayed nil for every gzip response and
	// JSON decoding below failed on empty input.
	var body []byte
	if strings.EqualFold(resp.Header.Get("Content-Encoding"), "gzip") {
		gr, gzErr := gzip.NewReader(resp.Body)
		if gzErr != nil {
			return nil, 0, fmt.Errorf("gzip reader: %w", gzErr)
		}
		body, err = io.ReadAll(gr)
		gr.Close()
	} else {
		body, err = io.ReadAll(resp.Body)
	}
	if err != nil {
		return nil, 0, fmt.Errorf("reading body: %w", err)
	}

	// Qwant reports CAPTCHA blocks in-band with error_code 27.
	if strings.Contains(string(body), `"error_code":27`) {
		return nil, 0, fmt.Errorf("Qwant CAPTCHA triggered, request blocked")
	}

	var apiResp QwantTextAPIResponse
	if err := json.Unmarshal(body, &apiResp); err != nil {
		return nil, 0, fmt.Errorf("parsing JSON: %w", err)
	}
	if len(apiResp.Data.Result.Items.Mainline) == 0 {
		return nil, 0, fmt.Errorf("no results in mainline")
	}

	// Only the first mainline group carries the organic web results.
	items := apiResp.Data.Result.Items.Mainline[0].Items
	results := make([]TextSearchResult, 0, len(items))
	for _, item := range items {
		results = append(results, TextSearchResult{
			URL:         cleanQwantURL(item.URL),
			Header:      item.Title,
			Description: item.Description,
			Source:      "Qwant",
		})
	}

	return results, time.Since(startTime), nil
}
// cleanQwantURL strips query parameters and fragments (Qwant's tracking
// information) from rawURL, keeping only scheme, host, and path. If the
// input cannot be parsed as a URL it is returned untouched.
func cleanQwantURL(rawURL string) string {
	parsed, parseErr := url.Parse(rawURL)
	if parseErr != nil {
		return rawURL
	}
	return fmt.Sprintf("%s://%s%s", parsed.Scheme, parsed.Host, parsed.Path)
}

View file

@ -18,7 +18,7 @@ var allTextSearchEngines = []SearchEngine{
{Name: "LibreX", Func: wrapTextSearchFunc(PerformLibreXTextSearch)}, {Name: "LibreX", Func: wrapTextSearchFunc(PerformLibreXTextSearch)},
{Name: "Brave", Func: wrapTextSearchFunc(PerformBraveTextSearch)}, {Name: "Brave", Func: wrapTextSearchFunc(PerformBraveTextSearch)},
{Name: "DuckDuckGo", Func: wrapTextSearchFunc(PerformDuckDuckGoTextSearch)}, {Name: "DuckDuckGo", Func: wrapTextSearchFunc(PerformDuckDuckGoTextSearch)},
{Name: "Quant", Func: wrapTextSearchFunc(PerformQwantTextSearch)}, // Broken ! {Name: "Qwant", Func: wrapTextSearchFunc(PerformQwantTextSearch)}, // Broken !
//{Name: "SearXNG", Func: wrapTextSearchFunc(PerformSearXTextSearch)}, // bruh //{Name: "SearXNG", Func: wrapTextSearchFunc(PerformSearXTextSearch)}, // bruh
} }