Qwant is still blocking me but this might help you
Some checks failed
Run Integration Tests / test (push) Failing after 42s
Some checks failed
Run Integration Tests / test (push) Failing after 42s
This commit is contained in:
parent
a132ca7fd8
commit
89264b0f87
6 changed files with 214 additions and 115 deletions
|
@ -1,8 +1,11 @@
|
||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"bytes"
|
||||||
|
"compress/gzip"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"io"
|
||||||
"net/http"
|
"net/http"
|
||||||
"net/url"
|
"net/url"
|
||||||
"sync"
|
"sync"
|
||||||
|
@ -120,7 +123,7 @@ func PerformQwantImageSearch(query, safe, lang string, page int) ([]ImageSearchR
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get the User-Agent string
|
// Get the User-Agent string
|
||||||
ImageUserAgent, err := GetUserAgent("Image-Search-Quant")
|
ImageUserAgent, err := GetUserAgent("Image-Search-Qwant")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, 0, fmt.Errorf("getting user-agent: %v", err)
|
return nil, 0, fmt.Errorf("getting user-agent: %v", err)
|
||||||
}
|
}
|
||||||
|
@ -137,12 +140,34 @@ func PerformQwantImageSearch(query, safe, lang string, page int) ([]ImageSearchR
|
||||||
return nil, 0, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
|
return nil, 0, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Parse the API response
|
// Read and (if gzip) decompress body
|
||||||
var apiResp QwantAPIResponse
|
var bodyReader io.ReadCloser = resp.Body
|
||||||
if err := json.NewDecoder(resp.Body).Decode(&apiResp); err != nil {
|
if resp.Header.Get("Content-Encoding") == "gzip" {
|
||||||
return nil, 0, fmt.Errorf("decoding response: %v", err)
|
gr, err := gzip.NewReader(resp.Body)
|
||||||
|
if err != nil {
|
||||||
|
printDebug("Failed to init gzip reader: %v", err)
|
||||||
|
} else {
|
||||||
|
bodyReader = gr
|
||||||
|
defer gr.Close()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var apiResp QwantAPIResponse
|
||||||
|
|
||||||
|
bodyBytes, err := io.ReadAll(bodyReader)
|
||||||
|
if err != nil {
|
||||||
|
printDebug("Failed to read response body: %v", err)
|
||||||
|
} else {
|
||||||
|
printDebug("Qwant response body:\n%s", string(bodyBytes))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Decode JSON from bodyBytes
|
||||||
|
if err := json.Unmarshal(bodyBytes, &apiResp); err != nil {
|
||||||
|
return nil, 0, fmt.Errorf("failed to decode JSON: %v\nRaw:\n%s", err, string(bodyBytes))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Optional: recreate body for reuse
|
||||||
|
resp.Body = io.NopCloser(bytes.NewReader(bodyBytes))
|
||||||
// Process the results
|
// Process the results
|
||||||
var wg sync.WaitGroup
|
var wg sync.WaitGroup
|
||||||
results := make([]ImageSearchResult, len(apiResp.Data.Result.Items))
|
results := make([]ImageSearchResult, len(apiResp.Data.Result.Items))
|
43
proxy.go
43
proxy.go
|
@ -1,7 +1,10 @@
|
||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"bytes"
|
||||||
|
"compress/gzip"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"io"
|
||||||
"net/http"
|
"net/http"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
|
@ -217,17 +220,55 @@ func DoCrawlerProxyRequest(req *http.Request) (*http.Response, error) {
|
||||||
func tryRequestWithRetry(req *http.Request, client Doer, retries int) (*http.Response, error) {
|
func tryRequestWithRetry(req *http.Request, client Doer, retries int) (*http.Response, error) {
|
||||||
var resp *http.Response
|
var resp *http.Response
|
||||||
var err error
|
var err error
|
||||||
|
|
||||||
for i := 1; i <= retries; i++ {
|
for i := 1; i <= retries; i++ {
|
||||||
if resp != nil {
|
if resp != nil && resp.Body != nil {
|
||||||
resp.Body.Close()
|
resp.Body.Close()
|
||||||
}
|
}
|
||||||
|
|
||||||
printDebug("Attempt %d of %d with proxy/client...", i, retries)
|
printDebug("Attempt %d of %d with proxy/client...", i, retries)
|
||||||
resp, err = tryRequestOnce(req, client)
|
resp, err = tryRequestOnce(req, client)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
printDebug("Request error: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if resp == nil {
|
||||||
|
printDebug("No response received (nil)")
|
||||||
|
time.Sleep(200 * time.Millisecond)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Try to read and print the body
|
||||||
|
var bodyReader io.ReadCloser = resp.Body
|
||||||
|
if resp.Header.Get("Content-Encoding") == "gzip" {
|
||||||
|
gr, gzErr := gzip.NewReader(resp.Body)
|
||||||
|
if gzErr != nil {
|
||||||
|
printDebug("Failed to init gzip reader: %v", gzErr)
|
||||||
|
bodyReader = resp.Body
|
||||||
|
} else {
|
||||||
|
bodyReader = gr
|
||||||
|
defer gr.Close()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bodyBytes, readErr := io.ReadAll(bodyReader)
|
||||||
|
if readErr != nil {
|
||||||
|
printDebug("Failed to read body: %v", readErr)
|
||||||
|
} else {
|
||||||
|
printDebug("Response status: %d\n---\n%s\n---", resp.StatusCode, string(bodyBytes))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reset body for possible reuse
|
||||||
|
resp.Body = io.NopCloser(bytes.NewReader(bodyBytes))
|
||||||
|
|
||||||
if isSuccessful(resp, err) {
|
if isSuccessful(resp, err) {
|
||||||
return resp, nil
|
return resp, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
time.Sleep(200 * time.Millisecond)
|
time.Sleep(200 * time.Millisecond)
|
||||||
}
|
}
|
||||||
|
|
||||||
return resp, err
|
return resp, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -18,7 +18,7 @@ var allTextSearchEngines = []SearchEngine{
|
||||||
{Name: "LibreX", Func: wrapTextSearchFunc(PerformLibreXTextSearch)},
|
{Name: "LibreX", Func: wrapTextSearchFunc(PerformLibreXTextSearch)},
|
||||||
{Name: "Brave", Func: wrapTextSearchFunc(PerformBraveTextSearch)},
|
{Name: "Brave", Func: wrapTextSearchFunc(PerformBraveTextSearch)},
|
||||||
{Name: "DuckDuckGo", Func: wrapTextSearchFunc(PerformDuckDuckGoTextSearch)},
|
{Name: "DuckDuckGo", Func: wrapTextSearchFunc(PerformDuckDuckGoTextSearch)},
|
||||||
{Name: "Quant", Func: wrapTextSearchFunc(PerformQwantTextSearch)}, // Broken !
|
{Name: "Qwant", Func: wrapTextSearchFunc(PerformQwantTextSearch)}, // Broken !
|
||||||
//{Name: "SearXNG", Func: wrapTextSearchFunc(PerformSearXTextSearch)}, // bruh
|
//{Name: "SearXNG", Func: wrapTextSearchFunc(PerformSearXTextSearch)}, // bruh
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
107
text-quant.go
107
text-quant.go
|
@ -1,107 +0,0 @@
|
||||||
package main
|
|
||||||
|
|
||||||
import (
|
|
||||||
"encoding/json"
|
|
||||||
"fmt"
|
|
||||||
"net/http"
|
|
||||||
"net/url"
|
|
||||||
"time"
|
|
||||||
)
|
|
||||||
|
|
||||||
// QwantTextAPIResponse describes the part of the Qwant web-search JSON reply
// that gets decoded: data.result.items.mainline[].items[], where every item
// carries a "url", a "title" and a "desc" field.
type QwantTextAPIResponse struct {
	Data struct {
		Result struct {
			Items struct {
				// Mainline is the list of result groups; each group has its
				// own list of items.
				Mainline []struct {
					Items []struct {
						URL         string `json:"url"`
						Title       string `json:"title"`
						Description string `json:"desc"`
					} `json:"items"`
				} `json:"mainline"`
			} `json:"items"`
		} `json:"result"`
	} `json:"data"`
}
|
|
||||||
|
|
||||||
// PerformQwantTextSearch contacts the Qwant API and returns a slice of TextSearchResult
|
|
||||||
func PerformQwantTextSearch(query, safe, lang string, page int) ([]TextSearchResult, time.Duration, error) {
|
|
||||||
startTime := time.Now()
|
|
||||||
|
|
||||||
const resultsPerPage = 10
|
|
||||||
offset := (page - 1) * resultsPerPage
|
|
||||||
|
|
||||||
// Ensure safe search is disabled by default if not specified
|
|
||||||
if safe == "" {
|
|
||||||
safe = "0"
|
|
||||||
}
|
|
||||||
|
|
||||||
// Default to English Canada locale if not specified
|
|
||||||
if lang == "" {
|
|
||||||
lang = "en_CA"
|
|
||||||
}
|
|
||||||
|
|
||||||
apiURL := fmt.Sprintf(
|
|
||||||
"https://api.qwant.com/v3/search/web?q=%s&count=%d&locale=%s&offset=%d&device=desktop",
|
|
||||||
url.QueryEscape(query),
|
|
||||||
resultsPerPage,
|
|
||||||
lang,
|
|
||||||
offset,
|
|
||||||
)
|
|
||||||
|
|
||||||
req, err := http.NewRequest("GET", apiURL, nil)
|
|
||||||
if err != nil {
|
|
||||||
// Return three values: nil for the slice, 0 for duration, error for the third.
|
|
||||||
return nil, 0, fmt.Errorf("creating request: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
userAgent, err := GetUserAgent("Quant-Text-Search")
|
|
||||||
if err != nil {
|
|
||||||
return nil, 0, err
|
|
||||||
}
|
|
||||||
req.Header.Set("User-Agent", userAgent)
|
|
||||||
|
|
||||||
resp, err := DoMetaProxyRequest(req)
|
|
||||||
if err != nil {
|
|
||||||
return nil, 0, fmt.Errorf("failed to do meta-request: %v", err)
|
|
||||||
}
|
|
||||||
defer resp.Body.Close()
|
|
||||||
|
|
||||||
if resp.StatusCode != http.StatusOK {
|
|
||||||
return nil, 0, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
|
|
||||||
}
|
|
||||||
|
|
||||||
var apiResp QwantTextAPIResponse
|
|
||||||
if err := json.NewDecoder(resp.Body).Decode(&apiResp); err != nil {
|
|
||||||
return nil, 0, fmt.Errorf("decoding response: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Extracting results from the nested JSON structure
|
|
||||||
if len(apiResp.Data.Result.Items.Mainline) == 0 {
|
|
||||||
return nil, 0, fmt.Errorf("no search results found")
|
|
||||||
}
|
|
||||||
|
|
||||||
var results []TextSearchResult
|
|
||||||
for _, item := range apiResp.Data.Result.Items.Mainline[0].Items {
|
|
||||||
cleanURL := cleanQwantURL(item.URL)
|
|
||||||
results = append(results, TextSearchResult{
|
|
||||||
URL: cleanURL,
|
|
||||||
Header: item.Title,
|
|
||||||
Description: item.Description,
|
|
||||||
Source: "Qwant",
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
duration := time.Since(startTime)
|
|
||||||
return results, duration, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// cleanQwantURL reduces rawURL to "scheme://host/path", dropping the query
// string and fragment (where tracking information is carried).
//
// The path is emitted in its escaped form so that percent-encoded characters
// such as "%20" survive and the result remains a valid URL. rawURL is returned
// unchanged when it cannot be parsed or has no scheme or host, instead of
// producing a meaningless "://" prefix.
func cleanQwantURL(rawURL string) string {
	u, err := url.Parse(rawURL)
	if err != nil || u.Scheme == "" || u.Host == "" {
		return rawURL
	}
	return u.Scheme + "://" + u.Host + u.EscapedPath()
}
|
|
140
text-qwant.go
Normal file
140
text-qwant.go
Normal file
|
@ -0,0 +1,140 @@
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"compress/gzip"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
"net/url"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// QwantTextAPIResponse models the fields read from a Qwant web-search JSON
// response. Results are nested as data.result.items.mainline[].items[], and
// each item exposes its "url", "title" and "desc".
type QwantTextAPIResponse struct {
	Data struct {
		Result struct {
			Items struct {
				// Mainline groups the results; every group carries its own
				// slice of items.
				Mainline []struct {
					Items []struct {
						URL         string `json:"url"`
						Title       string `json:"title"`
						Description string `json:"desc"`
					} `json:"items"`
				} `json:"mainline"`
			} `json:"items"`
		} `json:"result"`
	} `json:"data"`
}
|
||||||
|
|
||||||
|
// PerformQwantTextSearch contacts the Qwant API and returns a slice of TextSearchResult
|
||||||
|
func PerformQwantTextSearch(query, safe, lang string, page int) ([]TextSearchResult, time.Duration, error) {
|
||||||
|
startTime := time.Now()
|
||||||
|
const resultsPerPage = 10
|
||||||
|
offset := (page - 1) * resultsPerPage
|
||||||
|
|
||||||
|
if safe == "" {
|
||||||
|
safe = "0"
|
||||||
|
}
|
||||||
|
if lang == "" {
|
||||||
|
lang = "en_CA"
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create URL
|
||||||
|
apiURL := "https://api.qwant.com/v3/search/web"
|
||||||
|
params := url.Values{}
|
||||||
|
params.Set("q", query)
|
||||||
|
params.Set("count", fmt.Sprint(resultsPerPage))
|
||||||
|
params.Set("locale", lang)
|
||||||
|
params.Set("offset", fmt.Sprint(offset))
|
||||||
|
params.Set("device", "desktop")
|
||||||
|
params.Set("safesearch", safe)
|
||||||
|
fullURL := apiURL + "?" + params.Encode()
|
||||||
|
|
||||||
|
// Create HTTP request
|
||||||
|
req, err := http.NewRequest("GET", fullURL, nil)
|
||||||
|
if err != nil {
|
||||||
|
return nil, 0, fmt.Errorf("creating request: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
ua, err := GetUserAgent("Qwant-Text-Search")
|
||||||
|
if err != nil {
|
||||||
|
return nil, 0, fmt.Errorf("user-agent error: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set headers
|
||||||
|
req.Header.Set("User-Agent", ua)
|
||||||
|
req.Header.Set("Accept", "application/json, text/plain, */*")
|
||||||
|
req.Header.Set("Accept-Language", "en-US,en;q=0.5")
|
||||||
|
req.Header.Set("Accept-Encoding", "gzip")
|
||||||
|
req.Header.Set("DNT", "1")
|
||||||
|
req.Header.Set("Connection", "keep-alive")
|
||||||
|
req.Header.Set("Origin", "https://www.qwant.com")
|
||||||
|
req.Header.Set("Referer", "https://www.qwant.com/")
|
||||||
|
req.Header.Set("Sec-Fetch-Dest", "empty")
|
||||||
|
req.Header.Set("Sec-Fetch-Mode", "cors")
|
||||||
|
req.Header.Set("Sec-Fetch-Site", "same-site")
|
||||||
|
req.Header.Set("TE", "trailers")
|
||||||
|
|
||||||
|
// Send Request
|
||||||
|
resp, err := DoMetaProxyRequest(req)
|
||||||
|
if err != nil {
|
||||||
|
return nil, 0, fmt.Errorf("failed to do meta-request: %w", err)
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
|
||||||
|
// Wait for HTTP 200
|
||||||
|
if resp.StatusCode != http.StatusOK {
|
||||||
|
return nil, 0, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
|
||||||
|
}
|
||||||
|
|
||||||
|
var body []byte
|
||||||
|
if strings.EqualFold(resp.Header.Get("Content-Encoding"), "gzip") {
|
||||||
|
gr, err := gzip.NewReader(resp.Body)
|
||||||
|
if err != nil {
|
||||||
|
return nil, 0, fmt.Errorf("gzip reader: %w", err)
|
||||||
|
}
|
||||||
|
gr.Close()
|
||||||
|
} else {
|
||||||
|
body, err = io.ReadAll(resp.Body)
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
return nil, 0, fmt.Errorf("reading body: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Captcha block error
|
||||||
|
if strings.Contains(string(body), `"error_code":27`) {
|
||||||
|
return nil, 0, fmt.Errorf("Qwant CAPTCHA triggered, request blocked")
|
||||||
|
}
|
||||||
|
|
||||||
|
var apiResp QwantTextAPIResponse
|
||||||
|
if err := json.Unmarshal(body, &apiResp); err != nil {
|
||||||
|
return nil, 0, fmt.Errorf("parsing JSON: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(apiResp.Data.Result.Items.Mainline) == 0 {
|
||||||
|
return nil, 0, fmt.Errorf("no results in mainline")
|
||||||
|
}
|
||||||
|
|
||||||
|
var results []TextSearchResult
|
||||||
|
for _, item := range apiResp.Data.Result.Items.Mainline[0].Items {
|
||||||
|
results = append(results, TextSearchResult{
|
||||||
|
URL: cleanQwantURL(item.URL),
|
||||||
|
Header: item.Title,
|
||||||
|
Description: item.Description,
|
||||||
|
Source: "Qwant",
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
return results, time.Since(startTime), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// cleanQwantURL strips the query string and fragment from rawURL, returning
// only "scheme://host/path" so that tracking information is removed.
//
// The escaped form of the path is used, which keeps percent-encoded
// characters (for example "%20") intact instead of emitting them decoded.
// When rawURL fails to parse, or lacks a scheme or host, it is returned as-is
// rather than being turned into a bare "://" string.
func cleanQwantURL(rawURL string) string {
	u, err := url.Parse(rawURL)
	if err != nil || u.Scheme == "" || u.Host == "" {
		return rawURL
	}
	return u.Scheme + "://" + u.Host + u.EscapedPath()
}
|
2
text.go
2
text.go
|
@ -18,7 +18,7 @@ var allTextSearchEngines = []SearchEngine{
|
||||||
{Name: "LibreX", Func: wrapTextSearchFunc(PerformLibreXTextSearch)},
|
{Name: "LibreX", Func: wrapTextSearchFunc(PerformLibreXTextSearch)},
|
||||||
{Name: "Brave", Func: wrapTextSearchFunc(PerformBraveTextSearch)},
|
{Name: "Brave", Func: wrapTextSearchFunc(PerformBraveTextSearch)},
|
||||||
{Name: "DuckDuckGo", Func: wrapTextSearchFunc(PerformDuckDuckGoTextSearch)},
|
{Name: "DuckDuckGo", Func: wrapTextSearchFunc(PerformDuckDuckGoTextSearch)},
|
||||||
{Name: "Quant", Func: wrapTextSearchFunc(PerformQwantTextSearch)}, // Broken !
|
{Name: "Qwant", Func: wrapTextSearchFunc(PerformQwantTextSearch)}, // Broken !
|
||||||
//{Name: "SearXNG", Func: wrapTextSearchFunc(PerformSearXTextSearch)}, // bruh
|
//{Name: "SearXNG", Func: wrapTextSearchFunc(PerformSearXTextSearch)}, // bruh
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue