Qwant is still blocking me but this might help you
Some checks failed
Run Integration Tests / test (push) Failing after 42s
Some checks failed
Run Integration Tests / test (push) Failing after 42s
This commit is contained in:
parent
a132ca7fd8
commit
89264b0f87
6 changed files with 214 additions and 115 deletions
|
@ -1,8 +1,11 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"compress/gzip"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"sync"
|
||||
|
@ -120,7 +123,7 @@ func PerformQwantImageSearch(query, safe, lang string, page int) ([]ImageSearchR
|
|||
}
|
||||
|
||||
// Get the User-Agent string
|
||||
ImageUserAgent, err := GetUserAgent("Image-Search-Quant")
|
||||
ImageUserAgent, err := GetUserAgent("Image-Search-Qwant")
|
||||
if err != nil {
|
||||
return nil, 0, fmt.Errorf("getting user-agent: %v", err)
|
||||
}
|
||||
|
@ -137,12 +140,34 @@ func PerformQwantImageSearch(query, safe, lang string, page int) ([]ImageSearchR
|
|||
return nil, 0, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
// Parse the API response
|
||||
var apiResp QwantAPIResponse
|
||||
if err := json.NewDecoder(resp.Body).Decode(&apiResp); err != nil {
|
||||
return nil, 0, fmt.Errorf("decoding response: %v", err)
|
||||
// Read and (if gzip) decompress body
|
||||
var bodyReader io.ReadCloser = resp.Body
|
||||
if resp.Header.Get("Content-Encoding") == "gzip" {
|
||||
gr, err := gzip.NewReader(resp.Body)
|
||||
if err != nil {
|
||||
printDebug("Failed to init gzip reader: %v", err)
|
||||
} else {
|
||||
bodyReader = gr
|
||||
defer gr.Close()
|
||||
}
|
||||
}
|
||||
|
||||
var apiResp QwantAPIResponse
|
||||
|
||||
bodyBytes, err := io.ReadAll(bodyReader)
|
||||
if err != nil {
|
||||
printDebug("Failed to read response body: %v", err)
|
||||
} else {
|
||||
printDebug("Qwant response body:\n%s", string(bodyBytes))
|
||||
}
|
||||
|
||||
// Decode JSON from bodyBytes
|
||||
if err := json.Unmarshal(bodyBytes, &apiResp); err != nil {
|
||||
return nil, 0, fmt.Errorf("failed to decode JSON: %v\nRaw:\n%s", err, string(bodyBytes))
|
||||
}
|
||||
|
||||
// Optional: recreate body for reuse
|
||||
resp.Body = io.NopCloser(bytes.NewReader(bodyBytes))
|
||||
// Process the results
|
||||
var wg sync.WaitGroup
|
||||
results := make([]ImageSearchResult, len(apiResp.Data.Result.Items))
|
43
proxy.go
43
proxy.go
|
@ -1,7 +1,10 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"compress/gzip"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"strings"
|
||||
"sync"
|
||||
|
@ -217,17 +220,55 @@ func DoCrawlerProxyRequest(req *http.Request) (*http.Response, error) {
|
|||
func tryRequestWithRetry(req *http.Request, client Doer, retries int) (*http.Response, error) {
|
||||
var resp *http.Response
|
||||
var err error
|
||||
|
||||
for i := 1; i <= retries; i++ {
|
||||
if resp != nil {
|
||||
if resp != nil && resp.Body != nil {
|
||||
resp.Body.Close()
|
||||
}
|
||||
|
||||
printDebug("Attempt %d of %d with proxy/client...", i, retries)
|
||||
resp, err = tryRequestOnce(req, client)
|
||||
|
||||
if err != nil {
|
||||
printDebug("Request error: %v", err)
|
||||
}
|
||||
|
||||
if resp == nil {
|
||||
printDebug("No response received (nil)")
|
||||
time.Sleep(200 * time.Millisecond)
|
||||
continue
|
||||
}
|
||||
|
||||
// Try to read and print the body
|
||||
var bodyReader io.ReadCloser = resp.Body
|
||||
if resp.Header.Get("Content-Encoding") == "gzip" {
|
||||
gr, gzErr := gzip.NewReader(resp.Body)
|
||||
if gzErr != nil {
|
||||
printDebug("Failed to init gzip reader: %v", gzErr)
|
||||
bodyReader = resp.Body
|
||||
} else {
|
||||
bodyReader = gr
|
||||
defer gr.Close()
|
||||
}
|
||||
}
|
||||
|
||||
bodyBytes, readErr := io.ReadAll(bodyReader)
|
||||
if readErr != nil {
|
||||
printDebug("Failed to read body: %v", readErr)
|
||||
} else {
|
||||
printDebug("Response status: %d\n---\n%s\n---", resp.StatusCode, string(bodyBytes))
|
||||
}
|
||||
|
||||
// Reset body for possible reuse
|
||||
resp.Body = io.NopCloser(bytes.NewReader(bodyBytes))
|
||||
|
||||
if isSuccessful(resp, err) {
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
time.Sleep(200 * time.Millisecond)
|
||||
}
|
||||
|
||||
return resp, err
|
||||
}
|
||||
|
||||
|
|
|
@ -18,7 +18,7 @@ var allTextSearchEngines = []SearchEngine{
|
|||
{Name: "LibreX", Func: wrapTextSearchFunc(PerformLibreXTextSearch)},
|
||||
{Name: "Brave", Func: wrapTextSearchFunc(PerformBraveTextSearch)},
|
||||
{Name: "DuckDuckGo", Func: wrapTextSearchFunc(PerformDuckDuckGoTextSearch)},
|
||||
{Name: "Quant", Func: wrapTextSearchFunc(PerformQwantTextSearch)}, // Broken !
|
||||
{Name: "Qwant", Func: wrapTextSearchFunc(PerformQwantTextSearch)}, // Broken !
|
||||
//{Name: "SearXNG", Func: wrapTextSearchFunc(PerformSearXTextSearch)}, // bruh
|
||||
}
|
||||
|
||||
|
|
107
text-quant.go
107
text-quant.go
|
@ -1,107 +0,0 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"time"
|
||||
)
|
||||
|
||||
// QwantTextAPIResponse represents the JSON response structure from Qwant API
|
||||
type QwantTextAPIResponse struct {
|
||||
Data struct {
|
||||
Result struct {
|
||||
Items struct {
|
||||
Mainline []struct {
|
||||
Items []struct {
|
||||
URL string `json:"url"`
|
||||
Title string `json:"title"`
|
||||
Description string `json:"desc"`
|
||||
} `json:"items"`
|
||||
} `json:"mainline"`
|
||||
} `json:"items"`
|
||||
} `json:"result"`
|
||||
} `json:"data"`
|
||||
}
|
||||
|
||||
// PerformQwantTextSearch contacts the Qwant API and returns a slice of TextSearchResult
|
||||
func PerformQwantTextSearch(query, safe, lang string, page int) ([]TextSearchResult, time.Duration, error) {
|
||||
startTime := time.Now()
|
||||
|
||||
const resultsPerPage = 10
|
||||
offset := (page - 1) * resultsPerPage
|
||||
|
||||
// Ensure safe search is disabled by default if not specified
|
||||
if safe == "" {
|
||||
safe = "0"
|
||||
}
|
||||
|
||||
// Default to English Canada locale if not specified
|
||||
if lang == "" {
|
||||
lang = "en_CA"
|
||||
}
|
||||
|
||||
apiURL := fmt.Sprintf(
|
||||
"https://api.qwant.com/v3/search/web?q=%s&count=%d&locale=%s&offset=%d&device=desktop",
|
||||
url.QueryEscape(query),
|
||||
resultsPerPage,
|
||||
lang,
|
||||
offset,
|
||||
)
|
||||
|
||||
req, err := http.NewRequest("GET", apiURL, nil)
|
||||
if err != nil {
|
||||
// Return three values: nil for the slice, 0 for duration, error for the third.
|
||||
return nil, 0, fmt.Errorf("creating request: %v", err)
|
||||
}
|
||||
|
||||
userAgent, err := GetUserAgent("Quant-Text-Search")
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
req.Header.Set("User-Agent", userAgent)
|
||||
|
||||
resp, err := DoMetaProxyRequest(req)
|
||||
if err != nil {
|
||||
return nil, 0, fmt.Errorf("failed to do meta-request: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return nil, 0, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
var apiResp QwantTextAPIResponse
|
||||
if err := json.NewDecoder(resp.Body).Decode(&apiResp); err != nil {
|
||||
return nil, 0, fmt.Errorf("decoding response: %v", err)
|
||||
}
|
||||
|
||||
// Extracting results from the nested JSON structure
|
||||
if len(apiResp.Data.Result.Items.Mainline) == 0 {
|
||||
return nil, 0, fmt.Errorf("no search results found")
|
||||
}
|
||||
|
||||
var results []TextSearchResult
|
||||
for _, item := range apiResp.Data.Result.Items.Mainline[0].Items {
|
||||
cleanURL := cleanQwantURL(item.URL)
|
||||
results = append(results, TextSearchResult{
|
||||
URL: cleanURL,
|
||||
Header: item.Title,
|
||||
Description: item.Description,
|
||||
Source: "Qwant",
|
||||
})
|
||||
}
|
||||
|
||||
duration := time.Since(startTime)
|
||||
return results, duration, nil
|
||||
}
|
||||
|
||||
// cleanQwantURL extracts the main part of the URL, removing tracking information
|
||||
func cleanQwantURL(rawURL string) string {
|
||||
u, err := url.Parse(rawURL)
|
||||
if err != nil {
|
||||
return rawURL
|
||||
}
|
||||
return u.Scheme + "://" + u.Host + u.Path
|
||||
}
|
140
text-qwant.go
Normal file
140
text-qwant.go
Normal file
|
@ -0,0 +1,140 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"compress/gzip"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// QwantTextAPIResponse represents the JSON response structure from Qwant API
// (the v3 /search/web endpoint used by PerformQwantTextSearch). Only the
// fields this client consumes are modelled; the anonymous nesting mirrors
// the API's data.result.items.mainline layout.
type QwantTextAPIResponse struct {
	Data struct {
		Result struct {
			Items struct {
				// Mainline holds the result groups; the caller reads
				// only the first group's items.
				Mainline []struct {
					Items []struct {
						// URL is the raw result link, possibly with
						// tracking parameters (see cleanQwantURL).
						URL         string `json:"url"`
						Title       string `json:"title"`
						Description string `json:"desc"`
					} `json:"items"`
				} `json:"mainline"`
			} `json:"items"`
		} `json:"result"`
	} `json:"data"`
}
|
||||
|
||||
// PerformQwantTextSearch contacts the Qwant API and returns a slice of TextSearchResult
|
||||
func PerformQwantTextSearch(query, safe, lang string, page int) ([]TextSearchResult, time.Duration, error) {
|
||||
startTime := time.Now()
|
||||
const resultsPerPage = 10
|
||||
offset := (page - 1) * resultsPerPage
|
||||
|
||||
if safe == "" {
|
||||
safe = "0"
|
||||
}
|
||||
if lang == "" {
|
||||
lang = "en_CA"
|
||||
}
|
||||
|
||||
// Create URL
|
||||
apiURL := "https://api.qwant.com/v3/search/web"
|
||||
params := url.Values{}
|
||||
params.Set("q", query)
|
||||
params.Set("count", fmt.Sprint(resultsPerPage))
|
||||
params.Set("locale", lang)
|
||||
params.Set("offset", fmt.Sprint(offset))
|
||||
params.Set("device", "desktop")
|
||||
params.Set("safesearch", safe)
|
||||
fullURL := apiURL + "?" + params.Encode()
|
||||
|
||||
// Create HTTP request
|
||||
req, err := http.NewRequest("GET", fullURL, nil)
|
||||
if err != nil {
|
||||
return nil, 0, fmt.Errorf("creating request: %w", err)
|
||||
}
|
||||
|
||||
ua, err := GetUserAgent("Qwant-Text-Search")
|
||||
if err != nil {
|
||||
return nil, 0, fmt.Errorf("user-agent error: %w", err)
|
||||
}
|
||||
|
||||
// Set headers
|
||||
req.Header.Set("User-Agent", ua)
|
||||
req.Header.Set("Accept", "application/json, text/plain, */*")
|
||||
req.Header.Set("Accept-Language", "en-US,en;q=0.5")
|
||||
req.Header.Set("Accept-Encoding", "gzip")
|
||||
req.Header.Set("DNT", "1")
|
||||
req.Header.Set("Connection", "keep-alive")
|
||||
req.Header.Set("Origin", "https://www.qwant.com")
|
||||
req.Header.Set("Referer", "https://www.qwant.com/")
|
||||
req.Header.Set("Sec-Fetch-Dest", "empty")
|
||||
req.Header.Set("Sec-Fetch-Mode", "cors")
|
||||
req.Header.Set("Sec-Fetch-Site", "same-site")
|
||||
req.Header.Set("TE", "trailers")
|
||||
|
||||
// Send Request
|
||||
resp, err := DoMetaProxyRequest(req)
|
||||
if err != nil {
|
||||
return nil, 0, fmt.Errorf("failed to do meta-request: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
// Wait for HTTP 200
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return nil, 0, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
var body []byte
|
||||
if strings.EqualFold(resp.Header.Get("Content-Encoding"), "gzip") {
|
||||
gr, err := gzip.NewReader(resp.Body)
|
||||
if err != nil {
|
||||
return nil, 0, fmt.Errorf("gzip reader: %w", err)
|
||||
}
|
||||
gr.Close()
|
||||
} else {
|
||||
body, err = io.ReadAll(resp.Body)
|
||||
}
|
||||
if err != nil {
|
||||
return nil, 0, fmt.Errorf("reading body: %w", err)
|
||||
}
|
||||
|
||||
// Captcha block error
|
||||
if strings.Contains(string(body), `"error_code":27`) {
|
||||
return nil, 0, fmt.Errorf("Qwant CAPTCHA triggered, request blocked")
|
||||
}
|
||||
|
||||
var apiResp QwantTextAPIResponse
|
||||
if err := json.Unmarshal(body, &apiResp); err != nil {
|
||||
return nil, 0, fmt.Errorf("parsing JSON: %w", err)
|
||||
}
|
||||
|
||||
if len(apiResp.Data.Result.Items.Mainline) == 0 {
|
||||
return nil, 0, fmt.Errorf("no results in mainline")
|
||||
}
|
||||
|
||||
var results []TextSearchResult
|
||||
for _, item := range apiResp.Data.Result.Items.Mainline[0].Items {
|
||||
results = append(results, TextSearchResult{
|
||||
URL: cleanQwantURL(item.URL),
|
||||
Header: item.Title,
|
||||
Description: item.Description,
|
||||
Source: "Qwant",
|
||||
})
|
||||
}
|
||||
|
||||
return results, time.Since(startTime), nil
|
||||
}
|
||||
|
||||
// cleanQwantURL extracts the main part of the URL, removing tracking
// information: the query string and fragment are dropped, keeping only
// scheme://host/path. Inputs that fail to parse are returned unchanged.
func cleanQwantURL(rawURL string) string {
	parsed, parseErr := url.Parse(rawURL)
	if parseErr != nil {
		return rawURL
	}
	var b strings.Builder
	b.WriteString(parsed.Scheme)
	b.WriteString("://")
	b.WriteString(parsed.Host)
	b.WriteString(parsed.Path)
	return b.String()
}
|
2
text.go
2
text.go
|
@ -18,7 +18,7 @@ var allTextSearchEngines = []SearchEngine{
|
|||
{Name: "LibreX", Func: wrapTextSearchFunc(PerformLibreXTextSearch)},
|
||||
{Name: "Brave", Func: wrapTextSearchFunc(PerformBraveTextSearch)},
|
||||
{Name: "DuckDuckGo", Func: wrapTextSearchFunc(PerformDuckDuckGoTextSearch)},
|
||||
{Name: "Quant", Func: wrapTextSearchFunc(PerformQwantTextSearch)}, // Broken !
|
||||
{Name: "Qwant", Func: wrapTextSearchFunc(PerformQwantTextSearch)}, // Broken !
|
||||
//{Name: "SearXNG", Func: wrapTextSearchFunc(PerformSearXTextSearch)}, // bruh
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue