Qwant is still blocking me but this might help you
Some checks failed
Run Integration Tests / test (push) Failing after 42s
Some checks failed
Run Integration Tests / test (push) Failing after 42s
This commit is contained in:
parent
a132ca7fd8
commit
89264b0f87
6 changed files with 214 additions and 115 deletions
140
text-qwant.go
Normal file
140
text-qwant.go
Normal file
|
@ -0,0 +1,140 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"compress/gzip"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// QwantTextAPIResponse mirrors the part of the Qwant web-search JSON payload
// that the text search consumes. The decoded path is
// data.result.items.mainline[].items[], where each innermost item carries a
// URL, a title and a description.
type QwantTextAPIResponse struct {
	Data qwantTextData `json:"data"`
}

// qwantTextData is the top-level "data" object of the response.
type qwantTextData struct {
	Result qwantTextResult `json:"result"`
}

// qwantTextResult is the "result" object nested inside "data".
type qwantTextResult struct {
	Items qwantTextResultItems `json:"items"`
}

// qwantTextResultItems is the "items" object nested inside "result".
type qwantTextResultItems struct {
	Mainline []qwantTextMainlineGroup `json:"mainline"`
}

// qwantTextMainlineGroup is one entry of the "mainline" array; it holds a
// list of individual hits.
type qwantTextMainlineGroup struct {
	Items []qwantTextHit `json:"items"`
}

// qwantTextHit is a single search hit as returned by the API.
type qwantTextHit struct {
	URL         string `json:"url"`
	Title       string `json:"title"`
	Description string `json:"desc"`
}
|
||||
|
||||
// PerformQwantTextSearch contacts the Qwant API and returns a slice of TextSearchResult
|
||||
func PerformQwantTextSearch(query, safe, lang string, page int) ([]TextSearchResult, time.Duration, error) {
|
||||
startTime := time.Now()
|
||||
const resultsPerPage = 10
|
||||
offset := (page - 1) * resultsPerPage
|
||||
|
||||
if safe == "" {
|
||||
safe = "0"
|
||||
}
|
||||
if lang == "" {
|
||||
lang = "en_CA"
|
||||
}
|
||||
|
||||
// Create URL
|
||||
apiURL := "https://api.qwant.com/v3/search/web"
|
||||
params := url.Values{}
|
||||
params.Set("q", query)
|
||||
params.Set("count", fmt.Sprint(resultsPerPage))
|
||||
params.Set("locale", lang)
|
||||
params.Set("offset", fmt.Sprint(offset))
|
||||
params.Set("device", "desktop")
|
||||
params.Set("safesearch", safe)
|
||||
fullURL := apiURL + "?" + params.Encode()
|
||||
|
||||
// Create HTTP request
|
||||
req, err := http.NewRequest("GET", fullURL, nil)
|
||||
if err != nil {
|
||||
return nil, 0, fmt.Errorf("creating request: %w", err)
|
||||
}
|
||||
|
||||
ua, err := GetUserAgent("Qwant-Text-Search")
|
||||
if err != nil {
|
||||
return nil, 0, fmt.Errorf("user-agent error: %w", err)
|
||||
}
|
||||
|
||||
// Set headers
|
||||
req.Header.Set("User-Agent", ua)
|
||||
req.Header.Set("Accept", "application/json, text/plain, */*")
|
||||
req.Header.Set("Accept-Language", "en-US,en;q=0.5")
|
||||
req.Header.Set("Accept-Encoding", "gzip")
|
||||
req.Header.Set("DNT", "1")
|
||||
req.Header.Set("Connection", "keep-alive")
|
||||
req.Header.Set("Origin", "https://www.qwant.com")
|
||||
req.Header.Set("Referer", "https://www.qwant.com/")
|
||||
req.Header.Set("Sec-Fetch-Dest", "empty")
|
||||
req.Header.Set("Sec-Fetch-Mode", "cors")
|
||||
req.Header.Set("Sec-Fetch-Site", "same-site")
|
||||
req.Header.Set("TE", "trailers")
|
||||
|
||||
// Send Request
|
||||
resp, err := DoMetaProxyRequest(req)
|
||||
if err != nil {
|
||||
return nil, 0, fmt.Errorf("failed to do meta-request: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
// Wait for HTTP 200
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return nil, 0, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
var body []byte
|
||||
if strings.EqualFold(resp.Header.Get("Content-Encoding"), "gzip") {
|
||||
gr, err := gzip.NewReader(resp.Body)
|
||||
if err != nil {
|
||||
return nil, 0, fmt.Errorf("gzip reader: %w", err)
|
||||
}
|
||||
gr.Close()
|
||||
} else {
|
||||
body, err = io.ReadAll(resp.Body)
|
||||
}
|
||||
if err != nil {
|
||||
return nil, 0, fmt.Errorf("reading body: %w", err)
|
||||
}
|
||||
|
||||
// Captcha block error
|
||||
if strings.Contains(string(body), `"error_code":27`) {
|
||||
return nil, 0, fmt.Errorf("Qwant CAPTCHA triggered, request blocked")
|
||||
}
|
||||
|
||||
var apiResp QwantTextAPIResponse
|
||||
if err := json.Unmarshal(body, &apiResp); err != nil {
|
||||
return nil, 0, fmt.Errorf("parsing JSON: %w", err)
|
||||
}
|
||||
|
||||
if len(apiResp.Data.Result.Items.Mainline) == 0 {
|
||||
return nil, 0, fmt.Errorf("no results in mainline")
|
||||
}
|
||||
|
||||
var results []TextSearchResult
|
||||
for _, item := range apiResp.Data.Result.Items.Mainline[0].Items {
|
||||
results = append(results, TextSearchResult{
|
||||
URL: cleanQwantURL(item.URL),
|
||||
Header: item.Title,
|
||||
Description: item.Description,
|
||||
Source: "Qwant",
|
||||
})
|
||||
}
|
||||
|
||||
return results, time.Since(startTime), nil
|
||||
}
|
||||
|
||||
// cleanQwantURL reduces rawURL to scheme://host/path, dropping the query
// string and fragment (where tracking parameters live).
//
// The path is emitted in its escaped form so that percent-encoded characters
// (for example "%20") survive and the result remains a valid URL. If rawURL
// cannot be parsed, or it has no scheme or host (empty or relative input),
// it is returned unchanged rather than being rebuilt into a malformed value
// such as "://".
func cleanQwantURL(rawURL string) string {
	u, err := url.Parse(rawURL)
	if err != nil || u.Scheme == "" || u.Host == "" {
		return rawURL
	}
	return u.Scheme + "://" + u.Host + u.EscapedPath()
}
|
Loading…
Add table
Add a link
Reference in a new issue