Search/text-qwant.go
partisan 89264b0f87
Some checks failed
Run Integration Tests / test (push) Failing after 42s
Qwant is still blocking me but this might help you
2025-07-04 22:45:06 +02:00

140 lines
3.6 KiB
Go

package main
import (
"compress/gzip"
"encoding/json"
"fmt"
"io"
"net/http"
"net/url"
"strings"
"time"
)
// QwantTextAPIResponse represents the JSON response structure from Qwant API.
//
// Only the keys this file reads are declared: data.result.items.mainline[],
// and inside each mainline entry an items[] list carrying url, title and
// desc. Any other keys present in the payload are ignored by encoding/json.
type QwantTextAPIResponse struct {
	Data struct {
		Result struct {
			Items struct {
				// Mainline maps the "mainline" array; each entry is a group
				// that carries its own list of result items.
				Mainline []struct {
					// Items maps the per-group "items" array.
					Items []struct {
						URL         string `json:"url"`   // copied (after cleaning) into TextSearchResult.URL
						Title       string `json:"title"` // copied into TextSearchResult.Header
						Description string `json:"desc"`  // copied into TextSearchResult.Description
					} `json:"items"`
				} `json:"mainline"`
			} `json:"items"`
		} `json:"result"`
	} `json:"data"`
}
// PerformQwantTextSearch queries the Qwant web-search API and converts the
// first mainline group of the response into a slice of TextSearchResult.
//
// Parameters:
//   - query: search terms, sent as the "q" parameter.
//   - safe: value for the "safesearch" parameter; "0" is used when empty.
//   - lang: value for the "locale" parameter; "en_CA" is used when empty.
//   - page: 1-based page number; values below 1 are treated as page 1 so a
//     negative offset is never sent.
//
// On success it returns the results and the wall-clock time the call took.
// On failure it returns a nil slice, a zero duration and an error describing
// which step failed: building the request, obtaining a user agent, sending
// the request, a non-200 status, reading or decompressing the body, a CAPTCHA
// block (error_code 27 in the payload), JSON decoding, or an empty mainline.
func PerformQwantTextSearch(query, safe, lang string, page int) ([]TextSearchResult, time.Duration, error) {
	startTime := time.Now()

	const resultsPerPage = 10
	if page < 1 {
		page = 1
	}
	offset := (page - 1) * resultsPerPage

	if safe == "" {
		safe = "0"
	}
	if lang == "" {
		lang = "en_CA"
	}

	// Create URL
	apiURL := "https://api.qwant.com/v3/search/web"
	params := url.Values{}
	params.Set("q", query)
	params.Set("count", fmt.Sprint(resultsPerPage))
	params.Set("locale", lang)
	params.Set("offset", fmt.Sprint(offset))
	params.Set("device", "desktop")
	params.Set("safesearch", safe)
	fullURL := apiURL + "?" + params.Encode()

	// Create HTTP request
	req, err := http.NewRequest("GET", fullURL, nil)
	if err != nil {
		return nil, 0, fmt.Errorf("creating request: %w", err)
	}

	ua, err := GetUserAgent("Qwant-Text-Search")
	if err != nil {
		return nil, 0, fmt.Errorf("user-agent error: %w", err)
	}

	// Set headers
	req.Header.Set("User-Agent", ua)
	req.Header.Set("Accept", "application/json, text/plain, */*")
	req.Header.Set("Accept-Language", "en-US,en;q=0.5")
	req.Header.Set("Accept-Encoding", "gzip")
	req.Header.Set("DNT", "1")
	req.Header.Set("Connection", "keep-alive")
	req.Header.Set("Origin", "https://www.qwant.com")
	req.Header.Set("Referer", "https://www.qwant.com/")
	req.Header.Set("Sec-Fetch-Dest", "empty")
	req.Header.Set("Sec-Fetch-Mode", "cors")
	req.Header.Set("Sec-Fetch-Site", "same-site")
	req.Header.Set("TE", "trailers")

	// Send Request
	resp, err := DoMetaProxyRequest(req)
	if err != nil {
		return nil, 0, fmt.Errorf("failed to do meta-request: %w", err)
	}
	defer resp.Body.Close()

	// Anything other than HTTP 200 is treated as a failure.
	if resp.StatusCode != http.StatusOK {
		return nil, 0, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
	}

	// Accept-Encoding is set by hand above, and the standard net/http
	// transport does not decompress transparently in that case, so a gzip
	// body has to be decoded here before it can be parsed.
	// NOTE(review): assumes DoMetaProxyRequest does not decompress itself.
	var bodyReader io.Reader = resp.Body
	if strings.EqualFold(resp.Header.Get("Content-Encoding"), "gzip") {
		gr, err := gzip.NewReader(resp.Body)
		if err != nil {
			return nil, 0, fmt.Errorf("gzip reader: %w", err)
		}
		defer gr.Close()
		bodyReader = gr
	}

	body, err := io.ReadAll(bodyReader)
	if err != nil {
		return nil, 0, fmt.Errorf("reading body: %w", err)
	}

	// Captcha block error
	if strings.Contains(string(body), `"error_code":27`) {
		return nil, 0, fmt.Errorf("Qwant CAPTCHA triggered, request blocked")
	}

	var apiResp QwantTextAPIResponse
	if err := json.Unmarshal(body, &apiResp); err != nil {
		return nil, 0, fmt.Errorf("parsing JSON: %w", err)
	}

	if len(apiResp.Data.Result.Items.Mainline) == 0 {
		return nil, 0, fmt.Errorf("no results in mainline")
	}

	// Only the first mainline group is converted; later groups are ignored.
	var results []TextSearchResult
	for _, item := range apiResp.Data.Result.Items.Mainline[0].Items {
		results = append(results, TextSearchResult{
			URL:         cleanQwantURL(item.URL),
			Header:      item.Title,
			Description: item.Description,
			Source:      "Qwant",
		})
	}

	return results, time.Since(startTime), nil
}
// cleanQwantURL reduces rawURL to "scheme://host/path", dropping the query
// string, the fragment and any user info so tracking parameters are removed.
//
// The path is emitted in its escaped form so percent-encoded characters
// (for example "%20") survive and the result remains a valid URL. If rawURL
// cannot be parsed, or has no scheme or no host, it is returned unchanged
// rather than producing a malformed string such as "://".
func cleanQwantURL(rawURL string) string {
	u, err := url.Parse(rawURL)
	if err != nil || u.Scheme == "" || u.Host == "" {
		return rawURL
	}
	return u.Scheme + "://" + u.Host + u.EscapedPath()
}