141 lines
3.6 KiB
Go
package main

import (
	"compress/gzip"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"strings"
	"time"
)

// QwantTextAPIResponse represents the JSON response structure from the Qwant API
type QwantTextAPIResponse struct {
	Data struct {
		Result struct {
			Items struct {
				Mainline []struct {
					Items []struct {
						URL         string `json:"url"`
						Title       string `json:"title"`
						Description string `json:"desc"`
					} `json:"items"`
				} `json:"mainline"`
			} `json:"items"`
		} `json:"result"`
	} `json:"data"`
}
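// Illustrative shape (not a verbatim capture) of the portion of the Qwant v3
// payload that the struct above decodes; field names follow the json tags:
//
//	{"data":{"result":{"items":{"mainline":[{"items":[
//	    {"url":"https://example.com/page","title":"Example","desc":"Example snippet"}
//	]}]}}}}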

// PerformQwantTextSearch contacts the Qwant API and returns a slice of TextSearchResult
func PerformQwantTextSearch(query, safe, lang string, page int) ([]TextSearchResult, time.Duration, error) {
	startTime := time.Now()
	const resultsPerPage = 10
	offset := (page - 1) * resultsPerPage

	if safe == "" {
		safe = "0"
	}
	if lang == "" {
		lang = "en_CA"
	}

	// Create URL
	apiURL := "https://api.qwant.com/v3/search/web"
	params := url.Values{}
	params.Set("q", query)
	params.Set("count", fmt.Sprint(resultsPerPage))
	params.Set("locale", lang)
	params.Set("offset", fmt.Sprint(offset))
	params.Set("device", "desktop")
	params.Set("safesearch", safe)
	fullURL := apiURL + "?" + params.Encode()
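	// With the defaults above, the encoded request URL looks roughly like this
	// (keys are sorted by params.Encode; the query value is illustrative):
	//
	//	https://api.qwant.com/v3/search/web?count=10&device=desktop&locale=en_CA&offset=0&q=example+query&safesearch=0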

	// Create HTTP request
	req, err := http.NewRequest("GET", fullURL, nil)
	if err != nil {
		return nil, 0, fmt.Errorf("creating request: %w", err)
	}

	ua, err := GetUserAgent("Qwant-Text-Search")
	if err != nil {
		return nil, 0, fmt.Errorf("user-agent error: %w", err)
	}

	// Set headers
	req.Header.Set("User-Agent", ua)
	req.Header.Set("Accept", "application/json, text/plain, */*")
	req.Header.Set("Accept-Language", "en-US,en;q=0.5")
	req.Header.Set("Accept-Encoding", "gzip")
	req.Header.Set("DNT", "1")
	req.Header.Set("Connection", "keep-alive")
	req.Header.Set("Origin", "https://www.qwant.com")
	req.Header.Set("Referer", "https://www.qwant.com/")
	req.Header.Set("Sec-Fetch-Dest", "empty")
	req.Header.Set("Sec-Fetch-Mode", "cors")
	req.Header.Set("Sec-Fetch-Site", "same-site")
	req.Header.Set("TE", "trailers")

	// Send request
	resp, err := DoMetaProxyRequest(req)
	if err != nil {
		return nil, 0, fmt.Errorf("failed to do meta-request: %w", err)
	}
	defer resp.Body.Close()

	// Expect HTTP 200
	if resp.StatusCode != http.StatusOK {
		return nil, 0, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
	}

	// Read the body. Accept-Encoding is set manually above, so the response
	// may arrive gzip-compressed and is decoded here.
	var body []byte
	if strings.EqualFold(resp.Header.Get("Content-Encoding"), "gzip") {
		gr, gzErr := gzip.NewReader(resp.Body)
		if gzErr != nil {
			return nil, 0, fmt.Errorf("gzip reader: %w", gzErr)
		}
		defer gr.Close()
		body, err = io.ReadAll(gr)
	} else {
		body, err = io.ReadAll(resp.Body)
	}
	if err != nil {
		return nil, 0, fmt.Errorf("reading body: %w", err)
	}

	// Captcha block error
	if strings.Contains(string(body), `"error_code":27`) {
		return nil, 0, fmt.Errorf("Qwant CAPTCHA triggered, request blocked")
	}

	var apiResp QwantTextAPIResponse
	if err := json.Unmarshal(body, &apiResp); err != nil {
		return nil, 0, fmt.Errorf("parsing JSON: %w", err)
	}

	if len(apiResp.Data.Result.Items.Mainline) == 0 {
		return nil, 0, fmt.Errorf("no results in mainline")
	}

	var results []TextSearchResult
	for _, item := range apiResp.Data.Result.Items.Mainline[0].Items {
		results = append(results, TextSearchResult{
			URL:         cleanQwantURL(item.URL),
			Header:      item.Title,
			Description: item.Description,
			Source:      "Qwant",
		})
	}

	return results, time.Since(startTime), nil
}
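// Usage sketch (assumes TextSearchResult, GetUserAgent and DoMetaProxyRequest
// are defined elsewhere in this package, as this file relies on them):
//
//	results, elapsed, err := PerformQwantTextSearch("golang json decoding", "0", "en_CA", 1)
//	if err != nil {
//		log.Printf("qwant text search failed: %v", err)
//		return
//	}
//	log.Printf("Qwant returned %d results in %s", len(results), elapsed)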

// cleanQwantURL extracts the main part of the URL, removing tracking information
func cleanQwantURL(rawURL string) string {
	u, err := url.Parse(rawURL)
	if err != nil {
		return rawURL
	}
	return u.Scheme + "://" + u.Host + u.Path
}
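// For example (illustrative input, not captured from a live response),
// cleanQwantURL("https://example.com/page?utm_source=qwant&pos=1") returns
// "https://example.com/page"; query strings and fragments are dropped.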