diff --git a/images-quant.go b/images-qwant.go
similarity index 86%
rename from images-quant.go
rename to images-qwant.go
index ab5d677..20408ce 100644
--- a/images-quant.go
+++ b/images-qwant.go
@@ -1,8 +1,11 @@
 package main
 
 import (
+	"bytes"
+	"compress/gzip"
 	"encoding/json"
 	"fmt"
+	"io"
 	"net/http"
 	"net/url"
 	"sync"
@@ -120,7 +123,7 @@ func PerformQwantImageSearch(query, safe, lang string, page int) ([]ImageSearchR
 	}
 
 	// Get the User-Agent string
-	ImageUserAgent, err := GetUserAgent("Image-Search-Quant")
+	ImageUserAgent, err := GetUserAgent("Image-Search-Qwant")
 	if err != nil {
 		return nil, 0, fmt.Errorf("getting user-agent: %v", err)
 	}
@@ -137,12 +140,34 @@ func PerformQwantImageSearch(query, safe, lang string, page int) ([]ImageSearchR
 		return nil, 0, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
 	}
 
-	// Parse the API response
-	var apiResp QwantAPIResponse
-	if err := json.NewDecoder(resp.Body).Decode(&apiResp); err != nil {
-		return nil, 0, fmt.Errorf("decoding response: %v", err)
+	// Read and (if gzip) decompress body
+	var bodyReader io.ReadCloser = resp.Body
+	if resp.Header.Get("Content-Encoding") == "gzip" {
+		gr, err := gzip.NewReader(resp.Body)
+		if err != nil {
+			printDebug("Failed to init gzip reader: %v", err)
+		} else {
+			bodyReader = gr
+			defer gr.Close()
+		}
 	}
+	var apiResp QwantAPIResponse
+
+	bodyBytes, err := io.ReadAll(bodyReader)
+	if err != nil {
+		printDebug("Failed to read response body: %v", err)
+	} else {
+		printDebug("Qwant response body:\n%s", string(bodyBytes))
+	}
+
+	// Decode JSON from bodyBytes
+	if err := json.Unmarshal(bodyBytes, &apiResp); err != nil {
+		return nil, 0, fmt.Errorf("failed to decode JSON: %v\nRaw:\n%s", err, string(bodyBytes))
+	}
+
+	// Optional: recreate body for reuse
+	resp.Body = io.NopCloser(bytes.NewReader(bodyBytes))
 
 	// Process the results
 	var wg sync.WaitGroup
 	results := make([]ImageSearchResult, len(apiResp.Data.Result.Items))
diff --git a/proxy.go b/proxy.go
index 0f2a26a..06d3ba0 100644
--- a/proxy.go
+++ b/proxy.go
@@ -1,7 +1,10 @@
 package main
 
 import (
+	"bytes"
+	"compress/gzip"
 	"fmt"
+	"io"
 	"net/http"
 	"strings"
 	"sync"
@@ -217,17 +220,55 @@ func DoCrawlerProxyRequest(req *http.Request) (*http.Response, error) {
 func tryRequestWithRetry(req *http.Request, client Doer, retries int) (*http.Response, error) {
 	var resp *http.Response
 	var err error
+
 	for i := 1; i <= retries; i++ {
-		if resp != nil {
+		if resp != nil && resp.Body != nil {
 			resp.Body.Close()
 		}
+
+		printDebug("Attempt %d of %d with proxy/client...", i, retries)
 		resp, err = tryRequestOnce(req, client)
+
+		if err != nil {
+			printDebug("Request error: %v", err)
+		}
+
+		if resp == nil {
+			printDebug("No response received (nil)")
+			time.Sleep(200 * time.Millisecond)
+			continue
+		}
+
+		// Try to read and print the body
+		var bodyReader io.ReadCloser = resp.Body
+		if resp.Header.Get("Content-Encoding") == "gzip" {
+			gr, gzErr := gzip.NewReader(resp.Body)
+			if gzErr != nil {
+				printDebug("Failed to init gzip reader: %v", gzErr)
+				bodyReader = resp.Body
+			} else {
+				bodyReader = gr
+				defer gr.Close()
+			}
+		}
+
+		bodyBytes, readErr := io.ReadAll(bodyReader)
+		if readErr != nil {
+			printDebug("Failed to read body: %v", readErr)
+		} else {
+			printDebug("Response status: %d\n---\n%s\n---", resp.StatusCode, string(bodyBytes))
+		}
+
+		// Reset body for possible reuse
+		resp.Body = io.NopCloser(bytes.NewReader(bodyBytes))
+
 		if isSuccessful(resp, err) {
 			return resp, nil
 		}
+
 		time.Sleep(200 * time.Millisecond)
 	}
+
 	return resp, err
 }
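Note: the images-qwant.go and proxy.go hunks above share one read/decompress/reset pattern: unwrap the body with gzip.NewReader when Content-Encoding says gzip, read everything into memory for debug logging, then rewind resp.Body via io.NopCloser so later consumers can still read it. A minimal standalone sketch of that pattern, assuming only the standard library (the helper name readAndRestoreBody is illustrative, not part of this patch):

	// readAndRestoreBody drains resp.Body (gunzipping when the server
	// signals it), then swaps in an in-memory reader so the response
	// body can be consumed again downstream.
	func readAndRestoreBody(resp *http.Response) ([]byte, error) {
		var r io.Reader = resp.Body
		if resp.Header.Get("Content-Encoding") == "gzip" {
			gr, err := gzip.NewReader(resp.Body)
			if err != nil {
				return nil, err
			}
			defer gr.Close()
			r = gr
		}
		b, err := io.ReadAll(r)
		if err != nil {
			return nil, err
		}
		resp.Body = io.NopCloser(bytes.NewReader(b))
		return b, nil
	}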
diff --git a/text-extra.go b/text-extra.go
index a2986b4..598b721 100755
--- a/text-extra.go
+++ b/text-extra.go
@@ -18,7 +18,7 @@ var allTextSearchEngines = []SearchEngine{
 	{Name: "LibreX", Func: wrapTextSearchFunc(PerformLibreXTextSearch)},
 	{Name: "Brave", Func: wrapTextSearchFunc(PerformBraveTextSearch)},
 	{Name: "DuckDuckGo", Func: wrapTextSearchFunc(PerformDuckDuckGoTextSearch)},
-	{Name: "Quant", Func: wrapTextSearchFunc(PerformQwantTextSearch)}, // Broken !
+	{Name: "Qwant", Func: wrapTextSearchFunc(PerformQwantTextSearch)}, // Broken !
 	//{Name: "SearXNG", Func: wrapTextSearchFunc(PerformSearXTextSearch)}, // bruh
 }
 
diff --git a/text-quant.go b/text-quant.go
deleted file mode 100644
index 959879d..0000000
--- a/text-quant.go
+++ /dev/null
@@ -1,107 +0,0 @@
-package main
-
-import (
-	"encoding/json"
-	"fmt"
-	"net/http"
-	"net/url"
-	"time"
-)
-
-// QwantTextAPIResponse represents the JSON response structure from Qwant API
-type QwantTextAPIResponse struct {
-	Data struct {
-		Result struct {
-			Items struct {
-				Mainline []struct {
-					Items []struct {
-						URL         string `json:"url"`
-						Title       string `json:"title"`
-						Description string `json:"desc"`
-					} `json:"items"`
-				} `json:"mainline"`
-			} `json:"items"`
-		} `json:"result"`
-	} `json:"data"`
-}
-
-// PerformQwantTextSearch contacts the Qwant API and returns a slice of TextSearchResult
-func PerformQwantTextSearch(query, safe, lang string, page int) ([]TextSearchResult, time.Duration, error) {
-	startTime := time.Now()
-
-	const resultsPerPage = 10
-	offset := (page - 1) * resultsPerPage
-
-	// Ensure safe search is disabled by default if not specified
-	if safe == "" {
-		safe = "0"
-	}
-
-	// Default to English Canada locale if not specified
-	if lang == "" {
-		lang = "en_CA"
-	}
-
-	apiURL := fmt.Sprintf(
-		"https://api.qwant.com/v3/search/web?q=%s&count=%d&locale=%s&offset=%d&device=desktop",
-		url.QueryEscape(query),
-		resultsPerPage,
-		lang,
-		offset,
-	)
-
-	req, err := http.NewRequest("GET", apiURL, nil)
-	if err != nil {
-		// Return three values: nil for the slice, 0 for duration, error for the third.
-		return nil, 0, fmt.Errorf("creating request: %v", err)
-	}
-
-	userAgent, err := GetUserAgent("Quant-Text-Search")
-	if err != nil {
-		return nil, 0, err
-	}
-	req.Header.Set("User-Agent", userAgent)
-
-	resp, err := DoMetaProxyRequest(req)
-	if err != nil {
-		return nil, 0, fmt.Errorf("failed to do meta-request: %v", err)
-	}
-	defer resp.Body.Close()
-
-	if resp.StatusCode != http.StatusOK {
-		return nil, 0, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
-	}
-
-	var apiResp QwantTextAPIResponse
-	if err := json.NewDecoder(resp.Body).Decode(&apiResp); err != nil {
-		return nil, 0, fmt.Errorf("decoding response: %v", err)
-	}
-
-	// Extracting results from the nested JSON structure
-	if len(apiResp.Data.Result.Items.Mainline) == 0 {
-		return nil, 0, fmt.Errorf("no search results found")
-	}
-
-	var results []TextSearchResult
-	for _, item := range apiResp.Data.Result.Items.Mainline[0].Items {
-		cleanURL := cleanQwantURL(item.URL)
-		results = append(results, TextSearchResult{
-			URL:         cleanURL,
-			Header:      item.Title,
-			Description: item.Description,
-			Source:      "Qwant",
-		})
-	}
-
-	duration := time.Since(startTime)
-	return results, duration, nil
-}
-
-// cleanQwantURL extracts the main part of the URL, removing tracking information
-func cleanQwantURL(rawURL string) string {
-	u, err := url.Parse(rawURL)
-	if err != nil {
-		return rawURL
-	}
-	return u.Scheme + "://" + u.Host + u.Path
-}
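Note: the replacement file below keeps the QwantTextAPIResponse type unchanged; only the request construction and body handling differ. For reference, the mainline nesting that both versions decode behaves like this against a hand-written payload (the JSON literal is a made-up sample, not captured API output):

	sample := []byte(`{"data":{"result":{"items":{"mainline":[` +
		`{"items":[{"url":"https://example.com/a","title":"Example","desc":"demo"}]}]}}}}`)
	var r QwantTextAPIResponse
	if err := json.Unmarshal(sample, &r); err != nil {
		log.Fatal(err)
	}
	fmt.Println(r.Data.Result.Items.Mainline[0].Items[0].Title) // prints "Example"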
diff --git a/text-qwant.go b/text-qwant.go
new file mode 100644
index 0000000..c77c09d
--- /dev/null
+++ b/text-qwant.go
@@ -0,0 +1,142 @@
+package main
+
+import (
+	"compress/gzip"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"net/url"
+	"strings"
+	"time"
+)
+
+// QwantTextAPIResponse represents the JSON response structure from Qwant API
+type QwantTextAPIResponse struct {
+	Data struct {
+		Result struct {
+			Items struct {
+				Mainline []struct {
+					Items []struct {
+						URL         string `json:"url"`
+						Title       string `json:"title"`
+						Description string `json:"desc"`
+					} `json:"items"`
+				} `json:"mainline"`
+			} `json:"items"`
+		} `json:"result"`
+	} `json:"data"`
+}
+
+// PerformQwantTextSearch contacts the Qwant API and returns a slice of TextSearchResult
+func PerformQwantTextSearch(query, safe, lang string, page int) ([]TextSearchResult, time.Duration, error) {
+	startTime := time.Now()
+	const resultsPerPage = 10
+	offset := (page - 1) * resultsPerPage
+
+	if safe == "" {
+		safe = "0"
+	}
+	if lang == "" {
+		lang = "en_CA"
+	}
+
+	// Build the request URL
+	apiURL := "https://api.qwant.com/v3/search/web"
+	params := url.Values{}
+	params.Set("q", query)
+	params.Set("count", fmt.Sprint(resultsPerPage))
+	params.Set("locale", lang)
+	params.Set("offset", fmt.Sprint(offset))
+	params.Set("device", "desktop")
+	params.Set("safesearch", safe)
+	fullURL := apiURL + "?" + params.Encode()
+
+	// Create the HTTP request
+	req, err := http.NewRequest("GET", fullURL, nil)
+	if err != nil {
+		return nil, 0, fmt.Errorf("creating request: %w", err)
+	}
+
+	ua, err := GetUserAgent("Qwant-Text-Search")
+	if err != nil {
+		return nil, 0, fmt.Errorf("user-agent error: %w", err)
+	}
+
+	// Set browser-like headers
+	req.Header.Set("User-Agent", ua)
+	req.Header.Set("Accept", "application/json, text/plain, */*")
+	req.Header.Set("Accept-Language", "en-US,en;q=0.5")
+	req.Header.Set("Accept-Encoding", "gzip")
+	req.Header.Set("DNT", "1")
+	req.Header.Set("Connection", "keep-alive")
+	req.Header.Set("Origin", "https://www.qwant.com")
+	req.Header.Set("Referer", "https://www.qwant.com/")
+	req.Header.Set("Sec-Fetch-Dest", "empty")
+	req.Header.Set("Sec-Fetch-Mode", "cors")
+	req.Header.Set("Sec-Fetch-Site", "same-site")
+	req.Header.Set("TE", "trailers")
+
+	// Send the request
+	resp, err := DoMetaProxyRequest(req)
+	if err != nil {
+		return nil, 0, fmt.Errorf("failed to do meta-request: %w", err)
+	}
+	defer resp.Body.Close()
+
+	// Expect HTTP 200
+	if resp.StatusCode != http.StatusOK {
+		return nil, 0, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
+	}
+
+	// Read the body, decompressing it if the response is gzip-encoded
+	var body []byte
+	if strings.EqualFold(resp.Header.Get("Content-Encoding"), "gzip") {
+		gr, gzErr := gzip.NewReader(resp.Body)
+		if gzErr != nil {
+			return nil, 0, fmt.Errorf("gzip reader: %w", gzErr)
+		}
+		body, err = io.ReadAll(gr)
+		gr.Close()
+	} else {
+		body, err = io.ReadAll(resp.Body)
+	}
+	if err != nil {
+		return nil, 0, fmt.Errorf("reading body: %w", err)
+	}
+
+	// Detect Qwant's CAPTCHA block before attempting to parse
+	if strings.Contains(string(body), `"error_code":27`) {
+		return nil, 0, fmt.Errorf("Qwant CAPTCHA triggered, request blocked")
+	}
+
+	var apiResp QwantTextAPIResponse
+	if err := json.Unmarshal(body, &apiResp); err != nil {
+		return nil, 0, fmt.Errorf("parsing JSON: %w", err)
+	}
+
+	if len(apiResp.Data.Result.Items.Mainline) == 0 {
+		return nil, 0, fmt.Errorf("no results in mainline")
+	}
+
+	var results []TextSearchResult
+	for _, item := range apiResp.Data.Result.Items.Mainline[0].Items {
+		results = append(results, TextSearchResult{
+			URL:         cleanQwantURL(item.URL),
+			Header:      item.Title,
+			Description: item.Description,
+			Source:      "Qwant",
+		})
+	}
+
+	return results, time.Since(startTime), nil
+}
+
+// cleanQwantURL extracts the main part of the URL, removing tracking information
+func cleanQwantURL(rawURL string) string {
+	u, err := url.Parse(rawURL)
+	if err != nil {
+		return rawURL
+	}
+	return u.Scheme + "://" + u.Host + u.Path
+}
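Note: cleanQwantURL is carried over verbatim. It rebuilds the URL from scheme, host, and path only, so query strings and fragments (including tracking parameters) are dropped, and input that fails to parse is returned unchanged:

	cleanQwantURL("https://example.com/a?utm_source=qwant#top") // "https://example.com/a"
	cleanQwantURL("://not-a-url")                               // parse error, returned as-is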
diff --git a/text.go b/text.go
index f0d56ef..02229b8 100755
--- a/text.go
+++ b/text.go
@@ -18,7 +18,7 @@ var allTextSearchEngines = []SearchEngine{
 	{Name: "LibreX", Func: wrapTextSearchFunc(PerformLibreXTextSearch)},
 	{Name: "Brave", Func: wrapTextSearchFunc(PerformBraveTextSearch)},
 	{Name: "DuckDuckGo", Func: wrapTextSearchFunc(PerformDuckDuckGoTextSearch)},
-	{Name: "Quant", Func: wrapTextSearchFunc(PerformQwantTextSearch)}, // Broken !
+	{Name: "Qwant", Func: wrapTextSearchFunc(PerformQwantTextSearch)}, // Broken !
 	//{Name: "SearXNG", Func: wrapTextSearchFunc(PerformSearXTextSearch)}, // bruh
 }
 
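Note: text.go and text-extra.go register the same engine list, so the "Quant" to "Qwant" rename must land in both. As a quick smoke test of the renamed entry, the underlying search function can be called directly; this usage is hypothetical and assumes the repo's GetUserAgent and DoMetaProxyRequest are already configured:

	results, elapsed, err := PerformQwantTextSearch("golang", "0", "en_CA", 1)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("got %d results in %v\n", len(results), elapsed)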