2024-08-12 12:56:42 +02:00
|
|
|
package main
|
|
|
|
|
|
|
|
import (
|
2024-08-21 12:30:03 +02:00
|
|
|
"encoding/json"
|
2024-08-12 12:56:42 +02:00
|
|
|
"fmt"
|
|
|
|
"net/http"
|
|
|
|
"net/url"
|
|
|
|
"strings"
|
|
|
|
"time"
|
|
|
|
|
|
|
|
"github.com/PuerkitoBio/goquery"
|
|
|
|
)
|
|
|
|
|
2024-09-12 22:11:39 +02:00
|
|
|
// PerformBingImageSearch performs a Bing image search and returns the results.
|
2024-08-12 12:56:42 +02:00
|
|
|
func PerformBingImageSearch(query, safe, lang string, page int) ([]ImageSearchResult, time.Duration, error) {
|
|
|
|
startTime := time.Now()
|
|
|
|
|
|
|
|
// Build the search URL
|
|
|
|
searchURL := buildBingSearchURL(query, page)
|
|
|
|
|
2025-01-12 16:46:52 +01:00
|
|
|
// Create the HTTP request
|
|
|
|
req, err := http.NewRequest("GET", searchURL, nil)
|
|
|
|
if err != nil {
|
|
|
|
return nil, 0, fmt.Errorf("creating request: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Set User-Agent
|
|
|
|
ImageUserAgent, err := GetUserAgent("Image-Search-Bing")
|
|
|
|
if err != nil {
|
|
|
|
return nil, 0, fmt.Errorf("generating User-Agent: %v", err)
|
|
|
|
}
|
|
|
|
req.Header.Set("User-Agent", ImageUserAgent)
|
|
|
|
|
|
|
|
// Use MetaProxy if enabled
|
|
|
|
var resp *http.Response
|
|
|
|
if config.MetaProxyEnabled && metaProxyClient != nil {
|
|
|
|
resp, err = metaProxyClient.Do(req)
|
|
|
|
} else {
|
|
|
|
client := &http.Client{Timeout: 10 * time.Second}
|
|
|
|
resp, err = client.Do(req)
|
|
|
|
}
|
2024-08-12 12:56:42 +02:00
|
|
|
if err != nil {
|
|
|
|
return nil, 0, fmt.Errorf("making request: %v", err)
|
|
|
|
}
|
|
|
|
defer resp.Body.Close()
|
|
|
|
|
|
|
|
if resp.StatusCode != http.StatusOK {
|
|
|
|
return nil, 0, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Parse the HTML document
|
|
|
|
doc, err := goquery.NewDocumentFromReader(resp.Body)
|
|
|
|
if err != nil {
|
|
|
|
return nil, 0, fmt.Errorf("loading HTML document: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Extract data using goquery
|
|
|
|
var results []ImageSearchResult
|
2024-08-21 12:30:03 +02:00
|
|
|
doc.Find(".iusc").Each(func(i int, s *goquery.Selection) {
|
|
|
|
// Extract the m parameter (JSON-encoded image metadata)
|
|
|
|
metadata, exists := s.Attr("m")
|
|
|
|
if !exists {
|
|
|
|
return
|
2024-08-12 12:56:42 +02:00
|
|
|
}
|
|
|
|
|
2024-11-19 10:36:33 +01:00
|
|
|
// Parse the metadata to get the direct image URL and title
|
2024-08-21 12:30:03 +02:00
|
|
|
var data map[string]interface{}
|
|
|
|
if err := json.Unmarshal([]byte(metadata), &data); err == nil {
|
|
|
|
mediaURL, ok := data["murl"].(string)
|
2024-10-31 22:24:23 +01:00
|
|
|
if !ok {
|
|
|
|
return
|
2024-08-21 12:30:03 +02:00
|
|
|
}
|
2024-10-31 22:24:23 +01:00
|
|
|
|
2024-11-19 10:36:33 +01:00
|
|
|
imgURL, ok := data["imgurl"].(string)
|
|
|
|
if !ok {
|
|
|
|
imgURL = mediaURL // Fallback to mediaURL if imgurl is not available
|
|
|
|
}
|
|
|
|
|
|
|
|
// Use imgURL as the direct image URL
|
|
|
|
directImageURL := imgURL
|
|
|
|
|
2024-10-31 22:24:23 +01:00
|
|
|
// Extract title from the metadata
|
|
|
|
title, _ := data["t"].(string)
|
|
|
|
|
2024-11-19 10:36:33 +01:00
|
|
|
// Extract dimensions if available
|
|
|
|
width := 0
|
|
|
|
height := 0
|
|
|
|
if ow, ok := data["ow"].(float64); ok {
|
|
|
|
width = int(ow)
|
|
|
|
}
|
|
|
|
if oh, ok := data["oh"].(float64); ok {
|
|
|
|
height = int(oh)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Extract thumbnail URL from the 'turl' field
|
|
|
|
thumbURL, _ := data["turl"].(string)
|
|
|
|
if thumbURL == "" {
|
|
|
|
// As a fallback, try to get it from the 'src' or 'data-src' attributes
|
|
|
|
imgTag := s.Find("img")
|
|
|
|
thumbURL, exists = imgTag.Attr("src")
|
|
|
|
if !exists {
|
|
|
|
thumbURL, _ = imgTag.Attr("data-src")
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-10-31 22:24:23 +01:00
|
|
|
results = append(results, ImageSearchResult{
|
2024-11-19 10:36:33 +01:00
|
|
|
Thumb: thumbURL,
|
|
|
|
Title: strings.TrimSpace(title),
|
|
|
|
Full: directImageURL,
|
|
|
|
Source: mediaURL,
|
|
|
|
Width: width,
|
|
|
|
Height: height,
|
2024-10-31 22:24:23 +01:00
|
|
|
})
|
2024-08-21 12:30:03 +02:00
|
|
|
}
|
2024-08-12 12:56:42 +02:00
|
|
|
})
|
|
|
|
|
|
|
|
duration := time.Since(startTime)
|
|
|
|
|
2024-10-31 22:24:23 +01:00
|
|
|
// Check if the number of results is zero
|
2024-08-21 12:30:03 +02:00
|
|
|
if len(results) == 0 {
|
2024-08-12 12:56:42 +02:00
|
|
|
return nil, duration, fmt.Errorf("no images found")
|
|
|
|
}
|
|
|
|
|
|
|
|
return results, duration, nil
|
|
|
|
}
|
|
|
|
|
2024-09-12 22:11:39 +02:00
|
|
|
// buildBingSearchURL constructs the search URL for Bing Image Search.
//
// page is 1-based: every page covers 35 results, so page N is requested with
// first=(N-1)*35+1. Pages below 1 are clamped to the first page so that the
// "first" offset is never zero or negative. The query string is produced by
// url.Values.Encode, which escapes the values and sorts the keys.
func buildBingSearchURL(query string, page int) string {
	const (
		baseURL        = "https://www.bing.com/images/search"
		resultsPerPage = 35
	)

	// Guard against page <= 0, which would otherwise yield a negative offset.
	if page < 1 {
		page = 1
	}

	params := url.Values{}
	params.Set("q", query)
	// Pagination, but increasing it doesn't seem to make a difference.
	params.Set("first", fmt.Sprintf("%d", (page-1)*resultsPerPage+1))
	params.Set("count", fmt.Sprintf("%d", resultsPerPage))
	params.Set("form", "HDRSC2")

	return baseURL + "?" + params.Encode()
}
|
|
|
|
|
2024-09-12 22:11:39 +02:00
|
|
|
// Example usage in main (commented out for clarity)
|
2024-08-12 12:56:42 +02:00
|
|
|
// func main() {
|
|
|
|
// results, duration, err := PerformBingImageSearch("kittens", "false", "en", 1)
|
|
|
|
// if err != nil {
|
|
|
|
// fmt.Println("Error:", err)
|
|
|
|
// return
|
|
|
|
// }
|
|
|
|
|
|
|
|
// fmt.Printf("Search took: %v\n", duration)
|
|
|
|
// fmt.Printf("Total results: %d\n", len(results))
|
|
|
|
// for _, result := range results {
|
|
|
|
// fmt.Printf("Title: %s\nThumbnail: %s\nWidth: %d\nHeight: %d\nFull (Direct Image URL): %s\nSource (Original Image URL): %s\n\n",
// result.Title, result.Thumb, result.Width, result.Height, result.Full, result.Source)
|
|
|
|
// }
|
|
|
|
// }
|