Search/images-imgur.go
partisan 614ce8903e
All checks were successful
Run Integration Tests / test (push) Successful in 33s
added SOCKS5 proxy support
2025-01-12 16:46:52 +01:00

192 lines
5 KiB
Go

package main
import (
"fmt"
"net/http"
"net/url"
"strconv"
"strings"
"time"
"github.com/PuerkitoBio/goquery"
)
// PerformImgurImageSearch scrapes the Imgur search page for query and returns
// one ImageSearchResult per post card found, together with the elapsed time.
//
// page is forwarded to buildImgurSearchURL. The safe and lang parameters are
// accepted but not used by this function.
//
// For every card the post page is fetched as well (scrapeImageFromImgurPage)
// to obtain the full-size image URL. When that lookup yields nothing, the URL
// derived from the thumbnail is used instead so Full is still populated.
//
// On a request, status or parse failure the returned slice is nil and the
// duration is 0. When the page parses but no card qualifies, the error is
// "no images found" and the measured duration is still returned.
func PerformImgurImageSearch(query, safe, lang string, page int) ([]ImageSearchResult, time.Duration, error) {
	// Upper bound for the direct (non-proxied) request so a stalled
	// connection cannot block the caller indefinitely.
	const requestTimeout = 10 * time.Second

	startTime := time.Now()
	var results []ImageSearchResult

	searchURL := buildImgurSearchURL(query, page)

	req, err := http.NewRequest(http.MethodGet, searchURL, nil)
	if err != nil {
		return nil, 0, fmt.Errorf("creating request: %w", err)
	}

	imgurUserAgent, err := GetUserAgent("Image-Search-Imgur")
	if err != nil {
		return nil, 0, fmt.Errorf("getting user-agent: %w", err)
	}
	req.Header.Set("User-Agent", imgurUserAgent)

	// Route through the meta proxy client when it is enabled and available;
	// otherwise use a plain client with a timeout.
	var resp *http.Response
	if config.MetaProxyEnabled && metaProxyClient != nil {
		resp, err = metaProxyClient.Do(req)
	} else {
		client := &http.Client{Timeout: requestTimeout}
		resp, err = client.Do(req)
	}
	if err != nil {
		return nil, 0, fmt.Errorf("making request: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, 0, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
	}

	doc, err := goquery.NewDocumentFromReader(resp.Body)
	if err != nil {
		return nil, 0, fmt.Errorf("loading HTML document: %w", err)
	}

	doc.Find("div.cards div.post").Each(func(_ int, s *goquery.Selection) {
		img := s.Find("a img")

		// Cards without a usable thumbnail are skipped. The minimum length
		// of 25 is inherited from the original implementation
		// (NOTE(review): presumably filters placeholder sources - confirm).
		thumbnailSrc, exists := img.Attr("src")
		if !exists || len(thumbnailSrc) < 25 {
			return
		}

		urlPath, exists := s.Find("a").Attr("href")
		if !exists {
			return
		}
		pageURL := "https://imgur.com" + urlPath

		// Fallback full-size URL: drop the first "b." from the thumbnail
		// source (NOTE(review): presumably Imgur's thumbnail size suffix).
		fallbackSrc := strings.Replace(thumbnailSrc, "b.", ".", 1)

		// Protocol-relative sources ("//i.imgur.com/...") need a scheme.
		if !strings.HasPrefix(thumbnailSrc, "http") {
			thumbnailSrc = "https:" + thumbnailSrc
		}
		if !strings.HasPrefix(fallbackSrc, "http") {
			fallbackSrc = "https:" + fallbackSrc
		}

		// Prefer the image advertised by the post page itself; the helper
		// returns "" on any failure, in which case the thumbnail-derived URL
		// is used rather than emitting a result with an empty Full field.
		imgSrc := scrapeImageFromImgurPage(pageURL)
		if imgSrc == "" {
			imgSrc = fallbackSrc
		}
		imgSrc = removeQueryParameters(imgSrc)

		// A missing alt attribute yields an empty title; missing or
		// non-numeric width/height attributes leave the dimension at 0.
		title, _ := img.Attr("alt")
		width, _ := strconv.Atoi(img.AttrOr("width", "0"))
		height, _ := strconv.Atoi(img.AttrOr("height", "0"))

		results = append(results, ImageSearchResult{
			Thumb:  thumbnailSrc,
			Title:  strings.TrimSpace(title),
			Full:   imgSrc,
			Width:  width,
			Height: height,
			Source: pageURL,
		})
	})

	duration := time.Since(startTime)

	if len(results) == 0 {
		return nil, duration, fmt.Errorf("no images found")
	}
	return results, duration, nil
}
// scrapeImageFromImgurPage fetches pageURL and returns the URL found in the
// page's <meta property="og:image"> tag, prefixed with "https:" when it does
// not already start with "http".
//
// The function is best effort: every failure (request construction, network
// error, non-200 status, unparsable HTML, missing or empty og:image) is
// printed to standard output and reported to the caller as an empty string.
func scrapeImageFromImgurPage(pageURL string) string {
	// Upper bound for the direct (non-proxied) request so a stalled
	// connection cannot block the caller indefinitely.
	const requestTimeout = 10 * time.Second

	req, err := http.NewRequest(http.MethodGet, pageURL, nil)
	if err != nil {
		fmt.Printf("Error creating request for page: %v\n", err)
		return ""
	}

	// The User-Agent is optional here: if it cannot be obtained the request
	// is still sent with the default header.
	if imgurUserAgent, uaErr := GetUserAgent("Image-Search-Imgur"); uaErr == nil {
		req.Header.Set("User-Agent", imgurUserAgent)
	}

	// Route through the meta proxy client when it is enabled and available;
	// otherwise use a plain client with a timeout.
	var resp *http.Response
	if config.MetaProxyEnabled && metaProxyClient != nil {
		resp, err = metaProxyClient.Do(req)
	} else {
		client := &http.Client{Timeout: requestTimeout}
		resp, err = client.Do(req)
	}
	if err != nil {
		fmt.Printf("Error fetching page: %v\n", err)
		return ""
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		fmt.Printf("Unexpected status code: %d\n", resp.StatusCode)
		return ""
	}

	doc, err := goquery.NewDocumentFromReader(resp.Body)
	if err != nil {
		fmt.Printf("Error loading HTML document: %v\n", err)
		return ""
	}

	// An og:image tag with empty content is treated like a missing tag;
	// otherwise the scheme fix-up below would return the bogus URL "https:".
	imgSrc, exists := doc.Find("meta[property='og:image']").Attr("content")
	imgSrc = strings.TrimSpace(imgSrc)
	if !exists || imgSrc == "" {
		fmt.Printf("Image not found on page: %s\n", pageURL)
		return ""
	}

	// Protocol-relative URLs ("//i.imgur.com/...") need a scheme.
	if !strings.HasPrefix(imgSrc, "http") {
		imgSrc = "https:" + imgSrc
	}
	return imgSrc
}
// removeQueryParameters returns rawURL with its query string removed. Scheme,
// host, path and fragment are kept as parsed by net/url.
//
// If rawURL cannot be parsed, the error is printed to standard output and
// rawURL is returned unchanged.
func removeQueryParameters(rawURL string) string {
	parsedURL, err := url.Parse(rawURL)
	if err != nil {
		fmt.Printf("Error parsing URL: %v\n", err)
		return rawURL
	}

	parsedURL.RawQuery = ""
	// url.Parse records a bare trailing "?" in ForceQuery instead of RawQuery;
	// without clearing it String() would still emit the "?".
	parsedURL.ForceQuery = false

	return parsedURL.String()
}
// buildImgurSearchURL returns the Imgur search URL for query.
//
// page is 1-based; Imgur's "p" parameter is sent as page-1. Values of page
// below 1 are clamped to the first page so a negative index is never sent.
// The query is URL-encoded, and parameters appear in the key order produced by
// url.Values.Encode (sorted by key).
func buildImgurSearchURL(query string, page int) string {
	const baseURL = "https://imgur.com/search/score/all"

	// Convert to the zero-based index, treating anything below page 1 as
	// the first page.
	pageIndex := 0
	if page > 1 {
		pageIndex = page - 1
	}

	params := url.Values{}
	params.Set("q", query)
	params.Set("qs", "thumbs")
	params.Set("p", fmt.Sprintf("%d", pageIndex))

	return fmt.Sprintf("%s?%s", baseURL, params.Encode())
}
// Example usage, kept commented out for manual testing. The printed fields
// are the ImageSearchResult fields populated by PerformImgurImageSearch.
//
// func main() {
// 	results, duration, err := PerformImgurImageSearch("cats", "true", "en", 1)
// 	if err != nil {
// 		fmt.Println("Error:", err)
// 		return
// 	}
// 	fmt.Printf("Search took: %v\n", duration)
// 	for _, result := range results {
// 		fmt.Printf("Title: %s\nSource: %s\nFull: %s\nThumb: %s\nWidth: %d\nHeight: %d\n\n",
// 			result.Title, result.Source, result.Full, result.Thumb, result.Width, result.Height)
// 	}
// }