192 lines
5 KiB
Go
192 lines
5 KiB
Go
package main
|
|
|
|
import (
|
|
"fmt"
|
|
"net/http"
|
|
"net/url"
|
|
"strconv"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/PuerkitoBio/goquery"
|
|
)
|
|
|
|
// PerformImgurImageSearch performs an image search on Imgur and returns the results
|
|
func PerformImgurImageSearch(query, safe, lang string, page int) ([]ImageSearchResult, time.Duration, error) {
|
|
startTime := time.Now() // Start the timer
|
|
|
|
var results []ImageSearchResult
|
|
searchURL := buildImgurSearchURL(query, page)
|
|
|
|
// Create the HTTP request
|
|
req, err := http.NewRequest("GET", searchURL, nil)
|
|
if err != nil {
|
|
return nil, 0, fmt.Errorf("creating request: %v", err)
|
|
}
|
|
|
|
// Get the User-Agent string
|
|
imgurUserAgent, err := GetUserAgent("Image-Search-Imgur")
|
|
if err != nil {
|
|
return nil, 0, fmt.Errorf("getting user-agent: %v", err)
|
|
}
|
|
req.Header.Set("User-Agent", imgurUserAgent)
|
|
|
|
// Perform the HTTP request with MetaProxy if enabled
|
|
var resp *http.Response
|
|
if config.MetaProxyEnabled && metaProxyClient != nil {
|
|
resp, err = metaProxyClient.Do(req)
|
|
} else {
|
|
client := &http.Client{}
|
|
resp, err = client.Do(req)
|
|
}
|
|
if err != nil {
|
|
return nil, 0, fmt.Errorf("making request: %v", err)
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
if resp.StatusCode != http.StatusOK {
|
|
return nil, 0, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
|
|
}
|
|
|
|
// Parse the HTML document
|
|
doc, err := goquery.NewDocumentFromReader(resp.Body)
|
|
if err != nil {
|
|
return nil, 0, fmt.Errorf("loading HTML document: %v", err)
|
|
}
|
|
|
|
doc.Find("div.cards div.post").Each(func(i int, s *goquery.Selection) {
|
|
thumbnailSrc, exists := s.Find("a img").Attr("src")
|
|
if !exists || len(thumbnailSrc) < 25 {
|
|
return
|
|
}
|
|
imgSrc := strings.Replace(thumbnailSrc, "b.", ".", 1)
|
|
|
|
// Ensure the URLs have the correct protocol
|
|
if !strings.HasPrefix(thumbnailSrc, "http") {
|
|
thumbnailSrc = "https:" + thumbnailSrc
|
|
}
|
|
if !strings.HasPrefix(imgSrc, "http") {
|
|
imgSrc = "https:" + imgSrc
|
|
}
|
|
|
|
urlPath, exists := s.Find("a").Attr("href")
|
|
if !exists {
|
|
return
|
|
}
|
|
|
|
// Scrape the image directly from the Imgur page
|
|
imgSrc = scrapeImageFromImgurPage("https://imgur.com" + urlPath)
|
|
|
|
// Remove any query parameters from the URL
|
|
imgSrc = removeQueryParameters(imgSrc)
|
|
|
|
title, _ := s.Find("a img").Attr("alt")
|
|
|
|
width, _ := strconv.Atoi(s.Find("a img").AttrOr("width", "0"))
|
|
height, _ := strconv.Atoi(s.Find("a img").AttrOr("height", "0"))
|
|
|
|
results = append(results, ImageSearchResult{
|
|
Thumb: thumbnailSrc,
|
|
Title: strings.TrimSpace(title),
|
|
Full: imgSrc,
|
|
Width: width,
|
|
Height: height,
|
|
Source: "https://imgur.com" + urlPath,
|
|
})
|
|
})
|
|
|
|
duration := time.Since(startTime) // Calculate the duration
|
|
|
|
if len(results) == 0 {
|
|
return nil, duration, fmt.Errorf("no images found")
|
|
}
|
|
|
|
return results, duration, nil
|
|
}
|
|
|
|
// scrapeImageFromImgurPage scrapes the image source from the Imgur page
|
|
func scrapeImageFromImgurPage(pageURL string) string {
|
|
req, err := http.NewRequest("GET", pageURL, nil)
|
|
if err != nil {
|
|
fmt.Printf("Error creating request for page: %v\n", err)
|
|
return ""
|
|
}
|
|
|
|
// Get the User-Agent string
|
|
imgurUserAgent, err := GetUserAgent("Image-Search-Imgur")
|
|
if err == nil {
|
|
req.Header.Set("User-Agent", imgurUserAgent)
|
|
}
|
|
|
|
// Perform the request using MetaProxy if enabled
|
|
var resp *http.Response
|
|
if config.MetaProxyEnabled && metaProxyClient != nil {
|
|
resp, err = metaProxyClient.Do(req)
|
|
} else {
|
|
client := &http.Client{}
|
|
resp, err = client.Do(req)
|
|
}
|
|
if err != nil {
|
|
fmt.Printf("Error fetching page: %v\n", err)
|
|
return ""
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
if resp.StatusCode != http.StatusOK {
|
|
fmt.Printf("Unexpected status code: %d\n", resp.StatusCode)
|
|
return ""
|
|
}
|
|
|
|
doc, err := goquery.NewDocumentFromReader(resp.Body)
|
|
if err != nil {
|
|
fmt.Printf("Error loading HTML document: %v\n", err)
|
|
return ""
|
|
}
|
|
|
|
imgSrc, exists := doc.Find("meta[property='og:image']").Attr("content")
|
|
if !exists {
|
|
fmt.Printf("Image not found on page: %s\n", pageURL)
|
|
return ""
|
|
}
|
|
|
|
// Ensure the URL has the correct protocol
|
|
if !strings.HasPrefix(imgSrc, "http") {
|
|
imgSrc = "https:" + imgSrc
|
|
}
|
|
|
|
return imgSrc
|
|
}
|
|
|
|
// removeQueryParameters returns rawURL with its query string removed. Every
// other component, including any fragment, is preserved.
//
// If rawURL cannot be parsed, a diagnostic is printed to stdout and rawURL is
// returned unchanged.
func removeQueryParameters(rawURL string) string {
	parsedURL, err := url.Parse(rawURL)
	if err != nil {
		fmt.Printf("Error parsing URL: %v\n", err)
		return rawURL
	}

	parsedURL.RawQuery = ""
	// url.Parse records a bare trailing "?" in ForceQuery; clear it as well so
	// the "?" is not re-emitted by String.
	parsedURL.ForceQuery = false

	return parsedURL.String()
}
|
|
|
|
// buildImgurSearchURL returns the Imgur search URL for query and the given
// 1-based page number.
//
// The page is sent as the zero-based "p" parameter (page-1). Page values below
// 1 are clamped to the first page so a negative index is never sent.
func buildImgurSearchURL(query string, page int) string {
	const baseURL = "https://imgur.com/search/score/all"

	pageIndex := page - 1
	if pageIndex < 0 {
		pageIndex = 0
	}

	params := url.Values{}
	params.Set("q", query)
	params.Set("qs", "thumbs")
	params.Set("p", strconv.Itoa(pageIndex))

	return baseURL + "?" + params.Encode()
}
|
|
|
|
// func main() {
|
|
// results, duration, err := PerformImgurImageSearch("cats", "true", "en", 1)
|
|
// if err != nil {
|
|
// fmt.Println("Error:", err)
|
|
// return
|
|
// }
|
|
|
|
// fmt.Printf("Search took: %v\n", duration)
|
|
// for _, result := range results {
|
|
// fmt.Printf("Title: %s\nSource: %s\nMedia: %s\nThumbnail: %s\nThumbProxy: %s\nWidth: %d\nHeight: %d\n\n",
|
|
// result.Title, result.Source, result.Media, result.Thumbnail, result.ThumbProxy, result.Width, result.Height)
|
|
// }
|
|
// }
|