added imgur + some cleanup
This commit is contained in:
parent
12b32b6600
commit
b3eb7e39ea
8 changed files with 324 additions and 127 deletions
143
images-imgur.go
Normal file
143
images-imgur.go
Normal file
|
@ -0,0 +1,143 @@
|
|||
package main
|
||||
|
||||
import (
	"fmt"
	"net/http"
	"net/url"
	"strconv"
	"strings"
	"time"

	"github.com/PuerkitoBio/goquery"
)
|
||||
|
||||
// PerformImgurImageSearch performs an image search on Imgur and returns the results
|
||||
func PerformImgurImageSearch(query, safe, lang string, page int) ([]ImageSearchResult, error) {
|
||||
var results []ImageSearchResult
|
||||
searchURL := buildImgurSearchURL(query, page)
|
||||
|
||||
resp, err := http.Get(searchURL)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("making request: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
doc, err := goquery.NewDocumentFromReader(resp.Body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("loading HTML document: %v", err)
|
||||
}
|
||||
|
||||
doc.Find("div.cards div.post").Each(func(i int, s *goquery.Selection) {
|
||||
thumbnailSrc, exists := s.Find("a img").Attr("src")
|
||||
if !exists || len(thumbnailSrc) < 25 {
|
||||
return
|
||||
}
|
||||
imgSrc := strings.Replace(thumbnailSrc, "b.", ".", 1)
|
||||
|
||||
// Ensure the URLs have the correct protocol
|
||||
if !strings.HasPrefix(thumbnailSrc, "http") {
|
||||
thumbnailSrc = "https:" + thumbnailSrc
|
||||
}
|
||||
if !strings.HasPrefix(imgSrc, "http") {
|
||||
imgSrc = "https:" + imgSrc
|
||||
}
|
||||
|
||||
urlPath, exists := s.Find("a").Attr("href")
|
||||
if !exists {
|
||||
return
|
||||
}
|
||||
|
||||
// Scrape the image directly from the Imgur page
|
||||
imgSrc = scrapeImageFromImgurPage("https://imgur.com" + urlPath)
|
||||
|
||||
// Remove any query parameters from the URL
|
||||
imgSrc = removeQueryParameters(imgSrc)
|
||||
|
||||
title, _ := s.Find("a img").Attr("alt")
|
||||
|
||||
width, _ := strconv.Atoi(s.Find("a img").AttrOr("width", "0"))
|
||||
height, _ := strconv.Atoi(s.Find("a img").AttrOr("height", "0"))
|
||||
|
||||
results = append(results, ImageSearchResult{
|
||||
Thumbnail: thumbnailSrc,
|
||||
Title: strings.TrimSpace(title),
|
||||
Media: imgSrc,
|
||||
Width: width,
|
||||
Height: height,
|
||||
Source: "https://imgur.com" + urlPath,
|
||||
ThumbProxy: imgSrc, //"/img_proxy?url=" + url.QueryEscape(imgSrc)
|
||||
})
|
||||
})
|
||||
|
||||
return results, nil
|
||||
}
|
||||
|
||||
// scrapeImageFromImgurPage scrapes the image source from the Imgur page
|
||||
func scrapeImageFromImgurPage(pageURL string) string {
|
||||
resp, err := http.Get(pageURL)
|
||||
if err != nil {
|
||||
fmt.Printf("Error fetching page: %v\n", err)
|
||||
return ""
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
fmt.Printf("Unexpected status code: %d\n", resp.StatusCode)
|
||||
return ""
|
||||
}
|
||||
|
||||
doc, err := goquery.NewDocumentFromReader(resp.Body)
|
||||
if err != nil {
|
||||
fmt.Printf("Error loading HTML document: %v\n", err)
|
||||
return ""
|
||||
}
|
||||
|
||||
imgSrc, exists := doc.Find("meta[property='og:image']").Attr("content")
|
||||
if !exists {
|
||||
fmt.Printf("Image not found on page: %s\n", pageURL)
|
||||
return ""
|
||||
}
|
||||
|
||||
// Ensure the URL has the correct protocol
|
||||
if !strings.HasPrefix(imgSrc, "http") {
|
||||
imgSrc = "https:" + imgSrc
|
||||
}
|
||||
|
||||
return imgSrc
|
||||
}
|
||||
|
||||
// removeQueryParameters returns rawURL with its query string removed.
//
// Scheme, host, path and fragment are kept as parsed. A bare trailing "?"
// (an empty query) is dropped as well. If rawURL cannot be parsed, the error
// is printed to stdout and rawURL is returned unchanged.
func removeQueryParameters(rawURL string) string {
	parsedURL, err := url.Parse(rawURL)
	if err != nil {
		fmt.Printf("Error parsing URL: %v\n", err)
		return rawURL
	}

	parsedURL.RawQuery = ""
	// url.Parse remembers a trailing "?" in ForceQuery; unless it is cleared,
	// String() re-emits the "?" even though RawQuery is empty.
	parsedURL.ForceQuery = false

	return parsedURL.String()
}
|
||||
|
||||
// buildImgurSearchURL returns the Imgur search URL for query and page.
//
// The "p" parameter is sent as page-1, so page 1 maps to p=0. Pages below 1
// are clamped to the first page instead of producing a negative index. The
// query is URL-encoded and the parameters are emitted in sorted key order.
func buildImgurSearchURL(query string, page int) string {
	const baseURL = "https://imgur.com/search/score/all"

	pageIndex := 0
	if page > 1 {
		pageIndex = page - 1
	}

	params := url.Values{}
	params.Set("q", query)
	params.Set("qs", "thumbs")
	params.Set("p", strconv.Itoa(pageIndex))

	return baseURL + "?" + params.Encode()
}
|
||||
|
||||
// func main() {
|
||||
// results, err := PerformImgurImageSearch("cats", "true", "en", 1)
|
||||
// if err != nil {
|
||||
// fmt.Println("Error:", err)
|
||||
// return
|
||||
// }
|
||||
|
||||
// for _, result := range results {
|
||||
// fmt.Printf("Title: %s\nSource: %s\nMedia: %s\nThumbnail: %s\nThumbProxy: %s\nWidth: %d\nHeight: %d\n\n",
|
||||
// result.Title, result.Source, result.Media, result.Thumbnail, result.ThumbProxy, result.Width, result.Height)
|
||||
// }
|
||||
// }
|
Loading…
Add table
Add a link
Reference in a new issue