242 lines
6.3 KiB
Go
242 lines
6.3 KiB
Go
package main
|
|
|
|
import (
|
|
"fmt"
|
|
"net/http"
|
|
"net/url"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/PuerkitoBio/goquery"
|
|
)
|
|
|
|
// NextPageCache is a specialized cache for storing next page links
|
|
type NextPageCache struct {
|
|
mu sync.Mutex
|
|
links map[string]string
|
|
expiration time.Duration
|
|
}
|
|
|
|
// NewNextPageCache creates a new NextPageCache with a specified expiration duration
|
|
func NewNextPageCache(expiration time.Duration) *NextPageCache {
|
|
return &NextPageCache{
|
|
links: make(map[string]string),
|
|
expiration: expiration,
|
|
}
|
|
}
|
|
|
|
// Get retrieves the next page link for a given key from the cache
|
|
func (npc *NextPageCache) Get(key CacheKey) (string, bool) {
|
|
npc.mu.Lock()
|
|
defer npc.mu.Unlock()
|
|
|
|
link, exists := npc.links[npc.keyToString(key)]
|
|
if !exists {
|
|
return "", false
|
|
}
|
|
|
|
return link, true
|
|
}
|
|
|
|
// Set stores the next page link for a given key in the cache
|
|
// Idk it maybye worth it to use "cache.go" for this
|
|
func (npc *NextPageCache) Set(key CacheKey, link string) {
|
|
npc.mu.Lock()
|
|
defer npc.mu.Unlock()
|
|
|
|
npc.links[npc.keyToString(key)] = link
|
|
}
|
|
|
|
// keyToString converts a CacheKey to a string representation
|
|
func (npc *NextPageCache) keyToString(key CacheKey) string {
|
|
return fmt.Sprintf("%s|%d|%t|%s|%s", key.Query, key.Page, key.Safe, key.Lang, key.Type)
|
|
}
|
|
|
|
var (
|
|
nextPageCache = NewNextPageCache(6 * time.Hour) // Cache with 6-hour expiration
|
|
)
|
|
|
|
// PerformDeviantArtImageSearch performs a search on DeviantArt and returns a list of image results
|
|
func PerformDeviantArtImageSearch(query, safe, lang string, page int) ([]ImageSearchResult, time.Duration, error) {
|
|
startTime := time.Now()
|
|
|
|
cacheKey := CacheKey{
|
|
Query: query,
|
|
Page: page,
|
|
Safe: safe == "active",
|
|
Lang: lang,
|
|
Type: "deviantart",
|
|
}
|
|
|
|
// Check if the next page link is cached
|
|
var searchURL string
|
|
if page > 1 {
|
|
if nextPageLink, found := nextPageCache.Get(cacheKey); found {
|
|
searchURL = nextPageLink
|
|
} else {
|
|
return nil, 0, fmt.Errorf("next page link not found in cache")
|
|
}
|
|
} else {
|
|
searchURL = buildDeviantArtSearchURL(query, page)
|
|
}
|
|
|
|
// Get the User-Agent string
|
|
DeviantArtImageUserAgent, err := GetUserAgent("Image-Search-DeviantArt")
|
|
if err != nil {
|
|
return nil, 0, err
|
|
}
|
|
|
|
// Create the HTTP request
|
|
req, err := http.NewRequest("GET", searchURL, nil)
|
|
if err != nil {
|
|
return nil, 0, fmt.Errorf("creating request: %v", err)
|
|
}
|
|
req.Header.Set("User-Agent", DeviantArtImageUserAgent)
|
|
|
|
// Perform the request using MetaProxy if enabled
|
|
var resp *http.Response
|
|
if config.MetaProxyEnabled && metaProxyClient != nil {
|
|
resp, err = metaProxyClient.Do(req)
|
|
} else {
|
|
client := &http.Client{}
|
|
resp, err = client.Do(req)
|
|
}
|
|
if err != nil {
|
|
return nil, 0, fmt.Errorf("making request: %v", err)
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
if resp.StatusCode != http.StatusOK {
|
|
return nil, 0, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
|
|
}
|
|
|
|
// Parse the HTML document
|
|
doc, err := goquery.NewDocumentFromReader(resp.Body)
|
|
if err != nil {
|
|
return nil, 0, fmt.Errorf("loading HTML document: %v", err)
|
|
}
|
|
|
|
// Channel to receive valid image results
|
|
resultsChan := make(chan ImageSearchResult)
|
|
var wg sync.WaitGroup
|
|
|
|
// Extract data using goquery
|
|
doc.Find("div._2pZkk div div a").Each(func(i int, s *goquery.Selection) {
|
|
// Skip images that are blurred (premium content)
|
|
premiumText := s.Find("../div/div/div").Text()
|
|
if strings.Contains(premiumText, "Watch the artist to view this deviation") {
|
|
return
|
|
}
|
|
|
|
// Extract image source, fallback on data-src if necessary
|
|
imgSrc, exists := s.Find("div img").Attr("srcset")
|
|
if !exists {
|
|
imgSrc, exists = s.Find("div img").Attr("data-src")
|
|
}
|
|
if !exists || imgSrc == "" {
|
|
return
|
|
}
|
|
imgSrc = strings.Split(imgSrc, " ")[0]
|
|
parsedURL, err := url.Parse(imgSrc)
|
|
if err == nil {
|
|
parts := strings.Split(parsedURL.Path, "/v1")
|
|
parsedURL.Path = parts[0]
|
|
imgSrc = parsedURL.String()
|
|
}
|
|
|
|
// Extract URL and title
|
|
resultURL := s.AttrOr("href", "")
|
|
title := s.AttrOr("aria-label", "")
|
|
|
|
// Only proceed if title, URL, and img_src are not empty
|
|
if title != "" && resultURL != "" && imgSrc != "" {
|
|
wg.Add(1)
|
|
go func(imgSrc, resultURL, title string) {
|
|
defer wg.Done()
|
|
// Verify if the image URL is accessible
|
|
if DeviantArtisValidImageURL(imgSrc, DeviantArtImageUserAgent, resultURL) {
|
|
resultsChan <- ImageSearchResult{
|
|
Title: strings.TrimSpace(title),
|
|
Full: imgSrc,
|
|
Width: 0,
|
|
Height: 0,
|
|
Source: resultURL,
|
|
}
|
|
}
|
|
}(imgSrc, resultURL, title)
|
|
}
|
|
})
|
|
|
|
// Close the results channel when all goroutines are done
|
|
go func() {
|
|
wg.Wait()
|
|
close(resultsChan)
|
|
}()
|
|
|
|
// Collect results from the channel
|
|
var results []ImageSearchResult
|
|
for result := range resultsChan {
|
|
results = append(results, result)
|
|
}
|
|
|
|
// Cache the next page link, if any
|
|
nextPageLink := doc.Find("a._1OGeq").Last().AttrOr("href", "")
|
|
if nextPageLink != "" {
|
|
nextPageCache.Set(cacheKey, nextPageLink)
|
|
}
|
|
|
|
duration := time.Since(startTime)
|
|
|
|
// Check if the number of results is zero
|
|
if len(results) == 0 {
|
|
return nil, duration, fmt.Errorf("no images found")
|
|
}
|
|
|
|
return results, duration, nil
|
|
}
|
|
|
|
// buildDeviantArtSearchURL returns the DeviantArt search URL for query, with
// the query URL-encoded into the "q" parameter. The page argument does not
// influence the result.
func buildDeviantArtSearchURL(query string, page int) string {
	const endpoint = "https://www.deviantart.com/search"

	values := url.Values{"q": []string{query}}
	return endpoint + "?" + values.Encode()
}
|
|
|
|
// deviantArtImageCheckClient is the HTTP client shared by all image probes.
// It carries a timeout so that a host which accepts the connection but never
// answers cannot block the caller indefinitely.
var deviantArtImageCheckClient = &http.Client{Timeout: 10 * time.Second}

// DeviantArtisValidImageURL reports whether a HEAD request for imgSrc, sent
// with the given User-Agent and Referer headers, is answered with status 200.
//
// It returns false when the request cannot be built, when it fails or times
// out, and for every other status code.
func DeviantArtisValidImageURL(imgSrc, userAgent, referer string) bool {
	req, err := http.NewRequest(http.MethodHead, imgSrc, nil)
	if err != nil {
		return false
	}

	// Set headers to mimic a regular browser request.
	req.Header.Set("User-Agent", userAgent)
	req.Header.Set("Referer", referer)

	resp, err := deviantArtImageCheckClient.Do(req)
	if err != nil {
		return false
	}
	defer resp.Body.Close()

	return resp.StatusCode == http.StatusOK
}
|
|
|
|
// // Example usage:
|
|
// func main() {
|
|
// results, duration, err := PerformDeviantArtImageSearch("kittens", "false", "en", 1)
|
|
// if err != nil {
|
|
// fmt.Println("Error:", err)
|
|
// return
|
|
// }
|
|
|
|
// fmt.Printf("Search took: %v\n", duration)
|
|
// fmt.Printf("Total results: %d\n", len(results))
|
|
// for _, result := range results {
|
|
// fmt.Printf("Title: %s\nThumbnail: %s\nMedia: %s\nSource (Original Image URL): %s\n\n",
|
|
// result.Title, result.Thumbnail, result.Media, result.Source)
|
|
// }
|
|
// }
|