images from deviantart
This commit is contained in:
parent
088f80746a
commit
0019202571
3 changed files with 239 additions and 1 deletions
237
images-deviantart.go
Normal file
237
images-deviantart.go
Normal file
|
@ -0,0 +1,237 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
)
|
||||
|
||||
// NextPageCache is a specialized cache for storing next page links
|
||||
type NextPageCache struct {
|
||||
mu sync.Mutex
|
||||
links map[string]string
|
||||
expiration time.Duration
|
||||
}
|
||||
|
||||
// NewNextPageCache creates a new NextPageCache with a specified expiration duration
|
||||
func NewNextPageCache(expiration time.Duration) *NextPageCache {
|
||||
return &NextPageCache{
|
||||
links: make(map[string]string),
|
||||
expiration: expiration,
|
||||
}
|
||||
}
|
||||
|
||||
// Get retrieves the next page link for a given key from the cache
|
||||
func (npc *NextPageCache) Get(key CacheKey) (string, bool) {
|
||||
npc.mu.Lock()
|
||||
defer npc.mu.Unlock()
|
||||
|
||||
link, exists := npc.links[npc.keyToString(key)]
|
||||
if !exists {
|
||||
return "", false
|
||||
}
|
||||
|
||||
return link, true
|
||||
}
|
||||
|
||||
// Set stores the next page link for a given key in the cache
|
||||
// Idk it maybye worth it to use "cache.go" for this
|
||||
func (npc *NextPageCache) Set(key CacheKey, link string) {
|
||||
npc.mu.Lock()
|
||||
defer npc.mu.Unlock()
|
||||
|
||||
npc.links[npc.keyToString(key)] = link
|
||||
}
|
||||
|
||||
// keyToString converts a CacheKey to a string representation
|
||||
func (npc *NextPageCache) keyToString(key CacheKey) string {
|
||||
return fmt.Sprintf("%s|%d|%t|%s|%s", key.Query, key.Page, key.Safe, key.Lang, key.Type)
|
||||
}
|
||||
|
||||
var (
|
||||
nextPageCache = NewNextPageCache(6 * time.Hour) // Cache with 6-hour expiration
|
||||
)
|
||||
|
||||
// PerformDeviantArtImageSearch performs a search on DeviantArt and returns a list of image results
|
||||
func PerformDeviantArtImageSearch(query, safe, lang string, page int) ([]ImageSearchResult, time.Duration, error) {
|
||||
startTime := time.Now()
|
||||
|
||||
cacheKey := CacheKey{
|
||||
Query: query,
|
||||
Page: page,
|
||||
Safe: safe == "active",
|
||||
Lang: lang,
|
||||
Type: "deviantart",
|
||||
}
|
||||
|
||||
// Check if the next page link is cached
|
||||
var searchURL string
|
||||
if page > 1 {
|
||||
if nextPageLink, found := nextPageCache.Get(cacheKey); found {
|
||||
searchURL = nextPageLink
|
||||
} else {
|
||||
return nil, 0, fmt.Errorf("next page link not found in cache")
|
||||
}
|
||||
} else {
|
||||
searchURL = buildDeviantArtSearchURL(query, page)
|
||||
}
|
||||
|
||||
// Get the User-Agent string
|
||||
DeviantArtImageUserAgent, err := GetUserAgent("Image-Search-DeviantArt")
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
|
||||
// Make the HTTP request with User-Agent header
|
||||
client := &http.Client{}
|
||||
req, err := http.NewRequest("GET", searchURL, nil)
|
||||
if err != nil {
|
||||
return nil, 0, fmt.Errorf("creating request: %v", err)
|
||||
}
|
||||
req.Header.Set("User-Agent", DeviantArtImageUserAgent)
|
||||
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
return nil, 0, fmt.Errorf("making request: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return nil, 0, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
// Parse the HTML document
|
||||
doc, err := goquery.NewDocumentFromReader(resp.Body)
|
||||
if err != nil {
|
||||
return nil, 0, fmt.Errorf("loading HTML document: %v", err)
|
||||
}
|
||||
|
||||
// Channel to receive valid image results
|
||||
resultsChan := make(chan ImageSearchResult)
|
||||
var wg sync.WaitGroup
|
||||
|
||||
// Extract data using goquery
|
||||
doc.Find("div._2pZkk div div a").Each(func(i int, s *goquery.Selection) {
|
||||
// Skip images that are blurred (premium content)
|
||||
premiumText := s.Find("../div/div/div").Text()
|
||||
if strings.Contains(premiumText, "Watch the artist to view this deviation") {
|
||||
return
|
||||
}
|
||||
|
||||
// Extract image source, fallback on data-src if necessary
|
||||
imgSrc, exists := s.Find("div img").Attr("srcset")
|
||||
if !exists {
|
||||
imgSrc, exists = s.Find("div img").Attr("data-src")
|
||||
}
|
||||
if !exists || imgSrc == "" {
|
||||
return
|
||||
}
|
||||
imgSrc = strings.Split(imgSrc, " ")[0]
|
||||
parsedURL, err := url.Parse(imgSrc)
|
||||
if err == nil {
|
||||
parts := strings.Split(parsedURL.Path, "/v1")
|
||||
parsedURL.Path = parts[0]
|
||||
imgSrc = parsedURL.String()
|
||||
}
|
||||
|
||||
// Extract URL and title
|
||||
resultURL := s.AttrOr("href", "")
|
||||
title := s.AttrOr("aria-label", "")
|
||||
|
||||
// Only proceed if title, URL, and img_src are not empty
|
||||
if title != "" && resultURL != "" && imgSrc != "" {
|
||||
wg.Add(1)
|
||||
go func(imgSrc, resultURL, title string) {
|
||||
defer wg.Done()
|
||||
// Verify if the image URL is accessible
|
||||
if isValidImageURL(imgSrc, DeviantArtImageUserAgent, resultURL) {
|
||||
resultsChan <- ImageSearchResult{
|
||||
Title: strings.TrimSpace(title),
|
||||
Media: imgSrc,
|
||||
Width: 0,
|
||||
Height: 0,
|
||||
Source: resultURL,
|
||||
ThumbProxy: imgSrc,
|
||||
}
|
||||
}
|
||||
}(imgSrc, resultURL, title)
|
||||
}
|
||||
})
|
||||
|
||||
// Close the results channel when all goroutines are done
|
||||
go func() {
|
||||
wg.Wait()
|
||||
close(resultsChan)
|
||||
}()
|
||||
|
||||
// Collect results from the channel
|
||||
var results []ImageSearchResult
|
||||
for result := range resultsChan {
|
||||
results = append(results, result)
|
||||
}
|
||||
|
||||
// Cache the next page link, if any
|
||||
nextPageLink := doc.Find("a._1OGeq").Last().AttrOr("href", "")
|
||||
if nextPageLink != "" {
|
||||
nextPageCache.Set(cacheKey, nextPageLink)
|
||||
}
|
||||
|
||||
duration := time.Since(startTime)
|
||||
|
||||
// Check if the number of results is one or less
|
||||
if len(results) == 0 {
|
||||
return nil, duration, fmt.Errorf("no images found")
|
||||
}
|
||||
|
||||
return results, duration, nil
|
||||
}
|
||||
|
||||
// buildDeviantArtSearchURL returns the DeviantArt search URL for query, with
// the query URL-encoded into the "q" parameter. page is accepted but does not
// influence the result.
func buildDeviantArtSearchURL(query string, page int) string {
	endpoint := url.URL{
		Scheme:   "https",
		Host:     "www.deviantart.com",
		Path:     "/search",
		RawQuery: url.Values{"q": []string{query}}.Encode(),
	}
	return endpoint.String()
}
|
||||
|
||||
// isValidImageURL checks if the image URL is accessible with the provided
// User-Agent. It sends a HEAD request to imgSrc carrying userAgent and referer
// as headers and reports whether the response status is 200 OK. Any failure
// to build or perform the request, including a timeout, yields false.
func isValidImageURL(imgSrc, userAgent, referer string) bool {
	// Bound the probe: callers wait for every probe to finish, so a single
	// stalled connection must not hold up the whole search.
	client := &http.Client{Timeout: 5 * time.Second}

	req, err := http.NewRequest("HEAD", imgSrc, nil)
	if err != nil {
		return false
	}

	// Set headers to mimic a regular browser request
	req.Header.Set("User-Agent", userAgent)
	req.Header.Set("Referer", referer)

	resp, err := client.Do(req)
	if err != nil {
		return false
	}
	defer resp.Body.Close()

	return resp.StatusCode == http.StatusOK
}
|
||||
|
||||
// // Example usage:
|
||||
// func main() {
|
||||
// results, duration, err := PerformDeviantArtImageSearch("kittens", "false", "en", 1)
|
||||
// if err != nil {
|
||||
// fmt.Println("Error:", err)
|
||||
// return
|
||||
// }
|
||||
|
||||
// fmt.Printf("Search took: %v\n", duration)
|
||||
// fmt.Printf("Total results: %d\n", len(results))
|
||||
// for _, result := range results {
|
||||
// fmt.Printf("Title: %s\nThumbnail: %s\nMedia: %s\nSource (Original Image URL): %s\n\n",
|
||||
// result.Title, result.Thumbnail, result.Media, result.Source)
|
||||
// }
|
||||
// }
|
|
@ -58,7 +58,7 @@ func PerformQwantImageSearch(query, safe, lang string, page int) ([]ImageSearchR
|
|||
return nil, 0, fmt.Errorf("creating request: %v", err)
|
||||
}
|
||||
|
||||
ImageUserAgent, err := GetUserAgent("Image-Search")
|
||||
ImageUserAgent, err := GetUserAgent("Image-Search-Quant")
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
|
|
|
@ -13,6 +13,7 @@ var imageSearchEngines []SearchEngine
|
|||
func init() {
|
||||
imageSearchEngines = []SearchEngine{
|
||||
{Name: "Qwant", Func: wrapImageSearchFunc(PerformQwantImageSearch), Weight: 1},
|
||||
{Name: "DeviantArt", Func: wrapImageSearchFunc(PerformDeviantArtImageSearch), Weight: 2},
|
||||
{Name: "Bing", Func: wrapImageSearchFunc(PerformBingImageSearch), Weight: 2}, // Bing sometimes returns with low amount of images, this leads to danamica page loading not working
|
||||
{Name: "Imgur", Func: wrapImageSearchFunc(PerformImgurImageSearch), Weight: 3},
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue