Search/cache-images.go

524 lines
13 KiB
Go

package main
import (
"bytes"
"crypto/tls"
"encoding/json"
"fmt"
"image"
"image/gif"
"image/jpeg"
"image/png"
"io"
"net/http"
"os"
"path/filepath"
"sort"
"strings"
"sync"
"time"
"github.com/chai2010/webp"
"golang.org/x/image/bmp"
"golang.org/x/image/tiff"
)
// Package-level state shared by the image caching and serving code in this file.
var (
// cachingImages maps an image URL to the mutex cacheImage locks while it
// downloads and stores that URL, so the same URL is not cached concurrently.
cachingImages = make(map[string]*sync.Mutex)
// cachingImagesMu guards cachingImages.
cachingImagesMu sync.Mutex
// cachingSemaphore is a buffered channel with capacity 100. cacheImage sends
// on it before downloading and receives from it when done, which caps the
// number of downloads in flight at 100.
cachingSemaphore = make(chan struct{}, 100)
// invalidImageIDs is the set of image IDs added by recordInvalidImageID. It is
// consulted by handleImageStatus and filterValidImages.
invalidImageIDs = make(map[string]struct{})
// invalidImageIDsMu guards invalidImageIDs.
invalidImageIDsMu sync.Mutex
// imageURLMap maps "<imageID>_<imageType>" keys to the original image URL.
// handleImageServe reads it to proxy images; the code that fills it is not
// in this part of the file.
imageURLMap = make(map[string]string)
// imageURLMapMu guards imageURLMap (handleImageServe takes the read lock).
imageURLMapMu sync.RWMutex
)
// cacheImage downloads the image at imageURL, re-encodes it as WebP and stores
// it in the "images" folder under config.DriveCache.Path.
//
// The file is named "<imageID>_thumb.webp" when isThumbnail is true and
// "<imageID>_full.webp" otherwise. It returns the path of the cached file,
// whether the file is available on disk, and an error. On every failure except
// "could not create the images folder" the image ID is passed to
// recordInvalidImageID.
func cacheImage(imageURL, imageID string, isThumbnail bool) (string, bool, error) {
	if imageURL == "" {
		recordInvalidImageID(imageID)
		return "", false, fmt.Errorf("empty image URL for image ID %s", imageID)
	}

	// Construct the filename based on the image ID and type.
	var filename string
	if isThumbnail {
		filename = fmt.Sprintf("%s_thumb.webp", imageID)
	} else {
		filename = fmt.Sprintf("%s_full.webp", imageID)
	}

	// Make sure we store inside: config.DriveCache.Path / images
	imageCacheDir := filepath.Join(config.DriveCache.Path, "images")
	if err := os.MkdirAll(imageCacheDir, 0755); err != nil {
		return "", false, fmt.Errorf("couldn't create images folder: %w", err)
	}
	cachedImagePath := filepath.Join(imageCacheDir, filename)

	// Fast path: the image is already cached.
	if _, err := os.Stat(cachedImagePath); err == nil {
		return cachedImagePath, true, nil
	}

	// Ensure only one goroutine at a time works on the same image URL.
	cachingImagesMu.Lock()
	mu, exists := cachingImages[imageURL]
	if !exists {
		mu = &sync.Mutex{}
		cachingImages[imageURL] = mu
	}
	cachingImagesMu.Unlock()

	mu.Lock()
	defer func() {
		// Drop the per-URL mutex on every exit path (not only on success) so
		// failed URLs do not accumulate in cachingImages forever. Only remove
		// the entry if it is still ours: a later caller may have installed a
		// fresh mutex after an earlier holder deleted the old one.
		cachingImagesMu.Lock()
		if cachingImages[imageURL] == mu {
			delete(cachingImages, imageURL)
		}
		cachingImagesMu.Unlock()
		mu.Unlock()
	}()

	// Double-check: the image may have been cached while we waited for the lock.
	if _, err := os.Stat(cachedImagePath); err == nil {
		return cachedImagePath, true, nil
	}

	// Bound the number of simultaneous downloads.
	cachingSemaphore <- struct{}{}
	defer func() { <-cachingSemaphore }()

	// NOTE(review): certificate verification is deliberately skipped here, as
	// in the original code; this accepts any TLS certificate from the remote.
	client := &http.Client{
		Timeout: 15 * time.Second,
		Transport: &http.Transport{
			TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
			// This transport is used for exactly one request. Without this,
			// the finished connection would sit in the idle pool of a
			// transport nobody references anymore and never be closed.
			DisableKeepAlives: true,
		},
	}

	// Download the image using the custom client.
	resp, err := client.Get(imageURL)
	if err != nil {
		recordInvalidImageID(imageID)
		return "", false, err
	}
	defer resp.Body.Close()

	// Reject error responses up front (same rule handleImageServe applies),
	// so that e.g. an image-shaped 404 placeholder is not cached as the result.
	if resp.StatusCode != http.StatusOK {
		recordInvalidImageID(imageID)
		return "", false, fmt.Errorf("unexpected HTTP status %d for image URL %s", resp.StatusCode, imageURL)
	}

	// Read the image data into a byte slice.
	data, err := io.ReadAll(resp.Body)
	if err != nil {
		recordInvalidImageID(imageID)
		return "", false, err
	}

	// Check if the response is actually an image (sniffed from the bytes, not
	// taken from the response headers).
	contentType := http.DetectContentType(data)
	if !strings.HasPrefix(contentType, "image/") {
		recordInvalidImageID(imageID)
		return "", false, fmt.Errorf("URL did not return an image: %s", imageURL)
	}

	// Handle SVG files directly: they are stored as-is, without re-encoding.
	// NOTE(review): http.DetectContentType does not appear to ever report
	// "image/svg+xml", so this branch is likely unreachable, and the file
	// would be stored under a ".webp" name — confirm the intended behavior.
	if contentType == "image/svg+xml" {
		err = writeCacheFileAtomically(cachedImagePath, func(w io.Writer) error {
			_, writeErr := w.Write(data)
			return writeErr
		})
		if err != nil {
			recordInvalidImageID(imageID)
			return "", false, err
		}
		return cachedImagePath, true, nil
	}

	// Decode the image based on the content type.
	var img image.Image
	switch contentType {
	case "image/jpeg":
		img, err = jpeg.Decode(bytes.NewReader(data))
	case "image/png":
		img, err = png.Decode(bytes.NewReader(data))
	case "image/gif":
		img, err = gif.Decode(bytes.NewReader(data))
	case "image/webp":
		img, err = webp.Decode(bytes.NewReader(data))
	case "image/bmp":
		img, err = bmp.Decode(bytes.NewReader(data))
	case "image/tiff":
		img, err = tiff.Decode(bytes.NewReader(data))
	default:
		recordInvalidImageID(imageID)
		return "", false, fmt.Errorf("unsupported image type: %s", contentType)
	}
	if err != nil {
		recordInvalidImageID(imageID)
		return "", false, fmt.Errorf("failed to decode image: %w", err)
	}

	// Encode the image to lossy WebP and move it into place atomically.
	options := &webp.Options{Lossless: false, Quality: 80}
	err = writeCacheFileAtomically(cachedImagePath, func(w io.Writer) error {
		return webp.Encode(w, img, options)
	})
	if err != nil {
		recordInvalidImageID(imageID)
		return "", false, err
	}
	return cachedImagePath, true, nil
}

// writeCacheFileAtomically fills a uniquely named temp file next to finalPath
// using write, then renames it to finalPath. The temp file is removed again if
// any step fails, so failed attempts leave no ".tmp" residue in the cache.
func writeCacheFileAtomically(finalPath string, write func(io.Writer) error) (err error) {
	// A unique temp name keeps two writers for the same target from
	// truncating each other's half-written file.
	tmp, err := os.CreateTemp(filepath.Dir(finalPath), filepath.Base(finalPath)+".*.tmp")
	if err != nil {
		return err
	}
	tmpPath := tmp.Name()
	defer func() {
		if err != nil {
			// Best effort: the original error is what the caller needs.
			_ = os.Remove(tmpPath)
		}
	}()

	if err = write(tmp); err != nil {
		_ = tmp.Close()
		return err
	}
	// A failed Close can mean the data never reached the disk.
	if err = tmp.Close(); err != nil {
		return err
	}
	// os.CreateTemp creates the file with mode 0600; restore the 0644 mode
	// that cache files were written with before.
	if err = os.Chmod(tmpPath, 0644); err != nil {
		return err
	}
	return os.Rename(tmpPath, finalPath)
}
// handleImageServe serves an image addressed by the last element of the
// request path, which has the form "<imageID>_<imageType>" with an optional
// ".webp" suffix (imageType is e.g. "thumb" or "full").
//
// Requests with the ".webp" suffix are answered from the cache folder
// (config.DriveCache.Path/images) when the file exists. Otherwise the original
// URL is looked up in imageURLMap and the image is proxied from there, except
// for thumbnails while config.DriveCacheEnabled is set: those are expected to
// come from the cache, so the missing-image placeholder is served instead.
func handleImageServe(w http.ResponseWriter, r *http.Request) {
	// Extract the image ID and type from the URL.
	idType := filepath.Base(r.URL.Path)
	hasExtension := strings.HasSuffix(idType, ".webp")
	if hasExtension {
		// Cached image, remove extension.
		idType = strings.TrimSuffix(idType, ".webp")
	}

	parts := strings.SplitN(idType, "_", 2)
	if len(parts) != 2 {
		http.NotFound(w, r)
		return
	}
	imageID, imageType := parts[0], parts[1]

	// Cached files live in: config.DriveCache.Path / images
	filename := fmt.Sprintf("%s_%s.webp", imageID, imageType)
	cachedImagePath := filepath.Join(config.DriveCache.Path, "images", filename)

	// Requesting a cached image (thumbnail or full). Full images are included
	// because handleImageStatus hands out "/image/<id>_full.webp" URLs for
	// them once they are on disk.
	if hasExtension && (imageType == "thumb" || imageType == "full") {
		if _, err := os.Stat(cachedImagePath); err == nil {
			// Touch the file so cleanupCache, which evicts by modification
			// time, treats it as recently used.
			now := time.Now()
			if err := os.Chtimes(cachedImagePath, now, now); err != nil {
				printWarn("Failed to update modification time for %s: %v", cachedImagePath, err)
			}
			w.Header().Set("Content-Type", "image/webp")
			w.Header().Set("Cache-Control", "public, max-age=31536000")
			http.ServeFile(w, r, cachedImagePath)
			return
		}
		if imageType == "thumb" && config.DriveCacheEnabled {
			// Thumbnail should be cached, but was not found.
			serveMissingImage(w, r)
			return
		}
		// Otherwise fall through and proxy the image.
	}

	// Image not cached or caching not enabled: find the original URL.
	imageKey := fmt.Sprintf("%s_%s", imageID, imageType)
	imageURLMapMu.RLock()
	imageURL, exists := imageURLMap[imageKey]
	imageURLMapMu.RUnlock()
	if !exists {
		// Cannot find original URL, serve missing image.
		serveMissingImage(w, r)
		return
	}

	// Thumbnails are never proxied while the drive cache is enabled.
	if imageType == "thumb" && config.DriveCacheEnabled {
		serveMissingImage(w, r)
		return
	}

	// Fetch the image from the original URL. The timeout keeps a stalled
	// upstream server from pinning this handler forever (http.Get uses a
	// client without any timeout).
	client := &http.Client{Timeout: 30 * time.Second}
	resp, err := client.Get(imageURL)
	if err != nil {
		printWarn("Error fetching image: %v", err)
		recordInvalidImageID(imageID)
		serveMissingImage(w, r)
		return
	}
	defer resp.Body.Close()

	// Check if the request was successful.
	if resp.StatusCode != http.StatusOK {
		serveMissingImage(w, r)
		return
	}

	// Only pass through responses the upstream server declares as images.
	contentType := resp.Header.Get("Content-Type")
	if !strings.HasPrefix(contentType, "image/") {
		serveMissingImage(w, r)
		return
	}
	w.Header().Set("Content-Type", contentType)

	// Stream the image content to the response.
	if _, err := io.Copy(w, resp.Body); err != nil {
		printWarn("Error writing image to response: %v", err)
	}
}
func handleImageStatus(w http.ResponseWriter, r *http.Request) {
imageIDs := r.URL.Query().Get("image_ids")
ids := strings.Split(imageIDs, ",")
statusMap := make(map[string]string)
for _, id := range ids {
if id == "" {
continue
}
// Check if the image ID is marked as invalid
invalidImageIDsMu.Lock()
_, isInvalid := invalidImageIDs[id]
invalidImageIDsMu.Unlock()
if isInvalid {
// Image is invalid; inform the frontend by setting the missing image URL
statusMap[id] = "/static/images/missing.svg"
continue
}
// Existing code to check for cached images
extensions := []string{"webp", "svg"} // Extensions without leading dots
imageReady := false
// Check thumbnail first
for _, ext := range extensions {
thumbFilename := fmt.Sprintf("%s_thumb.%s", id, ext)
thumbPath := filepath.Join(config.DriveCache.Path, "images", thumbFilename)
if _, err := os.Stat(thumbPath); err == nil {
statusMap[id] = fmt.Sprintf("/image/%s_thumb.%s", id, ext)
imageReady = true
break
}
}
// If no thumbnail, check full image
if !imageReady {
for _, ext := range extensions {
fullFilename := fmt.Sprintf("%s_full.%s", id, ext)
fullPath := filepath.Join(config.DriveCache.Path, "images", fullFilename)
if _, err := os.Stat(fullPath); err == nil {
statusMap[id] = fmt.Sprintf("/image/%s_full.%s", id, ext)
imageReady = true
break
}
}
}
// If neither is ready and image is not invalid
if !imageReady {
if !config.DriveCacheEnabled {
// Hard cache is disabled; use the proxy URL
statusMap[id] = fmt.Sprintf("/image/%s_thumb", id)
}
// Else, do not set statusMap[id]; the frontend will keep checking
}
}
w.Header().Set("Content-Type", "application/json")
json.NewEncoder(w).Encode(statusMap)
}
// recordInvalidImageID adds imageID to the invalidImageIDs set and emits a
// debug log line. The set's mutex is held for the whole call.
func recordInvalidImageID(imageID string) {
	var present struct{}

	invalidImageIDsMu.Lock()
	defer invalidImageIDsMu.Unlock()

	invalidImageIDs[imageID] = present
	printDebug("Recorded invalid image ID: %s", imageID)
}
// filterValidImages returns the entries of imageResults whose ID is not in the
// invalidImageIDs set, in their original order. The result is nil when no
// entry is kept. Each dropped entry is reported through printDebug.
func filterValidImages(imageResults []ImageSearchResult) []ImageSearchResult {
	invalidImageIDsMu.Lock()
	defer invalidImageIDsMu.Unlock()

	var kept []ImageSearchResult
	for i := range imageResults {
		candidate := imageResults[i]
		if _, bad := invalidImageIDs[candidate.ID]; bad {
			printDebug("Filtering out invalid image ID: %s", candidate.ID)
			continue
		}
		kept = append(kept, candidate)
	}
	return kept
}
func removeImageResultFromCache(query string, page int, safe bool, lang string, imageID string) {
cacheKey := CacheKey{
Query: query,
Page: page,
Safe: safe,
Lang: lang,
Type: "image",
}
rc := resultsCache
rc.mu.Lock()
defer rc.mu.Unlock()
keyStr := rc.keyToString(cacheKey)
item, exists := rc.results[keyStr]
if !exists {
return
}
// Filter out the image with the given ID
var newResults []SearchResult
for _, r := range item.Results {
if imgResult, ok := r.(ImageSearchResult); ok {
if imgResult.ID != imageID {
newResults = append(newResults, r)
} else {
printDebug("Removing invalid image ID from cache: %s", imageID)
}
} else {
newResults = append(newResults, r)
}
}
// Update or delete the cache entry
if len(newResults) > 0 {
rc.results[keyStr] = CachedItem{
Results: newResults,
StoredTime: item.StoredTime,
}
} else {
delete(rc.results, keyStr)
}
}
// cleanExpiredCachedImages runs cleanupCache once per hour, forever. It
// returns immediately, without starting the ticker, when neither
// config.DriveCache.Duration nor config.DriveCache.MaxUsageBytes is positive;
// otherwise it never returns.
func cleanExpiredCachedImages() {
	cleanupWanted := config.DriveCache.Duration > 0 || config.DriveCache.MaxUsageBytes > 0
	if !cleanupWanted {
		return // Both the age limit and the size limit are disabled.
	}

	ticker := time.NewTicker(1 * time.Hour)
	defer ticker.Stop()

	for {
		<-ticker.C
		cleanupCache()
	}
}
// cleanupCache performs one cleanup pass over config.DriveCache.Path/images.
//
// First, when config.DriveCache.Duration is positive, every entry whose
// modification time is older than that duration is removed. Then, when
// config.DriveCache.MaxUsageBytes is positive and the remaining entries add up
// to more than that, entries are removed oldest-modified first until the total
// is within the limit. Removal failures are logged and skipped.
func cleanupCache() {
	// Read from: config.DriveCache.Path / images
	imageCacheDir := filepath.Join(config.DriveCache.Path, "images")
	entries, err := os.ReadDir(imageCacheDir)
	if err != nil {
		printErr("Failed to read DriveCache directory: %v", err)
		return
	}

	// Pass 1: drop expired entries, collect the rest with their total size.
	var totalSize uint64
	survivors := make([]os.FileInfo, 0, len(entries))
	for _, entry := range entries {
		info, infoErr := entry.Info()
		if infoErr != nil {
			continue
		}

		expired := config.DriveCache.Duration > 0 && time.Since(info.ModTime()) > config.DriveCache.Duration
		if !expired {
			totalSize += uint64(info.Size())
			survivors = append(survivors, info)
			continue
		}

		filePath := filepath.Join(imageCacheDir, entry.Name())
		if rmErr := os.Remove(filePath); rmErr != nil {
			printErr("Failed to remove expired cache file: %s", filePath)
		} else {
			printDebug("Removed expired cache file: %s", filePath)
		}
	}

	// Pass 2: enforce the size limit, evicting the oldest entries first.
	if config.DriveCache.MaxUsageBytes > 0 && totalSize > config.DriveCache.MaxUsageBytes {
		sort.Slice(survivors, func(a, b int) bool {
			return survivors[a].ModTime().Before(survivors[b].ModTime())
		})

		for i := 0; i < len(survivors) && totalSize > config.DriveCache.MaxUsageBytes; i++ {
			victim := survivors[i]
			filePath := filepath.Join(imageCacheDir, victim.Name())
			fileSize := uint64(victim.Size())
			if rmErr := os.Remove(filePath); rmErr != nil {
				printErr("Failed to remove cache file: %s", filePath)
				continue
			}
			totalSize -= fileSize
			printDebug("Removed cache file to reduce size: %s", filePath)
		}
	}
}
// serveMissingImage answers the request with the placeholder image
// static/images/missing.svg (resolved relative to the working directory).
// Caching of the placeholder is disabled through the response headers. When
// config.DriveCacheEnabled is set, a 404 status is written before the file.
func serveMissingImage(w http.ResponseWriter, r *http.Request) {
	headers := w.Header()
	headers.Set("Content-Type", "image/svg+xml")
	headers.Set("Cache-Control", "no-store, must-revalidate")
	headers.Set("Pragma", "no-cache")
	headers.Set("Expires", "0")

	if config.DriveCacheEnabled {
		w.WriteHeader(http.StatusNotFound)
	}
	http.ServeFile(w, r, filepath.Join("static", "images", "missing.svg"))
}