From 787816d0ab0bdb1dfa26858480e37d640a0c7ac0 Mon Sep 17 00:00:00 2001 From: partisan Date: Sat, 19 Oct 2024 14:02:27 +0200 Subject: [PATCH] added: serving missing.svg on error instead of an invalid image URL --- cache-images.go | 164 ++++++++++++++++++++++++++-------- imageproxy.go | 26 ++++-- images-bing.go | 4 +- images-deviantart.go | 4 +- images.go | 108 ++++++++++++++++++++-- static/images/missing.svg | 8 ++ static/images/placeholder.svg | 6 +- 7 files changed, 263 insertions(+), 57 deletions(-) create mode 100644 static/images/missing.svg diff --git a/cache-images.go b/cache-images.go index 46a2f47..3a52f0b 100644 --- a/cache-images.go +++ b/cache-images.go @@ -2,6 +2,7 @@ package main import ( "bytes" + "crypto/tls" "encoding/json" "fmt" "image" @@ -14,6 +15,7 @@ import ( "path/filepath" "strings" "sync" + "time" "github.com/chai2010/webp" "golang.org/x/image/bmp" @@ -24,15 +26,18 @@ var ( cachingImages = make(map[string]*sync.Mutex) cachingImagesMu sync.Mutex cachingSemaphore = make(chan struct{}, 10) // Limit to 10 concurrent downloads + + invalidImageIDs = make(map[string]struct{}) + invalidImageIDsMu sync.Mutex ) -func cacheImage(imageURL, filename string) (string, error) { +func cacheImage(imageURL, filename, imageID string) (string, bool, error) { cacheDir := "image_cache" cachedImagePath := filepath.Join(cacheDir, filename) // Check if the image is already cached if _, err := os.Stat(cachedImagePath); err == nil { - return cachedImagePath, nil + return cachedImagePath, true, nil } // Ensure only one goroutine caches the same image @@ -48,31 +53,40 @@ func cacheImage(imageURL, filename string) (string, error) { // Double-check if the image was cached while waiting if _, err := os.Stat(cachedImagePath); err == nil { - return cachedImagePath, nil + return cachedImagePath, true, nil } cachingSemaphore <- struct{}{} // Acquire a token defer func() { <-cachingSemaphore }() // Release the token - // Download the image - resp, err := http.Get(imageURL) + // Create a custom http.Client that skips SSL certificate verification + client := &http.Client{ + Timeout: 15 * time.Second, + Transport: &http.Transport{ + TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, + }, + } + + // Download the image using the custom client + resp, err := client.Get(imageURL) if err != nil { - return "", err + recordInvalidImageID(imageID) + return "", false, err } defer resp.Body.Close() // Read the image data into a byte slice data, err := io.ReadAll(resp.Body) if err != nil { - return "", err + recordInvalidImageID(imageID) + return "", false, err } - // Detect the content type + // Check if the response is actually an image contentType := http.DetectContentType(data) - - // If content type is HTML, skip caching - if strings.HasPrefix(contentType, "text/html") { - return "", fmt.Errorf("URL returned HTML content instead of an image: %s", imageURL) + if !strings.HasPrefix(contentType, "image/") { + recordInvalidImageID(imageID) + return "", false, fmt.Errorf("URL did not return an image: %s", imageURL) } // Handle SVG files directly @@ -85,7 +99,8 @@ func cacheImage(imageURL, filename string) (string, error) { // Save the SVG file as-is err = os.WriteFile(cachedImagePath, data, 0644) if err != nil { - return "", err + recordInvalidImageID(imageID) + return "", false, err } // Clean up mutex @@ -93,7 +108,7 @@ func cacheImage(imageURL, filename string) (string, error) { delete(cachingImages, imageURL) cachingImagesMu.Unlock() - return cachedImagePath, nil + return cachedImagePath, true, nil } // Decode the image based on the content type @@ -112,11 +127,13 @@ func cacheImage(imageURL, filename string) (string, error) { case "image/tiff": img, err = tiff.Decode(bytes.NewReader(data)) default: - return "", fmt.Errorf("unsupported image type: %s", contentType) + recordInvalidImageID(imageID) + return "", false, fmt.Errorf("unsupported image type: %s", contentType) } if err != nil { - return "", fmt.Errorf("failed to decode image: %v", err) + recordInvalidImageID(imageID) + return "", false, fmt.Errorf("failed to decode image: %v", err) } // Ensure the cache directory exists @@ -127,7 +144,8 @@ func cacheImage(imageURL, filename string) (string, error) { // Open the cached file for writing outFile, err := os.Create(cachedImagePath) if err != nil { - return "", err + recordInvalidImageID(imageID) + return "", false, err } defer outFile.Close() @@ -135,7 +153,8 @@ func cacheImage(imageURL, filename string) (string, error) { options := &webp.Options{Lossless: false, Quality: 80} err = webp.Encode(outFile, img, options) if err != nil { - return "", err + recordInvalidImageID(imageID) + return "", false, err } // Clean up mutex @@ -143,7 +162,7 @@ func cacheImage(imageURL, filename string) (string, error) { delete(cachingImages, imageURL) cachingImagesMu.Unlock() - return cachedImagePath, nil + return cachedImagePath, true, nil } func handleCachedImages(w http.ResponseWriter, r *http.Request) { @@ -152,21 +171,15 @@ func handleCachedImages(w http.ResponseWriter, r *http.Request) { cachedImagePath := filepath.Join(cacheDir, imageName) if _, err := os.Stat(cachedImagePath); os.IsNotExist(err) { - // Serve placeholder image with no-store headers - placeholderPath := "static/images/placeholder.webp" - placeholderContentType := "image/webp" - - // You can also check for SVG placeholder if needed - if strings.HasSuffix(imageName, ".svg") { - placeholderPath = "static/images/placeholder.svg" - placeholderContentType = "image/svg+xml" - } - - w.Header().Set("Content-Type", placeholderContentType) - w.Header().Set("Cache-Control", "no-store, must-revalidate") - w.Header().Set("Pragma", "no-cache") - w.Header().Set("Expires", "0") - http.ServeFile(w, r, placeholderPath) + printDebug("Cached image not found: %s, serving missing.svg", cachedImagePath) + // Serve missing image + missingImagePath := filepath.Join("static", "images", "missing.svg") + w.Header().Set("Content-Type", "image/svg+xml") + http.ServeFile(w, r, missingImagePath) + return + } else if err != nil { + printWarn("Error checking image file: %v", err) + http.Error(w, "Internal Server Error", http.StatusInternalServerError) return } @@ -199,13 +212,21 @@ func handleImageStatus(w http.ResponseWriter, r *http.Request) { ids := strings.Split(imageIDs, ",") statusMap := make(map[string]string) - cacheDir := "image_cache" printDebug("Received image status request for IDs: %v", ids) - printDebug("Status map: %v", statusMap) + + invalidImageIDsMu.Lock() + defer invalidImageIDsMu.Unlock() for _, id := range ids { + // Check if the image ID is in the invalidImageIDs map + if _, invalid := invalidImageIDs[id]; invalid { + // Image is invalid, set status to "missing" + statusMap[id] = "/static/images/missing.svg" + continue + } + // Check for different possible extensions extensions := []string{".webp", ".svg"} var cachedImagePath string @@ -224,11 +245,80 @@ func handleImageStatus(w http.ResponseWriter, r *http.Request) { if found { statusMap[id] = cachedImagePath } else { - // Image is not ready + // Image is not ready yet statusMap[id] = "" } } + printDebug("Status map: %v", statusMap) + w.Header().Set("Content-Type", "application/json") json.NewEncoder(w).Encode(statusMap) } + +func recordInvalidImageID(imageID string) { + invalidImageIDsMu.Lock() + defer invalidImageIDsMu.Unlock() + invalidImageIDs[imageID] = struct{}{} + printDebug("Recorded invalid image ID: %s", imageID) +} + +func filterValidImages(imageResults []ImageSearchResult) []ImageSearchResult { + invalidImageIDsMu.Lock() + defer invalidImageIDsMu.Unlock() + + var filteredResults []ImageSearchResult + for _, img := range imageResults { + if _, invalid := invalidImageIDs[img.ID]; !invalid { + filteredResults = append(filteredResults, img) + } else { + printDebug("Filtering out invalid image ID: %s", img.ID) + } + } + return filteredResults +} + +func removeImageResultFromCache(query string, page int, safe bool, lang string, imageID string) { + cacheKey := CacheKey{ + Query: query, + Page: page, + Safe: safe, + Lang: lang, + Type: "image", + } + + rc := resultsCache + + rc.mu.Lock() + defer rc.mu.Unlock() + + keyStr := rc.keyToString(cacheKey) + item, exists := rc.results[keyStr] + if !exists { + return + } + + // Filter out the image with the given ID + var newResults []SearchResult + for _, r := range item.Results { + if imgResult, ok := r.(ImageSearchResult); ok { + if imgResult.ID != imageID { + newResults = append(newResults, r) + } else { + printDebug("Removing invalid image ID from cache: %s", imageID) + } + } else { + newResults = append(newResults, r) + } + } + + // Update or delete the cache entry + if len(newResults) > 0 { + rc.results[keyStr] = CachedItem{ + Results: newResults, + StoredTime: item.StoredTime, + } + } else { + delete(rc.results, keyStr) + } +} diff --git a/imageproxy.go b/imageproxy.go index 4dd7478..fcacac4 100644 --- a/imageproxy.go +++ b/imageproxy.go @@ -3,6 +3,8 @@ package main import ( "io" "net/http" + "path/filepath" + "strings" ) func handleImageProxy(w http.ResponseWriter, r *http.Request) { @@ -17,29 +19,41 @@ func handleImageProxy(w http.ResponseWriter, r *http.Request) { resp, err := http.Get(imageURL) if err != nil { printWarn("Error fetching image: %v", err) - http.Error(w, "Internal Server Error", http.StatusInternalServerError) + serveMissingImage(w, r) return } defer resp.Body.Close() // Check if the request was successful if resp.StatusCode != http.StatusOK { - http.Error(w, "Failed to fetch image", http.StatusBadGateway) + serveMissingImage(w, r) return } // Set the Content-Type header to the type of the fetched image contentType := resp.Header.Get("Content-Type") - if contentType != "" { + if contentType != "" && strings.HasPrefix(contentType, "image/") { w.Header().Set("Content-Type", contentType) } else { - // Default to octet-stream if Content-Type is not available - w.Header().Set("Content-Type", "application/octet-stream") + serveMissingImage(w, r) + return } // Write the image content to the response if _, err := io.Copy(w, resp.Body); err != nil { printWarn("Error writing image to response: %v", err) - http.Error(w, "Internal Server Error", http.StatusInternalServerError) + // Serve missing.svg + // Note: At this point, headers are already sent, so serving missing.svg won't work. + // It's better to just log the error here. } } + +// Serve missing.svg +func serveMissingImage(w http.ResponseWriter, r *http.Request) { + missingImagePath := filepath.Join("static", "images", "missing.svg") + w.Header().Set("Content-Type", "image/svg+xml") + w.Header().Set("Cache-Control", "no-store, must-revalidate") + w.Header().Set("Pragma", "no-cache") + w.Header().Set("Expires", "0") + http.ServeFile(w, r, missingImagePath) +} diff --git a/images-bing.go b/images-bing.go index eaa1c4d..bb2b8fb 100644 --- a/images-bing.go +++ b/images-bing.go @@ -68,8 +68,8 @@ func PerformBingImageSearch(query, safe, lang string, page int) ([]ImageSearchRe mediaURL, ok := data["murl"].(string) if ok { // Apply the image proxy - proxiedFullURL := "/imgproxy?url=" + imgSrc - proxiedThumbURL := "/imgproxy?url=" + mediaURL + proxiedFullURL := "/imgproxy?url=" + mediaURL + proxiedThumbURL := "/imgproxy?url=" + imgSrc results = append(results, ImageSearchResult{ Thumb: imgSrc, Title: strings.TrimSpace(title), diff --git a/images-deviantart.go b/images-deviantart.go index e408616..4efe296 100644 --- a/images-deviantart.go +++ b/images-deviantart.go @@ -149,7 +149,7 @@ func PerformDeviantArtImageSearch(query, safe, lang string, page int) ([]ImageSe go func(imgSrc, resultURL, title string) { defer wg.Done() // Verify if the image URL is accessible - if isValidImageURL(imgSrc, DeviantArtImageUserAgent, resultURL) { + if DeviantArtisValidImageURL(imgSrc, DeviantArtImageUserAgent, resultURL) { resultsChan <- ImageSearchResult{ Title: strings.TrimSpace(title), Full: imgSrc, @@ -201,7 +201,7 @@ func buildDeviantArtSearchURL(query string, page int) string { } // isValidImageURL checks if the image URL is accessible with the provided User-Agent -func isValidImageURL(imgSrc, userAgent, referer string) bool { +func DeviantArtisValidImageURL(imgSrc, userAgent, referer string) bool { client := &http.Client{} req, err := http.NewRequest("HEAD", imgSrc, nil) if err != nil { diff --git a/images.go b/images.go index 316d3d1..64ae5c1 100755 --- a/images.go +++ b/images.go @@ -68,16 +68,18 @@ func getImageResultsFromCacheOrFetch(cacheKey CacheKey, query, safe, lang string if results == nil { combinedResults = fetchImageResults(query, safe, lang, page) if len(combinedResults) > 0 { + combinedResults = filterValidImages(combinedResults) resultsCache.Set(cacheKey, convertToSearchResults(combinedResults)) } } else { _, _, imageResults := convertToSpecificResults(results) - combinedResults = imageResults + combinedResults = filterValidImages(imageResults) } case <-time.After(2 * time.Second): printInfo("Cache check timeout") combinedResults = fetchImageResults(query, safe, lang, page) if len(combinedResults) > 0 { + combinedResults = filterValidImages(combinedResults) resultsCache.Set(cacheKey, convertToSearchResults(combinedResults)) } } @@ -87,6 +89,7 @@ func getImageResultsFromCacheOrFetch(cacheKey CacheKey, query, safe, lang string func fetchImageResults(query, safe, lang string, page int) []ImageSearchResult { var results []ImageSearchResult + safeBool := safe == "active" for _, engine := range imageSearchEngines { printInfo("Using image search engine: %s", engine.Name) @@ -107,20 +110,28 @@ func fetchImageResults(query, safe, lang string, page int) []ImageSearchResult { hash := hex.EncodeToString(hasher.Sum(nil)) filename := hash + ".webp" - // Set the Full URL to point to the cached image path - cacheURL := "/image_cache/" + filename - imageResult.ProxyFull = cacheURL - // Assign the ID imageResult.ID = hash - // Start caching in the background - go func(originalURL, filename string) { - _, err := cacheImage(originalURL, filename) + // Set the ProxyFull URL + imageResult.ProxyFull = "/image_cache/" + filename + + // Start caching and validation in the background + go func(imgResult ImageSearchResult, originalURL, filename string) { + _, success, err := cacheImage(originalURL, filename, imgResult.ID) if err != nil { printWarn("Failed to cache image %s: %v", originalURL, err) } - }(imageResult.Full, filename) + if !success { + // Remove the image result from the cache + removeImageResultFromCache(query, page, safeBool, lang, imgResult.ID) + } + }(imageResult, imageResult.Full, filename) + + } else { + // When hard cache is not enabled, use the imgproxy URLs + imageResult.ProxyThumb = "/imgproxy?url=" + imageResult.Thumb // Proxied thumbnail + imageResult.ProxyFull = "/imgproxy?url=" + imageResult.Full // Proxied full-size image } results = append(results, imageResult) } @@ -151,3 +162,82 @@ func wrapImageSearchFunc(f func(string, string, string, int) ([]ImageSearchResul return searchResults, duration, nil } } + +// func isValidImageURL(imageURL string) bool { +// client := &http.Client{ +// Timeout: 10 * time.Second, +// Transport: &http.Transport{ +// TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, +// }, +// } + +// req, err := http.NewRequest("GET", imageURL, nil) +// if err != nil { +// return false +// } + +// // Set headers to mimic a real browser +// req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "+ +// "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36") +// req.Header.Set("Accept", "image/webp,image/*,*/*;q=0.8") +// req.Header.Set("Accept-Language", "en-US,en;q=0.9") +// req.Header.Set("Referer", imageURL) // Some servers require a referer + +// resp, err := client.Do(req) +// if err != nil { +// return false +// } +// defer resp.Body.Close() + +// if resp.StatusCode < 200 || resp.StatusCode >= 400 { +// return false +// } + +// // Limit the amount of data read to 10KB +// limitedReader := io.LimitReader(resp.Body, 10240) // 10KB + +// // Attempt to decode image configuration +// _, _, err = image.DecodeConfig(limitedReader) +// if err != nil { +// return false +// } + +// return true +// } + +// // This function can be used alternatively to isValidImageURL(), Its slower but reliable +// func isImageAccessible(imageURL string) bool { +// client := &http.Client{ +// Timeout: 5 * time.Second, +// CheckRedirect: func(req *http.Request, via []*http.Request) error { +// if len(via) >= 10 { +// return http.ErrUseLastResponse +// } +// return nil +// }, +// } + +// resp, err := client.Get(imageURL) +// if err != nil { +// return false +// } +// defer resp.Body.Close() + +// if resp.StatusCode < 200 || resp.StatusCode >= 400 { +// return false +// } + +// // Read the entire image data +// data, err := io.ReadAll(resp.Body) +// if err != nil { +// return false +// } + +// // Try to decode the image +// _, _, err = image.Decode(bytes.NewReader(data)) +// if err != nil { +// return false +// } + +// return true +// } diff --git a/static/images/missing.svg b/static/images/missing.svg new file mode 100644 index 0000000..9633374 --- /dev/null +++ b/static/images/missing.svg @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/static/images/placeholder.svg b/static/images/placeholder.svg index 41256d5..22137c8 100644 --- a/static/images/placeholder.svg +++ b/static/images/placeholder.svg @@ -1,2 +1,6 @@ - \ No newline at end of file + + + + + \ No newline at end of file