From 3d47c80446e10c156f711f806987ad45cfbb748f Mon Sep 17 00:00:00 2001 From: partisan Date: Sun, 13 Oct 2024 00:04:46 +0200 Subject: [PATCH] added caching of images to the drive --- .gitignore | 3 +- cache-images.go | 223 ++++++++++++++++++++++++++++++++++ cache.go | 1 + config.go | 36 ++++-- go.mod | 2 +- go.sum | 2 + images.go | 33 ++++- init.go | 36 +++--- main.go | 2 + static/images/placeholder.svg | 17 +++ templates/images.html | 129 +++++++++++++++++++- 11 files changed, 451 insertions(+), 33 deletions(-) create mode 100644 cache-images.go create mode 100644 static/images/placeholder.svg diff --git a/.gitignore b/.gitignore index 41819de..93681d1 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ config.json opensearch.xml -config.ini \ No newline at end of file +config.ini +image_cache/ \ No newline at end of file diff --git a/cache-images.go b/cache-images.go new file mode 100644 index 0000000..5c9eec8 --- /dev/null +++ b/cache-images.go @@ -0,0 +1,223 @@ +package main + +import ( + "bytes" + "encoding/json" + "fmt" + "image" + "image/gif" + "image/jpeg" + "image/png" + "io" + "net/http" + "os" + "path/filepath" + "strings" + "sync" + + "github.com/chai2010/webp" + "golang.org/x/image/bmp" + "golang.org/x/image/tiff" +) + +var ( + cachingImages = make(map[string]*sync.Mutex) + cachingImagesMu sync.Mutex + cachingSemaphore = make(chan struct{}, 10) // Limit to 10 concurrent downloads +) + +func cacheImage(imageURL, filename string) (string, error) { + cacheDir := "image_cache" + cachedImagePath := filepath.Join(cacheDir, filename) + + // Check if the image is already cached + if _, err := os.Stat(cachedImagePath); err == nil { + return cachedImagePath, nil + } + + // Ensure only one goroutine caches the same image + cachingImagesMu.Lock() + if _, exists := cachingImages[imageURL]; !exists { + cachingImages[imageURL] = &sync.Mutex{} + } + mu := cachingImages[imageURL] + cachingImagesMu.Unlock() + + mu.Lock() + defer mu.Unlock() + + // Double-check if the image was cached while waiting + if _, err := os.Stat(cachedImagePath); err == nil { + return cachedImagePath, nil + } + + cachingSemaphore <- struct{}{} // Acquire a token + defer func() { <-cachingSemaphore }() // Release the token + + // Download the image + resp, err := http.Get(imageURL) + if err != nil { + return "", err + } + defer resp.Body.Close() + + // Read the image data into a byte slice + data, err := io.ReadAll(resp.Body) + if err != nil { + return "", err + } + + // Detect the content type + contentType := http.DetectContentType(data) + + // If content type is HTML, skip caching + if strings.HasPrefix(contentType, "text/html") { + return "", fmt.Errorf("URL returned HTML content instead of an image: %s", imageURL) + } + + // Handle SVG files directly + if contentType == "image/svg+xml" { + // Ensure the cache directory exists + if _, err := os.Stat(cacheDir); os.IsNotExist(err) { + os.Mkdir(cacheDir, os.ModePerm) + } + + // Save the SVG file as-is + err = os.WriteFile(cachedImagePath, data, 0644) + if err != nil { + return "", err + } + + // Clean up mutex + cachingImagesMu.Lock() + delete(cachingImages, imageURL) + cachingImagesMu.Unlock() + + return cachedImagePath, nil + } + + // Decode the image based on the content type + var img image.Image + switch contentType { + case "image/jpeg": + img, err = jpeg.Decode(bytes.NewReader(data)) + case "image/png": + img, err = png.Decode(bytes.NewReader(data)) + case "image/gif": + img, err = gif.Decode(bytes.NewReader(data)) + case "image/webp": + img, err = 
webp.Decode(bytes.NewReader(data)) + case "image/bmp": + img, err = bmp.Decode(bytes.NewReader(data)) + case "image/tiff": + img, err = tiff.Decode(bytes.NewReader(data)) + default: + return "", fmt.Errorf("unsupported image type: %s", contentType) + } + + if err != nil { + return "", err + } + + // Ensure the cache directory exists + if _, err := os.Stat(cacheDir); os.IsNotExist(err) { + os.Mkdir(cacheDir, os.ModePerm) + } + + // Open the cached file for writing + outFile, err := os.Create(cachedImagePath) + if err != nil { + return "", err + } + defer outFile.Close() + + // Encode the image to WebP and save + options := &webp.Options{Lossless: false, Quality: 80} + err = webp.Encode(outFile, img, options) + if err != nil { + return "", err + } + + // Clean up mutex + cachingImagesMu.Lock() + delete(cachingImages, imageURL) + cachingImagesMu.Unlock() + + return cachedImagePath, nil +} + +func handleCachedImages(w http.ResponseWriter, r *http.Request) { + imageName := filepath.Base(r.URL.Path) + cacheDir := "image_cache" + cachedImagePath := filepath.Join(cacheDir, imageName) + + if _, err := os.Stat(cachedImagePath); os.IsNotExist(err) { + // Serve placeholder image with no-store headers + placeholderPath := "static/images/placeholder.webp" + placeholderContentType := "image/webp" + + // You can also check for SVG placeholder if needed + if strings.HasSuffix(imageName, ".svg") { + placeholderPath = "static/images/placeholder.svg" + placeholderContentType = "image/svg+xml" + } + + w.Header().Set("Content-Type", placeholderContentType) + w.Header().Set("Cache-Control", "no-store, must-revalidate") + w.Header().Set("Pragma", "no-cache") + w.Header().Set("Expires", "0") + http.ServeFile(w, r, placeholderPath) + return + } + + // Determine the content type based on the file extension + extension := strings.ToLower(filepath.Ext(cachedImagePath)) + var contentType string + switch extension { + case ".svg": + contentType = "image/svg+xml" + case ".jpg", ".jpeg": + contentType = "image/jpeg" + case ".png": + contentType = "image/png" + case ".gif": + contentType = "image/gif" + case ".webp": + contentType = "image/webp" + default: + // Default to binary stream if unknown + contentType = "application/octet-stream" + } + + w.Header().Set("Content-Type", contentType) + w.Header().Set("Cache-Control", "public, max-age=31536000") // Cache the image for 1 year + http.ServeFile(w, r, cachedImagePath) +} + +func handleImageStatus(w http.ResponseWriter, r *http.Request) { + imageIDs := r.URL.Query().Get("image_ids") + ids := strings.Split(imageIDs, ",") + + statusMap := make(map[string]string) + + cacheDir := "image_cache" + + printDebug("Received image status request for IDs: %v", ids) + printDebug("Status map: %v", statusMap) + + for _, id := range ids { + filename := id + ".webp" + cachedImagePath := filepath.Join(cacheDir, filename) + + if _, err := os.Stat(cachedImagePath); err == nil { + // Image is cached and ready + statusMap[id] = "/image_cache/" + filename + } else { + // Image is not ready + statusMap[id] = "" + } + } + + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(statusMap) +} diff --git a/cache.go b/cache.go index b0b7a18..a885745 100644 --- a/cache.go +++ b/cache.go @@ -25,6 +25,7 @@ type TextSearchResult struct { } type ImageSearchResult struct { + ID string Thumbnail string Title string Media string diff --git a/config.go b/config.go index a411961..ab83b48 100644 --- a/config.go +++ b/config.go @@ -7,6 +7,7 @@ import ( "strconv" "strings" "sync" + "time" 
"github.com/fsnotify/fsnotify" "gopkg.in/ini.v1" @@ -78,6 +79,7 @@ func saveConfig(config Config) { sec.Key("CrawlerEnabled").SetValue(strconv.FormatBool(config.CrawlerEnabled)) sec.Key("WebsiteEnabled").SetValue(strconv.FormatBool(config.WebsiteEnabled)) sec.Key("LogLevel").SetValue(strconv.Itoa(config.LogLevel)) + sec.Key("HardCacheDuration").SetValue(config.HardCacheDuration.String()) err := cfg.SaveTo(configFilePath) if err != nil { @@ -130,16 +132,32 @@ func loadConfig() Config { logLevel = 1 } + // Read HardCacheDuration + hardCacheStr := cfg.Section("").Key("HardCacheDuration").String() + var hardCacheDuration time.Duration + if hardCacheStr != "" { + duration, err := time.ParseDuration(hardCacheStr) + if err != nil { + printWarn("Invalid HardCacheDuration format, defaulting to 0: %v", err) + hardCacheDuration = 0 + } else { + hardCacheDuration = duration + } + } else { + hardCacheDuration = 0 // Default to 0 if not set + } + config = Config{ - Port: port, - AuthCode: cfg.Section("").Key("AuthCode").String(), - PeerID: cfg.Section("").Key("PeerID").String(), - Peers: peers, - Domain: domain, - NodesEnabled: nodesEnabled, - CrawlerEnabled: crawlerEnabled, - WebsiteEnabled: websiteEnabled, - LogLevel: logLevel, + Port: port, + AuthCode: cfg.Section("").Key("AuthCode").String(), + PeerID: cfg.Section("").Key("PeerID").String(), + Peers: peers, + Domain: domain, + NodesEnabled: nodesEnabled, + CrawlerEnabled: crawlerEnabled, + WebsiteEnabled: websiteEnabled, + LogLevel: logLevel, + HardCacheDuration: hardCacheDuration, } return config diff --git a/go.mod b/go.mod index f2a2234..ca34e6f 100644 --- a/go.mod +++ b/go.mod @@ -27,6 +27,6 @@ require ( github.com/disintegration/imaging v1.6.2 // indirect github.com/fsnotify/fsnotify v1.7.0 // indirect github.com/leonelquinteros/gotext v1.7.0 // indirect - golang.org/x/image v0.20.0 // indirect + golang.org/x/image v0.21.0 // indirect gopkg.in/ini.v1 v1.67.0 // indirect ) diff --git a/go.sum b/go.sum index 084267e..baf7362 100644 --- a/go.sum +++ b/go.sum @@ -41,6 +41,8 @@ golang.org/x/image v0.0.0-20191009234506-e7c1f5e7dbb8 h1:hVwzHzIUGRjiF7EcUjqNxk3 golang.org/x/image v0.0.0-20191009234506-e7c1f5e7dbb8/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= golang.org/x/image v0.20.0 h1:7cVCUjQwfL18gyBJOmYvptfSHS8Fb3YUDtfLIZ7Nbpw= golang.org/x/image v0.20.0/go.mod h1:0a88To4CYVBAHp5FXJm8o7QbUl37Vd85ply1vyD8auM= +golang.org/x/image v0.21.0 h1:c5qV36ajHpdj4Qi0GnE0jUc/yuo33OLFaa0d+crTD5s= +golang.org/x/image v0.21.0/go.mod h1:vUbsLavqK/W303ZroQQVKQ+Af3Yl6Uz1Ppu5J/cLz78= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= diff --git a/images.go b/images.go index f67ca3d..54126ab 100755 --- a/images.go +++ b/images.go @@ -1,6 +1,8 @@ package main import ( + "crypto/md5" + "encoding/hex" "fmt" "net/http" "time" @@ -96,16 +98,41 @@ func fetchImageResults(query, safe, lang string, page int) []ImageSearchResult { } for _, result := range searchResults { - results = append(results, result.(ImageSearchResult)) + imageResult := result.(ImageSearchResult) + if config.HardCacheDuration > 0 { + // Save the original Media URL before overwriting + originalMediaURL := imageResult.Media + + // Generate hash from the original media URL + hasher := md5.New() + hasher.Write([]byte(originalMediaURL)) + hash := 
hex.EncodeToString(hasher.Sum(nil)) + filename := hash + ".webp" + + // Set the Media URL to point to the cached image path + cacheURL := "/image_cache/" + filename + imageResult.Media = cacheURL + imageResult.ThumbProxy = cacheURL + + // Assign the ID + imageResult.ID = hash + + // Start caching in the background + go func(originalURL, filename string) { + _, err := cacheImage(originalURL, filename) + if err != nil { + printWarn("Failed to cache image %s: %v", originalURL, err) + } + }(originalMediaURL, filename) + } + results = append(results, imageResult) } - // If results are found, break out of the loop if len(results) > 0 { break } } - // If no results found after trying all engines if len(results) == 0 { printWarn("No image results found for query: %s, trying other nodes", query) results = tryOtherNodesForImageSearch(query, safe, lang, page, []string{hostID}) diff --git a/init.go b/init.go index 8074dd5..9c59e31 100644 --- a/init.go +++ b/init.go @@ -5,26 +5,28 @@ import ( ) type Config struct { - Port int - AuthCode string - PeerID string - Peers []string - Domain string - NodesEnabled bool - CrawlerEnabled bool - WebsiteEnabled bool - LogLevel int + Port int + AuthCode string + PeerID string + Peers []string + Domain string + NodesEnabled bool + CrawlerEnabled bool + WebsiteEnabled bool + LogLevel int + HardCacheDuration time.Duration } var defaultConfig = Config{ - Port: 5000, - Domain: "localhost", - Peers: []string{}, - AuthCode: generateStrongRandomString(64), - NodesEnabled: true, - CrawlerEnabled: true, - WebsiteEnabled: true, - LogLevel: 1, + Port: 5000, + Domain: "localhost", + Peers: []string{}, + AuthCode: generateStrongRandomString(64), + NodesEnabled: true, + CrawlerEnabled: true, + WebsiteEnabled: true, + LogLevel: 1, + HardCacheDuration: 0, } const configFilePath = "config.ini" diff --git a/main.go b/main.go index 2541c58..98d3d45 100755 --- a/main.go +++ b/main.go @@ -204,6 +204,8 @@ func runServer() { http.HandleFunc("/node", handleNodeRequest) http.HandleFunc("/settings", handleSettings) http.HandleFunc("/save-settings", handleSaveSettings) + http.HandleFunc("/image_cache/", handleCachedImages) + http.HandleFunc("/image_status", handleImageStatus) http.HandleFunc("/opensearch.xml", func(w http.ResponseWriter, r *http.Request) { w.Header().Set("Content-Type", "application/opensearchdescription+xml") http.ServeFile(w, r, "static/opensearch.xml") diff --git a/static/images/placeholder.svg b/static/images/placeholder.svg new file mode 100644 index 0000000..fb7b0f8 --- /dev/null +++ b/static/images/placeholder.svg @@ -0,0 +1,17 @@ + + + + + image-picture + Created with Sketch Beta. + + + + + + + + + + + \ No newline at end of file diff --git a/templates/images.html b/templates/images.html index 4a3d4fd..91fd5de 100755 --- a/templates/images.html +++ b/templates/images.html @@ -74,13 +74,19 @@ {{ range $index, $result := .Results }}
-                            {{ .Title }}
+                            {{ .Title }}
                             {{ .Width }} × {{ .Height }}
-
+
 {{ end }}
@@ -216,6 +222,125 @@
         });
     });
+
+
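
Note on the templates/images.html change: the second hunk appends a script block whose body is not visible above. From the server side of this patch — fetchImageResults rewrites each result's Media/ThumbProxy to /image_cache/<md5-of-original-URL>.webp and uses that hash as the result ID, handleCachedImages serves static/images/placeholder.svg with no-store headers until the WebP is written, and handleImageStatus returns a JSON map of image ID to cached URL (empty string while still caching) — the script is a poller that swaps placeholders for cached images once they are ready. A minimal sketch only, assuming each result <img> carries a data-id attribute and initially shows the placeholder (the attribute name and the 2-second interval are assumptions, not the patch's actual markup):

    // Sketch only: poll /image_status and swap in cached images as they become ready.
    document.addEventListener('DOMContentLoaded', function () {
        // Assumed markup: <img data-id="{{ .ID }}" src="/static/images/placeholder.svg" ...>
        var pending = Array.from(document.querySelectorAll('img[data-id]'))
            .map(function (img) { return img.dataset.id; });
        if (pending.length === 0) return;

        var interval = setInterval(checkImageStatus, 2000); // polling interval is a guess
        checkImageStatus();

        function checkImageStatus() {
            fetch('/image_status?image_ids=' + encodeURIComponent(pending.join(',')))
                .then(function (resp) { return resp.json(); })
                .then(function (statusMap) {
                    Object.keys(statusMap).forEach(function (id) {
                        var url = statusMap[id];
                        if (!url) return; // still caching, keep the placeholder
                        document.querySelectorAll('img[data-id="' + id + '"]').forEach(function (img) {
                            img.src = url + '?v=' + Date.now(); // force a refetch past the no-store placeholder response
                        });
                        pending = pending.filter(function (p) { return p !== id; });
                    });
                    if (pending.length === 0) clearInterval(interval);
                })
                .catch(function (err) { console.warn('image status poll failed:', err); });
        }
    });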
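
Note on the config.go change: HardCacheDuration is parsed with time.ParseDuration, so config.ini takes Go duration syntax such as HardCacheDuration = 30m or HardCacheDuration = 72h (there is no day unit). Leaving the key out, or setting it to 0, keeps fetchImageResults on the old non-caching path.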