Added website crawling and indexing of crawled results
This commit is contained in:
parent
5b90a372a1
commit
047cccd19f
10 changed files with 819 additions and 97 deletions
|
@ -24,15 +24,15 @@ import (
|
|||
)
|
||||
|
||||
var (
|
||||
cachingImages = make(map[string]*sync.Mutex)
|
||||
cachingImagesMu sync.Mutex
|
||||
// cachingSemaphore = make(chan struct{}, 100) // Limit to concurrent downloads
|
||||
cachingImages = make(map[string]*sync.Mutex)
|
||||
cachingImagesMu sync.Mutex
|
||||
cachingSemaphore = make(chan struct{}, 100)
|
||||
|
||||
invalidImageIDs = make(map[string]struct{})
|
||||
invalidImageIDsMu sync.Mutex
|
||||
|
||||
imageURLMap = make(map[string]string) // mapping from imageID_type to imageURL
|
||||
imageURLMapMu sync.RWMutex // mutex for thread-safe access
|
||||
imageURLMap = make(map[string]string)
|
||||
imageURLMapMu sync.RWMutex
|
||||
)
|
||||
|
||||
func cacheImage(imageURL, imageID string, isThumbnail bool) (string, bool, error) {
|
||||
|
@ -49,7 +49,13 @@ func cacheImage(imageURL, imageID string, isThumbnail bool) (string, bool, error
|
|||
filename = fmt.Sprintf("%s_full.webp", imageID)
|
||||
}
|
||||
|
||||
cachedImagePath := filepath.Join(config.DriveCache.Path, filename)
|
||||
// Cached images are stored under config.DriveCache.Path/images; create the directory if it is missing
|
||||
imageCacheDir := filepath.Join(config.DriveCache.Path, "images")
|
||||
if err := os.MkdirAll(imageCacheDir, 0755); err != nil {
|
||||
return "", false, fmt.Errorf("couldn't create images folder: %v", err)
|
||||
}
|
||||
|
||||
cachedImagePath := filepath.Join(imageCacheDir, filename)
|
||||
tempImagePath := cachedImagePath + ".tmp"
|
||||
|
||||
// Check if the image is already cached
|
||||
|
@ -73,9 +79,8 @@ func cacheImage(imageURL, imageID string, isThumbnail bool) (string, bool, error
|
|||
return cachedImagePath, true, nil
|
||||
}
|
||||
|
||||
// // Limit max concurrent downloads
|
||||
// cachingSemaphore <- struct{}{} // Acquire a token
|
||||
// defer func() { <-cachingSemaphore }() // Release the token
|
||||
cachingSemaphore <- struct{}{}
|
||||
defer func() { <-cachingSemaphore }()
|
||||
|
||||
// Create a custom http.Client that skips SSL certificate verification
|
||||
client := &http.Client{
|
||||
|
@ -217,7 +222,8 @@ func handleImageServe(w http.ResponseWriter, r *http.Request) {
|
|||
imageType = parts[1]
|
||||
|
||||
filename := fmt.Sprintf("%s_%s.webp", imageID, imageType)
|
||||
cachedImagePath := filepath.Join(config.DriveCache.Path, filename)
|
||||
// Cached images are read from config.DriveCache.Path/images
|
||||
cachedImagePath := filepath.Join(config.DriveCache.Path, "images", filename)
|
||||
|
||||
if hasExtension && imageType == "thumb" {
|
||||
// Requesting cached image (thumbnail or full)
|
||||
|
@ -329,7 +335,7 @@ func handleImageStatus(w http.ResponseWriter, r *http.Request) {
|
|||
// Check thumbnail first
|
||||
for _, ext := range extensions {
|
||||
thumbFilename := fmt.Sprintf("%s_thumb.%s", id, ext)
|
||||
thumbPath := filepath.Join(config.DriveCache.Path, thumbFilename)
|
||||
thumbPath := filepath.Join(config.DriveCache.Path, "images", thumbFilename)
|
||||
|
||||
if _, err := os.Stat(thumbPath); err == nil {
|
||||
statusMap[id] = fmt.Sprintf("/image/%s_thumb.%s", id, ext)
|
||||
|
@ -342,7 +348,7 @@ func handleImageStatus(w http.ResponseWriter, r *http.Request) {
|
|||
if !imageReady {
|
||||
for _, ext := range extensions {
|
||||
fullFilename := fmt.Sprintf("%s_full.%s", id, ext)
|
||||
fullPath := filepath.Join(config.DriveCache.Path, fullFilename)
|
||||
fullPath := filepath.Join(config.DriveCache.Path, "images", fullFilename)
|
||||
|
||||
if _, err := os.Stat(fullPath); err == nil {
|
||||
statusMap[id] = fmt.Sprintf("/image/%s_full.%s", id, ext)
|
||||
|
@ -447,7 +453,9 @@ func cleanExpiredCachedImages() {
|
|||
}
|
||||
|
||||
func cleanupCache() {
|
||||
files, err := os.ReadDir(config.DriveCache.Path)
|
||||
// Scan the image cache directory: config.DriveCache.Path/images
|
||||
imageCacheDir := filepath.Join(config.DriveCache.Path, "images")
|
||||
files, err := os.ReadDir(imageCacheDir)
|
||||
if err != nil {
|
||||
printErr("Failed to read DriveCache directory: %v", err)
|
||||
return
|
||||
|
@ -462,19 +470,17 @@ func cleanupCache() {
|
|||
continue
|
||||
}
|
||||
|
||||
filePath := filepath.Join(config.DriveCache.Path, file.Name())
|
||||
filePath := filepath.Join(imageCacheDir, file.Name())
|
||||
|
||||
// Check for expired files based on modification time
|
||||
if config.DriveCache.Duration > 0 && time.Since(info.ModTime()) > config.DriveCache.Duration {
|
||||
if err := os.Remove(filePath); err == nil {
|
||||
printDebug("Removed expired cache file: %s", filePath)
|
||||
} else {
|
||||
printErr("Failed to remove expired cache file: %s", filePath)
|
||||
}
|
||||
continue // Skip adding this file to the list
|
||||
continue
|
||||
}
|
||||
|
||||
// Accumulate total size and store file info for potential deletion
|
||||
totalSize += uint64(info.Size())
|
||||
fileInfos = append(fileInfos, info)
|
||||
}
|
||||
|
@ -491,7 +497,7 @@ func cleanupCache() {
|
|||
break
|
||||
}
|
||||
|
||||
filePath := filepath.Join(config.DriveCache.Path, info.Name())
|
||||
filePath := filepath.Join(imageCacheDir, info.Name())
|
||||
fileSize := uint64(info.Size())
|
||||
|
||||
if err := os.Remove(filePath); err == nil {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue