package main import ( "bytes" "crypto/md5" "crypto/tls" "encoding/base64" "encoding/hex" "fmt" "image" "image/gif" "image/jpeg" "image/png" "io" "net/http" "net/url" "os" "path/filepath" "regexp" "strings" "sync" "time" "github.com/chai2010/webp" "github.com/fyne-io/image/ico" "golang.org/x/image/bmp" "golang.org/x/image/draw" "golang.org/x/image/tiff" "golang.org/x/net/html" ) var ( faviconCache = struct { sync.RWMutex m map[string]bool // tracks in-progress downloads }{m: make(map[string]bool)} // Common favicon paths to try commonFaviconPaths = []string{ "/favicon.ico", "/favicon.png", "/favicon.jpg", "/favicon.jpeg", "/favicon.webp", "/apple-touch-icon.png", "/apple-touch-icon-precomposed.png", } // Regex to extract favicon URLs from HTML iconLinkRegex = regexp.MustCompile(`]+rel=["'](?:icon|shortcut icon|apple-touch-icon)["'][^>]+href=["']([^"']+)["']`) ) // Add this near the top with other vars var ( faviconDownloadQueue = make(chan faviconDownloadRequest, 1000) ) type faviconDownloadRequest struct { faviconURL string pageURL string cacheID string } func init() { // Start 5 worker goroutines to process favicon downloads for i := 0; i < 5; i++ { go faviconDownloadWorker() } } func faviconDownloadWorker() { for req := range faviconDownloadQueue { cacheFavicon(req.faviconURL, req.cacheID) } } // Generates a cache ID from URL func faviconIDFromURL(rawURL string) string { hasher := md5.New() hasher.Write([]byte(rawURL)) return hex.EncodeToString(hasher.Sum(nil)) } // Resolves favicon URL using multiple methods func resolveFaviconURL(rawFavicon, pageURL string) (faviconURL, cacheID string) { cacheID = faviconIDFromURL(pageURL) // Handle data URLs first if strings.HasPrefix(rawFavicon, "data:image") { parts := strings.SplitN(rawFavicon, ";base64,", 2) if len(parts) == 2 { data, err := base64.StdEncoding.DecodeString(parts[1]) if err == nil { hasher := md5.New() hasher.Write(data) return rawFavicon, hex.EncodeToString(hasher.Sum(nil)) } } return "", "" // Invalid data URL } // Existing URL handling logic if rawFavicon != "" && strings.HasPrefix(rawFavicon, "http") { cacheID = faviconIDFromURL(rawFavicon) return rawFavicon, cacheID } parsedPage, err := url.Parse(pageURL) if err != nil { return "", "" } // Method 1: Parse HTML if favicon := findFaviconInHTML(pageURL); favicon != "" { if strings.HasPrefix(favicon, "http") { return favicon, faviconIDFromURL(favicon) } resolved := resolveRelativeURL(parsedPage, favicon) return resolved, faviconIDFromURL(resolved) } // Method 2: Common paths for _, path := range commonFaviconPaths { testURL := "https://" + parsedPage.Host + path if checkURLExists(testURL) { return testURL, faviconIDFromURL(testURL) } } // Method 3: HTTP headers if headerIcon := findFaviconInHeaders(pageURL); headerIcon != "" { if strings.HasPrefix(headerIcon, "http") { return headerIcon, faviconIDFromURL(headerIcon) } resolved := resolveRelativeURL(parsedPage, headerIcon) return resolved, faviconIDFromURL(resolved) } // Fallback fallbackURL := "https://" + parsedPage.Host + "/favicon.ico" return fallbackURL, faviconIDFromURL(fallbackURL) } // Checks HTTP headers for favicon links func findFaviconInHeaders(pageURL string) string { client := &http.Client{ Timeout: 3 * time.Second, // like 3 seconds for favicon should be enough Transport: &http.Transport{ TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, }, } req, err := http.NewRequest("HEAD", pageURL, nil) if err != nil { return "" } // Add User-Agent userAgent, err := GetUserAgent("findFaviconInHeaders") if err != nil { printWarn("Error getting User-Agent: %v", err) } req.Header.Set("User-Agent", userAgent) resp, err := client.Do(req) if err != nil { return "" } defer resp.Body.Close() // Check Link headers (common for favicons) if links, ok := resp.Header["Link"]; ok { for _, link := range links { parts := strings.Split(link, ";") if len(parts) < 2 { continue } urlPart := strings.TrimSpace(parts[0]) if !strings.HasPrefix(urlPart, "<") || !strings.HasSuffix(urlPart, ">") { continue } urlPart = urlPart[1 : len(urlPart)-1] // Remove < and > for _, part := range parts[1:] { part = strings.TrimSpace(part) if strings.EqualFold(part, `rel="icon"`) || strings.EqualFold(part, `rel=icon`) || strings.EqualFold(part, `rel="shortcut icon"`) || strings.EqualFold(part, `rel=shortcut icon`) { return urlPart } } } } return "" } // Helper to resolve relative URLs func resolveRelativeURL(base *url.URL, relative string) string { if strings.HasPrefix(relative, "http") { return relative } if strings.HasPrefix(relative, "//") { return base.Scheme + ":" + relative } if strings.HasPrefix(relative, "/") { return base.Scheme + "://" + base.Host + relative } return base.Scheme + "://" + base.Host + base.Path + "/" + relative } // Checks if a URL exists (returns 200 OK) func checkURLExists(url string) bool { client := &http.Client{ Timeout: 5 * time.Second, Transport: &http.Transport{ TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, }, } req, err := http.NewRequest("HEAD", url, nil) if err != nil { return false } // Add User-Agent userAgent, err := GetUserAgent("Text-Search-Brave") if err != nil { printWarn("Error getting User-Agent: %v", err) } req.Header.Set("checkURLExists", userAgent) resp, err := client.Do(req) if err != nil { return false } resp.Body.Close() return resp.StatusCode == http.StatusOK } // Fetches HTML and looks for favicon links func findFaviconInHTML(pageURL string) string { client := &http.Client{ Timeout: 10 * time.Second, Transport: &http.Transport{ TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, }, } req, err := http.NewRequest("GET", pageURL, nil) if err != nil { return "" } // Add User-Agent userAgent, err := GetUserAgent("findFaviconInHTML") if err != nil { printWarn("Error getting User-Agent: %v", err) } req.Header.Set("User-Agent", userAgent) resp, err := client.Do(req) if err != nil { return "" } defer resp.Body.Close() // Check if this is an AMP page isAMP := false for _, attr := range resp.Header["Link"] { if strings.Contains(attr, "rel=\"amphtml\"") { isAMP = true break } } // Parse HTML doc, err := html.Parse(resp.Body) if err != nil { return "" } var faviconURL string var findLinks func(*html.Node) findLinks = func(n *html.Node) { if n.Type == html.ElementNode && n.Data == "link" { var rel, href string for _, attr := range n.Attr { switch attr.Key { case "rel": rel = attr.Val case "href": href = attr.Val } } // Prioritize different favicon types if href != "" { switch rel { case "icon", "shortcut icon", "apple-touch-icon", "apple-touch-icon-precomposed": // For AMP pages, prefer the non-versioned URL if possible if isAMP { if u, err := url.Parse(href); err == nil { u.RawQuery = "" // Remove query parameters href = u.String() } } if faviconURL == "" || // First found rel == "apple-touch-icon" || // Prefer apple-touch-icon rel == "icon" { // Then regular icon faviconURL = href } } } } for c := n.FirstChild; c != nil; c = c.NextSibling { findLinks(c) } } findLinks(doc) return faviconURL } func getFaviconProxyURL(rawFavicon, pageURL string) string { if pageURL == "" { return "/static/images/globe.svg" } cacheID := faviconIDFromURL(pageURL) filename := fmt.Sprintf("%s_icon.webp", cacheID) cachedPath := filepath.Join(config.DriveCache.Path, "images", filename) if _, err := os.Stat(cachedPath); err == nil { return fmt.Sprintf("/image/%s_icon.webp", cacheID) } // Resolve URL faviconURL, _ := resolveFaviconURL(rawFavicon, pageURL) if faviconURL == "" { recordInvalidImageID(cacheID) return "/static/images/globe.svg" } // Check if already downloading faviconCache.RLock() downloading := faviconCache.m[cacheID] faviconCache.RUnlock() if !downloading { faviconCache.Lock() faviconCache.m[cacheID] = true faviconCache.Unlock() // Send to download queue instead of starting goroutine faviconDownloadQueue <- faviconDownloadRequest{ faviconURL: faviconURL, pageURL: pageURL, cacheID: cacheID, } } return fmt.Sprintf("/image/%s_icon.webp", cacheID) } // Caches favicon, always saving *_icon.webp func cacheFavicon(imageURL, imageID string) (string, bool, error) { // if imageURL == "" { // recordInvalidImageID(imageID) // return "", false, fmt.Errorf("empty image URL for image ID %s", imageID) // } // Debug fmt.Printf("Downloading favicon [%s] for ID [%s]\n", imageURL, imageID) filename := fmt.Sprintf("%s_icon.webp", imageID) imageCacheDir := filepath.Join(config.DriveCache.Path, "images") if err := os.MkdirAll(imageCacheDir, 0755); err != nil { return "", false, fmt.Errorf("couldn't create images folder: %v", err) } cachedImagePath := filepath.Join(imageCacheDir, filename) tempImagePath := cachedImagePath + ".tmp" // Already cached? if _, err := os.Stat(cachedImagePath); err == nil { return cachedImagePath, true, nil } cachingImagesMu.Lock() if _, exists := cachingImages[imageURL]; !exists { cachingImages[imageURL] = &sync.Mutex{} } mu := cachingImages[imageURL] cachingImagesMu.Unlock() mu.Lock() defer mu.Unlock() // Recheck after lock if _, err := os.Stat(cachedImagePath); err == nil { return cachedImagePath, true, nil } cachingSemaphore <- struct{}{} defer func() { <-cachingSemaphore }() var data []byte var contentType string // Handle data URLs if strings.HasPrefix(imageURL, "data:") { commaIndex := strings.Index(imageURL, ",") if commaIndex == -1 { recordInvalidImageID(imageID) return "", false, fmt.Errorf("invalid data URL: no comma") } headerPart := imageURL[:commaIndex] dataPart := imageURL[commaIndex+1:] mediaType := "text/plain" base64Encoded := false if strings.HasPrefix(headerPart, "data:") { mediaTypePart := headerPart[5:] mediaTypeParts := strings.SplitN(mediaTypePart, ";", 2) mediaType = mediaTypeParts[0] if len(mediaTypeParts) > 1 { for _, param := range strings.Split(mediaTypeParts[1], ";") { param = strings.TrimSpace(param) if param == "base64" { base64Encoded = true } } } } if base64Encoded { data, _ = base64.StdEncoding.DecodeString(dataPart) } else { decodedStr, err := url.QueryUnescape(dataPart) if err != nil { data = []byte(dataPart) } else { data = []byte(decodedStr) } } contentType = mediaType } else { // Download from HTTP URL client := &http.Client{ Timeout: 15 * time.Second, Transport: &http.Transport{ TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, }, } req, err := http.NewRequest("GET", imageURL, nil) if err != nil { recordInvalidImageID(imageID) return "", false, err } // Add User-Agent userAgent, err := GetUserAgent("Text-Search-Brave") if err != nil { printWarn("Error getting User-Agent: %v", err) } req.Header.Set("User-Agent", userAgent) resp, err := client.Do(req) if err != nil { recordInvalidImageID(imageID) return "", false, err } defer resp.Body.Close() data, err = io.ReadAll(resp.Body) if err != nil { recordInvalidImageID(imageID) return "", false, err } contentType = http.DetectContentType(data) } if !strings.HasPrefix(contentType, "image/") { recordInvalidImageID(imageID) return "", false, fmt.Errorf("URL did not return an image: %s", imageURL) } // SVG special case if contentType == "image/svg+xml" { err := os.WriteFile(tempImagePath, data, 0644) if err != nil { recordInvalidImageID(imageID) return "", false, err } err = os.Rename(tempImagePath, cachedImagePath) if err != nil { recordInvalidImageID(imageID) return "", false, err } cachingImagesMu.Lock() delete(cachingImages, imageURL) cachingImagesMu.Unlock() return cachedImagePath, true, nil } // Decode image var img image.Image var err error switch contentType { case "image/x-icon", "image/vnd.microsoft.icon": img, err = ico.Decode(bytes.NewReader(data)) case "image/jpeg": img, err = jpeg.Decode(bytes.NewReader(data)) case "image/png": img, err = png.Decode(bytes.NewReader(data)) case "image/gif": img, err = gif.Decode(bytes.NewReader(data)) case "image/webp": img, err = webp.Decode(bytes.NewReader(data)) case "image/bmp": img, err = bmp.Decode(bytes.NewReader(data)) case "image/tiff": img, err = tiff.Decode(bytes.NewReader(data)) default: recordInvalidImageID(imageID) return "", false, fmt.Errorf("unsupported image type: %s", contentType) } if err != nil { recordInvalidImageID(imageID) return "", false, err } // Resize maxSize := 16 width := img.Bounds().Dx() height := img.Bounds().Dy() if width > maxSize || height > maxSize { dst := image.NewRGBA(image.Rect(0, 0, maxSize, maxSize)) draw.ApproxBiLinear.Scale(dst, dst.Bounds(), img, img.Bounds(), draw.Over, nil) img = dst } // Save as WebP outFile, err := os.Create(tempImagePath) if err != nil { recordInvalidImageID(imageID) return "", false, err } defer outFile.Close() options := &webp.Options{Lossless: false, Quality: 80} err = webp.Encode(outFile, img, options) if err != nil { recordInvalidImageID(imageID) return "", false, err } err = os.Rename(tempImagePath, cachedImagePath) if err != nil { recordInvalidImageID(imageID) return "", false, err } cachingImagesMu.Lock() delete(cachingImages, imageURL) cachingImagesMu.Unlock() return cachedImagePath, true, nil }