Initial favicon add
This commit is contained in:
parent
6445be87a9
commit
bc89f5b819
8 changed files with 755 additions and 21 deletions
549
favicon.go
Normal file
549
favicon.go
Normal file
|
@ -0,0 +1,549 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"crypto/md5"
|
||||
"crypto/tls"
|
||||
"encoding/base64"
|
||||
"encoding/hex"
|
||||
"fmt"
|
||||
"image"
|
||||
"image/gif"
|
||||
"image/jpeg"
|
||||
"image/png"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/chai2010/webp"
|
||||
"github.com/fyne-io/image/ico"
|
||||
"golang.org/x/image/bmp"
|
||||
"golang.org/x/image/draw"
|
||||
"golang.org/x/image/tiff"
|
||||
"golang.org/x/net/html"
|
||||
)
|
||||
|
||||
// faviconCache records which favicon cache IDs currently have a download in
// flight. The embedded RWMutex guards the map.
var faviconCache = struct {
	sync.RWMutex
	m map[string]bool // cache ID -> true while a download is running
}{
	m: map[string]bool{},
}

// commonFaviconPaths lists well-known favicon locations, in the order in
// which they are probed on a host.
var commonFaviconPaths = []string{
	"/favicon.ico",
	"/favicon.png",
	"/favicon.jpg",
	"/favicon.jpeg",
	"/favicon.webp",
	"/apple-touch-icon.png",
	"/apple-touch-icon-precomposed.png",
}

// iconLinkRegex captures the href of a <link> tag whose rel attribute is
// "icon", "shortcut icon" or "apple-touch-icon". It only matches when rel
// appears before href inside the tag.
var iconLinkRegex = regexp.MustCompile(`<link[^>]+rel=["'](?:icon|shortcut icon|apple-touch-icon)["'][^>]+href=["']([^"']+)["']`)
|
||||
|
||||
// faviconIDFromURL derives a cache ID from rawURL: the lowercase hexadecimal
// MD5 digest of the URL string. The digest is used purely as a file-name key,
// not for any security purpose.
func faviconIDFromURL(rawURL string) string {
	digest := md5.Sum([]byte(rawURL))
	return hex.EncodeToString(digest[:])
}
|
||||
|
||||
// Resolves favicon URL using multiple methods
|
||||
func resolveFaviconURL(rawFavicon, pageURL string) (faviconURL, cacheID string) {
|
||||
// Handle data URLs first
|
||||
if strings.HasPrefix(rawFavicon, "data:image") {
|
||||
parts := strings.SplitN(rawFavicon, ";base64,", 2)
|
||||
if len(parts) == 2 {
|
||||
data, err := base64.StdEncoding.DecodeString(parts[1])
|
||||
if err == nil {
|
||||
hasher := md5.New()
|
||||
hasher.Write(data)
|
||||
return rawFavicon, hex.EncodeToString(hasher.Sum(nil))
|
||||
}
|
||||
}
|
||||
return "", "" // Invalid data URL
|
||||
}
|
||||
|
||||
// Existing URL handling logic
|
||||
if rawFavicon != "" && strings.HasPrefix(rawFavicon, "http") {
|
||||
cacheID = faviconIDFromURL(rawFavicon)
|
||||
return rawFavicon, cacheID
|
||||
}
|
||||
|
||||
parsedPage, err := url.Parse(pageURL)
|
||||
if err != nil {
|
||||
return "", ""
|
||||
}
|
||||
|
||||
// Method 1: Parse HTML
|
||||
if favicon := findFaviconInHTML(pageURL); favicon != "" {
|
||||
if strings.HasPrefix(favicon, "http") {
|
||||
return favicon, faviconIDFromURL(favicon)
|
||||
}
|
||||
resolved := resolveRelativeURL(parsedPage, favicon)
|
||||
return resolved, faviconIDFromURL(resolved)
|
||||
}
|
||||
|
||||
// Method 2: Common paths
|
||||
for _, path := range commonFaviconPaths {
|
||||
testURL := "https://" + parsedPage.Host + path
|
||||
if checkURLExists(testURL) {
|
||||
return testURL, faviconIDFromURL(testURL)
|
||||
}
|
||||
}
|
||||
|
||||
// Method 3: HTTP headers
|
||||
if headerIcon := findFaviconInHeaders(pageURL); headerIcon != "" {
|
||||
if strings.HasPrefix(headerIcon, "http") {
|
||||
return headerIcon, faviconIDFromURL(headerIcon)
|
||||
}
|
||||
resolved := resolveRelativeURL(parsedPage, headerIcon)
|
||||
return resolved, faviconIDFromURL(resolved)
|
||||
}
|
||||
|
||||
// Fallback
|
||||
fallbackURL := "https://" + parsedPage.Host + "/favicon.ico"
|
||||
return fallbackURL, faviconIDFromURL(fallbackURL)
|
||||
}
|
||||
|
||||
// Checks HTTP headers for favicon links
|
||||
func findFaviconInHeaders(pageURL string) string {
|
||||
client := &http.Client{
|
||||
Timeout: 3 * time.Second, // like 3 seconds for favicon should be enough
|
||||
Transport: &http.Transport{
|
||||
TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
|
||||
},
|
||||
}
|
||||
|
||||
req, err := http.NewRequest("HEAD", pageURL, nil)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
|
||||
// Add User-Agent
|
||||
userAgent, err := GetUserAgent("findFaviconInHeaders")
|
||||
if err != nil {
|
||||
printWarn("Error getting User-Agent: %v", err)
|
||||
}
|
||||
req.Header.Set("User-Agent", userAgent)
|
||||
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
// Check Link headers (common for favicons)
|
||||
if links, ok := resp.Header["Link"]; ok {
|
||||
for _, link := range links {
|
||||
parts := strings.Split(link, ";")
|
||||
if len(parts) < 2 {
|
||||
continue
|
||||
}
|
||||
|
||||
urlPart := strings.TrimSpace(parts[0])
|
||||
if !strings.HasPrefix(urlPart, "<") || !strings.HasSuffix(urlPart, ">") {
|
||||
continue
|
||||
}
|
||||
|
||||
urlPart = urlPart[1 : len(urlPart)-1] // Remove < and >
|
||||
for _, part := range parts[1:] {
|
||||
part = strings.TrimSpace(part)
|
||||
if strings.EqualFold(part, `rel="icon"`) ||
|
||||
strings.EqualFold(part, `rel=icon`) ||
|
||||
strings.EqualFold(part, `rel="shortcut icon"`) ||
|
||||
strings.EqualFold(part, `rel=shortcut icon`) {
|
||||
return urlPart
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
// resolveRelativeURL resolves relative against base following the standard
// URL reference-resolution rules and returns the absolute URL as a string.
//
// Absolute references are returned as they are (after normalisation),
// protocol-relative references ("//host/x") inherit base's scheme, rooted
// paths ("/x") replace base's path, and document-relative references ("x",
// "../x") are resolved against the directory of base's path, not appended to
// the full path. Surrounding whitespace in relative is ignored.
//
// It returns "" when relative is not a parseable URL reference. A nil base
// leaves the reference unresolved.
func resolveRelativeURL(base *url.URL, relative string) string {
	ref, err := url.Parse(strings.TrimSpace(relative))
	if err != nil {
		return ""
	}
	if base == nil {
		return ref.String()
	}
	return base.ResolveReference(ref).String()
}
|
||||
|
||||
// Checks if a URL exists (returns 200 OK)
|
||||
func checkURLExists(url string) bool {
|
||||
client := &http.Client{
|
||||
Timeout: 5 * time.Second,
|
||||
Transport: &http.Transport{
|
||||
TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
|
||||
},
|
||||
}
|
||||
req, err := http.NewRequest("HEAD", url, nil)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
// Add User-Agent
|
||||
userAgent, err := GetUserAgent("Text-Search-Brave")
|
||||
if err != nil {
|
||||
printWarn("Error getting User-Agent: %v", err)
|
||||
}
|
||||
req.Header.Set("checkURLExists", userAgent)
|
||||
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
resp.Body.Close()
|
||||
return resp.StatusCode == http.StatusOK
|
||||
}
|
||||
|
||||
// Fetches HTML and looks for favicon links
|
||||
func findFaviconInHTML(pageURL string) string {
|
||||
client := &http.Client{
|
||||
Timeout: 10 * time.Second,
|
||||
Transport: &http.Transport{
|
||||
TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
|
||||
},
|
||||
}
|
||||
|
||||
req, err := http.NewRequest("GET", pageURL, nil)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
|
||||
// Add User-Agent
|
||||
userAgent, err := GetUserAgent("findFaviconInHTML")
|
||||
if err != nil {
|
||||
printWarn("Error getting User-Agent: %v", err)
|
||||
}
|
||||
req.Header.Set("User-Agent", userAgent)
|
||||
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
// Check if this is an AMP page
|
||||
isAMP := false
|
||||
for _, attr := range resp.Header["Link"] {
|
||||
if strings.Contains(attr, "rel=\"amphtml\"") {
|
||||
isAMP = true
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// Parse HTML
|
||||
doc, err := html.Parse(resp.Body)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
|
||||
var faviconURL string
|
||||
var findLinks func(*html.Node)
|
||||
findLinks = func(n *html.Node) {
|
||||
if n.Type == html.ElementNode && n.Data == "link" {
|
||||
var rel, href string
|
||||
for _, attr := range n.Attr {
|
||||
switch attr.Key {
|
||||
case "rel":
|
||||
rel = attr.Val
|
||||
case "href":
|
||||
href = attr.Val
|
||||
}
|
||||
}
|
||||
|
||||
// Prioritize different favicon types
|
||||
if href != "" {
|
||||
switch rel {
|
||||
case "icon", "shortcut icon", "apple-touch-icon", "apple-touch-icon-precomposed":
|
||||
// For AMP pages, prefer the non-versioned URL if possible
|
||||
if isAMP {
|
||||
if u, err := url.Parse(href); err == nil {
|
||||
u.RawQuery = "" // Remove query parameters
|
||||
href = u.String()
|
||||
}
|
||||
}
|
||||
if faviconURL == "" || // First found
|
||||
rel == "apple-touch-icon" || // Prefer apple-touch-icon
|
||||
rel == "icon" { // Then regular icon
|
||||
faviconURL = href
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
for c := n.FirstChild; c != nil; c = c.NextSibling {
|
||||
findLinks(c)
|
||||
}
|
||||
}
|
||||
findLinks(doc)
|
||||
|
||||
return faviconURL
|
||||
}
|
||||
|
||||
// Get proxy URL (cached) - remains mostly the same
|
||||
func getFaviconProxyURL(rawFavicon, pageURL string) string {
|
||||
// First try cache without any locks
|
||||
cacheID := faviconIDFromURL(pageURL) // Simple hash of pageURL
|
||||
filename := fmt.Sprintf("%s_thumb.webp", cacheID)
|
||||
cachedPath := filepath.Join(config.DriveCache.Path, "images", filename)
|
||||
|
||||
if _, err := os.Stat(cachedPath); err == nil {
|
||||
return fmt.Sprintf("/image/%s_thumb.webp", cacheID)
|
||||
}
|
||||
|
||||
// Cache miss - resolve favicon URL (may hit network)
|
||||
faviconURL, cacheID := resolveFaviconURL(rawFavicon, pageURL)
|
||||
if faviconURL == "" || cacheID == "" {
|
||||
return "/static/images/missing.svg"
|
||||
}
|
||||
|
||||
// Recheck cache after resolution (in case another request cached it)
|
||||
if _, err := os.Stat(cachedPath); err == nil {
|
||||
return fmt.Sprintf("/image/%s_thumb.webp", cacheID)
|
||||
}
|
||||
|
||||
// Check download status with lock
|
||||
faviconCache.RLock()
|
||||
downloading := faviconCache.m[cacheID]
|
||||
faviconCache.RUnlock()
|
||||
|
||||
if !downloading {
|
||||
faviconCache.Lock()
|
||||
faviconCache.m[cacheID] = true
|
||||
faviconCache.Unlock()
|
||||
|
||||
go func() {
|
||||
defer func() {
|
||||
faviconCache.Lock()
|
||||
delete(faviconCache.m, cacheID)
|
||||
faviconCache.Unlock()
|
||||
}()
|
||||
cacheFavicon(faviconURL, cacheID)
|
||||
}()
|
||||
}
|
||||
|
||||
return fmt.Sprintf("/image/%s_thumb.webp", cacheID)
|
||||
}
|
||||
|
||||
// Caches favicon, always saving *_thumb.webp
|
||||
func cacheFavicon(imageURL, imageID string) (string, bool, error) {
|
||||
if imageURL == "" {
|
||||
recordInvalidImageID(imageID)
|
||||
return "", false, fmt.Errorf("empty image URL for image ID %s", imageID)
|
||||
}
|
||||
|
||||
filename := fmt.Sprintf("%s_thumb.webp", imageID)
|
||||
imageCacheDir := filepath.Join(config.DriveCache.Path, "images")
|
||||
if err := os.MkdirAll(imageCacheDir, 0755); err != nil {
|
||||
return "", false, fmt.Errorf("couldn't create images folder: %v", err)
|
||||
}
|
||||
cachedImagePath := filepath.Join(imageCacheDir, filename)
|
||||
tempImagePath := cachedImagePath + ".tmp"
|
||||
|
||||
// Already cached?
|
||||
if _, err := os.Stat(cachedImagePath); err == nil {
|
||||
return cachedImagePath, true, nil
|
||||
}
|
||||
|
||||
cachingImagesMu.Lock()
|
||||
if _, exists := cachingImages[imageURL]; !exists {
|
||||
cachingImages[imageURL] = &sync.Mutex{}
|
||||
}
|
||||
mu := cachingImages[imageURL]
|
||||
cachingImagesMu.Unlock()
|
||||
|
||||
mu.Lock()
|
||||
defer mu.Unlock()
|
||||
|
||||
// Recheck after lock
|
||||
if _, err := os.Stat(cachedImagePath); err == nil {
|
||||
return cachedImagePath, true, nil
|
||||
}
|
||||
|
||||
cachingSemaphore <- struct{}{}
|
||||
defer func() { <-cachingSemaphore }()
|
||||
|
||||
var data []byte
|
||||
var contentType string
|
||||
|
||||
// Handle data URLs
|
||||
if strings.HasPrefix(imageURL, "data:") {
|
||||
commaIndex := strings.Index(imageURL, ",")
|
||||
if commaIndex == -1 {
|
||||
recordInvalidImageID(imageID)
|
||||
return "", false, fmt.Errorf("invalid data URL: no comma")
|
||||
}
|
||||
headerPart := imageURL[:commaIndex]
|
||||
dataPart := imageURL[commaIndex+1:]
|
||||
|
||||
mediaType := "text/plain"
|
||||
base64Encoded := false
|
||||
if strings.HasPrefix(headerPart, "data:") {
|
||||
mediaTypePart := headerPart[5:]
|
||||
mediaTypeParts := strings.SplitN(mediaTypePart, ";", 2)
|
||||
mediaType = mediaTypeParts[0]
|
||||
if len(mediaTypeParts) > 1 {
|
||||
for _, param := range strings.Split(mediaTypeParts[1], ";") {
|
||||
param = strings.TrimSpace(param)
|
||||
if param == "base64" {
|
||||
base64Encoded = true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if base64Encoded {
|
||||
data, _ = base64.StdEncoding.DecodeString(dataPart)
|
||||
} else {
|
||||
decodedStr, err := url.QueryUnescape(dataPart)
|
||||
if err != nil {
|
||||
data = []byte(dataPart)
|
||||
} else {
|
||||
data = []byte(decodedStr)
|
||||
}
|
||||
}
|
||||
|
||||
contentType = mediaType
|
||||
} else {
|
||||
// Download from HTTP URL
|
||||
client := &http.Client{
|
||||
Timeout: 15 * time.Second,
|
||||
Transport: &http.Transport{
|
||||
TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
|
||||
},
|
||||
}
|
||||
|
||||
req, err := http.NewRequest("GET", imageURL, nil)
|
||||
if err != nil {
|
||||
recordInvalidImageID(imageID)
|
||||
return "", false, err
|
||||
}
|
||||
|
||||
// Add User-Agent
|
||||
userAgent, err := GetUserAgent("Text-Search-Brave")
|
||||
if err != nil {
|
||||
printWarn("Error getting User-Agent: %v", err)
|
||||
}
|
||||
req.Header.Set("User-Agent", userAgent)
|
||||
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
recordInvalidImageID(imageID)
|
||||
return "", false, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
data, err = io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
recordInvalidImageID(imageID)
|
||||
return "", false, err
|
||||
}
|
||||
|
||||
contentType = http.DetectContentType(data)
|
||||
}
|
||||
|
||||
if !strings.HasPrefix(contentType, "image/") {
|
||||
recordInvalidImageID(imageID)
|
||||
return "", false, fmt.Errorf("URL did not return an image: %s", imageURL)
|
||||
}
|
||||
|
||||
// SVG special case
|
||||
if contentType == "image/svg+xml" {
|
||||
err := os.WriteFile(tempImagePath, data, 0644)
|
||||
if err != nil {
|
||||
recordInvalidImageID(imageID)
|
||||
return "", false, err
|
||||
}
|
||||
err = os.Rename(tempImagePath, cachedImagePath)
|
||||
if err != nil {
|
||||
recordInvalidImageID(imageID)
|
||||
return "", false, err
|
||||
}
|
||||
cachingImagesMu.Lock()
|
||||
delete(cachingImages, imageURL)
|
||||
cachingImagesMu.Unlock()
|
||||
return cachedImagePath, true, nil
|
||||
}
|
||||
|
||||
// Decode image
|
||||
var img image.Image
|
||||
var err error
|
||||
switch contentType {
|
||||
case "image/x-icon", "image/vnd.microsoft.icon":
|
||||
img, err = ico.Decode(bytes.NewReader(data))
|
||||
case "image/jpeg":
|
||||
img, err = jpeg.Decode(bytes.NewReader(data))
|
||||
case "image/png":
|
||||
img, err = png.Decode(bytes.NewReader(data))
|
||||
case "image/gif":
|
||||
img, err = gif.Decode(bytes.NewReader(data))
|
||||
case "image/webp":
|
||||
img, err = webp.Decode(bytes.NewReader(data))
|
||||
case "image/bmp":
|
||||
img, err = bmp.Decode(bytes.NewReader(data))
|
||||
case "image/tiff":
|
||||
img, err = tiff.Decode(bytes.NewReader(data))
|
||||
default:
|
||||
recordInvalidImageID(imageID)
|
||||
return "", false, fmt.Errorf("unsupported image type: %s", contentType)
|
||||
}
|
||||
if err != nil {
|
||||
recordInvalidImageID(imageID)
|
||||
return "", false, err
|
||||
}
|
||||
|
||||
// Resize
|
||||
maxSize := 16
|
||||
width := img.Bounds().Dx()
|
||||
height := img.Bounds().Dy()
|
||||
|
||||
if width > maxSize || height > maxSize {
|
||||
dst := image.NewRGBA(image.Rect(0, 0, maxSize, maxSize))
|
||||
draw.ApproxBiLinear.Scale(dst, dst.Bounds(), img, img.Bounds(), draw.Over, nil)
|
||||
img = dst
|
||||
}
|
||||
|
||||
// Save as WebP
|
||||
outFile, err := os.Create(tempImagePath)
|
||||
if err != nil {
|
||||
recordInvalidImageID(imageID)
|
||||
return "", false, err
|
||||
}
|
||||
defer outFile.Close()
|
||||
|
||||
options := &webp.Options{Lossless: false, Quality: 80}
|
||||
err = webp.Encode(outFile, img, options)
|
||||
if err != nil {
|
||||
recordInvalidImageID(imageID)
|
||||
return "", false, err
|
||||
}
|
||||
|
||||
err = os.Rename(tempImagePath, cachedImagePath)
|
||||
if err != nil {
|
||||
recordInvalidImageID(imageID)
|
||||
return "", false, err
|
||||
}
|
||||
|
||||
cachingImagesMu.Lock()
|
||||
delete(cachingImages, imageURL)
|
||||
cachingImagesMu.Unlock()
|
||||
|
||||
return cachedImagePath, true, nil
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue