Fixed issue with no images being retrieved from Qwant (incorrect user agents)
Some checks failed
Run Integration Tests / test (push) Failing after 40s

This commit is contained in:
partisan 2025-01-28 21:05:43 +01:00
parent 8db4e18ee4
commit dc4a3a4bec

162
agent.go
View file

@ -11,11 +11,13 @@ import (
"time" "time"
) )
// BrowserVersion is one browser release together with its global usage
// figure, as decoded from the caniuse usage data.
type BrowserVersion struct {
	// Version is the release identifier, read from the "version" JSON key.
	Version string `json:"version"`
	// Global is the usage figure for this release, read from the "global"
	// JSON key.
	Global float64 `json:"global"`
}
// BrowserData holds sets of versions for Firefox and Chromium
type BrowserData struct { type BrowserData struct {
Firefox []BrowserVersion `json:"firefox"` Firefox []BrowserVersion `json:"firefox"`
Chromium []BrowserVersion `json:"chrome"` Chromium []BrowserVersion `json:"chrome"`
@ -28,6 +30,7 @@ var (
}{ }{
data: make(map[string]string), data: make(map[string]string),
} }
browserCache = struct { browserCache = struct {
sync.RWMutex sync.RWMutex
data BrowserData data BrowserData
@ -37,26 +40,19 @@ var (
} }
) )
// fetchLatestBrowserVersions retrieves usage data from caniuse.com's fulldata JSON.
func fetchLatestBrowserVersions() (BrowserData, error) { func fetchLatestBrowserVersions() (BrowserData, error) {
url := "https://raw.githubusercontent.com/Fyrd/caniuse/master/fulldata-json/data-2.0.json" const urlCaniuse = "https://raw.githubusercontent.com/Fyrd/caniuse/master/fulldata-json/data-2.0.json"
// // Optional: skip TLS verification to avoid certificate errors
// transport := &http.Transport{
// TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
// }
// Increase the HTTP client timeout
client := &http.Client{ client := &http.Client{
Timeout: 30 * time.Second, Timeout: 30 * time.Second,
// Transport: transport,
} }
// Build the request manually to set headers req, err := http.NewRequest("GET", urlCaniuse, nil)
req, err := http.NewRequest("GET", url, nil)
if err != nil { if err != nil {
return BrowserData{}, err return BrowserData{}, err
} }
// Custom user agent and English language preference
// Set a simple custom User-Agent and language
req.Header.Set("User-Agent", "MyCustomAgent/1.0 (compatible; +https://example.com)") req.Header.Set("User-Agent", "MyCustomAgent/1.0 (compatible; +https://example.com)")
req.Header.Set("Accept-Language", "en-US,en;q=0.9") req.Header.Set("Accept-Language", "en-US,en;q=0.9")
@ -71,36 +67,42 @@ func fetchLatestBrowserVersions() (BrowserData, error) {
return BrowserData{}, err return BrowserData{}, err
} }
var rawData map[string]interface{} var rawData map[string]any
if err := json.Unmarshal(body, &rawData); err != nil { if err := json.Unmarshal(body, &rawData); err != nil {
return BrowserData{}, err return BrowserData{}, err
} }
stats := rawData["agents"].(map[string]interface{}) stats, ok := rawData["agents"].(map[string]any)
if !ok {
return BrowserData{}, fmt.Errorf("unexpected JSON structure (no 'agents' field)")
}
var data BrowserData var data BrowserData
if firefoxData, ok := stats["firefox"].(map[string]interface{}); ok { // Extract Firefox data
for version, usage := range firefoxData["usage_global"].(map[string]interface{}) { if firefoxData, ok := stats["firefox"].(map[string]any); ok {
data.Firefox = append(data.Firefox, BrowserVersion{ if usageMap, ok := firefoxData["usage_global"].(map[string]any); ok {
Version: version, for version, usage := range usageMap {
Global: usage.(float64), val, _ := usage.(float64)
}) data.Firefox = append(data.Firefox, BrowserVersion{Version: version, Global: val})
}
} }
} }
if chromeData, ok := stats["chrome"].(map[string]interface{}); ok { // Extract Chrome data
for version, usage := range chromeData["usage_global"].(map[string]interface{}) { if chromeData, ok := stats["chrome"].(map[string]any); ok {
data.Chromium = append(data.Chromium, BrowserVersion{ if usageMap, ok := chromeData["usage_global"].(map[string]any); ok {
Version: version, for version, usage := range usageMap {
Global: usage.(float64), val, _ := usage.(float64)
}) data.Chromium = append(data.Chromium, BrowserVersion{Version: version, Global: val})
}
} }
} }
return data, nil return data, nil
} }
// getLatestBrowserVersions checks the cache and fetches new data if expired
func getLatestBrowserVersions() (BrowserData, error) { func getLatestBrowserVersions() (BrowserData, error) {
browserCache.RLock() browserCache.RLock()
if time.Now().Before(browserCache.expires) { if time.Now().Before(browserCache.expires) {
@ -117,37 +119,36 @@ func getLatestBrowserVersions() (BrowserData, error) {
browserCache.Lock() browserCache.Lock()
browserCache.data = data browserCache.data = data
browserCache.expires = time.Now().Add(24 * time.Hour) browserCache.expires = time.Now().Add(24 * time.Hour) // Refresh daily
browserCache.Unlock() browserCache.Unlock()
return data, nil return data, nil
} }
// randomUserAgent picks a random browser (Firefox/Chromium), selects a version based on usage,
// picks an OS string, and composes a User-Agent header.
func randomUserAgent() (string, error) { func randomUserAgent() (string, error) {
browsers, err := getLatestBrowserVersions() browsers, err := getLatestBrowserVersions()
if err != nil { if err != nil {
return "", err return "", err
} }
rand := rand.New(rand.NewSource(time.Now().UnixNano())) r := rand.New(rand.NewSource(time.Now().UnixNano()))
// Simulated browser usage statistics (in percentages) // Overall usage: 80% chance for Chromium, 20% for Firefox
usageStats := map[string]float64{ usageStats := map[string]float64{
"Firefox": 30.0, "Firefox": 20.0,
"Chromium": 70.0, "Chromium": 80.0,
} }
// Calculate the probabilities for the versions // Weighted random selection of the browser type
probabilities := []float64{0.5, 0.25, 0.125, 0.0625, 0.03125, 0.015625, 0.0078125, 0.00390625}
// Select a browser based on usage statistics
browserType := "" browserType := ""
randVal := rand.Float64() * 100 randVal := r.Float64() * 100
cumulative := 0.0 cumulative := 0.0
for browser, usage := range usageStats { for bType, usage := range usageStats {
cumulative += usage cumulative += usage
if randVal < cumulative { if randVal < cumulative {
browserType = browser browserType = bType
break break
} }
} }
@ -164,14 +165,16 @@ func randomUserAgent() (string, error) {
return "", fmt.Errorf("no versions found for browser: %s", browserType) return "", fmt.Errorf("no versions found for browser: %s", browserType)
} }
// Sort versions by usage (descending order) // Sort by global usage descending
sort.Slice(versions, func(i, j int) bool { sort.Slice(versions, func(i, j int) bool {
return versions[i].Global > versions[j].Global return versions[i].Global > versions[j].Global
}) })
// Select a version based on the probabilities // Probability distribution for top few versions
probabilities := []float64{0.5, 0.25, 0.125, 0.0625, 0.03125, 0.015625, 0.0078125, 0.00390625}
version := "" version := ""
randVal = rand.Float64() randVal = r.Float64()
cumulative = 0.0 cumulative = 0.0
for i, p := range probabilities { for i, p := range probabilities {
cumulative += p cumulative += p
@ -181,68 +184,72 @@ func randomUserAgent() (string, error) {
} }
} }
// Fallback to the last version if none matched // Fallback to the least used version if none matched
if version == "" { if version == "" {
version = versions[len(versions)-1].Version version = versions[len(versions)-1].Version
} }
// Generate the user agent string userAgent := generateUserAgent(browserType, version, r)
userAgent := generateUserAgent(browserType, version)
return userAgent, nil return userAgent, nil
} }
// generateUserAgent composes a User-Agent header value for the given browser
// family and version, paired with a randomly selected operating system token.
//
// browser must be "Firefox" or "Chromium"; any other value yields "".
// version is inserted verbatim into the result. r supplies the randomness
// for the OS choice and must be non-nil.
func generateUserAgent(browser, version string, r *rand.Rand) string {
	// Relative weights. The draw below is scaled to their sum, so the table
	// does not have to add up to 100 and cannot silently drift out of range
	// when entries are added or removed. The single Windows entry carries 86:
	// the other three entries total 14, and every draw that previously
	// matched no entry fell back to Windows anyway.
	oses := []struct {
		os     string
		weight float64
	}{
		{"Windows NT 10.0; Win64; x64", 86.0},
		{"X11; Linux x86_64", 2.0},
		{"X11; Ubuntu; Linux x86_64", 2.0},
		{"Macintosh; Intel Mac OS X 10_15_7", 10.0},
	}

	total := 0.0
	for _, entry := range oses {
		total += entry.weight
	}

	// Weighted random selection: walk the cumulative weights until the draw
	// falls inside an entry's slice of [0, total).
	randVal := r.Float64() * total
	cumulative := 0.0
	selectedOS := oses[0].os // guards against floating-point rounding at the top edge
	for _, entry := range oses {
		cumulative += entry.weight
		if randVal < cumulative {
			selectedOS = entry.os
			break
		}
	}

	switch browser {
	case "Firefox":
		// Example: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:117.0) Gecko/20100101 Firefox/117.0
		return fmt.Sprintf("Mozilla/5.0 (%s; rv:%s) Gecko/20100101 Firefox/%s", selectedOS, version, version)
	case "Chromium":
		// Example: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.5938.132 Safari/537.36
		return fmt.Sprintf("Mozilla/5.0 (%s) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36", selectedOS, version)
	}
	return ""
}
// updateCachedUserAgents randomly updates half of the cached UAs to new versions
func updateCachedUserAgents(newVersions BrowserData) { func updateCachedUserAgents(newVersions BrowserData) {
cache.Lock() cache.Lock()
defer cache.Unlock() defer cache.Unlock()
r := rand.New(rand.NewSource(time.Now().UnixNano()))
for key, userAgent := range cache.data { for key, userAgent := range cache.data {
randVal := rand.Float64() if r.Float64() < 0.5 {
if randVal < 0.5 { updatedUserAgent := updateUserAgentVersion(userAgent, newVersions, r)
updatedUserAgent := updateUserAgentVersion(userAgent, newVersions)
cache.data[key] = updatedUserAgent cache.data[key] = updatedUserAgent
} }
} }
} }
func updateUserAgentVersion(userAgent string, newVersions BrowserData) string { // updateUserAgentVersion tries to parse the old UA, detect its browser, and update the version
// Parse the current user agent to extract browser and version func updateUserAgentVersion(userAgent string, newVersions BrowserData, r *rand.Rand) string {
var browserType, version string var browserType, version string
// Attempt to detect old UA patterns (Chromium or Firefox)
if _, err := fmt.Sscanf(userAgent, "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36", &version); err == nil { if _, err := fmt.Sscanf(userAgent, "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36", &version); err == nil {
browserType = "Chromium" browserType = "Chromium"
} else if _, err := fmt.Sscanf(userAgent, "Mozilla/5.0 (Windows NT 11.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36", &version); err == nil {
browserType = "Chromium"
} else if _, err := fmt.Sscanf(userAgent, "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36", &version); err == nil { } else if _, err := fmt.Sscanf(userAgent, "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36", &version); err == nil {
browserType = "Chromium" browserType = "Chromium"
} else if _, err := fmt.Sscanf(userAgent, "Mozilla/5.0 (X11; Ubuntu; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36", &version); err == nil { } else if _, err := fmt.Sscanf(userAgent, "Mozilla/5.0 (X11; Ubuntu; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36", &version); err == nil {
@ -251,8 +258,6 @@ func updateUserAgentVersion(userAgent string, newVersions BrowserData) string {
browserType = "Chromium" browserType = "Chromium"
} else if _, err := fmt.Sscanf(userAgent, "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:%s) Gecko/20100101 Firefox/%s", &version, &version); err == nil { } else if _, err := fmt.Sscanf(userAgent, "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:%s) Gecko/20100101 Firefox/%s", &version, &version); err == nil {
browserType = "Firefox" browserType = "Firefox"
} else if _, err := fmt.Sscanf(userAgent, "Mozilla/5.0 (Windows NT 11.0; Win64; x64; rv:%s) Gecko/20100101 Firefox/%s", &version, &version); err == nil {
browserType = "Firefox"
} else if _, err := fmt.Sscanf(userAgent, "Mozilla/5.0 (X11; Linux x86_64; rv:%s) Gecko/20100101 Firefox/%s", &version, &version); err == nil { } else if _, err := fmt.Sscanf(userAgent, "Mozilla/5.0 (X11; Linux x86_64; rv:%s) Gecko/20100101 Firefox/%s", &version, &version); err == nil {
browserType = "Firefox" browserType = "Firefox"
} else if _, err := fmt.Sscanf(userAgent, "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:%s) Gecko/20100101 Firefox/%s", &version, &version); err == nil { } else if _, err := fmt.Sscanf(userAgent, "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:%s) Gecko/20100101 Firefox/%s", &version, &version); err == nil {
@ -261,22 +266,37 @@ func updateUserAgentVersion(userAgent string, newVersions BrowserData) string {
browserType = "Firefox" browserType = "Firefox"
} }
// Get the latest version for that browser // Grab the newest version from the fetched data
var latestVersion string var latestVersion string
if browserType == "Firefox" && len(newVersions.Firefox) > 0 { if browserType == "Firefox" && len(newVersions.Firefox) > 0 {
// Sort by usage descending
sort.Slice(newVersions.Firefox, func(i, j int) bool {
return newVersions.Firefox[i].Global > newVersions.Firefox[j].Global
})
latestVersion = newVersions.Firefox[0].Version latestVersion = newVersions.Firefox[0].Version
} else if browserType == "Chromium" && len(newVersions.Chromium) > 0 { } else if browserType == "Chromium" && len(newVersions.Chromium) > 0 {
// Sort by usage descending
sort.Slice(newVersions.Chromium, func(i, j int) bool {
return newVersions.Chromium[i].Global > newVersions.Chromium[j].Global
})
latestVersion = newVersions.Chromium[0].Version latestVersion = newVersions.Chromium[0].Version
} }
// Update the user agent string with the new version // If we failed to detect the browser or have no data, just return the old UA
return generateUserAgent(browserType, latestVersion) if browserType == "" || latestVersion == "" {
return userAgent
}
// Create a new random OS-based UA string with the latest version
return generateUserAgent(browserType, latestVersion, r)
} }
// periodicAgentUpdate periodically refreshes browser data and user agents
func periodicAgentUpdate() { func periodicAgentUpdate() {
for { for {
// Sleep for a random interval between 1 and 2 days // Sleep a random interval between 1 and 2 days
time.Sleep(time.Duration(24+rand.Intn(24)) * time.Hour) r := rand.New(rand.NewSource(time.Now().UnixNano()))
time.Sleep(time.Duration(24+r.Intn(24)) * time.Hour)
// Fetch the latest browser versions // Fetch the latest browser versions
newVersions, err := fetchLatestBrowserVersions() newVersions, err := fetchLatestBrowserVersions()
@ -296,6 +316,7 @@ func periodicAgentUpdate() {
} }
} }
// GetUserAgent returns a cached UA for the given key or creates one if none exists.
func GetUserAgent(cacheKey string) (string, error) { func GetUserAgent(cacheKey string) (string, error) {
cache.RLock() cache.RLock()
userAgent, found := cache.data[cacheKey] userAgent, found := cache.data[cacheKey]
@ -314,9 +335,11 @@ func GetUserAgent(cacheKey string) (string, error) {
cache.data[cacheKey] = userAgent cache.data[cacheKey] = userAgent
cache.Unlock() cache.Unlock()
printDebug("Generated (cached or new) user agent: %s", userAgent)
return userAgent, nil return userAgent, nil
} }
// GetNewUserAgent always returns a newly generated UA, overwriting the cache.
func GetNewUserAgent(cacheKey string) (string, error) { func GetNewUserAgent(cacheKey string) (string, error) {
userAgent, err := randomUserAgent() userAgent, err := randomUserAgent()
if err != nil { if err != nil {
@ -327,6 +350,7 @@ func GetNewUserAgent(cacheKey string) (string, error) {
cache.data[cacheKey] = userAgent cache.data[cacheKey] = userAgent
cache.Unlock() cache.Unlock()
printDebug("Generated new user agent: %s", userAgent)
return userAgent, nil return userAgent, nil
} }