Fixed an issue where no images were retrieved from quant (caused by incorrect user agents)
Some checks failed
Run Integration Tests / test (push) Failing after 40s

This commit is contained in:
partisan 2025-01-28 21:05:43 +01:00
parent 8db4e18ee4
commit dc4a3a4bec

162
agent.go
View file

@ -11,11 +11,13 @@ import (
"time"
)
// BrowserVersion pairs a single browser release with the global usage
// figure recorded for it in the caniuse data.
type BrowserVersion struct {
	// Version is the release identifier (a key of the "usage_global" map).
	Version string `json:"version"`
	// Global is the global usage value listed for that release.
	Global float64 `json:"global"`
}
// BrowserData holds sets of versions for Firefox and Chromium
type BrowserData struct {
Firefox []BrowserVersion `json:"firefox"`
Chromium []BrowserVersion `json:"chrome"`
@ -28,6 +30,7 @@ var (
}{
data: make(map[string]string),
}
browserCache = struct {
sync.RWMutex
data BrowserData
@ -37,26 +40,19 @@ var (
}
)
// fetchLatestBrowserVersions retrieves usage data from caniuse.com's full-data JSON.
func fetchLatestBrowserVersions() (BrowserData, error) {
url := "https://raw.githubusercontent.com/Fyrd/caniuse/master/fulldata-json/data-2.0.json"
// // Optional: skip TLS verification to avoid certificate errors
// transport := &http.Transport{
// TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
// }
// Increase the HTTP client timeout
const urlCaniuse = "https://raw.githubusercontent.com/Fyrd/caniuse/master/fulldata-json/data-2.0.json"
client := &http.Client{
Timeout: 30 * time.Second,
// Transport: transport,
}
// Build the request manually to set headers
req, err := http.NewRequest("GET", url, nil)
req, err := http.NewRequest("GET", urlCaniuse, nil)
if err != nil {
return BrowserData{}, err
}
// Custom user agent and English language preference
// Set a simple custom User-Agent and language
req.Header.Set("User-Agent", "MyCustomAgent/1.0 (compatible; +https://example.com)")
req.Header.Set("Accept-Language", "en-US,en;q=0.9")
@ -71,36 +67,42 @@ func fetchLatestBrowserVersions() (BrowserData, error) {
return BrowserData{}, err
}
var rawData map[string]interface{}
var rawData map[string]any
if err := json.Unmarshal(body, &rawData); err != nil {
return BrowserData{}, err
}
stats := rawData["agents"].(map[string]interface{})
stats, ok := rawData["agents"].(map[string]any)
if !ok {
return BrowserData{}, fmt.Errorf("unexpected JSON structure (no 'agents' field)")
}
var data BrowserData
if firefoxData, ok := stats["firefox"].(map[string]interface{}); ok {
for version, usage := range firefoxData["usage_global"].(map[string]interface{}) {
data.Firefox = append(data.Firefox, BrowserVersion{
Version: version,
Global: usage.(float64),
})
// Extract Firefox data
if firefoxData, ok := stats["firefox"].(map[string]any); ok {
if usageMap, ok := firefoxData["usage_global"].(map[string]any); ok {
for version, usage := range usageMap {
val, _ := usage.(float64)
data.Firefox = append(data.Firefox, BrowserVersion{Version: version, Global: val})
}
}
}
if chromeData, ok := stats["chrome"].(map[string]interface{}); ok {
for version, usage := range chromeData["usage_global"].(map[string]interface{}) {
data.Chromium = append(data.Chromium, BrowserVersion{
Version: version,
Global: usage.(float64),
})
// Extract Chrome data
if chromeData, ok := stats["chrome"].(map[string]any); ok {
if usageMap, ok := chromeData["usage_global"].(map[string]any); ok {
for version, usage := range usageMap {
val, _ := usage.(float64)
data.Chromium = append(data.Chromium, BrowserVersion{Version: version, Global: val})
}
}
}
return data, nil
}
// getLatestBrowserVersions checks the cache and fetches new data if expired
func getLatestBrowserVersions() (BrowserData, error) {
browserCache.RLock()
if time.Now().Before(browserCache.expires) {
@ -117,37 +119,36 @@ func getLatestBrowserVersions() (BrowserData, error) {
browserCache.Lock()
browserCache.data = data
browserCache.expires = time.Now().Add(24 * time.Hour)
browserCache.expires = time.Now().Add(24 * time.Hour) // Refresh daily
browserCache.Unlock()
return data, nil
}
// randomUserAgent picks a random browser (Firefox/Chromium), selects a version based on usage,
// picks an OS string, and composes a User-Agent header.
func randomUserAgent() (string, error) {
browsers, err := getLatestBrowserVersions()
if err != nil {
return "", err
}
rand := rand.New(rand.NewSource(time.Now().UnixNano()))
r := rand.New(rand.NewSource(time.Now().UnixNano()))
// Simulated browser usage statistics (in percentages)
// Overall usage: 80% chance for Chromium, 20% for Firefox
usageStats := map[string]float64{
"Firefox": 30.0,
"Chromium": 70.0,
"Firefox": 20.0,
"Chromium": 80.0,
}
// Calculate the probabilities for the versions
probabilities := []float64{0.5, 0.25, 0.125, 0.0625, 0.03125, 0.015625, 0.0078125, 0.00390625}
// Select a browser based on usage statistics
// Weighted random selection of the browser type
browserType := ""
randVal := rand.Float64() * 100
randVal := r.Float64() * 100
cumulative := 0.0
for browser, usage := range usageStats {
for bType, usage := range usageStats {
cumulative += usage
if randVal < cumulative {
browserType = browser
browserType = bType
break
}
}
@ -164,14 +165,16 @@ func randomUserAgent() (string, error) {
return "", fmt.Errorf("no versions found for browser: %s", browserType)
}
// Sort versions by usage (descending order)
// Sort by global usage descending
sort.Slice(versions, func(i, j int) bool {
return versions[i].Global > versions[j].Global
})
// Select a version based on the probabilities
// Probability distribution for top few versions
probabilities := []float64{0.5, 0.25, 0.125, 0.0625, 0.03125, 0.015625, 0.0078125, 0.00390625}
version := ""
randVal = rand.Float64()
randVal = r.Float64()
cumulative = 0.0
for i, p := range probabilities {
cumulative += p
@ -181,68 +184,72 @@ func randomUserAgent() (string, error) {
}
}
// Fallback to the last version if none matched
// Fallback to the least used version if none matched
if version == "" {
version = versions[len(versions)-1].Version
}
// Generate the user agent string
userAgent := generateUserAgent(browserType, version)
userAgent := generateUserAgent(browserType, version, r)
return userAgent, nil
}
// generateUserAgent composes a complete User-Agent header value for the given
// browser family and version, combined with a randomly chosen desktop
// platform token.
//
// browser must be "Firefox" or "Chromium"; any other value yields "".
// version is inserted verbatim into the resulting string.
// r supplies the randomness for the platform choice; when r is nil the
// package-level math/rand source is used instead of panicking.
func generateUserAgent(browser, version string, r *rand.Rand) string {
	// Platform tokens with relative weights. Browsers on Windows 11 still
	// report "Windows NT 10.0", so a single Windows entry carries the whole
	// Windows share; "Windows NT 11.0" is never emitted.
	oses := []struct {
		os          string
		probability float64
	}{
		{"Windows NT 10.0; Win64; x64", 88.0},
		{"X11; Linux x86_64", 2.0},
		{"X11; Ubuntu; Linux x86_64", 2.0},
		{"Macintosh; Intel Mac OS X 10_15_7", 10.0},
	}

	// Scale the draw by the real sum of the weights so the table does not
	// have to add up to exactly 100.
	total := 0.0
	for _, entry := range oses {
		total += entry.probability
	}
	var randVal float64
	if r != nil {
		randVal = r.Float64() * total
	} else {
		randVal = rand.Float64() * total
	}

	// Weighted random selection; the first entry is the fallback should
	// floating-point rounding leave the draw beyond the final bucket.
	selectedOS := oses[0].os
	cumulative := 0.0
	for _, entry := range oses {
		cumulative += entry.probability
		if randVal < cumulative {
			selectedOS = entry.os
			break
		}
	}

	switch browser {
	case "Firefox":
		// Example: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:117.0) Gecko/20100101 Firefox/117.0
		return fmt.Sprintf("Mozilla/5.0 (%s; rv:%s) Gecko/20100101 Firefox/%s", selectedOS, version, version)
	case "Chromium":
		// Example: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.5938.132 Safari/537.36
		return fmt.Sprintf("Mozilla/5.0 (%s) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36", selectedOS, version)
	default:
		return ""
	}
}
// updateCachedUserAgents randomly updates half of the cached UAs to new versions
func updateCachedUserAgents(newVersions BrowserData) {
cache.Lock()
defer cache.Unlock()
r := rand.New(rand.NewSource(time.Now().UnixNano()))
for key, userAgent := range cache.data {
randVal := rand.Float64()
if randVal < 0.5 {
updatedUserAgent := updateUserAgentVersion(userAgent, newVersions)
if r.Float64() < 0.5 {
updatedUserAgent := updateUserAgentVersion(userAgent, newVersions, r)
cache.data[key] = updatedUserAgent
}
}
}
func updateUserAgentVersion(userAgent string, newVersions BrowserData) string {
// Parse the current user agent to extract browser and version
// updateUserAgentVersion tries to parse the old UA, detect its browser, and update the version
func updateUserAgentVersion(userAgent string, newVersions BrowserData, r *rand.Rand) string {
var browserType, version string
// Attempt to detect old UA patterns (Chromium or Firefox)
if _, err := fmt.Sscanf(userAgent, "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36", &version); err == nil {
browserType = "Chromium"
} else if _, err := fmt.Sscanf(userAgent, "Mozilla/5.0 (Windows NT 11.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36", &version); err == nil {
browserType = "Chromium"
} else if _, err := fmt.Sscanf(userAgent, "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36", &version); err == nil {
browserType = "Chromium"
} else if _, err := fmt.Sscanf(userAgent, "Mozilla/5.0 (X11; Ubuntu; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36", &version); err == nil {
@ -251,8 +258,6 @@ func updateUserAgentVersion(userAgent string, newVersions BrowserData) string {
browserType = "Chromium"
} else if _, err := fmt.Sscanf(userAgent, "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:%s) Gecko/20100101 Firefox/%s", &version, &version); err == nil {
browserType = "Firefox"
} else if _, err := fmt.Sscanf(userAgent, "Mozilla/5.0 (Windows NT 11.0; Win64; x64; rv:%s) Gecko/20100101 Firefox/%s", &version, &version); err == nil {
browserType = "Firefox"
} else if _, err := fmt.Sscanf(userAgent, "Mozilla/5.0 (X11; Linux x86_64; rv:%s) Gecko/20100101 Firefox/%s", &version, &version); err == nil {
browserType = "Firefox"
} else if _, err := fmt.Sscanf(userAgent, "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:%s) Gecko/20100101 Firefox/%s", &version, &version); err == nil {
@ -261,22 +266,37 @@ func updateUserAgentVersion(userAgent string, newVersions BrowserData) string {
browserType = "Firefox"
}
// Get the latest version for that browser
// Grab the newest version from the fetched data
var latestVersion string
if browserType == "Firefox" && len(newVersions.Firefox) > 0 {
// Sort by usage descending
sort.Slice(newVersions.Firefox, func(i, j int) bool {
return newVersions.Firefox[i].Global > newVersions.Firefox[j].Global
})
latestVersion = newVersions.Firefox[0].Version
} else if browserType == "Chromium" && len(newVersions.Chromium) > 0 {
// Sort by usage descending
sort.Slice(newVersions.Chromium, func(i, j int) bool {
return newVersions.Chromium[i].Global > newVersions.Chromium[j].Global
})
latestVersion = newVersions.Chromium[0].Version
}
// Update the user agent string with the new version
return generateUserAgent(browserType, latestVersion)
// If we failed to detect the browser or have no data, just return the old UA
if browserType == "" || latestVersion == "" {
return userAgent
}
// Create a new random OS-based UA string with the latest version
return generateUserAgent(browserType, latestVersion, r)
}
// periodicAgentUpdate periodically refreshes browser data and user agents
func periodicAgentUpdate() {
for {
// Sleep for a random interval between 1 and 2 days
time.Sleep(time.Duration(24+rand.Intn(24)) * time.Hour)
// Sleep a random interval between 1 and 2 days
r := rand.New(rand.NewSource(time.Now().UnixNano()))
time.Sleep(time.Duration(24+r.Intn(24)) * time.Hour)
// Fetch the latest browser versions
newVersions, err := fetchLatestBrowserVersions()
@ -296,6 +316,7 @@ func periodicAgentUpdate() {
}
}
// GetUserAgent returns a cached UA for the given key or creates one if none exists.
func GetUserAgent(cacheKey string) (string, error) {
cache.RLock()
userAgent, found := cache.data[cacheKey]
@ -314,9 +335,11 @@ func GetUserAgent(cacheKey string) (string, error) {
cache.data[cacheKey] = userAgent
cache.Unlock()
printDebug("Generated (cached or new) user agent: %s", userAgent)
return userAgent, nil
}
// GetNewUserAgent always returns a newly generated UA, overwriting the cache.
func GetNewUserAgent(cacheKey string) (string, error) {
userAgent, err := randomUserAgent()
if err != nil {
@ -327,6 +350,7 @@ func GetNewUserAgent(cacheKey string) (string, error) {
cache.data[cacheKey] = userAgent
cache.Unlock()
printDebug("Generated new user agent: %s", userAgent)
return userAgent, nil
}