fixed issue with no images being retrieved from quant (incorrect user agents)
Some checks failed
Run Integration Tests / test (push) Failing after 40s
Some checks failed
Run Integration Tests / test (push) Failing after 40s
This commit is contained in:
parent
8db4e18ee4
commit
dc4a3a4bec
1 changed files with 93 additions and 69 deletions
162
agent.go
162
agent.go
|
@ -11,11 +11,13 @@ import (
|
|||
"time"
|
||||
)
|
||||
|
||||
// BrowserVersion represents the version & global usage from the caniuse data
|
||||
type BrowserVersion struct {
|
||||
Version string `json:"version"`
|
||||
Global float64 `json:"global"`
|
||||
}
|
||||
|
||||
// BrowserData holds sets of versions for Firefox and Chromium
|
||||
type BrowserData struct {
|
||||
Firefox []BrowserVersion `json:"firefox"`
|
||||
Chromium []BrowserVersion `json:"chrome"`
|
||||
|
@ -28,6 +30,7 @@ var (
|
|||
}{
|
||||
data: make(map[string]string),
|
||||
}
|
||||
|
||||
browserCache = struct {
|
||||
sync.RWMutex
|
||||
data BrowserData
|
||||
|
@ -37,26 +40,19 @@ var (
|
|||
}
|
||||
)
|
||||
|
||||
// fetchLatestBrowserVersions retrieves usage data from caniuse.com’s fulldata JSON.
|
||||
func fetchLatestBrowserVersions() (BrowserData, error) {
|
||||
url := "https://raw.githubusercontent.com/Fyrd/caniuse/master/fulldata-json/data-2.0.json"
|
||||
|
||||
// // Optional: skip TLS verification to avoid certificate errors
|
||||
// transport := &http.Transport{
|
||||
// TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
|
||||
// }
|
||||
|
||||
// Increase the HTTP client timeout
|
||||
const urlCaniuse = "https://raw.githubusercontent.com/Fyrd/caniuse/master/fulldata-json/data-2.0.json"
|
||||
client := &http.Client{
|
||||
Timeout: 30 * time.Second,
|
||||
// Transport: transport,
|
||||
}
|
||||
|
||||
// Build the request manually to set headers
|
||||
req, err := http.NewRequest("GET", url, nil)
|
||||
req, err := http.NewRequest("GET", urlCaniuse, nil)
|
||||
if err != nil {
|
||||
return BrowserData{}, err
|
||||
}
|
||||
// Custom user agent and English language preference
|
||||
|
||||
// Set a simple custom User-Agent and language
|
||||
req.Header.Set("User-Agent", "MyCustomAgent/1.0 (compatible; +https://example.com)")
|
||||
req.Header.Set("Accept-Language", "en-US,en;q=0.9")
|
||||
|
||||
|
@ -71,36 +67,42 @@ func fetchLatestBrowserVersions() (BrowserData, error) {
|
|||
return BrowserData{}, err
|
||||
}
|
||||
|
||||
var rawData map[string]interface{}
|
||||
var rawData map[string]any
|
||||
if err := json.Unmarshal(body, &rawData); err != nil {
|
||||
return BrowserData{}, err
|
||||
}
|
||||
|
||||
stats := rawData["agents"].(map[string]interface{})
|
||||
stats, ok := rawData["agents"].(map[string]any)
|
||||
if !ok {
|
||||
return BrowserData{}, fmt.Errorf("unexpected JSON structure (no 'agents' field)")
|
||||
}
|
||||
|
||||
var data BrowserData
|
||||
|
||||
if firefoxData, ok := stats["firefox"].(map[string]interface{}); ok {
|
||||
for version, usage := range firefoxData["usage_global"].(map[string]interface{}) {
|
||||
data.Firefox = append(data.Firefox, BrowserVersion{
|
||||
Version: version,
|
||||
Global: usage.(float64),
|
||||
})
|
||||
// Extract Firefox data
|
||||
if firefoxData, ok := stats["firefox"].(map[string]any); ok {
|
||||
if usageMap, ok := firefoxData["usage_global"].(map[string]any); ok {
|
||||
for version, usage := range usageMap {
|
||||
val, _ := usage.(float64)
|
||||
data.Firefox = append(data.Firefox, BrowserVersion{Version: version, Global: val})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if chromeData, ok := stats["chrome"].(map[string]interface{}); ok {
|
||||
for version, usage := range chromeData["usage_global"].(map[string]interface{}) {
|
||||
data.Chromium = append(data.Chromium, BrowserVersion{
|
||||
Version: version,
|
||||
Global: usage.(float64),
|
||||
})
|
||||
// Extract Chrome data
|
||||
if chromeData, ok := stats["chrome"].(map[string]any); ok {
|
||||
if usageMap, ok := chromeData["usage_global"].(map[string]any); ok {
|
||||
for version, usage := range usageMap {
|
||||
val, _ := usage.(float64)
|
||||
data.Chromium = append(data.Chromium, BrowserVersion{Version: version, Global: val})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return data, nil
|
||||
}
|
||||
|
||||
// getLatestBrowserVersions checks the cache and fetches new data if expired
|
||||
func getLatestBrowserVersions() (BrowserData, error) {
|
||||
browserCache.RLock()
|
||||
if time.Now().Before(browserCache.expires) {
|
||||
|
@ -117,37 +119,36 @@ func getLatestBrowserVersions() (BrowserData, error) {
|
|||
|
||||
browserCache.Lock()
|
||||
browserCache.data = data
|
||||
browserCache.expires = time.Now().Add(24 * time.Hour)
|
||||
browserCache.expires = time.Now().Add(24 * time.Hour) // Refresh daily
|
||||
browserCache.Unlock()
|
||||
|
||||
return data, nil
|
||||
}
|
||||
|
||||
// randomUserAgent picks a random browser (Firefox/Chromium), selects a version based on usage,
|
||||
// picks an OS string, and composes a User-Agent header.
|
||||
func randomUserAgent() (string, error) {
|
||||
browsers, err := getLatestBrowserVersions()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
rand := rand.New(rand.NewSource(time.Now().UnixNano()))
|
||||
r := rand.New(rand.NewSource(time.Now().UnixNano()))
|
||||
|
||||
// Simulated browser usage statistics (in percentages)
|
||||
// Overall usage: 80% chance for Chromium, 20% for Firefox
|
||||
usageStats := map[string]float64{
|
||||
"Firefox": 30.0,
|
||||
"Chromium": 70.0,
|
||||
"Firefox": 20.0,
|
||||
"Chromium": 80.0,
|
||||
}
|
||||
|
||||
// Calculate the probabilities for the versions
|
||||
probabilities := []float64{0.5, 0.25, 0.125, 0.0625, 0.03125, 0.015625, 0.0078125, 0.00390625}
|
||||
|
||||
// Select a browser based on usage statistics
|
||||
// Weighted random selection of the browser type
|
||||
browserType := ""
|
||||
randVal := rand.Float64() * 100
|
||||
randVal := r.Float64() * 100
|
||||
cumulative := 0.0
|
||||
for browser, usage := range usageStats {
|
||||
for bType, usage := range usageStats {
|
||||
cumulative += usage
|
||||
if randVal < cumulative {
|
||||
browserType = browser
|
||||
browserType = bType
|
||||
break
|
||||
}
|
||||
}
|
||||
|
@ -164,14 +165,16 @@ func randomUserAgent() (string, error) {
|
|||
return "", fmt.Errorf("no versions found for browser: %s", browserType)
|
||||
}
|
||||
|
||||
// Sort versions by usage (descending order)
|
||||
// Sort by global usage descending
|
||||
sort.Slice(versions, func(i, j int) bool {
|
||||
return versions[i].Global > versions[j].Global
|
||||
})
|
||||
|
||||
// Select a version based on the probabilities
|
||||
// Probability distribution for top few versions
|
||||
probabilities := []float64{0.5, 0.25, 0.125, 0.0625, 0.03125, 0.015625, 0.0078125, 0.00390625}
|
||||
|
||||
version := ""
|
||||
randVal = rand.Float64()
|
||||
randVal = r.Float64()
|
||||
cumulative = 0.0
|
||||
for i, p := range probabilities {
|
||||
cumulative += p
|
||||
|
@ -181,68 +184,72 @@ func randomUserAgent() (string, error) {
|
|||
}
|
||||
}
|
||||
|
||||
// Fallback to the last version if none matched
|
||||
// Fallback to the least used version if none matched
|
||||
if version == "" {
|
||||
version = versions[len(versions)-1].Version
|
||||
}
|
||||
|
||||
// Generate the user agent string
|
||||
userAgent := generateUserAgent(browserType, version)
|
||||
userAgent := generateUserAgent(browserType, version, r)
|
||||
return userAgent, nil
|
||||
}
|
||||
|
||||
func generateUserAgent(browser, version string) string {
|
||||
// generateUserAgent composes the final UA string given the browser, version, and OS.
|
||||
func generateUserAgent(browser, version string, r *rand.Rand) string {
|
||||
oses := []struct {
|
||||
os string
|
||||
probability float64
|
||||
}{
|
||||
{"Windows NT 10.0; Win64; x64", 44.0},
|
||||
{"Windows NT 11.0; Win64; x64", 44.0},
|
||||
{"X11; Linux x86_64", 1.0},
|
||||
{"X11; Ubuntu; Linux x86_64", 1.0},
|
||||
{"X11; Linux x86_64", 2.0},
|
||||
{"X11; Ubuntu; Linux x86_64", 2.0},
|
||||
{"Macintosh; Intel Mac OS X 10_15_7", 10.0},
|
||||
}
|
||||
|
||||
// Select an OS based on probabilities
|
||||
randVal := rand.Float64() * 100
|
||||
// Weighted random selection for OS
|
||||
randVal := r.Float64() * 100
|
||||
cumulative := 0.0
|
||||
selectedOS := ""
|
||||
for _, os := range oses {
|
||||
cumulative += os.probability
|
||||
selectedOS := oses[0].os // Default in case distribution is off
|
||||
for _, entry := range oses {
|
||||
cumulative += entry.probability
|
||||
if randVal < cumulative {
|
||||
selectedOS = os.os
|
||||
selectedOS = entry.os
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
switch browser {
|
||||
case "Firefox":
|
||||
// Example: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:117.0) Gecko/20100101 Firefox/117.0
|
||||
return fmt.Sprintf("Mozilla/5.0 (%s; rv:%s) Gecko/20100101 Firefox/%s", selectedOS, version, version)
|
||||
case "Chromium":
|
||||
// Example: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.5938.132 Safari/537.36
|
||||
return fmt.Sprintf("Mozilla/5.0 (%s) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36", selectedOS, version)
|
||||
}
|
||||
default:
|
||||
return ""
|
||||
}
|
||||
}
|
||||
|
||||
// updateCachedUserAgents randomly updates half of the cached UAs to new versions
|
||||
func updateCachedUserAgents(newVersions BrowserData) {
|
||||
cache.Lock()
|
||||
defer cache.Unlock()
|
||||
|
||||
r := rand.New(rand.NewSource(time.Now().UnixNano()))
|
||||
for key, userAgent := range cache.data {
|
||||
randVal := rand.Float64()
|
||||
if randVal < 0.5 {
|
||||
updatedUserAgent := updateUserAgentVersion(userAgent, newVersions)
|
||||
if r.Float64() < 0.5 {
|
||||
updatedUserAgent := updateUserAgentVersion(userAgent, newVersions, r)
|
||||
cache.data[key] = updatedUserAgent
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func updateUserAgentVersion(userAgent string, newVersions BrowserData) string {
|
||||
// Parse the current user agent to extract browser and version
|
||||
// updateUserAgentVersion tries to parse the old UA, detect its browser, and update the version
|
||||
func updateUserAgentVersion(userAgent string, newVersions BrowserData, r *rand.Rand) string {
|
||||
var browserType, version string
|
||||
|
||||
// Attempt to detect old UA patterns (Chromium or Firefox)
|
||||
if _, err := fmt.Sscanf(userAgent, "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36", &version); err == nil {
|
||||
browserType = "Chromium"
|
||||
} else if _, err := fmt.Sscanf(userAgent, "Mozilla/5.0 (Windows NT 11.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36", &version); err == nil {
|
||||
browserType = "Chromium"
|
||||
} else if _, err := fmt.Sscanf(userAgent, "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36", &version); err == nil {
|
||||
browserType = "Chromium"
|
||||
} else if _, err := fmt.Sscanf(userAgent, "Mozilla/5.0 (X11; Ubuntu; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36", &version); err == nil {
|
||||
|
@ -251,8 +258,6 @@ func updateUserAgentVersion(userAgent string, newVersions BrowserData) string {
|
|||
browserType = "Chromium"
|
||||
} else if _, err := fmt.Sscanf(userAgent, "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:%s) Gecko/20100101 Firefox/%s", &version, &version); err == nil {
|
||||
browserType = "Firefox"
|
||||
} else if _, err := fmt.Sscanf(userAgent, "Mozilla/5.0 (Windows NT 11.0; Win64; x64; rv:%s) Gecko/20100101 Firefox/%s", &version, &version); err == nil {
|
||||
browserType = "Firefox"
|
||||
} else if _, err := fmt.Sscanf(userAgent, "Mozilla/5.0 (X11; Linux x86_64; rv:%s) Gecko/20100101 Firefox/%s", &version, &version); err == nil {
|
||||
browserType = "Firefox"
|
||||
} else if _, err := fmt.Sscanf(userAgent, "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:%s) Gecko/20100101 Firefox/%s", &version, &version); err == nil {
|
||||
|
@ -261,22 +266,37 @@ func updateUserAgentVersion(userAgent string, newVersions BrowserData) string {
|
|||
browserType = "Firefox"
|
||||
}
|
||||
|
||||
// Get the latest version for that browser
|
||||
// Grab the newest version from the fetched data
|
||||
var latestVersion string
|
||||
if browserType == "Firefox" && len(newVersions.Firefox) > 0 {
|
||||
// Sort by usage descending
|
||||
sort.Slice(newVersions.Firefox, func(i, j int) bool {
|
||||
return newVersions.Firefox[i].Global > newVersions.Firefox[j].Global
|
||||
})
|
||||
latestVersion = newVersions.Firefox[0].Version
|
||||
} else if browserType == "Chromium" && len(newVersions.Chromium) > 0 {
|
||||
// Sort by usage descending
|
||||
sort.Slice(newVersions.Chromium, func(i, j int) bool {
|
||||
return newVersions.Chromium[i].Global > newVersions.Chromium[j].Global
|
||||
})
|
||||
latestVersion = newVersions.Chromium[0].Version
|
||||
}
|
||||
|
||||
// Update the user agent string with the new version
|
||||
return generateUserAgent(browserType, latestVersion)
|
||||
// If we failed to detect the browser or have no data, just return the old UA
|
||||
if browserType == "" || latestVersion == "" {
|
||||
return userAgent
|
||||
}
|
||||
|
||||
// Create a new random OS-based UA string with the latest version
|
||||
return generateUserAgent(browserType, latestVersion, r)
|
||||
}
|
||||
|
||||
// periodicAgentUpdate periodically refreshes browser data and user agents
|
||||
func periodicAgentUpdate() {
|
||||
for {
|
||||
// Sleep for a random interval between 1 and 2 days
|
||||
time.Sleep(time.Duration(24+rand.Intn(24)) * time.Hour)
|
||||
// Sleep a random interval between 1 and 2 days
|
||||
r := rand.New(rand.NewSource(time.Now().UnixNano()))
|
||||
time.Sleep(time.Duration(24+r.Intn(24)) * time.Hour)
|
||||
|
||||
// Fetch the latest browser versions
|
||||
newVersions, err := fetchLatestBrowserVersions()
|
||||
|
@ -296,6 +316,7 @@ func periodicAgentUpdate() {
|
|||
}
|
||||
}
|
||||
|
||||
// GetUserAgent returns a cached UA for the given key or creates one if none exists.
|
||||
func GetUserAgent(cacheKey string) (string, error) {
|
||||
cache.RLock()
|
||||
userAgent, found := cache.data[cacheKey]
|
||||
|
@ -314,9 +335,11 @@ func GetUserAgent(cacheKey string) (string, error) {
|
|||
cache.data[cacheKey] = userAgent
|
||||
cache.Unlock()
|
||||
|
||||
printDebug("Generated (cached or new) user agent: %s", userAgent)
|
||||
return userAgent, nil
|
||||
}
|
||||
|
||||
// GetNewUserAgent always returns a newly generated UA, overwriting the cache.
|
||||
func GetNewUserAgent(cacheKey string) (string, error) {
|
||||
userAgent, err := randomUserAgent()
|
||||
if err != nil {
|
||||
|
@ -327,6 +350,7 @@ func GetNewUserAgent(cacheKey string) (string, error) {
|
|||
cache.data[cacheKey] = userAgent
|
||||
cache.Unlock()
|
||||
|
||||
printDebug("Generated new user agent: %s", userAgent)
|
||||
return userAgent, nil
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue