fixed issue with no images being retrieved from quant (incorrect user agents)
Some checks failed
Run Integration Tests / test (push) Failing after 40s
Some checks failed
Run Integration Tests / test (push) Failing after 40s
This commit is contained in:
parent
8db4e18ee4
commit
dc4a3a4bec
1 changed files with 93 additions and 69 deletions
162
agent.go
162
agent.go
|
@ -11,11 +11,13 @@ import (
|
||||||
"time"
|
"time"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// BrowserVersion represents the version & global usage from the caniuse data
|
||||||
type BrowserVersion struct {
|
type BrowserVersion struct {
|
||||||
Version string `json:"version"`
|
Version string `json:"version"`
|
||||||
Global float64 `json:"global"`
|
Global float64 `json:"global"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// BrowserData holds sets of versions for Firefox and Chromium
|
||||||
type BrowserData struct {
|
type BrowserData struct {
|
||||||
Firefox []BrowserVersion `json:"firefox"`
|
Firefox []BrowserVersion `json:"firefox"`
|
||||||
Chromium []BrowserVersion `json:"chrome"`
|
Chromium []BrowserVersion `json:"chrome"`
|
||||||
|
@ -28,6 +30,7 @@ var (
|
||||||
}{
|
}{
|
||||||
data: make(map[string]string),
|
data: make(map[string]string),
|
||||||
}
|
}
|
||||||
|
|
||||||
browserCache = struct {
|
browserCache = struct {
|
||||||
sync.RWMutex
|
sync.RWMutex
|
||||||
data BrowserData
|
data BrowserData
|
||||||
|
@ -37,26 +40,19 @@ var (
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// fetchLatestBrowserVersions retrieves usage data from caniuse.com’s fulldata JSON.
|
||||||
func fetchLatestBrowserVersions() (BrowserData, error) {
|
func fetchLatestBrowserVersions() (BrowserData, error) {
|
||||||
url := "https://raw.githubusercontent.com/Fyrd/caniuse/master/fulldata-json/data-2.0.json"
|
const urlCaniuse = "https://raw.githubusercontent.com/Fyrd/caniuse/master/fulldata-json/data-2.0.json"
|
||||||
|
|
||||||
// // Optional: skip TLS verification to avoid certificate errors
|
|
||||||
// transport := &http.Transport{
|
|
||||||
// TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
|
|
||||||
// }
|
|
||||||
|
|
||||||
// Increase the HTTP client timeout
|
|
||||||
client := &http.Client{
|
client := &http.Client{
|
||||||
Timeout: 30 * time.Second,
|
Timeout: 30 * time.Second,
|
||||||
// Transport: transport,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Build the request manually to set headers
|
req, err := http.NewRequest("GET", urlCaniuse, nil)
|
||||||
req, err := http.NewRequest("GET", url, nil)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return BrowserData{}, err
|
return BrowserData{}, err
|
||||||
}
|
}
|
||||||
// Custom user agent and English language preference
|
|
||||||
|
// Set a simple custom User-Agent and language
|
||||||
req.Header.Set("User-Agent", "MyCustomAgent/1.0 (compatible; +https://example.com)")
|
req.Header.Set("User-Agent", "MyCustomAgent/1.0 (compatible; +https://example.com)")
|
||||||
req.Header.Set("Accept-Language", "en-US,en;q=0.9")
|
req.Header.Set("Accept-Language", "en-US,en;q=0.9")
|
||||||
|
|
||||||
|
@ -71,36 +67,42 @@ func fetchLatestBrowserVersions() (BrowserData, error) {
|
||||||
return BrowserData{}, err
|
return BrowserData{}, err
|
||||||
}
|
}
|
||||||
|
|
||||||
var rawData map[string]interface{}
|
var rawData map[string]any
|
||||||
if err := json.Unmarshal(body, &rawData); err != nil {
|
if err := json.Unmarshal(body, &rawData); err != nil {
|
||||||
return BrowserData{}, err
|
return BrowserData{}, err
|
||||||
}
|
}
|
||||||
|
|
||||||
stats := rawData["agents"].(map[string]interface{})
|
stats, ok := rawData["agents"].(map[string]any)
|
||||||
|
if !ok {
|
||||||
|
return BrowserData{}, fmt.Errorf("unexpected JSON structure (no 'agents' field)")
|
||||||
|
}
|
||||||
|
|
||||||
var data BrowserData
|
var data BrowserData
|
||||||
|
|
||||||
if firefoxData, ok := stats["firefox"].(map[string]interface{}); ok {
|
// Extract Firefox data
|
||||||
for version, usage := range firefoxData["usage_global"].(map[string]interface{}) {
|
if firefoxData, ok := stats["firefox"].(map[string]any); ok {
|
||||||
data.Firefox = append(data.Firefox, BrowserVersion{
|
if usageMap, ok := firefoxData["usage_global"].(map[string]any); ok {
|
||||||
Version: version,
|
for version, usage := range usageMap {
|
||||||
Global: usage.(float64),
|
val, _ := usage.(float64)
|
||||||
})
|
data.Firefox = append(data.Firefox, BrowserVersion{Version: version, Global: val})
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if chromeData, ok := stats["chrome"].(map[string]interface{}); ok {
|
// Extract Chrome data
|
||||||
for version, usage := range chromeData["usage_global"].(map[string]interface{}) {
|
if chromeData, ok := stats["chrome"].(map[string]any); ok {
|
||||||
data.Chromium = append(data.Chromium, BrowserVersion{
|
if usageMap, ok := chromeData["usage_global"].(map[string]any); ok {
|
||||||
Version: version,
|
for version, usage := range usageMap {
|
||||||
Global: usage.(float64),
|
val, _ := usage.(float64)
|
||||||
})
|
data.Chromium = append(data.Chromium, BrowserVersion{Version: version, Global: val})
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return data, nil
|
return data, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// getLatestBrowserVersions checks the cache and fetches new data if expired
|
||||||
func getLatestBrowserVersions() (BrowserData, error) {
|
func getLatestBrowserVersions() (BrowserData, error) {
|
||||||
browserCache.RLock()
|
browserCache.RLock()
|
||||||
if time.Now().Before(browserCache.expires) {
|
if time.Now().Before(browserCache.expires) {
|
||||||
|
@ -117,37 +119,36 @@ func getLatestBrowserVersions() (BrowserData, error) {
|
||||||
|
|
||||||
browserCache.Lock()
|
browserCache.Lock()
|
||||||
browserCache.data = data
|
browserCache.data = data
|
||||||
browserCache.expires = time.Now().Add(24 * time.Hour)
|
browserCache.expires = time.Now().Add(24 * time.Hour) // Refresh daily
|
||||||
browserCache.Unlock()
|
browserCache.Unlock()
|
||||||
|
|
||||||
return data, nil
|
return data, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// randomUserAgent picks a random browser (Firefox/Chromium), selects a version based on usage,
|
||||||
|
// picks an OS string, and composes a User-Agent header.
|
||||||
func randomUserAgent() (string, error) {
|
func randomUserAgent() (string, error) {
|
||||||
browsers, err := getLatestBrowserVersions()
|
browsers, err := getLatestBrowserVersions()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
|
|
||||||
rand := rand.New(rand.NewSource(time.Now().UnixNano()))
|
r := rand.New(rand.NewSource(time.Now().UnixNano()))
|
||||||
|
|
||||||
// Simulated browser usage statistics (in percentages)
|
// Overall usage: 80% chance for Chromium, 20% for Firefox
|
||||||
usageStats := map[string]float64{
|
usageStats := map[string]float64{
|
||||||
"Firefox": 30.0,
|
"Firefox": 20.0,
|
||||||
"Chromium": 70.0,
|
"Chromium": 80.0,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Calculate the probabilities for the versions
|
// Weighted random selection of the browser type
|
||||||
probabilities := []float64{0.5, 0.25, 0.125, 0.0625, 0.03125, 0.015625, 0.0078125, 0.00390625}
|
|
||||||
|
|
||||||
// Select a browser based on usage statistics
|
|
||||||
browserType := ""
|
browserType := ""
|
||||||
randVal := rand.Float64() * 100
|
randVal := r.Float64() * 100
|
||||||
cumulative := 0.0
|
cumulative := 0.0
|
||||||
for browser, usage := range usageStats {
|
for bType, usage := range usageStats {
|
||||||
cumulative += usage
|
cumulative += usage
|
||||||
if randVal < cumulative {
|
if randVal < cumulative {
|
||||||
browserType = browser
|
browserType = bType
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -164,14 +165,16 @@ func randomUserAgent() (string, error) {
|
||||||
return "", fmt.Errorf("no versions found for browser: %s", browserType)
|
return "", fmt.Errorf("no versions found for browser: %s", browserType)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Sort versions by usage (descending order)
|
// Sort by global usage descending
|
||||||
sort.Slice(versions, func(i, j int) bool {
|
sort.Slice(versions, func(i, j int) bool {
|
||||||
return versions[i].Global > versions[j].Global
|
return versions[i].Global > versions[j].Global
|
||||||
})
|
})
|
||||||
|
|
||||||
// Select a version based on the probabilities
|
// Probability distribution for top few versions
|
||||||
|
probabilities := []float64{0.5, 0.25, 0.125, 0.0625, 0.03125, 0.015625, 0.0078125, 0.00390625}
|
||||||
|
|
||||||
version := ""
|
version := ""
|
||||||
randVal = rand.Float64()
|
randVal = r.Float64()
|
||||||
cumulative = 0.0
|
cumulative = 0.0
|
||||||
for i, p := range probabilities {
|
for i, p := range probabilities {
|
||||||
cumulative += p
|
cumulative += p
|
||||||
|
@ -181,68 +184,72 @@ func randomUserAgent() (string, error) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Fallback to the last version if none matched
|
// Fallback to the least used version if none matched
|
||||||
if version == "" {
|
if version == "" {
|
||||||
version = versions[len(versions)-1].Version
|
version = versions[len(versions)-1].Version
|
||||||
}
|
}
|
||||||
|
|
||||||
// Generate the user agent string
|
userAgent := generateUserAgent(browserType, version, r)
|
||||||
userAgent := generateUserAgent(browserType, version)
|
|
||||||
return userAgent, nil
|
return userAgent, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// generateUserAgent composes a User-Agent header value for the given browser
// and version, embedding a randomly selected operating-system token.
//
// Parameters:
//   - browser: "Firefox" or "Chromium"; any other value yields "".
//   - version: inserted verbatim into the UA string (for Firefox it is used
//     for both the rv: token and the Firefox/ token).
//   - r: random source used for the OS draw. If r is nil the package-level
//     math/rand generator is used instead of panicking.
//
// The OS is picked by weighted random selection. The draw is scaled by the sum
// of the weights rather than by a fixed 100, so the table does not have to add
// up to 100 for every entry to be chosen in proportion to its weight.
// Previously the weights summed to 58 while the draw ranged over [0, 100),
// so roughly 42% of draws matched no entry and silently fell back to the
// first row, heavily over-representing it.
func generateUserAgent(browser, version string, r *rand.Rand) string {
	oses := []struct {
		os     string
		weight float64
	}{
		{"Windows NT 10.0; Win64; x64", 44.0},
		{"X11; Linux x86_64", 2.0},
		{"X11; Ubuntu; Linux x86_64", 2.0},
		{"Macintosh; Intel Mac OS X 10_15_7", 10.0},
	}

	// Sum the weights so the draw below covers exactly the table's range.
	total := 0.0
	for _, entry := range oses {
		total += entry.weight
	}

	draw := rand.Float64
	if r != nil {
		draw = r.Float64
	}

	// Weighted random selection for the OS token.
	randVal := draw() * total
	cumulative := 0.0
	selectedOS := oses[0].os // Only reached as-is if floating-point rounding leaves no match.
	for _, entry := range oses {
		cumulative += entry.weight
		if randVal < cumulative {
			selectedOS = entry.os
			break
		}
	}

	switch browser {
	case "Firefox":
		// Example: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:117.0) Gecko/20100101 Firefox/117.0
		return fmt.Sprintf("Mozilla/5.0 (%s; rv:%s) Gecko/20100101 Firefox/%s", selectedOS, version, version)
	case "Chromium":
		// Example: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.5938.132 Safari/537.36
		return fmt.Sprintf("Mozilla/5.0 (%s) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36", selectedOS, version)
	default:
		return ""
	}
}
|
||||||
|
|
||||||
|
// updateCachedUserAgents randomly updates half of the cached UAs to new versions
|
||||||
func updateCachedUserAgents(newVersions BrowserData) {
|
func updateCachedUserAgents(newVersions BrowserData) {
|
||||||
cache.Lock()
|
cache.Lock()
|
||||||
defer cache.Unlock()
|
defer cache.Unlock()
|
||||||
|
|
||||||
|
r := rand.New(rand.NewSource(time.Now().UnixNano()))
|
||||||
for key, userAgent := range cache.data {
|
for key, userAgent := range cache.data {
|
||||||
randVal := rand.Float64()
|
if r.Float64() < 0.5 {
|
||||||
if randVal < 0.5 {
|
updatedUserAgent := updateUserAgentVersion(userAgent, newVersions, r)
|
||||||
updatedUserAgent := updateUserAgentVersion(userAgent, newVersions)
|
|
||||||
cache.data[key] = updatedUserAgent
|
cache.data[key] = updatedUserAgent
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func updateUserAgentVersion(userAgent string, newVersions BrowserData) string {
|
// updateUserAgentVersion tries to parse the old UA, detect its browser, and update the version
|
||||||
// Parse the current user agent to extract browser and version
|
func updateUserAgentVersion(userAgent string, newVersions BrowserData, r *rand.Rand) string {
|
||||||
var browserType, version string
|
var browserType, version string
|
||||||
|
|
||||||
|
// Attempt to detect old UA patterns (Chromium or Firefox)
|
||||||
if _, err := fmt.Sscanf(userAgent, "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36", &version); err == nil {
|
if _, err := fmt.Sscanf(userAgent, "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36", &version); err == nil {
|
||||||
browserType = "Chromium"
|
browserType = "Chromium"
|
||||||
} else if _, err := fmt.Sscanf(userAgent, "Mozilla/5.0 (Windows NT 11.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36", &version); err == nil {
|
|
||||||
browserType = "Chromium"
|
|
||||||
} else if _, err := fmt.Sscanf(userAgent, "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36", &version); err == nil {
|
} else if _, err := fmt.Sscanf(userAgent, "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36", &version); err == nil {
|
||||||
browserType = "Chromium"
|
browserType = "Chromium"
|
||||||
} else if _, err := fmt.Sscanf(userAgent, "Mozilla/5.0 (X11; Ubuntu; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36", &version); err == nil {
|
} else if _, err := fmt.Sscanf(userAgent, "Mozilla/5.0 (X11; Ubuntu; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36", &version); err == nil {
|
||||||
|
@ -251,8 +258,6 @@ func updateUserAgentVersion(userAgent string, newVersions BrowserData) string {
|
||||||
browserType = "Chromium"
|
browserType = "Chromium"
|
||||||
} else if _, err := fmt.Sscanf(userAgent, "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:%s) Gecko/20100101 Firefox/%s", &version, &version); err == nil {
|
} else if _, err := fmt.Sscanf(userAgent, "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:%s) Gecko/20100101 Firefox/%s", &version, &version); err == nil {
|
||||||
browserType = "Firefox"
|
browserType = "Firefox"
|
||||||
} else if _, err := fmt.Sscanf(userAgent, "Mozilla/5.0 (Windows NT 11.0; Win64; x64; rv:%s) Gecko/20100101 Firefox/%s", &version, &version); err == nil {
|
|
||||||
browserType = "Firefox"
|
|
||||||
} else if _, err := fmt.Sscanf(userAgent, "Mozilla/5.0 (X11; Linux x86_64; rv:%s) Gecko/20100101 Firefox/%s", &version, &version); err == nil {
|
} else if _, err := fmt.Sscanf(userAgent, "Mozilla/5.0 (X11; Linux x86_64; rv:%s) Gecko/20100101 Firefox/%s", &version, &version); err == nil {
|
||||||
browserType = "Firefox"
|
browserType = "Firefox"
|
||||||
} else if _, err := fmt.Sscanf(userAgent, "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:%s) Gecko/20100101 Firefox/%s", &version, &version); err == nil {
|
} else if _, err := fmt.Sscanf(userAgent, "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:%s) Gecko/20100101 Firefox/%s", &version, &version); err == nil {
|
||||||
|
@ -261,22 +266,37 @@ func updateUserAgentVersion(userAgent string, newVersions BrowserData) string {
|
||||||
browserType = "Firefox"
|
browserType = "Firefox"
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get the latest version for that browser
|
// Grab the newest version from the fetched data
|
||||||
var latestVersion string
|
var latestVersion string
|
||||||
if browserType == "Firefox" && len(newVersions.Firefox) > 0 {
|
if browserType == "Firefox" && len(newVersions.Firefox) > 0 {
|
||||||
|
// Sort by usage descending
|
||||||
|
sort.Slice(newVersions.Firefox, func(i, j int) bool {
|
||||||
|
return newVersions.Firefox[i].Global > newVersions.Firefox[j].Global
|
||||||
|
})
|
||||||
latestVersion = newVersions.Firefox[0].Version
|
latestVersion = newVersions.Firefox[0].Version
|
||||||
} else if browserType == "Chromium" && len(newVersions.Chromium) > 0 {
|
} else if browserType == "Chromium" && len(newVersions.Chromium) > 0 {
|
||||||
|
// Sort by usage descending
|
||||||
|
sort.Slice(newVersions.Chromium, func(i, j int) bool {
|
||||||
|
return newVersions.Chromium[i].Global > newVersions.Chromium[j].Global
|
||||||
|
})
|
||||||
latestVersion = newVersions.Chromium[0].Version
|
latestVersion = newVersions.Chromium[0].Version
|
||||||
}
|
}
|
||||||
|
|
||||||
// Update the user agent string with the new version
|
// If we failed to detect the browser or have no data, just return the old UA
|
||||||
return generateUserAgent(browserType, latestVersion)
|
if browserType == "" || latestVersion == "" {
|
||||||
|
return userAgent
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create a new random OS-based UA string with the latest version
|
||||||
|
return generateUserAgent(browserType, latestVersion, r)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// periodicAgentUpdate periodically refreshes browser data and user agents
|
||||||
func periodicAgentUpdate() {
|
func periodicAgentUpdate() {
|
||||||
for {
|
for {
|
||||||
// Sleep for a random interval between 1 and 2 days
|
// Sleep a random interval between 1 and 2 days
|
||||||
time.Sleep(time.Duration(24+rand.Intn(24)) * time.Hour)
|
r := rand.New(rand.NewSource(time.Now().UnixNano()))
|
||||||
|
time.Sleep(time.Duration(24+r.Intn(24)) * time.Hour)
|
||||||
|
|
||||||
// Fetch the latest browser versions
|
// Fetch the latest browser versions
|
||||||
newVersions, err := fetchLatestBrowserVersions()
|
newVersions, err := fetchLatestBrowserVersions()
|
||||||
|
@ -296,6 +316,7 @@ func periodicAgentUpdate() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// GetUserAgent returns a cached UA for the given key or creates one if none exists.
|
||||||
func GetUserAgent(cacheKey string) (string, error) {
|
func GetUserAgent(cacheKey string) (string, error) {
|
||||||
cache.RLock()
|
cache.RLock()
|
||||||
userAgent, found := cache.data[cacheKey]
|
userAgent, found := cache.data[cacheKey]
|
||||||
|
@ -314,9 +335,11 @@ func GetUserAgent(cacheKey string) (string, error) {
|
||||||
cache.data[cacheKey] = userAgent
|
cache.data[cacheKey] = userAgent
|
||||||
cache.Unlock()
|
cache.Unlock()
|
||||||
|
|
||||||
|
printDebug("Generated (cached or new) user agent: %s", userAgent)
|
||||||
return userAgent, nil
|
return userAgent, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// GetNewUserAgent always returns a newly generated UA, overwriting the cache.
|
||||||
func GetNewUserAgent(cacheKey string) (string, error) {
|
func GetNewUserAgent(cacheKey string) (string, error) {
|
||||||
userAgent, err := randomUserAgent()
|
userAgent, err := randomUserAgent()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -327,6 +350,7 @@ func GetNewUserAgent(cacheKey string) (string, error) {
|
||||||
cache.data[cacheKey] = userAgent
|
cache.data[cacheKey] = userAgent
|
||||||
cache.Unlock()
|
cache.Unlock()
|
||||||
|
|
||||||
|
printDebug("Generated new user agent: %s", userAgent)
|
||||||
return userAgent, nil
|
return userAgent, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Reference in a new issue