// agent.go (Search): randomized browser User-Agent generation with caching,
// driven by caniuse usage data.

package main
import (
	"encoding/json"
	"fmt"
	"io"
	"math/rand"
	"net/http"
	"sort"
	"strings"
	"sync"
	"time"
)
// BrowserVersion is a single browser release together with the global usage
// figure recorded for it in the caniuse data.
type BrowserVersion struct {
	// Version is the version label as it appears in the caniuse data.
	Version string `json:"version"`
	// Global is the global usage value reported for this version.
	Global float64 `json:"global"`
}
// BrowserData holds sets of versions for Firefox and Chromium
2024-08-13 16:31:28 +02:00
type BrowserData struct {
Firefox []BrowserVersion `json:"firefox"`
Chromium []BrowserVersion `json:"chrome"`
}
var (
cache = struct {
sync.RWMutex
data map[string]string
}{
data: make(map[string]string),
}
2024-08-13 16:31:28 +02:00
browserCache = struct {
sync.RWMutex
data BrowserData
expires time.Time
}{
expires: time.Now(),
}
)
// fetchLatestBrowserVersions retrieves usage data from caniuse.coms fulldata JSON.
2024-08-13 16:31:28 +02:00
func fetchLatestBrowserVersions() (BrowserData, error) {
const urlCaniuse = "https://raw.githubusercontent.com/Fyrd/caniuse/master/fulldata-json/data-2.0.json"
client := &http.Client{
Timeout: 30 * time.Second,
}
req, err := http.NewRequest("GET", urlCaniuse, nil)
if err != nil {
return BrowserData{}, err
}
// Set a simple custom User-Agent and language
req.Header.Set("User-Agent", "MyCustomAgent/1.0 (compatible; +https://example.com)")
req.Header.Set("Accept-Language", "en-US,en;q=0.9")
resp, err := client.Do(req)
2024-08-13 16:31:28 +02:00
if err != nil {
return BrowserData{}, err
}
defer resp.Body.Close()
body, err := io.ReadAll(resp.Body)
2024-08-13 16:31:28 +02:00
if err != nil {
return BrowserData{}, err
}
var rawData map[string]any
2024-08-13 16:31:28 +02:00
if err := json.Unmarshal(body, &rawData); err != nil {
return BrowserData{}, err
}
stats, ok := rawData["agents"].(map[string]any)
if !ok {
return BrowserData{}, fmt.Errorf("unexpected JSON structure (no 'agents' field)")
}
2024-08-13 16:31:28 +02:00
var data BrowserData
// Extract Firefox data
if firefoxData, ok := stats["firefox"].(map[string]any); ok {
if usageMap, ok := firefoxData["usage_global"].(map[string]any); ok {
for version, usage := range usageMap {
val, _ := usage.(float64)
data.Firefox = append(data.Firefox, BrowserVersion{Version: version, Global: val})
}
2024-08-13 16:31:28 +02:00
}
}
// Extract Chrome data
if chromeData, ok := stats["chrome"].(map[string]any); ok {
if usageMap, ok := chromeData["usage_global"].(map[string]any); ok {
for version, usage := range usageMap {
val, _ := usage.(float64)
data.Chromium = append(data.Chromium, BrowserVersion{Version: version, Global: val})
}
2024-08-13 16:31:28 +02:00
}
}
return data, nil
}
// getLatestBrowserVersions checks the cache and fetches new data if expired
2024-08-13 16:31:28 +02:00
func getLatestBrowserVersions() (BrowserData, error) {
browserCache.RLock()
if time.Now().Before(browserCache.expires) {
data := browserCache.data
browserCache.RUnlock()
return data, nil
}
browserCache.RUnlock()
data, err := fetchLatestBrowserVersions()
if err != nil {
return BrowserData{}, err
}
browserCache.Lock()
browserCache.data = data
browserCache.expires = time.Now().Add(24 * time.Hour) // Refresh daily
2024-08-13 16:31:28 +02:00
browserCache.Unlock()
return data, nil
}
// randomUserAgent picks a random browser (Firefox/Chromium), selects a version based on usage,
// picks an OS string, and composes a User-Agent header.
2024-08-13 16:31:28 +02:00
func randomUserAgent() (string, error) {
browsers, err := getLatestBrowserVersions()
if err != nil {
return "", err
}
r := rand.New(rand.NewSource(time.Now().UnixNano()))
2024-08-13 16:31:28 +02:00
// Overall usage: 80% chance for Chromium, 20% for Firefox
2024-08-13 16:31:28 +02:00
usageStats := map[string]float64{
"Firefox": 20.0,
"Chromium": 80.0,
2024-08-13 16:31:28 +02:00
}
// Weighted random selection of the browser type
2024-08-13 16:31:28 +02:00
browserType := ""
randVal := r.Float64() * 100
2024-08-13 16:31:28 +02:00
cumulative := 0.0
for bType, usage := range usageStats {
2024-08-13 16:31:28 +02:00
cumulative += usage
if randVal < cumulative {
browserType = bType
2024-08-13 16:31:28 +02:00
break
}
}
var versions []BrowserVersion
switch browserType {
case "Firefox":
versions = browsers.Firefox
case "Chromium":
versions = browsers.Chromium
}
if len(versions) == 0 {
return "", fmt.Errorf("no versions found for browser: %s", browserType)
}
// Sort by global usage descending
2024-08-13 16:31:28 +02:00
sort.Slice(versions, func(i, j int) bool {
return versions[i].Global > versions[j].Global
})
// Probability distribution for top few versions
probabilities := []float64{0.5, 0.25, 0.125, 0.0625, 0.03125, 0.015625, 0.0078125, 0.00390625}
2024-08-13 16:31:28 +02:00
version := ""
randVal = r.Float64()
2024-08-13 16:31:28 +02:00
cumulative = 0.0
for i, p := range probabilities {
cumulative += p
if randVal < cumulative && i < len(versions) {
version = versions[i].Version
break
}
}
// Fallback to the least used version if none matched
2024-08-13 16:31:28 +02:00
if version == "" {
version = versions[len(versions)-1].Version
}
userAgent := generateUserAgent(browserType, version, r)
2024-08-13 16:31:28 +02:00
return userAgent, nil
}
// generateUserAgent composes a User-Agent string for the given browser family
// ("Firefox" or "Chromium") and version, choosing the OS token at random with
// r. Exactly one value is drawn from r. Any other browser name yields "".
func generateUserAgent(browser, version string, r *rand.Rand) string {
	platforms := []struct {
		token  string
		weight float64
	}{
		{"Windows NT 10.0; Win64; x64", 44.0},
		{"X11; Linux x86_64", 2.0},
		{"X11; Ubuntu; Linux x86_64", 2.0},
		{"Macintosh; Intel Mac OS X 10_15_7", 10.0},
	}

	// Weighted random selection for the OS. The weights add up to 58 while
	// the roll spans [0, 100), so every roll of 58 or more keeps the default
	// below, i.e. the first (Windows) entry.
	roll := r.Float64() * 100
	selectedOS := platforms[0].token
	threshold := 0.0
	for _, p := range platforms {
		threshold += p.weight
		if roll < threshold {
			selectedOS = p.token
			break
		}
	}

	switch browser {
	case "Firefox":
		// Example: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:117.0) Gecko/20100101 Firefox/117.0
		return fmt.Sprintf("Mozilla/5.0 (%s; rv:%s) Gecko/20100101 Firefox/%s", selectedOS, version, version)
	case "Chromium":
		// Example: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.5938.132 Safari/537.36
		return fmt.Sprintf("Mozilla/5.0 (%s) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36", selectedOS, version)
	default:
		return ""
	}
}
// updateCachedUserAgents randomly updates half of the cached UAs to new versions
2024-08-13 16:31:28 +02:00
func updateCachedUserAgents(newVersions BrowserData) {
cache.Lock()
defer cache.Unlock()
r := rand.New(rand.NewSource(time.Now().UnixNano()))
2024-08-13 16:31:28 +02:00
for key, userAgent := range cache.data {
if r.Float64() < 0.5 {
updatedUserAgent := updateUserAgentVersion(userAgent, newVersions, r)
2024-08-13 16:31:28 +02:00
cache.data[key] = updatedUserAgent
}
}
}
// updateUserAgentVersion tries to parse the old UA, detect its browser, and update the version
func updateUserAgentVersion(userAgent string, newVersions BrowserData, r *rand.Rand) string {
2024-08-13 16:31:28 +02:00
var browserType, version string
// Attempt to detect old UA patterns (Chromium or Firefox)
2024-08-13 16:31:28 +02:00
if _, err := fmt.Sscanf(userAgent, "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36", &version); err == nil {
browserType = "Chromium"
} else if _, err := fmt.Sscanf(userAgent, "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36", &version); err == nil {
browserType = "Chromium"
} else if _, err := fmt.Sscanf(userAgent, "Mozilla/5.0 (X11; Ubuntu; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36", &version); err == nil {
browserType = "Chromium"
} else if _, err := fmt.Sscanf(userAgent, "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36", &version); err == nil {
browserType = "Chromium"
} else if _, err := fmt.Sscanf(userAgent, "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:%s) Gecko/20100101 Firefox/%s", &version, &version); err == nil {
browserType = "Firefox"
} else if _, err := fmt.Sscanf(userAgent, "Mozilla/5.0 (X11; Linux x86_64; rv:%s) Gecko/20100101 Firefox/%s", &version, &version); err == nil {
browserType = "Firefox"
} else if _, err := fmt.Sscanf(userAgent, "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:%s) Gecko/20100101 Firefox/%s", &version, &version); err == nil {
browserType = "Firefox"
} else if _, err := fmt.Sscanf(userAgent, "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7; rv:%s) Gecko/20100101 Firefox/%s", &version, &version); err == nil {
browserType = "Firefox"
}
// Grab the newest version from the fetched data
2024-08-13 16:31:28 +02:00
var latestVersion string
if browserType == "Firefox" && len(newVersions.Firefox) > 0 {
// Sort by usage descending
sort.Slice(newVersions.Firefox, func(i, j int) bool {
return newVersions.Firefox[i].Global > newVersions.Firefox[j].Global
})
2024-08-13 16:31:28 +02:00
latestVersion = newVersions.Firefox[0].Version
} else if browserType == "Chromium" && len(newVersions.Chromium) > 0 {
// Sort by usage descending
sort.Slice(newVersions.Chromium, func(i, j int) bool {
return newVersions.Chromium[i].Global > newVersions.Chromium[j].Global
})
2024-08-13 16:31:28 +02:00
latestVersion = newVersions.Chromium[0].Version
}
// If we failed to detect the browser or have no data, just return the old UA
if browserType == "" || latestVersion == "" {
return userAgent
}
// Create a new random OS-based UA string with the latest version
return generateUserAgent(browserType, latestVersion, r)
2024-08-13 16:31:28 +02:00
}
// periodicAgentUpdate periodically refreshes browser data and user agents
func periodicAgentUpdate() {
2024-08-13 16:31:28 +02:00
for {
// Sleep a random interval between 1 and 2 days
r := rand.New(rand.NewSource(time.Now().UnixNano()))
time.Sleep(time.Duration(24+r.Intn(24)) * time.Hour)
2024-08-13 16:31:28 +02:00
// Fetch the latest browser versions
newVersions, err := fetchLatestBrowserVersions()
if err != nil {
printWarn("Error fetching latest browser versions: %v", err)
continue
}
// Update the browser version cache
browserCache.Lock()
browserCache.data = newVersions
browserCache.expires = time.Now().Add(24 * time.Hour)
browserCache.Unlock()
// Update the cached user agents
updateCachedUserAgents(newVersions)
}
}
// GetUserAgent returns a cached UA for the given key or creates one if none exists.
2024-08-13 16:31:28 +02:00
func GetUserAgent(cacheKey string) (string, error) {
cache.RLock()
userAgent, found := cache.data[cacheKey]
cache.RUnlock()
if found {
return userAgent, nil
}
userAgent, err := randomUserAgent()
if err != nil {
return "", err
}
cache.Lock()
cache.data[cacheKey] = userAgent
cache.Unlock()
printDebug("Generated (cached or new) user agent: %s", userAgent)
2024-08-13 16:31:28 +02:00
return userAgent, nil
}
// GetNewUserAgent always returns a newly generated UA, overwriting the cache.
2024-08-13 16:31:28 +02:00
func GetNewUserAgent(cacheKey string) (string, error) {
userAgent, err := randomUserAgent()
if err != nil {
return "", err
}
cache.Lock()
cache.data[cacheKey] = userAgent
cache.Unlock()
printDebug("Generated new user agent: %s", userAgent)
2024-08-13 16:31:28 +02:00
return userAgent, nil
}
// func main() {
// go periodicAgentUpdate() // not needed here
// cacheKey := "image-search"
// userAgent, err := GetUserAgent(cacheKey)
// if err != nil {
// fmt.Println("Error:", err)
// return
// }
// fmt.Println("Generated User Agent:", userAgent)
// // Request a new user agent for the same key
// newUserAgent, err := GetNewUserAgent(cacheKey)
// if err != nil {
// fmt.Println("Error:", err)
// return
// }
// fmt.Println("New User Agent:", newUserAgent)
// AcacheKey := "image-search"
// AuserAgent, err := GetUserAgent(AcacheKey)
// if err != nil {
// fmt.Println("Error:", err)
// return
// }
// fmt.Println("Generated User Agent:", AuserAgent)
// DcacheKey := "image-search"
// DuserAgent, err := GetUserAgent(DcacheKey)
// if err != nil {
// fmt.Println("Error:", err)
// return
// }
// fmt.Println("Generated User Agent:", DuserAgent)
// }