2025-01-12 16:46:52 +01:00
|
|
|
package main
|
|
|
|
|
|
|
|
import (
|
|
|
|
"fmt"
|
|
|
|
"net/http"
|
|
|
|
"strings"
|
|
|
|
"sync"
|
|
|
|
"time"
|
|
|
|
|
|
|
|
"golang.org/x/net/proxy"
|
|
|
|
)
|
|
|
|
|
|
|
|
// ProxyConfig holds the connection settings for a single proxy.
type ProxyConfig struct {
	// Address is the proxy endpoint in "host:port" form.
	Address string
	// Username is the optional proxy user name; empty when no auth is used.
	Username string
	// Password is the optional proxy password; empty when no auth is used.
	Password string
}
|
|
|
|
|
|
|
|
// ProxyClient is a pool of HTTP clients, one per configured proxy, that are
// handed out in round-robin order.
type ProxyClient struct {
	// clients holds one HTTP client per proxy.
	clients []*http.Client
	// lock guards index while a client is being selected.
	lock sync.Mutex
	// index is the position in clients of the next client to use.
	index int
}
|
|
|
|
|
|
|
|
// Package-level proxy clients
|
|
|
|
var (
|
|
|
|
metaProxyClient *ProxyClient
|
|
|
|
crawlerProxyClient *ProxyClient
|
|
|
|
)
|
|
|
|
|
2025-02-22 22:36:54 +01:00
|
|
|
// NewProxyClientPool creates a pool of HTTP clients with SOCKS5 proxies.
|
2025-01-12 16:46:52 +01:00
|
|
|
func NewProxyClientPool(proxies []ProxyConfig, timeout time.Duration) (*ProxyClient, error) {
|
|
|
|
if len(proxies) == 0 {
|
|
|
|
return nil, fmt.Errorf("no proxies provided")
|
|
|
|
}
|
|
|
|
|
|
|
|
clients := make([]*http.Client, len(proxies))
|
|
|
|
|
2025-02-22 22:36:54 +01:00
|
|
|
for i, pc := range proxies {
|
2025-01-12 16:46:52 +01:00
|
|
|
var auth *proxy.Auth
|
2025-02-22 22:36:54 +01:00
|
|
|
if pc.Username != "" || pc.Password != "" {
|
2025-01-12 16:46:52 +01:00
|
|
|
auth = &proxy.Auth{
|
2025-02-22 22:36:54 +01:00
|
|
|
User: pc.Username,
|
|
|
|
Password: pc.Password,
|
2025-01-12 16:46:52 +01:00
|
|
|
}
|
|
|
|
}
|
2025-02-22 22:36:54 +01:00
|
|
|
dialer, err := proxy.SOCKS5("tcp", pc.Address, auth, proxy.Direct)
|
2025-01-12 16:46:52 +01:00
|
|
|
if err != nil {
|
2025-02-22 22:36:54 +01:00
|
|
|
return nil, fmt.Errorf("failed to create SOCKS5 dialer for %s: %w", pc.Address, err)
|
2025-01-12 16:46:52 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
transport := &http.Transport{Dial: dialer.Dial}
|
|
|
|
clients[i] = &http.Client{
|
|
|
|
Transport: transport,
|
|
|
|
Timeout: timeout,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return &ProxyClient{clients: clients}, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Do sends an HTTP request using the next proxy in the pool.
|
|
|
|
func (p *ProxyClient) Do(req *http.Request) (*http.Response, error) {
|
|
|
|
p.lock.Lock()
|
|
|
|
client := p.clients[p.index]
|
|
|
|
p.index = (p.index + 1) % len(p.clients)
|
|
|
|
p.lock.Unlock()
|
|
|
|
return client.Do(req)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (p *ProxyClient) GetProxy() string {
|
|
|
|
p.lock.Lock()
|
|
|
|
defer p.lock.Unlock()
|
|
|
|
|
|
|
|
if len(p.clients) == 0 {
|
|
|
|
return ""
|
|
|
|
}
|
|
|
|
|
|
|
|
// Round-robin proxy retrieval
|
|
|
|
client := p.clients[p.index]
|
|
|
|
p.index = (p.index + 1) % len(p.clients)
|
|
|
|
|
|
|
|
// Assume each client has a proxy string saved
|
|
|
|
// Example implementation depends on how your proxies are configured
|
|
|
|
proxyTransport, ok := client.Transport.(*http.Transport)
|
|
|
|
if ok && proxyTransport.Proxy != nil {
|
|
|
|
proxyURL, _ := proxyTransport.Proxy(nil)
|
|
|
|
if proxyURL != nil {
|
|
|
|
return proxyURL.String()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return ""
|
|
|
|
}
|
|
|
|
|
|
|
|
// ParseProxies parses the proxy strings in the format ADDRESS:PORT or ADDRESS:PORT:USER:PASSWORD.
|
|
|
|
func ParseProxies(proxyStrings []string) []ProxyConfig {
|
|
|
|
var proxies []ProxyConfig
|
2025-02-22 22:36:54 +01:00
|
|
|
for _, proxyStr := range proxyStrings {
|
|
|
|
parts := strings.Split(proxyStr, ":")
|
|
|
|
switch len(parts) {
|
|
|
|
case 2: // ADDRESS:PORT
|
2025-01-12 16:46:52 +01:00
|
|
|
proxies = append(proxies, ProxyConfig{
|
|
|
|
Address: fmt.Sprintf("%s:%s", parts[0], parts[1]),
|
|
|
|
})
|
2025-02-22 22:36:54 +01:00
|
|
|
case 4: // ADDRESS:PORT:USER:PASSWORD
|
2025-01-12 16:46:52 +01:00
|
|
|
proxies = append(proxies, ProxyConfig{
|
|
|
|
Address: fmt.Sprintf("%s:%s", parts[0], parts[1]),
|
|
|
|
Username: parts[2],
|
|
|
|
Password: parts[3],
|
|
|
|
})
|
2025-02-22 22:36:54 +01:00
|
|
|
default:
|
|
|
|
fmt.Printf("Invalid proxy format: %s\n", proxyStr)
|
2025-01-12 16:46:52 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return proxies
|
|
|
|
}
|
|
|
|
|
|
|
|
// InitProxies initializes the proxy clients for Meta and Crawler proxies.
|
|
|
|
func InitProxies() {
|
|
|
|
// Initialize Meta Proxy Client
|
|
|
|
if config.MetaProxyEnabled {
|
|
|
|
metaProxies := ParseProxies(config.MetaProxies)
|
|
|
|
client, err := NewProxyClientPool(metaProxies, 30*time.Second)
|
|
|
|
if err != nil {
|
|
|
|
if config.MetaProxyStrict {
|
|
|
|
panic(fmt.Sprintf("Failed to initialize Meta proxies: %v", err))
|
|
|
|
}
|
|
|
|
fmt.Printf("Warning: Meta proxy initialization failed: %v\n", err)
|
|
|
|
}
|
|
|
|
metaProxyClient = client
|
|
|
|
}
|
|
|
|
|
|
|
|
// Initialize Crawler Proxy Client
|
|
|
|
if config.CrawlerProxyEnabled {
|
|
|
|
crawlerProxies := ParseProxies(config.CrawlerProxies)
|
|
|
|
client, err := NewProxyClientPool(crawlerProxies, 30*time.Second)
|
|
|
|
if err != nil {
|
|
|
|
if config.CrawlerProxyStrict {
|
|
|
|
panic(fmt.Sprintf("Failed to initialize Crawler proxies: %v", err))
|
|
|
|
}
|
|
|
|
fmt.Printf("Warning: Crawler proxy initialization failed: %v\n", err)
|
|
|
|
}
|
|
|
|
crawlerProxyClient = client
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2025-02-22 22:36:54 +01:00
|
|
|
// Doer is anything that can execute an HTTP request. Both *http.Client and
// *ProxyClient satisfy it, so the request helpers can accept either one.
type Doer interface {
	// Do sends req and returns the response or an error.
	Do(*http.Request) (*http.Response, error)
}
|
|
|
|
|
|
|
|
// DoProxyRequest handles “try direct, then proxy if needed,” with retries if proxy is used.
|
|
|
|
//
|
|
|
|
// - strict: if true, always try proxy first if enabled; if not available, do one direct attempt
|
|
|
|
// - enabled: whether this type of proxy is turned on
|
|
|
|
// - retryCount: how many times to retry with the proxy
|
|
|
|
// - proxyClient: the pool of proxy connections
|
|
|
|
func DoProxyRequest(req *http.Request, strict bool, enabled bool, retryCount int, proxyClient *ProxyClient) (*http.Response, error) {
|
|
|
|
// 1) If !strict => try direct once first
|
|
|
|
if !strict {
|
|
|
|
resp, err := tryRequestOnce(req, http.DefaultClient)
|
|
|
|
if isSuccessful(resp, err) {
|
|
|
|
return resp, nil
|
|
|
|
}
|
|
|
|
// If direct fails => if proxy is enabled, retry
|
|
|
|
if enabled && proxyClient != nil {
|
|
|
|
resp, err = tryRequestWithRetry(req, proxyClient, retryCount)
|
|
|
|
if isSuccessful(resp, err) {
|
|
|
|
return resp, nil
|
|
|
|
}
|
|
|
|
return nil, fmt.Errorf("failed after direct & proxy attempts: %v", err)
|
|
|
|
}
|
|
|
|
return nil, fmt.Errorf("request failed direct, no valid proxy: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// 2) If strict => if proxy is enabled, try it up to “retryCount”
|
|
|
|
if enabled && proxyClient != nil {
|
|
|
|
resp, err := tryRequestWithRetry(req, proxyClient, retryCount)
|
|
|
|
if isSuccessful(resp, err) {
|
|
|
|
return resp, nil
|
|
|
|
}
|
|
|
|
return nil, fmt.Errorf("failed after %d proxy attempts: %v", retryCount, err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// If strict but no proxy => direct once
|
|
|
|
resp, err := tryRequestOnce(req, http.DefaultClient)
|
|
|
|
if isSuccessful(resp, err) {
|
|
|
|
return resp, nil
|
|
|
|
}
|
|
|
|
return nil, fmt.Errorf("direct request failed in strict mode, no proxy: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Helper Wrapper functions for DoProxyRequest()
|
|
|
|
func DoMetaProxyRequest(req *http.Request) (*http.Response, error) {
|
|
|
|
return DoProxyRequest(
|
|
|
|
req,
|
|
|
|
config.MetaProxyStrict,
|
|
|
|
config.MetaProxyEnabled,
|
|
|
|
config.MetaProxyRetry,
|
|
|
|
metaProxyClient,
|
|
|
|
)
|
|
|
|
}
|
|
|
|
func DoCrawlerProxyRequest(req *http.Request) (*http.Response, error) {
|
|
|
|
return DoProxyRequest(
|
|
|
|
req,
|
|
|
|
config.CrawlerProxyStrict,
|
|
|
|
config.CrawlerProxyEnabled,
|
|
|
|
config.CrawlerProxyRetry,
|
|
|
|
metaProxyClient,
|
|
|
|
)
|
|
|
|
}
|
|
|
|
|
|
|
|
// tryRequestWithRetry tries the request up to "retries" times, waiting 200ms between attempts.
|
|
|
|
func tryRequestWithRetry(req *http.Request, client Doer, retries int) (*http.Response, error) {
|
|
|
|
var resp *http.Response
|
|
|
|
var err error
|
|
|
|
for i := 1; i <= retries; i++ {
|
|
|
|
if resp != nil {
|
|
|
|
resp.Body.Close()
|
|
|
|
}
|
|
|
|
printDebug("Attempt %d of %d with proxy/client...", i, retries)
|
|
|
|
resp, err = tryRequestOnce(req, client)
|
|
|
|
if isSuccessful(resp, err) {
|
|
|
|
return resp, nil
|
|
|
|
}
|
|
|
|
time.Sleep(200 * time.Millisecond)
|
|
|
|
}
|
|
|
|
return resp, err
|
|
|
|
}
|
|
|
|
|
|
|
|
// tryRequestOnce sends a single request with the given client. If client is nil, uses default client.
|
|
|
|
func tryRequestOnce(req *http.Request, client Doer) (*http.Response, error) {
|
|
|
|
if client == nil {
|
|
|
|
client = http.DefaultClient
|
|
|
|
}
|
|
|
|
resp, err := client.Do(req)
|
|
|
|
return resp, err
|
|
|
|
}
|
|
|
|
|
|
|
|
// isSuccessful reports whether a request attempt succeeded: err must be nil,
// resp must be non-nil, and resp.StatusCode must be in the range [200, 299].
func isSuccessful(resp *http.Response, err error) bool {
	if err != nil || resp == nil {
		return false
	}
	return resp.StatusCode >= 200 && resp.StatusCode < 300
}
|
|
|
|
|
2025-01-12 16:46:52 +01:00
|
|
|
// func main() {
// 	config := loadConfig()
//
// 	// Initialize proxies if enabled
// 	if config.CrawlerProxyEnabled || config.MetaProxyEnabled {
// 		InitProxies()
// 	}
//
// 	// Example usage
// 	if metaProxyClient != nil {
// 		req, _ := http.NewRequest("GET", "https://example.com", nil)
// 		resp, err := metaProxyClient.Do(req)
// 		if err != nil {
// 			fmt.Printf("Error using MetaProxyClient: %v\n", err)
// 		} else {
// 			fmt.Printf("Meta Proxy Response Status: %s\n", resp.Status)
// 			resp.Body.Close()
// 		}
// 	}
// }
|