// Source: Search/proxy.go (271 lines, 7.4 KiB, Go)

package main
import (
"fmt"
"net/http"
"strings"
"sync"
"time"
"golang.org/x/net/proxy"
)
// ProxyConfig describes a single proxy endpoint together with its optional
// credentials.
type ProxyConfig struct {
	// Address is the proxy endpoint; ParseProxies fills it in as "host:port".
	Address string
	// Username and Password are the optional credentials. NewProxyClientPool
	// only sends authentication when at least one of them is non-empty.
	Username, Password string
}
// ProxyClient is a pool of HTTP clients, one per proxy, that are handed out
// in round-robin order.
type ProxyClient struct {
	// clients holds one HTTP client per configured proxy.
	clients []*http.Client

	// lock is held while index is read and advanced.
	lock sync.Mutex
	// index is the position in clients of the next client to hand out.
	index int
}
// Package-level proxy client pools. Both stay nil until InitProxies assigns
// them.

// metaProxyClient is the pool built from the Meta proxy configuration.
var metaProxyClient *ProxyClient

// crawlerProxyClient is the pool built from the Crawler proxy configuration.
var crawlerProxyClient *ProxyClient
// NewProxyClientPool creates a pool of HTTP clients with SOCKS5 proxies.
2025-01-12 16:46:52 +01:00
func NewProxyClientPool(proxies []ProxyConfig, timeout time.Duration) (*ProxyClient, error) {
if len(proxies) == 0 {
return nil, fmt.Errorf("no proxies provided")
}
clients := make([]*http.Client, len(proxies))
for i, pc := range proxies {
2025-01-12 16:46:52 +01:00
var auth *proxy.Auth
if pc.Username != "" || pc.Password != "" {
2025-01-12 16:46:52 +01:00
auth = &proxy.Auth{
User: pc.Username,
Password: pc.Password,
2025-01-12 16:46:52 +01:00
}
}
dialer, err := proxy.SOCKS5("tcp", pc.Address, auth, proxy.Direct)
2025-01-12 16:46:52 +01:00
if err != nil {
return nil, fmt.Errorf("failed to create SOCKS5 dialer for %s: %w", pc.Address, err)
2025-01-12 16:46:52 +01:00
}
transport := &http.Transport{Dial: dialer.Dial}
clients[i] = &http.Client{
Transport: transport,
Timeout: timeout,
}
}
return &ProxyClient{clients: clients}, nil
}
// Do sends req using the next client in the pool and advances the
// round-robin position.
//
// It returns an error, without sending anything, when the receiver is nil or
// the pool holds no clients. The lock is released before the request is
// sent, so concurrent callers are not serialized on the network round trip.
func (p *ProxyClient) Do(req *http.Request) (*http.Response, error) {
	if p == nil {
		return nil, fmt.Errorf("proxy client pool is nil")
	}

	p.lock.Lock()
	if len(p.clients) == 0 {
		p.lock.Unlock()
		return nil, fmt.Errorf("proxy client pool is empty")
	}
	client := p.clients[p.index]
	p.index = (p.index + 1) % len(p.clients)
	p.lock.Unlock()

	return client.Do(req)
}
// GetProxy returns the proxy URL of the next client in round-robin order, as
// reported by the Proxy function of that client's *http.Transport.
//
// It returns "" when the pool is empty, when the client's transport is not an
// *http.Transport, when that transport has no Proxy function, or when the
// Proxy function yields no URL. The Proxy function is called with a nil
// request and its error is ignored.
//
// The call advances the same position that Do uses.
// NOTE(review): clients built by NewProxyClientPool only configure a dial
// function and leave Transport.Proxy unset, so this appears to return "" for
// them — confirm whether the proxy address should be stored on the pool.
func (p *ProxyClient) GetProxy() string {
	p.lock.Lock()
	defer p.lock.Unlock()

	poolSize := len(p.clients)
	if poolSize == 0 {
		return ""
	}

	// Take the current client and move the round-robin position forward.
	current := p.clients[p.index]
	p.index = (p.index + 1) % poolSize

	tr, isHTTPTransport := current.Transport.(*http.Transport)
	if !isHTTPTransport || tr.Proxy == nil {
		return ""
	}

	u, _ := tr.Proxy(nil)
	if u == nil {
		return ""
	}
	return u.String()
}
// ParseProxies parses the proxy strings in the format ADDRESS:PORT or ADDRESS:PORT:USER:PASSWORD.
func ParseProxies(proxyStrings []string) []ProxyConfig {
var proxies []ProxyConfig
for _, proxyStr := range proxyStrings {
parts := strings.Split(proxyStr, ":")
switch len(parts) {
case 2: // ADDRESS:PORT
2025-01-12 16:46:52 +01:00
proxies = append(proxies, ProxyConfig{
Address: fmt.Sprintf("%s:%s", parts[0], parts[1]),
})
case 4: // ADDRESS:PORT:USER:PASSWORD
2025-01-12 16:46:52 +01:00
proxies = append(proxies, ProxyConfig{
Address: fmt.Sprintf("%s:%s", parts[0], parts[1]),
Username: parts[2],
Password: parts[3],
})
default:
fmt.Printf("Invalid proxy format: %s\n", proxyStr)
2025-01-12 16:46:52 +01:00
}
}
return proxies
}
// InitProxies builds the package-level Meta and Crawler proxy pools from the
// configuration.
//
// For each kind that is enabled, the configured proxy strings are parsed and
// a pool with a 30 second client timeout is created and stored in the
// matching package variable. If creating the pool fails, the function panics
// when that kind is configured as strict; otherwise it prints a warning and
// stores the nil pool. A kind that is not enabled is left untouched.
func InitProxies() {
	const poolTimeout = 30 * time.Second

	// Meta proxy pool.
	if config.MetaProxyEnabled {
		pool, err := NewProxyClientPool(ParseProxies(config.MetaProxies), poolTimeout)
		switch {
		case err == nil:
			// Pool created; nothing to report.
		case config.MetaProxyStrict:
			panic(fmt.Sprintf("Failed to initialize Meta proxies: %v", err))
		default:
			fmt.Printf("Warning: Meta proxy initialization failed: %v\n", err)
		}
		metaProxyClient = pool
	}

	// Crawler proxy pool.
	if config.CrawlerProxyEnabled {
		pool, err := NewProxyClientPool(ParseProxies(config.CrawlerProxies), poolTimeout)
		switch {
		case err == nil:
			// Pool created; nothing to report.
		case config.CrawlerProxyStrict:
			panic(fmt.Sprintf("Failed to initialize Crawler proxies: %v", err))
		default:
			fmt.Printf("Warning: Crawler proxy initialization failed: %v\n", err)
		}
		crawlerProxyClient = pool
	}
}
// Doer is anything that can execute an HTTP request. Both *http.Client and
// *ProxyClient provide this method, so either can be passed where a Doer is
// expected.
type Doer interface {
	// Do sends req and returns the response or an error.
	Do(req *http.Request) (*http.Response, error)
}
// DoProxyRequest sends req directly and/or through the proxy pool, depending
// on the proxy mode, and returns the first successful (2xx) response.
//
//   - strict: when false, one direct attempt is made first and the proxy is
//     only used if that attempt fails; when true, the proxy is used if it is
//     available, otherwise a single direct attempt is made.
//   - enabled: whether this type of proxy is turned on.
//   - retryCount: how many attempts are made through the proxy.
//   - proxyClient: the pool of proxy clients; may be nil.
//
// On failure the returned response is nil and the body of every unsuccessful
// response has been closed. The returned error wraps the underlying error of
// the last attempt, or reports its HTTP status when the request itself went
// through but the status was not 2xx.
func DoProxyRequest(req *http.Request, strict bool, enabled bool, retryCount int, proxyClient *ProxyClient) (*http.Response, error) {
	// failure releases an unsuccessful response and returns the reason the
	// attempt did not count as a success, so callers never see a nil cause.
	failure := func(resp *http.Response, err error) error {
		if resp != nil && resp.Body != nil {
			resp.Body.Close()
		}
		switch {
		case err != nil:
			return err
		case resp != nil:
			return fmt.Errorf("unexpected HTTP status %d", resp.StatusCode)
		default:
			return fmt.Errorf("no request was attempted")
		}
	}

	proxyUsable := enabled && proxyClient != nil

	// 1) Not strict: try direct once, then fall back to the proxy.
	if !strict {
		resp, err := tryRequestOnce(req, http.DefaultClient)
		if isSuccessful(resp, err) {
			return resp, nil
		}
		directErr := failure(resp, err)
		if !proxyUsable {
			return nil, fmt.Errorf("request failed direct, no valid proxy: %w", directErr)
		}

		resp, err = tryRequestWithRetry(req, proxyClient, retryCount)
		if isSuccessful(resp, err) {
			return resp, nil
		}
		return nil, fmt.Errorf("failed after direct & proxy attempts: %w", failure(resp, err))
	}

	// 2) Strict with a usable proxy: only the proxy is tried.
	if proxyUsable {
		resp, err := tryRequestWithRetry(req, proxyClient, retryCount)
		if isSuccessful(resp, err) {
			return resp, nil
		}
		return nil, fmt.Errorf("failed after %d proxy attempts: %w", retryCount, failure(resp, err))
	}

	// 3) Strict but no proxy: a single direct attempt.
	resp, err := tryRequestOnce(req, http.DefaultClient)
	if isSuccessful(resp, err) {
		return resp, nil
	}
	return nil, fmt.Errorf("direct request failed in strict mode, no proxy: %w", failure(resp, err))
}
// Helper Wrapper functions for DoProxyRequest()
func DoMetaProxyRequest(req *http.Request) (*http.Response, error) {
return DoProxyRequest(
req,
config.MetaProxyStrict,
config.MetaProxyEnabled,
config.MetaProxyRetry,
metaProxyClient,
)
}
// DoCrawlerProxyRequest runs req through DoProxyRequest using the Crawler
// proxy settings from the configuration and the Crawler proxy pool.
func DoCrawlerProxyRequest(req *http.Request) (*http.Response, error) {
	return DoProxyRequest(
		req,
		config.CrawlerProxyStrict,
		config.CrawlerProxyEnabled,
		config.CrawlerProxyRetry,
		// Crawler traffic must use the crawler pool, not the Meta pool.
		crawlerProxyClient,
	)
}
// tryRequestWithRetry sends req with client up to retries times, waiting
// 200ms between attempts, and returns the first successful (2xx) response.
//
// When every attempt fails, the response and error of the last attempt are
// returned; the error may be nil if that attempt produced a non-2xx response,
// and the caller is responsible for closing that response's body. Bodies of
// earlier failed responses are closed before the next attempt. If retries is
// less than 1, no request is sent and an error is returned.
func tryRequestWithRetry(req *http.Request, client Doer, retries int) (*http.Response, error) {
	if retries < 1 {
		return nil, fmt.Errorf("no request attempted: retry count is %d", retries)
	}

	var resp *http.Response
	var err error
	for i := 1; i <= retries; i++ {
		if i > 1 {
			// Release the previous failed response, then pause before
			// retrying. Nothing is slept after the final attempt.
			if resp != nil && resp.Body != nil {
				resp.Body.Close()
			}
			time.Sleep(200 * time.Millisecond)
		}
		printDebug("Attempt %d of %d with proxy/client...", i, retries)
		resp, err = tryRequestOnce(req, client)
		if isSuccessful(resp, err) {
			return resp, nil
		}
	}
	return resp, err
}
// tryRequestOnce performs exactly one attempt at sending req and hands back
// whatever the client returned. A nil client means http.DefaultClient.
func tryRequestOnce(req *http.Request, client Doer) (*http.Response, error) {
	doer := client
	if doer == nil {
		doer = http.DefaultClient
	}
	return doer.Do(req)
}
// isSuccessful reports whether a request outcome counts as a success: there
// is no error, a response is present, and its status code is in the 2xx
// range (200 through 299 inclusive).
func isSuccessful(resp *http.Response, err error) bool {
	return err == nil && resp != nil && resp.StatusCode >= 200 && resp.StatusCode <= 299
}
// Example usage (disabled):
// func main() {
// config := loadConfig()
// // Initialize proxies if enabled
// if config.CrawlerProxyEnabled || config.MetaProxyEnabled {
// InitProxies()
// }
// // Example usage
// if metaProxyClient != nil {
// req, _ := http.NewRequest("GET", "https://example.com", nil)
// resp, err := metaProxyClient.Do(req)
// if err != nil {
// fmt.Printf("Error using MetaProxyClient: %v\n", err)
// } else {
// fmt.Printf("Meta Proxy Response Status: %s\n", resp.Status)
// resp.Body.Close()
// }
// }
// }