added SOCKS5 proxy support

partisan · 2025-01-12 16:46:52 +01:00
commit 614ce8903e (parent 234f1dd3be)
22 changed files with 501 additions and 106 deletions

@@ -23,18 +23,25 @@ type CacheConfig struct {
 }
 
 type Config struct {
-    Port              int    // Added
-    AuthCode          string // Added
-    PeerID            string // Added
-    Peers             []string
-    Domain            string // Added
-    NodesEnabled      bool   // Added
-    CrawlerEnabled    bool   // Added
-    IndexerEnabled    bool   // Added
-    WebsiteEnabled    bool   // Added
-    RamCacheEnabled   bool
-    DriveCacheEnabled bool   // Added
-    LogLevel          int    // Added
+    Port                int      // Added
+    AuthCode            string   // Added
+    PeerID              string   // Added
+    Peers               []string
+    Domain              string   // Added
+    NodesEnabled        bool     // Added
+    MetaSearchEnabled   bool     // Added
+    IndexerEnabled      bool     // Added
+    WebsiteEnabled      bool     // Added
+    RamCacheEnabled     bool
+    DriveCacheEnabled   bool     // Added
+    MetaProxyEnabled    bool     // Added
+    MetaProxyStrict     bool     // Added
+    MetaProxies         []string // Added
+    CrawlerProxyEnabled bool     // Added
+    CrawlerProxyStrict  bool     // Added
+    CrawlerProxies      []string // Added
+    // Maybe add proxy support for image extraction?
+    LogLevel            int      // Added
     ConcurrentStandardCrawlers int
     ConcurrentChromeCrawlers   int
     CrawlingInterval           time.Duration // Refresh crawled results in...
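
The new struct fields split proxy support into two pools: MetaProxies for meta-search traffic and CrawlerProxies for the crawler, each with its own Enabled and Strict flags. The diff does not include the transport code; below is a minimal sketch, assuming golang.org/x/net/proxy, of turning one pool entry into a SOCKS5 dialer (the socks5Dialer helper and the accepted address formats are assumptions, not code from this commit):

import (
    "net"
    "net/url"

    "golang.org/x/net/proxy"
)

// socks5Dialer turns one proxy-list entry into a SOCKS5 dialer.
// Hypothetical helper, not part of this commit; it accepts either
// "host:port" or "socks5://user:pass@host:port".
func socks5Dialer(entry string) (proxy.Dialer, error) {
    addr := entry
    var auth *proxy.Auth
    if u, err := url.Parse(entry); err == nil && u.Host != "" {
        addr = u.Host
        if u.User != nil {
            pw, _ := u.User.Password()
            auth = &proxy.Auth{User: u.User.Username(), Password: pw}
        }
    }
    // proxy.SOCKS5 returns a Dialer that tunnels TCP through the server.
    return proxy.SOCKS5("tcp", addr, auth, &net.Dialer{})
}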
@@ -51,11 +58,17 @@ var defaultConfig = Config{
     Peers:                      []string{},
     AuthCode:                   generateStrongRandomString(64),
     NodesEnabled:               false,
-    CrawlerEnabled:             true,
+    MetaSearchEnabled:          true,
     IndexerEnabled:             false,
     WebsiteEnabled:             true,
     RamCacheEnabled:            true,
     DriveCacheEnabled:          false,
+    MetaProxyEnabled:           false,
+    MetaProxyStrict:            true,
+    MetaProxies:                []string{},
+    CrawlerProxyEnabled:        false,
+    CrawlerProxyStrict:         true,
+    CrawlerProxies:             []string{},
     ConcurrentStandardCrawlers: 12,
     ConcurrentChromeCrawlers:   4,
     CrawlingInterval:           24 * time.Hour,
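
Note the defaults: both proxy pools ship disabled but with Strict already true. The hunk does not define what Strict means; one plausible reading (an assumption, not something this diff confirms) is that a strict pool fails outright when its proxies are unreachable, while a non-strict pool falls back to a direct connection. Reusing the imports from the sketch above:

// dialVia illustrates that assumed Strict semantic; it is not code
// from this commit. strict=true: a proxy error is returned as-is.
// strict=false: the caller silently degrades to a direct dial.
func dialVia(d proxy.Dialer, strict bool, network, addr string) (net.Conn, error) {
    conn, err := d.Dial(network, addr)
    if err != nil && !strict {
        return net.Dial(network, addr)
    }
    return conn, err
}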
@@ -245,14 +258,23 @@ func saveConfig(config Config) {
     // Features section
     featuresSec := cfg.Section("Features")
     featuresSec.Key("Nodes").SetValue(strconv.FormatBool(config.NodesEnabled))
-    featuresSec.Key("Crawler").SetValue(strconv.FormatBool(config.CrawlerEnabled))
+    featuresSec.Key("Crawler").SetValue(strconv.FormatBool(config.MetaSearchEnabled))
     featuresSec.Key("Indexer").SetValue(strconv.FormatBool(config.IndexerEnabled))
     featuresSec.Key("Website").SetValue(strconv.FormatBool(config.WebsiteEnabled))
+    featuresSec.Key("MetaProxy").SetValue(strconv.FormatBool(config.MetaProxyEnabled))
+    featuresSec.Key("CrawlerProxy").SetValue(strconv.FormatBool(config.CrawlerProxyEnabled))
+
+    // Proxies section
+    proxiesSec := cfg.Section("Proxies")
+    proxiesSec.Key("MetaProxyStrict").SetValue(strconv.FormatBool(config.MetaProxyStrict))
+    proxiesSec.Key("MetaProxies").SetValue(strings.Join(config.MetaProxies, ","))
+    proxiesSec.Key("CrawlerProxyStrict").SetValue(strconv.FormatBool(config.CrawlerProxyStrict))
+    proxiesSec.Key("CrawlerProxies").SetValue(strings.Join(config.CrawlerProxies, ","))
 
     // Indexer section
     indexerSec := cfg.Section("Indexer")
     indexerSec.Key("ConcurrentStandardCrawlers").SetValue(strconv.Itoa(config.ConcurrentStandardCrawlers))
-    indexerSec.Key("ConcurrentChromeCrawlers").SetValue(strconv.Itoa(config.ConcurrentStandardCrawlers))
+    indexerSec.Key("ConcurrentChromeCrawlers").SetValue(strconv.Itoa(config.ConcurrentChromeCrawlers))
     indexerSec.Key("CrawlingInterval").SetValue(config.CrawlingInterval.String())
     indexerSec.Key("MaxPagesPerDomain").SetValue(strconv.Itoa(config.MaxPagesPerDomain))
     indexerSec.Key("IndexBatchSize").SetValue(strconv.Itoa(config.IndexBatchSize))
@@ -292,11 +314,19 @@ func loadConfig() Config {
     // Features
     nodesEnabled := getConfigValueBool(cfg.Section("Features").Key("Nodes"), defaultConfig.NodesEnabled)
-    crawlerEnabled := getConfigValueBool(cfg.Section("Features").Key("Crawler"), defaultConfig.CrawlerEnabled)
+    metaSearchEnabled := getConfigValueBool(cfg.Section("Features").Key("Crawler"), defaultConfig.MetaSearchEnabled)
     indexerEnabled := getConfigValueBool(cfg.Section("Features").Key("Indexer"), defaultConfig.IndexerEnabled)
     websiteEnabled := getConfigValueBool(cfg.Section("Features").Key("Website"), defaultConfig.WebsiteEnabled)
     ramCacheEnabled := getConfigValueBool(cfg.Section("Features").Key("RamCache"), defaultConfig.RamCacheEnabled)
     driveCacheEnabled := getConfigValueBool(cfg.Section("Features").Key("DriveCache"), defaultConfig.DriveCacheEnabled)
+    metaProxyEnabled := getConfigValueBool(cfg.Section("Features").Key("MetaProxy"), defaultConfig.MetaProxyEnabled)
+    crawlerProxyEnabled := getConfigValueBool(cfg.Section("Features").Key("CrawlerProxy"), defaultConfig.CrawlerProxyEnabled)
+
+    // Proxies
+    metaProxyStrict := getConfigValueBool(cfg.Section("Proxies").Key("MetaProxyStrict"), defaultConfig.MetaProxyStrict)
+    metaProxies := strings.Split(getConfigValueString(cfg.Section("Proxies").Key("MetaProxies"), ""), ",")
+    crawlerProxyStrict := getConfigValueBool(cfg.Section("Proxies").Key("CrawlerProxyStrict"), defaultConfig.CrawlerProxyStrict)
+    crawlerProxies := strings.Split(getConfigValueString(cfg.Section("Proxies").Key("CrawlerProxies"), ""), ",")
 
     // Indexing
     concurrentStandardCrawlers := getConfigValue(cfg.Section("Indexer").Key("ConcurrentStandardCrawlers"), defaultConfig.ConcurrentStandardCrawlers, strconv.Atoi)
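
One subtlety in the proxy loading above: in Go, strings.Split("", ",") returns []string{""}, not an empty slice, so an unset MetaProxies or CrawlerProxies key produces a one-element list containing an empty string. A small guard along these lines (an assumed helper, not present in the hunk; strings is already imported in this file) would keep empty entries out of the pools:

// splitProxies splits a comma-separated proxy list, dropping empty
// and whitespace-only entries, since strings.Split("", ",") yields
// []string{""} rather than nil. Assumed helper for illustration.
func splitProxies(raw string) []string {
    var out []string
    for _, p := range strings.Split(raw, ",") {
        if p = strings.TrimSpace(p); p != "" {
            out = append(out, p)
        }
    }
    return out
}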
@@ -325,11 +355,17 @@ func loadConfig() Config {
     AuthCode:                   authCode,
     Peers:                      peers,
     NodesEnabled:               nodesEnabled,
-    CrawlerEnabled:             crawlerEnabled,
+    MetaSearchEnabled:          metaSearchEnabled,
     IndexerEnabled:             indexerEnabled,
     WebsiteEnabled:             websiteEnabled,
     RamCacheEnabled:            ramCacheEnabled,
     DriveCacheEnabled:          driveCacheEnabled,
+    MetaProxyEnabled:           metaProxyEnabled,
+    MetaProxyStrict:            metaProxyStrict,
+    MetaProxies:                metaProxies,
+    CrawlerProxyEnabled:        crawlerProxyEnabled,
+    CrawlerProxyStrict:         crawlerProxyStrict,
+    CrawlerProxies:             crawlerProxies,
     ConcurrentStandardCrawlers: concurrentStandardCrawlers,
     ConcurrentChromeCrawlers:   concurrentChromeCrawlers,
     CrawlingInterval:           crawlingInterval,
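
Downstream code can then branch on the loaded flags before choosing a transport. A rough usage sketch, building on the assumed socks5Dialer helper above (httpClientFor is likewise invented for illustration):

// httpClientFor routes HTTP traffic through the first configured
// SOCKS5 proxy when the meta pool is enabled, else returns the
// default client. Hypothetical consumer of the new config fields.
func httpClientFor(cfg Config) (*http.Client, error) {
    if !cfg.MetaProxyEnabled || len(cfg.MetaProxies) == 0 {
        return http.DefaultClient, nil
    }
    d, err := socks5Dialer(cfg.MetaProxies[0])
    if err != nil {
        return nil, err
    }
    return &http.Client{Transport: &http.Transport{Dial: d.Dial}}, nil
}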