Search/config.go

package main
import (
"bufio"
"fmt"
"os"
"path/filepath"
"strconv"
"strings"
"syscall"
"time"
"github.com/shirou/gopsutil/mem"
"gopkg.in/ini.v1"
)
var configFilePath = "./config.ini"
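// Note: the package-level `config` variable populated throughout this file is
// declared elsewhere in the package.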
type CacheConfig struct {
Duration time.Duration
MaxUsageBytes uint64 // Store as bytes for uniformity
Path string
}
type Config struct {
Port int // Port the web server listens on
AuthCode string // Connection code used to authenticate peers (generated if empty)
PeerID string // Identifier of this node among its peers
Peers []string // Addresses of known peer nodes
Domain string // Domain name this instance is served under
NodesEnabled bool // Enable peer/node communication
MetaSearchEnabled bool // Enable the meta-search crawler
IndexerEnabled bool // Enable the local indexer
WebsiteEnabled bool // Serve the website frontend
RamCacheEnabled bool // Cache results in RAM
DriveCacheEnabled bool // Cache results on disk
MetaProxyEnabled bool // Route meta-search requests through the configured proxies
MetaProxyStrict bool // Strict mode for meta-search proxies
MetaProxies []string // Proxy list for meta-search requests
CrawlerProxyEnabled bool // Route crawler requests through the configured proxies
CrawlerProxyStrict bool // Strict mode for crawler proxies
CrawlerProxies []string // Proxy list for the crawler
// Maybe add proxy support for image extraction?
LogLevel int // Logging verbosity level
ConcurrentStandardCrawlers int // Number of standard crawlers running concurrently
ConcurrentChromeCrawlers int // Number of Chrome-based crawlers running concurrently
CrawlingInterval time.Duration // Refresh crawled results after this interval
MaxPagesPerDomain int // Max pages to crawl per domain
IndexBatchSize int // Number of entries indexed per batch
DriveCache CacheConfig
RamCache CacheConfig
}
var defaultConfig = Config{
Port: 5000,
Domain: "localhost",
Peers: []string{},
AuthCode: generateStrongRandomString(64),
NodesEnabled: false,
MetaSearchEnabled: true,
IndexerEnabled: false,
WebsiteEnabled: true,
RamCacheEnabled: true,
DriveCacheEnabled: false,
MetaProxyEnabled: false,
MetaProxyStrict: true,
MetaProxies: []string{},
CrawlerProxyEnabled: false,
CrawlerProxyStrict: true,
CrawlerProxies: []string{},
ConcurrentStandardCrawlers: 12,
ConcurrentChromeCrawlers: 4,
CrawlingInterval: 24 * time.Hour,
MaxPagesPerDomain: 10,
IndexBatchSize: 50,
LogLevel: 1,
DriveCache: CacheConfig{
Duration: 48 * time.Hour,
Path: "./cache",
MaxUsageBytes: parseMaxUsageDrive("90%", "./cache"), // 90% of the disk holding the cache path
},
RamCache: CacheConfig{
Duration: 6 * time.Hour,
MaxUsageBytes: parseMaxUsageRam("90%"), // 90% of total system memory
},
}
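// Note: the MaxUsageBytes defaults above are evaluated once at package
// initialization, i.e. 90% of the total RAM and of the disk capacity of the
// machine this binary runs on.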
func initConfig() error {
// Check if the configuration file exists
if _, err := os.Stat(configFilePath); os.IsNotExist(err) {
// If not, create a new configuration
return createConfig()
}
printInfo("Configuration file already exists. Loading configuration.")
// Load existing configuration
config = loadConfig()
return nil
}
func createConfig() error {
reader := bufio.NewReader(os.Stdin)
printMessage("Configuration file not found. Let's set it up.")
printMessage("Do you want to use default values? (yes/NO): ")
useDefaults, _ := reader.ReadString('\n')
if strings.TrimSpace(strings.ToLower(useDefaults)) != "yes" {
// Server settings
printMessage("Enter port (default 5000): ")
portStr, _ := reader.ReadString('\n')
portStr = strings.TrimSpace(portStr)
if portStr != "" {
port, err := strconv.Atoi(portStr)
if err == nil {
config.Port = port
} else {
printWarn("Invalid port, using default (5000).")
config.Port = defaultConfig.Port
}
} else {
config.Port = defaultConfig.Port
}
printMessage("Enter your domain address (default localhost): ")
domain, _ := reader.ReadString('\n')
config.Domain = strings.TrimSpace(domain)
if config.Domain == "" {
config.Domain = defaultConfig.Domain
}
// printMessage("Use Indexer? (YES/no): ")
// indexerChoice, _ := reader.ReadString('\n')
// indexerChoice = strings.TrimSpace(strings.ToLower(indexerChoice))
// if indexerChoice == "no" {
// config.IndexerEnabled = false
// } else {
// config.IndexerEnabled = true
// }
// Cache settings
printMessage("Would you like to configure Cache settings (yes/NO): ")
configureCache, _ := reader.ReadString('\n')
if strings.TrimSpace(strings.ToLower(configureCache)) == "yes" {
// RamCache settings
printMessage("Enter duration to store results in Ram (default 6h): ")
ramDurationStr, _ := reader.ReadString('\n')
ramDurationStr = strings.TrimSpace(ramDurationStr)
if ramDurationStr == "" {
config.RamCache.Duration = defaultConfig.RamCache.Duration
config.RamCacheEnabled = defaultConfig.RamCacheEnabled
} else {
ramDuration, err := time.ParseDuration(ramDurationStr)
if err != nil || ramDuration == 0 {
printWarn("Invalid duration, using default (6h).")
config.RamCache.Duration = defaultConfig.RamCache.Duration
config.RamCacheEnabled = defaultConfig.RamCacheEnabled
} else {
config.RamCache.Duration = ramDuration
config.RamCacheEnabled = true
}
}
printMessage("Enter RamCache max usage, e.g., 2 GiB or 80%% (default 90%%): ")
ramMaxUsage, _ := reader.ReadString('\n')
ramMaxUsage = strings.TrimSpace(ramMaxUsage)
if ramMaxUsage == "" {
config.RamCache.MaxUsageBytes = defaultConfig.RamCache.MaxUsageBytes
} else if ramMaxUsage == "0" || parseMaxUsageRam(ramMaxUsage) == 0 {
config.RamCacheEnabled = false
} else {
config.RamCache.MaxUsageBytes = parseMaxUsageRam(ramMaxUsage)
if config.RamCache.MaxUsageBytes == 0 {
printWarn("Invalid RamCache max usage, using default (90%%).")
config.RamCache.MaxUsageBytes = defaultConfig.RamCache.MaxUsageBytes
}
}
// DriveCache settings
printMessage("Enter duration to store results in DriveCache (default 0h): ")
driveDurationStr, _ := reader.ReadString('\n')
driveDurationStr = strings.TrimSpace(driveDurationStr)
if driveDurationStr == "" {
config.DriveCache.Duration = defaultConfig.DriveCache.Duration
config.DriveCacheEnabled = defaultConfig.DriveCacheEnabled
} else {
driveDuration, err := time.ParseDuration(driveDurationStr)
if err != nil || driveDuration == 0 {
printErr("Invalid duration, using default (48h).")
config.DriveCache.Duration = defaultConfig.DriveCache.Duration
config.DriveCacheEnabled = defaultConfig.DriveCacheEnabled
} else {
config.DriveCache.Duration = driveDuration
config.DriveCacheEnabled = true
}
}
printMessage("Enter DriveCache path (default ./cache): ")
drivePath, _ := reader.ReadString('\n')
drivePath = strings.TrimSpace(drivePath)
if drivePath == "" {
config.DriveCache.Path = defaultConfig.DriveCache.Path
} else {
config.DriveCache.Path = drivePath
}
printMessage("Enter DriveCache max usage, e.g., 2 GiB or 90%% (default 90%%): ")
driveMaxUsage, _ := reader.ReadString('\n')
driveMaxUsage = strings.TrimSpace(driveMaxUsage)
if driveMaxUsage == "" {
config.DriveCache.MaxUsageBytes = defaultConfig.DriveCache.MaxUsageBytes
} else if driveMaxUsage == "0" || parseMaxUsageDrive(driveMaxUsage, drivePath) == 0 {
config.DriveCacheEnabled = false
} else {
config.DriveCache.MaxUsageBytes = parseMaxUsageDrive(driveMaxUsage, drivePath)
if config.DriveCache.MaxUsageBytes == 0 {
printWarn("Invalid DriveCache max usage, using default.")
config.DriveCache.MaxUsageBytes = defaultConfig.DriveCache.MaxUsageBytes
}
}
} else {
printInfo("Cache settings skipped. Using default values.")
config.RamCache = defaultConfig.RamCache
config.DriveCache = defaultConfig.DriveCache
}
} else {
// Use default configuration
config = defaultConfig
}
// Generate AuthCode if missing
if config.AuthCode == "" {
config.AuthCode = generateStrongRandomString(64)
printMessage("Generated connection code: %s\n", config.AuthCode)
}
saveConfig(config)
printInfo("Configuration saved successfully.")
return nil
}
func saveConfig(config Config) {
cfg := ini.Empty()
// Server section
sec := cfg.Section("Server")
sec.Key("Port").SetValue(strconv.Itoa(config.Port))
sec.Key("Domain").SetValue(config.Domain)
sec.Key("LogLevel").SetValue(strconv.Itoa(config.LogLevel))
// Peers section
peersSec := cfg.Section("Peers")
peersSec.Key("AuthCode").SetValue(config.AuthCode)
peersSec.Key("PeerID").SetValue(config.PeerID)
peersSec.Key("Peers").SetValue(strings.Join(config.Peers, ","))
// Features section
featuresSec := cfg.Section("Features")
featuresSec.Key("Nodes").SetValue(strconv.FormatBool(config.NodesEnabled))
featuresSec.Key("Crawler").SetValue(strconv.FormatBool(config.MetaSearchEnabled))
featuresSec.Key("Indexer").SetValue(strconv.FormatBool(config.IndexerEnabled))
featuresSec.Key("Website").SetValue(strconv.FormatBool(config.WebsiteEnabled))
featuresSec.Key("MetaProxy").SetValue(strconv.FormatBool(config.MetaProxyEnabled))
featuresSec.Key("CrawlerProxy").SetValue(strconv.FormatBool(config.CrawlerProxyEnabled))
// Proxies section
proxiesSec := cfg.Section("Proxies")
proxiesSec.Key("MetaProxyStrict").SetValue(strconv.FormatBool(config.MetaProxyStrict))
proxiesSec.Key("MetaProxies").SetValue(strings.Join(config.MetaProxies, ","))
proxiesSec.Key("CrawlerProxyStrict").SetValue(strconv.FormatBool(config.CrawlerProxyStrict))
proxiesSec.Key("CrawlerProxies").SetValue(strings.Join(config.CrawlerProxies, ","))
// Indexer section
indexerSec := cfg.Section("Indexer")
indexerSec.Key("ConcurrentStandardCrawlers").SetValue(strconv.Itoa(config.ConcurrentStandardCrawlers))
indexerSec.Key("ConcurrentChromeCrawlers").SetValue(strconv.Itoa(config.ConcurrentChromeCrawlers))
indexerSec.Key("CrawlingInterval").SetValue(config.CrawlingInterval.String())
indexerSec.Key("MaxPagesPerDomain").SetValue(strconv.Itoa(config.MaxPagesPerDomain))
indexerSec.Key("IndexBatchSize").SetValue(strconv.Itoa(config.IndexBatchSize))
// DriveCache section
driveSec := cfg.Section("DriveCache")
driveSec.Key("Duration").SetValue(config.DriveCache.Duration.String())
driveSec.Key("MaxUsage").SetValue(formatMaxUsage(config.DriveCache.MaxUsageBytes))
driveSec.Key("Path").SetValue(config.DriveCache.Path)
// driveSec.Key("MaxConcurrentDownloads.Thumbnail").SetValue(strconv.Itoa(config.DriveCache.MaxConcurrentThumbnailDownloads))
// RamCache section
ramSec := cfg.Section("RamCache")
ramSec.Key("Duration").SetValue(config.RamCache.Duration.String())
ramSec.Key("MaxUsage").SetValue(formatMaxUsage(config.RamCache.MaxUsageBytes))
err := cfg.SaveTo(configFilePath)
if err != nil {
printErr("Error writing to config file: %v", err)
}
}
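// For reference, a config.ini written with the defaults above looks roughly
// like this (the AuthCode is randomly generated and the MaxUsage values
// depend on the host's RAM and disk size):
//
//	[Server]
//	Port     = 5000
//	Domain   = localhost
//	LogLevel = 1
//
//	[Peers]
//	AuthCode = <generated 64-character code>
//	PeerID   =
//	Peers    =
//
//	[Features]
//	Nodes        = false
//	Crawler      = true
//	Indexer      = false
//	Website      = true
//	RamCache     = true
//	DriveCache   = false
//	MetaProxy    = false
//	CrawlerProxy = false
//
//	[Proxies]
//	MetaProxyStrict    = true
//	MetaProxies        =
//	CrawlerProxyStrict = true
//	CrawlerProxies     =
//
//	[Indexer]
//	ConcurrentStandardCrawlers = 12
//	ConcurrentChromeCrawlers   = 4
//	CrawlingInterval           = 24h0m0s
//	MaxPagesPerDomain          = 10
//	IndexBatchSize             = 50
//
//	[DriveCache]
//	Duration = 48h0m0s
//	MaxUsage = <e.g. 450.00GiB>
//	Path     = ./cache
//
//	[RamCache]
//	Duration = 6h0m0s
//	MaxUsage = <e.g. 28.80GiB>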
func loadConfig() Config {
cfg, err := ini.Load(configFilePath)
if err != nil {
printErr("Error opening config file: %v", err)
}
// Server
port := getConfigValue(cfg.Section("Server").Key("Port"), defaultConfig.Port, strconv.Atoi)
domain := getConfigValueString(cfg.Section("Server").Key("Domain"), defaultConfig.Domain)
logLevel := getConfigValue(cfg.Section("Server").Key("LogLevel"), defaultConfig.LogLevel, strconv.Atoi)
// Peers
authCode := getConfigValueString(cfg.Section("Peers").Key("AuthCode"), defaultConfig.AuthCode)
peers := strings.Split(getConfigValueString(cfg.Section("Peers").Key("Peers"), ""), ",")
// Features
nodesEnabled := getConfigValueBool(cfg.Section("Features").Key("Nodes"), defaultConfig.NodesEnabled)
metaSearchEnabled := getConfigValueBool(cfg.Section("Features").Key("Crawler"), defaultConfig.MetaSearchEnabled)
indexerEnabled := getConfigValueBool(cfg.Section("Features").Key("Indexer"), defaultConfig.IndexerEnabled)
websiteEnabled := getConfigValueBool(cfg.Section("Features").Key("Website"), defaultConfig.WebsiteEnabled)
ramCacheEnabled := getConfigValueBool(cfg.Section("Features").Key("RamCache"), defaultConfig.RamCacheEnabled)
driveCacheEnabled := getConfigValueBool(cfg.Section("Features").Key("DriveCache"), defaultConfig.DriveCacheEnabled)
metaProxyEnabled := getConfigValueBool(cfg.Section("Features").Key("MetaProxy"), defaultConfig.MetaProxyEnabled)
crawlerProxyEnabled := getConfigValueBool(cfg.Section("Features").Key("CrawlerProxy"), defaultConfig.CrawlerProxyEnabled)
// Proxies
metaProxyStrict := getConfigValueBool(cfg.Section("Proxies").Key("MetaProxyStrict"), defaultConfig.MetaProxyStrict)
metaProxies := strings.Split(getConfigValueString(cfg.Section("Proxies").Key("MetaProxies"), ""), ",")
crawlerProxyStrict := getConfigValueBool(cfg.Section("Proxies").Key("CrawlerProxyStrict"), defaultConfig.CrawlerProxyStrict)
crawlerProxies := strings.Split(getConfigValueString(cfg.Section("Proxies").Key("CrawlerProxies"), ""), ",")
// Indexing
concurrentStandardCrawlers := getConfigValue(cfg.Section("Indexer").Key("ConcurrentStandardCrawlers"), defaultConfig.ConcurrentStandardCrawlers, strconv.Atoi)
concurrentChromeCrawlers := getConfigValue(cfg.Section("Indexer").Key("ConcurrentChromeCrawlers"), defaultConfig.ConcurrentChromeCrawlers, strconv.Atoi)
crawlingInterval := getConfigValue(cfg.Section("Indexer").Key("CrawlingInterval"), defaultConfig.CrawlingInterval, time.ParseDuration)
maxPagesPerDomain := getConfigValue(cfg.Section("Indexer").Key("MaxPagesPerDomain"), defaultConfig.MaxPagesPerDomain, strconv.Atoi)
indexBatchSize := getConfigValue(cfg.Section("Indexer").Key("IndexBatchSize"), defaultConfig.IndexBatchSize, strconv.Atoi)
// DriveCache
driveDuration := getConfigValue(cfg.Section("DriveCache").Key("Duration"), defaultConfig.DriveCache.Duration, time.ParseDuration)
drivePath := getConfigValueString(cfg.Section("DriveCache").Key("Path"), defaultConfig.DriveCache.Path)
driveMaxUsage := parseMaxUsageDrive(getConfigValueString(cfg.Section("DriveCache").Key("MaxUsage"), formatMaxUsage(defaultConfig.DriveCache.MaxUsageBytes)), drivePath)
// maxConcurrentDownloads, _ := cfg.Section("DriveCache").Key("MaxConcurrentDownloads.Thumbnail").Int()
// if maxConcurrentDownloads == 0 {
// maxConcurrentDownloads = defaultConfig.DriveCache.MaxConcurrentThumbnailDownloads
// }
// RamCache
ramDuration := getConfigValue(cfg.Section("RamCache").Key("Duration"), defaultConfig.RamCache.Duration, time.ParseDuration)
ramMaxUsage := parseMaxUsageRam(getConfigValueString(cfg.Section("RamCache").Key("MaxUsage"), formatMaxUsage(defaultConfig.RamCache.MaxUsageBytes)))
return Config{
Port: port,
Domain: domain,
LogLevel: logLevel,
AuthCode: authCode,
Peers: peers,
NodesEnabled: nodesEnabled,
MetaSearchEnabled: metaSearchEnabled,
IndexerEnabled: indexerEnabled,
WebsiteEnabled: websiteEnabled,
RamCacheEnabled: ramCacheEnabled,
DriveCacheEnabled: driveCacheEnabled,
MetaProxyEnabled: metaProxyEnabled,
MetaProxyStrict: metaProxyStrict,
MetaProxies: metaProxies,
CrawlerProxyEnabled: crawlerProxyEnabled,
CrawlerProxyStrict: crawlerProxyStrict,
CrawlerProxies: crawlerProxies,
ConcurrentStandardCrawlers: concurrentStandardCrawlers,
ConcurrentChromeCrawlers: concurrentChromeCrawlers,
CrawlingInterval: crawlingInterval,
MaxPagesPerDomain: maxPagesPerDomain,
IndexBatchSize: indexBatchSize,
DriveCache: CacheConfig{
Duration: driveDuration,
MaxUsageBytes: driveMaxUsage,
Path: drivePath,
},
RamCache: CacheConfig{
Duration: ramDuration,
MaxUsageBytes: ramMaxUsage,
},
}
}
// getConfigValue retrieves a configuration value or returns a default value from defaultConfig.
func getConfigValue[T any](key *ini.Key, defaultValue T, parseFunc func(string) (T, error)) T {
if key == nil || key.String() == "" {
return defaultValue
}
value, err := parseFunc(key.String())
if err != nil {
return defaultValue
}
return value
}
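// Example usage (these mirror the calls in loadConfig above):
//
//	port := getConfigValue(cfg.Section("Server").Key("Port"), defaultConfig.Port, strconv.Atoi)
//	interval := getConfigValue(cfg.Section("Indexer").Key("CrawlingInterval"), defaultConfig.CrawlingInterval, time.ParseDuration)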
// getConfigValueString retrieves a string value or falls back to the default.
func getConfigValueString(key *ini.Key, defaultValue string) string {
if key == nil || key.String() == "" {
return defaultValue
}
return key.String()
}
// getConfigValueBool retrieves a boolean value or falls back to the default.
func getConfigValueBool(key *ini.Key, defaultValue bool) bool {
if key == nil || key.String() == "" {
return defaultValue
}
return key.MustBool(defaultValue)
}
// Helper to parse MaxUsage string into bytes
func parseMaxUsageRam(value string) uint64 {
const GiB = 1024 * 1024 * 1024
value = strings.TrimSpace(value)
valueNoSpaces := strings.ReplaceAll(value, " ", "")
if strings.HasSuffix(valueNoSpaces, "%") {
percentStr := strings.TrimSuffix(valueNoSpaces, "%")
percent, err := strconv.ParseFloat(percentStr, 64)
if err != nil {
return 0
}
totalMem := getTotalMemory()
return uint64(float64(totalMem) * (percent / 100))
} else if strings.HasSuffix(valueNoSpaces, "GiB") {
sizeStr := strings.TrimSuffix(valueNoSpaces, "GiB")
size, err := strconv.ParseFloat(sizeStr, 64)
if err != nil {
return 0
}
return uint64(size * GiB)
}
return 0
}
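// For example, assuming 32 GiB of total system memory:
//
//	parseMaxUsageRam("80%")   // ~27.5 GB (80% of total RAM)
//	parseMaxUsageRam("2 GiB") // 2147483648 bytes
//	parseMaxUsageRam("512MB") // 0 (only "%" and "GiB" suffixes are recognized)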
// Helper to parse MaxUsage string into bytes based on drive space
func parseMaxUsageDrive(value string, cachePath string) uint64 {
const GiB = 1024 * 1024 * 1024
value = strings.TrimSpace(value)
valueNoSpaces := strings.ReplaceAll(value, " ", "")
totalDiskSpace := getTotalDiskSpace(cachePath)
if totalDiskSpace == 0 {
printErr("Failed to retrieve disk space for path: %s", cachePath)
return 0
}
if strings.HasSuffix(valueNoSpaces, "%") {
percentStr := strings.TrimSuffix(valueNoSpaces, "%")
percent, err := strconv.ParseFloat(percentStr, 64)
if err != nil {
return 0
}
return uint64(float64(totalDiskSpace) * (percent / 100))
} else if strings.HasSuffix(valueNoSpaces, "GiB") {
sizeStr := strings.TrimSuffix(valueNoSpaces, "GiB")
size, err := strconv.ParseFloat(sizeStr, 64)
if err != nil {
return 0
}
return uint64(size * GiB)
}
return 0
}
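// Same parsing rules as parseMaxUsageRam, but percentages are taken relative
// to the total size of the filesystem that cachePath resides on, e.g.:
//
//	parseMaxUsageDrive("90%", "./cache")     // 90% of the cache drive's capacity
//	parseMaxUsageDrive("100 GiB", "./cache") // 107374182400 bytes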
// Get total disk space of the system where cachePath resides
func getTotalDiskSpace(cachePath string) uint64 {
var stat syscall.Statfs_t
// Get filesystem stats for the cache path
absPath, err := filepath.Abs(cachePath)
if err != nil {
printErr("Failed to resolve absolute path for: %s", cachePath)
return 0
}
err = syscall.Statfs(absPath, &stat)
if err != nil {
printErr("Failed to retrieve filesystem stats for: %s", absPath)
return 0
}
// Total disk space in bytes
return stat.Blocks * uint64(stat.Bsize)
}
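// Note: syscall.Statfs and syscall.Statfs_t are only available on Unix-like
// platforms, so this implementation does not build on Windows.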
// Helper to format bytes back to human-readable string
func formatMaxUsage(bytes uint64) string {
const GiB = 1024 * 1024 * 1024
if bytes >= GiB {
return fmt.Sprintf("%.2fGiB", float64(bytes)/GiB)
}
return fmt.Sprintf("%dbytes", bytes)
}
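// Note: values below 1 GiB are written back as raw bytes (e.g. "123456bytes"),
// a form that parseMaxUsageRam and parseMaxUsageDrive do not recognize and
// therefore read as 0 on the next load.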
// Get total memory of the system
func getTotalMemory() uint64 {
v, err := mem.VirtualMemory()
if err != nil {
printErr("Failed to retrieve system memory: %v", err)
return 0
}
return v.Total
}