Changed the indexing buffer to be kept in RAM instead of being saved to a file

This commit is contained in:
partisan 2025-01-02 12:55:44 +01:00
parent 918e1823df
commit 61266c461a
4 changed files with 155 additions and 62 deletions

View file

@ -39,7 +39,7 @@ type Config struct {
ConcurrentChromeCrawlers int
CrawlingInterval time.Duration // Refresh crawled results in...
MaxPagesPerDomain int // Max pages to crawl per domain
IndexRefreshInterval time.Duration // Interval for periodic index refresh (e.g., "10m")
IndexBatchSize int
DriveCache CacheConfig
RamCache CacheConfig
@ -60,7 +60,7 @@ var defaultConfig = Config{
ConcurrentChromeCrawlers: 4,
CrawlingInterval: 24 * time.Hour,
MaxPagesPerDomain: 10,
IndexRefreshInterval: 2 * time.Minute,
IndexBatchSize: 50,
LogLevel: 1,
DriveCache: CacheConfig{
Duration: 48 * time.Hour, // Added
@ -255,7 +255,7 @@ func saveConfig(config Config) {
indexerSec.Key("ConcurrentChromeCrawlers").SetValue(strconv.Itoa(config.ConcurrentStandardCrawlers))
indexerSec.Key("CrawlingInterval").SetValue(config.CrawlingInterval.String())
indexerSec.Key("MaxPagesPerDomain").SetValue(strconv.Itoa(config.MaxPagesPerDomain))
indexerSec.Key("IndexRefreshInterval").SetValue(config.IndexRefreshInterval.String())
indexerSec.Key("IndexBatchSize").SetValue(strconv.Itoa(config.IndexBatchSize))
// DriveCache section
driveSec := cfg.Section("DriveCache")
@ -303,7 +303,7 @@ func loadConfig() Config {
concurrentChromeCrawlers := getConfigValue(cfg.Section("Indexer").Key("ConcurrentChromeCrawlers"), defaultConfig.ConcurrentChromeCrawlers, strconv.Atoi)
crawlingInterval := getConfigValue(cfg.Section("Indexer").Key("CrawlingInterval"), defaultConfig.CrawlingInterval, time.ParseDuration)
maxPagesPerDomain := getConfigValue(cfg.Section("Indexer").Key("MaxPagesPerDomain"), defaultConfig.MaxPagesPerDomain, strconv.Atoi)
indexRefreshInterval := getConfigValue(cfg.Section("Indexer").Key("IndexRefreshInterval"), defaultConfig.IndexRefreshInterval, time.ParseDuration)
indexBatchSize := getConfigValue(cfg.Section("Indexer").Key("IndexBatchSize"), defaultConfig.IndexBatchSize, strconv.Atoi)
// DriveCache
driveDuration := getConfigValue(cfg.Section("DriveCache").Key("Duration"), defaultConfig.DriveCache.Duration, time.ParseDuration)
@ -334,7 +334,7 @@ func loadConfig() Config {
ConcurrentChromeCrawlers: concurrentChromeCrawlers,
CrawlingInterval: crawlingInterval,
MaxPagesPerDomain: maxPagesPerDomain,
IndexRefreshInterval: indexRefreshInterval,
IndexBatchSize: indexBatchSize,
DriveCache: CacheConfig{
Duration: driveDuration,
MaxUsageBytes: driveMaxUsage,