added privacy policy page and about section, improved dir check, fixed crash when indexer is disabled

This commit is contained in:
partisan 2025-01-05 19:23:53 +01:00
parent 61266c461a
commit 5ae97da6d0
18 changed files with 698 additions and 107 deletions


@@ -14,8 +14,7 @@ var visitedStore *VisitedStore
 // webCrawlerInit is called during init on program start
 func webCrawlerInit() {
-	// Initialize the store with, say, batchSize=50
-	store, err := NewVisitedStore(filepath.Join(config.DriveCache.Path, "visited-urls.txt"), 50)
+	store, err := NewVisitedStore(filepath.Join(config.DriveCache.Path, "visited-urls.txt"), config.IndexBatchSize)
 	if err != nil {
 		printErr("Failed to initialize visited store: %v", err)
 	}
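
This hunk replaces the hardcoded batch size of 50 with config.IndexBatchSize. The VisitedStore type itself is not part of the diff; the following is only a minimal sketch, assuming an append-only file with an in-memory batch, of what such a store could look like. Apart from NewVisitedStore and the batchSize parameter, all names and details here are hypothetical.

package main

import (
	"bufio"
	"os"
	"sync"
)

// VisitedStore buffers visited URLs in memory and flushes them to an
// append-only file once the batch reaches batchSize entries.
type VisitedStore struct {
	mu        sync.Mutex
	writer    *bufio.Writer
	batch     []string
	batchSize int
}

// NewVisitedStore opens (or creates) the backing file and records the flush
// threshold, which after this commit comes from config.IndexBatchSize
// instead of a literal 50. (Sketch only; the real struct is not shown here.)
func NewVisitedStore(path string, batchSize int) (*VisitedStore, error) {
	f, err := os.OpenFile(path, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o644)
	if err != nil {
		return nil, err
	}
	return &VisitedStore{writer: bufio.NewWriter(f), batchSize: batchSize}, nil
}

// MarkVisited queues a URL and flushes the batch once the threshold is hit.
func (s *VisitedStore) MarkVisited(url string) error {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.batch = append(s.batch, url)
	if len(s.batch) < s.batchSize {
		return nil
	}
	for _, u := range s.batch {
		if _, err := s.writer.WriteString(u + "\n"); err != nil {
			return err
		}
	}
	s.batch = s.batch[:0]
	return s.writer.Flush()
}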
@@ -170,7 +169,7 @@ func crawlDomainsToFile(domains [][2]string, maxPages int) error {
 	userAgent, _ := GetUserAgent("crawler-chrome")
 	title, desc, keywords := fetchPageMetadataChrome(fullURL, userAgent)
 	if title == "" || desc == "" {
-		printWarn("Skipping %s: unable to get title/desc data", fullURL)
+		printDebug("Skipping %s: unable to get title/desc data", fullURL) // Debug print for all domains that fail to be crawled
 		continue
 	}
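
The second hunk only changes the log level: a per-domain failure to fetch title/description metadata is now reported through printDebug instead of printWarn, so routine crawl misses no longer clutter the default output. The project's print helpers are not shown in this diff; below is a minimal sketch of how such leveled helpers are commonly gated. The LogLevel field and its numeric levels are assumptions for illustration, not the project's actual config.

package main

import "log"

// Assumed config shape for illustration only; the real project defines its own.
var config = struct{ LogLevel int }{LogLevel: 1} // 0 = errors, 1 = +warnings, 2 = +debug

func printErr(format string, args ...interface{}) {
	log.Printf("[ERR] "+format, args...)
}

func printWarn(format string, args ...interface{}) {
	if config.LogLevel >= 1 {
		log.Printf("[WARN] "+format, args...)
	}
}

// printDebug stays silent unless debug logging is enabled, which is why the
// commit downgrades the noisy per-domain message to this level.
func printDebug(format string, args ...interface{}) {
	if config.LogLevel >= 2 {
		log.Printf("[DEBUG] "+format, args...)
	}
}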