added privacy policy page and about section, improved dir check, fixed crash when indexer is disabled
This commit is contained in:
parent
61266c461a
commit
5ae97da6d0
18 changed files with 698 additions and 107 deletions
|
@@ -14,8 +14,7 @@ var visitedStore *VisitedStore
|
|||
|
||||
// webCrawlerInit is called during init on program start
|
||||
func webCrawlerInit() {
|
||||
// Initialize the store with, say, batchSize=50
|
||||
store, err := NewVisitedStore(filepath.Join(config.DriveCache.Path, "visited-urls.txt"), 50)
|
||||
store, err := NewVisitedStore(filepath.Join(config.DriveCache.Path, "visited-urls.txt"), config.IndexBatchSize)
|
||||
if err != nil {
|
||||
printErr("Failed to initialize visited store: %v", err)
|
||||
}
|
||||
|
@@ -170,7 +169,7 @@ func crawlDomainsToFile(domains [][2]string, maxPages int) error {
|
|||
userAgent, _ := GetUserAgent("crawler-chrome")
|
||||
title, desc, keywords := fetchPageMetadataChrome(fullURL, userAgent)
|
||||
if title == "" || desc == "" {
|
||||
printWarn("Skipping %s: unable to get title/desc data", fullURL)
|
||||
printDebug("Skipping %s: unable to get title/desc data", fullURL) // Here is print for all domains that fail to be crawled
|
||||
continue
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue