Added website crawling and indexing of crawled results

This commit is contained in:
partisan 2024-12-29 22:54:55 +01:00
parent 5b90a372a1
commit 047cccd19f
10 changed files with 819 additions and 97 deletions

113
text.go
View file

@ -73,14 +73,10 @@ func getTextResultsFromCacheOrFetch(cacheKey CacheKey, query, safe, lang string,
select {
case results := <-cacheChan:
if results == nil {
// Fetch only if the cache miss occurs and Crawler is enabled
if config.CrawlerEnabled {
combinedResults = fetchTextResults(query, safe, lang, page)
if len(combinedResults) > 0 {
resultsCache.Set(cacheKey, convertToSearchResults(combinedResults))
}
} else {
printInfo("Crawler disabled; skipping fetching.")
// Always attempt to fetch results on a cache miss
combinedResults = fetchTextResults(query, safe, lang, page)
if len(combinedResults) > 0 {
resultsCache.Set(cacheKey, convertToSearchResults(combinedResults))
}
} else {
textResults, _, _, _ := convertToSpecificResults(results)
@ -88,13 +84,10 @@ func getTextResultsFromCacheOrFetch(cacheKey CacheKey, query, safe, lang string,
}
case <-time.After(2 * time.Second):
printInfo("Cache check timeout")
if config.CrawlerEnabled {
combinedResults = fetchTextResults(query, safe, lang, page)
if len(combinedResults) > 0 {
resultsCache.Set(cacheKey, convertToSearchResults(combinedResults))
}
} else {
printInfo("Crawler disabled; skipping fetching.")
// Even on timeout, attempt to fetch results
combinedResults = fetchTextResults(query, safe, lang, page)
if len(combinedResults) > 0 {
resultsCache.Set(cacheKey, convertToSearchResults(combinedResults))
}
}
@ -121,54 +114,70 @@ func prefetchPage(query, safe, lang string, page int) {
func fetchTextResults(query, safe, lang string, page int) []TextSearchResult {
var results []TextSearchResult
// If Crawler is disabled, do not fetch from search engines
if !config.CrawlerEnabled {
printDebug("Crawler is disabled; skipping search engine fetching.")
return results // Return an empty list
}
printDebug("Crawler is disabled; fetching from local index.")
engineCount := len(textSearchEngines)
// Calculate the starting position based on the page number
indexedResults, err := SearchIndex(query, page, 10)
if err != nil {
printErr("Error searching the index: %v", err)
return results // Return empty results on error
}
// Determine which engine to use for the current page
engineIndex := (page - 1) % engineCount
engine := textSearchEngines[engineIndex]
// Convert indexed results to TextSearchResult format
for _, doc := range indexedResults {
results = append(results, TextSearchResult{
URL: doc.Link,
Header: doc.Title,
Description: doc.Description,
Source: doc.Tags,
})
}
// Calculate the page number for this engine
enginePage := (page-1)/engineCount + 1
// Debug print to verify engine and page number being fetched
printDebug("Fetching results for overall page %d using engine: %s (engine page %d)", page, engine.Name, enginePage)
// Fetch results from the selected engine
searchResults, _, err := engine.Func(query, safe, lang, enginePage)
if err != nil {
printWarn("Error performing search with %s: %v", engine.Name, err)
return results
} else {
results = append(results, validateResults(searchResults)...)
}
// Crawler is enabled, so use the search engines
engineCount := len(textSearchEngines)
// If no results are found with the selected engine, try the next in line
if len(results) == 0 {
for i := 1; i < engineCount; i++ {
nextEngine := textSearchEngines[(engineIndex+i)%engineCount]
enginePage = (page-1)/engineCount + 1 // Recalculate for the new engine
printInfo("No results found, trying next engine: %s (engine page %d)", nextEngine.Name, enginePage)
// Determine which engine to use for the current page
engineIndex := (page - 1) % engineCount
engine := textSearchEngines[engineIndex]
searchResults, _, err := nextEngine.Func(query, safe, lang, enginePage)
if err != nil {
printWarn("Error performing search with %s: %v", nextEngine.Name, err)
continue
}
// Calculate the page number for this engine
enginePage := (page-1)/engineCount + 1
printDebug("Fetching results for overall page %d using engine: %s (engine page %d)", page, engine.Name, enginePage)
// Fetch results from the selected engine
searchResults, _, err := engine.Func(query, safe, lang, enginePage)
if err != nil {
printWarn("Error performing search with %s: %v", engine.Name, err)
} else {
results = append(results, validateResults(searchResults)...)
if len(results) > 0 {
break
}
// If no results are found with the selected engine, try the next in line
if len(results) == 0 {
for i := 1; i < engineCount; i++ {
nextEngine := textSearchEngines[(engineIndex+i)%engineCount]
enginePage = (page-1)/engineCount + 1
printInfo("No results found, trying next engine: %s (engine page %d)", nextEngine.Name, enginePage)
searchResults, _, err := nextEngine.Func(query, safe, lang, enginePage)
if err != nil {
printWarn("Error performing search with %s: %v", nextEngine.Name, err)
continue
}
results = append(results, validateResults(searchResults)...)
if len(results) > 0 {
break
}
}
}
printInfo("Fetched %d results for overall page %d", len(results), page)
return results
}
printInfo("Fetched %d results for overall page %d", len(results), page)
return results
}
func validateResults(searchResults []SearchResult) []TextSearchResult {