added website crawling and indexing crawled results
parent 5b90a372a1
commit 047cccd19f
10 changed files with 819 additions and 97 deletions
text.go (113 changed lines)
@@ -73,14 +73,10 @@ func getTextResultsFromCacheOrFetch(cacheKey CacheKey, query, safe, lang string,
 	select {
 	case results := <-cacheChan:
 		if results == nil {
-			// Fetch only if the cache miss occurs and Crawler is enabled
-			if config.CrawlerEnabled {
-				combinedResults = fetchTextResults(query, safe, lang, page)
-				if len(combinedResults) > 0 {
-					resultsCache.Set(cacheKey, convertToSearchResults(combinedResults))
-				}
-			} else {
-				printInfo("Crawler disabled; skipping fetching.")
+			// Always attempt to fetch results on a cache miss
+			combinedResults = fetchTextResults(query, safe, lang, page)
+			if len(combinedResults) > 0 {
+				resultsCache.Set(cacheKey, convertToSearchResults(combinedResults))
 			}
 		} else {
 			textResults, _, _, _ := convertToSpecificResults(results)
@@ -88,13 +84,10 @@ func getTextResultsFromCacheOrFetch(cacheKey CacheKey, query, safe, lang string,
 		}
 	case <-time.After(2 * time.Second):
 		printInfo("Cache check timeout")
-		if config.CrawlerEnabled {
-			combinedResults = fetchTextResults(query, safe, lang, page)
-			if len(combinedResults) > 0 {
-				resultsCache.Set(cacheKey, convertToSearchResults(combinedResults))
-			}
-		} else {
-			printInfo("Crawler disabled; skipping fetching.")
+		// Even on timeout, attempt to fetch results
+		combinedResults = fetchTextResults(query, safe, lang, page)
+		if len(combinedResults) > 0 {
+			resultsCache.Set(cacheKey, convertToSearchResults(combinedResults))
 		}
 	}
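Both hunks above change the same pattern: the cached-results channel is raced against a two-second timeout, and on a cache miss or timeout the caller now always calls fetchTextResults, leaving the crawler-enabled/disabled decision to that function. Below is a minimal, self-contained sketch of that select/timeout pattern; the types and helper here are simplified stand-ins, not the project's actual CacheKey, resultsCache, or fetchTextResults.

package main

import (
	"fmt"
	"time"
)

// fetchResults is a stand-in for the project's fetchTextResults.
func fetchResults(query string) []string {
	return []string{"result for " + query}
}

// getFromCacheOrFetch races a cache lookup against a timeout and falls back
// to fetching on a miss or timeout, mirroring the pattern in the hunks above.
func getFromCacheOrFetch(cacheChan <-chan []string, query string) []string {
	select {
	case cached := <-cacheChan:
		if cached == nil {
			// Cache miss: always attempt to fetch.
			return fetchResults(query)
		}
		return cached
	case <-time.After(2 * time.Second):
		// Cache check timed out: fetch anyway.
		return fetchResults(query)
	}
}

func main() {
	cacheChan := make(chan []string, 1)
	cacheChan <- nil // simulate a cache miss delivered by the cache layer
	fmt.Println(getFromCacheOrFetch(cacheChan, "example query"))
}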
@@ -121,54 +114,70 @@ func prefetchPage(query, safe, lang string, page int) {
 func fetchTextResults(query, safe, lang string, page int) []TextSearchResult {
 	var results []TextSearchResult
 
 	// If Crawler is disabled, do not fetch from search engines
 	if !config.CrawlerEnabled {
-		printDebug("Crawler is disabled; skipping search engine fetching.")
-		return results // Return an empty list
-	}
-
-	engineCount := len(textSearchEngines)
-
-	// Determine which engine to use for the current page
-	engineIndex := (page - 1) % engineCount
-	engine := textSearchEngines[engineIndex]
-
-	// Calculate the page number for this engine
-	enginePage := (page-1)/engineCount + 1
-
-	// Debug print to verify engine and page number being fetched
-	printDebug("Fetching results for overall page %d using engine: %s (engine page %d)", page, engine.Name, enginePage)
-
-	// Fetch results from the selected engine
-	searchResults, _, err := engine.Func(query, safe, lang, enginePage)
-	if err != nil {
-		printWarn("Error performing search with %s: %v", engine.Name, err)
-		return results
-	} else {
-		results = append(results, validateResults(searchResults)...)
-	}
-
-	// If no results are found with the selected engine, try the next in line
-	if len(results) == 0 {
-		for i := 1; i < engineCount; i++ {
-			nextEngine := textSearchEngines[(engineIndex+i)%engineCount]
-			enginePage = (page-1)/engineCount + 1 // Recalculate for the new engine
-			printInfo("No results found, trying next engine: %s (engine page %d)", nextEngine.Name, enginePage)
-
-			searchResults, _, err := nextEngine.Func(query, safe, lang, enginePage)
-			if err != nil {
-				printWarn("Error performing search with %s: %v", nextEngine.Name, err)
-				continue
-			}
-			results = append(results, validateResults(searchResults)...)
-			if len(results) > 0 {
-				break
-			}
-		}
-	}
-
-	printInfo("Fetched %d results for overall page %d", len(results), page)
-	return results
-}
+		printDebug("Crawler is disabled; fetching from local index.")
+
+		// Calculate the starting position based on the page number
+		indexedResults, err := SearchIndex(query, page, 10)
+		if err != nil {
+			printErr("Error searching the index: %v", err)
+			return results // Return empty results on error
+		}
+
+		// Convert indexed results to TextSearchResult format
+		for _, doc := range indexedResults {
+			results = append(results, TextSearchResult{
+				URL:         doc.Link,
+				Header:      doc.Title,
+				Description: doc.Description,
+				Source:      doc.Tags,
+			})
+		}
+
+		printInfo("Fetched %d results for overall page %d", len(results), page)
+		return results
+	}
+
+	// Crawler is enabled, so use the search engines
+	engineCount := len(textSearchEngines)
+
+	// Determine which engine to use for the current page
+	engineIndex := (page - 1) % engineCount
+	engine := textSearchEngines[engineIndex]
+
+	// Calculate the page number for this engine
+	enginePage := (page-1)/engineCount + 1
+
+	printDebug("Fetching results for overall page %d using engine: %s (engine page %d)", page, engine.Name, enginePage)
+
+	// Fetch results from the selected engine
+	searchResults, _, err := engine.Func(query, safe, lang, enginePage)
+	if err != nil {
+		printWarn("Error performing search with %s: %v", engine.Name, err)
+	} else {
+		results = append(results, validateResults(searchResults)...)
+	}
+
+	// If no results are found with the selected engine, try the next in line
+	if len(results) == 0 {
+		for i := 1; i < engineCount; i++ {
+			nextEngine := textSearchEngines[(engineIndex+i)%engineCount]
+			enginePage = (page-1)/engineCount + 1
+			printInfo("No results found, trying next engine: %s (engine page %d)", nextEngine.Name, enginePage)
+
+			searchResults, _, err := nextEngine.Func(query, safe, lang, enginePage)
+			if err != nil {
+				printWarn("Error performing search with %s: %v", nextEngine.Name, err)
+				continue
+			}
+			results = append(results, validateResults(searchResults)...)
+			if len(results) > 0 {
+				break
+			}
+		}
+	}
+
+	printInfo("Fetched %d results for overall page %d", len(results), page)
+
+	return results
+}
 
 func validateResults(searchResults []SearchResult) []TextSearchResult {
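When the crawler is enabled, the rewritten fetchTextResults keeps the existing round-robin mapping from the overall result page to a specific engine and an engine-local page: engineIndex = (page - 1) % engineCount selects the engine, and enginePage = (page-1)/engineCount + 1 is the page requested from that engine. A small worked example of that arithmetic follows, assuming a hypothetical list of three engines; the real textSearchEngines list is defined elsewhere in the project.

package main

import "fmt"

func main() {
	// Hypothetical engine names used only for illustration.
	engines := []string{"EngineA", "EngineB", "EngineC"}
	engineCount := len(engines)

	// Reproduce the mapping used in fetchTextResults:
	// overall page -> (engine, engine-local page).
	for page := 1; page <= 6; page++ {
		engineIndex := (page - 1) % engineCount
		enginePage := (page-1)/engineCount + 1
		fmt.Printf("overall page %d -> %s, engine page %d\n", page, engines[engineIndex], enginePage)
	}
}

With three engines, overall pages 1 to 3 map to the first page of each engine and pages 4 to 6 to their second page; the fallback loop in the hunk above simply advances engineIndex when the selected engine returns nothing.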