From 6b99213ec4ffd1bb20665d8435f401f632f0a08a Mon Sep 17 00:00:00 2001 From: partisan Date: Fri, 9 Aug 2024 12:59:37 +0200 Subject: [PATCH] added "VisitedNodes" to message, to prevent re-requesting --- files.go | 41 ++++++++++++++++++++++++++++------------- forums.go | 30 +++++++++++++++++++++--------- images.go | 42 ++++++++++++++++++++++++++++++++---------- node-request-search.go | 17 ----------------- node.go | 16 ++++++++-------- text.go | 26 ++++++++++++++++---------- video.go | 24 +++++++++++++++--------- 7 files changed, 120 insertions(+), 76 deletions(-) diff --git a/files.go b/files.go index 35c227c..a331623 100644 --- a/files.go +++ b/files.go @@ -82,10 +82,10 @@ func handleFileSearch(w http.ResponseWriter, query, safe, lang string, page int) Settings: Settings{UxLang: lang, Safe: safe}, } - // Debugging: Print results before rendering template - for _, result := range combinedResults { - fmt.Printf("Title: %s, Magnet: %s\n", result.Title, result.Magnet) - } + // // Debugging: Print results before rendering template + // for _, result := range combinedResults { + // fmt.Printf("Title: %s, Magnet: %s\n", result.Title, result.Magnet) + // } if err := tmpl.Execute(w, data); err != nil { log.Printf("Failed to render template: %v", err) @@ -150,15 +150,18 @@ func fetchFileResults(query, safe, lang string, page int) []TorrentResult { if len(results) == 0 { log.Printf("No file results found for query: %s, trying other nodes", query) - results = tryOtherNodesForFileSearch(query, safe, lang, page) + results = tryOtherNodesForFileSearch(query, safe, lang, page, []string{hostID}) } return results } -func tryOtherNodesForFileSearch(query, safe, lang string, page int) []TorrentResult { +func tryOtherNodesForFileSearch(query, safe, lang string, page int, visitedNodes []string) []TorrentResult { for _, nodeAddr := range peers { - results, err := sendFileSearchRequestToNode(nodeAddr, query, safe, lang, page) + if contains(visitedNodes, nodeAddr) { + continue // Skip 
nodes already visited + } + results, err := sendFileSearchRequestToNode(nodeAddr, query, safe, lang, page, visitedNodes) if err != nil { log.Printf("Error contacting node %s: %v", nodeAddr, err) continue @@ -170,19 +173,22 @@ func tryOtherNodesForFileSearch(query, safe, lang string, page int) []TorrentRes return nil } -func sendFileSearchRequestToNode(nodeAddr, query, safe, lang string, page int) ([]TorrentResult, error) { +func sendFileSearchRequestToNode(nodeAddr, query, safe, lang string, page int, visitedNodes []string) ([]TorrentResult, error) { + visitedNodes = append(visitedNodes, nodeAddr) searchParams := struct { - Query string `json:"query"` - Safe string `json:"safe"` - Lang string `json:"lang"` - Page int `json:"page"` - ResponseAddr string `json:"responseAddr"` + Query string `json:"query"` + Safe string `json:"safe"` + Lang string `json:"lang"` + Page int `json:"page"` + ResponseAddr string `json:"responseAddr"` + VisitedNodes []string `json:"visitedNodes"` }{ Query: query, Safe: safe, Lang: lang, Page: page, ResponseAddr: fmt.Sprintf("http://localhost:%d/node", config.Port), + VisitedNodes: visitedNodes, } msgBytes, err := json.Marshal(searchParams) @@ -323,3 +329,12 @@ func sanitizeFileName(name string) string { sanitized = regexp.MustCompile(`[^a-zA-Z0-9\-\(\)]`).ReplaceAllString(sanitized, "") return sanitized } + +func contains(slice []string, item string) bool { + for _, v := range slice { + if v == item { + return true + } + } + return false +} diff --git a/forums.go b/forums.go index 7c4b6c6..9d643a3 100644 --- a/forums.go +++ b/forums.go @@ -100,7 +100,7 @@ func PerformRedditSearch(query string, safe string, page int) ([]ForumSearchResu func handleForumsSearch(w http.ResponseWriter, query, safe, lang string, page int) { results, err := PerformRedditSearch(query, safe, page) - if err != nil || len(results) == 0 || 0 == 0 { // 0 == 0 to force search by other node + if err != nil || len(results) == 0 { // fall back to other nodes when the primary search fails or returns nothing 
log.Printf("No results from primary search, trying other nodes") results = tryOtherNodesForForumSearch(query, safe, lang, page) } @@ -141,7 +141,7 @@ func handleForumsSearch(w http.ResponseWriter, query, safe, lang string, page in func tryOtherNodesForForumSearch(query, safe, lang string, page int) []ForumSearchResult { for _, nodeAddr := range peers { - results, err := sendSearchRequestToNode(nodeAddr, query, safe, lang, page) + results, err := sendSearchRequestToNode(nodeAddr, query, safe, lang, page, []string{}) if err != nil { log.Printf("Error contacting node %s: %v", nodeAddr, err) continue @@ -153,19 +153,31 @@ func tryOtherNodesForForumSearch(query, safe, lang string, page int) []ForumSear return nil } -func sendSearchRequestToNode(nodeAddr, query, safe, lang string, page int) ([]ForumSearchResult, error) { +func sendSearchRequestToNode(nodeAddr, query, safe, lang string, page int, visitedNodes []string) ([]ForumSearchResult, error) { + // Check if the current node has already been visited + for _, node := range visitedNodes { + if node == hostID { + return nil, fmt.Errorf("loop detected: this node (%s) has already been visited", hostID) + } + } + + // Add current node to the list of visited nodes + visitedNodes = append(visitedNodes, hostID) + searchParams := struct { - Query string `json:"query"` - Safe string `json:"safe"` - Lang string `json:"lang"` - Page int `json:"page"` - ResponseAddr string `json:"responseAddr"` + Query string `json:"query"` + Safe string `json:"safe"` + Lang string `json:"lang"` + Page int `json:"page"` + ResponseAddr string `json:"responseAddr"` + VisitedNodes []string `json:"visitedNodes"` }{ Query: query, Safe: safe, Lang: lang, Page: page, ResponseAddr: fmt.Sprintf("http://localhost:%d/node", config.Port), + VisitedNodes: visitedNodes, } msgBytes, err := json.Marshal(searchParams) @@ -188,7 +200,7 @@ func sendSearchRequestToNode(nodeAddr, query, safe, lang string, page int) ([]Fo select { case res := <-resultsChan: return res, 
nil - case <-time.After(20 * time.Second): // Increased timeout duration + case <-time.After(20 * time.Second): return nil, fmt.Errorf("timeout waiting for results from node %s", nodeAddr) } } diff --git a/images.go b/images.go index 429b4df..b78cac7 100644 --- a/images.go +++ b/images.go @@ -10,6 +10,7 @@ import ( ) var imageSearchEngines []SearchEngine +var imageResultsChan = make(chan []ImageSearchResult) func init() { imageSearchEngines = []SearchEngine{ @@ -124,15 +125,18 @@ func fetchImageResults(query, safe, lang string, page int) []ImageSearchResult { // If no results found after trying all engines if len(results) == 0 { log.Printf("No image results found for query: %s, trying other nodes", query) - results = tryOtherNodesForImageSearch(query, safe, lang, page) + results = tryOtherNodesForImageSearch(query, safe, lang, page, []string{hostID}) } return results } -func tryOtherNodesForImageSearch(query, safe, lang string, page int) []ImageSearchResult { +func tryOtherNodesForImageSearch(query, safe, lang string, page int, visitedNodes []string) []ImageSearchResult { for _, nodeAddr := range peers { - results, err := sendImageSearchRequestToNode(nodeAddr, query, safe, lang, page) + if contains(visitedNodes, nodeAddr) { + continue // Skip nodes already visited + } + results, err := sendImageSearchRequestToNode(nodeAddr, query, safe, lang, page, visitedNodes) if err != nil { log.Printf("Error contacting node %s: %v", nodeAddr, err) continue @@ -144,19 +148,22 @@ func tryOtherNodesForImageSearch(query, safe, lang string, page int) []ImageSear return nil } -func sendImageSearchRequestToNode(nodeAddr, query, safe, lang string, page int) ([]ImageSearchResult, error) { +func sendImageSearchRequestToNode(nodeAddr, query, safe, lang string, page int, visitedNodes []string) ([]ImageSearchResult, error) { + visitedNodes = append(visitedNodes, nodeAddr) searchParams := struct { - Query string `json:"query"` - Safe string `json:"safe"` - Lang string `json:"lang"` - Page 
int `json:"page"` - ResponseAddr string `json:"responseAddr"` + Query string `json:"query"` + Safe string `json:"safe"` + Lang string `json:"lang"` + Page int `json:"page"` + ResponseAddr string `json:"responseAddr"` + VisitedNodes []string `json:"visitedNodes"` }{ Query: query, Safe: safe, Lang: lang, Page: page, ResponseAddr: fmt.Sprintf("http://localhost:%d/node", config.Port), + VisitedNodes: visitedNodes, } msgBytes, err := json.Marshal(searchParams) @@ -179,7 +186,7 @@ func sendImageSearchRequestToNode(nodeAddr, query, safe, lang string, page int) select { case res := <-imageResultsChan: return res, nil - case <-time.After(30 * time.Second): // Need to handle this better, setting a static number is stupid + case <-time.After(30 * time.Second): return nil, fmt.Errorf("timeout waiting for results from node %s", nodeAddr) } } @@ -197,3 +204,18 @@ func wrapImageSearchFunc(f func(string, string, string, int) ([]ImageSearchResul return searchResults, duration, nil } } + +func handleImageResultsMessage(msg Message) { + var results []ImageSearchResult + err := json.Unmarshal([]byte(msg.Content), &results) + if err != nil { + log.Printf("Error unmarshalling image results: %v", err) + return + } + + log.Printf("Received image results: %+v", results) + // Send results to imageResultsChan + go func() { + imageResultsChan <- results + }() +} diff --git a/node-request-search.go b/node-request-search.go index 2d61c1b..5bc331e 100644 --- a/node-request-search.go +++ b/node-request-search.go @@ -61,21 +61,4 @@ func fetchForumResults(query, safe, lang string, page int) []ForumSearchResult { ////// IMAGES ///// -var imageResultsChan = make(chan []ImageSearchResult) - -func handleImageResultsMessage(msg Message) { - var results []ImageSearchResult - err := json.Unmarshal([]byte(msg.Content), &results) - if err != nil { - log.Printf("Error unmarshalling image results: %v", err) - return - } - - log.Printf("Received image results: %+v", results) - // Send results to 
imageResultsChan - go func() { - imageResultsChan <- results - }() -} - ////// IMAGES ///// diff --git a/node.go b/node.go index 56ffd82..dc0a4eb 100644 --- a/node.go +++ b/node.go @@ -21,9 +21,10 @@ var ( ) type Message struct { - ID string `json:"id"` - Type string `json:"type"` - Content string `json:"content"` + ID string `json:"id"` + Type string `json:"type"` + Content string `json:"content"` + VisitedNodes []string `json:"visitedNodes"` } type CrawlerConfig struct { @@ -149,18 +150,17 @@ func interpretMessage(msg Message) { case "search-file": handleSearchFileMessage(msg) case "search-forum": - log.Println("Received search-forum message:", msg.Content) handleSearchForumMessage(msg) case "forum-results": handleForumResultsMessage(msg) case "text-results": - handleTextResultsMessage(msg) // need to implement + handleTextResultsMessage(msg) case "image-results": - handleImageResultsMessage(msg) // need to implement + handleImageResultsMessage(msg) case "video-results": - handleVideoResultsMessage(msg) // need to implement + handleVideoResultsMessage(msg) case "file-results": - handleFileResultsMessage(msg) // need to implement + handleFileResultsMessage(msg) default: fmt.Println("Received unknown message type:", msg.Type) } diff --git a/text.go b/text.go index a6c3283..a0ab6bf 100644 --- a/text.go +++ b/text.go @@ -149,7 +149,7 @@ func fetchTextResults(query, safe, lang string, page int) []TextSearchResult { // If no results found after trying all engines if len(results) == 0 { log.Printf("No text results found for query: %s, trying other nodes", query) - results = tryOtherNodesForTextSearch(query, safe, lang, page) + results = tryOtherNodesForTextSearch(query, safe, lang, page, []string{hostID}) } return results @@ -183,9 +183,12 @@ func wrapTextSearchFunc(f func(string, string, string, int) ([]TextSearchResult, } } -func tryOtherNodesForTextSearch(query, safe, lang string, page int) []TextSearchResult { +func tryOtherNodesForTextSearch(query, safe, lang 
string, page int, visitedNodes []string) []TextSearchResult { for _, nodeAddr := range peers { - results, err := sendTextSearchRequestToNode(nodeAddr, query, safe, lang, page) + if contains(visitedNodes, nodeAddr) { + continue // Skip nodes already visited + } + results, err := sendTextSearchRequestToNode(nodeAddr, query, safe, lang, page, visitedNodes) if err != nil { log.Printf("Error contacting node %s: %v", nodeAddr, err) continue @@ -197,19 +200,22 @@ func tryOtherNodesForTextSearch(query, safe, lang string, page int) []TextSearch return nil } -func sendTextSearchRequestToNode(nodeAddr, query, safe, lang string, page int) ([]TextSearchResult, error) { +func sendTextSearchRequestToNode(nodeAddr, query, safe, lang string, page int, visitedNodes []string) ([]TextSearchResult, error) { + visitedNodes = append(visitedNodes, nodeAddr) searchParams := struct { - Query string `json:"query"` - Safe string `json:"safe"` - Lang string `json:"lang"` - Page int `json:"page"` - ResponseAddr string `json:"responseAddr"` + Query string `json:"query"` + Safe string `json:"safe"` + Lang string `json:"lang"` + Page int `json:"page"` + ResponseAddr string `json:"responseAddr"` + VisitedNodes []string `json:"visitedNodes"` }{ Query: query, Safe: safe, Lang: lang, Page: page, ResponseAddr: fmt.Sprintf("http://localhost:%d/node", config.Port), + VisitedNodes: visitedNodes, } msgBytes, err := json.Marshal(searchParams) @@ -232,7 +238,7 @@ func sendTextSearchRequestToNode(nodeAddr, query, safe, lang string, page int) ( select { case res := <-textResultsChan: return res, nil - case <-time.After(20 * time.Second): // Increased timeout duration + case <-time.After(20 * time.Second): return nil, fmt.Errorf("timeout waiting for results from node %s", nodeAddr) } } diff --git a/video.go b/video.go index 697f3c7..974705d 100644 --- a/video.go +++ b/video.go @@ -155,7 +155,7 @@ func handleVideoSearch(w http.ResponseWriter, query, safe, lang string, page int results := fetchVideoResults(query, 
safe, lang, page) if len(results) == 0 { log.Printf("No results from primary search, trying other nodes") - results = tryOtherNodesForVideoSearch(query, safe, lang, page) + results = tryOtherNodesForVideoSearch(query, safe, lang, page, []string{hostID}) } elapsed := time.Since(start) @@ -210,9 +210,12 @@ func fetchVideoResults(query, safe, lang string, page int) []VideoResult { return results } -func tryOtherNodesForVideoSearch(query, safe, lang string, page int) []VideoResult { +func tryOtherNodesForVideoSearch(query, safe, lang string, page int, visitedNodes []string) []VideoResult { for _, nodeAddr := range peers { - results, err := sendVideoSearchRequestToNode(nodeAddr, query, safe, lang, page) + if contains(visitedNodes, nodeAddr) { + continue // Skip nodes already visited + } + results, err := sendVideoSearchRequestToNode(nodeAddr, query, safe, lang, page, visitedNodes) if err != nil { log.Printf("Error contacting node %s: %v", nodeAddr, err) continue @@ -224,19 +227,22 @@ func tryOtherNodesForVideoSearch(query, safe, lang string, page int) []VideoResu return nil } -func sendVideoSearchRequestToNode(nodeAddr, query, safe, lang string, page int) ([]VideoResult, error) { +func sendVideoSearchRequestToNode(nodeAddr, query, safe, lang string, page int, visitedNodes []string) ([]VideoResult, error) { + visitedNodes = append(visitedNodes, nodeAddr) searchParams := struct { - Query string `json:"query"` - Safe string `json:"safe"` - Lang string `json:"lang"` - Page int `json:"page"` - ResponseAddr string `json:"responseAddr"` + Query string `json:"query"` + Safe string `json:"safe"` + Lang string `json:"lang"` + Page int `json:"page"` + ResponseAddr string `json:"responseAddr"` + VisitedNodes []string `json:"visitedNodes"` }{ Query: query, Safe: safe, Lang: lang, Page: page, ResponseAddr: fmt.Sprintf("http://localhost:%d/node", config.Port), + VisitedNodes: visitedNodes, } msgBytes, err := json.Marshal(searchParams)