diff --git a/.gitignore b/.gitignore
index 118b838..5f5aeab 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,4 +5,5 @@ image_cache/
cache/
*.min.js
*.min.css
-qgato
\ No newline at end of file
+qgato
+test.py
\ No newline at end of file
diff --git a/config.go b/config.go
index 4ea4eb2..bdd9ccc 100644
--- a/config.go
+++ b/config.go
@@ -23,43 +23,45 @@ type CacheConfig struct {
}
type Config struct {
- Port int // Added
- AuthCode string // Added
- PeerID string // Added
- Peers []string
- Domain string // Added
- NodesEnabled bool // Added
- CrawlerEnabled bool // Added
- IndexerEnabled bool // Added
- WebsiteEnabled bool // Added
- RamCacheEnabled bool
- DriveCacheEnabled bool // Added
- LogLevel int // Added
- ConcurrentCrawlers int // Number of concurrent crawlers
- CrawlingInterval time.Duration // Refres crawled results in...
- MaxPagesPerDomain int // Max pages to crawl per domain
- IndexRefreshInterval time.Duration // Interval for periodic index refresh (e.g., "10m")
+ Port int // Added
+ AuthCode string // Added
+ PeerID string // Added
+ Peers []string
+ Domain string // Added
+ NodesEnabled bool // Added
+ CrawlerEnabled bool // Added
+ IndexerEnabled bool // Added
+ WebsiteEnabled bool // Added
+ RamCacheEnabled bool
+ DriveCacheEnabled bool // Added
+ LogLevel int // Added
+ ConcurrentStandardCrawlers int
+ ConcurrentChromeCrawlers int
+	CrawlingInterval           time.Duration // Refresh crawled results in...
+ MaxPagesPerDomain int // Max pages to crawl per domain
+ IndexRefreshInterval time.Duration // Interval for periodic index refresh (e.g., "10m")
DriveCache CacheConfig
RamCache CacheConfig
}
var defaultConfig = Config{
- Port: 5000,
- Domain: "localhost",
- Peers: []string{},
- AuthCode: generateStrongRandomString(64),
- NodesEnabled: false,
- CrawlerEnabled: true,
- IndexerEnabled: false,
- WebsiteEnabled: true,
- RamCacheEnabled: true,
- DriveCacheEnabled: false,
- ConcurrentCrawlers: 5,
- CrawlingInterval: 24 * time.Hour,
- MaxPagesPerDomain: 10,
- IndexRefreshInterval: 2 * time.Minute,
- LogLevel: 1,
+ Port: 5000,
+ Domain: "localhost",
+ Peers: []string{},
+ AuthCode: generateStrongRandomString(64),
+ NodesEnabled: false,
+ CrawlerEnabled: true,
+ IndexerEnabled: false,
+ WebsiteEnabled: true,
+ RamCacheEnabled: true,
+ DriveCacheEnabled: false,
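+	// Standard (plain-HTTP) crawlers are cheap, so more of them can run in
+	// parallel than the headless-Chrome crawlers they fall back to.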
+ ConcurrentStandardCrawlers: 12,
+ ConcurrentChromeCrawlers: 4,
+ CrawlingInterval: 24 * time.Hour,
+ MaxPagesPerDomain: 10,
+ IndexRefreshInterval: 2 * time.Minute,
+ LogLevel: 1,
DriveCache: CacheConfig{
Duration: 48 * time.Hour, // Added
Path: "./cache", // Added
@@ -249,7 +251,8 @@ func saveConfig(config Config) {
// Indexer section
indexerSec := cfg.Section("Indexer")
- indexerSec.Key("ConcurrentCrawlers").SetValue(strconv.Itoa(config.ConcurrentCrawlers))
+ indexerSec.Key("ConcurrentStandardCrawlers").SetValue(strconv.Itoa(config.ConcurrentStandardCrawlers))
+	indexerSec.Key("ConcurrentChromeCrawlers").SetValue(strconv.Itoa(config.ConcurrentChromeCrawlers))
indexerSec.Key("CrawlingInterval").SetValue(config.CrawlingInterval.String())
indexerSec.Key("MaxPagesPerDomain").SetValue(strconv.Itoa(config.MaxPagesPerDomain))
indexerSec.Key("IndexRefreshInterval").SetValue(config.IndexRefreshInterval.String())
@@ -296,7 +299,8 @@ func loadConfig() Config {
driveCacheEnabled := getConfigValueBool(cfg.Section("Features").Key("DriveCache"), defaultConfig.DriveCacheEnabled)
// Indexing
- concurrentCrawlers := getConfigValue(cfg.Section("Indexer").Key("ConcurrentCrawlers"), defaultConfig.ConcurrentCrawlers, strconv.Atoi)
+ concurrentStandardCrawlers := getConfigValue(cfg.Section("Indexer").Key("ConcurrentStandardCrawlers"), defaultConfig.ConcurrentStandardCrawlers, strconv.Atoi)
+ concurrentChromeCrawlers := getConfigValue(cfg.Section("Indexer").Key("ConcurrentChromeCrawlers"), defaultConfig.ConcurrentChromeCrawlers, strconv.Atoi)
crawlingInterval := getConfigValue(cfg.Section("Indexer").Key("CrawlingInterval"), defaultConfig.CrawlingInterval, time.ParseDuration)
maxPagesPerDomain := getConfigValue(cfg.Section("Indexer").Key("MaxPagesPerDomain"), defaultConfig.MaxPagesPerDomain, strconv.Atoi)
indexRefreshInterval := getConfigValue(cfg.Section("Indexer").Key("IndexRefreshInterval"), defaultConfig.IndexRefreshInterval, time.ParseDuration)
@@ -315,21 +319,22 @@ func loadConfig() Config {
ramMaxUsage := parseMaxUsageRam(getConfigValueString(cfg.Section("RamCache").Key("MaxUsage"), formatMaxUsage(defaultConfig.RamCache.MaxUsageBytes)))
return Config{
- Port: port,
- Domain: domain,
- LogLevel: logLevel,
- AuthCode: authCode,
- Peers: peers,
- NodesEnabled: nodesEnabled,
- CrawlerEnabled: crawlerEnabled,
- IndexerEnabled: indexerEnabled,
- WebsiteEnabled: websiteEnabled,
- RamCacheEnabled: ramCacheEnabled,
- DriveCacheEnabled: driveCacheEnabled,
- ConcurrentCrawlers: concurrentCrawlers,
- CrawlingInterval: crawlingInterval,
- MaxPagesPerDomain: maxPagesPerDomain,
- IndexRefreshInterval: indexRefreshInterval,
+ Port: port,
+ Domain: domain,
+ LogLevel: logLevel,
+ AuthCode: authCode,
+ Peers: peers,
+ NodesEnabled: nodesEnabled,
+ CrawlerEnabled: crawlerEnabled,
+ IndexerEnabled: indexerEnabled,
+ WebsiteEnabled: websiteEnabled,
+ RamCacheEnabled: ramCacheEnabled,
+ DriveCacheEnabled: driveCacheEnabled,
+ ConcurrentStandardCrawlers: concurrentStandardCrawlers,
+ ConcurrentChromeCrawlers: concurrentChromeCrawlers,
+ CrawlingInterval: crawlingInterval,
+ MaxPagesPerDomain: maxPagesPerDomain,
+ IndexRefreshInterval: indexRefreshInterval,
DriveCache: CacheConfig{
Duration: driveDuration,
MaxUsageBytes: driveMaxUsage,
diff --git a/crawler-extraction.go b/crawler-extraction.go
index 1594bef..4ce8b9d 100644
--- a/crawler-extraction.go
+++ b/crawler-extraction.go
@@ -1,69 +1,99 @@
package main
import (
+ "context"
"net/http"
"net/url"
"strings"
"time"
+ "github.com/chromedp/cdproto/emulation"
+ "github.com/chromedp/chromedp"
"github.com/go-shiori/go-readability"
"golang.org/x/net/html"
)
-// fetchPageMetadata tries extracting title/description/keywords from standard HTML,
-// OG, Twitter, then falls back to go-readability if needed. If after all that we
-// still have no title or no description, we return ("", "", "") so the caller
-// can skip saving it.
-//
-// 1. <title>, <meta name="description">, <meta name="keywords">
-// 2. <meta property="og:title">, <meta property="og:description">
-// 3. <meta name="twitter:title">, <meta name="twitter:description">
-// 4. go-readability fallback (if title or description is still missing)
-// 5. Basic heuristic to detect “wrong” content from readability (e.g. raw HTML or “readability-page-1”).
-func fetchPageMetadata(pageURL string) (string, string, string) {
- userAgent, err := GetUserAgent("crawler")
+// fetchPageMetadataStandard tries standard HTML parse + go-readability only.
+func fetchPageMetadataStandard(pageURL, userAgent string) (string, string, string) {
+ // 1. Standard HTML parse
+ title, desc, keywords := extractStandard(pageURL, userAgent)
+
+ // 2. Fallback: go-readability
+ if title == "" || desc == "" {
+ title, desc, keywords = fallbackReadability(pageURL, userAgent, title, desc, keywords)
+ }
+
+ // If still empty, return ("", "", "")
+ if title == "" || desc == "" {
+ return "", "", ""
+ }
+ return sanitize(title), sanitize(desc), sanitize(keywords)
+}
+
+// fetchPageMetadataChrome uses Chromedp to handle JavaScript-rendered pages.
+func fetchPageMetadataChrome(pageURL, userAgent string) (string, string, string) {
+ // Create context
+ ctx, cancel := chromedp.NewContext(context.Background())
+ defer cancel()
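+	// NOTE: each call spins up its own browser context; for large crawls, a
+	// shared chromedp.NewExecAllocator could amortize browser startup cost
+	// across workers (a possible optimization, not done here).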
+
+ var renderedHTML string
+ err := chromedp.Run(ctx,
+ emulation.SetUserAgentOverride(userAgent).WithAcceptLanguage("en-US,en;q=0.9"),
+ chromedp.Navigate(pageURL),
+ chromedp.Sleep(2*time.Second), // Let JS run a bit
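+		// A fixed delay is a crude heuristic; waiting on a selector instead
+		// (e.g. chromedp.WaitReady("body")) would be more deterministic.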
+ chromedp.OuterHTML("html", &renderedHTML),
+ )
if err != nil {
- printDebug("Failed to generate User-Agent: %v", err)
+ printDebug("chromedp error for %s: %v", pageURL, err)
return "", "", ""
}
+ doc, err := html.Parse(strings.NewReader(renderedHTML))
+ if err != nil {
+ printDebug("chromedp parse error for %s: %v", pageURL, err)
+ return "", "", ""
+ }
+
+	title, desc, keywords := extractParsedDOM(doc)
+	// Sanitize to match the standard path, so '|' or newlines in rendered
+	// metadata can't corrupt the pipe-delimited output file.
+	return sanitize(title), sanitize(desc), sanitize(keywords)
+}
+
+// extractStandard does the normal HTML parse with OG, Twitter, etc.
+func extractStandard(pageURL, userAgent string) (title, desc, keywords string) {
client := &http.Client{Timeout: 15 * time.Second}
req, err := http.NewRequest("GET", pageURL, nil)
if err != nil {
printDebug("Failed to create request for %s: %v", pageURL, err)
- return "", "", ""
+ return
}
-
- // Force English content when possible
req.Header.Set("User-Agent", userAgent)
req.Header.Set("Accept-Language", "en-US,en;q=0.9")
resp, err := client.Do(req)
if err != nil {
printDebug("Failed to GET %s: %v", pageURL, err)
- return "", "", ""
+ return
}
defer resp.Body.Close()
- // Skip non-2xx
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
printDebug("Skipping %s due to HTTP status: %d", pageURL, resp.StatusCode)
- return "", "", ""
+ return
}
- // First pass: standard HTML parse
doc, err := html.Parse(resp.Body)
if err != nil {
printDebug("HTML parse error for %s: %v", pageURL, err)
- return "", "", ""
+ return
}
- var (
- title, desc, keywords string
- ogTitle, ogDesc string
- twTitle, twDesc string
- foundTitle, foundDesc bool
- )
+ return extractParsedDOM(doc)
+}
+
+// extractParsedDOM uses the same logic to parse <title>, meta, OG, and Twitter tags.
+func extractParsedDOM(doc *html.Node) (title, desc, keywords string) {
+ var ogTitle, ogDesc string
+ var twTitle, twDesc string
+ var foundTitle, foundDesc bool
var walk func(*html.Node)
walk = func(n *html.Node) {
@@ -87,7 +117,6 @@ func fetchPageMetadata(pageURL string) (string, string, string) {
}
}
- // Standard meta tags
switch metaName {
case "description":
desc = contentVal
@@ -100,7 +129,6 @@ func fetchPageMetadata(pageURL string) (string, string, string) {
twDesc = contentVal
}
- // Open Graph tags
switch metaProperty {
case "og:title":
ogTitle = contentVal
@@ -115,7 +143,7 @@ func fetchPageMetadata(pageURL string) (string, string, string) {
}
walk(doc)
-	// Fallback to OG or Twitter if <title>/description are missing
+ // fallback to OG/Twitter if missing
if !foundTitle {
if ogTitle != "" {
title = ogTitle
@@ -131,43 +159,7 @@ func fetchPageMetadata(pageURL string) (string, string, string) {
}
}
- // If still missing title or desc, fallback to go-readability
- if title == "" || desc == "" {
- parsedURL, parseErr := url.Parse(pageURL)
- if parseErr != nil {
- printDebug("Failed to parse URL %s: %v", pageURL, parseErr)
- // We must skip if we can't parse the URL for readability
- return "", "", ""
- }
-
- readResp, readErr := client.Get(pageURL)
- if readErr == nil && readResp.StatusCode >= 200 && readResp.StatusCode < 300 {
- defer readResp.Body.Close()
-
- article, rdErr := readability.FromReader(readResp.Body, parsedURL)
- if rdErr == nil {
- // If we still have no title, try from readability
- if title == "" && article.Title != "" {
- title = article.Title
- }
- // If we still have no description, try article.Excerpt
- if desc == "" && article.Excerpt != "" {
- desc = article.Excerpt
- } else if desc == "" && len(article.Content) > 0 {
- // If excerpt is empty, use a snippet from article.Content
- snippet := article.Content
- if len(snippet) > 200 {
- snippet = snippet[:200] + "..."
- }
- desc = snippet
- }
- } else {
- printDebug("go-readability failed for %s: %v", pageURL, rdErr)
- }
- }
- }
-
- // Heuristic: discard obviously incorrect HTML-y strings or placeholders
+ // Heuristic check
if looksLikeRawHTML(title) {
title = ""
}
@@ -175,16 +167,68 @@ func fetchPageMetadata(pageURL string) (string, string, string) {
desc = ""
}
- // If after all that we have no title or description, skip
- if title == "" || desc == "" {
- return "", "", ""
- }
-
- return sanitize(title), sanitize(desc), sanitize(keywords)
+ return title, desc, keywords
}
-// looksLikeRawHTML is a simple heuristic to check for leftover HTML or
-// go-readability noise (e.g., "readability-page-1").
+// fallbackReadability tries go-readability if title/desc is missing.
+func fallbackReadability(pageURL, userAgent, title, desc, keywords string) (string, string, string) {
+ if title != "" && desc != "" {
+ return title, desc, keywords
+ }
+
+ client := &http.Client{Timeout: 15 * time.Second}
+ readReq, err := http.NewRequest("GET", pageURL, nil)
+ if err != nil {
+ printDebug("Failed to create fallbackReadability request: %v", err)
+ return title, desc, keywords
+ }
+ readReq.Header.Set("User-Agent", userAgent)
+ readReq.Header.Set("Accept-Language", "en-US,en;q=0.9")
+
+ readResp, err := client.Do(readReq)
+ if err != nil || readResp.StatusCode < 200 || readResp.StatusCode >= 300 {
+ if err != nil {
+ printDebug("go-readability GET error for %s: %v", pageURL, err)
+ }
+ if readResp != nil {
+ readResp.Body.Close()
+ }
+ return title, desc, keywords
+ }
+ defer readResp.Body.Close()
+
+ parsedURL, parseErr := url.Parse(pageURL)
+ if parseErr != nil {
+ printDebug("Failed to parse URL: %v", parseErr)
+ return title, desc, keywords
+ }
+
+ article, rdErr := readability.FromReader(readResp.Body, parsedURL)
+ if rdErr != nil {
+ printDebug("go-readability error for %s: %v", pageURL, rdErr)
+ return title, desc, keywords
+ }
+
+ if title == "" && article.Title != "" && !looksLikeRawHTML(article.Title) {
+ title = article.Title
+ }
+ if desc == "" {
+ if article.Excerpt != "" && !looksLikeRawHTML(article.Excerpt) {
+ desc = article.Excerpt
+ } else if len(article.Content) > 0 {
+ snippet := article.Content
+ if len(snippet) > 200 {
+ snippet = snippet[:200] + "..."
+ }
+ if !looksLikeRawHTML(snippet) {
+ desc = snippet
+ }
+ }
+ }
+ return title, desc, keywords
+}
+
+// looksLikeRawHTML is a simple heuristic check for leftover or invalid HTML text
func looksLikeRawHTML(text string) bool {
textLower := strings.ToLower(text)
if strings.Contains(textLower, "readability-page") {
@@ -196,7 +240,7 @@ func looksLikeRawHTML(text string) bool {
return false
}
-// sanitize removes pipes and newlines so they don't break our output format.
+// sanitize removes pipes/newlines so they don't break our output format.
func sanitize(input string) string {
input = strings.ReplaceAll(input, "|", " ")
input = strings.ReplaceAll(input, "\n", " ")
diff --git a/crawler.go b/crawler.go
index 2a934f6..45dc76f 100644
--- a/crawler.go
+++ b/crawler.go
@@ -35,7 +35,7 @@ func runCrawlerAndIndexer() {
// 2. Crawl each domain and write results to data_to_index.txt
outFile := filepath.Join(config.DriveCache.Path, "data_to_index.txt")
- if err := crawlDomainsToFile(domains, outFile, config.MaxPagesPerDomain, config.ConcurrentCrawlers); err != nil {
+ if err := crawlDomainsToFile(domains, outFile, config.MaxPagesPerDomain); err != nil {
printErr("Error crawling domains: %v", err)
return
}
@@ -75,18 +75,20 @@ func readDomainsCSV(csvPath string) ([][2]string, error) {
return result, scanner.Err()
}
-// crawlDomainsToFile visits each domain, extracts minimal data, writes results to outFile
-func crawlDomainsToFile(domains [][2]string, outFile string, maxPages int, concurrentCrawlers int) error {
+// crawlDomainsToFile does an async pipeline:
+// 1. "standard" goroutines read from standardCh -> attempt standard extraction -> if fails, push to chromeCh
+// 2. "chrome" goroutines read from chromeCh -> attempt chromedp extraction -> if fails, skip
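+//
+// domains -> standardCh -> [standard pool] -> ok -> outFile
+//                                \-> fail -> chromeCh -> [chrome pool] -> ok -> outFile / fail -> skip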
+func crawlDomainsToFile(domains [][2]string, outFile string, maxPages int) error {
existingEntries := make(map[string]bool)
- var mu sync.Mutex // Mutex to protect access to the map
+ var mu sync.Mutex // For existingEntries + file writes
+ // read existing entries from outFile if it exists
if _, err := os.Stat(outFile); err == nil {
file, err := os.Open(outFile)
if err != nil {
return fmt.Errorf("unable to open %s: %v", outFile, err)
}
defer file.Close()
-
scanner := bufio.NewScanner(file)
for scanner.Scan() {
line := scanner.Text()
@@ -104,47 +106,109 @@ func crawlDomainsToFile(domains [][2]string, outFile string, maxPages int, concu
}
defer file.Close()
- semaphore := make(chan struct{}, concurrentCrawlers)
- var wg sync.WaitGroup
+ // Prepare channels
+ standardCh := make(chan [2]string, 1000) // buffered channels help avoid blocking
+ chromeCh := make(chan [2]string, 1000)
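+	// Buffer sizes are heuristic: if chromeCh fills up, a standard worker
+	// blocks on the handoff until a Chrome worker drains an entry.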
- for _, d := range domains {
- wg.Add(1)
- semaphore <- struct{}{}
- go func(domain [2]string) {
- defer wg.Done()
- defer func() { <-semaphore }()
+ // 1) Spawn standard workers
+ var wgStandard sync.WaitGroup
+ for i := 0; i < config.ConcurrentStandardCrawlers; i++ {
+ wgStandard.Add(1)
+ go func() {
+ defer wgStandard.Done()
+ for dom := range standardCh {
+ rank := dom[0]
+ domainName := dom[1]
+ fullURL := "https://" + domainName
- rank := domain[0]
- domainName := domain[1]
- fullURL := "https://" + domainName
-
- mu.Lock()
- if domainName == "" || existingEntries[fullURL] {
+ // Mark domain existing so we don't re-crawl duplicates
+ mu.Lock()
+ if domainName == "" || existingEntries[fullURL] {
+ mu.Unlock()
+ continue
+ }
+ existingEntries[fullURL] = true
+ mu.Unlock()
+
+ // get a standard user agent
+ userAgent, _ := GetUserAgent("crawler-std")
+ title, desc, keywords := fetchPageMetadataStandard(fullURL, userAgent)
+
+ if title == "" || desc == "" {
+ // push to chromeCh
+ chromeCh <- dom
+ continue
+ }
+
+ // write to file
+ line := fmt.Sprintf("%s|%s|%s|%s|%s\n",
+ fullURL, title, keywords, desc, rank)
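+				// (fields were sanitized upstream, so '|' and newlines can't break the format)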
+
+ mu.Lock()
+ file.WriteString(line)
mu.Unlock()
- return
}
- existingEntries[fullURL] = true
- mu.Unlock()
-
- title, desc, keywords := fetchPageMetadata(fullURL)
-
- // Skip saving if title or description is missing
- if title == "" || desc == "" {
- printDebug("Skipping %s: missing title or description", fullURL)
- return
- }
-
- line := fmt.Sprintf("%s|%s|%s|%s|%s\n",
- fullURL,
- title,
- keywords,
- desc,
- rank,
- )
- file.WriteString(line)
- }(d)
+ }()
}
- wg.Wait()
+ // 2) Spawn chrome workers
+ var wgChrome sync.WaitGroup
+ for i := 0; i < config.ConcurrentChromeCrawlers; i++ {
+ wgChrome.Add(1)
+ go func() {
+ defer wgChrome.Done()
+ for dom := range chromeCh {
+ rank := dom[0]
+ domainName := dom[1]
+ fullURL := "https://" + domainName
+
+				// No dedup re-check here: every domain on chromeCh was already
+				// marked in existingEntries by the standard worker that pushed
+				// it, so re-checking would skip all Chrome fallback work.
+
+ // get a chrome user agent
+ userAgent, _ := GetUserAgent("crawler-chrome")
+ title, desc, keywords := fetchPageMetadataChrome(fullURL, userAgent)
+
+ if title == "" || desc == "" {
+ printWarn("Skipping (Chrome) %s: missing title/desc", fullURL)
+ continue
+ }
+
+ // write to file
+ line := fmt.Sprintf("%s|%s|%s|%s|%s\n",
+ fullURL, title, keywords, desc, rank)
+
+ mu.Lock()
+ file.WriteString(line)
+ mu.Unlock()
+ }
+ }()
+ }
+
+ // Feed domains into standardCh
+ go func() {
+ for _, dom := range domains {
+			// NOTE: maxPages is not enforced yet; only each domain's root page
+			// is fetched, so per-domain page counting would happen here.
+ standardCh <- dom
+ }
+ // close the standardCh once all are queued
+ close(standardCh)
+ }()
+
+ // Wait for standard workers to finish, then close chromeCh
+ go func() {
+ wgStandard.Wait()
+ close(chromeCh)
+ }()
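+	// chromeCh must stay open until every standard worker has exited, since
+	// any of them may still hand off a failed domain.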
+
+ // Wait for chrome workers to finish
+ wgChrome.Wait()
+
return nil
}
diff --git a/go.mod b/go.mod
index a293a75..c8200d3 100644
--- a/go.mod
+++ b/go.mod
@@ -41,13 +41,21 @@ require (
github.com/blevesearch/zapx/v14 v14.3.10 // indirect
github.com/blevesearch/zapx/v15 v15.3.17 // indirect
github.com/blevesearch/zapx/v16 v16.1.9-0.20241217210638-a0519e7caf3b // indirect
+ github.com/chromedp/cdproto v0.0.0-20241022234722-4d5d5faf59fb // indirect
+ github.com/chromedp/chromedp v0.11.2 // indirect
+ github.com/chromedp/sysutil v1.1.0 // indirect
github.com/go-ole/go-ole v1.3.0 // indirect
github.com/go-shiori/dom v0.0.0-20230515143342-73569d674e1c // indirect
+ github.com/gobwas/httphead v0.1.0 // indirect
+ github.com/gobwas/pool v0.2.1 // indirect
+ github.com/gobwas/ws v1.4.0 // indirect
github.com/gogs/chardet v0.0.0-20211120154057-b7413eaefb8f // indirect
github.com/golang/geo v0.0.0-20230421003525-6adc56603217 // indirect
github.com/golang/protobuf v1.5.4 // indirect
github.com/golang/snappy v0.0.4 // indirect
+ github.com/josharian/intern v1.0.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
+ github.com/mailru/easyjson v0.7.7 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/mschoch/smat v0.2.0 // indirect
diff --git a/go.sum b/go.sum
index 59414b4..148146f 100644
--- a/go.sum
+++ b/go.sum
@@ -47,6 +47,12 @@ github.com/blevesearch/zapx/v16 v16.1.9-0.20241217210638-a0519e7caf3b h1:ju9Az5Y
github.com/blevesearch/zapx/v16 v16.1.9-0.20241217210638-a0519e7caf3b/go.mod h1:BlrYNpOu4BvVRslmIG+rLtKhmjIaRhIbG8sb9scGTwI=
github.com/chai2010/webp v1.1.1 h1:jTRmEccAJ4MGrhFOrPMpNGIJ/eybIgwKpcACsrTEapk=
github.com/chai2010/webp v1.1.1/go.mod h1:0XVwvZWdjjdxpUEIf7b9g9VkHFnInUSYujwqTLEuldU=
+github.com/chromedp/cdproto v0.0.0-20241022234722-4d5d5faf59fb h1:noKVm2SsG4v0Yd0lHNtFYc9EUxIVvrr4kJ6hM8wvIYU=
+github.com/chromedp/cdproto v0.0.0-20241022234722-4d5d5faf59fb/go.mod h1:4XqMl3iIW08jtieURWL6Tt5924w21pxirC6th662XUM=
+github.com/chromedp/chromedp v0.11.2 h1:ZRHTh7DjbNTlfIv3NFTbB7eVeu5XCNkgrpcGSpn2oX0=
+github.com/chromedp/chromedp v0.11.2/go.mod h1:lr8dFRLKsdTTWb75C/Ttol2vnBKOSnt0BW8R9Xaupi8=
+github.com/chromedp/sysutil v1.1.0 h1:PUFNv5EcprjqXZD9nJb9b/c9ibAbxiYo4exNWZyipwM=
+github.com/chromedp/sysutil v1.1.0/go.mod h1:WiThHUdltqCNKGc4gaU50XgYjwjYIhKWoHGPTUfWTJ8=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
@@ -57,6 +63,12 @@ github.com/go-shiori/dom v0.0.0-20230515143342-73569d674e1c h1:wpkoddUomPfHiOziH
github.com/go-shiori/dom v0.0.0-20230515143342-73569d674e1c/go.mod h1:oVDCh3qjJMLVUSILBRwrm+Bc6RNXGZYtoh9xdvf1ffM=
github.com/go-shiori/go-readability v0.0.0-20241012063810-92284fa8a71f h1:cypj7SJh+47G9J3VCPdMzT3uWcXWAWDJA54ErTfOigI=
github.com/go-shiori/go-readability v0.0.0-20241012063810-92284fa8a71f/go.mod h1:YWa00ashoPZMAOElrSn4E1cJErhDVU6PWAll4Hxzn+w=
+github.com/gobwas/httphead v0.1.0 h1:exrUm0f4YX0L7EBwZHuCF4GDp8aJfVeBrlLQrs6NqWU=
+github.com/gobwas/httphead v0.1.0/go.mod h1:O/RXo79gxV8G+RqlR/otEwx4Q36zl9rqC5u12GKvMCM=
+github.com/gobwas/pool v0.2.1 h1:xfeeEhW7pwmX8nuLVlqbzVc7udMDrwetjEv+TZIz1og=
+github.com/gobwas/pool v0.2.1/go.mod h1:q8bcK0KcYlCgd9e7WYLm9LpyS+YeLd8JVDW6WezmKEw=
+github.com/gobwas/ws v1.4.0 h1:CTaoG1tojrh4ucGPcoJFiAQUAsEWekEWvLy7GsVNqGs=
+github.com/gobwas/ws v1.4.0/go.mod h1:G3gNqMNtPppf5XUz7O4shetPpcZ1VJ7zt18dlUeakrc=
github.com/gogs/chardet v0.0.0-20211120154057-b7413eaefb8f h1:3BSP1Tbs2djlpprl7wCLuiqMaUh5SJkkzI2gDs+FgLs=
github.com/gogs/chardet v0.0.0-20211120154057-b7413eaefb8f/go.mod h1:Pcatq5tYkCW2Q6yrR2VRHlbHpZ/R4/7qyL1TCF7vl14=
github.com/golang/geo v0.0.0-20230421003525-6adc56603217 h1:HKlyj6in2JV6wVkmQ4XmG/EIm+SCYlPZ+V4GWit7Z+I=
@@ -68,10 +80,14 @@ github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEW
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
+github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
+github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
github.com/leonelquinteros/gotext v1.7.0 h1:jcJmF4AXqyamP7vuw2MMIKs+O3jAEmvrc5JQiI8Ht/8=
github.com/leonelquinteros/gotext v1.7.0/go.mod h1:qJdoQuERPpccw7L70uoU+K/BvTfRBHYsisCQyFLXyvw=
+github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0=
+github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
github.com/mattn/go-runewidth v0.0.10/go.mod h1:RAqKPSqVFrSLVXbA8x7dzmKdmGzieGRCM46jaSJTDAk=
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
@@ -137,6 +153,7 @@ golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBc
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=