improved crawler data extraction (added chromedp)
This commit is contained in:
parent
3494457336
commit
c71808aa1e
6 changed files with 305 additions and 166 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -6,3 +6,4 @@ cache/
|
||||||
*.min.js
|
*.min.js
|
||||||
*.min.css
|
*.min.css
|
||||||
qgato
|
qgato
|
||||||
|
test.py
|
101
config.go
101
config.go
|
@ -23,43 +23,45 @@ type CacheConfig struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
type Config struct {
|
type Config struct {
|
||||||
Port int // Added
|
Port int // Added
|
||||||
AuthCode string // Added
|
AuthCode string // Added
|
||||||
PeerID string // Added
|
PeerID string // Added
|
||||||
Peers []string
|
Peers []string
|
||||||
Domain string // Added
|
Domain string // Added
|
||||||
NodesEnabled bool // Added
|
NodesEnabled bool // Added
|
||||||
CrawlerEnabled bool // Added
|
CrawlerEnabled bool // Added
|
||||||
IndexerEnabled bool // Added
|
IndexerEnabled bool // Added
|
||||||
WebsiteEnabled bool // Added
|
WebsiteEnabled bool // Added
|
||||||
RamCacheEnabled bool
|
RamCacheEnabled bool
|
||||||
DriveCacheEnabled bool // Added
|
DriveCacheEnabled bool // Added
|
||||||
LogLevel int // Added
|
LogLevel int // Added
|
||||||
ConcurrentCrawlers int // Number of concurrent crawlers
|
ConcurrentStandardCrawlers int
|
||||||
CrawlingInterval time.Duration // Refres crawled results in...
|
ConcurrentChromeCrawlers int
|
||||||
MaxPagesPerDomain int // Max pages to crawl per domain
|
CrawlingInterval time.Duration // Interval at which crawled results are refreshed
|
||||||
IndexRefreshInterval time.Duration // Interval for periodic index refresh (e.g., "10m")
|
MaxPagesPerDomain int // Max pages to crawl per domain
|
||||||
|
IndexRefreshInterval time.Duration // Interval for periodic index refresh (e.g., "10m")
|
||||||
|
|
||||||
DriveCache CacheConfig
|
DriveCache CacheConfig
|
||||||
RamCache CacheConfig
|
RamCache CacheConfig
|
||||||
}
|
}
|
||||||
|
|
||||||
var defaultConfig = Config{
|
var defaultConfig = Config{
|
||||||
Port: 5000,
|
Port: 5000,
|
||||||
Domain: "localhost",
|
Domain: "localhost",
|
||||||
Peers: []string{},
|
Peers: []string{},
|
||||||
AuthCode: generateStrongRandomString(64),
|
AuthCode: generateStrongRandomString(64),
|
||||||
NodesEnabled: false,
|
NodesEnabled: false,
|
||||||
CrawlerEnabled: true,
|
CrawlerEnabled: true,
|
||||||
IndexerEnabled: false,
|
IndexerEnabled: false,
|
||||||
WebsiteEnabled: true,
|
WebsiteEnabled: true,
|
||||||
RamCacheEnabled: true,
|
RamCacheEnabled: true,
|
||||||
DriveCacheEnabled: false,
|
DriveCacheEnabled: false,
|
||||||
ConcurrentCrawlers: 5,
|
ConcurrentStandardCrawlers: 12,
|
||||||
CrawlingInterval: 24 * time.Hour,
|
ConcurrentChromeCrawlers: 4,
|
||||||
MaxPagesPerDomain: 10,
|
CrawlingInterval: 24 * time.Hour,
|
||||||
IndexRefreshInterval: 2 * time.Minute,
|
MaxPagesPerDomain: 10,
|
||||||
LogLevel: 1,
|
IndexRefreshInterval: 2 * time.Minute,
|
||||||
|
LogLevel: 1,
|
||||||
DriveCache: CacheConfig{
|
DriveCache: CacheConfig{
|
||||||
Duration: 48 * time.Hour, // Added
|
Duration: 48 * time.Hour, // Added
|
||||||
Path: "./cache", // Added
|
Path: "./cache", // Added
|
||||||
|
@ -249,7 +251,8 @@ func saveConfig(config Config) {
|
||||||
|
|
||||||
// Indexer section
|
// Indexer section
|
||||||
indexerSec := cfg.Section("Indexer")
|
indexerSec := cfg.Section("Indexer")
|
||||||
indexerSec.Key("ConcurrentCrawlers").SetValue(strconv.Itoa(config.ConcurrentCrawlers))
|
indexerSec.Key("ConcurrentStandardCrawlers").SetValue(strconv.Itoa(config.ConcurrentStandardCrawlers))
|
||||||
|
// Persist the Chrome worker count from its own field; writing the standard
// crawler count here would overwrite the setting on every save.
indexerSec.Key("ConcurrentChromeCrawlers").SetValue(strconv.Itoa(config.ConcurrentChromeCrawlers))
|
||||||
indexerSec.Key("CrawlingInterval").SetValue(config.CrawlingInterval.String())
|
indexerSec.Key("CrawlingInterval").SetValue(config.CrawlingInterval.String())
|
||||||
indexerSec.Key("MaxPagesPerDomain").SetValue(strconv.Itoa(config.MaxPagesPerDomain))
|
indexerSec.Key("MaxPagesPerDomain").SetValue(strconv.Itoa(config.MaxPagesPerDomain))
|
||||||
indexerSec.Key("IndexRefreshInterval").SetValue(config.IndexRefreshInterval.String())
|
indexerSec.Key("IndexRefreshInterval").SetValue(config.IndexRefreshInterval.String())
|
||||||
|
@ -296,7 +299,8 @@ func loadConfig() Config {
|
||||||
driveCacheEnabled := getConfigValueBool(cfg.Section("Features").Key("DriveCache"), defaultConfig.DriveCacheEnabled)
|
driveCacheEnabled := getConfigValueBool(cfg.Section("Features").Key("DriveCache"), defaultConfig.DriveCacheEnabled)
|
||||||
|
|
||||||
// Indexing
|
// Indexing
|
||||||
concurrentCrawlers := getConfigValue(cfg.Section("Indexer").Key("ConcurrentCrawlers"), defaultConfig.ConcurrentCrawlers, strconv.Atoi)
|
concurrentStandardCrawlers := getConfigValue(cfg.Section("Indexer").Key("ConcurrentStandardCrawlers"), defaultConfig.ConcurrentStandardCrawlers, strconv.Atoi)
|
||||||
|
concurrentChromeCrawlers := getConfigValue(cfg.Section("Indexer").Key("ConcurrentChromeCrawlers"), defaultConfig.ConcurrentChromeCrawlers, strconv.Atoi)
|
||||||
crawlingInterval := getConfigValue(cfg.Section("Indexer").Key("CrawlingInterval"), defaultConfig.CrawlingInterval, time.ParseDuration)
|
crawlingInterval := getConfigValue(cfg.Section("Indexer").Key("CrawlingInterval"), defaultConfig.CrawlingInterval, time.ParseDuration)
|
||||||
maxPagesPerDomain := getConfigValue(cfg.Section("Indexer").Key("MaxPagesPerDomain"), defaultConfig.MaxPagesPerDomain, strconv.Atoi)
|
maxPagesPerDomain := getConfigValue(cfg.Section("Indexer").Key("MaxPagesPerDomain"), defaultConfig.MaxPagesPerDomain, strconv.Atoi)
|
||||||
indexRefreshInterval := getConfigValue(cfg.Section("Indexer").Key("IndexRefreshInterval"), defaultConfig.IndexRefreshInterval, time.ParseDuration)
|
indexRefreshInterval := getConfigValue(cfg.Section("Indexer").Key("IndexRefreshInterval"), defaultConfig.IndexRefreshInterval, time.ParseDuration)
|
||||||
|
@ -315,21 +319,22 @@ func loadConfig() Config {
|
||||||
ramMaxUsage := parseMaxUsageRam(getConfigValueString(cfg.Section("RamCache").Key("MaxUsage"), formatMaxUsage(defaultConfig.RamCache.MaxUsageBytes)))
|
ramMaxUsage := parseMaxUsageRam(getConfigValueString(cfg.Section("RamCache").Key("MaxUsage"), formatMaxUsage(defaultConfig.RamCache.MaxUsageBytes)))
|
||||||
|
|
||||||
return Config{
|
return Config{
|
||||||
Port: port,
|
Port: port,
|
||||||
Domain: domain,
|
Domain: domain,
|
||||||
LogLevel: logLevel,
|
LogLevel: logLevel,
|
||||||
AuthCode: authCode,
|
AuthCode: authCode,
|
||||||
Peers: peers,
|
Peers: peers,
|
||||||
NodesEnabled: nodesEnabled,
|
NodesEnabled: nodesEnabled,
|
||||||
CrawlerEnabled: crawlerEnabled,
|
CrawlerEnabled: crawlerEnabled,
|
||||||
IndexerEnabled: indexerEnabled,
|
IndexerEnabled: indexerEnabled,
|
||||||
WebsiteEnabled: websiteEnabled,
|
WebsiteEnabled: websiteEnabled,
|
||||||
RamCacheEnabled: ramCacheEnabled,
|
RamCacheEnabled: ramCacheEnabled,
|
||||||
DriveCacheEnabled: driveCacheEnabled,
|
DriveCacheEnabled: driveCacheEnabled,
|
||||||
ConcurrentCrawlers: concurrentCrawlers,
|
ConcurrentStandardCrawlers: concurrentStandardCrawlers,
|
||||||
CrawlingInterval: crawlingInterval,
|
ConcurrentChromeCrawlers: concurrentChromeCrawlers,
|
||||||
MaxPagesPerDomain: maxPagesPerDomain,
|
CrawlingInterval: crawlingInterval,
|
||||||
IndexRefreshInterval: indexRefreshInterval,
|
MaxPagesPerDomain: maxPagesPerDomain,
|
||||||
|
IndexRefreshInterval: indexRefreshInterval,
|
||||||
DriveCache: CacheConfig{
|
DriveCache: CacheConfig{
|
||||||
Duration: driveDuration,
|
Duration: driveDuration,
|
||||||
MaxUsageBytes: driveMaxUsage,
|
MaxUsageBytes: driveMaxUsage,
|
||||||
|
|
|
@ -1,69 +1,99 @@
|
||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"context"
|
||||||
"net/http"
|
"net/http"
|
||||||
"net/url"
|
"net/url"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/chromedp/cdproto/emulation"
|
||||||
|
"github.com/chromedp/chromedp"
|
||||||
"github.com/go-shiori/go-readability"
|
"github.com/go-shiori/go-readability"
|
||||||
"golang.org/x/net/html"
|
"golang.org/x/net/html"
|
||||||
)
|
)
|
||||||
|
|
||||||
// fetchPageMetadata tries extracting title/description/keywords from standard HTML,
|
// fetchPageMetadataStandard tries standard HTML parse + go-readability only.
|
||||||
// OG, Twitter, then falls back to go-readability if needed. If after all that we
|
func fetchPageMetadataStandard(pageURL, userAgent string) (string, string, string) {
|
||||||
// still have no title or no description, we return ("", "", "") so the caller
|
// 1. Standard HTML parse
|
||||||
// can skip saving it.
|
title, desc, keywords := extractStandard(pageURL, userAgent)
|
||||||
//
|
|
||||||
// 1. <title>, <meta name="description"/>, <meta name="keywords"/>
|
// 2. Fallback: go-readability
|
||||||
// 2. <meta property="og:title">, <meta property="og:description">
|
if title == "" || desc == "" {
|
||||||
// 3. <meta name="twitter:title">, <meta name="twitter:description">
|
title, desc, keywords = fallbackReadability(pageURL, userAgent, title, desc, keywords)
|
||||||
// 4. go-readability fallback (if title or description is still missing)
|
}
|
||||||
// 5. Basic heuristic to detect “wrong” content from readability (e.g. raw HTML or “readability-page-1”).
|
|
||||||
func fetchPageMetadata(pageURL string) (string, string, string) {
|
// If still empty, return ("", "", "")
|
||||||
userAgent, err := GetUserAgent("crawler")
|
if title == "" || desc == "" {
|
||||||
|
return "", "", ""
|
||||||
|
}
|
||||||
|
return sanitize(title), sanitize(desc), sanitize(keywords)
|
||||||
|
}
|
||||||
|
|
||||||
|
// fetchPageMetadataChrome uses Chromedp to handle JavaScript-rendered pages.
|
||||||
|
func fetchPageMetadataChrome(pageURL, userAgent string) (string, string, string) {
|
||||||
|
// Create context
|
||||||
|
ctx, cancel := chromedp.NewContext(context.Background())
defer cancel()

// Bound the whole browser session so a page that never finishes loading
// cannot hold this worker forever; on expiry chromedp.Run returns an error.
ctx, cancelTimeout := context.WithTimeout(ctx, 30*time.Second)
defer cancelTimeout()
|
||||||
|
|
||||||
|
var renderedHTML string
|
||||||
|
err := chromedp.Run(ctx,
|
||||||
|
emulation.SetUserAgentOverride(userAgent).WithAcceptLanguage("en-US,en;q=0.9"),
|
||||||
|
chromedp.Navigate(pageURL),
|
||||||
|
chromedp.Sleep(2*time.Second), // Let JS run a bit
|
||||||
|
chromedp.OuterHTML("html", &renderedHTML),
|
||||||
|
)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
printDebug("Failed to generate User-Agent: %v", err)
|
printDebug("chromedp error for %s: %v", pageURL, err)
|
||||||
return "", "", ""
|
return "", "", ""
|
||||||
}
|
}
|
||||||
|
|
||||||
|
doc, err := html.Parse(strings.NewReader(renderedHTML))
|
||||||
|
if err != nil {
|
||||||
|
printDebug("chromedp parse error for %s: %v", pageURL, err)
|
||||||
|
return "", "", ""
|
||||||
|
}
|
||||||
|
|
||||||
|
return extractParsedDOM(doc)
|
||||||
|
}
|
||||||
|
|
||||||
|
// extractStandard does the normal HTML parse with OG, Twitter, etc.
|
||||||
|
func extractStandard(pageURL, userAgent string) (title, desc, keywords string) {
|
||||||
client := &http.Client{Timeout: 15 * time.Second}
|
client := &http.Client{Timeout: 15 * time.Second}
|
||||||
req, err := http.NewRequest("GET", pageURL, nil)
|
req, err := http.NewRequest("GET", pageURL, nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
printDebug("Failed to create request for %s: %v", pageURL, err)
|
printDebug("Failed to create request for %s: %v", pageURL, err)
|
||||||
return "", "", ""
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// Force English content when possible
|
|
||||||
req.Header.Set("User-Agent", userAgent)
|
req.Header.Set("User-Agent", userAgent)
|
||||||
req.Header.Set("Accept-Language", "en-US,en;q=0.9")
|
req.Header.Set("Accept-Language", "en-US,en;q=0.9")
|
||||||
|
|
||||||
resp, err := client.Do(req)
|
resp, err := client.Do(req)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
printDebug("Failed to GET %s: %v", pageURL, err)
|
printDebug("Failed to GET %s: %v", pageURL, err)
|
||||||
return "", "", ""
|
return
|
||||||
}
|
}
|
||||||
defer resp.Body.Close()
|
defer resp.Body.Close()
|
||||||
|
|
||||||
// Skip non-2xx
|
|
||||||
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
||||||
printDebug("Skipping %s due to HTTP status: %d", pageURL, resp.StatusCode)
|
printDebug("Skipping %s due to HTTP status: %d", pageURL, resp.StatusCode)
|
||||||
return "", "", ""
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// First pass: standard HTML parse
|
|
||||||
doc, err := html.Parse(resp.Body)
|
doc, err := html.Parse(resp.Body)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
printDebug("HTML parse error for %s: %v", pageURL, err)
|
printDebug("HTML parse error for %s: %v", pageURL, err)
|
||||||
return "", "", ""
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
var (
|
return extractParsedDOM(doc)
|
||||||
title, desc, keywords string
|
}
|
||||||
ogTitle, ogDesc string
|
|
||||||
twTitle, twDesc string
|
// extractParsedDOM uses the same logic to parse <title>, meta, OG, Twitter.
|
||||||
foundTitle, foundDesc bool
|
func extractParsedDOM(doc *html.Node) (title, desc, keywords string) {
|
||||||
)
|
var ogTitle, ogDesc string
|
||||||
|
var twTitle, twDesc string
|
||||||
|
var foundTitle, foundDesc bool
|
||||||
|
|
||||||
var walk func(*html.Node)
|
var walk func(*html.Node)
|
||||||
walk = func(n *html.Node) {
|
walk = func(n *html.Node) {
|
||||||
|
@ -87,7 +117,6 @@ func fetchPageMetadata(pageURL string) (string, string, string) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Standard meta tags
|
|
||||||
switch metaName {
|
switch metaName {
|
||||||
case "description":
|
case "description":
|
||||||
desc = contentVal
|
desc = contentVal
|
||||||
|
@ -100,7 +129,6 @@ func fetchPageMetadata(pageURL string) (string, string, string) {
|
||||||
twDesc = contentVal
|
twDesc = contentVal
|
||||||
}
|
}
|
||||||
|
|
||||||
// Open Graph tags
|
|
||||||
switch metaProperty {
|
switch metaProperty {
|
||||||
case "og:title":
|
case "og:title":
|
||||||
ogTitle = contentVal
|
ogTitle = contentVal
|
||||||
|
@ -115,7 +143,7 @@ func fetchPageMetadata(pageURL string) (string, string, string) {
|
||||||
}
|
}
|
||||||
walk(doc)
|
walk(doc)
|
||||||
|
|
||||||
// Fallback to OG or Twitter if <title>/description are missing
|
// fallback to OG/Twitter if missing
|
||||||
if !foundTitle {
|
if !foundTitle {
|
||||||
if ogTitle != "" {
|
if ogTitle != "" {
|
||||||
title = ogTitle
|
title = ogTitle
|
||||||
|
@ -131,43 +159,7 @@ func fetchPageMetadata(pageURL string) (string, string, string) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// If still missing title or desc, fallback to go-readability
|
// Heuristic check
|
||||||
if title == "" || desc == "" {
|
|
||||||
parsedURL, parseErr := url.Parse(pageURL)
|
|
||||||
if parseErr != nil {
|
|
||||||
printDebug("Failed to parse URL %s: %v", pageURL, parseErr)
|
|
||||||
// We must skip if we can't parse the URL for readability
|
|
||||||
return "", "", ""
|
|
||||||
}
|
|
||||||
|
|
||||||
readResp, readErr := client.Get(pageURL)
|
|
||||||
if readErr == nil && readResp.StatusCode >= 200 && readResp.StatusCode < 300 {
|
|
||||||
defer readResp.Body.Close()
|
|
||||||
|
|
||||||
article, rdErr := readability.FromReader(readResp.Body, parsedURL)
|
|
||||||
if rdErr == nil {
|
|
||||||
// If we still have no title, try from readability
|
|
||||||
if title == "" && article.Title != "" {
|
|
||||||
title = article.Title
|
|
||||||
}
|
|
||||||
// If we still have no description, try article.Excerpt
|
|
||||||
if desc == "" && article.Excerpt != "" {
|
|
||||||
desc = article.Excerpt
|
|
||||||
} else if desc == "" && len(article.Content) > 0 {
|
|
||||||
// If excerpt is empty, use a snippet from article.Content
|
|
||||||
snippet := article.Content
|
|
||||||
if len(snippet) > 200 {
|
|
||||||
snippet = snippet[:200] + "..."
|
|
||||||
}
|
|
||||||
desc = snippet
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
printDebug("go-readability failed for %s: %v", pageURL, rdErr)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Heuristic: discard obviously incorrect HTML-y strings or placeholders
|
|
||||||
if looksLikeRawHTML(title) {
|
if looksLikeRawHTML(title) {
|
||||||
title = ""
|
title = ""
|
||||||
}
|
}
|
||||||
|
@ -175,16 +167,68 @@ func fetchPageMetadata(pageURL string) (string, string, string) {
|
||||||
desc = ""
|
desc = ""
|
||||||
}
|
}
|
||||||
|
|
||||||
// If after all that we have no title or description, skip
|
return title, desc, keywords
|
||||||
if title == "" || desc == "" {
|
|
||||||
return "", "", ""
|
|
||||||
}
|
|
||||||
|
|
||||||
return sanitize(title), sanitize(desc), sanitize(keywords)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// looksLikeRawHTML is a simple heuristic to check for leftover HTML or
|
// fallbackReadability tries go-readability if title/desc is missing.
|
||||||
// go-readability noise (e.g., "readability-page-1").
|
func fallbackReadability(pageURL, userAgent, title, desc, keywords string) (string, string, string) {
|
||||||
|
if title != "" && desc != "" {
|
||||||
|
return title, desc, keywords
|
||||||
|
}
|
||||||
|
|
||||||
|
client := &http.Client{Timeout: 15 * time.Second}
|
||||||
|
readReq, err := http.NewRequest("GET", pageURL, nil)
|
||||||
|
if err != nil {
|
||||||
|
printDebug("Failed to create fallbackReadability request: %v", err)
|
||||||
|
return title, desc, keywords
|
||||||
|
}
|
||||||
|
readReq.Header.Set("User-Agent", userAgent)
|
||||||
|
readReq.Header.Set("Accept-Language", "en-US,en;q=0.9")
|
||||||
|
|
||||||
|
readResp, err := client.Do(readReq)
|
||||||
|
if err != nil || readResp.StatusCode < 200 || readResp.StatusCode >= 300 {
|
||||||
|
if err != nil {
|
||||||
|
printDebug("go-readability GET error for %s: %v", pageURL, err)
|
||||||
|
}
|
||||||
|
if readResp != nil {
|
||||||
|
readResp.Body.Close()
|
||||||
|
}
|
||||||
|
return title, desc, keywords
|
||||||
|
}
|
||||||
|
defer readResp.Body.Close()
|
||||||
|
|
||||||
|
parsedURL, parseErr := url.Parse(pageURL)
|
||||||
|
if parseErr != nil {
|
||||||
|
printDebug("Failed to parse URL: %v", parseErr)
|
||||||
|
return title, desc, keywords
|
||||||
|
}
|
||||||
|
|
||||||
|
article, rdErr := readability.FromReader(readResp.Body, parsedURL)
|
||||||
|
if rdErr != nil {
|
||||||
|
printDebug("go-readability error for %s: %v", pageURL, rdErr)
|
||||||
|
return title, desc, keywords
|
||||||
|
}
|
||||||
|
|
||||||
|
if title == "" && article.Title != "" && !looksLikeRawHTML(article.Title) {
|
||||||
|
title = article.Title
|
||||||
|
}
|
||||||
|
if desc == "" {
|
||||||
|
if article.Excerpt != "" && !looksLikeRawHTML(article.Excerpt) {
|
||||||
|
desc = article.Excerpt
|
||||||
|
} else if len(article.Content) > 0 {
|
||||||
|
snippet := article.Content
|
||||||
|
if len(snippet) > 200 {
|
||||||
|
snippet = snippet[:200] + "..."
|
||||||
|
}
|
||||||
|
if !looksLikeRawHTML(snippet) {
|
||||||
|
desc = snippet
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return title, desc, keywords
|
||||||
|
}
|
||||||
|
|
||||||
|
// looksLikeRawHTML is a simple heuristic check for leftover or invalid HTML text
|
||||||
func looksLikeRawHTML(text string) bool {
|
func looksLikeRawHTML(text string) bool {
|
||||||
textLower := strings.ToLower(text)
|
textLower := strings.ToLower(text)
|
||||||
if strings.Contains(textLower, "readability-page") {
|
if strings.Contains(textLower, "readability-page") {
|
||||||
|
@ -196,7 +240,7 @@ func looksLikeRawHTML(text string) bool {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
// sanitize removes pipes and newlines so they don't break our output format.
|
// sanitize removes pipes/newlines so they don't break our output format.
|
||||||
func sanitize(input string) string {
|
func sanitize(input string) string {
|
||||||
input = strings.ReplaceAll(input, "|", " ")
|
input = strings.ReplaceAll(input, "|", " ")
|
||||||
input = strings.ReplaceAll(input, "\n", " ")
|
input = strings.ReplaceAll(input, "\n", " ")
|
||||||
|
|
146
crawler.go
146
crawler.go
|
@ -35,7 +35,7 @@ func runCrawlerAndIndexer() {
|
||||||
|
|
||||||
// 2. Crawl each domain and write results to data_to_index.txt
|
// 2. Crawl each domain and write results to data_to_index.txt
|
||||||
outFile := filepath.Join(config.DriveCache.Path, "data_to_index.txt")
|
outFile := filepath.Join(config.DriveCache.Path, "data_to_index.txt")
|
||||||
if err := crawlDomainsToFile(domains, outFile, config.MaxPagesPerDomain, config.ConcurrentCrawlers); err != nil {
|
if err := crawlDomainsToFile(domains, outFile, config.MaxPagesPerDomain); err != nil {
|
||||||
printErr("Error crawling domains: %v", err)
|
printErr("Error crawling domains: %v", err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
@ -75,18 +75,20 @@ func readDomainsCSV(csvPath string) ([][2]string, error) {
|
||||||
return result, scanner.Err()
|
return result, scanner.Err()
|
||||||
}
|
}
|
||||||
|
|
||||||
// crawlDomainsToFile visits each domain, extracts minimal data, writes results to outFile
|
// crawlDomainsToFile does an async pipeline:
|
||||||
func crawlDomainsToFile(domains [][2]string, outFile string, maxPages int, concurrentCrawlers int) error {
|
// 1. "standard" goroutines read from standardCh -> attempt standard extraction -> if fails, push to chromeCh
|
||||||
|
// 2. "chrome" goroutines read from chromeCh -> attempt chromedp extraction -> if fails, skip
|
||||||
|
func crawlDomainsToFile(domains [][2]string, outFile string, maxPages int) error {
|
||||||
existingEntries := make(map[string]bool)
|
existingEntries := make(map[string]bool)
|
||||||
var mu sync.Mutex // Mutex to protect access to the map
|
var mu sync.Mutex // For existingEntries + file writes
|
||||||
|
|
||||||
|
// read existing entries from outFile if it exists
|
||||||
if _, err := os.Stat(outFile); err == nil {
|
if _, err := os.Stat(outFile); err == nil {
|
||||||
file, err := os.Open(outFile)
|
file, err := os.Open(outFile)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("unable to open %s: %v", outFile, err)
|
return fmt.Errorf("unable to open %s: %v", outFile, err)
|
||||||
}
|
}
|
||||||
defer file.Close()
|
defer file.Close()
|
||||||
|
|
||||||
scanner := bufio.NewScanner(file)
|
scanner := bufio.NewScanner(file)
|
||||||
for scanner.Scan() {
|
for scanner.Scan() {
|
||||||
line := scanner.Text()
|
line := scanner.Text()
|
||||||
|
@ -104,47 +106,109 @@ func crawlDomainsToFile(domains [][2]string, outFile string, maxPages int, concu
|
||||||
}
|
}
|
||||||
defer file.Close()
|
defer file.Close()
|
||||||
|
|
||||||
semaphore := make(chan struct{}, concurrentCrawlers)
|
// Prepare channels
|
||||||
var wg sync.WaitGroup
|
standardCh := make(chan [2]string, 1000) // buffered channels help avoid blocking
|
||||||
|
chromeCh := make(chan [2]string, 1000)
|
||||||
|
|
||||||
for _, d := range domains {
|
// 1) Spawn standard workers
|
||||||
wg.Add(1)
|
var wgStandard sync.WaitGroup
|
||||||
semaphore <- struct{}{}
|
for i := 0; i < config.ConcurrentStandardCrawlers; i++ {
|
||||||
go func(domain [2]string) {
|
wgStandard.Add(1)
|
||||||
defer wg.Done()
|
go func() {
|
||||||
defer func() { <-semaphore }()
|
defer wgStandard.Done()
|
||||||
|
for dom := range standardCh {
|
||||||
|
rank := dom[0]
|
||||||
|
domainName := dom[1]
|
||||||
|
fullURL := "https://" + domainName
|
||||||
|
|
||||||
rank := domain[0]
|
// Mark domain existing so we don't re-crawl duplicates
|
||||||
domainName := domain[1]
|
mu.Lock()
|
||||||
fullURL := "https://" + domainName
|
if domainName == "" || existingEntries[fullURL] {
|
||||||
|
mu.Unlock()
|
||||||
mu.Lock()
|
continue
|
||||||
if domainName == "" || existingEntries[fullURL] {
|
}
|
||||||
|
existingEntries[fullURL] = true
|
||||||
|
mu.Unlock()
|
||||||
|
|
||||||
|
// get a standard user agent
|
||||||
|
userAgent, _ := GetUserAgent("crawler-std")
|
||||||
|
title, desc, keywords := fetchPageMetadataStandard(fullURL, userAgent)
|
||||||
|
|
||||||
|
if title == "" || desc == "" {
|
||||||
|
// push to chromeCh
|
||||||
|
chromeCh <- dom
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// write to file
|
||||||
|
line := fmt.Sprintf("%s|%s|%s|%s|%s\n",
|
||||||
|
fullURL, title, keywords, desc, rank)
|
||||||
|
|
||||||
|
mu.Lock()
|
||||||
|
file.WriteString(line)
|
||||||
mu.Unlock()
|
mu.Unlock()
|
||||||
return
|
|
||||||
}
|
}
|
||||||
existingEntries[fullURL] = true
|
}()
|
||||||
mu.Unlock()
|
|
||||||
|
|
||||||
title, desc, keywords := fetchPageMetadata(fullURL)
|
|
||||||
|
|
||||||
// Skip saving if title or description is missing
|
|
||||||
if title == "" || desc == "" {
|
|
||||||
printDebug("Skipping %s: missing title or description", fullURL)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
line := fmt.Sprintf("%s|%s|%s|%s|%s\n",
|
|
||||||
fullURL,
|
|
||||||
title,
|
|
||||||
keywords,
|
|
||||||
desc,
|
|
||||||
rank,
|
|
||||||
)
|
|
||||||
file.WriteString(line)
|
|
||||||
}(d)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
wg.Wait()
|
// 2) Spawn chrome workers
|
||||||
|
var wgChrome sync.WaitGroup
|
||||||
|
for i := 0; i < config.ConcurrentChromeCrawlers; i++ {
|
||||||
|
wgChrome.Add(1)
|
||||||
|
go func() {
|
||||||
|
defer wgChrome.Done()
|
||||||
|
for dom := range chromeCh {
|
||||||
|
rank := dom[0]
|
||||||
|
domainName := dom[1]
|
||||||
|
fullURL := "https://" + domainName
|
||||||
|
|
||||||
|
// Mark domain existing if not already
|
||||||
|
mu.Lock()
|
||||||
|
if domainName == "" || existingEntries[fullURL] {
|
||||||
|
mu.Unlock()
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
existingEntries[fullURL] = true
|
||||||
|
mu.Unlock()
|
||||||
|
|
||||||
|
// get a chrome user agent
|
||||||
|
userAgent, _ := GetUserAgent("crawler-chrome")
|
||||||
|
title, desc, keywords := fetchPageMetadataChrome(fullURL, userAgent)
|
||||||
|
|
||||||
|
if title == "" || desc == "" {
|
||||||
|
printWarn("Skipping (Chrome) %s: missing title/desc", fullURL)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// write to file
|
||||||
|
line := fmt.Sprintf("%s|%s|%s|%s|%s\n",
|
||||||
|
fullURL, title, keywords, desc, rank)
|
||||||
|
|
||||||
|
mu.Lock()
|
||||||
|
file.WriteString(line)
|
||||||
|
mu.Unlock()
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Feed domains into standardCh
|
||||||
|
go func() {
|
||||||
|
for _, dom := range domains {
|
||||||
|
// optionally, if maxPages is relevant, you can track how many have been processed
|
||||||
|
standardCh <- dom
|
||||||
|
}
|
||||||
|
// close the standardCh once all are queued
|
||||||
|
close(standardCh)
|
||||||
|
}()
|
||||||
|
|
||||||
|
// Wait for standard workers to finish, then close chromeCh
|
||||||
|
go func() {
|
||||||
|
wgStandard.Wait()
|
||||||
|
close(chromeCh)
|
||||||
|
}()
|
||||||
|
|
||||||
|
// Wait for chrome workers to finish
|
||||||
|
wgChrome.Wait()
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
8
go.mod
8
go.mod
|
@ -41,13 +41,21 @@ require (
|
||||||
github.com/blevesearch/zapx/v14 v14.3.10 // indirect
|
github.com/blevesearch/zapx/v14 v14.3.10 // indirect
|
||||||
github.com/blevesearch/zapx/v15 v15.3.17 // indirect
|
github.com/blevesearch/zapx/v15 v15.3.17 // indirect
|
||||||
github.com/blevesearch/zapx/v16 v16.1.9-0.20241217210638-a0519e7caf3b // indirect
|
github.com/blevesearch/zapx/v16 v16.1.9-0.20241217210638-a0519e7caf3b // indirect
|
||||||
|
github.com/chromedp/cdproto v0.0.0-20241022234722-4d5d5faf59fb // indirect
|
||||||
|
github.com/chromedp/chromedp v0.11.2 // indirect
|
||||||
|
github.com/chromedp/sysutil v1.1.0 // indirect
|
||||||
github.com/go-ole/go-ole v1.3.0 // indirect
|
github.com/go-ole/go-ole v1.3.0 // indirect
|
||||||
github.com/go-shiori/dom v0.0.0-20230515143342-73569d674e1c // indirect
|
github.com/go-shiori/dom v0.0.0-20230515143342-73569d674e1c // indirect
|
||||||
|
github.com/gobwas/httphead v0.1.0 // indirect
|
||||||
|
github.com/gobwas/pool v0.2.1 // indirect
|
||||||
|
github.com/gobwas/ws v1.4.0 // indirect
|
||||||
github.com/gogs/chardet v0.0.0-20211120154057-b7413eaefb8f // indirect
|
github.com/gogs/chardet v0.0.0-20211120154057-b7413eaefb8f // indirect
|
||||||
github.com/golang/geo v0.0.0-20230421003525-6adc56603217 // indirect
|
github.com/golang/geo v0.0.0-20230421003525-6adc56603217 // indirect
|
||||||
github.com/golang/protobuf v1.5.4 // indirect
|
github.com/golang/protobuf v1.5.4 // indirect
|
||||||
github.com/golang/snappy v0.0.4 // indirect
|
github.com/golang/snappy v0.0.4 // indirect
|
||||||
|
github.com/josharian/intern v1.0.0 // indirect
|
||||||
github.com/json-iterator/go v1.1.12 // indirect
|
github.com/json-iterator/go v1.1.12 // indirect
|
||||||
|
github.com/mailru/easyjson v0.7.7 // indirect
|
||||||
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
|
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
|
||||||
github.com/modern-go/reflect2 v1.0.2 // indirect
|
github.com/modern-go/reflect2 v1.0.2 // indirect
|
||||||
github.com/mschoch/smat v0.2.0 // indirect
|
github.com/mschoch/smat v0.2.0 // indirect
|
||||||
|
|
17
go.sum
17
go.sum
|
@ -47,6 +47,12 @@ github.com/blevesearch/zapx/v16 v16.1.9-0.20241217210638-a0519e7caf3b h1:ju9Az5Y
|
||||||
github.com/blevesearch/zapx/v16 v16.1.9-0.20241217210638-a0519e7caf3b/go.mod h1:BlrYNpOu4BvVRslmIG+rLtKhmjIaRhIbG8sb9scGTwI=
|
github.com/blevesearch/zapx/v16 v16.1.9-0.20241217210638-a0519e7caf3b/go.mod h1:BlrYNpOu4BvVRslmIG+rLtKhmjIaRhIbG8sb9scGTwI=
|
||||||
github.com/chai2010/webp v1.1.1 h1:jTRmEccAJ4MGrhFOrPMpNGIJ/eybIgwKpcACsrTEapk=
|
github.com/chai2010/webp v1.1.1 h1:jTRmEccAJ4MGrhFOrPMpNGIJ/eybIgwKpcACsrTEapk=
|
||||||
github.com/chai2010/webp v1.1.1/go.mod h1:0XVwvZWdjjdxpUEIf7b9g9VkHFnInUSYujwqTLEuldU=
|
github.com/chai2010/webp v1.1.1/go.mod h1:0XVwvZWdjjdxpUEIf7b9g9VkHFnInUSYujwqTLEuldU=
|
||||||
|
github.com/chromedp/cdproto v0.0.0-20241022234722-4d5d5faf59fb h1:noKVm2SsG4v0Yd0lHNtFYc9EUxIVvrr4kJ6hM8wvIYU=
|
||||||
|
github.com/chromedp/cdproto v0.0.0-20241022234722-4d5d5faf59fb/go.mod h1:4XqMl3iIW08jtieURWL6Tt5924w21pxirC6th662XUM=
|
||||||
|
github.com/chromedp/chromedp v0.11.2 h1:ZRHTh7DjbNTlfIv3NFTbB7eVeu5XCNkgrpcGSpn2oX0=
|
||||||
|
github.com/chromedp/chromedp v0.11.2/go.mod h1:lr8dFRLKsdTTWb75C/Ttol2vnBKOSnt0BW8R9Xaupi8=
|
||||||
|
github.com/chromedp/sysutil v1.1.0 h1:PUFNv5EcprjqXZD9nJb9b/c9ibAbxiYo4exNWZyipwM=
|
||||||
|
github.com/chromedp/sysutil v1.1.0/go.mod h1:WiThHUdltqCNKGc4gaU50XgYjwjYIhKWoHGPTUfWTJ8=
|
||||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||||
|
@ -57,6 +63,12 @@ github.com/go-shiori/dom v0.0.0-20230515143342-73569d674e1c h1:wpkoddUomPfHiOziH
|
||||||
github.com/go-shiori/dom v0.0.0-20230515143342-73569d674e1c/go.mod h1:oVDCh3qjJMLVUSILBRwrm+Bc6RNXGZYtoh9xdvf1ffM=
|
github.com/go-shiori/dom v0.0.0-20230515143342-73569d674e1c/go.mod h1:oVDCh3qjJMLVUSILBRwrm+Bc6RNXGZYtoh9xdvf1ffM=
|
||||||
github.com/go-shiori/go-readability v0.0.0-20241012063810-92284fa8a71f h1:cypj7SJh+47G9J3VCPdMzT3uWcXWAWDJA54ErTfOigI=
|
github.com/go-shiori/go-readability v0.0.0-20241012063810-92284fa8a71f h1:cypj7SJh+47G9J3VCPdMzT3uWcXWAWDJA54ErTfOigI=
|
||||||
github.com/go-shiori/go-readability v0.0.0-20241012063810-92284fa8a71f/go.mod h1:YWa00ashoPZMAOElrSn4E1cJErhDVU6PWAll4Hxzn+w=
|
github.com/go-shiori/go-readability v0.0.0-20241012063810-92284fa8a71f/go.mod h1:YWa00ashoPZMAOElrSn4E1cJErhDVU6PWAll4Hxzn+w=
|
||||||
|
github.com/gobwas/httphead v0.1.0 h1:exrUm0f4YX0L7EBwZHuCF4GDp8aJfVeBrlLQrs6NqWU=
|
||||||
|
github.com/gobwas/httphead v0.1.0/go.mod h1:O/RXo79gxV8G+RqlR/otEwx4Q36zl9rqC5u12GKvMCM=
|
||||||
|
github.com/gobwas/pool v0.2.1 h1:xfeeEhW7pwmX8nuLVlqbzVc7udMDrwetjEv+TZIz1og=
|
||||||
|
github.com/gobwas/pool v0.2.1/go.mod h1:q8bcK0KcYlCgd9e7WYLm9LpyS+YeLd8JVDW6WezmKEw=
|
||||||
|
github.com/gobwas/ws v1.4.0 h1:CTaoG1tojrh4ucGPcoJFiAQUAsEWekEWvLy7GsVNqGs=
|
||||||
|
github.com/gobwas/ws v1.4.0/go.mod h1:G3gNqMNtPppf5XUz7O4shetPpcZ1VJ7zt18dlUeakrc=
|
||||||
github.com/gogs/chardet v0.0.0-20211120154057-b7413eaefb8f h1:3BSP1Tbs2djlpprl7wCLuiqMaUh5SJkkzI2gDs+FgLs=
|
github.com/gogs/chardet v0.0.0-20211120154057-b7413eaefb8f h1:3BSP1Tbs2djlpprl7wCLuiqMaUh5SJkkzI2gDs+FgLs=
|
||||||
github.com/gogs/chardet v0.0.0-20211120154057-b7413eaefb8f/go.mod h1:Pcatq5tYkCW2Q6yrR2VRHlbHpZ/R4/7qyL1TCF7vl14=
|
github.com/gogs/chardet v0.0.0-20211120154057-b7413eaefb8f/go.mod h1:Pcatq5tYkCW2Q6yrR2VRHlbHpZ/R4/7qyL1TCF7vl14=
|
||||||
github.com/golang/geo v0.0.0-20230421003525-6adc56603217 h1:HKlyj6in2JV6wVkmQ4XmG/EIm+SCYlPZ+V4GWit7Z+I=
|
github.com/golang/geo v0.0.0-20230421003525-6adc56603217 h1:HKlyj6in2JV6wVkmQ4XmG/EIm+SCYlPZ+V4GWit7Z+I=
|
||||||
|
@ -68,10 +80,14 @@ github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEW
|
||||||
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
|
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
|
||||||
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
|
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
|
||||||
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
|
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
|
||||||
|
github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
|
||||||
|
github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
|
||||||
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
|
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
|
||||||
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
|
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
|
||||||
github.com/leonelquinteros/gotext v1.7.0 h1:jcJmF4AXqyamP7vuw2MMIKs+O3jAEmvrc5JQiI8Ht/8=
|
github.com/leonelquinteros/gotext v1.7.0 h1:jcJmF4AXqyamP7vuw2MMIKs+O3jAEmvrc5JQiI8Ht/8=
|
||||||
github.com/leonelquinteros/gotext v1.7.0/go.mod h1:qJdoQuERPpccw7L70uoU+K/BvTfRBHYsisCQyFLXyvw=
|
github.com/leonelquinteros/gotext v1.7.0/go.mod h1:qJdoQuERPpccw7L70uoU+K/BvTfRBHYsisCQyFLXyvw=
|
||||||
|
github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0=
|
||||||
|
github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
|
||||||
github.com/mattn/go-runewidth v0.0.10/go.mod h1:RAqKPSqVFrSLVXbA8x7dzmKdmGzieGRCM46jaSJTDAk=
|
github.com/mattn/go-runewidth v0.0.10/go.mod h1:RAqKPSqVFrSLVXbA8x7dzmKdmGzieGRCM46jaSJTDAk=
|
||||||
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
|
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
|
||||||
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
|
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
|
||||||
|
@ -137,6 +153,7 @@ golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBc
|
||||||
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
|
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||||
|
|
Loading…
Add table
Reference in a new issue