improved crawler data extraction

This commit is contained in:
partisan 2025-01-01 13:49:16 +01:00
parent a9a6948a44
commit 3494457336
4 changed files with 231 additions and 92 deletions

5
go.mod
View file

@@ -15,12 +15,14 @@ require (
require (
github.com/blevesearch/bleve/v2 v2.4.4
github.com/go-shiori/go-readability v0.0.0-20241012063810-92284fa8a71f
golang.org/x/net v0.33.0
)
require (
github.com/RoaringBitmap/roaring v1.9.4 // indirect
github.com/andybalholm/cascadia v1.3.3 // indirect
github.com/araddon/dateparse v0.0.0-20210429162001-6b43995a97de // indirect
github.com/bits-and-blooms/bitset v1.20.0 // indirect
github.com/blevesearch/bleve_index_api v1.2.0 // indirect
github.com/blevesearch/geo v0.1.20 // indirect
@@ -40,6 +42,8 @@ require (
github.com/blevesearch/zapx/v15 v15.3.17 // indirect
github.com/blevesearch/zapx/v16 v16.1.9-0.20241217210638-a0519e7caf3b // indirect
github.com/go-ole/go-ole v1.3.0 // indirect
github.com/go-shiori/dom v0.0.0-20230515143342-73569d674e1c // indirect
github.com/gogs/chardet v0.0.0-20211120154057-b7413eaefb8f // indirect
github.com/golang/geo v0.0.0-20230421003525-6adc56603217 // indirect
github.com/golang/protobuf v1.5.4 // indirect
github.com/golang/snappy v0.0.4 // indirect
@@ -51,5 +55,6 @@ require (
github.com/yusufpapurcu/wmi v1.2.4 // indirect
go.etcd.io/bbolt v1.3.11 // indirect
golang.org/x/sys v0.28.0 // indirect
golang.org/x/text v0.21.0 // indirect
google.golang.org/protobuf v1.36.0 // indirect
)