start rewriting readability

This commit is contained in:
Nazar Kanaev 2021-03-30 11:47:36 +01:00
parent 8c44d2fc87
commit e5920259b6
8 changed files with 238 additions and 69 deletions

View file

@ -2,9 +2,6 @@ package scraper
import (
"net/url"
"strings"
"golang.org/x/net/html"
)
func any(els []string, el string, match func(string, string) bool) bool {
@ -16,44 +13,6 @@ func any(els []string, el string, match func(string, string) bool) bool {
return false
}
// getAttr looks up an attribute on node by its key and returns the value of
// the first attribute whose Key equals key. When no attribute matches, the
// empty string is returned.
func getAttr(node *html.Node, key string) string {
	for i := range node.Attr {
		attr := node.Attr[i]
		if attr.Key != key {
			continue
		}
		return attr.Val
	}
	return ""
}
// getText gathers every text node found by getNodes starting at node, trims
// the surrounding whitespace of each node's data, and joins the pieces with a
// single space.
func getText(node *html.Node) string {
	onlyText := func(candidate *html.Node) bool {
		return candidate.Type == html.TextNode
	}
	textNodes := getNodes(node, onlyText)

	// One output piece per text node, in the order getNodes returned them.
	pieces := make([]string, len(textNodes))
	for i, tn := range textNodes {
		pieces[i] = strings.TrimSpace(tn.Data)
	}
	return strings.Join(pieces, " ")
}
// getNodes walks the tree rooted at node breadth-first (the root itself is
// visited first) and returns every visited node for which match reports true,
// in visit order. The result is an empty, non-nil slice when nothing matches.
func getNodes(node *html.Node, match func(*html.Node) bool) []*html.Node {
	matched := []*html.Node{}
	pending := []*html.Node{node}

	// pending grows while we iterate; the cursor plays the role of the
	// queue head, so nodes are processed in first-in, first-out order.
	for cursor := 0; cursor < len(pending); cursor++ {
		current := pending[cursor]
		if match(current) {
			matched = append(matched, current)
		}
		for child := current.FirstChild; child != nil; child = child.NextSibling {
			pending = append(pending, child)
		}
	}
	return matched
}
func absoluteUrl(href, base string) string {
baseUrl, err := url.Parse(base)
if err != nil {