handle html type atom text
This commit is contained in:
parent
8967936fb6
commit
e50c7e1a51
5 changed files with 45 additions and 26 deletions
|
|
@ -7,6 +7,8 @@ import (
|
|||
"log"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/nkanaev/yarr/src/content/htmlutil"
|
||||
)
|
||||
|
||||
type ItemStatus int
|
||||
|
|
@ -322,7 +324,7 @@ func (s *Storage) SyncSearch() {
|
|||
for _, item := range items {
|
||||
result, err := s.db.Exec(`
|
||||
insert into search (title, description, content) values (?, ?, ?)`,
|
||||
item.Title, HTMLText(item.Description), HTMLText(item.Content),
|
||||
item.Title, htmlutil.ExtractText(item.Description), htmlutil.ExtractText(item.Content),
|
||||
)
|
||||
if err != nil {
|
||||
log.Print(err)
|
||||
|
|
|
|||
|
|
@ -1,24 +0,0 @@
|
|||
package storage
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"golang.org/x/net/html"
|
||||
)
|
||||
|
||||
func HTMLText(s string) string {
|
||||
tokenizer := html.NewTokenizer(strings.NewReader(s))
|
||||
contents := make([]string, 0)
|
||||
for {
|
||||
token := tokenizer.Next()
|
||||
if token == html.ErrorToken {
|
||||
break
|
||||
}
|
||||
if token == html.TextToken {
|
||||
content := strings.TrimSpace(html.UnescapeString(string(tokenizer.Text())))
|
||||
if len(content) > 0 {
|
||||
contents = append(contents, content)
|
||||
}
|
||||
}
|
||||
}
|
||||
return strings.Join(contents, " ")
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue