handle html type atom text
This commit is contained in:
parent
8967936fb6
commit
e50c7e1a51
5 changed files with 45 additions and 26 deletions
|
|
@@ -6,6 +6,8 @@ import (
|
|||
"html"
|
||||
"io"
|
||||
"strings"
|
||||
|
||||
"github.com/nkanaev/yarr/src/content/htmlutil"
|
||||
)
|
||||
|
||||
type atomFeed struct {
|
||||
|
|
@@ -42,6 +44,13 @@ type atomLink struct {
|
|||
|
||||
type atomLinks []atomLink
|
||||
|
||||
func (a *atomText) Text() string {
|
||||
if a.Type == "html" {
|
||||
return htmlutil.ExtractText(a.Data)
|
||||
}
|
||||
return a.Data
|
||||
}
|
||||
|
||||
func (a *atomText) String() string {
|
||||
data := a.Data
|
||||
if a.Type == "xhtml" {
|
||||
|
|
@@ -76,7 +85,7 @@ func ParseAtom(r io.Reader) (*Feed, error) {
|
|||
GUID: firstNonEmpty(srcitem.ID),
|
||||
Date: dateParse(firstNonEmpty(srcitem.Published, srcitem.Updated)),
|
||||
URL: firstNonEmpty(srcitem.OrigLink, srcitem.Links.First("alternate"), srcitem.Links.First("")),
|
||||
- Title: srcitem.Title.String(),
|
||||
+ Title: srcitem.Title.Text(),
|
||||
Content: firstNonEmpty(srcitem.Content.String(), srcitem.Summary.String(), srcitem.firstMediaDescription()),
|
||||
ImageURL: srcitem.firstMediaThumbnail(),
|
||||
AudioURL: "",
|
||||
|
|
|
|||
|
|
@@ -77,3 +77,19 @@ func TestAtomClashingNamespaces(t *testing.T) {
|
|||
t.FailNow()
|
||||
}
|
||||
}
|
||||
|
||||
func TestAtomHTMLTitle(t *testing.T) {
|
||||
feed, _ := Parse(strings.NewReader(`
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
<entry><title type="html">say <code>what</code>?</entry>
|
||||
</feed>
|
||||
`))
|
||||
have := feed.Items[0].Title
|
||||
want := "say what?"
|
||||
if !reflect.DeepEqual(want, have) {
|
||||
t.Logf("want: %#v", want)
|
||||
t.Logf("have: %#v", have)
|
||||
t.FailNow()
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue