-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathblogspot.go
124 lines (99 loc) · 2.38 KB
/
blogspot.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
package parsefb
import (
"bufio"
"errors"
"github.com/PuerkitoBio/goquery"
"strings"
)
// GetBlogspotTimeStamp returns the timestamp string of a Blogspot post.
//
// It resolves the selector "a.timestamp-link > abbr" through QuerySelector
// and returns the raw value of that element's "title" attribute; the value is
// not parsed or validated here. When the attribute is absent, the result is
// an empty string together with a "cannot find timestamp" error.
func GetBlogspotTimeStamp(doc *goquery.Document) (string, error) {
	node := QuerySelector(doc, "a.timestamp-link > abbr")
	stamp, found := node.Attr("title")
	if !found {
		return "", errors.New("cannot find timestamp")
	}
	return stamp, nil
}
// GetBlogspotTitle returns the post title.
//
// The title is the text of whatever QuerySelector yields for "h3.post-title",
// with leading and trailing whitespace removed. The returned error is always
// nil; the error result exists only to match the other extractors.
func GetBlogspotTitle(doc *goquery.Document) (string, error) {
	heading := QuerySelector(doc, "h3.post-title")
	title := strings.TrimSpace(heading.Text())
	return title, nil
}
// GetBlogspotContent returns the inner HTML of the post body with every line
// prefixed by a single space.
//
// The body is whatever QuerySelector yields for "div.post-body". Its HTML is
// split into lines, each line is prefixed with " ", and the lines are joined
// back together with "\n" (so a trailing newline in the HTML is not kept).
//
// An error is returned if the HTML cannot be rendered or if scanning fails;
// in both cases the returned string is empty.
func GetBlogspotContent(doc *goquery.Document) (string, error) {
	body := QuerySelector(doc, "div.post-body")
	markup, err := body.Html()
	if err != nil {
		return "", err
	}

	scanner := bufio.NewScanner(strings.NewReader(markup))
	// By default a Scanner rejects any line longer than
	// bufio.MaxScanTokenSize (64 KiB) with bufio.ErrTooLong. Post bodies can
	// legitimately contain a single very long line, so allow a token as large
	// as the whole input. Passing nil lets the Scanner allocate and grow its
	// own buffer on demand.
	scanner.Buffer(nil, len(markup)+1)

	var lines []string
	for scanner.Scan() {
		lines = append(lines, " "+scanner.Text())
	}
	if err := scanner.Err(); err != nil {
		return "", err
	}
	return strings.Join(lines, "\n"), nil
}
// GetBlogspotUrl returns the post URL as declared in the page metadata.
//
// It resolves "meta[property='og:url']" through QuerySelector and returns the
// unmodified value of the element's "content" attribute. When the attribute
// is absent, the result is an empty string and a "cannot find url" error.
func GetBlogspotUrl(doc *goquery.Document) (string, error) {
	tag := QuerySelector(doc, "meta[property='og:url']")
	link, found := tag.Attr("content")
	if !found {
		return "", errors.New("cannot find url")
	}
	return link, nil
}
// GetBlogspotSummary returns the post summary as declared in the page
// metadata.
//
// It resolves "meta[property='og:description']" through QuerySelector and
// returns the element's "content" attribute with surrounding whitespace
// trimmed. When the attribute is absent, the result is an empty string and a
// "cannot find summary" error.
func GetBlogspotSummary(doc *goquery.Document) (string, error) {
	tag := QuerySelector(doc, "meta[property='og:description']")
	desc, found := tag.Attr("content")
	if !found {
		return "", errors.New("cannot find summary")
	}
	return strings.TrimSpace(desc), nil
}
// GetBlogspotAuthor returns the author name of the post.
//
// The name is the text of whatever QuerySelector yields for
// "span.post-author > span.fn", returned as-is (no whitespace trimming). The
// returned error is always nil.
func GetBlogspotAuthor(doc *goquery.Document) (string, error) {
	nameNode := QuerySelector(doc, "span.post-author > span.fn")
	author := nameNode.Text()
	return author, nil
}
// GetBlogspotTags returns the post's labels as a single ", "-separated string.
//
// Every element matching "span.post-labels > a" contributes its text, in the
// order the selection visits them. A separator is written only once some text
// has already been accumulated, so the result never starts with ", ". If
// nothing matches, the result is an empty string. The returned error is
// always nil.
func GetBlogspotTags(doc *goquery.Document) (string, error) {
	var joined strings.Builder
	links := doc.Find("span.post-labels > a")
	links.Each(func(_ int, link *goquery.Selection) {
		if joined.Len() > 0 {
			joined.WriteString(", ")
		}
		joined.WriteString(link.Text())
	})
	return joined.String(), nil
}
// ParseBlogspotPost extracts all supported fields of a Blogspot post from doc
// into a new FBPostData.
//
// Fields are filled in this order: TimeStamp, Title, Content, PostUrl,
// Summary, Author, Tags. Extraction stops at the first extractor that reports
// an error; that error is returned together with the (non-nil) partially
// filled FBPostData, in which the fields after the failing one keep their
// zero values.
func ParseBlogspotPost(doc *goquery.Document) (*FBPostData, error) {
	post := &FBPostData{}
	var err error

	if post.TimeStamp, err = GetBlogspotTimeStamp(doc); err != nil {
		return post, err
	}
	if post.Title, err = GetBlogspotTitle(doc); err != nil {
		return post, err
	}
	if post.Content, err = GetBlogspotContent(doc); err != nil {
		return post, err
	}
	if post.PostUrl, err = GetBlogspotUrl(doc); err != nil {
		return post, err
	}
	if post.Summary, err = GetBlogspotSummary(doc); err != nil {
		return post, err
	}
	if post.Author, err = GetBlogspotAuthor(doc); err != nil {
		return post, err
	}
	if post.Tags, err = GetBlogspotTags(doc); err != nil {
		return post, err
	}
	return post, nil
}