-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathparse.go
105 lines (89 loc) · 1.79 KB
/
parse.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
package kiteroot
import (
"errors"
"io"
"strings"
"golang.org/x/net/html"
)
// ErrInvalidPair is the error returned by Parse when the HTML's opening and
// closing tags do not pair up.
var ErrInvalidPair = errors.New("open/close tag mismatched")
// isSelfClosingTag is a set, keyed by tag name, of the elements that are
// treated as self-closing. A lookup for any other name yields false.
var isSelfClosingTag = func() map[string]bool {
	names := []string{
		"area", "base", "br", "col",
		"command", "embed", "hr", "img",
		"input", "keygen", "link", "meta",
		"param", "source", "track", "wbr",
	}
	set := make(map[string]bool, len(names))
	for _, name := range names {
		set[name] = true
	}
	return set
}()
// Parse reads HTML from r and returns the root document element of the
// resulting element tree.
//
// A start tag is appended to the element currently on top of the
// open-element stack and, unless it is self-closing (written as <x/> or
// listed in isSelfClosingTag), pushed so that subsequent nodes become its
// children. An end tag pops the stack until an element whose Content equals
// the tag name has been removed; end tags for self-closing elements, and end
// tags for which the stack reports no matching element, are ignored. Text is
// trimmed of surrounding whitespace, and text consisting only of whitespace
// is dropped. Other token kinds (comments, doctypes) are ignored.
//
// Parse returns the tokenizer's error if tokenizing stops for any reason
// other than end of input, and ErrInvalidPair if the stack is empty when a
// node must be attached or if elements are still open when the input ends.
func Parse(r io.Reader) (*Element, error) {
	var st Stack
	z := html.NewTokenizer(r)
	doc := NewDocument()
	st.Push(doc)

	for {
		tt := z.Next()
		if tt == html.ErrorToken {
			// io.EOF marks the normal end of input; anything else is a
			// genuine failure and is passed on to the caller.
			if err := z.Err(); !errors.Is(err, io.EOF) {
				return nil, err
			}
			break
		}

		switch tt {
		case html.StartTagToken, html.SelfClosingTagToken:
			parent := st.Top()
			if parent == nil {
				return nil, ErrInvalidPair
			}

			t := z.Token()
			// Elements such as <br> never get a closing tag even when they
			// are not written in the <br/> form.
			sc := tt == html.SelfClosingTagToken || isSelfClosingTag[t.Data]
			tag := NewTag(t.Data, sc)
			for _, attr := range t.Attr {
				tag.SetAttribute(attr.Key, attr.Val)
			}
			parent.Append(tag)
			if !sc {
				st.Push(tag)
			}

		case html.EndTagToken:
			t := z.Token()
			// Ignore stray end tags: those of self-closing elements (which
			// were never pushed) and those with no matching open element.
			if isSelfClosingTag[t.Data] || !st.existsTag(t.Data) {
				continue
			}
			// Pop up to and including the matching element, implicitly
			// closing any elements that were left open inside it.
			for cur := st.Pop(); cur != nil && cur.Content != t.Data; cur = st.Pop() {
			}

		case html.TextToken:
			parent := st.Top()
			if parent == nil {
				return nil, ErrInvalidPair
			}
			// Drop whitespace-only runs (newlines, indentation between
			// tags) instead of appending empty text nodes.
			s := strings.TrimSpace(string(z.Text()))
			if s == "" {
				continue
			}
			parent.Append(NewText(s))
		}
	}

	// Only the document element pushed at the start may remain; anything
	// else means an element was left open (or the document itself was popped).
	if st.Len() != 1 {
		return nil, ErrInvalidPair
	}
	return doc, nil
}