Skip to content

Commit cd1a744

Browse files
authored
Merge pull request #138 from internetarchive/xml-tests
Add XML extractor tests
2 parents cb3b9db + d4377bc commit cd1a744

File tree

1 file changed

+103
-0
lines changed

1 file changed

+103
-0
lines changed
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
package extractor
2+
3+
import (
4+
"bytes"
5+
"io"
6+
"net/http"
7+
"net/url"
8+
"testing"
9+
)
10+
11+
func TestXML(t *testing.T) {
12+
tests := []struct {
13+
name string
14+
xmlBody string
15+
wantURLs []*url.URL
16+
wantErr bool
17+
}{
18+
{
19+
name: "Valid XML with URLs",
20+
xmlBody: `
21+
<root>
22+
<item>http://example.com</item>
23+
<nested>
24+
<url>https://example.org</url>
25+
</nested>
26+
<noturl>just some text</noturl>
27+
</root>`,
28+
wantURLs: []*url.URL{
29+
{Scheme: "http", Host: "example.com"},
30+
{Scheme: "https", Host: "example.org"},
31+
},
32+
wantErr: false,
33+
},
34+
{
35+
name: "Empty XML",
36+
xmlBody: `<root></root>`,
37+
wantURLs: nil,
38+
wantErr: false,
39+
},
40+
{
41+
name: "Invalid XML",
42+
xmlBody: `<root><unclosed>`,
43+
wantURLs: nil,
44+
wantErr: true,
45+
},
46+
{
47+
name: "XML with invalid URL",
48+
xmlBody: `
49+
<root>
50+
<item>http://example.com</item>
51+
<item>not a valid url</item>
52+
</root>`,
53+
wantURLs: []*url.URL{
54+
{Scheme: "http", Host: "example.com"},
55+
},
56+
wantErr: false,
57+
},
58+
}
59+
60+
for _, tt := range tests {
61+
t.Run(tt.name, func(t *testing.T) {
62+
resp := &http.Response{
63+
Body: io.NopCloser(bytes.NewBufferString(tt.xmlBody)),
64+
}
65+
66+
gotURLs, err := XML(resp)
67+
68+
if (err != nil) != tt.wantErr {
69+
t.Errorf("XML() error = %v, wantErr %v", err, tt.wantErr)
70+
return
71+
}
72+
73+
if !compareURLs(gotURLs, tt.wantURLs) {
74+
t.Errorf("XML() gotURLs = %v, want %v", gotURLs, tt.wantURLs)
75+
}
76+
})
77+
}
78+
}
79+
80+
func TestXMLBodyReadError(t *testing.T) {
81+
resp := &http.Response{
82+
Body: io.NopCloser(bytes.NewReader([]byte{})), // Empty reader to simulate EOF
83+
}
84+
resp.Body.Close() // Close the body to simulate a read error
85+
86+
_, err := XML(resp)
87+
if err == nil {
88+
t.Errorf("XML() expected error, got nil")
89+
}
90+
}
91+
92+
// compareURLs reports whether a and b have the same length and hold equal
// URLs at every index. Two non-nil URLs are equal when their String() forms
// match. A nil entry is equal only to another nil entry, so a nil element in
// either slice yields a mismatch instead of a nil-pointer panic.
func compareURLs(a, b []*url.URL) bool {
	if len(a) != len(b) {
		return false
	}
	for i := range a {
		x, y := a[i], b[i]
		if x == nil || y == nil {
			// String() dereferences its receiver, so nil entries must be
			// compared by pointer rather than by string form.
			if x != y {
				return false
			}
			continue
		}
		if x.String() != y.String() {
			return false
		}
	}
	return true
}

0 commit comments

Comments
 (0)