Skip to content

Commit 2ebc33d

Browse files
authored
deprecate read(Document) and parse(Document) overloadings (#42)
1 parent ff9850d commit 2ebc33d

File tree

4 files changed

+83
-72
lines changed

4 files changed

+83
-72
lines changed

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -96,8 +96,8 @@ Types:
9696
* `EzXML.StreamReader`: a streaming XML reader
9797

9898
IO:
99-
* From file: `read(EzXML.Document, filename)`, `readxml(filename|stream)`, `readhtml(filename|stream)`
100-
* From string or byte array: `parse(EzXML.Document, string)`, `parsexml(string)`, `parsehtml(string)`
99+
* From file: `readxml(filename|stream)`, `readhtml(filename|stream)`
100+
* From string or byte array: `parsexml(string)`, `parsehtml(string)`
101101
* To file: `write(filename, doc)`
102102
* To stream: `print(io, doc)`
103103

src/EzXML.jl

Lines changed: 50 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -116,15 +116,63 @@ include("buffer.jl")
116116
include("xpath.jl")
117117
include("streamreader.jl")
118118

119+
# Module initialization hook; Julia calls `__init__` when the module is loaded.
# Delegates to `init_error_handler()`.
__init__() = init_error_handler()
122+
123+
119124
# Deprecation
125+
# -----------
126+
120127
@deprecate name nodename
121128
@deprecate setname! setnodename!
122129
@deprecate content nodecontent
123130
@deprecate setcontent! setnodecontent!
124131
@deprecate depth nodedepth
125132

126-
function __init__()
127-
init_error_handler()
133+
# Deprecated entry point: emits a deprecation warning, then reads `filename`
# with `readhtml` when the name ends in ".html" or ".htm", and with `readxml`
# otherwise.
function Base.read(::Type{Document}, filename::AbstractString)
    # `@warn` is used on 0.7+, the older `warn` function before that.
    @static if VERSION > v"0.7-"
        @warn "read(Document, filename) is deprecated, use readxml(filename) or readhtml(filename) instead"
    else
        warn("read(Document, filename) is deprecated, use readxml(filename) or readhtml(filename) instead")
    end
    has_html_extension = endswith(filename, ".html") || endswith(filename, ".htm")
    return has_html_extension ? readhtml(filename) : readxml(filename)
end
145+
146+
# Deprecated entry point: emits a deprecation warning, then parses
# `inputstring` with `parsehtml` when `is_html_like` says it looks like HTML,
# and with `parsexml` otherwise.
function Base.parse(::Type{Document}, inputstring::AbstractString)
    # `@warn` is used on 0.7+, the older `warn` function before that.
    @static if VERSION > v"0.7-"
        @warn "parse(Document, string) is deprecated, use parsexml(string) or parsehtml(string) instead"
    else
        warn("parse(Document, string) is deprecated, use parsexml(string) or parsehtml(string) instead")
    end
    parser = is_html_like(inputstring) ? parsehtml : parsexml
    return parser(inputstring)
end
158+
159+
# Deprecated byte-array variant: converts `inputdata` to a `String` and forwards
# to `parse(Document, ::AbstractString)`.
#
# The bytes are copied before conversion because, on Julia 0.7 and later,
# `String(::Vector{UInt8})` takes ownership of the buffer and truncates the
# vector to zero length; without the copy the caller's data would be emptied
# as a side effect of parsing.
function Base.parse(::Type{Document}, inputdata::Vector{UInt8})
    return parse(Document, String(copy(inputdata)))
end
162+
163+
# Try to infer whether an input is formatted in HTML.
#
# Heuristic, checked in this order:
#   1. leading (optionally whitespace-prefixed) `<!DOCTYPE html`  -> HTML
#   2. leading (optionally whitespace-prefixed) `<?xml`           -> not HTML
#   3. first occurrence of `<html` starts before byte index 100   -> HTML
#   4. anything else                                              -> not HTML
#
# Returns a `Bool`.
function is_html_like(inputstring)
    # `ismatch` and `searchindex` were deprecated in Julia 0.7 and removed in
    # 1.0; `occursin` and `findfirst` replace them there. The legacy calls are
    # kept for older versions, which lack the new names.
    @static if VERSION > v"0.7-"
        if occursin(r"^\s*<!DOCTYPE html", inputstring)
            return true
        elseif occursin(r"^\s*<\?xml", inputstring)
            return false
        end
        found = findfirst("<html", inputstring)
        # Normalize to the old `searchindex` convention: 0 means "not found".
        i = found === nothing ? 0 : first(found)
    else
        if ismatch(r"^\s*<!DOCTYPE html", inputstring)
            return true
        elseif ismatch(r"^\s*<\?xml", inputstring)
            return false
        end
        i = searchindex(inputstring, "<html")
    end
    return 0 < i < 100
end
129177

130178
end # module

src/document.jl

Lines changed: 0 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -52,33 +52,6 @@ function prettyprint(io::IO, doc::Document)
5252
prettyprint(io, doc.node)
5353
end
5454

55-
function Base.parse(::Type{Document}, inputstring::AbstractString)
56-
if is_html_like(inputstring)
57-
return parsehtml(inputstring)
58-
else
59-
return parsexml(inputstring)
60-
end
61-
end
62-
63-
function Base.parse(::Type{Document}, inputdata::Vector{UInt8})
64-
return parse(Document, String(inputdata))
65-
end
66-
67-
# Try to infer whether an input is formatted in HTML.
68-
function is_html_like(inputstring)
69-
if ismatch(r"^\s*<!DOCTYPE html", inputstring)
70-
return true
71-
elseif ismatch(r"^\s*<\?xml", inputstring)
72-
return false
73-
end
74-
i = searchindex(inputstring, "<html")
75-
if 0 < i < 100
76-
return true
77-
else
78-
return false
79-
end
80-
end
81-
8255
"""
8356
parsexml(xmlstring)
8457
@@ -124,14 +97,6 @@ function parsehtml(htmldata::Vector{UInt8})
12497
return parsehtml(String(htmldata))
12598
end
12699

127-
function Base.read(::Type{Document}, filename::AbstractString)
128-
if endswith(filename, ".html") || endswith(filename, ".htm")
129-
return readhtml(filename)
130-
else
131-
return readxml(filename)
132-
end
133-
end
134-
135100
"""
136101
readxml(filename)
137102

test/runtests.jl

Lines changed: 31 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -30,18 +30,16 @@ end
3030
@testset "XML" begin
3131
valid_file = joinpath(dirname(@__FILE__), "sample1.xml")
3232
invalid_file = joinpath(dirname(@__FILE__), "sample1.invalid.xml")
33-
doc = read(EzXML.Document, valid_file)
33+
doc = readxml(valid_file)
3434
@test isa(doc, EzXML.Document)
3535
@test nodetype(doc.node) === EzXML.DOCUMENT_NODE
3636
@test nodetype(readxml(valid_file).node) === EzXML.DOCUMENT_NODE
37-
@test_throws EzXML.XMLError read(EzXML.Document, invalid_file)
37+
@test_throws EzXML.XMLError readxml(invalid_file)
3838
@assert !isfile("not-exist.xml")
39-
@test_throws EzXML.XMLError read(EzXML.Document, "not-exist.xml")
4039
@test_throws EzXML.XMLError readxml("not-exist.xml")
4140

4241
# from compressed file
4342
compressed = joinpath(dirname(@__FILE__), "sample1.xml.gz")
44-
@test isa(read(EzXML.Document, compressed), EzXML.Document)
4543
@test isa(readxml(compressed), EzXML.Document)
4644

4745
# from stream
@@ -53,17 +51,17 @@ end
5351

5452
@testset "HTML" begin
5553
valid_file = joinpath(dirname(@__FILE__), "sample1.html")
56-
doc = read(EzXML.Document, valid_file)
54+
doc = readhtml(valid_file)
5755
@test isa(doc, EzXML.Document)
5856
@test nodetype(doc.node) === EzXML.HTML_DOCUMENT_NODE
5957
@test nodetype(readhtml(valid_file).node) === EzXML.HTML_DOCUMENT_NODE
6058
@assert !isfile("not-exist.html")
61-
@test_throws EzXML.XMLError read(EzXML.Document, "not-exist.html")
59+
@test_throws EzXML.XMLError readxml("not-exist.html")
6260
@test_throws EzXML.XMLError readhtml("not-exist.html")
6361

6462
# from compressed file
6563
compressed = joinpath(dirname(@__FILE__), "sample1.html.gz")
66-
@test isa(read(EzXML.Document, compressed), EzXML.Document)
64+
@test isa(readxml(compressed), EzXML.Document)
6765
@test isa(readhtml(compressed), EzXML.Document)
6866

6967
# from stream (FIXME: this causes "Misplaced DOCTYPE declaration")
@@ -101,20 +99,20 @@ end
10199
<foo>ok</foo>
102100
</root>
103101
"""
104-
doc = parse(EzXML.Document, docstr)
102+
doc = parsexml(docstr)
105103
tmp = tempname()
106104
try
107105
@test write(tmp, doc) == sizeof(docstr)
108106
@test String(read(tmp)) == docstr
109-
@test string(read(EzXML.Document, tmp)) == docstr
107+
@test string(readxml(tmp)) == docstr
110108
finally
111109
rm(tmp)
112110
end
113111
end
114112

115113
@testset "Parser" begin
116114
@testset "XML" begin
117-
doc = parse(EzXML.Document, """
115+
doc = parsexml("""
118116
<?xml version="1.0"?>
119117
<root>
120118
<child attr="value">content</child>
@@ -123,15 +121,15 @@ end
123121
@test isa(doc, EzXML.Document)
124122
@test nodetype(doc.node) === EzXML.DOCUMENT_NODE
125123

126-
doc = parse(EzXML.Document, """
124+
doc = parsexml("""
127125
<root>
128126
<child attr="value">content</child>
129127
</root>
130128
""")
131129
@test isa(doc, EzXML.Document)
132130
@test nodetype(doc.node) === EzXML.DOCUMENT_NODE
133131

134-
doc = parse(EzXML.Document, b"""
132+
doc = parsexml(b"""
135133
<?xml version="1.0"?>
136134
<root>
137135
<child attr="value">content</child>
@@ -145,7 +143,7 @@ end
145143
@test nodetype(parsexml(b"<html/>").node) === EzXML.DOCUMENT_NODE
146144

147145
# This includes multi-byte characters.
148-
doc = parse(EzXML.Document, """
146+
doc = parsexml("""
149147
<?xml version="1.0" encoding="UTF-8" ?>
150148
<Link>
151149
<Name>pubmed_pubmed</Name>
@@ -156,17 +154,17 @@ end
156154
""")
157155
@test nodetype(doc.node) === EzXML.DOCUMENT_NODE
158156

159-
@test_throws ArgumentError parse(EzXML.Document, "")
160-
@test_throws EzXML.XMLError parse(EzXML.Document, " ")
161-
@test_throws EzXML.XMLError parse(EzXML.Document, "abracadabra")
162-
@test_throws EzXML.XMLError parse(EzXML.Document, """<?xml version="1.0"?>""")
157+
@test_throws ArgumentError parsexml("")
158+
@test_throws EzXML.XMLError parsexml(" ")
159+
@test_throws EzXML.XMLError parsexml("abracadabra")
160+
@test_throws EzXML.XMLError parsexml("""<?xml version="1.0"?>""")
163161

164162
info("the following warning is expected:")
165163
@test_throws EzXML.XMLError parsexml("<gepa?>jgo<<<><<")
166164
end
167165

168166
@testset "HTML" begin
169-
doc = parse(EzXML.Document, """
167+
doc = parsehtml("""
170168
<!DOCTYPE html>
171169
<html>
172170
<head>
@@ -182,7 +180,7 @@ end
182180
@test hasdtd(doc)
183181
@test nodename(dtd(doc)) == "html"
184182

185-
doc = parse(EzXML.Document, """
183+
doc = parsehtml("""
186184
<html>
187185
<head>
188186
<title>Title</title>
@@ -196,7 +194,7 @@ end
196194
@test nodetype(doc.node) === EzXML.HTML_DOCUMENT_NODE
197195
@test hasdtd(doc)
198196

199-
doc = parse(EzXML.Document, b"""
197+
doc = parsehtml(b"""
200198
<!DOCTYPE html>
201199
<html>
202200
<head>
@@ -523,7 +521,7 @@ end
523521
@test_throws ArgumentError systemID(root(doc))
524522
@test_throws ArgumentError externalID(root(doc))
525523

526-
doc = parse(EzXML.Document, """
524+
doc = parsexml("""
527525
<?xml version="1.0"?>
528526
<r>
529527
<c1/>
@@ -558,7 +556,7 @@ end
558556
@test_throws ArgumentError prevelement(c1)
559557
@test_throws ArgumentError nextelement(c3)
560558

561-
doc = parse(EzXML.Document, """
559+
doc = parsexml("""
562560
<?xml version="1.0"?>
563561
<root attr="some attribute value"><child>some content</child></root>
564562
""")
@@ -571,7 +569,7 @@ end
571569
@test !haskey(root(doc), "attr")
572570
@test_throws KeyError root(doc)["attr"]
573571

574-
doc = parse(EzXML.Document, "<root/>")
572+
doc = parsexml("<root/>")
575573
x = root(doc)
576574
@test_throws ArgumentError firstnode(x)
577575
@test_throws ArgumentError lastnode(x)
@@ -698,7 +696,7 @@ end
698696
@test_throws ArgumentError namespace(root(doc))
699697

700698
@testset "Counters" begin
701-
doc = parse(EzXML.Document, "<root/>")
699+
doc = parsexml("<root/>")
702700
@test !hasnode(root(doc))
703701
@test countnodes(root(doc)) === 0
704702
@test countelements(root(doc)) === 0
@@ -716,7 +714,7 @@ end
716714
end
717715

718716
@testset "Iterators" begin
719-
doc = parse(EzXML.Document, "<root/>")
717+
doc = parsexml("<root/>")
720718
ns = EzXML.Node[]
721719
for (i, node) in enumerate(eachnode(root(doc)))
722720
@test isa(node, EzXML.Node)
@@ -732,7 +730,7 @@ end
732730
@test length(ns) == 0
733731
@test elements(root(doc)) == ns
734732

735-
doc = parse(EzXML.Document, """
733+
doc = parsexml("""
736734
<root><c1></c1><c2></c2></root>
737735
""")
738736
ns = EzXML.Node[]
@@ -750,7 +748,7 @@ end
750748
@test length(ns) == 2
751749
@test elements(root(doc)) == ns
752750

753-
doc = parse(EzXML.Document, """
751+
doc = parsexml("""
754752
<root>
755753
<c1></c1>
756754
<c2></c2>
@@ -771,7 +769,7 @@ end
771769
@test length(ns) == 2
772770
@test elements(root(doc)) == ns
773771

774-
doc = parse(EzXML.Document, """
772+
doc = parsexml("""
775773
<root>
776774
<c1/>
777775
<c2/>
@@ -783,7 +781,7 @@ end
783781
@test nodes(root(doc), true) == reverse(nodes(root(doc)))
784782
@test elements(root(doc), true) == reverse(elements(root(doc)))
785783

786-
doc = parse(EzXML.Document, """
784+
doc = parsexml("""
787785
<?xml version="1.0"?>
788786
<root attr1="foo" attr2="bar"></root>
789787
""")
@@ -883,7 +881,7 @@ end
883881
@test t2.owner === e2
884882
@test a1.owner === e2
885883

886-
doc = parse(EzXML.Document, "<root/>")
884+
doc = parsexml("<root/>")
887885
@test isempty(nodes(root(doc)))
888886
c1 = ElementNode("c1")
889887
link!(root(doc), c1)
@@ -918,7 +916,7 @@ end
918916
@test root(doc) == el
919917
@test [(nodename(n), nodecontent(n)) for n in attributes(root(doc))] == [("attr1", "1"), ("attr2", "2")]
920918

921-
doc = parse(EzXML.Document, """
919+
doc = parsexml("""
922920
<root></root>
923921
""")
924922
@test string(doc.node) == """
@@ -944,7 +942,7 @@ end
944942
<root><child1>some text</child1><child2/><!--some comment--><![CDATA[<cdata>]]></root>
945943
"""
946944

947-
doc = parse(EzXML.Document, """
945+
doc = parsexml("""
948946
<?xml version="1.0" encoding="UTF-8"?>
949947
<root>
950948
<c1>
@@ -962,7 +960,7 @@ end
962960
@test c1.owner == c1
963961
@test c2.owner == c1
964962

965-
doc = parse(EzXML.Document, """
963+
doc = parsexml("""
966964
<root xmlns:x="http://xxx.org/" xmlns:y="http://yyy.org/">
967965
<c x:attr="x-attr" y:attr="y-attr"/>
968966
<c y:attr="y-attr" x:attr="x-attr"/>

0 commit comments

Comments
 (0)