Skip to content

Commit 2931a16

Browse files
committed
Redo regular expressions for URI normalization to avoid Ruby warnings.
1 parent fa66bb9 commit 2931a16

File tree

2 files changed

+35
-25
lines changed

2 files changed

+35
-25
lines changed

lib/rdf/model/uri.rb

Lines changed: 34 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -29,27 +29,27 @@ class URI
2929
include RDF::Resource
3030

3131
# IRI components
32-
UCSCHAR = Regexp.compile(<<-EOS.gsub(/\s+/, ''))
33-
[\\u00A0-\\uD7FF]|[\\uF900-\\uFDCF]|[\\uFDF0-\\uFFEF]|
34-
[\\u{10000}-\\u{1FFFD}]|[\\u{20000}-\\u{2FFFD}]|[\\u{30000}-\\u{3FFFD}]|
35-
[\\u{40000}-\\u{4FFFD}]|[\\u{50000}-\\u{5FFFD}]|[\\u{60000}-\\u{6FFFD}]|
36-
[\\u{70000}-\\u{7FFFD}]|[\\u{80000}-\\u{8FFFD}]|[\\u{90000}-\\u{9FFFD}]|
37-
[\\u{A0000}-\\u{AFFFD}]|[\\u{B0000}-\\u{BFFFD}]|[\\u{C0000}-\\u{CFFFD}]|
38-
[\\u{D0000}-\\u{DFFFD}]|[\\u{E1000}-\\u{EFFFD}]
39-
EOS
40-
IPRIVATE = Regexp.compile("[\\uE000-\\uF8FF]|[\\u{F0000}-\\u{FFFFD}]|[\\u100000-\\u10FFFD]").freeze
32+
UCSCHAR = %(
33+
\\u00A0-\\uD7FF\\uF900-\\uFDCF\\uFDF0-\\uFFEF
34+
\\u{10000}-\\u{1FFFD}\\u{20000}-\\u{2FFFD}\\u{30000}-\\u{3FFFD}
35+
\\u{40000}-\\u{4FFFD}\\u{50000}-\\u{5FFFD}\\u{60000}-\\u{6FFFD}
36+
\\u{70000}-\\u{7FFFD}\\u{80000}-\\u{8FFFD}\\u{90000}-\\u{9FFFD}
37+
\\u{A0000}-\\u{AFFFD}\\u{B0000}-\\u{BFFFD}\\u{C0000}-\\u{CFFFD}
38+
\\u{D0000}-\\u{DFFFD}\\u{E1000}-\\u{EFFFD}
39+
).gsub(/\s+/, '')
40+
# iprivate: the private-use code point ranges U+E000-U+F8FF, U+F0000-U+FFFFD and U+100000-U+10FFFD.
IPRIVATE = /[\uE000-\uF8FF\u{F0000}-\u{FFFFD}\u{100000}-\u{10FFFD}]/.freeze
4141
# scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
# "-" is kept as the last member of the class so it is a literal hyphen; written as "+-."
# it forms a range from "+" to "." that would also admit ",".
SCHEME = Regexp.compile("[A-Za-z](?:[A-Za-z0-9+.-])*").freeze
4242
# port = *DIGIT (may be empty)
PORT = /[0-9]*/.freeze
4343
# Bracketed run of hex digits, ":" and "." -- simplified, no IPvFuture
IP_literal = /\[[0-9A-Fa-f:\.]*\]/.freeze
4444
# pct-encoded = "%" HEXDIG HEXDIG
PCT_ENCODED = /%[0-9A-Fa-f][0-9A-Fa-f]/.freeze
45-
GEN_DELIMS = Regexp.compile("[:/\\?\\#\\[\\]@]").freeze
46-
SUB_DELIMS = Regexp.compile("[!\\$&'\\(\\)\\*\\+,;=]").freeze
47-
RESERVED = Regexp.compile("(?:#{GEN_DELIMS}|#{SUB_DELIMS})").freeze
45+
# gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
GEN_DELIMS = %r{[:/\?\#\[\]@]}.freeze
46+
# sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
SUB_DELIMS = %r{[!\$&'\(\)\*\+,;=]}.freeze
47+
# reserved = gen-delims / sub-delims
RESERVED = Regexp.union([GEN_DELIMS, SUB_DELIMS]).freeze
4848
# unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
UNRESERVED = /[A-Za-z0-9._~-]/.freeze
4949

50-
IUNRESERVED = Regexp.compile("[A-Za-z0-9\._~-]|#{UCSCHAR}").freeze
50+
# iunreserved: either an UNRESERVED character or a character from the UCSCHAR ranges.
# UCSCHAR is a bare list of ranges, so it is wrapped in [...] here to form a character class.
IUNRESERVED = Regexp.union(UNRESERVED, Regexp.compile("[#{UCSCHAR}]")).freeze
5151

52-
IPCHAR = Regexp.compile("(?:#{IUNRESERVED}|#{PCT_ENCODED}|#{SUB_DELIMS}|:|@)").freeze
52+
# ipchar = iunreserved / pct-encoded / sub-delims / ":" / "@"
# The last alternative is a character class, so it lists only ":" and "@";
# a "|" inside the brackets would be matched literally instead of acting as alternation.
IPCHAR = Regexp.union(IUNRESERVED, PCT_ENCODED, SUB_DELIMS, /[:@]/).freeze
5353

5454
# iquery = *( ipchar / iprivate / "/" / "?" )
IQUERY = Regexp.compile("(?:#{IPCHAR}|#{IPRIVATE}|/|\\?)*").freeze
5555

@@ -66,7 +66,7 @@ class URI
6666
# ipath-empty: the empty pattern (zero characters)
IPATH_EMPTY = Regexp.new('').freeze
6767

6868
# ireg-name = *( iunreserved / pct-encoded / sub-delims )
IREG_NAME = Regexp.compile("(?:(?:#{IUNRESERVED})|(?:#{PCT_ENCODED})|(?:#{SUB_DELIMS}))*").freeze
69-
IHOST = Regexp.compile("(?:#{IP_literal})|(?:#{IREG_NAME})").freeze
69+
# ihost: either a bracketed IP literal or a registered name
IHOST = Regexp.union([IP_literal, IREG_NAME]).freeze
7070
# iuserinfo = *( iunreserved / pct-encoded / sub-delims / ":" )
IUSERINFO = Regexp.compile("(?:(?:#{IUNRESERVED})|(?:#{PCT_ENCODED})|(?:#{SUB_DELIMS})|:)*").freeze
7171
# iauthority = [ iuserinfo "@" ] ihost [ ":" port ]
IAUTHORITY = Regexp.compile("(?:#{IUSERINFO}@)?#{IHOST}(?::#{PORT})?").freeze
7272

@@ -119,14 +119,18 @@ class URI
119119
# A literal backslash followed by either a PN_ESCAPE_CHARS match or one of "-" / "_".
PN_ESCAPES = /\\#{Regexp.union(PN_ESCAPE_CHARS, /[\-_]/)}/.freeze
120120

121121
# For URI encoding
122-
ENCODE_USER = Regexp.compile("[^#{IUNRESERVED}#{SUB_DELIMS}]").freeze
123-
ENCODE_PASSWORD = Regexp.compile("[^#{IUNRESERVED}#{SUB_DELIMS}]").freeze
124-
ENCODE_ISEGMENT = Regexp.compile("[^#{IPCHAR}]").freeze
125-
ENCODE_ISEGMENT_NC = Regexp.compile("[^#{IUNRESERVED}|#{PCT_ENCODED}|[#{SUB_DELIMS}]|@]").freeze
126-
ENCODE_IQUERY = Regexp.compile("[^#{IQUERY}]").freeze
127-
ENCODE_IFRAGMENT = Regexp.compile("[^#{IFRAGMENT}]").freeze
128-
ENCODE_PORT = Regexp.compile('[^\d]').freeze
129-
ENCODE_IHOST = Regexp.compile("(?:#{IP_literal})|(?:#{IREG_NAME})").freeze
122+
# iuserinfo = *( iunreserved / pct-encoded / sub-delims / ":" )
123+
ENCODE_USER =
124+
ENCODE_PASSWORD = Regexp.compile("[^A-Za-z0-9\._~#{UCSCHAR}!$&'\(\)\*\+,;=:-]").freeze # NOTE(review): also assigned to ENCODE_USER, so ":" is left unencoded in the user part too -- confirm intended
125+
# isegment = *ipchar
126+
# ipchar = iunreserved / pct-encoded / sub-delims / ":" / "@"
127+
# Matches every character that is NOT allowed unencoded in an isegment, i.e. anything
# outside iunreserved / sub-delims / ":" / "@".
# "@" is a legal ipchar and is therefore part of the allowed set; "-" stays last in the
# class so that it is read as a literal hyphen rather than a range operator.
ENCODE_ISEGMENT = Regexp.compile("[^A-Za-z0-9\._~#{UCSCHAR}!$&'\(\)\*\+,;=:@-]").freeze
128+
# isegment-nz-nc = 1*( iunreserved / pct-encoded / sub-delims / "@" )
129+
# Matches every character that is NOT allowed unencoded in an isegment-nz-nc, i.e. anything
# outside iunreserved / sub-delims / "@". ":" is deliberately absent (no-colon segment).
# "@" is part of the allowed set per the ABNF; "-" stays last in the class so that it is
# read as a literal hyphen rather than a range operator.
ENCODE_ISEGMENT_NC = Regexp.compile("[^A-Za-z0-9\._~#{UCSCHAR}!$&'\(\)\*\+,;=@-]").freeze
130+
# iquery = *( ipchar / iprivate / "/" / "?" )
131+
# Negated class: matches any character other than ASCII letters/digits, ".", "_", "~",
# the UCSCHAR ranges, the private-use ranges, "/", "?" and "=".
# NOTE(review): the iquery ABNF admits every ipchar, yet of the sub-delims only "=" is
# listed here, and ":", "@" and "-" are absent -- confirm whether that is intentional.
ENCODE_IQUERY = Regexp.compile("[^A-Za-z0-9\._~#{UCSCHAR}\\uE000-\\uF8FF\\u{F0000}-\\u{FFFFD}\\u{100000}-\\u{10FFFD}/?=]").freeze
132+
# ifragment = *( ipchar / "/" / "?" )
133+
# Negated class: matches any character other than ASCII letters/digits, ".", "_", "~",
# the UCSCHAR ranges, "/" and "?".
# NOTE(review): the ifragment ABNF admits every ipchar, yet no sub-delims, ":", "@" or "-"
# are listed here -- confirm whether that is intentional.
ENCODE_IFRAGMENT = Regexp.compile("[^A-Za-z0-9\._~#{UCSCHAR}/?]").freeze
130134

131135
##
132136
# Cache size may be set through {RDF.config} using `uri_cache_size`.
@@ -1071,6 +1075,12 @@ def path=(value)
10711075
# Normalized version of path
10721076
# @return [String]
10731077
def normalized_path
1078+
if normalized_scheme == "urn"
1079+
# Special-case URI. Normalize the NID component only
1080+
nid, p = path.to_s.split(':', 2)
1081+
return "#{nid.downcase}:#{p}"
1082+
end
1083+
10741084
segments = path.to_s.split('/', -1) # preserve null segments
10751085

10761086
norm_segs = case
@@ -1103,7 +1113,7 @@ def normalized_path
11031113

11041114
res = self.class.normalize_path(norm_segs.join("/"))
11051115
# Special rules for specific protocols having empty paths
1106-
res = (res.empty? && %w(http https ftp tftp).include?(normalized_scheme)) ? '/' : res
1116+
(res.empty? && %w(http https ftp tftp).include?(normalized_scheme)) ? '/' : res
11071117
end
11081118

11091119
##

spec/model_uri_spec.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -519,9 +519,9 @@
519519
it "#canonicalize #{name}" do
520520
u1 = RDF::URI(input)
521521
u2 = RDF::URI(output)
522-
expect(u1.canonicalize.hash).to eq u2.hash
523522
expect(u1.canonicalize.to_s).to eq u2.to_s
524523
expect(u1.canonicalize).to eq u1.canonicalize
524+
expect(u1.canonicalize.hash).to eq u2.hash
525525
end
526526
end
527527
it "#canonicalize! alters resource" do

0 commit comments

Comments
 (0)