@@ -29,27 +29,27 @@ class URI
29
29
include RDF ::Resource
30
30
31
31
# IRI components
32
- UCSCHAR = Regexp . compile ( <<-EOS . gsub ( / \s +/ , '' ) )
33
- [ \\ u00A0-\\ uD7FF]|[ \\ uF900-\\ uFDCF]|[ \\ uFDF0-\\ uFFEF]|
34
- [ \\ u{10000}-\\ u{1FFFD}]|[ \\ u{20000}-\\ u{2FFFD}]|[ \\ u{30000}-\\ u{3FFFD}]|
35
- [ \\ u{40000}-\\ u{4FFFD}]|[ \\ u{50000}-\\ u{5FFFD}]|[ \\ u{60000}-\\ u{6FFFD}]|
36
- [ \\ u{70000}-\\ u{7FFFD}]|[ \\ u{80000}-\\ u{8FFFD}]|[ \\ u{90000}-\\ u{9FFFD}]|
37
- [ \\ u{A0000}-\\ u{AFFFD}]|[ \\ u{B0000}-\\ u{BFFFD}]|[ \\ u{C0000}-\\ u{CFFFD}]|
38
- [ \\ u{D0000}-\\ u{DFFFD}]|[ \\ u{E1000}-\\ u{EFFFD}]
39
- EOS
40
- IPRIVATE = Regexp . compile ( "[\\ uE000-\\ uF8FF]|[ \\ u{F0000}-\\ u{FFFFD}]|[ \\ u100000 -\\ u10FFFD ]" ) . freeze
32
+ UCSCHAR = %(
33
+ \\ u00A0-\\ uD7FF\\ uF900-\\ uFDCF\\ uFDF0-\\ uFFEF
34
+ \\ u{10000}-\\ u{1FFFD}\\ u{20000}-\\ u{2FFFD}\\ u{30000}-\\ u{3FFFD}
35
+ \\ u{40000}-\\ u{4FFFD}\\ u{50000}-\\ u{5FFFD}\\ u{60000}-\\ u{6FFFD}
36
+ \\ u{70000}-\\ u{7FFFD}\\ u{80000}-\\ u{8FFFD}\\ u{90000}-\\ u{9FFFD}
37
+ \\ u{A0000}-\\ u{AFFFD}\\ u{B0000}-\\ u{BFFFD}\\ u{C0000}-\\ u{CFFFD}
38
+ \\ u{D0000}-\\ u{DFFFD}\\ u{E1000}-\\ u{EFFFD}
39
+ ) . gsub ( / \s +/ , '' )
40
+ IPRIVATE = Regexp . compile ( "[\\ uE000-\\ uF8FF\\ u{F0000}-\\ u{FFFFD}\\ u{100000} -\\ u{10FFFD} ]" ) . freeze
41
41
SCHEME = Regexp . compile ( "[A-Za-z](?:[A-Za-z0-9+-\. ])*" ) . freeze
42
42
PORT = Regexp . compile ( "[0-9]*" ) . freeze
43
43
IP_literal = Regexp . compile ( "\\ [[0-9A-Fa-f:\\ .]*\\ ]" ) . freeze # Simplified, no IPvFuture
44
44
PCT_ENCODED = Regexp . compile ( "%[0-9A-Fa-f][0-9A-Fa-f]" ) . freeze
45
- GEN_DELIMS = Regexp . compile ( " [:/\\ ? \\ # \\ [ \\ ]@]" ) . freeze
46
- SUB_DELIMS = Regexp . compile ( " [!\\ $&'\\ ( \\ ) \\ * \\ +,;=]" ) . freeze
47
- RESERVED = Regexp . compile ( "(?: #{ GEN_DELIMS } | #{ SUB_DELIMS } )" ) . freeze
45
+ GEN_DELIMS = Regexp . compile ( %q{ [:/\?\#\[\ ]@]} ) . freeze
46
+ SUB_DELIMS = Regexp . compile ( %q{ [!\$&'\(\)\*\ +,;=]} ) . freeze
47
+ RESERVED = Regexp . union ( GEN_DELIMS , SUB_DELIMS ) . freeze
48
48
UNRESERVED = Regexp . compile ( "[A-Za-z0-9\. _~-]" ) . freeze
49
49
50
- IUNRESERVED = Regexp . compile ( "[A-Za-z0-9 \. _~-]| #{ UCSCHAR } " ) . freeze
50
+ IUNRESERVED = Regexp . union ( UNRESERVED , Regexp . compile ( "[#{ UCSCHAR } ]" ) ) . freeze
51
51
52
- IPCHAR = Regexp . compile ( "(?: #{ IUNRESERVED } | #{ PCT_ENCODED } | #{ SUB_DELIMS } | :|@)" ) . freeze
52
+ IPCHAR = Regexp . union ( IUNRESERVED , PCT_ENCODED , SUB_DELIMS , /[ :|@]/ ) . freeze
53
53
54
54
IQUERY = Regexp . compile ( "(?:#{ IPCHAR } |#{ IPRIVATE } |/|\\ ?)*" ) . freeze
55
55
@@ -66,7 +66,7 @@ class URI
66
66
IPATH_EMPTY = Regexp . compile ( "" ) . freeze
67
67
68
68
IREG_NAME = Regexp . compile ( "(?:(?:#{ IUNRESERVED } )|(?:#{ PCT_ENCODED } )|(?:#{ SUB_DELIMS } ))*" ) . freeze
69
- IHOST = Regexp . compile ( "(?: #{ IP_literal } )|(?: #{ IREG_NAME } )" ) . freeze
69
+ IHOST = Regexp . union ( IP_literal , IREG_NAME ) . freeze
70
70
IUSERINFO = Regexp . compile ( "(?:(?:#{ IUNRESERVED } )|(?:#{ PCT_ENCODED } )|(?:#{ SUB_DELIMS } )|:)*" ) . freeze
71
71
IAUTHORITY = Regexp . compile ( "(?:#{ IUSERINFO } @)?#{ IHOST } (?::#{ PORT } )?" ) . freeze
72
72
@@ -119,14 +119,18 @@ class URI
119
119
PN_ESCAPES = /\\ #{ Regexp . union ( PN_ESCAPE_CHARS , /[\- _]/ ) } / . freeze
120
120
121
121
# For URI encoding
122
- ENCODE_USER = Regexp . compile ( "[^#{ IUNRESERVED } #{ SUB_DELIMS } ]" ) . freeze
123
- ENCODE_PASSWORD = Regexp . compile ( "[^#{ IUNRESERVED } #{ SUB_DELIMS } ]" ) . freeze
124
- ENCODE_ISEGMENT = Regexp . compile ( "[^#{ IPCHAR } ]" ) . freeze
125
- ENCODE_ISEGMENT_NC = Regexp . compile ( "[^#{ IUNRESERVED } |#{ PCT_ENCODED } |[#{ SUB_DELIMS } ]|@]" ) . freeze
126
- ENCODE_IQUERY = Regexp . compile ( "[^#{ IQUERY } ]" ) . freeze
127
- ENCODE_IFRAGMENT = Regexp . compile ( "[^#{ IFRAGMENT } ]" ) . freeze
128
- ENCODE_PORT = Regexp . compile ( '[^\d]' ) . freeze
129
- ENCODE_IHOST = Regexp . compile ( "(?:#{ IP_literal } )|(?:#{ IREG_NAME } )" ) . freeze
122
+ # iuserinfo = *( iunreserved / pct-encoded / sub-delims / ":" )
123
+ ENCODE_USER =
124
+ ENCODE_PASSWORD = Regexp . compile ( "[^A-Za-z0-9\. _~#{ UCSCHAR } !$&'\( \) \* \+ ,;=:-]" ) . freeze
125
+ # isegment = *ipchar
126
+ # ipchar = iunreserved / pct-encoded / sub-delims / ":" / "@"
127
+ ENCODE_ISEGMENT = Regexp . compile ( "[^A-Za-z0-9\. _~#{ UCSCHAR } !$&'\( \) \* \+ ,;=:-]" ) . freeze
128
+ # isegment-nz-nc = 1*( iunreserved / pct-encoded / sub-delims / "@" )
129
+ ENCODE_ISEGMENT_NC = Regexp . compile ( "[^A-Za-z0-9\. _~#{ UCSCHAR } !$&'\( \) \* \+ ,;=-]" ) . freeze
130
+ # iquery = *( ipchar / iprivate / "/" / "?" )
131
+ ENCODE_IQUERY = Regexp . compile ( "[^A-Za-z0-9\. _~#{ UCSCHAR } \\ uE000-\\ uF8FF\\ u{F0000}-\\ u{FFFFD}\\ u{100000}-\\ u{10FFFD}/?=]" ) . freeze
132
+ # ifragment = *( ipchar / "/" / "?" )
133
+ ENCODE_IFRAGMENT = Regexp . compile ( "[^A-Za-z0-9\. _~#{ UCSCHAR } /?]" ) . freeze
130
134
131
135
##
132
136
# Cache size may be set through {RDF.config} using `uri_cache_size`.
@@ -1071,6 +1075,12 @@ def path=(value)
1071
1075
# Normalized version of path
1072
1076
# @return [String]
1073
1077
def normalized_path
1078
+ if normalized_scheme == "urn"
1079
+ # Special-case URI. Normalize the NID component only
1080
+ nid , p = path . to_s . split ( ':' , 2 )
1081
+ return "#{ nid . downcase } :#{ p } "
1082
+ end
1083
+
1074
1084
segments = path . to_s . split ( '/' , -1 ) # preserve null segments
1075
1085
1076
1086
norm_segs = case
@@ -1103,7 +1113,7 @@ def normalized_path
1103
1113
1104
1114
res = self . class . normalize_path ( norm_segs . join ( "/" ) )
1105
1115
# Special rules for specific protocols having empty paths
1106
- res = ( res . empty? && %w( http https ftp tftp ) . include? ( normalized_scheme ) ) ? '/' : res
1116
+ ( res . empty? && %w( http https ftp tftp ) . include? ( normalized_scheme ) ) ? '/' : res
1107
1117
end
1108
1118
1109
1119
##
0 commit comments