Skip to content

Commit 49804b6

Browse files
committed
Updates for latest RDF-star.
1 parent 3f6c5af commit 49804b6

File tree

6 files changed

+1013
-128
lines changed

6 files changed

+1013
-128
lines changed

etc/trig.bnf

Lines changed: 90 additions & 94 deletions
Original file line numberDiff line numberDiff line change
@@ -1,97 +1,93 @@
1-
trigDoc ::= ( directive | block )*
2-
block ::= triplesOrGraph
3-
| wrappedGraph
4-
| triples2
5-
| GRAPH labelOrSubject wrappedGraph
6-
triplesOrGraph ::= labelOrSubject ( wrappedGraph | predicateObjectList '.' )
7-
| quotedTriple predicateObjectList '.'
8-
triples2 ::= blankNodePropertyList predicateObjectList? '.'
9-
| collection predicateObjectList '.'
10-
wrappedGraph ::= '{' triplesBlock? '}'
11-
triplesBlock ::= triples ( '.' triplesBlock? )?
12-
labelOrSubject ::= ( iri | BlankNode )
13-
directive ::= prefixID | base | sparqlPrefix | sparqlBase
14-
prefixID ::= PREFIX PNAME_NS IRIREF "."?
15-
base ::= BASE IRIREF "."?
16-
sparqlPrefix ::= "PREFIX" PNAME_NS IRIREF
17-
sparqlBase ::= "BASE" IRIREF
18-
triples ::= subject predicateObjectList
19-
| blankNodePropertyList predicateObjectList?
20-
predicateObjectList ::= verb objectList (';' (verb objectList)? )*
21-
objectList ::= object annotation? ( "," object annotation? )*
22-
verb ::= predicate | "a"
23-
subject ::= iri | blank | quotedTriple
24-
predicate ::= iri
25-
object ::= iri | blank | blankNodePropertyList | literal | quotedTriple
26-
literal ::= RDFLiteral | NumericLiteral | BooleanLiteral
27-
blank ::= BlankNode | collection
28-
blankNodePropertyList ::= "[" predicateObjectList "]"
29-
collection ::= "(" object* ")"
30-
NumericLiteral ::= INTEGER | DECIMAL | DOUBLE
31-
RDFLiteral ::= String ( LANG_DIR | ( "^^" iri ) )?
32-
BooleanLiteral ::= "true" | "false"
33-
String ::= STRING_LITERAL_QUOTE
34-
| STRING_LITERAL_SINGLE_QUOTE
35-
| STRING_LITERAL_LONG_SINGLE_QUOTE
36-
| STRING_LITERAL_LONG_QUOTE
37-
iri ::= IRIREF | PrefixedName
38-
PrefixedName ::= PNAME_LN | PNAME_NS
39-
BlankNode ::= BLANK_NODE_LABEL | ANON
40-
quotedTriple ::= "<<" qtSubject predicate qtObject ">>"
41-
qtSubject ::= iri | BlankNode | quotedTriple
42-
qtObject ::= iri | BlankNode | literal | quotedTriple
43-
annotation ::= '{|' predicateObjectList '|}'
1+
trigDoc ::= ( directive | block )*
2+
block ::= triplesOrGraph
3+
| wrappedGraph
4+
| triples2
5+
| "GRAPH" labelOrSubject wrappedGraph
6+
triplesOrGraph ::= labelOrSubject ( wrappedGraph | predicateObjectList '.' )
7+
| reifiedTriple predicateObjectList? '.'
8+
triples2 ::= blankNodePropertyList predicateObjectList? '.'
9+
| collection predicateObjectList '.'
10+
wrappedGraph ::= '{' triplesBlock? '}'
11+
triplesBlock ::= triples ( '.' triplesBlock? )?
12+
labelOrSubject ::= iri | BlankNode
13+
directive ::= prefixID | base | sparqlPrefix | sparqlBase
14+
prefixID ::= '@prefix' PNAME_NS IRIREF '.'
15+
base ::= '@base' IRIREF '.'
16+
sparqlPrefix ::= "PREFIX" PNAME_NS IRIREF
17+
sparqlBase ::= "BASE" IRIREF
18+
triples ::= subject predicateObjectList
19+
| blankNodePropertyList predicateObjectList?
20+
| reifiedTriple predicateObjectList?
21+
predicateObjectList ::= verb objectList (';' (verb objectList)? )*
22+
objectList ::= object annotation ( ',' object annotation )*
23+
verb ::= predicate | 'a'
24+
subject ::= iri | BlankNode | collection
25+
predicate ::= iri
26+
object ::= iri | BlankNode | collection | blankNodePropertyList | literal | tripleTerm | reifiedTriple
27+
literal ::= RDFLiteral | NumericLiteral | BooleanLiteral
28+
blankNodePropertyList ::= '[' predicateObjectList ']'
29+
collection ::= '(' object* ')'
30+
NumericLiteral ::= INTEGER | DECIMAL | DOUBLE
31+
RDFLiteral ::= String ( LANG_DIR | ( '^^' iri ) )?
32+
BooleanLiteral ::= 'true' | 'false'
33+
String ::= STRING_LITERAL_QUOTE | STRING_LITERAL_SINGLE_QUOTE
34+
| STRING_LITERAL_LONG_SINGLE_QUOTE | STRING_LITERAL_LONG_QUOTE
35+
iri ::= IRIREF | PrefixedName
36+
PrefixedName ::= PNAME_LN | PNAME_NS
37+
BlankNode ::= BLANK_NODE_LABEL | ANON
38+
reifier ::= '~' (iri | BlankNode)?
39+
reifiedTriple ::= '<<' rtSubject verb rtObject reifier? '>>'
40+
rtSubject ::= iri | BlankNode | reifiedTriple
41+
rtObject ::= iri | BlankNode | literal | tripleTerm | reifiedTriple
42+
tripleTerm ::= '<<(' ttSubject verb ttObject ')>>'
43+
ttSubject ::= iri | BlankNode
44+
ttObject ::= iri | BlankNode | literal | tripleTerm
45+
annotation ::= (reifier | annotationBlock)*
46+
annotationBlock ::= '{|' predicateObjectList '|}'
4447

4548
@terminals
4649

47-
GRAPH ::= [Gg][Rr][Aa][Pp][Hh]
48-
IRIREF ::= '<' ([^#x00-#x20<>"{}|^`\] | UCHAR)* '>'
49-
PNAME_NS ::= PN_PREFIX? ":"
50-
PNAME_LN ::= PNAME_NS PN_LOCAL
51-
BLANK_NODE_LABEL ::= '_:' ( PN_CHARS_U | [0-9] ) ((PN_CHARS|'.')* PN_CHARS)?
52-
LANG_DIR ::= "@" [a-zA-Z]+ ( "-" [a-zA-Z0-9]+ )* ('--' [a-zA-Z]+)?`
53-
INTEGER ::= [+-]? [0-9]+
54-
DECIMAL ::= [+-]? ( ([0-9])* '.' ([0-9])+ )
55-
DOUBLE ::= [+-]? ( [0-9]+ '.' [0-9]* EXPONENT | '.' ([0-9])+ EXPONENT | ([0-9])+ EXPONENT )
56-
EXPONENT ::= [eE] [+-]? [0-9]+
57-
STRING_LITERAL_QUOTE ::= '"' ( [^#x22#x5C#xA#xD] | ECHAR | UCHAR )* '"'
58-
STRING_LITERAL_SINGLE_QUOTE ::= "'" ( [^#x27#x5C#xA#xD] | ECHAR | UCHAR )* "'"
59-
STRING_LITERAL_LONG_SINGLE_QUOTE ::= "'''" ( ( "'" | "''" )? ( [^'\] | ECHAR | UCHAR ) )* "'''"
60-
STRING_LITERAL_LONG_QUOTE ::= '"""' ( ( '"' | '""' )? ( [^"\] | ECHAR | UCHAR ) )* '"""'
61-
UCHAR ::= ( "\u" HEX HEX HEX HEX )
62-
| ( "\U" HEX HEX HEX HEX HEX HEX HEX HEX )
63-
ECHAR ::= "\" [tbnrf\"']
64-
NIL ::= "(" WS* ")"
65-
WS ::= #x20 | #x9 | #xD | #xA
66-
ANON ::= "[" WS* "]"
67-
PN_CHARS_BASE ::= [A-Z]
68-
| [a-z]
69-
| [#x00C0-#x00D6]
70-
| [#x00D8-#x00F6]
71-
| [#x00F8-#x02FF]
72-
| [#x0370-#x037D]
73-
| [#x037F-#x1FFF]
74-
| [#x200C-#x200D]
75-
| [#x2070-#x218F]
76-
| [#x2C00-#x2FEF]
77-
| [#x3001-#xD7FF]
78-
| [#xF900-#xFDCF]
79-
| [#xFDF0-#xFFFD]
80-
| [#x10000-#xEFFFF]
81-
PN_CHARS_U ::= PN_CHARS_BASE
82-
| '_'
83-
PN_CHARS ::= PN_CHARS_U
84-
| "-"
85-
| [0-9]
86-
| #x00B7
87-
| [#x0300-#x036F]
88-
| [#x203F-#x2040]
89-
PN_PREFIX ::= PN_CHARS_BASE ( ( PN_CHARS | "." )* PN_CHARS )?
90-
PN_LOCAL ::= ( PN_CHARS_U | ':' | [0-9] | PLX ) ( ( PN_CHARS | '.' | ':' | PLX )* ( PN_CHARS | ':' | PLX ) ) ?
91-
PLX ::= PERCENT
92-
| PN_LOCAL_ESC
93-
PERCENT ::= '%' HEX HEX
94-
HEX ::= [0-9] | [A-F] | [a-f]
95-
PN_LOCAL_ESC ::= '\' ( '_' | '~' | '.' | '-' | '!' | '$' | '&' | "'" | '(' | ')' | '*' | '+' | ',' | ';' | '=' | '/' | '?' | '#' | '@' | '%' )
96-
PREFIX ::= "@"?[Pp][Rr][Ee][Ff][Ii][Xx]
97-
BASE ::= "@"?[Bb][Aa][Ss][Ee]
50+
IRIREF ::= '<' ([^#x00-#x20<>"{}|^`\] | UCHAR)* '>'
51+
PNAME_NS ::= PN_PREFIX? ':'
52+
PNAME_LN ::= PNAME_NS PN_LOCAL
53+
BLANK_NODE_LABEL ::= '_:' ( PN_CHARS_U | [0-9] ) ((PN_CHARS|'.')* PN_CHARS)?
54+
LANG_DIR ::= '@' [a-zA-Z]+ ( '-' [a-zA-Z0-9]+ )* ( '--' [a-zA-Z]+ )?
55+
INTEGER ::= [+-]? [0-9]+
56+
DECIMAL ::= [+-]? ( ([0-9])* '.' ([0-9])+ )
57+
DOUBLE ::= [+-]? ( [0-9]+ '.' [0-9]* EXPONENT | '.' ([0-9])+ EXPONENT | ([0-9])+ EXPONENT )
58+
EXPONENT ::= [eE] [+-]? [0-9]+
59+
STRING_LITERAL_QUOTE ::= '"' ( [^#x22#x5C#xA#xD] | ECHAR | UCHAR )* '"' /* #x22=" #x5C=\ #xA=new line #xD=carriage return */
60+
STRING_LITERAL_SINGLE_QUOTE ::= "'" ( [^#x27#x5C#xA#xD] | ECHAR | UCHAR )* "'" /* #x27=' #x5C=\ #xA=new line #xD=carriage return */
61+
STRING_LITERAL_LONG_SINGLE_QUOTE ::= "'''" ( ( "'" | "''" )? ( [^'\] | ECHAR | UCHAR ) )* "'''"
62+
STRING_LITERAL_LONG_QUOTE ::= '"""' ( ( '"' | '""' )? ( [^"\] | ECHAR | UCHAR ) )* '"""'
63+
UCHAR ::= ( '\u' HEX HEX HEX HEX ) | ( '\U' HEX HEX HEX HEX HEX HEX HEX HEX )
64+
ECHAR ::= ('\' [tbnrf\"'])
65+
WS ::= #x20 | #x9 | #xD | #xA /* #x20=space #x9=character tabulation #xD=carriage return #xA=new line */
66+
ANON ::= '[' WS* ']'
67+
PN_CHARS_BASE ::= ([A-Z]
68+
| [a-z]
69+
| [#x00C0-#x00D6]
70+
| [#x00D8-#x00F6]
71+
| [#x00F8-#x02FF]
72+
| [#x0370-#x037D]
73+
| [#x037F-#x1FFF]
74+
| [#x200C-#x200D]
75+
| [#x2070-#x218F]
76+
| [#x2C00-#x2FEF]
77+
| [#x3001-#xD7FF]
78+
| [#xF900-#xFDCF]
79+
| [#xFDF0-#xFFFD]
80+
| [#x10000-#xEFFFF])
81+
PN_CHARS_U ::= PN_CHARS_BASE | '_'
82+
PN_CHARS ::= (PN_CHARS_U
83+
| '-'
84+
| [0-9]
85+
| #x00B7
86+
| [#x0300-#x036F]
87+
| [#x203F-#x2040])
88+
PN_PREFIX ::= PN_CHARS_BASE ( ( PN_CHARS | '.' )* PN_CHARS )?
89+
PN_LOCAL ::= ( PN_CHARS_U | ':' | [0-9] | PLX ) ( ( PN_CHARS | '.' | ':' | PLX )* ( PN_CHARS | ':' | PLX ) ) ?
90+
PLX ::= PERCENT | PN_LOCAL_ESC
91+
PERCENT ::= '%' HEX HEX
92+
HEX ::= ([0-9] | [A-F] | [a-f])
93+
PN_LOCAL_ESC ::= '\' ( '_' | '~' | '.' | '-' | '!' | '$' | '&' | "'" | '(' | ')' | '*' | '+' | ',' | ';' | '=' | '/' | '?' | '#' | '@' | '%' )

lib/rdf/trig/reader.rb

Lines changed: 28 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,8 @@ class Reader < RDF::Turtle::Reader
2424

2525
# String terminals
2626
terminal(nil, %r(
27-
[\(\),.;\[\]Aa]
27+
<<\(|\)>>
28+
| [\(\),.;~\[\]Aa]
2829
| \^\^
2930
| \{\|
3031
| \|\}
@@ -132,16 +133,19 @@ def read_block
132133

133134
# @return [Object]
134135
def read_triplesOrGraph
135-
while name = read_labelOrSubject
136+
if name = read_labelOrSubject
136137
prod(:triplesOrGraph, %(} .)) do
138+
# labelOrSubject ( wrappedGraph | predicateObjectList '.' )
137139
token = @lexer.first
138140
case token && token.value
139141
when '{'
142+
# wrappedGraph
140143
@graph_name = name
141144
read_wrappedGraph || error("Expected wrappedGraph", production: :triplesOrGraph, token: @lexer.first)
142145
@graph_name = nil
143146
true
144147
else
148+
# predicateObjectList '.'
145149
read_predicateObjectList(name) || error("Expected predicateObjectList", production: :triplesOrGraph, token: @lexer.first)
146150
unless @recovering
147151
# If recovering, we will have eaten the closing '.'
@@ -152,15 +156,28 @@ def read_triplesOrGraph
152156
end
153157
end
154158
end
159+
elsif name = read_reifiedTriple
160+
prod(:triplesOrGraph, %(} .)) do
161+
# reifiedTriple predicateObjectList? '.'
162+
read_predicateObjectList(name)
163+
unless @recovering
164+
# If recovering, we will have eaten the closing '.'
165+
token = @lexer.shift
166+
unless token && token.value == '.'
167+
error("Expected '.' following triple", production: :triplesOrGraph, token: token)
168+
end
169+
end
170+
end
155171
end
156-
true
172+
!!name
157173
end
158174

159175
# @return [Object]
160176
def read_triples2
161177
token = @lexer.first
162178
case token && token.value
163179
when '['
180+
# blankNodePropertyList predicateObjectList? '.'
164181
prod(:triples2) do
165182
# blankNodePropertyList predicateObjectList?
166183
subject = read_blankNodePropertyList || error("Failed to parse blankNodePropertyList", production: :triples2, token: @lexer.first)
@@ -174,14 +191,10 @@ def read_triples2
174191
end
175192
true
176193
end
177-
when '('
194+
when '<<'
178195
prod(:triples2) do
179-
subject = read_collection || error("Failed to parse read_collection", production: :triples2, token: @lexer.first)
180-
token = @lexer.first
181-
case token && (token.type || token.value)
182-
when 'a', :IRIREF, :PNAME_LN, :PNAME_NS then read_predicateObjectList(subject)
183-
else error("Expected predicateObjectList after collection subject", production: :triples2, token: token)
184-
end
196+
subject = read_reifiedTriple || error("Failed to parse reifiedTriple", production: :triples2, token: @lexer.first)
197+
read_predicateObjectList(subject) || subject
185198
if !@recovering || @lexer.first === '.'
186199
# If recovering, we will have eaten the closing '.'
187200
token = @lexer.shift
@@ -191,9 +204,10 @@ def read_triples2
191204
end
192205
true
193206
end
194-
when '<<'
207+
when '('
208+
# collection predicateObjectList '.'
195209
prod(:triples2) do
196-
subject = read_quotedTriple || error("Failed to parse embedded triple", production: :triples2, token: @lexer.first)
210+
subject = read_collection || error("Failed to parse read_collection", production: :triples2, token: @lexer.first)
197211
token = @lexer.first
198212
case token && (token.type || token.value)
199213
when 'a', :IRIREF, :PNAME_LN, :PNAME_NS then read_predicateObjectList(subject)
@@ -208,6 +222,8 @@ def read_triples2
208222
end
209223
true
210224
end
225+
else
226+
false
211227
end
212228
end
213229

lib/rdf/trig/writer.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -215,7 +215,7 @@ def order_graphs
215215
# Perform any statement preprocessing required. This is used to perform reference counts and determine required
216216
# prefixes.
217217
# @param [Statement] statement
218-
def preprocess_statement(statement)
218+
def preprocess_statement(statement, as_subject: true)
219219
super
220220
get_pname(statement.graph_name) if statement.has_graph?
221221
end

script/parse

Lines changed: 48 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ def run(input, **options)
2121
num = 0
2222
Profiler__::start_profile if options[:profile]
2323
if options[:output_format] == :nquads || options[:quiet]
24-
r = reader_class.new(input, options[:parser_options])
24+
r = reader_class.new(input, **options[:parser_options])
2525
r.each do |statement|
2626
num += 1
2727
if options[:errors] && statement.invalid?
@@ -82,41 +82,68 @@ options = {
8282
}
8383
input = nil
8484

85-
opts = GetoptLong.new(
86-
["--dbg", GetoptLong::NO_ARGUMENT],
87-
["--base", GetoptLong::REQUIRED_ARGUMENT],
88-
["--errors", GetoptLong::NO_ARGUMENT],
89-
["--execute", "-e", GetoptLong::REQUIRED_ARGUMENT],
90-
["--canonicalize", GetoptLong::NO_ARGUMENT],
91-
["--freebase", GetoptLong::NO_ARGUMENT],
92-
["--format", GetoptLong::REQUIRED_ARGUMENT],
93-
["--input-format", GetoptLong::REQUIRED_ARGUMENT],
94-
["--output", "-o", GetoptLong::REQUIRED_ARGUMENT],
95-
["--profile", GetoptLong::NO_ARGUMENT],
96-
["--progress", GetoptLong::NO_ARGUMENT],
97-
["--quiet", GetoptLong::NO_ARGUMENT],
98-
["--rdfstar", GetoptLong::NO_ARGUMENT],
99-
["--validate", GetoptLong::NO_ARGUMENT],
100-
["--verbose", GetoptLong::NO_ARGUMENT]
101-
)
85+
OPT_ARGS = [
86+
["--benchmark", GetoptLong::NO_ARGUMENT, "Just parse, do not process output"],
87+
["--canonicalize", GetoptLong::NO_ARGUMENT, "Canonize all terms"],
88+
["--data", GetoptLong::NO_ARGUMENT, "Remove all except plain RDF triples (formulae, forAll, etc)v"],
89+
["--debug", GetoptLong::NO_ARGUMENT, "Debugging output"],
90+
["--errors", GetoptLong::NO_ARGUMENT, "Display invalid statements"],
91+
["--execute", "-e", GetoptLong::REQUIRED_ARGUMENT, "Run against source in argument"],
92+
["--format", GetoptLong::REQUIRED_ARGUMENT, "Output format, any RDF format symbol, sxp, or inspect"],
93+
["--help", "-?", GetoptLong::NO_ARGUMENT, "print this message"],
94+
["--input-format", GetoptLong::REQUIRED_ARGUMENT, "Format of the input file, defaults to ttl"],
95+
["--info", GetoptLong::NO_ARGUMENT, "Show progress on execution"],
96+
["--output", "-o", GetoptLong::REQUIRED_ARGUMENT, "Save output to file"],
97+
["--profile", GetoptLong::NO_ARGUMENT, "Show an execution profile"],
98+
["--quiet", GetoptLong::NO_ARGUMENT, "Do not show parser output"],
99+
["--rdfstar", GetoptLong::NO_ARGUMENT, "Parse as RDF-star"],
100+
["--stream", GetoptLong::NO_ARGUMENT, "Use streaming writer"],
101+
["--uri", GetoptLong::REQUIRED_ARGUMENT, "Default base URI"],
102+
["--validate", GetoptLong::NO_ARGUMENT, "Run parser in strict validation mode"],
103+
["--verbose", GetoptLong::NO_ARGUMENT, "Verbose output"],
104+
]
105+
106+
def usage
107+
STDERR.puts %{
108+
RDF::Turtle version #{RDF::Turtle::VERSION}
109+
110+
Usage: #{$0} [options] file ...
111+
}.gsub(/^ /, '')
112+
width = OPT_ARGS.map do |o|
113+
l = o.first.length
114+
l += o[1].length + 2 if o[1].is_a?(String)
115+
l
116+
end.max
117+
OPT_ARGS.each do |o|
118+
s = " %-*s " % [width, (o[1].is_a?(String) ? "#{o[0,2].join(', ')}" : o[0])]
119+
s += o.last
120+
STDERR.puts s
121+
end
122+
exit(1)
123+
end
124+
125+
opts = GetoptLong.new(*OPT_ARGS.map {|o| o[0..-2]})
126+
102127
opts.each do |opt, arg|
103128
case opt
104-
when '--dbg' then logger.level = Logger::DEBUG
105129
when '--base' then parser_options[:base_uri] = writer_options[:base_uri] = arg
130+
when '--benchmark' then options[:benchmark] = true
106131
when '--canonicalize' then parser_options[:canonicalize] = true
132+
when '--debug' then logger.level = Logger::DEBUG
107133
when '--errors' then options[:errors] = true
108134
when '--execute' then input = arg
109135
when '--format' then options[:output_format] = arg.to_sym
110136
when '--freebase' then parser_options[:freebase] = true
137+
when "--help" then usage()
138+
when '--info' then logger.level = Logger::INFO
111139
when '--input-format' then options[:input_format] = arg.to_sym
112140
when '--output' then options[:output] = File.open(arg, "w")
113141
when '--profile' then options[:profile] = true
114-
when '--progress' then logger.level = Logger::INFO
142+
when '--rdfstar' then parser_options[:rdfstar] = true
115143
when '--quiet'
116144
options[:quiet] = options[:quiet].to_i + 1
117145
logger.level = Logger::FATAL
118146
when '--stream' then writer_options[:stream] = true
119-
when '--rdfstar' then parser_options[:rdfstar] = true
120147
when '--validate' then parser_options[:validate] = true
121148
when '--verbose' then $verbose = true
122149
end

0 commit comments

Comments
 (0)