-
Notifications
You must be signed in to change notification settings - Fork 246
Expand file tree
/
Copy pathjson_tokens.cpp
More file actions
247 lines (198 loc) · 7.19 KB
/
json_tokens.cpp
File metadata and controls
247 lines (198 loc) · 7.19 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
// Copyright (c) 2020-2026 Dr. Colin Hirsch and Daniel Frey
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt)
#if !defined( __cpp_exceptions )
#include <iostream>
// Fallback entry point used when the compiler reports no exception support
// (__cpp_exceptions is not defined): prints a notice to stderr and exits with
// status 1 instead of building the example.
int main()
{
   static constexpr const char* notice = "Exception support required, example unavailable.";
   std::cerr << notice << std::endl;
   return 1;
}
#else
#include <cassert>
#include <iostream>
#include <utility>
#include <vector>
#include <tao/pegtl.hpp>
#include <tao/pegtl/action/change_action_and_state.hpp>
#include <tao/pegtl/control/must_if.hpp>
#include <tao/pegtl/debug/analyze.hpp>
#include <tao/pegtl/example/json.hpp>
#include <tao/pegtl/extra/unescape.hpp>
#include <tao/pegtl/member.hpp>
namespace pegtl = TAO_PEGTL_NAMESPACE;
// This example shows a separate JSON lexer and parser.
// Part 1 -- Lexer.
namespace example
{
// Matches a single JSON structural character: one of '[', ']', '{', '}', ':' or ','.
// Each of these characters is also the value of a token_type enumerator, which is
// what allows token_char_action to convert the matched character directly.
struct json_one_rule
: pegtl::one< '[', ']', '{', '}', ':', ',' >
{};
// Matches one of the JSON literal words using the library's false_, true_ and null rules.
// The action attached to this rule (token_char_action) looks only at the first
// character of the match, which corresponds to token_type::false_ ('f'),
// token_type::true_ ('t') and token_type::null ('n').
struct json_word_rule
: pegtl::sor< pegtl::json::false_, pegtl::json::true_, pegtl::json::null >
{};
// A single lexer token: an ordered choice (alternatives are tried left to right)
// between a structural character, a literal word, a JSON string and a JSON number.
struct token_rule
: pegtl::sor< json_one_rule, json_word_rule, pegtl::json::string, pegtl::json::number >
{};
// Top-level lexer grammar: any amount of leading JSON whitespace, followed by
// tokens (each wrapped in pegtl::json::padr) repeated until pegtl::eof matches.
// NOTE(review): padr presumably consumes whitespace to the right of each token --
// confirm against the definitions in tao/pegtl/example/json.hpp.
struct lexer_rule
: pegtl::seq< pegtl::star< pegtl::json::ws >, pegtl::until< pegtl::eof, pegtl::json::padr< token_rule > > >
{};
// Kind of a lexed token, stored in a single char.
// The enumerator values are not arbitrary: for every kind except `number` the value
// is the first character of the token's text, so token_char_action can build the
// token_type directly from the first matched character.
enum class token_type : char
{
// Literal words, identified by their first letter.
null = 'n',
true_ = 't',
false_ = 'f',
// Strings and numbers are emitted by dedicated actions that name the enumerator explicitly.
string = '"',
number = '0', // default
// Structural characters, identified by the character itself.
begin_array = '[',
end_array = ']',
begin_object = '{',
end_object = '}',
name_separator = ':',
value_separator = ','
};
// One lexed token as produced by the lexer actions and consumed by the parser.
// `type` is always set; `data` carries the token text for tokens that are
// constructed with a string and stays empty otherwise.
struct json_token
{
   token_type type;   // Kind of token.
   std::string data;  // Associated text; empty for tokens built from a type only.
   // text_position pos;

   // Token without payload.
   explicit json_token( const token_type t ) noexcept
      : type( t ),
        data()
   {}

   // Token with payload, taking ownership of the given string.
   json_token( const token_type t, std::string&& d ) noexcept
      : type( t ),
        data( std::move( d ) )
   {}

   // Token with payload, copying the given string.
   json_token( const token_type t, const std::string& d )
      : type( t ),
        data( d )
   {}
};
// Action class template for the lexer. The primary template derives from
// pegtl::nothing< Rule >, so only the rules with an explicit specialisation
// below contribute tokens.
template< typename Rule >
struct lexer_action
: pegtl::nothing< Rule >
{};
struct token_char_action
{
template< typename ActionInput >
static void apply( const ActionInput& in, std::vector< json_token >& out )
{
assert( !in.empty() );
out.emplace_back( token_type( in.peek_char() ) );
}
};
// Structural characters: emit a token whose type is the matched character
// (see token_char_action).
template<>
struct lexer_action< json_one_rule >
: token_char_action
{};
// Literal words: emit a token whose type is the first character of the matched
// word (see token_char_action).
template<>
struct lexer_action< json_word_rule >
: token_char_action
{};
// String contents: while the content of a JSON string is being matched, the
// action is switched to pegtl::unescape and the state to a temporary std::string
// (via pegtl::change_action_and_state). Once the content has matched, success()
// turns the collected string into a token.
template<>
struct lexer_action< pegtl::json::string::content >
   : pegtl::change_action_and_state< pegtl::unescape, std::string >
{
   // Appends a string token holding the unescaped text to `out`.
   //   unescaped -- the temporary state filled while matching the string content.
   //   out       -- the token list being built by the lexer.
   template< typename ParseInput >
   static void success( const ParseInput& /*unused*/, std::string& unescaped, std::vector< json_token >& out )
   {
      // Move rather than copy: this avoids a second allocation for every string
      // token and uses the json_token( token_type, std::string&& ) constructor.
      // NOTE(review): relies on change_action_and_state discarding the temporary
      // state right after success() returns -- confirm against the PEGTL docs.
      out.emplace_back( token_type::string, std::move( unescaped ) );
   }
};
// Numbers: emit a number token that stores the matched text verbatim.
template<>
struct lexer_action< pegtl::json::number >
{
   // Appends a token_type::number token to `out` whose data is the matched input.
   template< typename ParseInput >
   static void apply( const ParseInput& in, std::vector< json_token >& out )
   {
      // Deliberately no numeric conversion here; the text is kept as-is.
      std::string text = in.string();
      out.emplace_back( token_type::number, std::move( text ) );
   }
};
// clang-format off
// Per-rule error messages for the lexer. The primary template yields nullptr,
// i.e. no message; only token_rule and lexer_rule have one.
// NOTE(review): via lexer_control a non-null message presumably turns a local
// failure of that rule into a parse error carrying this text -- confirm against
// tao/pegtl/control/must_if.hpp.
template< typename > inline constexpr const char* lexer_errors = nullptr;
template<> inline constexpr auto lexer_errors< token_rule > = "Expected a valid JSON token.";
template<> inline constexpr auto lexer_errors< lexer_rule > = "Expected sequence of valid JSON.";
// clang-format on
// Adapter that exposes lexer_errors< Rule > as a static member template named
// `message`; it is the type handed to pegtl::must_if_n in lexer_control.
struct lexer_error
{
template< typename Rule >
static constexpr auto message = lexer_errors< Rule >;
};
// Control class template passed to pegtl::parse for the lexer stage; it is the
// nested `type` of pegtl::must_if_n instantiated with lexer_error.
template< typename Rule >
using lexer_control = pegtl::must_if_n< lexer_error >::type< Rule >;
} // namespace example
// Part 2 -- Parser (in the classical sense).
namespace example
{
// Parser-side terminal, built on pegtl::member::one with a pointer to
// json_token::type and the expected enumerator T.
// NOTE(review): presumably matches exactly one json_token of the input whose
// `type` member equals T -- confirm against tao/pegtl/member.hpp.
template< token_type T >
struct token_type_rule
: pegtl::member::one< &json_token::type, T >
{};
// clang-format off
// One named terminal rule per token_type enumerator. Giving every terminal its
// own type lets the grammar below refer to it by name and lets parser_errors
// attach messages to individual terminals.
struct null_token : token_type_rule< token_type::null > {};
struct true_token : token_type_rule< token_type::true_ > {};
struct false_token : token_type_rule< token_type::false_ > {};
struct number_token : token_type_rule< token_type::number > {};
struct string_token : token_type_rule< token_type::string > {};
struct begin_array : token_type_rule< token_type::begin_array > {};
struct end_array : token_type_rule< token_type::end_array > {};
struct begin_object : token_type_rule< token_type::begin_object > {};
struct end_object : token_type_rule< token_type::end_object > {};
struct name_separator : token_type_rule< token_type::name_separator > {};
struct value_separator : token_type_rule< token_type::value_separator > {};
// Grammar over tokens (not characters).
// `value` is declared first because arrays and objects refer to it recursively.
struct value;
// Array: begin_array, an optional list of values separated by value_separator, end_array.
struct array_tokens : pegtl::seq< begin_array, pegtl::opt< pegtl::list< value, value_separator > >, end_array > {};
// Object member: a string token (the name), name_separator, then a value.
struct object_member : pegtl::seq< string_token, name_separator, value > {};
// Object: begin_object, an optional list of members separated by value_separator, end_object.
struct object_tokens : pegtl::seq< begin_object, pegtl::opt< pegtl::list< object_member, value_separator > >, end_object > {};
// Any JSON value: an ordered choice between the scalar tokens, an array and an object.
struct value : pegtl::sor< null_token, true_token, false_token, number_token, string_token, array_tokens, object_tokens > {};
// Top-level parser grammar: values repeated until the end of the token input.
struct parser_rule : pegtl::until< pegtl::eof, value > {};
// Per-rule error messages for the parser. The primary template yields nullptr,
// i.e. no message; only the rules specialised below have one.
// NOTE(review): via parser_control a non-null message presumably turns a local
// failure of that rule into a parse error carrying this text -- confirm against
// tao/pegtl/control/must_if.hpp.
template< typename > inline constexpr const char* parser_errors = nullptr;
template<> inline constexpr auto parser_errors< name_separator > = "Expected ':' separator.";
template<> inline constexpr auto parser_errors< end_array > = "Expected end of array (or another element).";
template<> inline constexpr auto parser_errors< end_object > = "Expected end of object (or another member).";
template<> inline constexpr auto parser_errors< parser_rule > = "Expected sequence of valid JSON.";
// clang-format on
// Adapter that exposes parser_errors< Rule > as a static member template named
// `message`; it is the type handed to pegtl::must_if_n in parser_control.
struct parser_error
{
template< typename Rule >
static constexpr auto message = parser_errors< Rule >;
};
// Control class template passed to pegtl::parse for the parser stage; it is the
// nested `type` of pegtl::must_if_n instantiated with parser_error.
template< typename Rule >
using parser_control = pegtl::must_if_n< parser_error >::type< Rule >;
} // namespace example
// Part 3 -- Combine Lexer and Parser.
namespace example
{
[[nodiscard]] int main( const std::filesystem::path& file )
{
std::vector< json_token > tokens;
try {
pegtl::text_file_input in( file );
pegtl::parse< lexer_rule, lexer_action, lexer_control >( in, tokens );
}
catch( const std::exception& e ) {
std::cerr << "Parse error in lexer: " << e.what() << std::endl;
return 1;
}
std::cout << "Lexer succeeded with " << tokens.size() << " tokens." << std::endl;
try {
pegtl::copy_input< void, std::vector< json_token > > in( tokens );
pegtl::parse< parser_rule, pegtl::normal, parser_control >( in );
}
catch( const std::exception& e ) {
std::cerr << "Parse error in parser: " << e.what() << std::endl;
return 1;
}
std::cout << "Parser succeeded" << std::endl;
return 0;
}
} // namespace example
// Program entry point: expects exactly one argument, the path of the JSON file
// to lex and parse, and returns the result of example::main for it. With any
// other argument count a usage message is written to stderr and 1 is returned.
int main( int argc, char** argv )  // NOLINT(bugprone-exception-escape)
{
   if( argc != 2 ) {
      // argv[ 0 ] is a null pointer when the program is started with argc == 0;
      // streaming a null char* is undefined behaviour, so fall back to a fixed name.
      const char* program = ( ( argc > 0 ) && ( argv[ 0 ] != nullptr ) ) ? argv[ 0 ] : "json_tokens";
      std::cerr << "usage: " << program << " <filename.json>\n";
      return 1;
   }
   return example::main( argv[ 1 ] );
}
#endif