12
12
#include < optional>
13
13
14
14
/* *
15
+ * Unicode operations. These functions are not part of our public API and may
16
+ * change at any time.
17
+ *
18
+ * @private
15
19
* @namespace ada::unicode
16
20
* @brief Includes the definitions for Unicode operations
17
21
*/
18
22
namespace ada ::unicode {
19
23
20
24
/* *
25
+ * @private
21
26
* We receive a UTF-8 string representing a domain name.
22
27
* If the string is percent encoded, we apply percent decoding.
23
28
*
@@ -61,11 +66,13 @@ bool to_ascii(std::optional<std::string>& out, std::string_view plain,
61
66
size_t first_percent);
62
67
63
68
/* *
69
+ * @private
64
70
* @see https://www.unicode.org/reports/tr46/#ToUnicode
65
71
*/
66
72
std::string to_unicode (std::string_view input);
67
73
68
74
/* *
75
+ * @private
69
76
* Checks if the input has tab or newline characters.
70
77
*
71
78
* @attention The has_tabs_or_newline function is a bottleneck and it is simple
@@ -75,19 +82,22 @@ ada_really_inline bool has_tabs_or_newline(
75
82
std::string_view user_input) noexcept ;
76
83
77
84
/* *
85
+ * @private
78
86
* Checks if the input is a forbidden host code point.
79
87
* @see https://url.spec.whatwg.org/#forbidden-host-code-point
80
88
*/
81
89
ada_really_inline constexpr bool is_forbidden_host_code_point (char c) noexcept ;
82
90
83
91
/* *
92
+ * @private
84
93
* Checks if the input contains a forbidden domain code point.
85
94
* @see https://url.spec.whatwg.org/#forbidden-domain-code-point
86
95
*/
87
96
ada_really_inline constexpr bool contains_forbidden_domain_code_point (
88
97
const char * input, size_t length) noexcept ;
89
98
90
99
/* *
100
+ * @private
91
101
* Checks if the input contains a forbidden domain code point in which case
92
102
* the first bit is set to 1. If the input contains an upper case ASCII letter,
93
103
* then the second bit is set to 1.
@@ -98,18 +108,21 @@ contains_forbidden_domain_code_point_or_upper(const char* input,
98
108
size_t length) noexcept ;
99
109
100
110
/* *
111
+ * @private
101
112
* Checks if the input is a forbidden domain code point.
102
113
* @see https://url.spec.whatwg.org/#forbidden-domain-code-point
103
114
*/
104
115
ada_really_inline constexpr bool is_forbidden_domain_code_point (
105
116
char c) noexcept ;
106
117
107
118
/* *
119
+ * @private
108
120
* Checks if the input is alphanumeric, '+', '-' or '.'
109
121
*/
110
122
ada_really_inline constexpr bool is_alnum_plus (char c) noexcept ;
111
123
112
124
/* *
125
+ * @private
113
126
* @details An ASCII hex digit is an ASCII upper hex digit or ASCII lower hex
114
127
* digit. An ASCII upper hex digit is an ASCII digit or a code point in the
115
128
* range U+0041 (A) to U+0046 (F), inclusive. An ASCII lower hex digit is an
@@ -118,6 +131,7 @@ ada_really_inline constexpr bool is_alnum_plus(char c) noexcept;
118
131
ada_really_inline constexpr bool is_ascii_hex_digit (char c) noexcept ;
119
132
120
133
/* *
134
+ * @private
121
135
* Checks if the input is a C0 control or space character.
122
136
*
123
137
* @details A C0 control or space is a C0 control or U+0020 SPACE.
@@ -127,38 +141,44 @@ ada_really_inline constexpr bool is_ascii_hex_digit(char c) noexcept;
127
141
ada_really_inline constexpr bool is_c0_control_or_space (char c) noexcept ;
128
142
129
143
/* *
144
+ * @private
130
145
* Checks if the input is an ASCII tab or newline character.
131
146
*
132
147
* @details An ASCII tab or newline is U+0009 TAB, U+000A LF, or U+000D CR.
133
148
*/
134
149
ada_really_inline constexpr bool is_ascii_tab_or_newline (char c) noexcept ;
135
150
136
151
/* *
152
+ * @private
137
153
* @details A double-dot path segment must be ".." or an ASCII case-insensitive
138
154
* match for ".%2e", "%2e.", or "%2e%2e".
139
155
*/
140
156
ada_really_inline ada_constexpr bool is_double_dot_path_segment (
141
157
std::string_view input) noexcept ;
142
158
143
159
/* *
160
+ * @private
144
161
* @details A single-dot path segment must be "." or an ASCII case-insensitive
145
162
* match for "%2e".
146
163
*/
147
164
ada_really_inline constexpr bool is_single_dot_path_segment (
148
165
std::string_view input) noexcept ;
149
166
150
167
/* *
168
+ * @private
151
169
* @details An IPv4 character may be in the 0-9 or a-f character ranges.
152
170
*/
153
171
ada_really_inline constexpr bool is_lowercase_hex (char c) noexcept ;
154
172
155
173
/* *
174
+ * @private
156
175
* @details Convert hex to binary. Caller is responsible to ensure that
157
176
* the parameter is a hexadecimal digit (0-9, A-F, a-f).
158
177
*/
159
178
ada_really_inline unsigned constexpr convert_hex_to_binary (char c) noexcept ;
160
179
161
180
/* *
181
+ * @private
162
182
* first_percent should be = input.find('%')
163
183
*
164
184
* @todo It would be faster as noexcept maybe, but it could be unsafe since.
@@ -169,19 +189,22 @@ ada_really_inline unsigned constexpr convert_hex_to_binary(char c) noexcept;
169
189
std::string percent_decode (std::string_view input, size_t first_percent);
170
190
171
191
/* *
192
+ * @private
172
193
* Returns a percent-encoded string, whether or not percent encoding was needed.
173
194
* @see https://github.com/nodejs/node/blob/main/src/node_url.cc#L226
174
195
*/
175
196
std::string percent_encode (std::string_view input,
176
197
const uint8_t character_set[]);
177
198
/* *
199
+ * @private
178
200
* Returns a percent-encoded string version of input, while starting the percent
179
201
* encoding at the provided index.
180
202
* @see https://github.com/nodejs/node/blob/main/src/node_url.cc#L226
181
203
*/
182
204
std::string percent_encode (std::string_view input,
183
205
const uint8_t character_set[], size_t index);
184
206
/* *
207
+ * @private
185
208
* Returns true if percent encoding was needed, in which case, we store
186
209
* the percent-encoded content in 'out'. If the boolean 'append' is set to
187
210
* true, the content is appended to 'out'.
@@ -192,12 +215,14 @@ template <bool append>
192
215
bool percent_encode (std::string_view input, const uint8_t character_set[],
193
216
std::string& out);
194
217
/* *
218
+ * @private
195
219
* Returns the index at which percent encoding should start, or (equivalently),
196
220
* the length of the prefix that does not require percent encoding.
197
221
*/
198
222
ada_really_inline size_t percent_encode_index (std::string_view input,
199
223
const uint8_t character_set[]);
200
224
/* *
225
+ * @private
201
226
* Lowers the string in-place, assuming that the content is ASCII.
202
227
* Return true if the content was ASCII.
203
228
*/
0 commit comments