13
13
#include < ios>
14
14
#include < vector>
15
15
#include < fstream>
16
+ #include < algorithm>
17
+ #include < cctype>
18
+ #include < locale>
19
+ #include < functional>
16
20
17
21
#include < pegtl.hh>
18
22
#include < pegtl/analyze.hh>
@@ -37,6 +41,8 @@ DECLARE_bool(helpshort); // Defined by the gflags library.
37
41
DECLARE_bool (version); // Defined by the gflags library.
38
42
DEFINE_bool (v, false , " Display the version of the binary." );
39
43
DEFINE_bool (h, false , " Display help." );
44
// --schemes: comma-separated allow-list of URI schemes. process() splits this
// value on ',' and hands the resulting set to grepurl(), which prints a URI
// only when the set is empty or contains the URI's parsed scheme.
+ DEFINE_string (schemes, " http,https" , " The list of allowed schemas. If set to empty, yields URIs with all schemas." );
45
+
40
46
41
47
namespace pegtl {
42
48
namespace uri {
@@ -63,6 +69,32 @@ namespace pegtl {
63
69
}
64
70
};
65
71
72
namespace {

// Returns a copy of `input` with the ASCII letters 'A'-'Z' folded to
// 'a'-'z'; every other byte is copied through unchanged.
//
// The fold is deliberately locale-independent. Constructing a named
// std::locale (e.g. "en_US.UTF-8") throws std::runtime_error on systems where
// that locale is not installed and is costly to do on every call, while URI
// schemes consist of ASCII characters only, so an ASCII fold is sufficient.
std::string tolower(const std::string& input) {
  std::string output(input);
  for (char& c : output) {
    if (c >= 'A' && c <= 'Z') {
      c = static_cast<char>(c - 'A' + 'a');
    }
  }
  return output;
}

}  // namespace
82
+
83
+ template <>
84
+ struct URIActions <scheme> {
85
+ static void apply (const pegtl::input& input,
86
+ URIState& parsed_uri) {
87
+ parsed_uri.scheme .assign (tolower (input.string ()));
88
+ }
89
+ };
90
+
91
+ template <>
92
+ struct URIActions <iana_scheme> {
93
+ static void apply (const pegtl::input& input,
94
+ URIState& parsed_uri) {
95
+ parsed_uri.scheme .assign (tolower (input.string ()));
96
+ }
97
+ };
66
98
}
67
99
}
68
100
@@ -82,7 +114,7 @@ bool IsMatchingBracket(char lhs, char rhs) {
82
114
return false ;
83
115
}
84
116
85
- void grepurl (const std::string& thunk) {
117
+ void grepurl (const std::string& thunk, const std::set<std::string>& allowed_schemes ) {
86
118
// Trim.
87
119
size_t start = 0 , end = thunk.size ();
88
120
for (; start < thunk.size () && end > 0 &&
@@ -94,16 +126,28 @@ void grepurl(const std::string& thunk) {
94
126
thunk.data () + start, thunk.data () + end, " stdin" , state);
95
127
96
128
// Output.
97
- if (parse_result && !state.uri .empty ()) {
129
+ if (parse_result && !state.uri .empty () && (allowed_schemes.empty () ||
130
+ allowed_schemes.find (state.scheme ) != allowed_schemes.end ())) {
98
131
std::cout << state.uri << ' \n ' ;
99
132
}
100
133
}
101
134
135
+
136
// Splits `s` on `delim` and inserts each token into `*elems`.
//
// Leading and trailing whitespace is stripped from every token, and tokens
// that are empty (or whitespace only) are dropped. This keeps inputs such as
// "http, https" or "http," from producing entries like " https" or "" that
// could never match, or that would make an otherwise empty set non-empty.
//
// s:     the delimited list to split; an empty string adds nothing.
// delim: the separator character.
// elems: output set, must be non-null; existing contents are preserved.
void split(const std::string& s, char delim, std::set<std::string>* elems) {
  static const char kWhitespace[] = " \t\r\n";
  std::stringstream ss(s);
  std::string item;
  while (std::getline(ss, item, delim)) {
    const std::string::size_type first = item.find_first_not_of(kWhitespace);
    if (first == std::string::npos) {
      continue;  // Empty or whitespace-only token.
    }
    const std::string::size_type last = item.find_last_not_of(kWhitespace);
    elems->insert(item.substr(first, last - first + 1));
  }
}
143
+
102
144
template <class IStream >
103
145
void process (IStream& in) {
146
+ std::set<std::string> allowed_schemes;
147
+ split (FLAGS_schemes, ' ,' , &allowed_schemes);
104
148
std::string thunk;
105
149
while (in >> thunk) {
106
- grepurl (thunk);
150
+ grepurl (thunk, allowed_schemes );
107
151
}
108
152
}
109
153
0 commit comments