Skip to content

Commit af82d6f

Browse files
committed
Add support for filtering output based on schemes.
1 parent ddeccfb commit af82d6f

File tree

3 files changed

+51
-5
lines changed

3 files changed

+51
-5
lines changed

grepurls.cc

+47-3
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,10 @@
1313
#include <ios>
1414
#include <vector>
1515
#include <fstream>
16+
#include <algorithm>
17+
#include <cctype>
18+
#include <locale>
19+
#include <functional>
1620

1721
#include <pegtl.hh>
1822
#include <pegtl/analyze.hh>
@@ -37,6 +41,8 @@ DECLARE_bool(helpshort); // Defined by the gflags library.
3741
DECLARE_bool(version); // Defined by the gflags library.
3842
DEFINE_bool(v, false, "Display the version of the binary.");
3943
DEFINE_bool(h, false, "Display help.");
44+
// Comma-separated list of URI schemes whose matches are printed; an empty
// value disables the filter so that URIs with any scheme are printed.
DEFINE_string(schemes, "http,https", "The list of allowed schemes. If set to empty, yields URIs with all schemes.");
45+
4046

4147
namespace pegtl {
4248
namespace uri {
@@ -63,6 +69,32 @@ namespace pegtl {
6369
}
6470
};
6571

72+
namespace {
// Returns a copy of `input` in which the ASCII letters 'A'-'Z' are replaced by
// their lower-case counterparts; every other byte is copied unchanged.
//
// The folding is deliberately locale-independent: URI schemes consist of ASCII
// characters only, and constructing a named locale such as "en_US.UTF-8"
// throws std::runtime_error on systems where that locale is not installed
// (and is needlessly expensive to do on every call).
std::string tolower(const std::string& input) {
  std::string output;
  output.reserve(input.size());
  for (const char c : input) {
    output.push_back(c >= 'A' && c <= 'Z' ? static_cast<char>(c - 'A' + 'a') : c);
  }
  return output;
}
}  // namespace
82+
83+
template<>
84+
struct URIActions<scheme> {
85+
static void apply(const pegtl::input& input,
86+
URIState& parsed_uri) {
87+
parsed_uri.scheme.assign(tolower(input.string()));
88+
}
89+
};
90+
91+
template<>
92+
struct URIActions<iana_scheme> {
93+
static void apply(const pegtl::input& input,
94+
URIState& parsed_uri) {
95+
parsed_uri.scheme.assign(tolower(input.string()));
96+
}
97+
};
6698
}
6799
}
68100

@@ -82,7 +114,7 @@ bool IsMatchingBracket(char lhs, char rhs) {
82114
return false;
83115
}
84116

85-
void grepurl(const std::string& thunk) {
117+
void grepurl(const std::string& thunk, const std::set<std::string>& allowed_schemes) {
86118
// Trim.
87119
size_t start = 0, end = thunk.size();
88120
for (; start < thunk.size() && end > 0 &&
@@ -94,16 +126,28 @@ void grepurl(const std::string& thunk) {
94126
thunk.data() + start, thunk.data() + end, "stdin", state);
95127

96128
// Output.
97-
if (parse_result && !state.uri.empty()) {
129+
if (parse_result && !state.uri.empty() && (allowed_schemes.empty() ||
130+
allowed_schemes.find(state.scheme) != allowed_schemes.end())) {
98131
std::cout << state.uri << '\n';
99132
}
100133
}
101134

135+
136+
// Splits `s` on `delim` and inserts each item into `*elems`.
//
// Leading and trailing blanks (space, tab, CR, LF) are stripped from every
// item, and items that are empty after stripping are dropped. This keeps a
// value such as "http, https" from producing the unmatched key " https", and
// keeps "http,,https" from inserting an empty key.
//
// `elems` must not be null; existing contents are kept.
void split(const std::string& s, char delim, std::set<std::string>* elems) {
  static const char kBlanks[] = " \t\r\n";
  std::string::size_type begin = 0;
  while (begin <= s.size()) {
    // [begin, end) is the current raw item; the last item ends at s.size().
    std::string::size_type end = s.find(delim, begin);
    if (end == std::string::npos) {
      end = s.size();
    }
    const std::string::size_type first = s.find_first_not_of(kBlanks, begin);
    if (first != std::string::npos && first < end) {
      // s[first] is non-blank and lies inside the item, so the backward
      // search from end - 1 always stops at or after `first`.
      const std::string::size_type last = s.find_last_not_of(kBlanks, end - 1);
      elems->insert(s.substr(first, last - first + 1));
    }
    begin = end + 1;
  }
}
143+
102144
template<class IStream>
103145
void process(IStream& in) {
146+
std::set<std::string> allowed_schemes;
147+
split(FLAGS_schemes, ',', &allowed_schemes);
104148
std::string thunk;
105149
while (in >> thunk) {
106-
grepurl(thunk);
150+
grepurl(thunk, allowed_schemes);
107151
}
108152
}
109153

run_tests.sh

+3-2
Original file line numberDiff line numberDiff line change
@@ -8,18 +8,19 @@
88
set -e
99

1010
binary=./grepurls
11+
flags=--schemes=
1112

1213
for f in `ls testdata`; do
1314
filename=testdata/${f}
1415
echo -n Running ${filename}...
1516

1617
# Use shell redirection to supply input
1718
redirect_output=/tmp/grepurls.redirect.out
18-
${binary} < ${filename} > ${redirect_output}
19+
${binary} ${flags} < ${filename} > ${redirect_output}
1920

2021
# Use direct filename read to supply input and pipe it to wc -l
2122
file_output=/tmp/grepurls.file.out
22-
${binary} ${filename} > ${file_output}
23+
${binary} ${flags} ${filename} > ${file_output}
2324

2425
# Diff outputs.
2526
diff_redirect_output=/tmp/grepurls.redirect.diff

uri_state.h

+1
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ namespace pegtl {
99
// The state associated with the URI parse.
1010
// (available to all actions).
1111
struct URIState {
12+
// Scheme of the parsed URI. Written lower-cased by the URIActions<scheme> and
// URIActions<iana_scheme> actions; grepurl() looks it up in the set of
// allowed schemes.
std::string scheme;
1213
// Text of the URI; grepurl() prints it when the parse succeeded, this string
// is non-empty and the scheme passes the filter.
std::string uri;
1314
// NOTE(review): not referenced in the visible code; presumably the bracket or
// quote character that preceded the URI — confirm against its users.
char opening_context;
1415
};

0 commit comments

Comments
 (0)