Skip to content

Commit a7f5f82

Browse files
committed
Add regex filtering to CLI
1 parent a620757 commit a7f5f82

File tree

4 files changed

+27
-6
lines changed

4 files changed

+27
-6
lines changed

src/CLIOptions.hpp

+3
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ namespace tsm::cli {
2222
("regen-indices", "Regenerate indices.", cxxopts::value<bool>())
2323
("f,forecast", "Forecast dataset type.", cxxopts::value<bool>())
2424
("h,historical", "Historical dataset type.", cxxopts::value<bool>())
25+
("r,regex", "Regex to apply to input directory.", cxxopts::value<std::string>())
2526
("help", "Print help.")
2627
;
2728

@@ -49,13 +50,15 @@ struct [[nodiscard]] CLIOptions {
4950
explicit CLIOptions(const cxxopts::ParseResult& result) : InputDir{ result["input-dir"].as<std::string>() },
5051
DatasetName{ result["dataset-name"].as<std::string>() },
5152
OutputDir{ result["output-dir"].as<std::string>() },
53+
RegexPattern{ result.count("regex") > 0 ? result["regex"].as<std::string>() : ".*" },
5254
RegenIndices{ result.count("regen-indices") > 0 },
5355
Forecast{ result.count("forecast") > 0 },
5456
Historical{ result.count("historical") > 0 } {}
5557

5658
const std::string InputDir;
5759
const std::string DatasetName;
5860
const std::string OutputDir;
61+
const std::string RegexPattern;
5962
const bool RegenIndices{ false };
6063
const bool Forecast{ false };
6164
const bool Historical{ false };

src/TimestampMapper.cpp

+20-5
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#include <exception>
66
#include <iostream>
77
#include <fstream>
8+
#include <regex>
89

910
namespace fs = std::filesystem;
1011

@@ -14,11 +15,13 @@ namespace tsm {
1415
TimestampMapper::TimestampMapper(const std::filesystem::path& inputDir,
1516
const std::filesystem::path& outputDir,
1617
const std::string& datasetName,
18+
const std::string& regexPattern,
1719
const ds::DATASET_TYPE datasetType,
1820
const bool regenIndices) :
1921
m_inputDir{ sanitizeDirectoryPath(inputDir) },
2022
m_outputDir{ sanitizeDirectoryPath(outputDir) },
2123
m_datasetName{ datasetName },
24+
m_regexPattern{ regexPattern },
2225
m_datasetType{ datasetType },
2326
m_regenIndices{ regenIndices },
2427
m_filesToIndexPath{ m_inputDir / "files_to_index.txt" },
@@ -47,7 +50,7 @@ bool TimestampMapper::exec() {
4750
}
4851

4952
std::cout << "Creating list of all .nc files in " << (m_indexFileExists ? m_filesToIndexPath : m_inputDir) << "..." << std::endl;
50-
const auto& filePaths{ createFileList(m_indexFileExists ? m_filesToIndexPath : m_inputDir) };
53+
const auto& filePaths{ createFileList(m_indexFileExists ? m_filesToIndexPath : m_inputDir, m_regexPattern) };
5154
if (filePaths.empty()) {
5255
std::cout << "No .nc files found." << "\nExiting..." << std::endl;
5356
return false;
@@ -90,7 +93,7 @@ bool TimestampMapper::createDirectory(const std::filesystem::path& path) const n
9093
}
9194

9295
/***********************************************************************************/
93-
std::vector<fs::path> TimestampMapper::createFileList(const std::filesystem::path& inputDirOrIndexFile) const {
96+
std::vector<fs::path> TimestampMapper::createFileList(const std::filesystem::path& inputDirOrIndexFile, const std::string& regex) const {
9497
std::vector<fs::path> paths;
9598

9699
// If files_to_index.txt exists, pull the file paths from there.
@@ -114,12 +117,24 @@ std::vector<fs::path> TimestampMapper::createFileList(const std::filesystem::pat
114117

115118
using recursive_directory_iterator = std::filesystem::recursive_directory_iterator;
116119
const auto options{fs::directory_options::follow_directory_symlink};
120+
121+
try {
122+
const std::regex r(regex);
117123

118-
for (const auto& file : recursive_directory_iterator(inputDirOrIndexFile, options)) {
119-
if (fs::path(file).extension() == ".nc") {
120-
paths.emplace_back(file);
124+
for (const auto& file : recursive_directory_iterator(inputDirOrIndexFile, options)) {
125+
if (fs::path(file).extension() == ".nc" && std::regex_match(fs::path(file).string(), r)) {
126+
paths.emplace_back(file);
127+
}
121128
}
122129
}
130+
catch(const std::regex_error& e) {
131+
std::cerr << "Regex error: " << e.what() << std::endl;
132+
std::exit(EXIT_FAILURE);
133+
}
134+
catch(...) {
135+
std::cerr << "Caught unknown exception." << std::endl;
136+
std::exit(EXIT_FAILURE);
137+
}
123138

124139
return paths;
125140
}

src/TimestampMapper.hpp

+3-1
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ class TimestampMapper {
1515
TimestampMapper(const std::filesystem::path& inputDir,
1616
const std::filesystem::path& outputDir,
1717
const std::string& datasetName,
18+
const std::string& regexPattern,
1819
const ds::DATASET_TYPE datasetType,
1920
const bool regenIndices);
2021

@@ -48,14 +49,15 @@ class TimestampMapper {
4849
[[nodiscard]] bool createDirectory(const std::filesystem::path& path) const noexcept;
4950
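/// Builds the list of file paths to process from either the input directory or the index file; the directory scan keeps only .nc files whose path string fully matches the given regex.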
5051
[[nodiscard]] std::vector<std::filesystem::path>
51-
createFileList(const std::filesystem::path& inputDirOrIndexFile) const;
52+
createFileList(const std::filesystem::path& inputDirOrIndexFile, const std::string& regex) const;
5253

5354
///
5455
void deleteIndexFile();
5556

5657
const std::filesystem::path m_inputDir;
5758
const std::filesystem::path m_outputDir;
5859
const std::string m_datasetName;
60+
const std::string m_regexPattern;
5961
const ds::DATASET_TYPE m_datasetType;
6062
const bool m_regenIndices{ false };
6163

src/main.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ int main(int argc, char** argv) {
2828
tsm::TimestampMapper mapper{opts.InputDir,
2929
opts.OutputDir,
3030
opts.DatasetName,
31+
opts.RegexPattern,
3132
opts.Forecast ? tsm::ds::DATASET_TYPE::FORECAST : tsm::ds::DATASET_TYPE::HISTORICAL,
3233
opts.RegenIndices
3334
};

0 commit comments

Comments
 (0)