Skip to content

Commit aa07e9b

Browse files
authored
Merge pull request #44 from KoslickiLab/fix-absent-name
Handle sketches with an absent name or absent md5, improve error reporting (fixes #42)
2 parents 610e63f + ac0eb8f commit aa07e9b

File tree

3 files changed

+86
-8
lines changed

3 files changed

+86
-8
lines changed

.github/workflows/ci.yml

+2-2
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,10 @@ name: CI Pipeline
33
on:
44
push:
55
branches:
6-
- main
6+
- '**'
77
pull_request:
88
branches:
9-
- main
9+
- '**'
1010

1111
jobs:
1212
build-and-test:

src/compare.cpp

+18
Original file line numberDiff line numberDiff line change
@@ -100,8 +100,25 @@ void do_compare(Arguments& args) {
100100
}
101101
}
102102

103+
// create the directory of output file if it does not exist
104+
string output_dir = args.output_filename.substr(0, args.output_filename.find_last_of("/"));
105+
struct stat info;
106+
if (stat(output_dir.c_str(), &info) != 0) {
107+
cout << "The directory " << output_dir << " does not exist. Creating..." << endl;
108+
// create the directory
109+
string create_dir_command = "mkdir -p " + output_dir;
110+
if (system(create_dir_command.c_str()) != 0) {
111+
cerr << "Error in creating the directory " << output_dir << endl;
112+
exit(1);
113+
}
114+
}
115+
103116
// write the header in the output file
104117
ofstream output_file(args.output_filename);
118+
if (!output_file.is_open()) {
119+
cerr << "Error in opening the output file " << args.output_filename << endl;
120+
exit(1);
121+
}
105122
output_file << "query_id,query_name,query_md5,query_sketch_size,match_id,match_name,match_md5,match_sketch_size,jaccard,containment_query_in_match,containment_match_in_query,max_containment,max_containment_ani" << endl;
106123
output_file.close();
107124

@@ -111,6 +128,7 @@ void do_compare(Arguments& args) {
111128
combine_command += filename + " ";
112129
}
113130
combine_command += " >> " + args.output_filename;
131+
cout << "Combining small files by the command: " << combine_command << endl;
114132
// call the system command and check if it is successful
115133
if (system(combine_command.c_str()) != 0) {
116134
cerr << "Error in combining the files." << endl;

src/utils.cpp

+66-6
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,73 @@ Sketch read_min_hashes(const std::string& json_filename) {
1515
json jsonData;
1616
inputFile >> jsonData;
1717

18+
// data
19+
std::vector<hash_t> min_hashes;
20+
std::string name;
21+
std::string md5;
22+
int ksize;
23+
hash_t max_hash;
24+
int seed;
25+
1826
// Access the sketch fields from the parsed JSON
19-
std::vector<hash_t> min_hashes = jsonData[0]["signatures"][0]["mins"];
20-
std::string name = jsonData[0]["name"];
21-
std::string md5 = jsonData[0]["signatures"][0]["md5sum"];
22-
int ksize = jsonData[0]["signatures"][0]["ksize"];
23-
hash_t max_hash = jsonData[0]["signatures"][0]["max_hash"];
24-
int seed = jsonData[0]["signatures"][0]["seed"];
27+
try {
28+
std::vector<hash_t> min_hashes_loaded = jsonData[0]["signatures"][0]["mins"];
29+
min_hashes = min_hashes_loaded;
30+
} catch (json::exception& e) {
31+
std::cerr << "Error: " << e.what() << std::endl;
32+
std::cerr << "Error: cannot find the mins in filename: " << json_filename << std::endl;
33+
std::cerr << "Error: The file is may not be a valid sketch file!" << std::endl;
34+
exit(1);
35+
}
36+
37+
try {
38+
name = jsonData[0]["name"];
39+
} catch (json::exception& e) {
40+
std::cerr << "Warning: no name found in: " << json_filename << ", using empty string" << std::endl;
41+
name = "";
42+
}
43+
44+
try {
45+
md5 = jsonData[0]["signatures"][0]["md5sum"];
46+
} catch (json::exception& e) {
47+
// a missing md5 used to be fatal (old handling kept commented out below); now it is only a warning
48+
//std::cerr << "Error: " << e.what() << std::endl;
49+
//std::cerr << "Error: cannot find the md5 in filename: " << json_filename << std::endl;
50+
//std::cerr << "Error: The file is may not be a valid sketch file!" << std::endl;
51+
//exit(1);
52+
std::cerr << "Warning: no md5 found in: " << json_filename << ", using empty string" << std::endl;
53+
md5 = "";
54+
}
55+
56+
try{
57+
ksize = jsonData[0]["signatures"][0]["ksize"];
58+
} catch (json::exception& e) {
59+
// crash
60+
std::cerr << "Error: " << e.what() << std::endl;
61+
std::cerr << "Error: cannot find the ksize in filename: " << json_filename << std::endl;
62+
std::cerr << "Error: The file is may not be a valid sketch file!" << std::endl;
63+
exit(1);
64+
}
65+
66+
try {
67+
max_hash = jsonData[0]["signatures"][0]["max_hash"];
68+
} catch (json::exception& e) {
69+
// crash
70+
std::cerr << "Error: " << e.what() << std::endl;
71+
std::cerr << "Error: cannot find the max_hash in filename: " << json_filename << std::endl;
72+
std::cerr << "Error: The file is may not be a valid sketch file!" << std::endl;
73+
exit(1);
74+
}
75+
76+
try {
77+
seed = jsonData[0]["signatures"][0]["seed"];
78+
} catch (json::exception& e) {
79+
// crash
80+
std::cerr << "Error: " << e.what() << std::endl;
81+
std::cerr << "Error: cannot find the seed in filename: " << json_filename << std::endl;
82+
std::cerr << "Error: The file is may not be a valid sketch file!" << std::endl;
83+
exit(1);
84+
}
2585

2686
// Close the file
2787
inputFile.close();

0 commit comments

Comments
 (0)