Skip to content

Commit acffaf8

Browse files
author
Shane Snyder
committed
various darshan-convert obfuscate fixes
- independent obfuscation flags to allow more control over what is obfuscated (e.g., to not obfuscate job ids)
- don't obfuscate common names like standard streams (e.g., <STDOUT>) or heatmaps
1 parent bddda9d commit acffaf8

File tree

1 file changed

+57
-17
lines changed

1 file changed

+57
-17
lines changed

darshan-util/darshan-convert.c

Lines changed: 57 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,10 @@ int usage (char *exename)
3030
fprintf(stderr, " Converts darshan log from infile to outfile.\n");
3131
fprintf(stderr, " rewrites the log file into the newest format.\n");
3232
fprintf(stderr, " --bzip2 Use bzip2 compression instead of zlib.\n");
33-
fprintf(stderr, " --obfuscate Obfuscate items in the log.\n");
33+
fprintf(stderr, " --obfuscate_jobid Obfuscate job ID in the log.\n");
34+
fprintf(stderr, " --obfuscate_uid Obfuscate uid in the log.\n");
35+
fprintf(stderr, " --obfuscate_exe Obfuscate executable in the log.\n");
36+
fprintf(stderr, " --obfuscate_names Obfuscate name records in the log.\n");
3437
fprintf(stderr, " --key <key> Key to use when obfuscating.\n");
3538
fprintf(stderr, " --annotate <string> Additional metadata to add.\n");
3639
fprintf(stderr, " --file <hash> Limit output to specified (hashed) file only.\n");
@@ -39,9 +42,9 @@ int usage (char *exename)
3942
exit(1);
4043
}
4144

42-
void parse_args (int argc, char **argv, char **infile, char **outfile,
43-
int *bzip2, int *obfuscate, int *reset_md, int *key,
44-
char **annotate, uint64_t* hash)
45+
void parse_args (int argc, char **argv, char **infile, char **outfile, int *bzip2,
46+
int *obfuscate_jobid, int *obfuscate_uid, int *obfuscate_exe, int *obfuscate_names,
47+
int *reset_md, int *key, char **annotate, uint64_t* hash)
4548
{
4649
int index;
4750
int ret;
@@ -50,7 +53,10 @@ void parse_args (int argc, char **argv, char **infile, char **outfile,
5053
{
5154
{"bzip2", 0, NULL, 'b'},
5255
{"annotate", 1, NULL, 'a'},
53-
{"obfuscate", 0, NULL, 'o'},
56+
{"obfuscate_jobid", 0, NULL, 'j'},
57+
{"obfuscate_uid", 0, NULL, 'u'},
58+
{"obfuscate_exe", 0, NULL, 'e'},
59+
{"obfuscate_names", 0, NULL, 'n'},
5460
{"reset-md", 0, NULL, 'r'},
5561
{"key", 1, NULL, 'k'},
5662
{"file", 1, NULL, 'f'},
@@ -59,7 +65,10 @@ void parse_args (int argc, char **argv, char **infile, char **outfile,
5965
};
6066

6167
*bzip2 = 0;
62-
*obfuscate = 0;
68+
*obfuscate_jobid = 0;
69+
*obfuscate_uid = 0;
70+
*obfuscate_exe = 0;
71+
*obfuscate_names = 0;
6372
*reset_md = 0;
6473
*key = 0;
6574
*hash = 0;
@@ -78,8 +87,17 @@ void parse_args (int argc, char **argv, char **infile, char **outfile,
7887
case 'a':
7988
*annotate = optarg;
8089
break;
81-
case 'o':
82-
*obfuscate = 1;
90+
case 'j':
91+
*obfuscate_jobid = 1;
92+
break;
93+
case 'u':
94+
*obfuscate_uid = 1;
95+
break;
96+
case 'e':
97+
*obfuscate_exe = 1;
98+
break;
99+
case 'n':
100+
*obfuscate_names = 1;
83101
break;
84102
case 'r':
85103
*reset_md = 1;
@@ -119,9 +137,8 @@ static void reset_md_job(struct darshan_job *job)
119137
return;
120138
}
121139

122-
void obfuscate_job(int key, struct darshan_job *job)
140+
void obfuscate_job_jobid(int key, struct darshan_job *job)
123141
{
124-
job->uid = (int64_t) darshan_hashlittle(&job->uid, sizeof(job->uid), key);
125142
if (job->jobid != 0)
126143
{
127144
job->jobid = (int64_t) darshan_hashlittle(&job->jobid, sizeof(job->jobid), key);
@@ -130,7 +147,14 @@ void obfuscate_job(int key, struct darshan_job *job)
130147
return;
131148
}
132149

133-
void obfuscate_exe(int key, char *exe)
150+
void obfuscate_job_uid(int key, struct darshan_job *job) /* overwrite job->uid in place with a hash of its own value */
151+
{
152+
job->uid = (int64_t) darshan_hashlittle(&job->uid, sizeof(job->uid), key); /* hashes the raw bytes of the uid field, unconditionally; key is forwarded as the third argument (presumably the hash seed -- confirm against darshan_hashlittle) */
153+
154+
return;
155+
}
156+
157+
void obfuscate_executable(int key, char *exe)
134158
{
135159
uint32_t hashed;
136160

@@ -149,11 +173,25 @@ void obfuscate_filenames(int key, struct darshan_name_record_ref *name_hash, str
149173
uint32_t hashed;
150174
char tmp_string[__TMP_OBF_SIZE] = {0};
151175
darshan_record_id tmp_id;
176+
const char *keep_list[] = {"<STDIN>", "<STDOUT>", "<STDERR>", "heatmap:"};
177+
int keep_list_len = sizeof(keep_list) / sizeof(keep_list[0]);
152178

153179
HASH_ITER(hlink, name_hash, ref, tmp)
154180
{
155-
/* find file system */
156181
int j;
182+
/* skip names in keep list -- they don't require anonymization */
183+
for(j = 0; j < keep_list_len; j++)
184+
{
185+
if(strncmp(keep_list[j], ref->name_record->name,
186+
strlen(keep_list[j])) == 0)
187+
{
188+
break;
189+
}
190+
}
191+
if(j != keep_list_len)
192+
continue;
193+
194+
/* find file system */
157195
char *mnt_pt = NULL;
158196

159197
/* get mount point and fs type associated with this record */
@@ -266,13 +304,14 @@ int main(int argc, char **argv)
266304
char *mod_buf, *tmp_mod_buf;
267305
enum darshan_comp_type comp_type;
268306
int bzip2;
269-
int obfuscate;
307+
int obfuscate_jobid, obfuscate_uid, obfuscate_exe, obfuscate_names;
270308
int key;
271309
char *annotation = NULL;
272310
darshan_record_id hash;
273311
int reset_md;
274312

275-
parse_args(argc, argv, &infile_name, &outfile_name, &bzip2, &obfuscate,
313+
parse_args(argc, argv, &infile_name, &outfile_name, &bzip2,
314+
&obfuscate_jobid, &obfuscate_uid, &obfuscate_exe, &obfuscate_names,
276315
&reset_md, &key, &annotation, &hash);
277316

278317
infile = darshan_log_open(infile_name);
@@ -298,7 +337,8 @@ int main(int argc, char **argv)
298337
}
299338

300339
if (reset_md) reset_md_job(&job);
301-
if (obfuscate) obfuscate_job(key, &job);
340+
if (obfuscate_jobid) obfuscate_job_jobid(key, &job);
341+
if (obfuscate_uid) obfuscate_job_uid(key, &job);
302342
if (annotation) add_annotation(annotation, &job);
303343

304344
ret = darshan_log_put_job(outfile, &job);
@@ -318,7 +358,7 @@ int main(int argc, char **argv)
318358
return(-1);
319359
}
320360

321-
if (obfuscate) obfuscate_exe(key, tmp_string);
361+
if (obfuscate_exe) obfuscate_executable(key, tmp_string);
322362

323363
ret = darshan_log_put_exe(outfile, tmp_string);
324364
if(ret < 0)
@@ -357,7 +397,7 @@ int main(int argc, char **argv)
357397
/* NOTE: obfuscating filepaths breaks the ability to map files
358398
* to the corresponding FS & mount info maintained by darshan
359399
*/
360-
if(obfuscate) obfuscate_filenames(key, name_hash, mnt_data_array, mount_count );
400+
if(obfuscate_names) obfuscate_filenames(key, name_hash, mnt_data_array, mount_count );
361401
if(hash) remove_hash_recs(&name_hash, hash);
362402

363403
ret = darshan_log_put_namehash(outfile, name_hash);

0 commit comments

Comments
 (0)