Skip to content

Commit a8eed1b

Browse files
Author: shanedsnyder
Merge pull request #1019 from darshan-hpc/snyder/darshan-convert-obfuscate-fixes
ENH: various darshan-convert obfuscate fixes
2 parents bddda9d + 5df732b commit a8eed1b

File tree

1 file changed: 63 additions and 15 deletions.

darshan-util/darshan-convert.c

Lines changed: 63 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,11 @@ int usage (char *exename)
3030
fprintf(stderr, " Converts darshan log from infile to outfile.\n");
3131
fprintf(stderr, " rewrites the log file into the newest format.\n");
3232
fprintf(stderr, " --bzip2 Use bzip2 compression instead of zlib.\n");
33-
fprintf(stderr, " --obfuscate Obfuscate items in the log.\n");
33+
fprintf(stderr, " --obfuscate Obfuscate all items in the log.\n");
34+
fprintf(stderr, " --obfuscate_jobid Obfuscate job ID in the log.\n");
35+
fprintf(stderr, " --obfuscate_uid Obfuscate uid in the log.\n");
36+
fprintf(stderr, " --obfuscate_exe Obfuscate executable in the log.\n");
37+
fprintf(stderr, " --obfuscate_names Obfuscate name records in the log.\n");
3438
fprintf(stderr, " --key <key> Key to use when obfuscating.\n");
3539
fprintf(stderr, " --annotate <string> Additional metadata to add.\n");
3640
fprintf(stderr, " --file <hash> Limit output to specified (hashed) file only.\n");
@@ -39,9 +43,9 @@ int usage (char *exename)
3943
exit(1);
4044
}
4145

42-
void parse_args (int argc, char **argv, char **infile, char **outfile,
43-
int *bzip2, int *obfuscate, int *reset_md, int *key,
44-
char **annotate, uint64_t* hash)
46+
void parse_args (int argc, char **argv, char **infile, char **outfile, int *bzip2,
47+
int *obfuscate_jobid, int *obfuscate_uid, int *obfuscate_exe, int *obfuscate_names,
48+
int *reset_md, int *key, char **annotate, uint64_t* hash)
4549
{
4650
int index;
4751
int ret;
@@ -51,6 +55,10 @@ void parse_args (int argc, char **argv, char **infile, char **outfile,
5155
{"bzip2", 0, NULL, 'b'},
5256
{"annotate", 1, NULL, 'a'},
5357
{"obfuscate", 0, NULL, 'o'},
58+
{"obfuscate_jobid", 0, NULL, 'j'},
59+
{"obfuscate_uid", 0, NULL, 'u'},
60+
{"obfuscate_exe", 0, NULL, 'e'},
61+
{"obfuscate_names", 0, NULL, 'n'},
5462
{"reset-md", 0, NULL, 'r'},
5563
{"key", 1, NULL, 'k'},
5664
{"file", 1, NULL, 'f'},
@@ -59,7 +67,10 @@ void parse_args (int argc, char **argv, char **infile, char **outfile,
5967
};
6068

6169
*bzip2 = 0;
62-
*obfuscate = 0;
70+
*obfuscate_jobid = 0;
71+
*obfuscate_uid = 0;
72+
*obfuscate_exe = 0;
73+
*obfuscate_names = 0;
6374
*reset_md = 0;
6475
*key = 0;
6576
*hash = 0;
@@ -79,7 +90,22 @@ void parse_args (int argc, char **argv, char **infile, char **outfile,
7990
*annotate = optarg;
8091
break;
8192
case 'o':
82-
*obfuscate = 1;
93+
*obfuscate_jobid = 1;
94+
*obfuscate_uid = 1;
95+
*obfuscate_exe = 1;
96+
*obfuscate_names = 1;
97+
break;
98+
case 'j':
99+
*obfuscate_jobid = 1;
100+
break;
101+
case 'u':
102+
*obfuscate_uid = 1;
103+
break;
104+
case 'e':
105+
*obfuscate_exe = 1;
106+
break;
107+
case 'n':
108+
*obfuscate_names = 1;
83109
break;
84110
case 'r':
85111
*reset_md = 1;
@@ -119,9 +145,8 @@ static void reset_md_job(struct darshan_job *job)
119145
return;
120146
}
121147

122-
void obfuscate_job(int key, struct darshan_job *job)
148+
void obfuscate_job_jobid(int key, struct darshan_job *job)
123149
{
124-
job->uid = (int64_t) darshan_hashlittle(&job->uid, sizeof(job->uid), key);
125150
if (job->jobid != 0)
126151
{
127152
job->jobid = (int64_t) darshan_hashlittle(&job->jobid, sizeof(job->jobid), key);
@@ -130,7 +155,14 @@ void obfuscate_job(int key, struct darshan_job *job)
130155
return;
131156
}
132157

133-
void obfuscate_exe(int key, char *exe)
158+
void obfuscate_job_uid(int key, struct darshan_job *job)
159+
{
160+
job->uid = (int64_t) darshan_hashlittle(&job->uid, sizeof(job->uid), key);
161+
162+
return;
163+
}
164+
165+
void obfuscate_executable(int key, char *exe)
134166
{
135167
uint32_t hashed;
136168

@@ -149,11 +181,25 @@ void obfuscate_filenames(int key, struct darshan_name_record_ref *name_hash, str
149181
uint32_t hashed;
150182
char tmp_string[__TMP_OBF_SIZE] = {0};
151183
darshan_record_id tmp_id;
184+
const char *keep_list[] = {"<STDIN>", "<STDOUT>", "<STDERR>", "heatmap:"};
185+
int keep_list_len = sizeof(keep_list) / sizeof(keep_list[0]);
152186

153187
HASH_ITER(hlink, name_hash, ref, tmp)
154188
{
155-
/* find file system */
156189
int j;
190+
/* skip names in keep list -- they don't require anonymization */
191+
for(j = 0; j < keep_list_len; j++)
192+
{
193+
if(strncmp(keep_list[j], ref->name_record->name,
194+
strlen(keep_list[j])) == 0)
195+
{
196+
break;
197+
}
198+
}
199+
if(j != keep_list_len)
200+
continue;
201+
202+
/* find file system */
157203
char *mnt_pt = NULL;
158204

159205
/* get mount point and fs type associated with this record */
@@ -266,13 +312,14 @@ int main(int argc, char **argv)
266312
char *mod_buf, *tmp_mod_buf;
267313
enum darshan_comp_type comp_type;
268314
int bzip2;
269-
int obfuscate;
315+
int obfuscate_jobid, obfuscate_uid, obfuscate_exe, obfuscate_names;
270316
int key;
271317
char *annotation = NULL;
272318
darshan_record_id hash;
273319
int reset_md;
274320

275-
parse_args(argc, argv, &infile_name, &outfile_name, &bzip2, &obfuscate,
321+
parse_args(argc, argv, &infile_name, &outfile_name, &bzip2,
322+
&obfuscate_jobid, &obfuscate_uid, &obfuscate_exe, &obfuscate_names,
276323
&reset_md, &key, &annotation, &hash);
277324

278325
infile = darshan_log_open(infile_name);
@@ -298,7 +345,8 @@ int main(int argc, char **argv)
298345
}
299346

300347
if (reset_md) reset_md_job(&job);
301-
if (obfuscate) obfuscate_job(key, &job);
348+
if (obfuscate_jobid) obfuscate_job_jobid(key, &job);
349+
if (obfuscate_uid) obfuscate_job_uid(key, &job);
302350
if (annotation) add_annotation(annotation, &job);
303351

304352
ret = darshan_log_put_job(outfile, &job);
@@ -318,7 +366,7 @@ int main(int argc, char **argv)
318366
return(-1);
319367
}
320368

321-
if (obfuscate) obfuscate_exe(key, tmp_string);
369+
if (obfuscate_exe) obfuscate_executable(key, tmp_string);
322370

323371
ret = darshan_log_put_exe(outfile, tmp_string);
324372
if(ret < 0)
@@ -357,7 +405,7 @@ int main(int argc, char **argv)
357405
/* NOTE: obfuscating filepaths breaks the ability to map files
358406
* to the corresponding FS & mount info maintained by darshan
359407
*/
360-
if(obfuscate) obfuscate_filenames(key, name_hash, mnt_data_array, mount_count );
408+
if(obfuscate_names) obfuscate_filenames(key, name_hash, mnt_data_array, mount_count );
361409
if(hash) remove_hash_recs(&name_hash, hash);
362410

363411
ret = darshan_log_put_namehash(outfile, name_hash);

0 commit comments

Comments
 (0)