@@ -30,7 +30,11 @@ int usage (char *exename)
3030 fprintf (stderr , " Converts darshan log from infile to outfile.\n" );
3131 fprintf (stderr , " rewrites the log file into the newest format.\n" );
3232 fprintf (stderr , " --bzip2 Use bzip2 compression instead of zlib.\n" );
33- fprintf (stderr , " --obfuscate Obfuscate items in the log.\n" );
33+ fprintf (stderr , " --obfuscate Obfuscate all items in the log.\n" );
34+ fprintf (stderr , " --obfuscate_jobid Obfuscate job ID in the log.\n" );
35+ fprintf (stderr , " --obfuscate_uid Obfuscate uid in the log.\n" );
36+ fprintf (stderr , " --obfuscate_exe Obfuscate executable in the log.\n" );
37+ fprintf (stderr , " --obfuscate_names Obfuscate name records in the log.\n" );
3438 fprintf (stderr , " --key <key> Key to use when obfuscating.\n" );
3539 fprintf (stderr , " --annotate <string> Additional metadata to add.\n" );
3640 fprintf (stderr , " --file <hash> Limit output to specified (hashed) file only.\n" );
@@ -39,9 +43,9 @@ int usage (char *exename)
3943 exit (1 );
4044}
4145
42- void parse_args (int argc , char * * argv , char * * infile , char * * outfile ,
43- int * bzip2 , int * obfuscate , int * reset_md , int * key ,
44- char * * annotate , uint64_t * hash )
46+ void parse_args (int argc , char * * argv , char * * infile , char * * outfile , int * bzip2 ,
47+ int * obfuscate_jobid , int * obfuscate_uid , int * obfuscate_exe , int * obfuscate_names ,
48+ int * reset_md , int * key , char * * annotate , uint64_t * hash )
4549{
4650 int index ;
4751 int ret ;
@@ -51,6 +55,10 @@ void parse_args (int argc, char **argv, char **infile, char **outfile,
5155 {"bzip2" , 0 , NULL , 'b' },
5256 {"annotate" , 1 , NULL , 'a' },
5357 {"obfuscate" , 0 , NULL , 'o' },
58+ {"obfuscate_jobid" , 0 , NULL , 'j' },
59+ {"obfuscate_uid" , 0 , NULL , 'u' },
60+ {"obfuscate_exe" , 0 , NULL , 'e' },
61+ {"obfuscate_names" , 0 , NULL , 'n' },
5462 {"reset-md" , 0 , NULL , 'r' },
5563 {"key" , 1 , NULL , 'k' },
5664 {"file" , 1 , NULL , 'f' },
@@ -59,7 +67,10 @@ void parse_args (int argc, char **argv, char **infile, char **outfile,
5967 };
6068
6169 * bzip2 = 0 ;
62- * obfuscate = 0 ;
70+ * obfuscate_jobid = 0 ;
71+ * obfuscate_uid = 0 ;
72+ * obfuscate_exe = 0 ;
73+ * obfuscate_names = 0 ;
6374 * reset_md = 0 ;
6475 * key = 0 ;
6576 * hash = 0 ;
@@ -79,7 +90,22 @@ void parse_args (int argc, char **argv, char **infile, char **outfile,
7990 * annotate = optarg ;
8091 break ;
8192 case 'o' :
82- * obfuscate = 1 ;
93+ * obfuscate_jobid = 1 ;
94+ * obfuscate_uid = 1 ;
95+ * obfuscate_exe = 1 ;
96+ * obfuscate_names = 1 ;
97+ break ;
98+ case 'j' :
99+ * obfuscate_jobid = 1 ;
100+ break ;
101+ case 'u' :
102+ * obfuscate_uid = 1 ;
103+ break ;
104+ case 'e' :
105+ * obfuscate_exe = 1 ;
106+ break ;
107+ case 'n' :
108+ * obfuscate_names = 1 ;
83109 break ;
84110 case 'r' :
85111 * reset_md = 1 ;
@@ -119,9 +145,8 @@ static void reset_md_job(struct darshan_job *job)
119145 return ;
120146}
121147
122- void obfuscate_job (int key , struct darshan_job * job )
148+ void obfuscate_job_jobid (int key , struct darshan_job * job )
123149{
124- job -> uid = (int64_t ) darshan_hashlittle (& job -> uid , sizeof (job -> uid ), key );
125150 if (job -> jobid != 0 )
126151 {
127152 job -> jobid = (int64_t ) darshan_hashlittle (& job -> jobid , sizeof (job -> jobid ), key );
@@ -130,7 +155,14 @@ void obfuscate_job(int key, struct darshan_job *job)
130155 return ;
131156}
132157
133- void obfuscate_exe (int key , char * exe )
/*
 * obfuscate_job_uid: overwrite job->uid in place with an obfuscated value.
 *
 * The new uid is the result of darshan_hashlittle() run over the bytes of the
 * current job->uid (sizeof(job->uid) bytes), with `key` passed as the third
 * argument (presumably the hash seed/initval -- confirm against
 * darshan_hashlittle's definition), cast to int64_t.
 *
 * The uid is hashed unconditionally; there is no check for a zero value here.
 *
 * key: caller-supplied obfuscation key forwarded to darshan_hashlittle().
 * job: job record whose uid field is rewritten; must not be NULL, since it is
 *      dereferenced without a check.
 */
158+ void obfuscate_job_uid (int key , struct darshan_job * job )
159+ {
160+ job -> uid = (int64_t ) darshan_hashlittle (& job -> uid , sizeof (job -> uid ), key );
161+
162+ return ;
163+ }
164+
165+ void obfuscate_executable (int key , char * exe )
134166{
135167 uint32_t hashed ;
136168
@@ -149,11 +181,25 @@ void obfuscate_filenames(int key, struct darshan_name_record_ref *name_hash, str
149181 uint32_t hashed ;
150182 char tmp_string [__TMP_OBF_SIZE ] = {0 };
151183 darshan_record_id tmp_id ;
184+ const char * keep_list [] = {"<STDIN>" , "<STDOUT>" , "<STDERR>" , "heatmap:" };
185+ int keep_list_len = sizeof (keep_list ) / sizeof (keep_list [0 ]);
152186
153187 HASH_ITER (hlink , name_hash , ref , tmp )
154188 {
155- /* find file system */
156189 int j ;
190+ /* skip names in keep list -- they don't require anonymization */
191+ for (j = 0 ; j < keep_list_len ; j ++ )
192+ {
193+ if (strncmp (keep_list [j ], ref -> name_record -> name ,
194+ strlen (keep_list [j ])) == 0 )
195+ {
196+ break ;
197+ }
198+ }
199+ if (j != keep_list_len )
200+ continue ;
201+
202+ /* find file system */
157203 char * mnt_pt = NULL ;
158204
159205 /* get mount point and fs type associated with this record */
@@ -266,13 +312,14 @@ int main(int argc, char **argv)
266312 char * mod_buf , * tmp_mod_buf ;
267313 enum darshan_comp_type comp_type ;
268314 int bzip2 ;
269- int obfuscate ;
315+ int obfuscate_jobid , obfuscate_uid , obfuscate_exe , obfuscate_names ;
270316 int key ;
271317 char * annotation = NULL ;
272318 darshan_record_id hash ;
273319 int reset_md ;
274320
275- parse_args (argc , argv , & infile_name , & outfile_name , & bzip2 , & obfuscate ,
321+ parse_args (argc , argv , & infile_name , & outfile_name , & bzip2 ,
322+ & obfuscate_jobid , & obfuscate_uid , & obfuscate_exe , & obfuscate_names ,
276323 & reset_md , & key , & annotation , & hash );
277324
278325 infile = darshan_log_open (infile_name );
@@ -298,7 +345,8 @@ int main(int argc, char **argv)
298345 }
299346
300347 if (reset_md ) reset_md_job (& job );
301- if (obfuscate ) obfuscate_job (key , & job );
348+ if (obfuscate_jobid ) obfuscate_job_jobid (key , & job );
349+ if (obfuscate_uid ) obfuscate_job_uid (key , & job );
302350 if (annotation ) add_annotation (annotation , & job );
303351
304352 ret = darshan_log_put_job (outfile , & job );
@@ -318,7 +366,7 @@ int main(int argc, char **argv)
318366 return (-1 );
319367 }
320368
321- if (obfuscate ) obfuscate_exe (key , tmp_string );
369+ if (obfuscate_exe ) obfuscate_executable (key , tmp_string );
322370
323371 ret = darshan_log_put_exe (outfile , tmp_string );
324372 if (ret < 0 )
@@ -357,7 +405,7 @@ int main(int argc, char **argv)
357405 /* NOTE: obfuscating filepaths breaks the ability to map files
358406 * to the corresponding FS & mount info maintained by darshan
359407 */
360- if (obfuscate ) obfuscate_filenames (key , name_hash , mnt_data_array , mount_count );
408+ if (obfuscate_names ) obfuscate_filenames (key , name_hash , mnt_data_array , mount_count );
361409 if (hash ) remove_hash_recs (& name_hash , hash );
362410
363411 ret = darshan_log_put_namehash (outfile , name_hash );
0 commit comments