@@ -30,7 +30,10 @@ int usage (char *exename)
3030 fprintf (stderr , " Converts darshan log from infile to outfile.\n" );
3131 fprintf (stderr , " rewrites the log file into the newest format.\n" );
3232 fprintf (stderr , " --bzip2 Use bzip2 compression instead of zlib.\n" );
33- fprintf (stderr , " --obfuscate Obfuscate items in the log.\n" );
33+ fprintf (stderr , " --obfuscate_jobid Obfuscate job ID in the log.\n" );
34+ fprintf (stderr , " --obfuscate_uid Obfuscate uid in the log.\n" );
35+ fprintf (stderr , " --obfuscate_exe Obfuscate executable in the log.\n" );
36+ fprintf (stderr , " --obfuscate_names Obfuscate name records in the log.\n" );
3437 fprintf (stderr , " --key <key> Key to use when obfuscating.\n" );
3538 fprintf (stderr , " --annotate <string> Additional metadata to add.\n" );
3639 fprintf (stderr , " --file <hash> Limit output to specified (hashed) file only.\n" );
@@ -39,9 +42,9 @@ int usage (char *exename)
3942 exit (1 );
4043}
4144
42- void parse_args (int argc , char * * argv , char * * infile , char * * outfile ,
43- int * bzip2 , int * obfuscate , int * reset_md , int * key ,
44- char * * annotate , uint64_t * hash )
45+ void parse_args (int argc , char * * argv , char * * infile , char * * outfile , int * bzip2 ,
46+ int * obfuscate_jobid , int * obfuscate_uid , int * obfuscate_exe , int * obfuscate_names ,
47+ int * reset_md , int * key , char * * annotate , uint64_t * hash )
4548{
4649 int index ;
4750 int ret ;
@@ -50,7 +53,10 @@ void parse_args (int argc, char **argv, char **infile, char **outfile,
5053 {
5154 {"bzip2" , 0 , NULL , 'b' },
5255 {"annotate" , 1 , NULL , 'a' },
53- {"obfuscate" , 0 , NULL , 'o' },
56+ {"obfuscate_jobid" , 0 , NULL , 'j' },
57+ {"obfuscate_uid" , 0 , NULL , 'u' },
58+ {"obfuscate_exe" , 0 , NULL , 'e' },
59+ {"obfuscate_names" , 0 , NULL , 'n' },
5460 {"reset-md" , 0 , NULL , 'r' },
5561 {"key" , 1 , NULL , 'k' },
5662 {"file" , 1 , NULL , 'f' },
@@ -59,7 +65,10 @@ void parse_args (int argc, char **argv, char **infile, char **outfile,
5965 };
6066
6167 * bzip2 = 0 ;
62- * obfuscate = 0 ;
68+ * obfuscate_jobid = 0 ;
69+ * obfuscate_uid = 0 ;
70+ * obfuscate_exe = 0 ;
71+ * obfuscate_names = 0 ;
6372 * reset_md = 0 ;
6473 * key = 0 ;
6574 * hash = 0 ;
@@ -78,8 +87,17 @@ void parse_args (int argc, char **argv, char **infile, char **outfile,
7887 case 'a' :
7988 * annotate = optarg ;
8089 break ;
81- case 'o' :
82- * obfuscate = 1 ;
90+ case 'j' :
91+ * obfuscate_jobid = 1 ;
92+ break ;
93+ case 'u' :
94+ * obfuscate_uid = 1 ;
95+ break ;
96+ case 'e' :
97+ * obfuscate_exe = 1 ;
98+ break ;
99+ case 'n' :
100+ * obfuscate_names = 1 ;
83101 break ;
84102 case 'r' :
85103 * reset_md = 1 ;
@@ -119,9 +137,8 @@ static void reset_md_job(struct darshan_job *job)
119137 return ;
120138}
121139
122- void obfuscate_job (int key , struct darshan_job * job )
140+ void obfuscate_job_jobid (int key , struct darshan_job * job )
123141{
124- job -> uid = (int64_t ) darshan_hashlittle (& job -> uid , sizeof (job -> uid ), key );
125142 if (job -> jobid != 0 )
126143 {
127144 job -> jobid = (int64_t ) darshan_hashlittle (& job -> jobid , sizeof (job -> jobid ), key );
@@ -130,7 +147,14 @@ void obfuscate_job(int key, struct darshan_job *job)
130147 return ;
131148}
132149
133- void obfuscate_exe (int key , char * exe )
150+ void obfuscate_job_uid (int key , struct darshan_job * job )
151+ {
152+ job -> uid = (int64_t ) darshan_hashlittle (& job -> uid , sizeof (job -> uid ), key );
153+
154+ return ;
155+ }
156+
157+ void obfuscate_executable (int key , char * exe )
134158{
135159 uint32_t hashed ;
136160
@@ -149,11 +173,25 @@ void obfuscate_filenames(int key, struct darshan_name_record_ref *name_hash, str
149173 uint32_t hashed ;
150174 char tmp_string [__TMP_OBF_SIZE ] = {0 };
151175 darshan_record_id tmp_id ;
176+ const char * keep_list [] = {"<STDIN>" , "<STDOUT>" , "<STDERR>" , "heatmap:" };
177+ int keep_list_len = sizeof (keep_list ) / sizeof (keep_list [0 ]);
152178
153179 HASH_ITER (hlink , name_hash , ref , tmp )
154180 {
155- /* find file system */
156181 int j ;
182+ /* skip names in keep list -- they don't require anonymization */
183+ for (j = 0 ; j < keep_list_len ; j ++ )
184+ {
185+ if (strncmp (keep_list [j ], ref -> name_record -> name ,
186+ strlen (keep_list [j ])) == 0 )
187+ {
188+ break ;
189+ }
190+ }
191+ if (j != keep_list_len )
192+ continue ;
193+
194+ /* find file system */
157195 char * mnt_pt = NULL ;
158196
159197 /* get mount point and fs type associated with this record */
@@ -266,13 +304,14 @@ int main(int argc, char **argv)
266304 char * mod_buf , * tmp_mod_buf ;
267305 enum darshan_comp_type comp_type ;
268306 int bzip2 ;
269- int obfuscate ;
307+ int obfuscate_jobid , obfuscate_uid , obfuscate_exe , obfuscate_names ;
270308 int key ;
271309 char * annotation = NULL ;
272310 darshan_record_id hash ;
273311 int reset_md ;
274312
275- parse_args (argc , argv , & infile_name , & outfile_name , & bzip2 , & obfuscate ,
313+ parse_args (argc , argv , & infile_name , & outfile_name , & bzip2 ,
314+ & obfuscate_jobid , & obfuscate_uid , & obfuscate_exe , & obfuscate_names ,
276315 & reset_md , & key , & annotation , & hash );
277316
278317 infile = darshan_log_open (infile_name );
@@ -298,7 +337,8 @@ int main(int argc, char **argv)
298337 }
299338
300339 if (reset_md ) reset_md_job (& job );
301- if (obfuscate ) obfuscate_job (key , & job );
340+ if (obfuscate_jobid ) obfuscate_job_jobid (key , & job );
341+ if (obfuscate_uid ) obfuscate_job_uid (key , & job );
302342 if (annotation ) add_annotation (annotation , & job );
303343
304344 ret = darshan_log_put_job (outfile , & job );
@@ -318,7 +358,7 @@ int main(int argc, char **argv)
318358 return (-1 );
319359 }
320360
321- if (obfuscate ) obfuscate_exe (key , tmp_string );
361+ if (obfuscate_exe ) obfuscate_executable (key , tmp_string );
322362
323363 ret = darshan_log_put_exe (outfile , tmp_string );
324364 if (ret < 0 )
@@ -357,7 +397,7 @@ int main(int argc, char **argv)
357397 /* NOTE: obfuscating filepaths breaks the ability to map files
358398 * to the corresponding FS & mount info maintained by darshan
359399 */
360- if (obfuscate ) obfuscate_filenames (key , name_hash , mnt_data_array , mount_count );
400+ if (obfuscate_names ) obfuscate_filenames (key , name_hash , mnt_data_array , mount_count );
361401 if (hash ) remove_hash_recs (& name_hash , hash );
362402
363403 ret = darshan_log_put_namehash (outfile , name_hash );
0 commit comments