@@ -21,6 +21,9 @@
 # for dac_compute_coords.py and dac_upload_model.py
 import imp
 
+# for error logging
+import cherrypy
+
 # note this version assumes the first row is a header row, and keeps only the header
 # and data (called by the generic zip parser)
 def parse_table_file(file):
@@ -121,7 +124,7 @@ def parse_mat_file(file):
     # parse file using comma delimiter
     rows = [row for row in csv.reader(file.decode().splitlines(), delimiter=",", doublequote=True,
                                       escapechar=None, quotechar='"', quoting=csv.QUOTE_MINIMAL, skipinitialspace=True)]
-
+
     # check that we have a matrix
     num_rows = len(rows)
     num_cols = len(rows[0])
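
For context, csv.reader yields each row as a list of strings, so num_rows and num_cols are just the dimensions of that nested list. A minimal sketch of the same call outside the parser (the sample bytes are made up):

import csv

sample = b'a,b\n1.0,2.0\n3.0,4.0\n'  # made-up file contents
rows = [row for row in csv.reader(sample.decode().splitlines(), delimiter=",", doublequote=True,
                                  escapechar=None, quotechar='"', quoting=csv.QUOTE_MINIMAL, skipinitialspace=True)]
print(rows)                     # [['a', 'b'], ['1.0', '2.0'], ['3.0', '4.0']]
print(len(rows), len(rows[0]))  # 3 2 -> num_rows, num_cols
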
@@ -287,6 +290,7 @@ def parse_gen_zip(database, model, input, files, aids, **kwargs):
     # look for one occurrence (only) of .dac file and var, dist, and time directories
     dac_file = ""
     landmarks_file = ""
+    pca_file = ""
    var_meta_file = ""
     var_files = []
     dist_files = []
@@ -308,6 +312,10 @@ def parse_gen_zip(database, model, input, files, aids, **kwargs):
             if zip_file == "landmarks.csv":
                 landmarks_file = zip_file
 
+            # is it "pca.csv"?
+            if zip_file == "pca.csv":
+                pca_file = zip_file
+
         # found a directory -- is it "var/"?
         elif head == "var":
 
@@ -395,23 +403,23 @@ def parse_gen_zip(database, model, input, files, aids, **kwargs):
 
     # check var file names
     num_vars = len(meta_vars)
-    check_file_names(database, model, parse_error_log, dac_error,
+    check_file_names(database, model, dac_error, parse_error_log,
                      "var/variable_", ".var", num_vars, var_files,
                      "missing variable_*.var file(s).")
 
     parse_error_log = dac_error.update_parse_log(database, model, parse_error_log, "Progress",
                                                  "Checked DAC variable file names.")
 
     # check time file names
-    check_file_names(database, model, parse_error_log, dac_error,
+    check_file_names(database, model, dac_error, parse_error_log,
                      "time/variable_", ".time", num_vars, time_files,
                      "missing variable_*.time file(s).")
 
     parse_error_log = dac_error.update_parse_log(database, model, parse_error_log, "Progress",
                                                  "Checked DAC time file names.")
 
     # check dist file names
-    check_file_names(database, model, parse_error_log, dac_error,
+    check_file_names(database, model, dac_error, parse_error_log,
                      "dist/variable_", ".dist", num_vars, dist_files,
                      "missing variable_*.dist file(s).")
 
@@ -428,19 +436,32 @@ def parse_gen_zip(database, model, input, files, aids, **kwargs):
     landmarks = None
     if landmarks_file != "":
 
-        # parse variables.meta file
+        # parse landmarks.csv file
         attr, dim, landmarks = parse_mat_file(zip_ref.read(landmarks_file))
 
     else:
 
         parse_error_log = dac_error.update_parse_log(database, model, parse_error_log, "Progress",
                                                      "No landmarks.csv file found, using all data points.")
 
+    # load pca-comps file
+    pca_comps = None
+    if pca_file != "":
+
+        # parse pca.csv file
+        attr, dim, pca_comps = parse_mat_file(zip_ref.read(pca_file))
+
+    else:
+
+        parse_error_log = dac_error.update_parse_log(database, model, parse_error_log, "Progress",
+                                                     "No pca.csv file found, using MDS algorithm.")
+
     # now start thread to prevent timing out on large files
     stop_event = threading.Event()
     thread = threading.Thread(target=parse_gen_zip_thread,
                               args=(database, model, zip_ref, dac_error, parse_error_log,
-                                    meta_var_col_names, meta_vars, landmarks, dac_file, stop_event))
+                                    meta_var_col_names, meta_vars, landmarks, pca_comps,
+                                    dac_file, stop_event))
     thread.start()
 
 
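
A zip archive exercising the new pca.csv branch only needs that file at the archive root. A minimal sketch; the single-cell layout is an assumption inferred from the int(numpy.round(pca_comps[0])) check further down, not a documented format:

import io
import zipfile

# build an in-memory archive with pca.csv at the root
# (one cell holding the component count -- an assumption, not a documented format)
buffer = io.BytesIO()
with zipfile.ZipFile(buffer, "w") as zip_out:
    zip_out.writestr("pca.csv", "3\n")

with zipfile.ZipFile(buffer) as zip_ref:
    print("pca.csv" in zip_ref.namelist())  # True -> pca_file gets set
    print(zip_ref.read("pca.csv"))          # b'3\n' -> bytes handed to parse_mat_file
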
@@ -463,7 +484,8 @@ def check_file_names(database, model, dac_error, parse_error_log,
 
 # gen zip parsing thread to prevent timeouts by browser
 def parse_gen_zip_thread(database, model, zip_ref, dac_error, parse_error_log,
-                         meta_var_col_names, meta_vars, landmarks, dac_file, stop_event):
+                         meta_var_col_names, meta_vars, landmarks, pca_comps,
+                         dac_file, stop_event):
 
     # put entire thread into a try-except block in order to report errors
     try:
@@ -480,8 +502,27 @@ def parse_gen_zip_thread(database, model, zip_ref, dac_error, parse_error_log,
         # number of data points
         num_datapoints = len(meta_rows)
 
+        # do pca check (pca overrides landmarks)
+        use_coordinates = False
+        if pca_comps is not None:
+
+            num_pca_comps = int(numpy.round(pca_comps[0]))
+
+            # check that pca comps is at least two
+            if num_pca_comps < 2:
+
+                dac_error.quit_raise_exception(database, model, parse_error_log,
+                                               'Number of PCA components must be at least two.')
+
+            # set as number of landmarks
+            num_landmarks = num_pca_comps
+            use_coordinates = True
+
+            parse_error_log = dac_error.update_parse_log(database, model, parse_error_log, "Progress",
+                                                         "Using " + str(num_pca_comps) + " PCA components.")
+
         # do landmark checks
-        if landmarks is not None:
+        elif landmarks is not None:
 
             num_landmarks = len(landmarks)
 
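
The branch ordering above makes PCA components take precedence over landmarks, with plain MDS as the fallback. A standalone restatement of that precedence rule (function name and return values are illustrative, not the plugin's API):

# illustrative restatement of the pca / landmarks / MDS precedence
def resolve_reduction(pca_comps, landmarks):
    if pca_comps is not None:
        num_pca_comps = int(round(float(pca_comps[0])))
        if num_pca_comps < 2:
            raise ValueError("Number of PCA components must be at least two.")
        return "pca", num_pca_comps              # use_coordinates = True
    elif landmarks is not None:
        return "mds-landmarks", len(landmarks)   # landmark-based MDS
    else:
        return "mds", None                       # all data points

print(resolve_reduction([3], None))          # ('pca', 3)
print(resolve_reduction(None, [[0], [1]]))   # ('mds-landmarks', 2)
print(resolve_reduction(None, None))         # ('mds', None)
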
@@ -589,7 +630,8 @@ def parse_gen_zip_thread(database, model, zip_ref, dac_error, parse_error_log,
         push.init_upload_model(database, model, dac_error, parse_error_log,
                                meta_column_names, meta_rows,
                                meta_var_col_names, meta_vars,
-                               variable, time_steps, var_dist, landmarks=landmarks)
+                               variable, time_steps, var_dist,
+                               landmarks=landmarks, use_coordinates=use_coordinates)
 
         # done -- destroy the thread
         stop_event.set()