
Commit 4fb37ee: Added PCA option to DAC model.
Parent: 10464fe

15 files changed, +795 -204 lines

docker/compose/slycat-compose/requirements.txt

+1 -0

@@ -9,6 +9,7 @@ configparser
 future
 routes
 scipy
+sklearn
 npTDMS
 pandas
 pysmb
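
The new sklearn dependency supports the PCA option added by this commit. As a rough sketch only (names are hypothetical, and the commit's actual PCA computation is in files not shown in this diff), the kind of projection it enables:

# Illustrative sketch, not the commit's code: sklearn-based PCA
# projection of time-series data down to a few coordinates.
import numpy
from sklearn.decomposition import PCA

def pca_coords(variable, num_pca_comps=10):
    # variable: (num_points, num_time_steps) matrix of time series
    # returns: (num_points, num_pca_comps) reduced coordinates
    return PCA(n_components=num_pca_comps).fit_transform(variable)

coords = pca_coords(numpy.random.rand(100, 500), num_pca_comps=10)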

web-client/slycat/web/client/dac_tdms.py

+36 -18

@@ -88,21 +88,27 @@ def check_parser_parms (parms):
             "for min-num-shots and try again.")

     # fourth parameter is number of landmarks
-    if not (parms[3] == 0 or parms[3] >= 3):
-        check_parser_msg.append("Number of landmarks must be zero or >= 3. Please " + \
-            "provide a valid number of landmarks and try again.")
-
-    # fifth parameter is expected type
-    if parms[4] != "General" and \
-       parms[4] != "Overvoltage" and \
-       parms[4] != "Sprytron":
+    if parms[3] is not None:
+        if not (parms[3] == 0 or parms[3] >= 3):
+            check_parser_msg.append("Number of landmarks must be zero or >= 3. Please " + \
+                "provide a valid number of landmarks and try again.")
+
+    # fifth parameter is number of PCA components
+    if parms[4] < 2:
+        check_parser_msg.append("Number of PCA components must be >= 2. Please provide " + \
+            "a valid number of PCA components and try again.")
+
+    # sixth parameter is expected type
+    if parms[5] != "General" and \
+       parms[5] != "Overvoltage" and \
+       parms[5] != "Sprytron":
         check_parser_msg.append ('Expected data type must be one of "General", ' + \
             '"Overvoltage" or "Sprytron". Please use one of those options ' + \
             'and try again.')

-    # sixth parameter is union or intersection (combination of time series)
-    if parms[5] != "Union" and \
-       parms[5] != "Intersection":
+    # seventh parameter is union or intersection (combination of time series)
+    if parms[6] != "Union" and \
+       parms[6] != "Intersection":
         check_parser_msg.append ('Available methods for combining mismatched, ' + \
             'time points are "Union" and "Intersection". Please use one of those options ' + \
             'and try again.')

@@ -270,6 +276,7 @@ def create_model (arguments, log):
     # populate parameters
     parser_parms = [arguments.min_time_points, arguments.min_channels,
                     arguments.min_num_shots, arguments.num_landmarks,
+                    arguments.num_PCA_comps,
                     shot_type, union_type,
                     not arguments.do_not_infer_channel_units,
                     not arguments.do_not_infer_time_units]

@@ -279,6 +286,13 @@ def create_model (arguments, log):
     if check_parser_error != "":
         raise TDMSUploadError(check_parser_error)

+    # landmarks overrides PCA comps
+    if arguments.num_landmarks is not None:
+        parser_parms[4] = False
+    else:
+        parser_parms[4] = True
+        parser_parms[3] = arguments.num_PCA_comps
+
     # compile suffixes to include if .zip file
     dac_parser = "dac-tdms-file-parser"
     if file_type == "zip":
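
The override above, isolated as a runnable sketch (function name and sample values invented for illustration): slot 4 of parser_parms is turned into a use-PCA flag, and slot 3 carries the PCA component count whenever no landmarks were requested.

# Sketch of the landmarks/PCA override; the input list mirrors
# parser_parms slots 0-8 with made-up values.
def apply_landmark_pca_override(parms, num_landmarks, num_PCA_comps):
    if num_landmarks is not None:
        parms[4] = False          # landmarks given: switch PCA off
    else:
        parms[4] = True           # no landmarks: use PCA ...
        parms[3] = num_PCA_comps  # ... and carry the component count
    return parms

print(apply_landmark_pca_override(
    [10, 2, 1, None, 10, "General", "Union", True, True], None, 10))
# -> [10, 2, 1, 10, True, 'General', 'Union', True, True]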
@@ -328,18 +342,19 @@ def create_model (arguments, log):
         log("\t%s" % suffix)

     log("Including TDMS file suffixes:")
-    for suffix in parser_parms[8]:
+    for suffix in parser_parms[9]:
         log("\t%s" % suffix)

     # next list common parameters
     log("Minimum number of time steps per channel: %s" % parser_parms[0])
     log("Minimum number of channels: %s" % parser_parms[1])
     log("Minimum number of shots: %s" % parser_parms[2])
     log("Number of landmarks: %s" % parser_parms[3])
-    log("Expecting TDMS data type: %s" % parser_parms[4])
-    log("Combining mismatched time steps using: %s" % parser_parms[5])
-    log("Infer channel units: %s" % parser_parms[6])
-    log("Infer time units: %s" % parser_parms[7])
+    log("Number of PCA components: %s" % parser_parms[4])
+    log("Expecting TDMS data type: %s" % parser_parms[5])
+    log("Combining mismatched time steps using: %s" % parser_parms[6])
+    log("Infer channel units: %s" % parser_parms[7])
+    log("Infer time units: %s" % parser_parms[8])

     # upload model file(s)
     mid = upload_model (arguments, dac_parser, parser_parms, file_list, progress=True)

@@ -398,9 +413,12 @@ def parser ():
                         help="Channels must occur in at least this many shots, integer >= 0. " +
                              "Use zero to indicate that channel must occur in every shot. " +
                              "Default: %(default)s.")
-    parser.add_argument("--num-landmarks", default=200, type=int,
+    parser.add_argument("--num-landmarks", default=None, type=int,
                         help="Number of landmarks to use, integer >= 3. Can also use zero " +
-                             "to indicate use of full dataset (no landmarks).")
+                             "to indicate use of full dataset (no landmarks). Default: %(default)s.")
+    parser.add_argument("--num-PCA-comps", default=10, type=int,
+                        help="Number of PCA components to use, integer >= 2. Note --num-landmarks " +
+                             "overrides --num-PCA-comps. Default: %(default)s.")
     parser.add_argument("--overvoltage", action="store_true",
                         help="Expecting overvoltage data.")
     parser.add_argument("--sprytron", action="store_true",
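
A standalone sketch of how the two flags above behave (only these two options are declared here; the real parser() defines many more):

# Standalone argparse sketch of the new/changed flags.
import argparse

p = argparse.ArgumentParser()
p.add_argument("--num-landmarks", default=None, type=int)
p.add_argument("--num-PCA-comps", default=10, type=int)

# argparse exposes "--num-PCA-comps" as the attribute num_PCA_comps
args = p.parse_args(["--num-PCA-comps", "5"])
print(args.num_landmarks, args.num_PCA_comps)   # None 5
# With --num-landmarks left at its None default, create_model() takes
# the PCA branch; supplying --num-landmarks switches PCA off.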

web-server/plugins/slycat-dac/dac-generic-file-parser.py

+51 -9

@@ -21,6 +21,9 @@
 # for dac_compute_coords.py and dac_upload_model.py
 import imp

+# for error logging
+import cherrypy
+
 # note this version assumes the first row is a header row, and keeps only the header
 # and data (called by the generic zip parser)
 def parse_table_file(file):

def parse_table_file(file):
@@ -121,7 +124,7 @@ def parse_mat_file(file):
121124
# parse file using comma delimiter
122125
rows = [row for row in csv.reader(file.decode().splitlines(), delimiter=",", doublequote=True,
123126
escapechar=None, quotechar='"', quoting=csv.QUOTE_MINIMAL, skipinitialspace=True)]
124-
127+
125128
# check that we have a matrix
126129
num_rows = len(rows)
127130
num_cols = len(rows[0])
@@ -287,6 +290,7 @@ def parse_gen_zip(database, model, input, files, aids, **kwargs):
     # look for one occurrence (only) of .dac file and var, dist, and time directories
     dac_file = ""
     landmarks_file = ""
+    pca_file = ""
     var_meta_file = ""
     var_files = []
     dist_files = []

@@ -308,6 +312,10 @@ def parse_gen_zip(database, model, input, files, aids, **kwargs):
             if zip_file == "landmarks.csv":
                 landmarks_file = zip_file

+            # is it "pca.csv"?
+            if zip_file == "pca.csv":
+                pca_file = zip_file
+
         # found a directory -- is it "var/"?
         elif head == "var":

@@ -395,23 +403,23 @@ def parse_gen_zip(database, model, input, files, aids, **kwargs):

     # check var file names
     num_vars = len(meta_vars)
-    check_file_names(database, model, parse_error_log, dac_error,
+    check_file_names(database, model, dac_error, parse_error_log,
                      "var/variable_", ".var", num_vars, var_files,
                      "missing variable_*.var file(s).")

     parse_error_log = dac_error.update_parse_log (database, model, parse_error_log, "Progress",
                                                   "Checked DAC variable file names.")

     # check time file names
-    check_file_names(database, model, parse_error_log, dac_error,
+    check_file_names(database, model, dac_error, parse_error_log,
                      "time/variable_", ".time", num_vars, time_files,
                      "missing variable_*.time file(s).")

     parse_error_log = dac_error.update_parse_log (database, model, parse_error_log, "Progress",
                                                   "Checked DAC time file names.")

     # check dist file names
-    check_file_names(database, model, parse_error_log, dac_error,
+    check_file_names(database, model, dac_error, parse_error_log,
                      "dist/variable_", ".dist", num_vars, dist_files,
                      "missing variable_*.dist file(s).")

@@ -428,19 +436,32 @@ def parse_gen_zip(database, model, input, files, aids, **kwargs):
     landmarks = None
     if landmarks_file != "":

-        # parse variables.meta file
+        # parse landmarks.csv file
         attr, dim, landmarks = parse_mat_file(zip_ref.read(landmarks_file))

     else:

         parse_error_log = dac_error.update_parse_log (database, model, parse_error_log, "Progress",
                                                       "No landmarks.csv file found, using all data points.")

+    # load pca-comps file
+    pca_comps = None
+    if pca_file != "":
+
+        # parse pca.csv file
+        attr, dim, pca_comps = parse_mat_file(zip_ref.read(pca_file))
+
+    else:
+
+        parse_error_log = dac_error.update_parse_log (database, model, parse_error_log, "Progress",
+                                                      "No pca.csv file found, using MDS algorithm.")
+
     # now start thread to prevent timing out on large files
     stop_event = threading.Event()
     thread = threading.Thread(target=parse_gen_zip_thread,
                               args=(database, model, zip_ref, dac_error, parse_error_log,
-                                    meta_var_col_names, meta_vars, landmarks, dac_file, stop_event))
+                                    meta_var_col_names, meta_vars, landmarks, pca_comps,
+                                    dac_file, stop_event))
     thread.start()
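
The thread-plus-event pattern used above, reduced to its essentials (worker body omitted):

# Minimal sketch of the stop_event pattern: the caller returns
# immediately so the browser request does not time out, and the
# worker signals its own completion.
import threading

def worker(stop_event):
    # ... long-running zip parsing would go here ...
    stop_event.set()   # worker marks itself done

stop_event = threading.Event()
threading.Thread(target=worker, args=(stop_event,)).start()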

@@ -463,7 +484,8 @@ def check_file_names (database, model, dac_error, parse_error_log,

 # gen zip parsing thread to prevent time outs by browser
 def parse_gen_zip_thread(database, model, zip_ref, dac_error, parse_error_log,
-                         meta_var_col_names, meta_vars, landmarks, dac_file, stop_event):
+                         meta_var_col_names, meta_vars, landmarks, pca_comps,
+                         dac_file, stop_event):

     # put entire thread into a try-except block in order to report errors
     try:

480502
# number of data points
481503
num_datapoints = len(meta_rows)
482504

505+
# do pca check (pca over-rides landmarks)
506+
use_coordinates=False
507+
if pca_comps is not None:
508+
509+
num_pca_comps = int(numpy.round(pca_comps[0]))
510+
511+
# check that pca comps is at least two
512+
if num_pca_comps < 2:
513+
514+
dac_error.quit_raise_exception(database, model, parse_error_log,
515+
'Number of PCA components must be at least two.')
516+
517+
# set as number of landmarks
518+
num_landmarks = num_pca_comps
519+
use_coordinates = True
520+
521+
parse_error_log = dac_error.update_parse_log (database, model, parse_error_log, "Progress",
522+
"Using " + str(num_pca_comps) + " PCA components.")
523+
483524
# do landmark checks
484-
if landmarks is not None:
525+
elif landmarks is not None:
485526

486527
num_landmarks = len(landmarks)
487528
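
Reading the check above back into a file format: pca_comps comes from parse_mat_file on pca.csv, and only pca_comps[0] is consumed, so a single-value file appears sufficient. A hedged sketch, inferred from the parsing code (the commit does not document the format):

# Inferred sketch of a pca.csv the thread above would accept; the
# format is not documented in this commit.
with open("pca.csv", "w") as f:
    f.write("10\n")   # request 10 PCA components; must be >= 2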

@@ -589,7 +630,8 @@ def parse_gen_zip_thread(database, model, zip_ref, dac_error, parse_error_log,
         push.init_upload_model(database, model, dac_error, parse_error_log,
                                meta_column_names, meta_rows,
                                meta_var_col_names, meta_vars,
-                               variable, time_steps, var_dist, landmarks=landmarks)
+                               variable, time_steps, var_dist,
+                               landmarks=landmarks, use_coordinates=use_coordinates)

         # done -- destroy the thread
         stop_event.set()
