From 994ce0d2b8bc8a55514a184d62877bc0fcc30f7c Mon Sep 17 00:00:00 2001 From: "djgagne@ou.edu" Date: Wed, 20 Jan 2021 11:32:31 -0700 Subject: [PATCH 01/15] Updated hsdata --- bin/hsdata | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/bin/hsdata b/bin/hsdata index 8cea126..7388582 100755 --- a/bin/hsdata +++ b/bin/hsdata @@ -171,11 +171,11 @@ def process_ensemble_member(run_date, member, config): obs_tracks_to_json(mrms_tracks, member, run_date, config, track_proc.model_grid.proj) print("Output csv", run_date, member) for table_name, table_data in obs_data.items(): - csv_filename = config.csv_path + "{0}_{1}_{2}_{3}.csv".format(table_name, + csv_filename = join(config.csv_path, "{0}_{1}_{2}_{3}.csv".format(table_name, "obs", member, run_date.strftime( - config.run_date_format)) + config.run_date_format))) table_data.to_csv(csv_filename, na_rep="nan", float_format="%0.5f", @@ -201,10 +201,10 @@ def process_ensemble_member(run_date, member, config): forecast_data = {} for table_name, table_data in forecast_data.items(): - csv_filename = config.csv_path + "{0}_{1}_{2}_{3}.csv".format(table_name, + csv_filename = join(config.csv_path, "{0}_{1}_{2}_{3}.csv".format(table_name, config.ensemble_name, member, - run_date.strftime(config.run_date_format)) + run_date.strftime(config.run_date_format))) print("Output csv file " + csv_filename) table_data.to_csv(csv_filename, na_rep="nan", @@ -333,10 +333,10 @@ def rematch_ensemble_tracks(run_date, member, config): obs_data = make_obs_track_data(mrms_tracks, member, run_date, config, track_proc.model_grid.proj, ) for table_name, table_data in obs_data.items(): - table_data.to_csv(config.csv_path + "{0}_{1}_{2}_{3}.csv".format(table_name, + table_data.to_csv(join(config.csv_path, "{0}_{1}_{2}_{3}.csv".format(table_name, "obs", member, - run_date.strftime("%Y%m%d")), + run_date.strftime("%Y%m%d"))), na_rep="nan", float_format="%0.5f", index=False) @@ -345,10 +345,10 @@ def rematch_ensemble_tracks(run_date, member, config): else: forecast_data = {} for table_name, table_data in forecast_data.items(): - table_data.to_csv(config.csv_path + "{0}_{1}_{2}_{3}.csv".format(table_name, + table_data.to_csv(join(config.csv_path, "{0}_{1}_{2}_{3}.csv".format(table_name, config.ensemble_name, member, - run_date.strftime("%Y%m%d")), + run_date.strftime("%Y%m%d"))), na_rep="nan", float_format="%0.5f", index=False) From fd8ef447387fb2730638be3ff0094f6a20f6e158 Mon Sep 17 00:00:00 2001 From: "djgagne@ou.edu" Date: Mon, 8 Mar 2021 15:10:20 -0700 Subject: [PATCH 02/15] Added sspf eval script --- demos/eval_sspf_days.py | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 demos/eval_sspf_days.py diff --git a/demos/eval_sspf_days.py b/demos/eval_sspf_days.py new file mode 100644 index 0000000..ee7e5bf --- /dev/null +++ b/demos/eval_sspf_days.py @@ -0,0 +1,40 @@ +import pandas as pd +import numpy as np +import matplotlib.pyplot as plt +from mpl_toolkits.basemap import Basemap +from hagelslag.evaluation import DistributedROC, DistributedReliability +from scipy.ndimage import convolve +from glob import glob +import xarray as xr +import os +from os.path import join + +eval_path = "/glade/p/work/dgagne/ncar_coarse_neighbor_eval_2016_s_2/" +eval_files = sorted(os.listdir(eval_path)) +eval_test = pd.read_csv(join(eval_path, eval_files[0])) +models = eval_test.columns[eval_test.columns.str.contains("mean")] +run_dates = pd.DatetimeIndex([e.split("_")[-1][:8] for e in eval_files]) +thresholds = [25, 50, 75] 
+prob_thresholds = np.concatenate(([0, 0.01], np.arange(0.1, 1.1, 0.1), [1.05])) +brier = {} +roc = {} +for thresh in thresholds: + brier[thresh] = pd.DataFrame(index=run_dates, columns=models, dtype=object) + roc[thresh] = pd.DataFrame(index=run_dates, columns=models, dtype=object) +for ev, eval_file in enumerate(eval_files): + print(eval_file) + eval_data = pd.read_csv(join(eval_path, eval_file)) + us_mask = eval_data["us_mask"] == 1 + for thresh in thresholds: + obs = eval_data.loc[us_mask, "MESH_Max_60min_00.50_{0:2d}".format(thresh)] + for model in models: + brier[thresh].loc[run_dates[ev], model] = DistributedReliability(thresholds=prob_thresholds) + brier[thresh].loc[run_dates[ev], model].update(eval_data.loc[us_mask, model], + obs) + roc[thresh].loc[run_dates[ev], model] = DistributedROC(thresholds=prob_thresholds) + roc[thresh].loc[run_dates[ev], model].update(eval_data.loc[us_mask, model], + obs) +out_path = "/glade/p/work/dgagne/ncar_coarse_neighbor_scores_2016/" +for thresh in [25, 50, 75]: + brier[thresh].to_csv(join(out_path, "ncar_2016_s_2_brier_objs_{0:02d}.csv".format(thresh)), index_label="Date") + roc[thresh].to_csv(join(out_path, "ncar_2016_s_2_roc_objs_{0:02d}.csv".format(thresh)), index_label="Date") From 1a0237e9f1fdde0d05687b1c7cdaf0d9c14c9281 Mon Sep 17 00:00:00 2001 From: "djgagne@ou.edu" Date: Mon, 8 Mar 2021 15:26:30 -0700 Subject: [PATCH 03/15] Updated travis file --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index 787f279..ec3da2d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,6 +8,7 @@ before_install: install: - conda env create -f environment.yml - source activate hagelslag + - echo `which python` script: - pytest notifications: From 0f62f696507acca58b98e07e47114b5291f36234 Mon Sep 17 00:00:00 2001 From: David John Gagne Date: Wed, 22 Sep 2021 10:12:15 -0600 Subject: [PATCH 04/15] Updated environment file and Grib2 writing to use Grib2io package instead of ncepgrib2 github. 
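
The GRIB2 encoding path in EnsembleProducts.py now builds messages with grib2io's
Grib2Message instead of ncepgrib2's Grib2Encode and writes the raw message bytes
directly. The same hunks also switch the output filename construction in
write_grib2_files from string concatenation to os.path.join. A minimal,
self-contained sketch of that filename change (the path, ensemble, member, and
date values below are hypothetical stand-ins for the attributes used in
write_grib2_files):

    from os.path import join
    from datetime import datetime

    path = "/glade/scratch/hail_forecasts"      # hypothetical output directory, no trailing slash
    ensemble_name, member, model_name = "HREFv2", "arw_00", "Random-Forest"
    variable, run_date, fhr = "hail", datetime(2021, 9, 22), 24

    name_fmt = "{0}_{1}_{2}_{3}_{4}.grib2"
    stamp = run_date.strftime("%Y%m%d%H") + "f{0:02d}".format(fhr)

    # old pattern: concatenation drops the separator when path has no trailing slash
    old_name = path + name_fmt.format(ensemble_name, member, model_name, variable, stamp)
    # new pattern in this patch: join() always inserts exactly one separator
    new_name = join(path, name_fmt.format(ensemble_name, member, model_name, variable, stamp))
    print(old_name)
    print(new_name)
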
--- environment.yml | 3 ++- hagelslag/data/ModelOutput.py | 1 - hagelslag/processing/EnsembleProducts.py | 31 +++++++++++------------- hagelslag/processing/Watershed.py | 2 +- requirements.txt | 1 - test/test_data_MRMSGrid.py | 2 +- 6 files changed, 18 insertions(+), 22 deletions(-) diff --git a/environment.yml b/environment.yml index 696b873..4113cb1 100644 --- a/environment.yml +++ b/environment.yml @@ -5,6 +5,7 @@ dependencies: - python=3.8 - numpy - scipy + - s3fs - matplotlib - xarray - netcdf4 @@ -23,4 +24,4 @@ dependencies: - sphinx - mock - pip: - - git+https://github.com/jswhit/ncepgrib2.git + - grib2io diff --git a/hagelslag/data/ModelOutput.py b/hagelslag/data/ModelOutput.py index 19de456..f1bba04 100644 --- a/hagelslag/data/ModelOutput.py +++ b/hagelslag/data/ModelOutput.py @@ -12,7 +12,6 @@ import numpy as np from scipy.spatial import cKDTree from scipy.ndimage import gaussian_filter -from pyproj import Proj class ModelOutput(object): """ diff --git a/hagelslag/processing/EnsembleProducts.py b/hagelslag/processing/EnsembleProducts.py index 5c94a97..96bc41d 100644 --- a/hagelslag/processing/EnsembleProducts.py +++ b/hagelslag/processing/EnsembleProducts.py @@ -4,23 +4,16 @@ import pandas as pd from scipy.ndimage import gaussian_filter from scipy.signal import fftconvolve -from scipy.stats import gamma, bernoulli +from scipy.stats import gamma from scipy.interpolate import interp1d from scipy.spatial import cKDTree from skimage.morphology import disk -from netCDF4 import Dataset, date2num, num2date -import os +from netCDF4 import Dataset, num2date from glob import glob from os.path import join, exists import json from datetime import timedelta - - -try: - from ncepgrib2 import Grib2Encode - grib_support = True -except ImportError("ncepgrib2 not available"): - grib_support = False +from grib2io import Grib2Message class EnsembleMemberProduct(object): @@ -219,6 +212,7 @@ def load_forecast_netcdf_data(self, nc_path): self.nc_patches = None return return + def quantile_match(self): """ For each storm object, get the percentiles of the enhanced watershed variable field relative to the training @@ -403,7 +397,9 @@ def encode_grib2_percentile(self): for t, time in enumerate(self.times): time_list = list(self.run_date.utctimetuple()[0:6]) if grib_objects[time] is None: - grib_objects[time] = Grib2Encode(0, np.array(grib_id_start + time_list + [2, 1], dtype=np.int32)) + #grib_objects[time] = Grib2Encode(0, np.array(grib_id_start + time_list + [2, 1], dtype=np.int32)) + grib_objects[time] = Grib2Message(discipline=0, + idsect=np.array(grib_id_start + time_list + [2, 1], dtype=np.int32)) grib_objects[time].addgrid(gdsinfo, gdtmp1) pdtmp1[8] = (time.to_pydatetime() - self.run_date).total_seconds() / 3600.0 data = self.percentile_data[:, t] / 1000.0 @@ -469,7 +465,9 @@ def encode_grib2_data(self): for t, time in enumerate(self.times): time_list = list(self.run_date.utctimetuple()[0:6]) if grib_objects[time] is None: - grib_objects[time] = Grib2Encode(0, np.array(grib_id_start + time_list + [2, 1], dtype=np.int32)) + #grib_objects[time] = Grib2Encode(0, np.array(grib_id_start + time_list + [2, 1], dtype=np.int32)) + grib_objects[time] = Grib2Message(discipline=0, + idsect=np.array(grib_id_start + time_list + [2, 1], dtype=np.int32)) grib_objects[time].addgrid(gdsinfo, gdtmp1) pdtmp1[8] = (time.to_pydatetime() - self.run_date).total_seconds() / 3600.0 data = self.data[t] / 1000.0 @@ -490,13 +488,12 @@ def write_grib2_files(self, grib_objects, path): """ for t, time in 
enumerate(self.times.to_pydatetime()): grib_objects[time].end() - filename = path + "{0}_{1}_{2}_{3}_{4}.grib2".format(self.ensemble_name, + filename = join(path, "{0}_{1}_{2}_{3}_{4}.grib2".format(self.ensemble_name, self.member, self.model_name.replace(" ", "-"), self.variable, self.run_date.strftime("%Y%m%d%H") + "f{0:02d}".format(self.forecast_hours[t]) - ) - fo = open(filename, "wb") - fo.write(grib_objects[time].msg) - fo.close() + )) + with open(filename, "wb") as fo: + fo.write(grib_objects[time]._msg) diff --git a/hagelslag/processing/Watershed.py b/hagelslag/processing/Watershed.py index 16b6b66..6e25308 100644 --- a/hagelslag/processing/Watershed.py +++ b/hagelslag/processing/Watershed.py @@ -1,4 +1,4 @@ -from skimage.morphology import watershed +from skimage.segmentation import watershed from scipy.ndimage import label, find_objects import numpy as np diff --git a/requirements.txt b/requirements.txt index 486ac44..a69d52f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,4 +10,3 @@ scikit-image pygrib arrow>=0.8 cython -ncepgrib2 diff --git a/test/test_data_MRMSGrid.py b/test/test_data_MRMSGrid.py index 94bdcc2..64fbccc 100644 --- a/test/test_data_MRMSGrid.py +++ b/test/test_data_MRMSGrid.py @@ -11,7 +11,7 @@ def setUp(self): self.mrms = MRMSGrid(self.start_date, self.end_date, self.variable, self.path) def test_constructor(self): - self.assertEquals(self.mrms.all_dates.size, 22, "Number of dates is wrong") + self.assertEqual(self.mrms.all_dates.size, 22, "Number of dates is wrong") self.assertIsNone(self.mrms.data, "Data already loaded") self.assertIsNone(self.mrms.valid_dates, "Valid dates already loaded") From c312c37734624e51b8f62b4e1d5b3071620f9953 Mon Sep 17 00:00:00 2001 From: David John Gagne Date: Wed, 22 Sep 2021 10:16:37 -0600 Subject: [PATCH 05/15] Updated requirements list and version. --- requirements.txt | 6 ++++++ setup.py | 6 ++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index a69d52f..e63baa4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,3 +10,9 @@ scikit-image pygrib arrow>=0.8 cython +grib2io +s3fs +xarray +sphinx +pytest + diff --git a/setup.py b/setup.py index 76c1ad1..07508cd 100644 --- a/setup.py +++ b/setup.py @@ -7,8 +7,10 @@ 'Programming Language :: Python :: 2', 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3.9', ] on_rtd = os.environ.get('READTHEDOCS', None) == 'True' @@ -22,7 +24,7 @@ pkg_description = "Hagelslag is a Python package for storm-based analysis, forecasting, and evaluation." setup(name="hagelslag", - version="0.4.0b1", + version="0.4.1b1", description="Object-based severe weather forecast system", author="David John Gagne", author_email="dgagne@ucar.edu", From ba1102309434813e22aef23dcc0d73f8e3167485 Mon Sep 17 00:00:00 2001 From: David John Gagne Date: Wed, 29 Sep 2021 11:04:11 -0600 Subject: [PATCH 06/15] Updated environment.yml to install hagelslag too --- environment.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/environment.yml b/environment.yml index 4113cb1..f360e5c 100644 --- a/environment.yml +++ b/environment.yml @@ -25,3 +25,4 @@ dependencies: - mock - pip: - grib2io + - . 
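
With the `- .` entry under pip, `conda env create -f environment.yml` also
pip-installs the local hagelslag source tree into the new environment. A quick
way to confirm the install worked (a minimal check, assuming the command was run
from the repository root and the environment is active) is to import a couple of
the modules touched elsewhere in this series:

    # smoke test for the environment created from environment.yml
    from hagelslag.evaluation import DistributedROC, DistributedReliability
    from hagelslag.processing.EnsembleProducts import EnsembleMemberProduct

    print("hagelslag import OK:", DistributedROC.__name__,
          DistributedReliability.__name__, EnsembleMemberProduct.__name__)
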
From b0bb9a6c0c9a13e449e9695c4a6b687c5c767d2a Mon Sep 17 00:00:00 2001 From: David John Gagne Date: Wed, 29 Sep 2021 11:09:45 -0600 Subject: [PATCH 07/15] move workflow to github actions folder --- .../workflows/python-package-conda.yml | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename python-package-conda.yml => .github/workflows/python-package-conda.yml (100%) diff --git a/python-package-conda.yml b/.github/workflows/python-package-conda.yml similarity index 100% rename from python-package-conda.yml rename to .github/workflows/python-package-conda.yml From d0ca3455245a569774ef858e6afd289e6680f304 Mon Sep 17 00:00:00 2001 From: David John Gagne Date: Wed, 29 Sep 2021 11:33:50 -0600 Subject: [PATCH 08/15] Added jasper to environment to help with grib2io install --- environment.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/environment.yml b/environment.yml index f360e5c..64c50fe 100644 --- a/environment.yml +++ b/environment.yml @@ -23,6 +23,7 @@ dependencies: - cython - sphinx - mock + - jasper - pip: - grib2io - . From ecd9d035ad40a431153bf45d114f55e8f5293222 Mon Sep 17 00:00:00 2001 From: David John Gagne Date: Wed, 29 Sep 2021 13:12:52 -0600 Subject: [PATCH 09/15] Trying to install jasper with apt-get command in CI --- .github/workflows/python-package-conda.yml | 3 +++ environment.yml | 1 - 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/python-package-conda.yml b/.github/workflows/python-package-conda.yml index 7bae7e2..252f1bc 100644 --- a/.github/workflows/python-package-conda.yml +++ b/.github/workflows/python-package-conda.yml @@ -18,6 +18,9 @@ jobs: run: | # $CONDA is an environment variable pointing to the root of the miniconda directory echo $CONDA/bin >> $GITHUB_PATH + - name: Install grib dependencies + run: | + sudo apt-get install -y jasper - name: Install dependencies run: | conda env update --file environment.yml --name base diff --git a/environment.yml b/environment.yml index 64c50fe..f360e5c 100644 --- a/environment.yml +++ b/environment.yml @@ -23,7 +23,6 @@ dependencies: - cython - sphinx - mock - - jasper - pip: - grib2io - . 
From 35cb9482bb63c002ded8d7f0210a59d0c0bf9ff6 Mon Sep 17 00:00:00 2001 From: David John Gagne Date: Wed, 29 Sep 2021 14:00:31 -0600 Subject: [PATCH 10/15] update with new package names to install --- .github/workflows/python-package-conda.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/python-package-conda.yml b/.github/workflows/python-package-conda.yml index 252f1bc..0c5ceec 100644 --- a/.github/workflows/python-package-conda.yml +++ b/.github/workflows/python-package-conda.yml @@ -20,7 +20,9 @@ jobs: echo $CONDA/bin >> $GITHUB_PATH - name: Install grib dependencies run: | - sudo apt-get install -y jasper + sudo apt-get install -y libjasper-dev + sudo apt-get install -y libpng-dev + sudo apt-get install -y zlib1g-dev - name: Install dependencies run: | conda env update --file environment.yml --name base From 3f5f2bd662ea155b908544a29329149c5d4b54a8 Mon Sep 17 00:00:00 2001 From: David John Gagne Date: Wed, 29 Sep 2021 14:01:47 -0600 Subject: [PATCH 11/15] removed libjasper --- .github/workflows/python-package-conda.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/python-package-conda.yml b/.github/workflows/python-package-conda.yml index 0c5ceec..fecd4e2 100644 --- a/.github/workflows/python-package-conda.yml +++ b/.github/workflows/python-package-conda.yml @@ -20,7 +20,6 @@ jobs: echo $CONDA/bin >> $GITHUB_PATH - name: Install grib dependencies run: | - sudo apt-get install -y libjasper-dev sudo apt-get install -y libpng-dev sudo apt-get install -y zlib1g-dev - name: Install dependencies From 0ef91f39a8e5ef7c0bf842fc0d688eac94b4126f Mon Sep 17 00:00:00 2001 From: David John Gagne Date: Wed, 29 Sep 2021 14:02:14 -0600 Subject: [PATCH 12/15] jasper in env --- environment.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/environment.yml b/environment.yml index f360e5c..64c50fe 100644 --- a/environment.yml +++ b/environment.yml @@ -23,6 +23,7 @@ dependencies: - cython - sphinx - mock + - jasper - pip: - grib2io - . From 19f021ff0cc1e12e47205a8d931a42d8375291d4 Mon Sep 17 00:00:00 2001 From: "djgagne@ou.edu" Date: Wed, 29 Sep 2021 17:00:24 -0600 Subject: [PATCH 13/15] Added way to install libjasper with ubuntu --- .github/workflows/python-package-conda.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/python-package-conda.yml b/.github/workflows/python-package-conda.yml index fecd4e2..b5db039 100644 --- a/.github/workflows/python-package-conda.yml +++ b/.github/workflows/python-package-conda.yml @@ -20,8 +20,10 @@ jobs: echo $CONDA/bin >> $GITHUB_PATH - name: Install grib dependencies run: | - sudo apt-get install -y libpng-dev - sudo apt-get install -y zlib1g-dev + sudo add-apt-repository "deb http://security.ubuntu.com/ubuntu xenial-security main" + sudo apt update + sudo apt install libjasper1 libjasper-dev + ldconfig -p | grep libjasper - name: Install dependencies run: | conda env update --file environment.yml --name base From 6f3b7e54f8776e45a6e757d8cd51d1209239f0b2 Mon Sep 17 00:00:00 2001 From: David John Gagne Date: Thu, 30 Sep 2021 09:10:57 -0600 Subject: [PATCH 14/15] Cleaned up code formatting with auto-format in PyCharm. 
--- bin/hsdata | 111 ++++---- bin/hsforecast | 86 +++--- demos/eval_sspf_days.py | 17 +- demos/ncar_data_2015.config | 41 +-- demos/obj_tracking.py | 269 +++++++++--------- hagelslag/data/FV3ModelGrid.py | 20 +- hagelslag/data/GribModelGrid.py | 20 +- hagelslag/data/HREFv2ModelGrid.py | 38 +-- hagelslag/data/HRRREModelGrid.py | 46 +-- hagelslag/data/HRRRModelGrid.py | 12 +- hagelslag/data/HRRRZarrModelGrid.py | 3 +- hagelslag/data/HailForecastGrid.py | 17 +- hagelslag/data/MRMSGrid.py | 18 +- hagelslag/data/ModelGrid.py | 19 +- hagelslag/data/ModelOutput.py | 105 +++---- hagelslag/data/NCARModelGrid.py | 7 +- hagelslag/data/NCARStormEventModelGrid.py | 7 +- hagelslag/data/NCARWRF2020ModelGrid.py | 6 +- hagelslag/data/SSEFModelGrid.py | 11 +- hagelslag/data/VSEModelGrid.py | 38 +-- hagelslag/data/WRFModelGrid.py | 16 +- hagelslag/data/ZarrModelGrid.py | 7 +- hagelslag/evaluation/ContingencyTable.py | 5 +- hagelslag/evaluation/GridEvaluator.py | 33 +-- hagelslag/evaluation/MetricPlotter.py | 4 +- .../evaluation/MulticlassContingencyTable.py | 6 +- hagelslag/evaluation/NeighborEvaluator.py | 12 +- hagelslag/evaluation/ObjectEvaluator.py | 44 +-- hagelslag/evaluation/ProbabilityMetrics.py | 20 +- .../processing/EnhancedWatershedSegmenter.py | 3 +- hagelslag/processing/EnsembleProducts.py | 137 ++++----- hagelslag/processing/ObjectMatcher.py | 4 +- hagelslag/processing/STObject.py | 28 +- hagelslag/processing/TrackModeler.py | 59 ++-- hagelslag/processing/TrackProcessing.py | 84 +++--- hagelslag/processing/TrackSampler.py | 44 +-- hagelslag/processing/Watershed.py | 5 +- hagelslag/processing/tracker.py | 13 +- hagelslag/util/Config.py | 2 +- hagelslag/util/convert_mrms_grids.py | 31 +- hagelslag/util/create_model_grid_us_mask.py | 17 +- hagelslag/util/create_sector_grid_data.py | 193 +++++++------ hagelslag/util/custom_grib_table.py | 5 +- hagelslag/util/derived_vars.py | 2 +- hagelslag/util/hrefv2_symbolic_Links.py | 141 ++++----- hagelslag/util/lsr_calibration_dataset.py | 128 +++++---- hagelslag/util/make_proj_grids.py | 7 +- hagelslag/util/merge_forecast_data.py | 29 +- .../util/mrms_mesh_calibration_dataset.py | 132 +++++---- hagelslag/util/munkres.py | 50 ++-- hagelslag/util/output_tree_ensembles.py | 8 +- hagelslag/util/show_importance_ranks.py | 4 +- hagelslag/util/storm_patch_center_coords.py | 29 +- hagelslag/util/test_size_distributions.py | 27 +- hagelslag/util/testing_sector.py | 147 +++++----- setup.py | 13 +- 56 files changed, 1233 insertions(+), 1147 deletions(-) diff --git a/bin/hsdata b/bin/hsdata index a7d9f69..0b32dcc 100755 --- a/bin/hsdata +++ b/bin/hsdata @@ -1,18 +1,18 @@ #!/usr/bin/env python -import argparse, pdb -from multiprocessing import Pool -from hagelslag.util.Config import Config -from hagelslag.processing.TrackProcessing import TrackProcessor -from hagelslag.util.make_proj_grids import read_ncar_map_file -from hagelslag.util.create_sector_grid_data import SectorProcessor -from datetime import timedelta -import pandas as pd -import numpy as np +import argparse import os import traceback -from netCDF4 import Dataset, date2num +from datetime import timedelta +from multiprocessing import Pool from os.path import join, exists +import numpy as np +import pandas as pd +from netCDF4 import Dataset + +from hagelslag.processing.TrackProcessing import TrackProcessor +from hagelslag.util.Config import Config + def main(): parser = argparse.ArgumentParser("hsdata - Hagelslag Data Processor") @@ -126,13 +126,13 @@ def process_ensemble_member(run_date, member, config): if 
config.train: print("Find obs tracks", run_date, member) mrms_tracks = track_proc.find_mrms_tracks() - + print("Find model tracks", run_date, member) if patch_radius is None: model_tracks = track_proc.find_model_tracks() else: model_tracks = track_proc.find_model_patch_tracks() - + if model_tracks: print(run_date, member, "Found this many model tracks: {0:d}".format(len(model_tracks))) print("Extract model attributes", run_date, member) @@ -154,62 +154,65 @@ def process_ensemble_member(run_date, member, config): track_errors = None else: track_pairings = track_proc.match_tracks(model_tracks, mrms_tracks, - unique_matches=config.unique_matches, - closest_matches=config.closest_matches) + unique_matches=config.unique_matches, + closest_matches=config.closest_matches) track_proc.match_size_distributions(model_tracks, mrms_tracks, track_pairings) track_errors = track_proc.calc_track_errors(model_tracks, - mrms_tracks, - track_pairings) + mrms_tracks, + track_pairings) print("Output data", run_date, member) forecast_data = make_forecast_track_data(model_tracks, run_date, member, - config, track_proc.model_grid.proj, mrms_tracks, track_errors) + config, track_proc.model_grid.proj, mrms_tracks, + track_errors) if patch_radius is not None: print("Output netCDF", run_date, member) forecast_track_patches_to_netcdf(model_tracks, patch_radius, run_date, member, config) if config.json: print("Output json", run_date, member) forecast_tracks_to_json(model_tracks, run_date, member, config, track_proc.model_grid.proj, - observed_tracks=mrms_tracks, - track_errors=track_errors) + observed_tracks=mrms_tracks, + track_errors=track_errors) obs_data = make_obs_track_data(mrms_tracks, member, run_date, config, track_proc.model_grid.proj) if config.json: obs_tracks_to_json(mrms_tracks, member, run_date, config, track_proc.model_grid.proj) print("Output csv", run_date, member) for table_name, table_data in obs_data.items(): csv_filename = join(config.csv_path, "{0}_{1}_{2}_{3}.csv".format(table_name, - "obs", - member, - run_date.strftime( - config.run_date_format))) + "obs", + member, + run_date.strftime( + config.run_date_format))) table_data.to_csv(csv_filename, - na_rep="nan", - float_format="%0.5f", - index=False) + na_rep="nan", + float_format="%0.5f", + index=False) os.chmod(csv_filename, 0o666) else: forecast_data = make_forecast_track_data(model_tracks, run_date, member, config, - track_proc.model_grid.proj) + track_proc.model_grid.proj) if patch_radius is not None: forecast_track_patches_to_netcdf(model_tracks, patch_radius, run_date, member, config) if config.json: forecast_tracks_to_json(model_tracks, run_date, member, config, track_proc.model_grid.proj) elif len(model_tracks) > 0: print(run_date, member, "Make Forecast Track Data") - forecast_data = make_forecast_track_data(model_tracks, run_date, member, config, track_proc.model_grid.proj) + forecast_data = make_forecast_track_data(model_tracks, run_date, member, config, + track_proc.model_grid.proj) if patch_radius is not None: print(run_date, member, "Track Data to netCDF") forecast_track_patches_to_netcdf(model_tracks, patch_radius, run_date, member, config) if config.json: forecast_tracks_to_json(model_tracks, run_date, member, config, track_proc.model_grid.proj) else: - print('No {0} {1} modeled tracks found'.format(run_date,member)) + print('No {0} {1} modeled tracks found'.format(run_date, member)) forecast_data = {} for table_name, table_data in forecast_data.items(): csv_filename = join(config.csv_path, 
"{0}_{1}_{2}_{3}.csv".format(table_name, - config.ensemble_name, - member, - run_date.strftime(config.run_date_format))) + config.ensemble_name, + member, + run_date.strftime( + config.run_date_format))) print("Output csv file " + csv_filename) table_data.to_csv(csv_filename, na_rep="nan", @@ -264,7 +267,7 @@ def process_observed_tracks(run_date, member, config): mrms_tracks = track_proc.find_mrms_tracks() if len(mrms_tracks) > 0: obs_data = make_obs_track_data(mrms_tracks, member, run_date, config, track_proc.model_grid.proj) - if config.json: + if config.json: obs_tracks_to_json(mrms_tracks, member, run_date, config, track_proc.model_grid.proj) # if not os.access(config.csv_path + run_date.strftime("%Y%m%d"), os.R_OK): # try: @@ -339,9 +342,9 @@ def rematch_ensemble_tracks(run_date, member, config): ) for table_name, table_data in obs_data.items(): table_data.to_csv(join(config.csv_path, "{0}_{1}_{2}_{3}.csv".format(table_name, - "obs", - member, - run_date.strftime("%Y%m%d"))), + "obs", + member, + run_date.strftime("%Y%m%d"))), na_rep="nan", float_format="%0.5f", index=False) @@ -351,9 +354,9 @@ def rematch_ensemble_tracks(run_date, member, config): forecast_data = {} for table_name, table_data in forecast_data.items(): table_data.to_csv(join(config.csv_path, "{0}_{1}_{2}_{3}.csv".format(table_name, - config.ensemble_name, - member, - run_date.strftime("%Y%m%d"))), + config.ensemble_name, + member, + run_date.strftime("%Y%m%d"))), na_rep="nan", float_format="%0.5f", index=False) @@ -483,11 +486,11 @@ def make_forecast_track_data(forecast_tracks, run_date, member, config, proj, ob else: if forecast_track.observations is not None: num_labels = len(forecast_track.observations) - for l in range(num_labels): + for label in range(num_labels): if config.label_type == "gamma": - hail_label = forecast_track.observations[l].loc[step].values.tolist() + hail_label = forecast_track.observations[label].loc[step].values.tolist() else: - hail_label = [forecast_track.observations[l].loc[step, "Max_Hail_Size"]] + hail_label = [forecast_track.observations[label].loc[step, "Max_Hail_Size"]] forecast_data['track_step'].loc[track_step_count] = record + hail_label track_step_count += 1 else: @@ -545,11 +548,11 @@ def forecast_tracks_to_json(forecast_tracks, run_date, member, config, proj, obs except OSError: print("directory already created") - json_filename = config.geojson_path + "/".join(full_path) + \ - "/{0}_{1}_{2}_model_track_{3:03d}.json".format(ensemble_name, - run_date.strftime("%Y%m%d"), - member, - f) + json_filename = join(config.geojson_path, "/".join(full_path), + "{0}_{1}_{2}_model_track_{3:03d}.json".format(ensemble_name, + run_date.strftime("%Y%m%d"), + member, + f)) json_metadata = dict(id=track_id, ensemble_name=ensemble_name, ensemble_member=member, @@ -662,7 +665,7 @@ def forecast_track_patches_to_netcdf(forecast_tracks, patch_radius, run_date, me if hasattr(config, "future_variables"): for f_variable in config.future_variables: out_file.variables[f_variable + "_future"][:] = np.vstack([f_track.attributes[f_variable + "-future"] - for f_track in forecast_tracks]) + for f_track in forecast_tracks]) if config.train: for label_column in label_columns: try: @@ -672,12 +675,12 @@ def forecast_track_patches_to_netcdf(forecast_tracks, patch_radius, run_date, me out_file.variables[label_column][:] = 0 # Save configuration dictionary as global attributes. 
- for k,v in config.__dict__.items(): + for k, v in config.__dict__.items(): # Don't save attributes that are already netCDF varaible names - if k=="dates": continue - if k=="storm_variables": continue - if k=="potential_variables": continue - if k=="tendency_variables": continue + if k == "dates": continue + if k == "storm_variables": continue + if k == "potential_variables": continue + if k == "tendency_variables": continue v = str(v) # Don't clobber existing attribute. if hasattr(out_file, k): @@ -685,7 +688,7 @@ def forecast_track_patches_to_netcdf(forecast_tracks, patch_radius, run_date, me alt_key = "config_" + k if hasattr(out_file, alt_key): # If alternative key exists too, raise attribute error. - raise AttributeError("Can't save "+k+":"+" "+v+" as global attribute. It already exists.") + raise AttributeError("Can't save " + k + ":" + " " + v + " as global attribute. It already exists.") else: setattr(out_file, alt_key, v) else: diff --git a/bin/hsforecast b/bin/hsforecast index aba3219..02a4263 100755 --- a/bin/hsforecast +++ b/bin/hsforecast @@ -1,18 +1,12 @@ #!/usr/bin/env python import argparse -from netCDF4 import Dataset -from hagelslag.util.Config import Config -from hagelslag.processing.TrackModeler import TrackModeler -from multiprocessing import Pool, Manager -from hagelslag.processing.EnsembleProducts import * -from hagelslag.util.make_proj_grids import read_ncar_map_file -from scipy.ndimage import gaussian_filter -import pandas as pd -import numpy as np -import traceback -from datetime import timedelta -from glob import glob import os +import traceback +from multiprocessing import Pool + +from hagelslag.processing.EnsembleProducts import * +from hagelslag.processing.TrackModeler import TrackModeler +from hagelslag.util.Config import Config def main(): @@ -48,11 +42,11 @@ def main(): if args.train: train_models(track_modeler, config) training_data_percentiles(config.ensemble_members, - config.ensemble_name, - config.watershed_variable, - config.train_data_path, - config.size_dis_training_path, - config.weighting_function) + config.ensemble_name, + config.watershed_variable, + config.train_data_path, + config.size_dis_training_path, + config.weighting_function) if args.fore: forecasts = make_forecasts(track_modeler, config) output_forecasts_csv(forecasts, track_modeler, config) @@ -106,9 +100,9 @@ def make_forecasts(track_modeler, config): print("Size Distribution Forecasts") forecasts["dist"] = track_modeler.predict_size_distribution_models(config.size_distribution_model_names, - config.size_distribution_input_columns, - config.metadata_columns, - location=config.size_distribution_loc) + config.size_distribution_input_columns, + config.metadata_columns, + location=config.size_distribution_loc) return forecasts @@ -156,13 +150,13 @@ def generate_ml_grids(config, mode="forecast"): """ pool = Pool(config.num_procs) run_dates = pd.date_range(start=config.start_dates[mode], - end=config.end_dates[mode], - freq='1D') + end=config.end_dates[mode], + freq='1D') ml_model_list = config.size_distribution_model_names print(ml_model_list) print() print('Size Distribution coming from: {0}'.format(config.size_dis_training_path)) - print() + print() ml_var = "hail" for run_date in run_dates: @@ -218,7 +212,7 @@ def generate_ml_member_grid(ensemble_name, model_names, member, run_date, variab ep.load_forecast_csv_data(forecast_csv_path) ep.load_forecast_netcdf_data(netcdf_path) ep.quantile_match() - #ep.load_data(num_samples=num_samples, percentiles=ml_grid_percentiles) + # 
ep.load_data(num_samples=num_samples, percentiles=ml_grid_percentiles) grib_objects = ep.encode_grib2_data() if grib_objects is None: return @@ -236,9 +230,9 @@ def generate_ml_member_grid(ensemble_name, model_names, member, run_date, variab return -def training_data_percentiles(member_list, ensemble_name, watershed_obj, csv_data_path, - csv_outpath, weighting_function=None, - percentiles=np.linspace(0.1, 99.9, 100)): +def training_data_percentiles(member_list, ensemble_name, watershed_obj, csv_data_path, + csv_outpath, weighting_function=None, + percentiles=np.linspace(0.1, 99.9, 100)): """ Creates watershed object distribution of sizes for a given set of training netcdf patches @@ -257,52 +251,54 @@ def training_data_percentiles(member_list, ensemble_name, watershed_obj, csv_dat ensemble_obj_data = [] ######################### - #Individual Member Distributions + # Individual Member Distributions ######################### for member in member_list: member_obj_data = [] - member_files = sorted(glob(csv_data_path+'*step*{0}*{1}*.csv'.format(ensemble_name,member))) - dataset = pd.concat(map(pd.read_csv, member_files),ignore_index=True,sort='True') + member_files = sorted(glob(csv_data_path + '*step*{0}*{1}*.csv'.format(ensemble_name, member))) + dataset = pd.concat(map(pd.read_csv, member_files), ignore_index=True, sort=True) match_inds = np.where(dataset["Matched"] == 1)[0] - match_dataset = dataset.loc[match_inds,:] + match_dataset = dataset.loc[match_inds, :] if weighting_function is None: - objs_stat = match_dataset.loc[:,"{0}_max".format(watershed_obj)].dropna() + objs_stat = match_dataset.loc[:, "{0}_max".format(watershed_obj)].dropna() member_obj_data.append(objs_stat) else: training_weights = weighting_function(match_dataset) weight_inds = np.where(training_weights >= 1.0)[0] - weights_objs_stat = match_dataset.reindex(index=weight_inds,columns=["{0}_max".format(watershed_obj)]).dropna() + weights_objs_stat = match_dataset.reindex(index=weight_inds, + columns=["{0}_max".format(watershed_obj)]).dropna() member_obj_data.append(weights_objs_stat) if member_obj_data: obj_values = np.concatenate(member_obj_data) - print('{0} {1} model objects: {2}'.format(watershed_obj,member,obj_values.shape[0])) - print() + print('{0} {1} model objects: {2}'.format(watershed_obj, member, obj_values.shape[0])) + print() obj_percent_val = np.percentile(obj_values, percentiles) - dataframe_values = {'Obj_Values':obj_percent_val, 'Percentiles':percentiles} + dataframe_values = {'Obj_Values': obj_percent_val, 'Percentiles': percentiles} obj_value_percentile = pd.DataFrame(data=dataframe_values) if csv_outpath: - csv_path = csv_outpath+'{0}_{1}_{2}_Size_Distribution.csv'.format(ensemble_name, - watershed_obj,member) + csv_path = csv_outpath + '{0}_{1}_{2}_Size_Distribution.csv'.format(ensemble_name, + watershed_obj, member) obj_value_percentile.to_csv(csv_path, index=False) print('Output csv: {0}'.format(csv_path)) ensemble_obj_data.append(obj_values) - + ######################### - #Ensemble Distribution + # Ensemble Distribution ######################### if ensemble_obj_data: ens_obj_values = [obj for member in ensemble_obj_data for obj in member] - print() - print('Total ensemble {0} model objects: {1}'.format(watershed_obj,np.shape(ens_obj_values)[0])) + print() + print('Total ensemble {0} model objects: {1}'.format(watershed_obj, np.shape(ens_obj_values)[0])) ens_obj_percent_val = np.percentile(ens_obj_values, percentiles) - dataframe_values = {'Obj_Values':ens_obj_percent_val, 
'Percentiles':percentiles} + dataframe_values = {'Obj_Values': ens_obj_percent_val, 'Percentiles': percentiles} ens_obj_value_percentile = pd.DataFrame(data=dataframe_values) if csv_outpath: - csv_path = csv_outpath+'{0}_{1}_Size_Distribution.csv'.format(ensemble_name, - watershed_obj) + csv_path = csv_outpath + '{0}_{1}_Size_Distribution.csv'.format(ensemble_name, + watershed_obj) ens_obj_value_percentile.to_csv(csv_path, index=False) print('Output csv: {0}'.format(csv_path)) return + if __name__ == "__main__": main() diff --git a/demos/eval_sspf_days.py b/demos/eval_sspf_days.py index ee7e5bf..8343a70 100644 --- a/demos/eval_sspf_days.py +++ b/demos/eval_sspf_days.py @@ -1,14 +1,11 @@ -import pandas as pd -import numpy as np -import matplotlib.pyplot as plt -from mpl_toolkits.basemap import Basemap -from hagelslag.evaluation import DistributedROC, DistributedReliability -from scipy.ndimage import convolve -from glob import glob -import xarray as xr import os from os.path import join +import numpy as np +import pandas as pd + +from hagelslag.evaluation import DistributedROC, DistributedReliability + eval_path = "/glade/p/work/dgagne/ncar_coarse_neighbor_eval_2016_s_2/" eval_files = sorted(os.listdir(eval_path)) eval_test = pd.read_csv(join(eval_path, eval_files[0])) @@ -29,10 +26,10 @@ obs = eval_data.loc[us_mask, "MESH_Max_60min_00.50_{0:2d}".format(thresh)] for model in models: brier[thresh].loc[run_dates[ev], model] = DistributedReliability(thresholds=prob_thresholds) - brier[thresh].loc[run_dates[ev], model].update(eval_data.loc[us_mask, model], + brier[thresh].loc[run_dates[ev], model].update(eval_data.loc[us_mask, model], obs) roc[thresh].loc[run_dates[ev], model] = DistributedROC(thresholds=prob_thresholds) - roc[thresh].loc[run_dates[ev], model].update(eval_data.loc[us_mask, model], + roc[thresh].loc[run_dates[ev], model].update(eval_data.loc[us_mask, model], obs) out_path = "/glade/p/work/dgagne/ncar_coarse_neighbor_scores_2016/" for thresh in [25, 50, 75]: diff --git a/demos/ncar_data_2015.config b/demos/ncar_data_2015.config index bc2670c..bbe28f2 100644 --- a/demos/ncar_data_2015.config +++ b/demos/ncar_data_2015.config @@ -1,36 +1,37 @@ #!/usr/bin/env python -from hagelslag.processing.ObjectMatcher import shifted_centroid_distance, start_time_distance -from hagelslag.processing.ObjectMatcher import mean_minimum_centroid_distance, mean_area_distance, \ - duration_distance, start_centroid_distance,nonoverlap,mean_min_time_distance -import pandas as pd import numpy as np +import pandas as pd + +from hagelslag.processing.ObjectMatcher import mean_minimum_centroid_distance, duration_distance, nonoverlap, \ + mean_min_time_distance scratch_path = "/glade/p/nmmm0001/sobash/OBJECT_TRACK/" date_index = pd.DatetimeIndex(start="2015-06-20T00:00", end="2015-06-20T00:00", freq="1D") config = dict(dates=date_index.to_pydatetime(), start_hour=12, end_hour=36, - #watershed_variable="GRPL_MAX", + # watershed_variable="GRPL_MAX", watershed_variable="UP_HELI_MAX", ensemble_name="NCAR", - ensemble_members=["mem{0:d}".format(m) for m in range(1,11)], + ensemble_members=["mem{0:d}".format(m) for m in range(1, 11)], model_path="/glade/scratch/sobash/RT2015/", - model_watershed_params=(25, 5, 250, 200, 200), #decreasing the size threshold results in more broken tracks - #model_watershed_params=(6, 1, 100, 100, 100), - #size_filter=10, + model_watershed_params=(25, 5, 250, 200, 200), + # decreasing the size threshold results in more broken tracks + # model_watershed_params=(6, 1, 100, 100, 100), + # 
size_filter=10, size_filter=0, gaussian_window=0, - #gaussian_window=1, + # gaussian_window=1, mrms_path=None, mrms_variable="MRMS_RotationTrackML60min_00.50", mrms_watershed_params=(2, 1, 50, 100, 100), object_matcher_params=( [nonoverlap], np.array([1.0]), - np.array([24000])), #used 24000 for 11/16 - #object_matcher_params=([shifted_centroid_distance], np.array([1.0]), + np.array([24000])), # used 24000 for 11/16 + # object_matcher_params=([shifted_centroid_distance], np.array([1.0]), # np.array([24000])), - #object_matcher_params=( + # object_matcher_params=( # [shifted_centroid_distance,nonoverlap], # np.array([0.5,0.5]), # np.array([24000,24000])), #used 24000 for 11/16 @@ -38,15 +39,17 @@ config = dict(dates=date_index.to_pydatetime(), [mean_minimum_centroid_distance, mean_min_time_distance, duration_distance], np.array([0.4, 0.3, 0.3]), np.array([500000, 1, 6])), - #storm_variables=["UP_HELI_MAX", "GRPL_MAX", "W_UP_MAX", "W_DN_MAX", "HAIL_MAX2D", "HAIL_MAXK1", + # storm_variables=["UP_HELI_MAX", "GRPL_MAX", "W_UP_MAX", "W_DN_MAX", "HAIL_MAX2D", "HAIL_MAXK1", # "LTG3_MAX", "RVORT1_MAX", "UP_HELI_MAX03", "UP_HELI_MIN", "WSPD10MAX"], - storm_variables=["UP_HELI_MAX", "UP_HELI_MAX03", "RVORT1_MAX", "GRPL_MAX", "W_UP_MAX", "W_DN_MAX", "WSPD10MAX", "T2", "Q2"], - potential_variables=["CAPE_SFC", "MLCAPE", "MUCAPE", "CIN_SFC", "MLCIN", "LCL_HEIGHT", "SRH1", "SRH3", "UBSHR1", "VBSHR1", "UBSHR6", "VBSHR6", "T2", "Q2", "PREC_ACC_NC"], + storm_variables=["UP_HELI_MAX", "UP_HELI_MAX03", "RVORT1_MAX", "GRPL_MAX", "W_UP_MAX", "W_DN_MAX", + "WSPD10MAX", "T2", "Q2"], + potential_variables=["CAPE_SFC", "MLCAPE", "MUCAPE", "CIN_SFC", "MLCIN", "LCL_HEIGHT", "SRH1", "SRH3", + "UBSHR1", "VBSHR1", "UBSHR6", "VBSHR6", "T2", "Q2", "PREC_ACC_NC"], tendency_variables=[], - #shape_variables=["area", "eccentricity", "major_axis_length", "minor_axis_length", "orientation", + # shape_variables=["area", "eccentricity", "major_axis_length", "minor_axis_length", "orientation", # "extent"] + ["weighted_moments_hu_{0:d}".format(h) for h in range(7)], - shape_variables=["area","major_axis_length","minor_axis_length"], - #variable_statistics=["mean", "max", "min", "std", "mean_dt", "max_dt"], + shape_variables=["area", "major_axis_length", "minor_axis_length"], + # variable_statistics=["mean", "max", "min", "std", "mean_dt", "max_dt"], variable_statistics=["max", "mean", "min"], csv_path=scratch_path + "track_data_ncar_2015_csv_UH25_NEW/", geojson_path=scratch_path + "track_data_ncar_2015_json/", diff --git a/demos/obj_tracking.py b/demos/obj_tracking.py index 1cf1080..3b20a2a 100755 --- a/demos/obj_tracking.py +++ b/demos/obj_tracking.py @@ -9,41 +9,40 @@ # This module demonstrates how data science tools from the image processing and machine learning families can be used to create a forecast of severe hail. It aims to teach the advantages, challenges, and limitations of these tools through hands-on interaction. 
# -import numpy as np -import matplotlib.pyplot as plt -from matplotlib.patches import Polygon, PathPatch -from matplotlib.collections import PatchCollection -from datetime import datetime, timedelta -from scipy.ndimage import gaussian_filter, find_objects +import argparse +import os from copy import deepcopy -import pdb, sys, argparse, os - +from datetime import datetime, timedelta -# In[2]: +import matplotlib.pyplot as plt +import numpy as np +from scipy.ndimage import gaussian_filter, find_objects -from hagelslag.processing.EnhancedWatershedSegmenter import EnhancedWatershed from hagelslag.data import ModelOutput -from hagelslag.processing.ObjectMatcher import ObjectMatcher, closest_distance from hagelslag.processing import STObject +from hagelslag.processing.EnhancedWatershedSegmenter import EnhancedWatershed +from hagelslag.processing.ObjectMatcher import ObjectMatcher, closest_distance + +# In[2]: parser = argparse.ArgumentParser(description='object tracker') parser.add_argument('-m', '--member', type=str, help='member description (e.g. 1km_pbl7)', default='1km_on_3km_pbl1') parser.add_argument('-d', '--date', help='date yyyymmddhh', default='2005011312') parser.add_argument('-f', '--field', default='UP_HELI_MAX03', help='field in which to find objects') -parser.add_argument('-t','--timethresh', type=int, default=3, help='time threshold (hours)') -parser.add_argument('-v','--verbose', action="store_true", help='print more output. useful for debugging') +parser.add_argument('-t', '--timethresh', type=int, default=3, help='time threshold (hours)') +parser.add_argument('-v', '--verbose', action="store_true", help='print more output. useful for debugging') args = parser.parse_args() -if args.verbose: +if args.verbose: print(args) odir = '/glade/p/work/ahijevyc/hagelslag/out/' model_path = "/glade/scratch/ahijevyc/VSE/" ensemble_name = "NCAR" -run_date = datetime.strptime(args.date,'%Y%m%d%H') +run_date = datetime.strptime(args.date, '%Y%m%d%H') member = args.member field = args.field -start_date = run_date + timedelta(hours=10) # remember first time is usually all zeros +start_date = run_date + timedelta(hours=10) # remember first time is usually all zeros # Attributes: # min_thresh (int): minimum pixel value for pixel to be part of a region @@ -54,25 +53,29 @@ # From ahij's config file. 
if field == "MAX_UPDRAFT_HELICITY" or field == "UP_HELI_MAX03": - params = {"min_thresh":75, "step":5, "max_thresh":250, "max_size":50, "delta":75, "min_size":1, "filter_size":0} + params = {"min_thresh": 75, "step": 5, "max_thresh": 250, "max_size": 50, "delta": 75, "min_size": 1, + "filter_size": 0} if field == "HAIL2D": - params = {"min_thresh":0.025, "step":0.005, "max_thresh":0.1, "max_size":150, "delta":75, "min_size":0, "filter_size":1} -levels = params['min_thresh'] * np.arange(1,8) + params = {"min_thresh": 0.025, "step": 0.005, "max_thresh": 0.1, "max_size": 150, "delta": 75, "min_size": 0, + "filter_size": 1} +levels = params['min_thresh'] * np.arange(1, 8) levels = np.append(levels, params['min_thresh'] * 15) -model_watershed_params = (params['min_thresh'],params['step'],params['max_thresh'],params["max_size"],params["delta"]) +model_watershed_params = ( +params['min_thresh'], params['step'], params['max_thresh'], params["max_size"], params["delta"]) end_date = start_date + timedelta(hours=0) from netCDF4 import Dataset -model_grid = ModelOutput(ensemble_name, - member, - run_date, - field, - start_date, + +model_grid = ModelOutput(ensemble_name, + member, + run_date, + field, + start_date, end_date, model_path, single_step=True) -model_map_file="/glade/p/work/ahijevyc/hagelslag/mapfiles/VSE.txt" +model_map_file = "/glade/p/work/ahijevyc/hagelslag/mapfiles/VSE.txt" model_grid.load_map_info(model_map_file) model_grid.data = [] @@ -80,88 +83,95 @@ d = start_date deltat = timedelta(minutes=60) + def add_grid(m): - m.drawstates() - m.drawcoastlines(linewidth=0.5) - parallels = np.arange(0.,81.,1.) - m.drawparallels(parallels,labels=[True,False,False,False],linewidth=0.5) - meridians = np.arange(0.,351.,2.) - m.drawmeridians(meridians,labels=[False,False,False,True],linewidth=0.5) - return m - + m.drawstates() + m.drawcoastlines(linewidth=0.5) + parallels = np.arange(0., 81., 1.) + m.drawparallels(parallels, labels=[True, False, False, False], linewidth=0.5) + meridians = np.arange(0., 351., 2.) + m.drawmeridians(meridians, labels=[False, False, False, True], linewidth=0.5) + return m + while d <= end_date: - fhr = (d - run_date).total_seconds()/3600 - # list of potential paths to diagnostic files - dfiles = [model_path+member+run_date.strftime("/%Y%m%d%H")+"/wrf/wrfout_d01_"+d.strftime("%Y-%m-%d_%H:%M:%S"), - model_path+member+run_date.strftime("/%Y%m%d%H")+"/wrf/diags_d01."+d.strftime("%Y-%m-%d_%H:%M:%S")+".nc", - model_path+member+run_date.strftime("/%Y%m%d%H")+"/post_AGAIN/"+'fhr_%d'%fhr+"/WRFTWO"+'%02d'%fhr+".nc", - model_path+member+run_date.strftime("/%Y%m%d%H")+"/wrf/vse_d01."+d.strftime("%Y-%m-%d_%H:%M:%S")+".nc"] - for dfile in dfiles: - # see if each path exists - if not os.path.isfile(dfile): - continue - # If it does, see if 'field' is a variable. - ncf = Dataset(dfile) - if field in ncf.variables: - print(dfile) - model_grid.data.append(ncf.variables[field][0,:,:]) - ncf.close() - break - ncf.close() - d += deltat - -print(model_grid.lon.shape, np.maximum.reduce(model_grid.data).shape) # max across time dimension + fhr = (d - run_date).total_seconds() / 3600 + # list of potential paths to diagnostic files + dfiles = [ + model_path + member + run_date.strftime("/%Y%m%d%H") + "/wrf/wrfout_d01_" + d.strftime("%Y-%m-%d_%H:%M:%S"), + model_path + member + run_date.strftime("/%Y%m%d%H") + "/wrf/diags_d01." 
+ d.strftime( + "%Y-%m-%d_%H:%M:%S") + ".nc", + model_path + member + run_date.strftime( + "/%Y%m%d%H") + "/post_AGAIN/" + 'fhr_%d' % fhr + "/WRFTWO" + '%02d' % fhr + ".nc", + model_path + member + run_date.strftime("/%Y%m%d%H") + "/wrf/vse_d01." + d.strftime( + "%Y-%m-%d_%H:%M:%S") + ".nc"] + for dfile in dfiles: + # see if each path exists + if not os.path.isfile(dfile): + continue + # If it does, see if 'field' is a variable. + ncf = Dataset(dfile) + if field in ncf.variables: + print(dfile) + model_grid.data.append(ncf.variables[field][0, :, :]) + ncf.close() + break + ncf.close() + d += deltat + +print(model_grid.lon.shape, np.maximum.reduce(model_grid.data).shape) # max across time dimension print(model_grid.data[0].max(), model_grid.data[-1].max(), np.maximum.reduce(model_grid.data).max()) -plt.figure(figsize=(10,8)) +plt.figure(figsize=(10, 8)) plt.contourf(model_grid.lon, model_grid.lat, - np.maximum.reduce(model_grid.data), # max across time dimension - levels, - extend="max", - latlon= True, - cmap="Accent") + np.maximum.reduce(model_grid.data), # max across time dimension + levels, + extend="max", + latlon=True, + cmap="Accent") plt.colorbar(shrink=0.9, fraction=0.1, ticks=levels) -title_info = plt.title(field + "\n"+member+" {0}-{1}".format(start_date.strftime("%d %b %Y %H:%M"), - end_date.strftime("%d %b %Y %H:%M")), - fontweight="bold", fontsize=14) -dtstr = "_"+member+run_date.strftime("_%Y%m%d%H") -ret = plt.savefig(odir+"uh_swaths/"+field+"_swaths"+dtstr+".png") +title_info = plt.title(field + "\n" + member + " {0}-{1}".format(start_date.strftime("%d %b %Y %H:%M"), + end_date.strftime("%d %b %Y %H:%M")), + fontweight="bold", fontsize=14) +dtstr = "_" + member + run_date.strftime("_%Y%m%d%H") +ret = plt.savefig(odir + "uh_swaths/" + field + "_swaths" + dtstr + ".png") def get_forecast_objects(model_grid, ew_params, min_size, gaussian_window): - ew = EnhancedWatershed(*ew_params) - model_objects = [] - print("Find model objects Hour:") - for h in range(int((model_grid.end_date - model_grid.start_date).total_seconds()/deltat.total_seconds())+1): - print(h) - hour_labels = ew.size_filter(ew.label(gaussian_filter(model_grid.data[h], gaussian_window)), min_size) - obj_slices = find_objects(hour_labels) - num_slices = len(obj_slices) - model_objects.append([]) - if num_slices > 0: - fig, ax = plt.subplots() - t = plt.contourf(model_grid.lon,model_grid.lat,hour_labels,np.arange(0,num_slices+1)+0.5,extend="max",cmap="Set1",latlon=True,title=str(run_date)+" "+field+" "+str(h)) - ret = plt.savefig(odir+"enh_watershed_ex/ew{0:02d}.png".format(h)) - for s, sl in enumerate(obj_slices): - model_objects[-1].append(STObject(model_grid.data[h][sl], - #np.where(hour_labels[sl] > 0, 1, 0), - # For some objects (especially long, diagonal ones), the rectangular - # slice encompasses part of other objects (i.e. non-zero elements of slice). - # We don't want them in our mask. 
- np.where(hour_labels[sl] == s+1, 1, 0), - model_grid.x[sl], - model_grid.y[sl], - model_grid.i[sl], - model_grid.j[sl], - h, - h, - dx=model_grid.dx)) - if h > 0: - dims = model_objects[-1][-1].timesteps[0].shape - model_objects[-1][-1].estimate_motion(h, model_grid.data[h-1], dims[1], dims[0]) - return model_objects + ew = EnhancedWatershed(*ew_params) + model_objects = [] + print("Find model objects Hour:") + for h in range(int((model_grid.end_date - model_grid.start_date).total_seconds() / deltat.total_seconds()) + 1): + print(h) + hour_labels = ew.size_filter(ew.label(gaussian_filter(model_grid.data[h], gaussian_window)), min_size) + obj_slices = find_objects(hour_labels) + num_slices = len(obj_slices) + model_objects.append([]) + if num_slices > 0: + fig, ax = plt.subplots() + t = plt.contourf(model_grid.lon, model_grid.lat, hour_labels, np.arange(0, num_slices + 1) + 0.5, + extend="max", cmap="Set1", latlon=True, title=str(run_date) + " " + field + " " + str(h)) + ret = plt.savefig(odir + "enh_watershed_ex/ew{0:02d}.png".format(h)) + for s, sl in enumerate(obj_slices): + model_objects[-1].append(STObject(model_grid.data[h][sl], + # np.where(hour_labels[sl] > 0, 1, 0), + # For some objects (especially long, diagonal ones), the rectangular + # slice encompasses part of other objects (i.e. non-zero elements of slice). + # We don't want them in our mask. + np.where(hour_labels[sl] == s + 1, 1, 0), + model_grid.x[sl], + model_grid.y[sl], + model_grid.i[sl], + model_grid.j[sl], + h, + h, + dx=model_grid.dx)) + if h > 0: + dims = model_objects[-1][-1].timesteps[0].shape + model_objects[-1][-1].estimate_motion(h, model_grid.data[h - 1], dims[1], dims[0]) + return model_objects + model_objects = get_forecast_objects(model_grid, model_watershed_params, params['min_size'], params['filter_size']) @@ -169,37 +179,36 @@ def get_forecast_objects(model_grid, ew_params, min_size, gaussian_window): # In[12]: def track_forecast_objects(input_model_objects, model_grid, object_matcher): - model_objects = deepcopy(input_model_objects) - hours = np.arange(int((model_grid.end_date-model_grid.start_date).total_seconds()/deltat.total_seconds()) + 1) - print("hours = ",hours) - tracked_model_objects = [] - for h in hours: - past_time_objs = [] - for obj in tracked_model_objects: - # Potential trackable objects are identified - # In other words, objects whose end_time is the previous hour - if obj.end_time == h - 1: - past_time_objs.append(obj) - # If no objects existed in the last time step, then consider objects in current time step all new - if len(past_time_objs) == 0: - print("time",h, " no objects existed in the last time step. 
consider objects in current time step all new") - tracked_model_objects.extend(deepcopy(model_objects[h])) - # Match from previous time step with current time step - elif len(past_time_objs) > 0 and len(model_objects[h]) > 0: - assignments = object_matcher.match_objects(past_time_objs, model_objects[h], h - 1, h) - print("assignments:", assignments) - unpaired = range(len(model_objects[h])) - for pair in assignments: - past_time_objs[pair[0]].extend(model_objects[h][pair[1]]) - unpaired.remove(pair[1]) - if len(unpaired) > 0: - for up in unpaired: - tracked_model_objects.append(model_objects[h][up]) - print("Tracked Model Objects: {0:03d} hour {1:2d}".format(len(tracked_model_objects), h)) - return tracked_model_objects - -#object_matcher = ObjectMatcher([shifted_centroid_distance, centroid_distance], + model_objects = deepcopy(input_model_objects) + hours = np.arange(int((model_grid.end_date - model_grid.start_date).total_seconds() / deltat.total_seconds()) + 1) + print("hours = ", hours) + tracked_model_objects = [] + for h in hours: + past_time_objs = [] + for obj in tracked_model_objects: + # Potential trackable objects are identified + # In other words, objects whose end_time is the previous hour + if obj.end_time == h - 1: + past_time_objs.append(obj) + # If no objects existed in the last time step, then consider objects in current time step all new + if len(past_time_objs) == 0: + print("time", h, " no objects existed in the last time step. consider objects in current time step all new") + tracked_model_objects.extend(deepcopy(model_objects[h])) + # Match from previous time step with current time step + elif len(past_time_objs) > 0 and len(model_objects[h]) > 0: + assignments = object_matcher.match_objects(past_time_objs, model_objects[h], h - 1, h) + print("assignments:", assignments) + unpaired = range(len(model_objects[h])) + for pair in assignments: + past_time_objs[pair[0]].extend(model_objects[h][pair[1]]) + unpaired.remove(pair[1]) + if len(unpaired) > 0: + for up in unpaired: + tracked_model_objects.append(model_objects[h][up]) + print("Tracked Model Objects: {0:03d} hour {1:2d}".format(len(tracked_model_objects), h)) + return tracked_model_objects + + +# object_matcher = ObjectMatcher([shifted_centroid_distance, centroid_distance], # np.array([dist_weight, 1-dist_weight]), np.array([max_distance] * 2)) -object_matcher = ObjectMatcher([closest_distance],np.array([1]),np.array([4*model_grid.dx])) - - +object_matcher = ObjectMatcher([closest_distance], np.array([1]), np.array([4 * model_grid.dx])) diff --git a/hagelslag/data/FV3ModelGrid.py b/hagelslag/data/FV3ModelGrid.py index e391d89..5232c80 100644 --- a/hagelslag/data/FV3ModelGrid.py +++ b/hagelslag/data/FV3ModelGrid.py @@ -1,5 +1,6 @@ #!/usr/bin/env python import numpy as np + from .GribModelGrid import GribModelGrid from .ModelGrid import ModelGrid @@ -18,24 +19,24 @@ class FV3ModelGrid(GribModelGrid): file (True) or one file containing all timesteps (False). 
""" - def __init__(self, member, run_date, variable, start_date, - end_date, path, single_step=True): + def __init__(self, member, run_date, variable, start_date, + end_date, path, single_step=True): self.path = path self.member = member filenames = [] self.forecast_hours = np.arange((start_date - run_date).total_seconds() / 3600, (end_date - run_date).total_seconds() / 3600 + 1, dtype=int) - + for forecast_hr in self.forecast_hours: - file_name=self.path+'{0}/{1}_{2}f{3:03d}.grib2'.format( - run_date.strftime("%Y%m%d"), - self.member, - run_date.strftime("%Y%m%d%H"), - forecast_hr) + file_name = self.path + '{0}/{1}_{2}f{3:03d}.grib2'.format( + run_date.strftime("%Y%m%d"), + self.member, + run_date.strftime("%Y%m%d%H"), + forecast_hr) filenames.append(file_name) self.netcdf_variables = ["hske_1000", "hske_3000", "hmf_1000", "hmf_3000", "ihm_1000", "ihm_3000"] super(FV3ModelGrid, self).__init__(filenames, run_date, start_date, end_date, variable, member) - + return def load_data(self): @@ -43,4 +44,3 @@ def load_data(self): return ModelGrid.load_data(self) else: return GribModelGrid.load_data(self) - diff --git a/hagelslag/data/GribModelGrid.py b/hagelslag/data/GribModelGrid.py index 67ae733..7fe78e6 100644 --- a/hagelslag/data/GribModelGrid.py +++ b/hagelslag/data/GribModelGrid.py @@ -1,9 +1,9 @@ -#!/usr/bin/env python +import datetime +from os.path import exists + +import numpy as np import pandas as pd import pygrib -import numpy as np -from os.path import exists -import datetime from netCDF4 import Dataset @@ -81,11 +81,11 @@ def format_grib_name(self, selected_variable): data values of unknown variable name, given the ID. """ names = self.unknown_names - Id = None + grib_id = None for key, value in names.items(): if selected_variable == value: - Id = key - return Id, None + grib_id = key + return grib_id, None def load_lightning_data(self): """ @@ -135,11 +135,7 @@ def load_data(self): if type(self.variable) is int: data_values = grib[self.variable].values print(grib[self.variable]) - if grib[self.variable].units == 'unknown': - Id = grib[self.variable].parameterNumber - # units = self.unknown_units[Id] - # else: - # units = grib[self.variable].units + elif type(self.variable) is str: if '_' in self.variable: # Multiple levels diff --git a/hagelslag/data/HREFv2ModelGrid.py b/hagelslag/data/HREFv2ModelGrid.py index a434148..de6d5ac 100644 --- a/hagelslag/data/HREFv2ModelGrid.py +++ b/hagelslag/data/HREFv2ModelGrid.py @@ -1,9 +1,11 @@ #!/usr/bin/env python -from .GribModelGrid import GribModelGrid -from datetime import timedelta -import numpy as np +from datetime import timedelta from glob import glob +import numpy as np + +from .GribModelGrid import GribModelGrid + class HREFv2ModelGrid(GribModelGrid): """ @@ -17,35 +19,39 @@ class HREFv2ModelGrid(GribModelGrid): path (str): Path to model output files """ - def __init__(self, member, run_date, variable, start_date, - end_date, path): + def __init__(self, member, run_date, variable, start_date, + end_date, path): self.path = path self.member = member filenames = [] self.forecast_hours = np.arange((start_date - run_date).total_seconds() / 3600, (end_date - run_date).total_seconds() / 3600 + 1, dtype=int) - - day_before_date = (run_date-timedelta(days=1)).strftime("%Y%m%d") + + day_before_date = (run_date - timedelta(days=1)).strftime("%Y%m%d") member_name = str(self.member.split("_")[0]) if '00' in self.member: - inilization='00' + initial_hour = '00' hours = self.forecast_hours date = run_date.strftime("%Y%m%d") elif '12' in 
self.member: - inilization='12' - hours = self.forecast_hours+12 + initial_hour = '12' + hours = self.forecast_hours + 12 date = day_before_date + else: + initial_hour = '00' + hours = self.forecast_hours + date = run_date.strftime("%Y%m%d") for forecast_hr in hours: if 'nam' in self.member: files = glob('{0}/{1}/nam*conusnest*{2}f*{3}*'.format(self.path, - date,inilization,forecast_hr)) + date, initial_hour, forecast_hr)) if not files: files = glob('{0}/{1}/nam*t{2}z*conusnest*{3}*'.format(self.path, - date,inilization,forecast_hr)) + date, initial_hour, forecast_hr)) else: files = glob('{0}/{1}/*hiresw*conus{2}*{3}f*{4}*'.format(self.path, - date,member_name,inilization,forecast_hr)) - if len(files) >=1: + date, member_name, initial_hour, forecast_hr)) + if len(files) >= 1: filenames.append(files[0]) - super(HREFv2ModelGrid, self).__init__(filenames,run_date,start_date,end_date,variable,member) - return + super(HREFv2ModelGrid, self).__init__(filenames, run_date, start_date, end_date, variable, member) + return diff --git a/hagelslag/data/HRRREModelGrid.py b/hagelslag/data/HRRREModelGrid.py index 8804b8d..8c07924 100644 --- a/hagelslag/data/HRRREModelGrid.py +++ b/hagelslag/data/HRRREModelGrid.py @@ -1,6 +1,8 @@ #!/usr/bin/env python -import numpy as np from os.path import exists + +import numpy as np + from .GribModelGrid import GribModelGrid @@ -18,33 +20,33 @@ class HRRREModelGrid(GribModelGrid): file (True) or one file containing all timesteps (False). """ - def __init__(self, member, run_date, variable, start_date, - end_date, path,single_step=True): + def __init__(self, member, run_date, variable, start_date, + end_date, path, single_step=True): self.path = path self.member = member filenames = [] self.forecast_hours = np.arange((start_date - run_date).total_seconds() / 3600, (end_date - run_date).total_seconds() / 3600 + 1, dtype=int) - + for forecast_hr in self.forecast_hours: - file_name=self.path+'{1}00/{0}_{1}00f0{2:02}.grib2'.format( - self.member, - run_date.strftime("%Y%m%d"), - forecast_hr) + file_name = self.path + '{1}00/{0}_{1}00f0{2:02}.grib2'.format( + self.member, + run_date.strftime("%Y%m%d"), + forecast_hr) if not exists(file_name): - member_number=self.member.split("0")[-1] - file_name=self.path+'{0}00/wrftwo_hrrre_clue_mem000{1}_{2}.grib2'.format( - run_date.strftime("%Y%m%d"), - member_number, - forecast_hr) + member_number = self.member.split("0")[-1] + file_name = self.path + '{0}00/wrftwo_hrrre_clue_mem000{1}_{2}.grib2'.format( + run_date.strftime("%Y%m%d"), + member_number, + forecast_hr) if not exists(file_name): - file_name = self.path+'{0}00/wrftwo_conus_mem000{1}_{2}.grib2'.format( - run_date.strftime("%Y%m%d"), - member_number, - forecast_hr) - + file_name = self.path + '{0}00/wrftwo_conus_mem000{1}_{2}.grib2'.format( + run_date.strftime("%Y%m%d"), + member_number, + forecast_hr) + filenames.append(file_name) - - super(HRRREModelGrid, self).__init__(filenames,run_date,start_date,end_date,variable,member) - - return + + super(HRRREModelGrid, self).__init__(filenames, run_date, start_date, end_date, variable, member) + + return diff --git a/hagelslag/data/HRRRModelGrid.py b/hagelslag/data/HRRRModelGrid.py index 8adcfb8..ab425bc 100644 --- a/hagelslag/data/HRRRModelGrid.py +++ b/hagelslag/data/HRRRModelGrid.py @@ -1,7 +1,9 @@ +from os.path import join + import numpy as np import pandas as pd + from hagelslag.data.ModelGrid import ModelGrid -from os.path import join class HRRRModelGrid(ModelGrid): @@ -12,9 +14,9 @@ def __init__(self, run_date, variable, 
start_date, end_date, path, frequency="1H self.end_date = pd.Timestamp(end_date) self.frequency = frequency self.path = path - self.valid_dates = pd.DatetimeIndex(start=self.start_date, - end=self.end_date, - freq=self.frequency) + self.valid_dates = pd.date_range(start=self.start_date, + end=self.end_date, + freq=self.frequency) self.forecast_hours = np.array((self.valid_dates - self.run_date).total_seconds() / 3600, dtype=int) filenames = [] for forecast_hour in self.forecast_hours: @@ -23,5 +25,3 @@ def __init__(self, run_date, variable, start_date, end_date, path, frequency="1H forecast_hour))) super(HRRRModelGrid, self).__init__(filenames, run_date, start_date, end_date, variable) - - diff --git a/hagelslag/data/HRRRZarrModelGrid.py b/hagelslag/data/HRRRZarrModelGrid.py index 515f9ee..35de55b 100644 --- a/hagelslag/data/HRRRZarrModelGrid.py +++ b/hagelslag/data/HRRRZarrModelGrid.py @@ -1,7 +1,6 @@ -import numpy as np import pandas as pd + from hagelslag.data.ZarrModelGrid import ZarrModelGrid -from os.path import join class HRRRZarrModelGrid(ZarrModelGrid): diff --git a/hagelslag/data/HailForecastGrid.py b/hagelslag/data/HailForecastGrid.py index 99fdaad..1e1456f 100644 --- a/hagelslag/data/HailForecastGrid.py +++ b/hagelslag/data/HailForecastGrid.py @@ -1,10 +1,11 @@ +from os.path import exists + import numpy as np import pandas as pd -from pyproj import Proj import pygrib -from scipy.spatial import cKDTree +from pyproj import Proj from scipy.ndimage import gaussian_filter -from os.path import exists +from scipy.spatial import cKDTree class HailForecastGrid(object): @@ -33,6 +34,7 @@ class HailForecastGrid(object): proj (Proj): a pyproj projection object used for converting lat-lon points to x-y coordinate values projparams (dict): PROJ4 parameters describing map projection """ + def __init__(self, run_date, start_date, end_date, ensemble_name, ml_model, members, variable, message_number, path): self.run_date = run_date @@ -91,7 +93,7 @@ def load_data(self): grbs.close() return - def period_neighborhood_probability(self, radius, smoothing, threshold, stride,start_time,end_time): + def period_neighborhood_probability(self, radius, smoothing, threshold, stride, start_time, end_time): """ Calculate the neighborhood probability over the full period of the forecast @@ -110,14 +112,15 @@ def period_neighborhood_probability(self, radius, smoothing, threshold, stride,s neighbor_prob = np.zeros((self.data.shape[0], neighbor_x.shape[0], neighbor_x.shape[1])) print('Forecast Hours: {0}-{1}'.format(start_time, end_time)) for m in range(len(self.members)): - period_max = self.data[m,start_time:end_time,:,:].max(axis=0) + period_max = self.data[m, start_time:end_time, :, :].max(axis=0) valid_i, valid_j = np.where(period_max >= threshold) print(self.members[m], len(valid_i)) if len(valid_i) > 0: var_kd_tree = cKDTree(np.vstack((self.x[valid_i, valid_j], self.y[valid_i, valid_j])).T) - exceed_points = np.unique(np.concatenate(var_kd_tree.query_ball_tree(neighbor_kd_tree, radius))).astype(int) + exceed_points = np.unique(np.concatenate(var_kd_tree.query_ball_tree(neighbor_kd_tree, radius))).astype( + int) exceed_i, exceed_j = np.unravel_index(exceed_points, neighbor_x.shape) neighbor_prob[m][exceed_i, exceed_j] = 1 if smoothing > 0: - neighbor_prob[m] = gaussian_filter(neighbor_prob[m], smoothing,mode='constant') + neighbor_prob[m] = gaussian_filter(neighbor_prob[m], smoothing, mode='constant') return neighbor_prob diff --git a/hagelslag/data/MRMSGrid.py b/hagelslag/data/MRMSGrid.py index 
311f026..d24ab1f 100644 --- a/hagelslag/data/MRMSGrid.py +++ b/hagelslag/data/MRMSGrid.py @@ -1,9 +1,10 @@ -from netCDF4 import Dataset, num2date -import pandas as pd -import numpy as np import os -from scipy.spatial import cKDTree + +import numpy as np +import pandas as pd +from netCDF4 import Dataset, num2date from scipy.ndimage import gaussian_filter +from scipy.spatial import cKDTree class MRMSGrid(object): @@ -31,6 +32,7 @@ class MRMSGrid(object): data (ndarray or None): Array of gridded observations after load_data is called. None otherwise. valid_dates (ndarray): Contains the dates where data loaded successfully. """ + def __init__(self, start_date, end_date, variable, path, freq="1H"): self.start_date = start_date self.end_date = end_date @@ -49,7 +51,7 @@ def load_data(self): valid_dates = [] mrms_files = np.array(sorted(os.listdir(self.path + self.variable + "/"))) mrms_file_dates = np.array([m_file.split("_")[-2].split("-")[0] - for m_file in mrms_files]) + for m_file in mrms_files]) old_mrms_file = None file_obj = None for t in range(self.all_dates.shape[0]): @@ -60,8 +62,8 @@ def load_data(self): if file_obj is not None: file_obj.close() file_obj = Dataset(self.path + self.variable + "/" + mrms_file) - #old_mrms_file = mrms_file - + # old_mrms_file = mrms_file + if "time" in file_obj.variables.keys(): time_var = "time" else: @@ -76,7 +78,7 @@ def load_data(self): valid_dates.append(self.all_dates[t]) if file_obj is not None: file_obj.close() - + self.data = np.array(data) self.data[self.data < 0] = 0 self.data[self.data > 150] = 150 diff --git a/hagelslag/data/ModelGrid.py b/hagelslag/data/ModelGrid.py index 6a1eefa..a8fa9ed 100644 --- a/hagelslag/data/ModelGrid.py +++ b/hagelslag/data/ModelGrid.py @@ -1,7 +1,8 @@ -from netCDF4 import Dataset +from os.path import exists + import numpy as np +from netCDF4 import Dataset from pandas import date_range -from os.path import exists class ModelGrid(object): @@ -21,10 +22,11 @@ class ModelGrid(object): forecast_hours: array of all hours in the forecast file_objects (list): List of the file objects for each model time step """ - def __init__(self, - filenames, - run_date, - start_date, + + def __init__(self, + filenames, + run_date, + start_date, end_date, variable, frequency="1H"): @@ -35,8 +37,8 @@ def __init__(self, self.end_date = np.datetime64(end_date) self.frequency = frequency self.valid_dates = date_range(start=self.start_date, - end=self.end_date, - freq=self.frequency) + end=self.end_date, + freq=self.frequency) self.forecast_hours = (self.valid_dates.values - self.run_date).astype("timedelta64[h]").astype(int) self.file_objects = [] self.__enter__() @@ -164,4 +166,3 @@ def close(self): Close links to all open file objects and delete the objects. 
""" self.__exit__() - diff --git a/hagelslag/data/ModelOutput.py b/hagelslag/data/ModelOutput.py index 2276785..4a4d196 100644 --- a/hagelslag/data/ModelOutput.py +++ b/hagelslag/data/ModelOutput.py @@ -1,18 +1,21 @@ from __future__ import division -from .SSEFModelGrid import SSEFModelGrid -from .VSEModelGrid import VSEModelGrid -from .NCARModelGrid import NCARModelGrid + +import numpy as np +from scipy.ndimage import gaussian_filter +from scipy.spatial import cKDTree + +from hagelslag.util.derived_vars import relative_humidity_pressure_level, melting_layer_height +from hagelslag.util.make_proj_grids import make_proj_grids, read_arps_map_file, read_ncar_map_file, get_proj_obj from .FV3ModelGrid import FV3ModelGrid -from .HRRRModelGrid import HRRRModelGrid -from .HRRREModelGrid import HRRREModelGrid from .HREFv2ModelGrid import HREFv2ModelGrid +from .HRRREModelGrid import HRRREModelGrid +from .HRRRModelGrid import HRRRModelGrid from .HRRRZarrModelGrid import HRRRZarrModelGrid +from .NCARModelGrid import NCARModelGrid from .NCARStormEventModelGrid import NCARStormEventModelGrid -from hagelslag.util.make_proj_grids import make_proj_grids, read_arps_map_file, read_ncar_map_file, get_proj_obj -from hagelslag.util.derived_vars import relative_humidity_pressure_level, melting_layer_height -import numpy as np -from scipy.spatial import cKDTree -from scipy.ndimage import gaussian_filter +from .SSEFModelGrid import SSEFModelGrid +from .VSEModelGrid import VSEModelGrid + class ModelOutput(object): """ @@ -31,12 +34,13 @@ class ModelOutput(object): map_file (str): path to data map file """ - def __init__(self, - ensemble_name, - member_name, - run_date, - variable, - start_date, + + def __init__(self, + ensemble_name, + member_name, + run_date, + variable, + start_date, end_date, path, map_file, @@ -128,41 +132,41 @@ def load_data(self): mg.close() elif self.ensemble_name.upper() == "HREFV2": mg = HREFv2ModelGrid(self.member_name, - self.run_date, - self.variable, - self.start_date, - self.end_date, - self.path) + self.run_date, + self.variable, + self.start_date, + self.end_date, + self.path) self.data, self.units = mg.load_data() - + elif self.ensemble_name.upper() == "HRRRE": mg = HRRREModelGrid(self.member_name, - self.run_date, - self.variable, - self.start_date, - self.end_date, - self.path, - single_step=self.single_step) + self.run_date, + self.variable, + self.start_date, + self.end_date, + self.path, + single_step=self.single_step) self.data, self.units = mg.load_data() - + elif self.ensemble_name.upper() == "SAR-FV3": mg = FV3ModelGrid(self.member_name, - self.run_date, - self.variable, - self.start_date, - self.end_date, - self.path, - single_step=self.single_step) + self.run_date, + self.variable, + self.start_date, + self.end_date, + self.path, + single_step=self.single_step) self.data, self.units = mg.load_data() elif self.ensemble_name.upper() == "VSE": mg = VSEModelGrid(self.member_name, - self.run_date, - self.variable, - self.start_date, - self.end_date, - self.path, - single_step=self.single_step) + self.run_date, + self.variable, + self.start_date, + self.end_date, + self.path, + single_step=self.single_step) self.data, self.units = mg.load_data() mg.close() elif self.ensemble_name.upper() == "HRRR": @@ -173,7 +177,7 @@ def load_data(self): self.path) self.data, self.units = mg.load_data() mg.close() - + elif self.ensemble_name.upper() == "NCARSTORM": mg = NCARStormEventModelGrid(self.run_date, self.variable, @@ -182,13 +186,13 @@ def load_data(self): self.path) self.data, 
self.units = mg.load_data() mg.close() - + elif self.ensemble_name.upper() == "HRRR-ZARR": mg = HRRRZarrModelGrid(self.run_date, - self.variable, - self.start_date, - self.end_date, - self.path) + self.variable, + self.start_date, + self.end_date, + self.path) self.data, self.units = mg.load_data() else: print(self.ensemble_name + " not supported.") @@ -210,13 +214,14 @@ def load_map_info(self, map_file): self.proj = get_proj_obj(proj_dict) else: proj_dict, grid_dict = read_ncar_map_file(map_file) - if self.member_name[0:7] == "1km_pbl": # Don't just look at the first 3 characters. You have to differentiate '1km_pbl1' and '1km_on_3km_pbl1' + if self.member_name[ + 0:7] == "1km_pbl": # Don't just look at the first 3 characters. You have to differentiate '1km_pbl1' and '1km_on_3km_pbl1' grid_dict["dx"] = 1000 grid_dict["dy"] = 1000 grid_dict["sw_lon"] = 258.697 grid_dict["sw_lat"] = 23.999 grid_dict["ne_lon"] = 282.868269206236 - grid_dict["ne_lat"] = 36.4822338520542 + grid_dict["ne_lat"] = 36.4822338520542 self.dx = int(grid_dict["dx"]) mapping_data = make_proj_grids(proj_dict, grid_dict) @@ -225,7 +230,6 @@ def load_map_info(self, map_file): self.i, self.j = np.indices(self.lon.shape) self.proj = get_proj_obj(proj_dict) - def period_neighborhood_probability(self, radius, smoothing, threshold, stride, x=None, y=None, dx=None): """ Calculate the neighborhood probability over the full period of the forecast @@ -259,4 +263,3 @@ def period_neighborhood_probability(self, radius, smoothing, threshold, stride, if smoothing > 0: neighbor_prob = gaussian_filter(neighbor_prob, smoothing) return neighbor_prob - diff --git a/hagelslag/data/NCARModelGrid.py b/hagelslag/data/NCARModelGrid.py index 8c9f906..533aa35 100644 --- a/hagelslag/data/NCARModelGrid.py +++ b/hagelslag/data/NCARModelGrid.py @@ -1,7 +1,9 @@ -from .ModelGrid import ModelGrid -import numpy as np from datetime import timedelta +import numpy as np + +from .ModelGrid import ModelGrid + class NCARModelGrid(ModelGrid): """ @@ -16,6 +18,7 @@ class NCARModelGrid(ModelGrid): single_step (boolean (default=False)): Whether variable information is stored with each time step in a separate file or one file containing all timesteps. """ + def __init__(self, member, run_date, variable, start_date, end_date, path, single_step=False): self.member = member self.path = path diff --git a/hagelslag/data/NCARStormEventModelGrid.py b/hagelslag/data/NCARStormEventModelGrid.py index 439897b..655a1e3 100644 --- a/hagelslag/data/NCARStormEventModelGrid.py +++ b/hagelslag/data/NCARStormEventModelGrid.py @@ -1,14 +1,17 @@ -import numpy as np -from .ModelGrid import ModelGrid from datetime import timedelta from os.path import join +import numpy as np + +from .ModelGrid import ModelGrid + class NCARStormEventModelGrid(ModelGrid): """ Loads model output from the NCAR MMM 1 and 3 km WRF runs on Cheyenne. 
""" + def __init__(self, run_date, variable, start_date, end_date, path): self.pressure_levels = np.array([1000, 925, 850, 700, 600, 500, 400, 300, 250, 200, 150, 100]) self.path = path diff --git a/hagelslag/data/NCARWRF2020ModelGrid.py b/hagelslag/data/NCARWRF2020ModelGrid.py index 5dd6c4c..9cfc6b8 100644 --- a/hagelslag/data/NCARWRF2020ModelGrid.py +++ b/hagelslag/data/NCARWRF2020ModelGrid.py @@ -1,7 +1,8 @@ +from os.path import join + import numpy as np + from .ModelGrid import ModelGrid -from datetime import timedelta -from os.path import join class NCARWRF2020ModelGrid(ModelGrid): @@ -9,6 +10,7 @@ class NCARWRF2020ModelGrid(ModelGrid): Loads model output from the NCAR MMM WRF 2020 3 km real-time runs """ + def __init__(self, member_name, run_date, variable, start_date, end_date, path): self.member_name = member_name self.pressure_levels = np.array([1000, 925, 850, 700, 600, 500, 400, 300, 250, 200, 150, 100]) diff --git a/hagelslag/data/SSEFModelGrid.py b/hagelslag/data/SSEFModelGrid.py index 364ebe5..750ab11 100644 --- a/hagelslag/data/SSEFModelGrid.py +++ b/hagelslag/data/SSEFModelGrid.py @@ -1,8 +1,10 @@ -from .ModelGrid import ModelGrid -import numpy as np import os from os.path import exists +import numpy as np + +from .ModelGrid import ModelGrid + class SSEFModelGrid(ModelGrid): """ @@ -17,6 +19,7 @@ class SSEFModelGrid(ModelGrid): single_step (boolean (default=False)): Whether variable information is stored with each time step in a separate file or one file containing all timesteps. """ + def __init__(self, member, run_date, variable, start_date, end_date, path, single_step=False): self.path = path self.member = member @@ -31,9 +34,9 @@ def __init__(self, member, run_date, variable, start_date, end_date, path, singl potential_filenames = [] if single_step: for hour in forecast_hours: - potential_filenames.append("{0}ar{1}00.net{2}{3:06d}".format(full_path, + potential_filenames.append("{0}ar{1}00.net{2}{3:06d}".format(full_path, run_date.strftime("%Y%m%d"), - variable.ljust(6,"_"), + variable.ljust(6, "_"), int(hour) * 3600)) else: potential_filenames.append("{0}ssef_{1}_{2}_{3}.nc".format(full_path, diff --git a/hagelslag/data/VSEModelGrid.py b/hagelslag/data/VSEModelGrid.py index 4363efd..05c789a 100644 --- a/hagelslag/data/VSEModelGrid.py +++ b/hagelslag/data/VSEModelGrid.py @@ -1,10 +1,10 @@ -from .ModelGrid import ModelGrid -from glob import glob -import pandas as pd +import os from datetime import timedelta + import numpy as np -import os, pdb -from os.path import exists + +from .ModelGrid import ModelGrid + class VSEModelGrid(ModelGrid): """ @@ -19,24 +19,30 @@ class VSEModelGrid(ModelGrid): single_step (boolean (default=False)): Whether variable information is stored with each time step in a separate file or one file containing all timesteps. """ + def __init__(self, member, run_date, variable, start_date, end_date, path, single_step=False): self.path = path self.member = member forecast_hours = np.arange((start_date - run_date).total_seconds() / 3600, (end_date - run_date).total_seconds() / 3600 + 1) # Maybe you need the vse-style file. 
- if variable in ["XLAT","XLONG", "MU", "MUB", "Q2", "T2", "PSFC", "U10", "V10", "HGT", "RAINNC", "GRAUPELNC", "HAILNC", "PBLH", "WSPD10MAX", "W_UP_MAX", "W_DN_MAX", "REFD_MAX", "UP_HELI_MAX", "UP_HELI_MAX03", "UP_HELI_MAX01", "UP_HELI_MIN", "RVORT1_MAX", "RVORT0_MAX", "W_MEAN", "GRPL_MAX", "C_PBLH", "W_PBLH", "W_MAX_PBL", "W_1KM_AGL", "HAIL_MAXK1", "HAIL_MAX2D", "PREC_ACC_NC", "REFD_COM", "REFD", "ECHOTOP", "AFWA_MSLP", "AFWA_HAIL", "AFWA_HAIL_NEWMEAN", "AFWA_HAIL_NEWSTD"]: - full_path = "/".join([self.path, member, run_date.strftime("%Y%m%d%H"), "wrf"]) - potential_filenames = [] - for hour in forecast_hours: - valid_time = run_date + timedelta(hours=hour) - potential_filenames.append(full_path +"/vse_d01."+valid_time.strftime("%Y-%m-%d_%H:%M:%S")+".nc") + if variable in ["XLAT", "XLONG", "MU", "MUB", "Q2", "T2", "PSFC", "U10", "V10", "HGT", "RAINNC", "GRAUPELNC", + "HAILNC", "PBLH", "WSPD10MAX", "W_UP_MAX", "W_DN_MAX", "REFD_MAX", "UP_HELI_MAX", + "UP_HELI_MAX03", "UP_HELI_MAX01", "UP_HELI_MIN", "RVORT1_MAX", "RVORT0_MAX", "W_MEAN", + "GRPL_MAX", "C_PBLH", "W_PBLH", "W_MAX_PBL", "W_1KM_AGL", "HAIL_MAXK1", "HAIL_MAX2D", + "PREC_ACC_NC", "REFD_COM", "REFD", "ECHOTOP", "AFWA_MSLP", "AFWA_HAIL", "AFWA_HAIL_NEWMEAN", + "AFWA_HAIL_NEWSTD"]: + full_path = "/".join([self.path, member, run_date.strftime("%Y%m%d%H"), "wrf"]) + potential_filenames = [] + for hour in forecast_hours: + valid_time = run_date + timedelta(hours=hour) + potential_filenames.append(full_path + "/vse_d01." + valid_time.strftime("%Y-%m-%d_%H:%M:%S") + ".nc") - else: # use the WRFTWO??.nc file - full_path = "/".join([self.path, member, run_date.strftime("%Y%m%d%H"), "post_AGAIN"]) - potential_filenames = [] - for hour in forecast_hours: - potential_filenames.append("{0}/fhr_{1:d}/WRFTWO{2:02d}.nc".format(full_path, int(hour), int(hour) )) + else: # use the WRFTWO??.nc file + full_path = "/".join([self.path, member, run_date.strftime("%Y%m%d%H"), "post_AGAIN"]) + potential_filenames = [] + for hour in forecast_hours: + potential_filenames.append("{0}/fhr_{1:d}/WRFTWO{2:02d}.nc".format(full_path, int(hour), int(hour))) filenames = [] for filename in potential_filenames: if os.access(filename, os.R_OK): diff --git a/hagelslag/data/WRFModelGrid.py b/hagelslag/data/WRFModelGrid.py index a979d3e..3a412b2 100644 --- a/hagelslag/data/WRFModelGrid.py +++ b/hagelslag/data/WRFModelGrid.py @@ -1,14 +1,16 @@ -from netCDF4 import Dataset -from os.path import isdir, exists, join -import numpy as np from glob import glob +from os.path import isdir, exists, join + import arrow +import numpy as np +from netCDF4 import Dataset class WRFModelGrid(object): """ Load data from raw WRF model output files. 
""" + def __init__(self, forecast_date, variable, domain, path, nocolons=False): self.forecast_date = arrow.get(forecast_date) self.variable = variable @@ -47,7 +49,7 @@ def load_time_var(self, time_var="XTIME"): if time_var in var_list: for attr in patch_zero.variables[time_var].ncattrs(): var_attrs[attr] = getattr(patch_zero.variables[time_var], attr) - var_val = patch_zero.variables[time_var][:] + var_val = patch_zero.variables[time_var][:] patch_zero.close() else: wrf_data = Dataset(join(self.path, self.wrf_filename)) @@ -55,9 +57,9 @@ def load_time_var(self, time_var="XTIME"): if time_var in var_list: for attr in wrf_data.variables[time_var].ncattrs(): var_attrs[attr] = getattr(wrf_data.variables[time_var], attr) - var_val = wrf_data.variables[time_var][:] + var_val = wrf_data.variables[time_var][:] wrf_data.close() - return var_val, var_attrs + return var_val, var_attrs def load_full_grid(self): var_data = None @@ -149,7 +151,7 @@ def load_full_grid(self): else: var_attrs[attr] = getattr(wrf_data.variables[self.variable], attr) - var_data = wrf_data.variables[self.variable][:] + var_data = wrf_data.variables[self.variable][:] if np.any(is_stag): stag_dim = np.where(is_stag)[0][0] if stag_dim == 1: diff --git a/hagelslag/data/ZarrModelGrid.py b/hagelslag/data/ZarrModelGrid.py index 9ea770f..2417487 100644 --- a/hagelslag/data/ZarrModelGrid.py +++ b/hagelslag/data/ZarrModelGrid.py @@ -1,10 +1,10 @@ +from os.path import join + import numpy as np import pandas as pd -from pandas import date_range -from os.path import exists, join import s3fs import xarray as xr -from datetime import timedelta +from pandas import date_range class ZarrModelGrid(object): @@ -23,6 +23,7 @@ class ZarrModelGrid(object): valid_dates: DatetimeIndex of all model timesteps forecast_hours: array of all hours in the forecast """ + def __init__(self, path, run_date, diff --git a/hagelslag/evaluation/ContingencyTable.py b/hagelslag/evaluation/ContingencyTable.py index 78e6f0d..9a8ead1 100644 --- a/hagelslag/evaluation/ContingencyTable.py +++ b/hagelslag/evaluation/ContingencyTable.py @@ -16,6 +16,7 @@ class ContingencyTable(object): N: total number of items in table """ + def __init__(self, a, b, c, d): self.table = np.array([[a, b], [c, d]], dtype=float) self.N = self.table.sum() @@ -111,8 +112,8 @@ def ets(self): def hss(self): """Doolittle (Heidke) Skill Score. 
2(ad-bc)/((a+b)(b+d) + (a+c)(c+d))""" return 2 * (self.table[0, 0] * self.table[1, 1] - self.table[0, 1] * self.table[1, 0]) / ( - (self.table[0, 0] + self.table[0, 1]) * (self.table[0, 1] + self.table[1, 1]) + - (self.table[0, 0] + self.table[1, 0]) * (self.table[1, 0] + self.table[1, 1])) + (self.table[0, 0] + self.table[0, 1]) * (self.table[0, 1] + self.table[1, 1]) + + (self.table[0, 0] + self.table[1, 0]) * (self.table[1, 0] + self.table[1, 1])) def pss(self): """Peirce (Hansen-Kuipers, True) Skill Score (ad - bc)/((a+c)(b+d))""" diff --git a/hagelslag/evaluation/GridEvaluator.py b/hagelslag/evaluation/GridEvaluator.py index 6a747ba..0fe18f7 100644 --- a/hagelslag/evaluation/GridEvaluator.py +++ b/hagelslag/evaluation/GridEvaluator.py @@ -1,9 +1,11 @@ -from netCDF4 import Dataset +from datetime import timedelta + import numpy as np +from netCDF4 import Dataset +from scipy.ndimage import binary_dilation + from hagelslag.data.MRMSGrid import MRMSGrid from hagelslag.evaluation.ProbabilityMetrics import DistributedROC, DistributedReliability -from datetime import timedelta -from scipy.ndimage import binary_dilation class GridEvaluator(object): @@ -45,6 +47,7 @@ class GridEvaluator(object): mask_variable : str, optional (default="RadarQualityIndex_00.00") Name of the MRMS variable used for masking. """ + def __init__(self, run_date, ensemble_name, ensemble_member, model_names, size_thresholds, start_hour, end_hour, window_size, time_skip, forecast_path, mrms_path, mrms_variable, obs_mask=True, @@ -60,9 +63,9 @@ def __init__(self, run_date, ensemble_name, ensemble_member, self.window_size = window_size self.time_skip = time_skip self.hour_windows = [] - for hour in self.valid_hours[self.window_size-1::self.time_skip]: - self.hour_windows.append(slice(hour-self.window_size+1 - self.start_hour, - hour+1-self.start_hour)) + for hour in self.valid_hours[self.window_size - 1::self.time_skip]: + self.hour_windows.append(slice(hour - self.window_size + 1 - self.start_hour, + hour + 1 - self.start_hour)) self.forecast_path = forecast_path self.mrms_path = mrms_path self.mrms_variable = mrms_variable @@ -82,7 +85,8 @@ def load_forecasts(self): for model_name in self.model_names: self.raw_forecasts[model_name] = {} forecast_file = self.forecast_path + run_date_str + "/" + \ - model_name.replace(" ", "-") + "_hailprobs_{0}_{1}.nc".format(self.ensemble_member, run_date_str) + model_name.replace(" ", "-") + "_hailprobs_{0}_{1}.nc".format(self.ensemble_member, + run_date_str) forecast_obj = Dataset(forecast_file) forecast_hours = forecast_obj.variables["forecast_hour"][:] valid_hour_indices = np.where((self.start_hour <= forecast_hours) & (forecast_hours <= self.end_hour))[0] @@ -102,7 +106,7 @@ def get_window_forecasts(self): np.array([self.raw_forecasts[model_name][size_threshold][sl].sum(axis=0) for sl in self.hour_windows]) - def load_obs(self, mask_threshold=0.5): + def load_obs(self, mask_threshold=0.5): """ Loads observations and masking grid (if needed). 
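The reindented hour_windows loop above builds rolling slices into the forecast arrays loaded later. A minimal sketch of the windows it produces, assuming a hypothetical 12-35 hour evaluation period with 3-hour windows and no time skipping (these settings are illustrative, not from the patch):

    import numpy as np

    # Hypothetical settings; the real values come from the GridEvaluator arguments.
    start_hour, end_hour = 12, 35
    window_size, time_skip = 3, 1
    valid_hours = np.arange(start_hour, end_hour + 1)

    hour_windows = []
    for hour in valid_hours[window_size - 1::time_skip]:
        hour_windows.append(slice(hour - window_size + 1 - start_hour,
                                  hour + 1 - start_hour))

    # First window covers array indices 0-2 (forecast hours 12-14), the next 1-3 (hours 13-15), ...
    print(hour_windows[0], hour_windows[1])  # slice(0, 3, None) slice(1, 4, None)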
@@ -122,7 +126,7 @@ def load_obs(self, mask_threshold=0.5): mask_grid.load_data() self.raw_obs[self.mask_variable] = np.where(mask_grid.data >= mask_threshold, 1, 0) self.window_obs[self.mask_variable] = np.array([self.raw_obs[self.mask_variable][sl].max(axis=0) - for sl in self.hour_windows]) + for sl in self.hour_windows]) def dilate_obs(self, dilation_radius): """ @@ -134,7 +138,8 @@ def dilate_obs(self, dilation_radius): for s in self.size_thresholds: self.dilated_obs[s] = np.zeros(self.window_obs[self.mrms_variable].shape) for t in range(self.dilated_obs[s].shape[0]): - self.dilated_obs[s][t][binary_dilation(self.window_obs[self.mrms_variable][t] >= s, iterations=dilation_radius)] = 1 + self.dilated_obs[s][t][ + binary_dilation(self.window_obs[self.mrms_variable][t] >= s, iterations=dilation_radius)] = 1 def roc_curves(self, prob_thresholds): """ @@ -195,11 +200,3 @@ def reliability_curves(self, prob_thresholds): self.dilated_obs[size_threshold][h] ) return all_rel_curves - - - - - - - - diff --git a/hagelslag/evaluation/MetricPlotter.py b/hagelslag/evaluation/MetricPlotter.py index 06fbee9..4a40f40 100644 --- a/hagelslag/evaluation/MetricPlotter.py +++ b/hagelslag/evaluation/MetricPlotter.py @@ -1,5 +1,5 @@ -import numpy as np import matplotlib.pyplot as plt +import numpy as np from mpl_toolkits.axes_grid1.inset_locator import inset_axes, InsetPosition @@ -218,7 +218,7 @@ def reliability_diagram(rel_objs, obj_labels, colors, markers, filename, figsize def attributes_diagram(rel_objs, obj_labels, colors, markers, filename, figsize=(8, 8), xlabel="Forecast Probability", - ylabel="Observed Relative Frequency", ticks=np.arange(0, 1.05, 0.05), dpi=300, + ylabel="Observed Relative Frequency", ticks=np.arange(0, 1.05, 0.05), dpi=300, title="Attributes Diagram", legend_params=None, inset_params=None, inset_position=(0.12, 0.72, 0.25, 0.25), bootstrap_sets=None, ci=(2.5, 97.5)): """ diff --git a/hagelslag/evaluation/MulticlassContingencyTable.py b/hagelslag/evaluation/MulticlassContingencyTable.py index 81bdfa5..ef301a9 100644 --- a/hagelslag/evaluation/MulticlassContingencyTable.py +++ b/hagelslag/evaluation/MulticlassContingencyTable.py @@ -1,4 +1,5 @@ import numpy as np + __author__ = 'David John Gagne ' @@ -19,6 +20,7 @@ class MulticlassContingencyTable(object): The contingency table is stored in table as a numpy array with the rows corresponding to forecast categories, and the columns corresponding to observation categories. 
""" + def __init__(self, table=None, n_classes=2, class_names=("1", "0")): self.table = table self.n_classes = n_classes @@ -55,9 +57,9 @@ def gerrity_score(self): s = np.zeros(self.table.shape, dtype=float) for (i, j) in np.ndindex(*s.shape): if i == j: - s[i, j] = 1.0 / (k - 1.0) * (np.sum(1.0 / a[0:j]) + np.sum(a[j:k-1])) + s[i, j] = 1.0 / (k - 1.0) * (np.sum(1.0 / a[0:j]) + np.sum(a[j:k - 1])) elif i < j: - s[i, j] = 1.0 / (k - 1.0) * (np.sum(1.0 / a[0:i]) - (j - i) + np.sum(a[j:k-1])) + s[i, j] = 1.0 / (k - 1.0) * (np.sum(1.0 / a[0:i]) - (j - i) + np.sum(a[j:k - 1])) else: s[i, j] = s[j, i] return np.sum(self.table / float(self.table.sum()) * s) diff --git a/hagelslag/evaluation/NeighborEvaluator.py b/hagelslag/evaluation/NeighborEvaluator.py index 916518f..fa46a49 100644 --- a/hagelslag/evaluation/NeighborEvaluator.py +++ b/hagelslag/evaluation/NeighborEvaluator.py @@ -1,12 +1,14 @@ +from datetime import timedelta + import numpy as np import pandas as pd -from hagelslag.data.MRMSGrid import MRMSGrid -from hagelslag.evaluation.ProbabilityMetrics import DistributedReliability, DistributedROC from netCDF4 import Dataset -from datetime import timedelta from scipy.signal import fftconvolve from skimage.morphology import disk +from hagelslag.data.MRMSGrid import MRMSGrid +from hagelslag.evaluation.ProbabilityMetrics import DistributedReliability, DistributedROC + class NeighborEvaluator(object): """ @@ -92,7 +94,7 @@ def load_forecasts(self): self.period_forecasts[period_var] = forecast_data.variables[period_var][:] forecast_data.close() - def load_obs(self, mask_threshold=0.5): + def load_obs(self, mask_threshold=0.5): """ Loads observations and masking grid (if needed). @@ -177,7 +179,7 @@ def evaluate_period_forecasts(self): A pandas DataFrame with full-period metadata and verification statistics """ score_columns = ["Run_Date", "Ensemble Name", "Model_Name", "Forecast_Variable", "Neighbor_Radius", - "Smoothing_Radius", "Size_Threshold", "ROC", "Reliability"] + "Smoothing_Radius", "Size_Threshold", "ROC", "Reliability"] all_scores = pd.DataFrame(columns=score_columns) if self.coordinate_file is not None: coord_mask = np.where((self.coordinates["lon"] >= self.lon_bounds[0]) & diff --git a/hagelslag/evaluation/ObjectEvaluator.py b/hagelslag/evaluation/ObjectEvaluator.py index 1f6c7bb..9f45958 100644 --- a/hagelslag/evaluation/ObjectEvaluator.py +++ b/hagelslag/evaluation/ObjectEvaluator.py @@ -1,10 +1,13 @@ -import numpy as np -import pandas as pd import json from glob import glob -from hagelslag.evaluation.ProbabilityMetrics import DistributedCRPS, DistributedReliability, DistributedROC +from os.path import join + +import numpy as np +import pandas as pd from scipy.stats import gamma +from hagelslag.evaluation.ProbabilityMetrics import DistributedCRPS, DistributedReliability, DistributedROC + class ObjectEvaluator(object): """ @@ -31,6 +34,7 @@ class ObjectEvaluator(object): matched_forecasts (dict): Forecasts merged with observation information. 
""" + def __init__(self, run_date, ensemble_name, ensemble_member, model_names, model_types, forecast_bins, dist_thresholds, forecast_json_path, track_data_csv_path): self.run_date = run_date @@ -57,7 +61,9 @@ def __init__(self, run_date, ensemble_name, ensemble_member, model_names, model_ self.forecasts[model_type] = {} for model_name in self.model_names[model_type]: self.forecasts[model_type][model_name] = pd.DataFrame(columns=self.metadata_columns + - list(self.forecast_bins[model_type].astype(str))) + list( + self.forecast_bins[model_type].astype( + str))) def load_forecasts(self): """ @@ -90,14 +96,14 @@ def load_obs(self): """ Loads the track total and step files and merges the information into a single data frame. """ - track_total_file = self.track_data_csv_path + \ - "track_total_{0}_{1}_{2}.csv".format(self.ensemble_name, - self.ensemble_member, - self.run_date.strftime("%Y%m%d")) - track_step_file = self.track_data_csv_path + \ - "track_step_{0}_{1}_{2}.csv".format(self.ensemble_name, - self.ensemble_member, - self.run_date.strftime("%Y%m%d")) + track_total_file = join(self.track_data_csv_path, + "track_total_{0}_{1}_{2}.csv".format(self.ensemble_name, + self.ensemble_member, + self.run_date.strftime("%Y%m%d"))) + track_step_file = join(self.track_data_csv_path, + "track_step_{0}_{1}_{2}.csv".format(self.ensemble_name, + self.ensemble_member, + self.run_date.strftime("%Y%m%d"))) track_total_cols = ["Track_ID", "Translation_Error_X", "Translation_Error_Y", "Start_Time_Error"] track_step_cols = ["Step_ID", "Track_ID", "Hail_Size", "Shape", "Location", "Scale"] track_total_data = pd.read_csv(track_total_file, usecols=track_total_cols) @@ -159,7 +165,7 @@ def gamma_cdf(x, a, loc, b): f_params = sub_forecasts[self.forecast_bins[model_type]].values[f] forecast_cdfs[f] = gamma_cdf(self.dist_thresholds, f_params[0], f_params[1], f_params[2]) obs_cdfs = np.array([gamma_cdf(self.dist_thresholds, *params) - for params in sub_forecasts[self.type_cols[model_type]].values]) + for params in sub_forecasts[self.type_cols[model_type]].values]) crps_obj.update(forecast_cdfs, obs_cdfs) else: crps_obj.update(sub_forecasts[self.forecast_bins[model_type].astype(str)].values, @@ -193,12 +199,12 @@ def roc(self, model_type, model_name, intensity_threshold, prob_thresholds, quer forecast_values = np.array([gamma_sf(intensity_threshold, *params) for params in sub_forecasts[self.forecast_bins[model_type]].values]) obs_probs = np.array([gamma_sf(intensity_threshold, *params) - for params in sub_forecasts[self.type_cols[model_type]].values]) + for params in sub_forecasts[self.type_cols[model_type]].values]) obs_values[obs_probs >= 0.01] = 1 elif len(self.forecast_bins[model_type]) > 1: fbin = np.argmin(np.abs(self.forecast_bins[model_type] - intensity_threshold)) - forecast_values = 1 - sub_forecasts[self.forecast_bins[model_type].astype(str)].values.cumsum(axis=1)[:, - fbin] + forecast_values = 1 - sub_forecasts[ + self.forecast_bins[model_type].astype(str)].values.cumsum(axis=1)[:, fbin] obs_values[sub_forecasts[self.type_cols[model_type]].values >= intensity_threshold] = 1 else: forecast_values = sub_forecasts[self.forecast_bins[model_type].astype(str)[0]].values @@ -232,12 +238,12 @@ def reliability(self, model_type, model_name, intensity_threshold, prob_threshol forecast_values = np.array([gamma_sf(intensity_threshold, *params) for params in sub_forecasts[self.forecast_bins[model_type]].values]) obs_probs = np.array([gamma_sf(intensity_threshold, *params) - for params in 
sub_forecasts[self.type_cols[model_type]].values]) + for params in sub_forecasts[self.type_cols[model_type]].values]) obs_values[obs_probs >= 0.01] = 1 elif len(self.forecast_bins[model_type]) > 1: fbin = np.argmin(np.abs(self.forecast_bins[model_type] - intensity_threshold)) forecast_values = 1 - sub_forecasts[self.forecast_bins[model_type].astype(str)].values.cumsum(axis=1)[:, - fbin] + fbin] obs_values[sub_forecasts[self.type_cols[model_type]].values >= intensity_threshold] = 1 else: forecast_values = sub_forecasts[self.forecast_bins[model_type].astype(str)[0]].values @@ -307,8 +313,6 @@ def max_hail_sample_crps(self, forecast_max_hail, obs_max_hail): return crps - - def gamma_sf(x, a, loc, b): if a == 0 or b == 0: sf = 0 diff --git a/hagelslag/evaluation/ProbabilityMetrics.py b/hagelslag/evaluation/ProbabilityMetrics.py index 51618a4..5b550bd 100644 --- a/hagelslag/evaluation/ProbabilityMetrics.py +++ b/hagelslag/evaluation/ProbabilityMetrics.py @@ -1,5 +1,6 @@ import numpy as np import pandas as pd + from .ContingencyTable import ContingencyTable __author__ = "David John Gagne " @@ -56,6 +57,7 @@ class DistributedROC(object): >>> roc.update(forecasts, obs) >>> print(roc.auc()) """ + def __init__(self, thresholds=np.arange(0, 1.1, 0.1), obs_threshold=1.0, input_str=None): """ Initializes the DistributedROC object. If input_str is not None, then the DistributedROC object is @@ -90,7 +92,7 @@ def update(self, forecasts, observations): tn = np.count_nonzero((forecasts < threshold) & (observations < self.obs_threshold)) self.contingency_tables.iloc[t] += [tp, fp, fn, tn] - + def clear(self): self.contingency_tables.loc[:, :] = 0 @@ -207,7 +209,7 @@ def from_str(self, in_str): self.thresholds = np.array(value.split(), dtype=float) self.contingency_tables = pd.DataFrame(columns=self.contingency_tables.columns, data=np.zeros((self.thresholds.size, - self.contingency_tables.columns.size))) + self.contingency_tables.columns.size))) elif var_name in self.contingency_tables.columns: self.contingency_tables[var_name] = np.array(value.split(), dtype=int) @@ -227,10 +229,10 @@ def calc_reliability_curve(self): total_frequency = np.zeros(self.thresholds.shape) for t, threshold in enumerate(self.thresholds[:-1]): pos_frequency[t] = np.count_nonzero((threshold <= self.forecasts) & - (self.forecasts < self.thresholds[t+1]) & + (self.forecasts < self.thresholds[t + 1]) & (self.observations > self.obs_threshold)) total_frequency[t] = np.count_nonzero((threshold <= self.forecasts) & - (self.forecasts < self.thresholds[t+1])) + (self.forecasts < self.thresholds[t + 1])) if total_frequency[t] > 0: self.pos_relative_frequency[t] = pos_frequency[t] / float(total_frequency[t]) self.total_relative_frequency[t] = total_frequency / self.forecasts.size @@ -315,11 +317,11 @@ def update(self, forecasts, observations): """ for t, threshold in enumerate(self.thresholds[:-1]): self.frequencies.loc[t, "Positive_Freq"] += np.count_nonzero((threshold <= forecasts) & - (forecasts < self.thresholds[t+1]) & + (forecasts < self.thresholds[t + 1]) & (observations >= self.obs_threshold)) self.frequencies.loc[t, "Total_Freq"] += np.count_nonzero((threshold <= forecasts) & - (forecasts < self.thresholds[t+1])) - + (forecasts < self.thresholds[t + 1])) + def clear(self): self.frequencies.loc[:, :] = 0 @@ -429,7 +431,7 @@ def from_str(self, in_str): self.thresholds = np.array(value.split(), dtype=float) self.frequencies = pd.DataFrame(columns=self.frequencies.columns, data=np.zeros((self.thresholds.size, - 
self.frequencies.columns.size))) + self.frequencies.columns.size))) elif var_name in ["Positive_Freq", "Total_Freq"]: self.frequencies[var_name] = np.array(value.split(), dtype=int) @@ -490,7 +492,7 @@ def crps(self): Calculates the continuous ranked probability score. """ return np.sum(self.errors["F_2"].values - self.errors["F_O"].values * 2.0 + self.errors["O_2"].values) / \ - (self.thresholds.size * self.num_forecasts) + (self.thresholds.size * self.num_forecasts) def crps_climo(self): """ diff --git a/hagelslag/processing/EnhancedWatershedSegmenter.py b/hagelslag/processing/EnhancedWatershedSegmenter.py index af37baa..c5622a2 100644 --- a/hagelslag/processing/EnhancedWatershedSegmenter.py +++ b/hagelslag/processing/EnhancedWatershedSegmenter.py @@ -10,9 +10,10 @@ @author: David John Gagne (djgagne@ou.edu) """ +from collections import OrderedDict + import numpy as np from scipy.ndimage import find_objects -from collections import OrderedDict class EnhancedWatershed(object): diff --git a/hagelslag/processing/EnsembleProducts.py b/hagelslag/processing/EnsembleProducts.py index 96bc41d..0b77606 100644 --- a/hagelslag/processing/EnsembleProducts.py +++ b/hagelslag/processing/EnsembleProducts.py @@ -1,19 +1,21 @@ -from hagelslag.data.ModelOutput import ModelOutput -from hagelslag.util.make_proj_grids import read_arps_map_file, read_ncar_map_file, make_proj_grids +import json +from datetime import timedelta +from glob import glob +from os.path import join, exists + import numpy as np import pandas as pd +from grib2io import Grib2Message +from netCDF4 import Dataset, num2date +from scipy.interpolate import interp1d from scipy.ndimage import gaussian_filter from scipy.signal import fftconvolve -from scipy.stats import gamma -from scipy.interpolate import interp1d from scipy.spatial import cKDTree +from scipy.stats import gamma from skimage.morphology import disk -from netCDF4 import Dataset, num2date -from glob import glob -from os.path import join, exists -import json -from datetime import timedelta -from grib2io import Grib2Message + +from hagelslag.data.ModelOutput import ModelOutput +from hagelslag.util.make_proj_grids import read_arps_map_file, read_ncar_map_file, make_proj_grids class EnsembleMemberProduct(object): @@ -37,6 +39,7 @@ class EnsembleMemberProduct(object): condition_model_name (str): Name of the condition ML model being used if different from model_name condition_threshold (float): Probability threshold for including or excluding storms. 
""" + def __init__(self, ensemble_name, model_name, member, run_date, variable, start_date, end_date, path, single_step, size_distribution_training_path, watershed_var, map_file=None, condition_model_name=None, condition_threshold=0.5): @@ -106,7 +109,7 @@ def load_data(self, num_samples=1000, percentiles=None): self.data = np.zeros((self.forecast_hours.size, self.mapping_data["lon"].shape[0], self.mapping_data["lon"].shape[1]), dtype=np.float32) - + if self.percentiles is not None: self.percentile_data = np.zeros([len(self.percentiles)] + list(self.data.shape)) full_condition_name = "condition_" + self.condition_model_name.replace(" ", "-") @@ -123,7 +126,7 @@ def load_data(self, num_samples=1000, percentiles=None): if forecast_time in self.times: t = np.where(self.times == forecast_time)[0][0] mask = np.array(step["properties"]["masks"], dtype=int).ravel() - rankings = np.argsort(np.array(step["properties"]["timesteps"]).ravel()[mask==1]) + rankings = np.argsort(np.array(step["properties"]["timesteps"]).ravel()[mask == 1]) i = np.array(step["properties"]["i"], dtype=int).ravel()[mask == 1][rankings] j = np.array(step["properties"]["j"], dtype=int).ravel()[mask == 1][rankings] if rankings.size > 0 and forecast_params[0] > 0.1 and 1 < forecast_params[2] < 100: @@ -176,8 +179,8 @@ def load_forecast_csv_data(self, csv_path): """ forecast_file = join(csv_path, "hail_forecasts_{0}_{1}_{2}.csv".format(self.ensemble_name, - self.member, - self.run_date.strftime("%Y%m%d-%H%M"))) + self.member, + self.run_date.strftime("%Y%m%d-%H%M"))) if exists(forecast_file): self.hail_forecast_table = pd.read_csv(forecast_file) return @@ -196,7 +199,7 @@ def load_forecast_netcdf_data(self, nc_path): if exists(nc_file): nc_patches = Dataset(nc_file) nc_times = pd.DatetimeIndex(num2date(nc_patches.variables["time"][:], - nc_patches.variables["time"].units)) + nc_patches.variables["time"].units)) time_indices = np.isin(nc_times, self.times) self.nc_patches = dict() self.nc_patches["time"] = nc_times[time_indices] @@ -208,9 +211,9 @@ def load_forecast_netcdf_data(self, nc_path): nc_patches.close() print(nc_file) else: - print('no {0} {1} netCDF4 file'.format(self.member,self.run_date.strftime("%Y%m%d"))) + print('no {0} {1} netCDF4 file'.format(self.member, self.run_date.strftime("%Y%m%d"))) self.nc_patches = None - return + return return def quantile_match(self): @@ -223,13 +226,13 @@ def quantile_match(self): """ if self.nc_patches is None: self.data = None - return + return mask_indices = np.where(self.nc_patches["masks"] == 1) obj_values = self.nc_patches["obj_values"][mask_indices] obj_values = np.array(obj_values) percentiles = np.linspace(0.1, 99.9, 100) - + try: filename = join(self.size_distribution_training_path, '{0}_{1}_{2}_Size_Distribution.csv'.format(self.ensemble_name, @@ -237,11 +240,11 @@ def quantile_match(self): self.member)) if not exists(filename): filename = join(self.size_distribution_training_path, - '{0}_{1}_Size_Distribution.csv'.format(self.ensemble_name, - self.watershed_var)) + '{0}_{1}_Size_Distribution.csv'.format(self.ensemble_name, + self.watershed_var)) train_period_obj_per_vals = pd.read_csv(filename) - train_period_obj_per_vals = train_period_obj_per_vals.loc[:,"Obj_Values"].values - per_func = interp1d(train_period_obj_per_vals, percentiles / 100.0, + train_period_obj_per_vals = train_period_obj_per_vals.loc[:, "Obj_Values"].values + per_func = interp1d(train_period_obj_per_vals, percentiles / 100.0, bounds_error=False, fill_value=(0.1, 99.9)) except FileNotFoundError: 
obj_per_vals = np.percentile(obj_values, percentiles) @@ -374,22 +377,22 @@ def encode_grib2_percentile(self): self.grid_dict["dx"] * 1e3, self.grid_dict["dy"] * 1e3, 0b00000000, 0b01000000, self.proj_dict["lat_1"] * lscale, self.proj_dict["lat_2"] * lscale, -90 * lscale, 0] - pdtmp1 = np.array([1, # parameter category Moisture - 31, # parameter number Hail - 4, # Type of generating process Ensemble Forecast - 0, # Background generating process identifier - 31, # Generating process or model from NCEP - 0, # Hours after reference time data cutoff - 0, # Minutes after reference time data cutoff - 1, # Forecast time units Hours - 0, # Forecast time - 1, # Type of first fixed surface Ground - 1, # Scale value of first fixed surface - 0, # Value of first fixed surface - 1, # Type of second fixed surface - 1, # Scale value of 2nd fixed surface - 0, # Value of 2nd fixed surface - 0, # Derived forecast type + pdtmp1 = np.array([1, # parameter category Moisture + 31, # parameter number Hail + 4, # Type of generating process Ensemble Forecast + 0, # Background generating process identifier + 31, # Generating process or model from NCEP + 0, # Hours after reference time data cutoff + 0, # Minutes after reference time data cutoff + 1, # Forecast time units Hours + 0, # Forecast time + 1, # Type of first fixed surface Ground + 1, # Scale value of first fixed surface + 0, # Value of first fixed surface + 1, # Type of second fixed surface + 1, # Scale value of 2nd fixed surface + 0, # Value of 2nd fixed surface + 0, # Derived forecast type self.num_samples # Number of ensemble members ], dtype=np.int32) grib_objects = pd.Series(index=self.times, data=[None] * self.times.size, dtype=object) @@ -397,7 +400,7 @@ def encode_grib2_percentile(self): for t, time in enumerate(self.times): time_list = list(self.run_date.utctimetuple()[0:6]) if grib_objects[time] is None: - #grib_objects[time] = Grib2Encode(0, np.array(grib_id_start + time_list + [2, 1], dtype=np.int32)) + # grib_objects[time] = Grib2Encode(0, np.array(grib_id_start + time_list + [2, 1], dtype=np.int32)) grib_objects[time] = Grib2Message(discipline=0, idsect=np.array(grib_id_start + time_list + [2, 1], dtype=np.int32)) grib_objects[time].addgrid(gdsinfo, gdtmp1) @@ -405,8 +408,8 @@ def encode_grib2_percentile(self): data = self.percentile_data[:, t] / 1000.0 masked_data = np.ma.array(data, mask=data <= 0) for p, percentile in enumerate(self.percentiles): - print("GRIB {3} Percentile {0}. Max: {1} Min: {2}".format(percentile, - masked_data[p].max(), + print("GRIB {3} Percentile {0}. 
Max: {1} Min: {2}".format(percentile, + masked_data[p].max(), masked_data[p].min(), time)) if percentile in range(1, 100): @@ -425,7 +428,7 @@ def encode_grib2_data(self): Series of GRIB2 messages """ if self.data is None: - return None + return None lscale = 1e6 grib_id_start = [7, 0, 14, 14, 2] gdsinfo = np.array([0, np.product(self.data.shape[-2:]), 0, 0, 30], dtype=np.int32) @@ -442,30 +445,30 @@ def encode_grib2_data(self): self.grid_dict["dx"] * 1e3, self.grid_dict["dy"] * 1e3, 0b00000000, 0b01000000, self.proj_dict["lat_1"] * lscale, self.proj_dict["lat_2"] * lscale, -90 * lscale, 0] - pdtmp1 = np.array([1, # parameter category Moisture - 31, # parameter number Hail - 4, # Type of generating process Ensemble Forecast - 0, # Background generating process identifier - 31, # Generating process or model from NCEP - 0, # Hours after reference time data cutoff - 0, # Minutes after reference time data cutoff - 1, # Forecast time units Hours - 0, # Forecast time - 1, # Type of first fixed surface Ground - 1, # Scale value of first fixed surface - 0, # Value of first fixed surface - 1, # Type of second fixed surface - 1, # Scale value of 2nd fixed surface - 0, # Value of 2nd fixed surface - 0, # Derived forecast type - 1 # Number of ensemble members + pdtmp1 = np.array([1, # parameter category Moisture + 31, # parameter number Hail + 4, # Type of generating process Ensemble Forecast + 0, # Background generating process identifier + 31, # Generating process or model from NCEP + 0, # Hours after reference time data cutoff + 0, # Minutes after reference time data cutoff + 1, # Forecast time units Hours + 0, # Forecast time + 1, # Type of first fixed surface Ground + 1, # Scale value of first fixed surface + 0, # Value of first fixed surface + 1, # Type of second fixed surface + 1, # Scale value of 2nd fixed surface + 0, # Value of 2nd fixed surface + 0, # Derived forecast type + 1 # Number of ensemble members ], dtype=np.int32) grib_objects = pd.Series(index=self.times, data=[None] * self.times.size, dtype=object) drtmp1 = np.array([0, 0, 4, 8, 0], dtype=np.int32) for t, time in enumerate(self.times): time_list = list(self.run_date.utctimetuple()[0:6]) if grib_objects[time] is None: - #grib_objects[time] = Grib2Encode(0, np.array(grib_id_start + time_list + [2, 1], dtype=np.int32)) + # grib_objects[time] = Grib2Encode(0, np.array(grib_id_start + time_list + [2, 1], dtype=np.int32)) grib_objects[time] = Grib2Message(discipline=0, idsect=np.array(grib_id_start + time_list + [2, 1], dtype=np.int32)) grib_objects[time].addgrid(gdsinfo, gdtmp1) @@ -489,11 +492,11 @@ def write_grib2_files(self, grib_objects, path): for t, time in enumerate(self.times.to_pydatetime()): grib_objects[time].end() filename = join(path, "{0}_{1}_{2}_{3}_{4}.grib2".format(self.ensemble_name, - self.member, - self.model_name.replace(" ", "-"), - self.variable, - self.run_date.strftime("%Y%m%d%H") + - "f{0:02d}".format(self.forecast_hours[t]) - )) + self.member, + self.model_name.replace(" ", "-"), + self.variable, + self.run_date.strftime("%Y%m%d%H") + + "f{0:02d}".format(self.forecast_hours[t]) + )) with open(filename, "wb") as fo: fo.write(grib_objects[time]._msg) diff --git a/hagelslag/processing/ObjectMatcher.py b/hagelslag/processing/ObjectMatcher.py index 5832035..f284bb4 100644 --- a/hagelslag/processing/ObjectMatcher.py +++ b/hagelslag/processing/ObjectMatcher.py @@ -1,7 +1,8 @@ import numpy as np -from hagelslag.util.munkres import Munkres import pandas as pd +from hagelslag.util.munkres import Munkres + class 
ObjectMatcher(object): """ @@ -183,6 +184,7 @@ class TrackStepMatcher(object): """ Determine if each step in a track is in close proximity to steps from another set of tracks """ + def __init__(self, cost_function_components, max_values): self.cost_function_components = cost_function_components self.max_values = max_values diff --git a/hagelslag/processing/STObject.py b/hagelslag/processing/STObject.py index 1009c7c..f846d54 100644 --- a/hagelslag/processing/STObject.py +++ b/hagelslag/processing/STObject.py @@ -1,8 +1,9 @@ +import json + import numpy as np from skimage.measure import regionprops -from skimage.segmentation import find_boundaries from skimage.morphology import convex_hull_image -import json +from skimage.segmentation import find_boundaries class STObject(object): @@ -71,7 +72,7 @@ def __init__(self, grid, mask, x, y, i, j, start_time, end_time, step=1, dx=4000 def __str__(self): com_x, com_y = self.center_of_mass(self.start_time) data = dict(maxsize=self.max_size(), comx=com_x, comy=com_y, start=self.start_time, end=self.end_time) - return "ST Object [maxSize=%(maxsize)d,initialCenter=%(comx)0.2f,%(comy)0.2f,duration=%(start)02d-%(end)02d]" %\ + return "ST Object [maxSize=%(maxsize)d,initialCenter=%(comx)0.2f,%(comy)0.2f,duration=%(start)02d-%(end)02d]" % \ data def center_of_mass(self, time): @@ -235,7 +236,7 @@ def boundary_polygon(self, time): chull = convex_hull_image(padded_mask) boundary_image = find_boundaries(chull, mode='inner', background=0) # Now remove the padding. - boundary_image = boundary_image[1:-1,1:-1] + boundary_image = boundary_image[1:-1, 1:-1] boundary_x = self.x[ti].ravel()[boundary_image.ravel()] boundary_y = self.y[ti].ravel()[boundary_image.ravel()] r = np.sqrt((boundary_x - com_x) ** 2 + (boundary_y - com_y) ** 2) @@ -273,7 +274,7 @@ def estimate_motion(self, time, intensity_grid, max_u, max_v): for v in v_shifts: i_shift = i_vals - v if np.all((0 <= i_shift) & (i_shift < intensity_grid.shape[0]) & - (0 <= j_shift) & (j_shift < intensity_grid.shape[1])): + (0 <= j_shift) & (j_shift < intensity_grid.shape[1])): shift_vals = intensity_grid[i_shift, j_shift] else: shift_vals = np.zeros(i_shift.shape) @@ -284,7 +285,7 @@ def estimate_motion(self, time, intensity_grid, max_u, max_v): best_u = u * self.dx best_v = v * self.dx # 60 seems arbitrarily high - #if min_error > 60: + # if min_error > 60: # best_u = 0 # best_v = 0 self.u[ti] = best_u @@ -344,7 +345,6 @@ def extract_attribute_array(self, data_array, var_name): for t in range(self.times.size): self.attributes[var_name].append(data_array[self.i[t], self.j[t]]) - def extract_tendency_grid(self, model_grid): """ Extracts the difference in model outputs @@ -360,7 +360,7 @@ def extract_tendency_grid(self, model_grid): t_index = t - model_grid.start_hour self.attributes[var_name].append( model_grid.data[t_index, self.i[ti], self.j[ti]] - model_grid.data[t_index - 1, self.i[ti], self.j[ti]] - ) + ) def calc_attribute_statistics(self, statistic_name): """ @@ -405,7 +405,7 @@ def calc_attribute_statistic(self, attribute, statistic, time): stat_val = np.median(self.attributes[attribute][ti].ravel()[ma]) elif statistic == "skew": stat_val = np.mean(self.attributes[attribute][ti].ravel()[ma]) - \ - np.median(self.attributes[attribute][ti].ravel()[ma]) + np.median(self.attributes[attribute][ti].ravel()[ma]) elif 'percentile' in statistic: per = int(statistic.split("_")[1]) stat_val = np.percentile(self.attributes[attribute][ti].ravel()[ma], per) @@ -415,7 +415,7 @@ def calc_attribute_statistic(self, 
attribute, statistic, time): stat_val = 0 else: stat_val = self.calc_attribute_statistic(attribute, stat_name, time) \ - - self.calc_attribute_statistic(attribute, stat_name, time - 1) + - self.calc_attribute_statistic(attribute, stat_name, time - 1) else: stat_val = np.nan return stat_val @@ -445,8 +445,8 @@ def calc_timestep_statistic(self, statistic, time): if ti == 0: stat_val = 0 else: - stat_val = self.calc_timestep_statistic(stat_name, time) -\ - self.calc_timestep_statistic(stat_name, time - 1) + stat_val = self.calc_timestep_statistic(stat_name, time) - \ + self.calc_timestep_statistic(stat_name, time - 1) else: stat_val = np.nan return stat_val @@ -485,7 +485,7 @@ def calc_shape_step(self, stat_names, time): ti = np.where(self.times == time)[0][0] shape_stats = [] try: - props = regionprops(self.masks[ti], self.timesteps[ti])[0] + props = regionprops(self.masks[ti], self.timesteps[ti])[0] except: for stat_name in stat_names: shape_stats.append(np.nan) @@ -544,6 +544,7 @@ def to_geojson(self, filename, proj, metadata=None): file_obj.close() return + def read_geojson(filename): """ Reads a geojson file containing an STObject and initializes a new STObject from the information in the file. @@ -577,4 +578,3 @@ def read_geojson(filename): for k, v in attribute_data.items(): sto.attributes[k] = v return sto - diff --git a/hagelslag/processing/TrackModeler.py b/hagelslag/processing/TrackModeler.py index 71aa2e6..3b6a8df 100644 --- a/hagelslag/processing/TrackModeler.py +++ b/hagelslag/processing/TrackModeler.py @@ -1,17 +1,20 @@ -import numpy as np -import pandas as pd -import pickle import json import os -from sklearn.decomposition import PCA +import pickle from copy import deepcopy from glob import glob from multiprocessing import Pool +from os.path import join + +import numpy as np +import pandas as pd +from sklearn.decomposition import PCA from sklearn.linear_model import LinearRegression +from sklearn.model_selection import KFold + from hagelslag.evaluation.ProbabilityMetrics import DistributedROC -from os.path import join from hagelslag.util.make_proj_grids import read_arps_map_file, read_ncar_map_file -from sklearn.model_selection import KFold + class TrackModeler(object): """ @@ -30,7 +33,7 @@ def __init__(self, weighting_function, map_file, group_col="Microphysics"): - + self.train_data_path = train_data_path self.forecast_data_path = forecast_data_path self.ensemble_name = ensemble_name @@ -50,8 +53,8 @@ def __init__(self, if self.map_file[-3:] == "map": self.proj_dict, self.grid_dict = read_arps_map_file(self.map_file) - else: - self.proj_dict, self.grid_dict = read_ncar_map_file(self.map_file) + else: + self.proj_dict, self.grid_dict = read_ncar_map_file(self.map_file) return def load_data(self, mode="train", format="csv"): @@ -66,7 +69,7 @@ def load_data(self, mode="train", format="csv"): """ if mode in self.data.keys(): run_dates = pd.date_range(start=self.start_dates[mode], - end=self.end_dates[mode], freq="1D") + end=self.end_dates[mode], freq="1D") run_date_str = [d.strftime("%Y%m%d-%H%M") for d in run_dates.date] print(np.unique(run_dates.strftime('%Y%m'))) all_total_track_files = sorted(glob(getattr(self, mode + "_data_path") + @@ -84,12 +87,13 @@ def load_data(self, mode="train", format="csv"): if file_date in run_date_str: step_track_files.append(step_file) - self.data[mode]["total"] = pd.concat(map(pd.read_csv, total_track_files), - ignore_index=True,sort='True') + self.data[mode]["total"] = pd.concat([pd.read_csv(total_track_file) + for total_track_file in 
total_track_files], + ignore_index=True, sort='True') self.data[mode]["total"] = self.data[mode]["total"].fillna(value=0) self.data[mode]["total"] = self.data[mode]["total"].replace([np.inf, -np.inf], 0) self.data[mode]["step"] = pd.concat(map(pd.read_csv, step_track_files), - ignore_index=True,sort='True') + ignore_index=True, sort='True') self.data[mode]["step"] = self.data[mode]["step"].fillna(value=0) self.data[mode]["step"] = self.data[mode]["step"].replace([np.inf, -np.inf], 0) if mode == "forecast": @@ -316,8 +320,8 @@ def fit_size_distribution_models(self, model_names, model_objs, input_columns, self.size_distribution_models[group]["multi"][model_name] = deepcopy(model_objs[m]) try: self.size_distribution_models[group]["multi"][model_name].fit(group_data[input_columns], - (log_labels - log_means) / log_sds, - sample_weight=weights) + (log_labels - log_means) / log_sds, + sample_weight=weights) except: self.size_distribution_models[group]["multi"][model_name].fit(group_data[input_columns], (log_labels - log_means) / log_sds) @@ -389,10 +393,10 @@ def fit_size_distribution_component_models(self, model_names, model_objs, input_ "pc_{0:d}".format(comp)][model_name].fit(group_data[input_columns], out_pc_labels[:, comp]) return - + def predict_size_distribution_models(self, model_names, input_columns, metadata_cols, - data_mode="forecast", location=6, - calibrate=False): + data_mode="forecast", location=6, + calibrate=False): """ Make predictions using fitted size distribution models. Each ML model predicts the normalized shape and scale parameters simultaneously using multitask learning. Only scikit learn models that support @@ -419,7 +423,7 @@ def predict_size_distribution_models(self, model_names, input_columns, metadata_ log_sd = self.size_distribution_models[group]["lognorm"]["sd"] for m, model_name in enumerate(model_names): multi_predictions = self.size_distribution_models[group]["multi"][model_name].predict( - self.data[data_mode]["combo"].loc[group_idxs,input_columns]) + self.data[data_mode]["combo"].loc[group_idxs, input_columns]) if calibrate: multi_predictions[:, 0] = self.size_distribution_models[group]["calshape"][model_name].predict( multi_predictions[:, 0:1]) @@ -428,9 +432,9 @@ def predict_size_distribution_models(self, model_names, input_columns, metadata_ multi_predictions = np.exp(multi_predictions * log_sd + log_mean) if multi_predictions.shape[1] == 2: multi_predictions_temp = np.zeros((multi_predictions.shape[0], 3)) - multi_predictions_temp[:, 0] = multi_predictions[:,0] + multi_predictions_temp[:, 0] = multi_predictions[:, 0] multi_predictions_temp[:, 1] = location - multi_predictions_temp[:, 2] = multi_predictions[:,1] + multi_predictions_temp[:, 2] = multi_predictions[:, 1] for p, pred_col in enumerate(["shape", "location", "scale"]): predictions.loc[group_idxs, model_name.replace(" ", "-") + "_" + pred_col] = \ multi_predictions_temp[:, p] @@ -844,9 +848,7 @@ def output_forecasts_csv(self, forecasts, mode, csv_path, run_date_format="%Y%m% def output_forecasts_json_parallel(self, forecasts, condition_model_names, - size_model_names, dist_model_names, - track_model_names, json_data_path, out_path, num_procs): @@ -864,17 +866,9 @@ def output_forecasts_json_parallel(self, forecasts, for ml_model in condition_model_names: step_forecasts["condition_" + ml_model.replace(" ", "-")] = forecasts["condition"][group].loc[ forecasts["condition"][group]["Track_ID"] == track_id, ml_model].copy() - # for ml_model in size_model_names: - # step_forecasts["size_" + 
ml_model.replace(" ", "-")] = forecasts["size"][group][ml_model].loc[ - # forecasts["size"][group][ml_model]["Track_ID"] == track_id].copy() for ml_model in dist_model_names: step_forecasts["dist_" + ml_model.replace(" ", "-")] = forecasts["dist"][group][ml_model].loc[ forecasts["dist"][group][ml_model]["Track_ID"] == track_id].copy() - # for model_type in forecasts["track"].keys(): - # for ml_model in track_model_names: - # mframe = forecasts["track"][model_type][group][ml_model] - # step_forecasts[model_type + "_" + ml_model.replace(" ", "-")] = mframe.loc[ - # mframe["Track_ID"] == track_id].copy() pool.apply_async(output_forecast, (step_forecasts, run_date, ensemble_name, member, track_num, json_data_path, out_path)) @@ -925,6 +919,3 @@ def output_forecast(step_forecasts, run_date, ensemble_name, member, track_num, print(out_json_filename + " not found") return return - - - diff --git a/hagelslag/processing/TrackProcessing.py b/hagelslag/processing/TrackProcessing.py index e88535c..489d6e0 100644 --- a/hagelslag/processing/TrackProcessing.py +++ b/hagelslag/processing/TrackProcessing.py @@ -1,19 +1,21 @@ -from hagelslag.data.ModelOutput import ModelOutput +from datetime import timedelta +from glob import glob + +import numpy as np +import pandas as pd +from netCDF4 import Dataset +from scipy.interpolate import interp1d +from scipy.ndimage import find_objects, gaussian_filter +from scipy.stats import gamma + from hagelslag.data.MRMSGrid import MRMSGrid +from hagelslag.data.ModelOutput import ModelOutput from hagelslag.processing.EnhancedWatershedSegmenter import EnhancedWatershed, rescale_data -from hagelslag.processing.Watershed import Watershed from hagelslag.processing.Hysteresis import Hysteresis +from hagelslag.processing.Watershed import Watershed from hagelslag.processing.tracker import label_storm_objects, extract_storm_patches, track_storms from .ObjectMatcher import ObjectMatcher, TrackMatcher, TrackStepMatcher -from scipy.ndimage import find_objects, gaussian_filter from .STObject import STObject, read_geojson -import numpy as np -from scipy.interpolate import interp1d -from glob import glob -import pandas as pd -from datetime import timedelta -from scipy.stats import gamma -from netCDF4 import Dataset class TrackProcessor(object): @@ -46,6 +48,7 @@ class TrackProcessor(object): single_step: Whether model timesteps are in separate files or aggregated into one file. mask_file: netCDF filename containing a mask of valid grid points on the model domain. 
""" + def __init__(self, run_date, start_date, @@ -99,7 +102,7 @@ def __init__(self, self.single_step = single_step self.model_grid = ModelOutput(self.ensemble_name, self.ensemble_member, self.run_date, self.variable, self.start_date, self.end_date, self.model_path, self.model_map_file, - single_step=self.single_step) + single_step=self.single_step) self.model_grid.load_map_info(self.model_map_file) if self.mrms_path is not None: self.mrms_variable = mrms_variable @@ -181,7 +184,7 @@ def find_model_patch_tracks(self): if len(slices) > 0: dims = (slices[0][0].stop - slices[0][0].start, slices[0][1].stop - slices[0][1].start) if h > 0: - model_obj.estimate_motion(hour, self.model_grid.data[h-1], dims[1], dims[0]) + model_obj.estimate_motion(hour, self.model_grid.data[h - 1], dims[1], dims[0]) del model_data del hour_labels @@ -250,16 +253,16 @@ def find_model_tracks(self): for s, sl in enumerate(obj_slices): model_objects[-1].append(STObject(self.model_grid.data[h][sl], np.where(hour_labels[sl] == s + 1, 1, 0), - self.model_grid.x[sl], - self.model_grid.y[sl], - self.model_grid.i[sl], + self.model_grid.x[sl], + self.model_grid.y[sl], + self.model_grid.i[sl], self.model_grid.j[sl], hour, hour, dx=self.model_grid.dx)) if h > 0: dims = model_objects[-1][-1].timesteps[0].shape - model_objects[-1][-1].estimate_motion(hour, self.model_grid.data[h-1], dims[1], dims[0]) + model_objects[-1][-1].estimate_motion(hour, self.model_grid.data[h - 1], dims[1], dims[0]) del hour_labels del scaled_data del model_data @@ -315,18 +318,18 @@ def find_mrms_tracks(self): tracked_obs_objects = [] if self.mrms_ew is not None: self.mrms_grid.load_data() - + if len(self.mrms_grid.data) != len(self.hours): print('Less than 24 hours of observation data found') - + return tracked_obs_objects - + for h, hour in enumerate(self.hours): mrms_data = np.zeros(self.mrms_grid.data[h].shape) mrms_data[:] = np.array(self.mrms_grid.data[h]) mrms_data[mrms_data < 0] = 0 hour_labels = self.mrms_ew.size_filter(self.mrms_ew.label(gaussian_filter(mrms_data, - self.gaussian_window)), + self.gaussian_window)), self.size_filter) hour_labels[mrms_data < self.mrms_ew.min_intensity] = 0 obj_slices = find_objects(hour_labels) @@ -345,8 +348,8 @@ def find_mrms_tracks(self): dx=self.model_grid.dx)) if h > 0: dims = obs_objects[-1][-1].timesteps[0].shape - obs_objects[-1][-1].estimate_motion(hour, self.mrms_grid.data[h-1], dims[1], dims[0]) - + obs_objects[-1][-1].estimate_motion(hour, self.mrms_grid.data[h - 1], dims[1], dims[0]) + for h, hour in enumerate(self.hours): past_time_objs = [] for obj in tracked_obs_objects: @@ -364,7 +367,7 @@ def find_mrms_tracks(self): for up in unpaired: tracked_obs_objects.append(obs_objects[h][up]) print("Tracked Obs Objects: {0:03d} Hour: {1:02d}".format(len(tracked_obs_objects), hour)) - + return tracked_obs_objects def match_tracks(self, model_tracks, obs_tracks, unique_matches=True, closest_matches=False): @@ -414,11 +417,11 @@ def extract_model_attributes(self, tracked_model_objects, storm_variables, poten for model_obj in tracked_model_objects: model_obj.extract_attribute_array(getattr(self.model_grid, l_var), l_var) for storm_var in storm_variables: - print("Storm {0} {1} {2}".format(storm_var,self.ensemble_member, self.run_date.strftime("%Y%m%d"))) + print("Storm {0} {1} {2}".format(storm_var, self.ensemble_member, self.run_date.strftime("%Y%m%d"))) model_grids[storm_var] = ModelOutput(self.ensemble_name, self.ensemble_member, self.run_date, storm_var, self.start_date - timedelta(hours=1), self.end_date 
+ timedelta(hours=1), - self.model_path,self.model_map_file, + self.model_path, self.model_map_file, self.single_step) model_grids[storm_var].load_data() for model_obj in tracked_model_objects: @@ -426,13 +429,13 @@ def extract_model_attributes(self, tracked_model_objects, storm_variables, poten if storm_var not in potential_variables + tendency_variables + future_variables: del model_grids[storm_var] for potential_var in potential_variables: - print("Potential {0} {1} {2}".format(potential_var,self.ensemble_member, self.run_date.strftime("%Y%m%d"))) + print("Potential {0} {1} {2}".format(potential_var, self.ensemble_member, self.run_date.strftime("%Y%m%d"))) if potential_var not in model_grids.keys(): model_grids[potential_var] = ModelOutput(self.ensemble_name, self.ensemble_member, self.run_date, potential_var, self.start_date - timedelta(hours=1), self.end_date + timedelta(hours=1), - self.model_path, self.model_map_file, + self.model_path, self.model_map_file, self.single_step) model_grids[potential_var].load_data() for model_obj in tracked_model_objects: @@ -443,11 +446,11 @@ def extract_model_attributes(self, tracked_model_objects, storm_variables, poten print("Future {0} {1} {2}".format(future_var, self.ensemble_member, self.run_date.strftime("%Y%m%d"))) if future_var not in model_grids.keys(): model_grids[future_var] = ModelOutput(self.ensemble_name, self.ensemble_member, - self.run_date, future_var, - self.start_date - timedelta(hours=1), - self.end_date + timedelta(hours=1), - self.model_path, self.model_map_file, - self.single_step) + self.run_date, future_var, + self.start_date - timedelta(hours=1), + self.end_date + timedelta(hours=1), + self.model_path, self.model_map_file, + self.single_step) model_grids[future_var].load_data() for model_obj in tracked_model_objects: model_obj.extract_attribute_grid(model_grids[future_var], future=True) @@ -460,13 +463,12 @@ def extract_model_attributes(self, tracked_model_objects, storm_variables, poten self.run_date, tendency_var, self.start_date - timedelta(hours=1), self.end_date, - self.model_path, self.model_map_file, + self.model_path, self.model_map_file, self.single_step) for model_obj in tracked_model_objects: model_obj.extract_tendency_grid(model_grids[tendency_var]) del model_grids[tendency_var] - @staticmethod def match_hail_sizes(model_tracks, obs_tracks, track_pairings): """ @@ -487,10 +489,10 @@ def match_hail_sizes(model_tracks, obs_tracks, track_pairings): obs_hail_sizes = np.array([step[obs_track.masks[t] == 1].max() for t, step in enumerate(obs_track.timesteps)]) if obs_track.times.size > 1 and model_track.times.size > 1: - normalized_obs_times = 1.0 / (obs_track.times.max() - obs_track.times.min())\ - * (obs_track.times - obs_track.times.min()) - normalized_model_times = 1.0 / (model_track.times.max() - model_track.times.min())\ - * (model_track.times - model_track.times.min()) + normalized_obs_times = 1.0 / (obs_track.times.max() - obs_track.times.min()) \ + * (obs_track.times - obs_track.times.min()) + normalized_model_times = 1.0 / (model_track.times.max() - model_track.times.min()) \ + * (model_track.times - model_track.times.min()) hail_interp = interp1d(normalized_obs_times, obs_hail_sizes, kind="nearest", bounds_error=False, fill_value=0) model_track.observations = hail_interp(normalized_model_times) @@ -498,7 +500,7 @@ def match_hail_sizes(model_tracks, obs_tracks, track_pairings): model_track.observations = np.ones(model_track.times.shape) * obs_hail_sizes[0] elif model_track.times.size == 1: 
model_track.observations = np.array([obs_hail_sizes.max()]) - print(pair[0], "obs", obs_hail_sizes) + print(pair[0], "obs", obs_hail_sizes) print(pair[0], "model", model_track.observations) for u in unpaired: model_tracks[u].observations = np.zeros(model_tracks[u].times.shape) @@ -529,6 +531,7 @@ def match_single_track_dist(model_track, obs_track): for param in obs_hail_dists.columns: model_hail_dists.loc[model_track.times, param] = obs_hail_dists.loc[obs_track.times[0], param] return model_hail_dists + unpaired = list(range(len(model_tracks))) for p, pair in enumerate(track_pairings): unpaired.remove(pair[0]) @@ -611,6 +614,5 @@ def calc_track_errors(model_tracks, obs_tracks, track_pairings): track_errors.loc[pair[0], 'translation_error_x'] = model_com[0] - obs_com[0] track_errors.loc[pair[0], 'translation_error_y'] = model_com[1] - obs_com[1] track_errors.loc[pair[0], 'start_time_difference'] = model_track.start_time - obs_track.start_time - track_errors.loc[pair[0], 'end_time_difference'] = model_track.end_time - obs_track.end_time + track_errors.loc[pair[0], 'end_time_difference'] = model_track.end_time - obs_track.end_time return track_errors - diff --git a/hagelslag/processing/TrackSampler.py b/hagelslag/processing/TrackSampler.py index 3327bc7..d1f27b8 100644 --- a/hagelslag/processing/TrackSampler.py +++ b/hagelslag/processing/TrackSampler.py @@ -1,16 +1,17 @@ -from netCDF4 import Dataset -import numpy as np -import pandas as pd +import argparse import json -from glob import glob import os +import pickle +import traceback from datetime import datetime +from glob import glob from multiprocessing import Pool -import argparse -import pickle + +import numpy as np +import pandas as pd +from netCDF4 import Dataset from scipy.ndimage import binary_dilation from scipy.stats import multivariate_normal -import traceback def load_grid_info(grid_file): @@ -73,20 +74,20 @@ def main(): for run_date in run_dates: for member in members: group = member_info.loc[member, "Microphysics"] - apply(sample_member_run_tracks, (member, - group, - run_date, - model_names, - start_hour, - end_hour, - grid_shape, - dx, - track_path, - num_samples, - thresholds, - copula_file, - out_path - )) + sample_member_run_tracks(member, + group, + run_date, + model_names, + start_hour, + end_hour, + grid_shape, + dx, + track_path, + num_samples, + thresholds, + copula_file, + out_path + ) return @@ -133,6 +134,7 @@ class TrackSampler(object): Monte Carlo sampler of forecast storm tracks. """ + def __init__(self, member, group, run_date, model_names, start_hour, end_hour, grid_shape, dx, track_path, num_samples, copula_file=None): self.member = member diff --git a/hagelslag/processing/Watershed.py b/hagelslag/processing/Watershed.py index 6e25308..174ba12 100644 --- a/hagelslag/processing/Watershed.py +++ b/hagelslag/processing/Watershed.py @@ -1,6 +1,6 @@ -from skimage.segmentation import watershed -from scipy.ndimage import label, find_objects import numpy as np +from scipy.ndimage import label, find_objects +from skimage.segmentation import watershed class Watershed(object): @@ -14,6 +14,7 @@ class Watershed(object): core_intensity: the intensity used to determine the initial objects. 
""" + def __init__(self, min_intensity, max_intensity): self.min_intensity = min_intensity self.max_intensity = max_intensity diff --git a/hagelslag/processing/tracker.py b/hagelslag/processing/tracker.py index 8f02be4..05b44d7 100644 --- a/hagelslag/processing/tracker.py +++ b/hagelslag/processing/tracker.py @@ -1,10 +1,11 @@ -from .STObject import STObject +import numpy as np +from scipy.ndimage import find_objects, center_of_mass, gaussian_filter + +from hagelslag.processing.ObjectMatcher import ObjectMatcher from .EnhancedWatershedSegmenter import EnhancedWatershed -from .Watershed import Watershed from .Hysteresis import Hysteresis -from hagelslag.processing.ObjectMatcher import ObjectMatcher -from scipy.ndimage import find_objects, center_of_mass, gaussian_filter -import numpy as np +from .STObject import STObject +from .Watershed import Watershed def label_storm_objects(data, method, min_intensity, max_intensity, min_area=1, max_area=100, max_range=1, @@ -220,7 +221,7 @@ def track_storms(storm_objects, times, distance_components, distance_maxima, dis if len(past_time_objects) == 0: tracked_objects.extend(storm_objects[t]) elif len(past_time_objects) > 0 and len(storm_objects[t]) > 0: - assignments = obj_matcher.match_objects(past_time_objects, storm_objects[t], times[t-1], times[t]) + assignments = obj_matcher.match_objects(past_time_objects, storm_objects[t], times[t - 1], times[t]) unpaired = list(range(len(storm_objects[t]))) for pair in assignments: past_time_objects[pair[0]].extend(storm_objects[t][pair[1]]) diff --git a/hagelslag/util/Config.py b/hagelslag/util/Config.py index af1e873..2e9248a 100644 --- a/hagelslag/util/Config.py +++ b/hagelslag/util/Config.py @@ -3,6 +3,7 @@ class Config(object): Class that loads options from a config file and converts them into attributes. """ + def __init__(self, filename, required_attributes=()): config = None print(filename) @@ -25,4 +26,3 @@ def __init__(self, filename, required_attributes=()): if not has_required[i]: print("{0} not found.".format(required_attributes[i])) exit(1) - diff --git a/hagelslag/util/convert_mrms_grids.py b/hagelslag/util/convert_mrms_grids.py index ef96901..8e0df46 100644 --- a/hagelslag/util/convert_mrms_grids.py +++ b/hagelslag/util/convert_mrms_grids.py @@ -1,18 +1,21 @@ -import pandas as pd +import argparse import os +import pickle import subprocess +import traceback +import warnings +from datetime import datetime, timedelta +from multiprocessing import Pool + import numpy as np +import pandas as pd +import xarray as xr +from netCDF4 import Dataset, date2num from scipy.interpolate import RectBivariateSpline from scipy.spatial import cKDTree -import pickle -from netCDF4 import Dataset, date2num -from datetime import datetime, timedelta + from hagelslag.util.make_proj_grids import read_arps_map_file, read_ncar_map_file, make_proj_grids -from multiprocessing import Pool -import warnings -import traceback -import argparse -import xarray as xr + def main(): warnings.simplefilter("ignore") @@ -117,6 +120,7 @@ class MRMSGrid(object): MRMSGrid reads time series of MRMS grib2 files, interpolates them, and outputs them to netCDF4 format. 
""" + def __init__(self, start_date, end_date, variable, path_start, freq="1H"): self.start_date = start_date self.end_date = end_date @@ -145,7 +149,7 @@ def load_data(self): data_files = sorted(os.listdir(full_path)) file_dates = pd.to_datetime([d.split("_")[-1][0:13] for d in data_files]) if timestamp in file_dates: - data_file = data_files[np.where(timestamp==file_dates)[0][0]] + data_file = data_files[np.where(timestamp == file_dates)[0][0]] print(full_path + data_file) if data_file[-2:] == "gz": subprocess.call(["gunzip", full_path + data_file]) @@ -217,7 +221,8 @@ def max_neighbor(self, in_lon, in_lat, radius=0.05): if len(nz_points[0]) > 0: nz_vals = self.data[d][nz_points] nz_rank = np.argsort(nz_vals) - original_points = cKDTree(np.vstack((self.lat[nz_points[0][nz_rank]], self.lon[nz_points[1][nz_rank]])).T) + original_points = cKDTree( + np.vstack((self.lat[nz_points[0][nz_rank]], self.lon[nz_points[1][nz_rank]])).T) all_neighbors = original_points.query_ball_tree(in_tree, radius, p=2, eps=0) for n, neighbors in enumerate(all_neighbors): if len(neighbors) > 0: @@ -246,7 +251,7 @@ def interpolate_to_netcdf(self, in_lon, in_lat, out_path, date_unit="seconds sin out_obj.createDimension("time", out_data.shape[0]) out_obj.createDimension("y", out_data.shape[1]) out_obj.createDimension("x", out_data.shape[2]) - data_var = out_obj.createVariable(self.variable, "f4", ("time", "y", "x"), zlib=True, + data_var = out_obj.createVariable(self.variable, "f4", ("time", "y", "x"), zlib=True, fill_value=-9999.0, least_significant_digit=3) data_var[:] = out_data @@ -270,7 +275,7 @@ def interpolate_to_netcdf(self, in_lon, in_lat, out_path, date_unit="seconds sin dates[:] = np.round(date2num(self.all_dates.to_pydatetime(), date_unit)).astype(np.int64) dates.long_name = "Valid date" dates.units = date_unit - out_obj.Conventions="CF-1.6" + out_obj.Conventions = "CF-1.6" out_obj.close() return diff --git a/hagelslag/util/create_model_grid_us_mask.py b/hagelslag/util/create_model_grid_us_mask.py index f387d62..92dd5cb 100644 --- a/hagelslag/util/create_model_grid_us_mask.py +++ b/hagelslag/util/create_model_grid_us_mask.py @@ -1,11 +1,12 @@ -from skimage.draw import polygon +import argparse + import numpy as np import shapefile from netCDF4 import Dataset -from hagelslag.util.make_proj_grids import read_arps_map_file, read_ncar_map_file, make_proj_grids from pyproj import Proj -import argparse -import matplotlib.pyplot as plt +from skimage.draw import polygon + +from hagelslag.util.make_proj_grids import read_arps_map_file, read_ncar_map_file, make_proj_grids def main(): @@ -19,9 +20,9 @@ def main(): print("Creating mask grid") mask_grid = create_mask_grid(args.shape, mapping_data, proj_dict, grid_dict) output_netcdf_file(args.out, mask_grid, proj_dict, grid_dict) - #plt.figure(figsize=(10, 6)) - #plt.contourf(mapping_data["lon"], mapping_data["lat"], mask_grid, cmap="Reds", vmin=0, vmax=1) - #plt.show() + # plt.figure(figsize=(10, 6)) + # plt.contourf(mapping_data["lon"], mapping_data["lat"], mask_grid, cmap="Reds", vmin=0, vmax=1) + # plt.show() return @@ -42,7 +43,7 @@ def create_mask_grid(mask_shape_file, mapping_data, proj_dict, grid_dict): sf = shapefile.Reader(mask_shape_file) s = 0 for state_shape in sf.shapeRecords(): - print(s, state_shape.record[4]) #changed [5]) + print(s, state_shape.record[4]) # changed [5]) part_start = list(state_shape.shape.parts) part_end = list(state_shape.shape.parts[1:]) + [len(state_shape.shape.points)] for p in range(len(part_start)): diff --git 
a/hagelslag/util/create_sector_grid_data.py b/hagelslag/util/create_sector_grid_data.py index e3b4b43..4177358 100644 --- a/hagelslag/util/create_sector_grid_data.py +++ b/hagelslag/util/create_sector_grid_data.py @@ -1,31 +1,33 @@ #!/home/tmp/aburke/miniconda3/bin/python -u -from hagelslag.util.make_proj_grids import read_ncar_map_file -from netCDF4 import Dataset -import pandas as pd -import numpy as np import os -from netCDF4 import Dataset from os.path import exists +import numpy as np +import pandas as pd +from netCDF4 import Dataset + +from hagelslag.util.make_proj_grids import read_ncar_map_file + + class SectorProcessor(object): - def __init__(self,mapfile, - ensemble_name,member, - run_date,date_format): + def __init__(self, mapfile, + ensemble_name, member, + run_date, date_format): self.mapfile = mapfile self.ensemble_name = ensemble_name self.member = member self.run_date = run_date self.date_format = date_format - self.inds = None - proj_dict, grid_dict = read_ncar_map_file(self.mapfile) - - self.ne_lat, self.sw_lat = grid_dict["ne_lat"],grid_dict["sw_lat"] - self.ne_lon, self.sw_lon = grid_dict["ne_lon"],grid_dict["sw_lon"] + self.inds = None + proj_dict, grid_dict = read_ncar_map_file(self.mapfile) + + self.ne_lat, self.sw_lat = grid_dict["ne_lat"], grid_dict["sw_lat"] + self.ne_lon, self.sw_lon = grid_dict["ne_lon"], grid_dict["sw_lon"] return - - def output_sector_csv(self,csv_path,file_dict_key,out_path): + + def output_sector_csv(self, csv_path, file_dict_key, out_path): """ Segment forecast tracks to only output data contined within a region in the CONUS, as defined by the mapfile. @@ -39,73 +41,72 @@ def output_sector_csv(self,csv_path,file_dict_key,out_path): Segmented forecast tracks in a csv file. """ csv_file = csv_path + "{0}_{1}_{2}_{3}.csv".format( - file_dict_key, - self.ensemble_name, - self.member, - self.run_date.strftime(self.date_format)) + file_dict_key, + self.ensemble_name, + self.member, + self.run_date.strftime(self.date_format)) if exists(csv_file): csv_data = pd.read_csv(csv_file) - + if self.inds is None: - lon_obj = csv_data.loc[:,"Centroid_Lon"] - lat_obj = csv_data.loc[:,"Centroid_Lat"] - - self.inds = np.where((self.ne_lat>=lat_obj)&(self.sw_lat<=lat_obj)\ - &(self.ne_lon>=lon_obj)&(self.sw_lon<=lon_obj))[0] - + lon_obj = csv_data.loc[:, "Centroid_Lon"] + lat_obj = csv_data.loc[:, "Centroid_Lat"] + + self.inds = np.where((self.ne_lat >= lat_obj) & (self.sw_lat <= lat_obj) \ + & (self.ne_lon >= lon_obj) & (self.sw_lon <= lon_obj))[0] + if np.shape(self.inds)[0] > 0: - csv_data = csv_data.reindex(np.array(self.inds)) + csv_data = csv_data.reindex(np.array(self.inds)) sector_csv_filename = out_path + "{0}_{1}_{2}_{3}.csv".format( - file_dict_key, - self.ensemble_name, - self.member, - self.run_date.strftime(self.date_format)) + file_dict_key, + self.ensemble_name, + self.member, + self.run_date.strftime(self.date_format)) print("Output sector csv file " + sector_csv_filename) csv_data.to_csv(sector_csv_filename, - na_rep="nan", - float_format="%0.5f", - index=False) + na_rep="nan", + float_format="%0.5f", + index=False) os.chmod(sector_csv_filename, 0o666) else: print('No {0} {1} sector data found'.format(self.member, - self.run_date.strftime("%Y%m%d"))) - + self.run_date.strftime("%Y%m%d"))) + else: print('No {0} {1} csv file found'.format(self.member, - self.run_date.strftime("%Y%m%d"))) - return - + self.run_date.strftime("%Y%m%d"))) + return - def load_netcdf_data(self,netcdf_path,patch_radius): - nc_file = netcdf_path+ 
"{0}_{1}_{2}_model_patches.nc".format( - self.ensemble_name, - self.run_date.strftime(self.date_format), - self.member) + def load_netcdf_data(self, netcdf_path, patch_radius): + nc_file = netcdf_path + "{0}_{1}_{2}_model_patches.nc".format( + self.ensemble_name, + self.run_date.strftime(self.date_format), + self.member) if exists(nc_file): nc_data = Dataset(nc_file) - + lon_obj = nc_data.variables['centroid_lon'][:] lat_obj = nc_data.variables['centroid_lat'][:] - - inds = np.where((self.ne_lat>=lat_obj)&(self.sw_lat<=lat_obj)\ - &(self.ne_lon>=lon_obj)&(self.sw_lon<=lon_obj))[0] + + inds = np.where((self.ne_lat >= lat_obj) & (self.sw_lat <= lat_obj) \ + & (self.ne_lon >= lon_obj) & (self.sw_lon <= lon_obj))[0] if np.shape(inds)[0] > 0: for var in nc_data.variables: - if np.shape(nc_data.variables[var])[0] > (2*patch_radius): + if np.shape(nc_data.variables[var])[0] > (2 * patch_radius): try: - nc_data.variables[var] =\ - nc_data.variables[var][np.array(inds),:,:] + nc_data.variables[var] = \ + nc_data.variables[var][np.array(inds), :, :] except: - nc_data.variables[var] =\ + nc_data.variables[var] = \ nc_data.variables[var][np.array(inds)] else: - nc_data=None + nc_data = None else: - nc_data=None - + nc_data = None + return nc_data - - def output_sector_netcdf(self,netcdf_path,out_path,patch_radius,config): + + def output_sector_netcdf(self, netcdf_path, out_path, patch_radius, config): """ Segment patches of forecast tracks to only output data contined within a region in the CONUS, as defined by the mapfile. @@ -119,14 +120,14 @@ def output_sector_netcdf(self,netcdf_path,out_path,patch_radius,config): Returns: Segmented patch netcdf files. """ - - nc_data = self.load_netcdf_data(netcdf_path,patch_radius) - + + nc_data = self.load_netcdf_data(netcdf_path, patch_radius) + if nc_data is not None: out_filename = out_path + "{0}_{1}_{2}_model_patches.nc".format( - self.ensemble_name, - self.run_date.strftime(self.date_format), - self.member) + self.ensemble_name, + self.run_date.strftime(self.date_format), + self.member) out_file = Dataset(out_filename, "w") out_file.createDimension("p", np.shape(nc_data.variables['p'])[0]) out_file.createDimension("row", np.shape(nc_data.variables['row'])[0]) @@ -135,26 +136,26 @@ def output_sector_netcdf(self,netcdf_path,out_path,patch_radius,config): out_file.createVariable("row", "i4", ("row",)) out_file.createVariable("col", "i4", ("col",)) out_file.variables["p"][:] = nc_data.variables['p'][:] - out_file.variables["row"][:] = nc_data.variables['row'][:] - out_file.variables["col"][:] = nc_data.variables['col'][:] + out_file.variables["row"][:] = nc_data.variables['row'][:] + out_file.variables["col"][:] = nc_data.variables['col'][:] out_file.Conventions = "CF-1.6" out_file.title = "{0} Storm Patches for run {1} member {2}".format(self.ensemble_name, - self.run_date.strftime(self.date_format), - self.member) + self.run_date.strftime(self.date_format), + self.member) out_file.object_variable = config.watershed_variable meta_variables = ["lon", "lat", "i", "j", "x", "y", "masks"] meta_units = ["degrees_east", "degrees_north", "", "", "m", "m", ""] center_vars = ["time", "centroid_lon", "centroid_lat", "centroid_i", "centroid_j", "track_id", "track_step"] center_units = ["hours since {0}".format(self.run_date.strftime("%Y-%m-%d %H:%M:%S")), - "degrees_east", - "degrees_north", - "", - "", - "", - ""] + "degrees_east", + "degrees_north", + "", + "", + "", + ""] label_columns = ["Matched", "Max_Hail_Size", "Num_Matches", "Shape", "Location", "Scale"] - + 
for m, meta_variable in enumerate(meta_variables): if meta_variable in ["i", "j", "masks"]: dtype = "i4" @@ -163,7 +164,7 @@ def output_sector_netcdf(self,netcdf_path,out_path,patch_radius,config): m_var = out_file.createVariable(meta_variable, dtype, ("p", "row", "col"), complevel=1, zlib=True) m_var.long_name = meta_variable m_var.units = meta_units[m] - + for c, center_var in enumerate(center_vars): if center_var in ["time", "track_id", "track_step"]: dtype = "i4" @@ -171,19 +172,20 @@ def output_sector_netcdf(self,netcdf_path,out_path,patch_radius,config): dtype = "f4" c_var = out_file.createVariable(center_var, dtype, ("p",), zlib=True, complevel=1) c_var.long_name = center_var - c_var.units =center_units[c] - + c_var.units = center_units[c] + for storm_variable in config.storm_variables: - s_var = out_file.createVariable(storm_variable + "_curr", "f4", ("p", "row", "col"), complevel=1, zlib=True) + s_var = out_file.createVariable(storm_variable + "_curr", "f4", ("p", "row", "col"), complevel=1, + zlib=True) s_var.long_name = storm_variable s_var.units = "" - + for potential_variable in config.potential_variables: p_var = out_file.createVariable(potential_variable + "_prev", "f4", ("p", "row", "col"), - complevel=1, zlib=True) + complevel=1, zlib=True) p_var.long_name = potential_variable p_var.units = "" - + if config.train: for label_column in label_columns: if label_column in ["Matched", "Num_Matches"]: @@ -193,30 +195,30 @@ def output_sector_netcdf(self,netcdf_path,out_path,patch_radius,config): l_var = out_file.createVariable(label_column, dtype, ("p",), zlib=True, complevel=1) l_var.long_name = label_column l_var.units = "" - + out_file.variables["time"][:] = nc_data.variables['time'][:] - + for c_var in ["lon", "lat"]: - out_file.variables["centroid_" + c_var][:] = nc_data.variables['centroid_' + c_var][:] + out_file.variables["centroid_" + c_var][:] = nc_data.variables['centroid_' + c_var][:] for c_var in ["i", "j"]: - out_file.variables["centroid_" + c_var][:] = nc_data.variables["centroid_" + c_var][:] - - out_file.variables["track_id"][:] = nc_data.variables['track_id'][:] - out_file.variables["track_step"][:] = nc_data.variables['track_step'][:] - + out_file.variables["centroid_" + c_var][:] = nc_data.variables["centroid_" + c_var][:] + + out_file.variables["track_id"][:] = nc_data.variables['track_id'][:] + out_file.variables["track_step"][:] = nc_data.variables['track_step'][:] + for meta_var in meta_variables: if meta_var in ["lon", "lat"]: out_file.variables[meta_var][:] = nc_data.variables[meta_var][:] else: out_file.variables[meta_var][:] = nc_data.variables[meta_var][:] - + for storm_variable in config.storm_variables: out_file.variables[storm_variable + "_curr"][:] = nc_data.variables[storm_variable + '_curr'][:] - + for p_variable in config.potential_variables: out_file.variables[p_variable + "_prev"][:] = nc_data.variables[p_variable + '_prev'][:] - + if config.train: for label_column in label_columns: try: @@ -225,13 +227,10 @@ def output_sector_netcdf(self,netcdf_path,out_path,patch_radius,config): out_file.variables[label_column][:] = 0 out_file.close() - + print("Output sector nc file " + out_filename) else: print('No {0} {1} netcdf file/sector data found'.format(self.member, - self.run_date.strftime("%Y%m%d"))) + self.run_date.strftime("%Y%m%d"))) return - -if __name__ == "__main__": - main() diff --git a/hagelslag/util/custom_grib_table.py b/hagelslag/util/custom_grib_table.py index 324c18e..5c644a6 100644 --- a/hagelslag/util/custom_grib_table.py +++ 
b/hagelslag/util/custom_grib_table.py @@ -1,6 +1,7 @@ -import pandas as pd import argparse +import pandas as pd + def main(): parser = argparse.ArgumentParser() @@ -23,4 +24,4 @@ def main(): if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/hagelslag/util/derived_vars.py b/hagelslag/util/derived_vars.py index 0ed99bc..8b3aa8e 100644 --- a/hagelslag/util/derived_vars.py +++ b/hagelslag/util/derived_vars.py @@ -10,4 +10,4 @@ def melting_layer_height(height_surface, height_700mb, height_500mb, temperature def relative_humidity_pressure_level(temperature, specific_humidity, pressure): saturation_vapor_pressure = 611.2 * np.exp((17.67 * temperature) / (temperature + 243.5)) vapor_pressure = specific_humidity / 1000.0 * pressure / 0.622 - return vapor_pressure / saturation_vapor_pressure * 100.0 \ No newline at end of file + return vapor_pressure / saturation_vapor_pressure * 100.0 diff --git a/hagelslag/util/hrefv2_symbolic_Links.py b/hagelslag/util/hrefv2_symbolic_Links.py index f9d17a4..98f6e57 100644 --- a/hagelslag/util/hrefv2_symbolic_Links.py +++ b/hagelslag/util/hrefv2_symbolic_Links.py @@ -1,114 +1,117 @@ -from datetime import datetime, timedelta -from os.path import exists -import pandas as pd import argparse import os +from datetime import timedelta + +import pandas as pd + def main(): parser = argparse.ArgumentParser() parser.add_argument("-s", "--start_date", required=True, help="Start date for symbolic links in YYYY-MM-DD format") parser.add_argument("-e", "--end_date", required=False, help="End date for symbolic links in YYYY-MM-DD format") parser.add_argument("-p", "--source_path", required=True, help="Path to the source directory of HREFv2 dataset") - parser.add_argument("-o", "--destination_path", required=True, help="Path to the destination of symbolic HREFv2 dataset") + parser.add_argument("-o", "--destination_path", required=True, + help="Path to the destination of symbolic HREFv2 dataset") args = parser.parse_args() source_path = args.source_path - destination_path = args.destination_path + destination_path = args.destination_path if args.end_date: date_index = pd.DatetimeIndex(start=args.start_date, end=args.end_date, freq='1D') else: date_index = pd.DatetimeIndex(start=args.start_date, end=args.start_date, freq='1D') - member = ['nam_00' ,'nam_12','nmmb_00','nmmb_12','nssl_00','nssl_12','arw_00','arw_12'] + member = ['nam_00', 'nam_12', 'nmmb_00', 'nmmb_12', 'nssl_00', 'nssl_12', 'arw_00', 'arw_12'] - for datetime_date in date_index: + for datetime_date in date_index: source_date = datetime_date.strftime("%Y%m%d") - print('\nSource Date',source_date) + print('\nSource Date', source_date) for m in member: source_member = str(m.split('_')[0]) if 'nam' in source_member: - source_date_path = source_path+'nam_conusnest/' - elif source_member in ['arw','nssl','nmmb']: - source_date_path = source_path+'hiresw_conus{0}/'.format(source_member) + source_date_path = source_path + 'nam_conusnest/' + elif source_member in ['arw', 'nssl', 'nmmb']: + source_date_path = source_path + 'hiresw_conus{0}/'.format(source_member) + + destination_date_path = destination_path + '{0}/{1}/'.format(m, source_date) - destination_date_path = destination_path+'{0}/{1}/'.format(m,source_date) - if not os.path.exists(destination_date_path): os.makedirs(destination_date_path) - if '00' in m: - if m in ['nam_00']: - for fore_hr in range(0,37): - source = source_date_path+'nam_conusnest.{1}00/nam_conusnest.{1}00f{0:02}'.format( - fore_hr, - source_date) - destination = 
destination_date_path+'nam_conusnest_{1}00f0{0:02}.grib2'.format( - fore_hr, - source_date) + if '00' in m: + if m in ['nam_00']: + for fore_hr in range(0, 37): + source = source_date_path + 'nam_conusnest.{1}00/nam_conusnest.{1}00f{0:02}'.format( + fore_hr, + source_date) + destination = destination_date_path + 'nam_conusnest_{1}00f0{0:02}.grib2'.format( + fore_hr, + source_date) if os.path.exists(destination): - continue - if os.path.exists(source): - os.symlink(source,destination) + continue + if os.path.exists(source): + os.symlink(source, destination) else: print('Incorrect source file:') print(source) - elif m in ['arw_00','nssl_00','nmmb_00']: - for fore_hr in range(0,37): - source = source_date_path+'hiresw_conus{0}.{1}00/hiresw_conus{0}.{1}00f{2:02}'.format( - source_member, - source_date, - fore_hr) - destination = destination_date_path+'hiresw_conus{2}_{1}00f0{0:02}.grib2'.format( - fore_hr, - source_date, - source_member) - + elif m in ['arw_00', 'nssl_00', 'nmmb_00']: + for fore_hr in range(0, 37): + source = source_date_path + 'hiresw_conus{0}.{1}00/hiresw_conus{0}.{1}00f{2:02}'.format( + source_member, + source_date, + fore_hr) + destination = destination_date_path + 'hiresw_conus{2}_{1}00f0{0:02}.grib2'.format( + fore_hr, + source_date, + source_member) + if os.path.exists(destination): - continue - if os.path.exists(source): - os.symlink(source,destination) + continue + if os.path.exists(source): + os.symlink(source, destination) else: print('Incorrect source file:') print(source) elif '12' in m: - changed_date = (datetime_date-timedelta(days=1)).strftime("%Y%m%d") + changed_date = (datetime_date - timedelta(days=1)).strftime("%Y%m%d") if 'nam_12' in m: - for fore_hr in range(0,37): - source = source_date_path+'nam_conusnest.{1}12/nam_conusnest.{1}12f{0:02}'.format( - (fore_hr+12), - changed_date) - - destination = destination_date_path+'nam_conusnest_{1}12f0{0:02}.grib2'.format( - fore_hr, - source_date) - + for fore_hr in range(0, 37): + source = source_date_path + 'nam_conusnest.{1}12/nam_conusnest.{1}12f{0:02}'.format( + (fore_hr + 12), + changed_date) + + destination = destination_date_path + 'nam_conusnest_{1}12f0{0:02}.grib2'.format( + fore_hr, + source_date) + if os.path.exists(destination): - continue - if os.path.exists(source): - os.symlink(source,destination) + continue + if os.path.exists(source): + os.symlink(source, destination) else: print('Incorrect source file:') print(source) - - elif m in ['arw_12','nssl_12','nmmb_12']: - for fore_hr in range(0,37): - source = source_date_path+'hiresw_conus{0}.{1}12/hiresw_conus{0}.{1}12f{2:02}'.format( - source_member, - changed_date, - (fore_hr+12)) - - destination = destination_date_path+'hiresw_conus{2}_{1}12f0{0:02}.grib2'.format( - fore_hr, - source_date, - source_member) + + elif m in ['arw_12', 'nssl_12', 'nmmb_12']: + for fore_hr in range(0, 37): + source = source_date_path + 'hiresw_conus{0}.{1}12/hiresw_conus{0}.{1}12f{2:02}'.format( + source_member, + changed_date, + (fore_hr + 12)) + + destination = destination_date_path + 'hiresw_conus{2}_{1}12f0{0:02}.grib2'.format( + fore_hr, + source_date, + source_member) if os.path.exists(destination): - continue - if os.path.exists(source): - os.symlink(source,destination) + continue + if os.path.exists(source): + os.symlink(source, destination) else: print('Incorrect source file:') print(source) - + return + if __name__ == "__main__": - main() + main() diff --git a/hagelslag/util/lsr_calibration_dataset.py b/hagelslag/util/lsr_calibration_dataset.py index 
278959b..9872482 100644 --- a/hagelslag/util/lsr_calibration_dataset.py +++ b/hagelslag/util/lsr_calibration_dataset.py @@ -1,11 +1,12 @@ -from make_proj_grids import * -from netCDF4 import Dataset -import cartopy.crs as ccrs -import pandas as pd -import numpy as np import argparse -import os + +import numpy as np +import pandas as pd import pyproj +from netCDF4 import Dataset + +from make_proj_grids import * + def main(): parser = argparse.ArgumentParser() @@ -18,13 +19,14 @@ def main(): run_dates = pd.date_range(start=args.start_date, end=args.end_date, freq='1D').strftime("%y%m%d") else: run_dates = pd.date_range(start=args.start_date, end=args.start_date, freq='1D').strftime("%y%m%d") - out_path = args.out_path + out_path = args.out_path mapfile = args.map_file - LSR_calibration_data(mapfile,out_path,run_dates) - return + LSR_calibration_data(mapfile, out_path, run_dates) + return -def LSR_calibration_data(mapfile,out_path,run_dates,hours=[17,19,21],sector=None): + +def LSR_calibration_data(mapfile, out_path, run_dates, hours=[17, 19, 21], sector=None): """ Using the grid from input ML forecast (netcdf) data, SPC storm reports with a 25 mile radius around the reports can be plotted. @@ -34,11 +36,11 @@ def LSR_calibration_data(mapfile,out_path,run_dates,hours=[17,19,21],sector=None Currently only supports netcdf files. """ - hail_threshold = [25,50] + hail_threshold = [25, 50] lsr_dict = dict() - - proj_dict, grid_dict = read_ncar_map_file(mapfile) + + proj_dict, grid_dict = read_ncar_map_file(mapfile) projection = pyproj.Proj(proj_dict) mapping_data = make_proj_grids(proj_dict, grid_dict) @@ -47,96 +49,102 @@ def LSR_calibration_data(mapfile,out_path,run_dates,hours=[17,19,21],sector=None forecast_x = np.array(mapping_data['x']) forecast_y = np.array(mapping_data['y']) - for date in run_dates: + for date in run_dates: print(date) csv_file = 'https://www.spc.noaa.gov/climo/reports/{0}_rpts_hail.csv'.format(date) try: hail_reports = pd.read_csv(csv_file) except: print('Report CSV file could not be opened.') - continue + continue for threshold in hail_threshold: - #if os.path.exists(out_path+'{0}_{1}_lsr_mask.nc'.format(date,threshold)): + # if os.path.exists(out_path+'{0}_{1}_lsr_mask.nc'.format(date,threshold)): # print('>{0}mm file already exists'.format(threshold)) # continue - #print('Creating LSR mask >{0}mm'.format(threshold)) - - #Get size values from hail reports - inches_thresh = round((threshold)*0.03937)*100 - report_size = hail_reports.loc[:,'Size'].values - + # print('Creating LSR mask >{0}mm'.format(threshold)) + + # Get size values from hail reports + inches_thresh = round((threshold) * 0.03937) * 100 + report_size = hail_reports.loc[:, 'Size'].values + lsr_dict['full_day'] = np.zeros(forecast_lats.shape) full_day_indices = np.where(report_size >= inches_thresh)[0] if len(full_day_indices) < 1: print('No >{0}mm LSRs found'.format(threshold)) - continue - reports_lat_full = hail_reports.loc[full_day_indices,'Lat'].values - reports_lon_full = hail_reports.loc[full_day_indices,'Lon'].values - - lsr_dict['full_day'] = calculate_distance(reports_lat_full,reports_lon_full,forecast_y,forecast_x,projection) - - #Get time values from hail reports - report_time = (hail_reports.loc[:,'Time'].values)/100 - #Get lat/lon of different time periods and hail sizes - + continue + reports_lat_full = hail_reports.loc[full_day_indices, 'Lat'].values + reports_lon_full = hail_reports.loc[full_day_indices, 'Lon'].values + + lsr_dict['full_day'] = calculate_distance(reports_lat_full, 
reports_lon_full, forecast_y, forecast_x, + projection) + + # Get time values from hail reports + report_time = (hail_reports.loc[:, 'Time'].values) / 100 + # Get lat/lon of different time periods and hail sizes + for start_hour in hours: lsr_dict['{0}'.format(start_hour)] = np.zeros(forecast_lats.shape) - end_hour = (start_hour+4)%24 + end_hour = (start_hour + 4) % 24 if end_hour > 12: - hour_indices = np.where((start_hour <= report_time) & (end_hour >= report_time) & (report_size >= inches_thresh))[0] + hour_indices = \ + np.where((start_hour <= report_time) & (end_hour >= report_time) & (report_size >= inches_thresh))[ + 0] else: - #Find reports before and after 0z + # Find reports before and after 0z hour_before_0z = np.where((start_hour <= report_time) & (report_size >= inches_thresh))[0] - hour_after_0z = np.where((end_hour >= report_time) & (report_size >= inches_thresh))[0] - #Combine two arrays + hour_after_0z = np.where((end_hour >= report_time) & (report_size >= inches_thresh))[0] + # Combine two arrays hour_indices = np.hstack((hour_before_0z, hour_after_0z)) - if len(hour_indices) < 1: + if len(hour_indices) < 1: continue - reports_lat = hail_reports.loc[hour_indices,'Lat'].values - reports_lon = hail_reports.loc[hour_indices,'Lon'].values - lsr_dict['{0}'.format(start_hour)] = calculate_distance(reports_lat,reports_lon,forecast_y,forecast_x,projection) - + reports_lat = hail_reports.loc[hour_indices, 'Lat'].values + reports_lon = hail_reports.loc[hour_indices, 'Lon'].values + lsr_dict['{0}'.format(start_hour)] = calculate_distance(reports_lat, reports_lon, forecast_y, + forecast_x, projection) + # Create netCDF file if sector: - out_filename = out_path+'{0}_{1}_{2}_lsr_mask.nc'.format(date,threshold,sector) + out_filename = out_path + '{0}_{1}_{2}_lsr_mask.nc'.format(date, threshold, sector) else: - out_filename = out_path+'{0}_{1}_lsr_mask.nc'.format(date,threshold) - + out_filename = out_path + '{0}_{1}_lsr_mask.nc'.format(date, threshold) + out_file = Dataset(out_filename, "w") out_file.createDimension("x", forecast_lons.shape[0]) out_file.createDimension("y", forecast_lons.shape[1]) out_file.createVariable("Longitude", "f4", ("x", "y")) - out_file.createVariable("Latitude", "f4",("x", "y")) + out_file.createVariable("Latitude", "f4", ("x", "y")) out_file.createVariable("24_Hour_All_12z_12z", "f4", ("x", "y")) out_file.createVariable("4_Hour_All_17z_21z", "f4", ("x", "y")) out_file.createVariable("4_Hour_All_19z_23z", "f4", ("x", "y")) out_file.createVariable("4_Hour_All_21z_25z", "f4", ("x", "y")) - out_file.variables["Longitude"][:,:] = forecast_lons - out_file.variables["Latitude"][:,:] = forecast_lats - out_file.variables["24_Hour_All_12z_12z"][:,:] = lsr_dict['full_day'] - out_file.variables["4_Hour_All_17z_21z"][:,:] = lsr_dict['17'] - out_file.variables["4_Hour_All_19z_23z"][:,:] = lsr_dict['19'] - out_file.variables["4_Hour_All_21z_25z"][:,:] = lsr_dict['21'] + out_file.variables["Longitude"][:, :] = forecast_lons + out_file.variables["Latitude"][:, :] = forecast_lats + out_file.variables["24_Hour_All_12z_12z"][:, :] = lsr_dict['full_day'] + out_file.variables["4_Hour_All_17z_21z"][:, :] = lsr_dict['17'] + out_file.variables["4_Hour_All_19z_23z"][:, :] = lsr_dict['19'] + out_file.variables["4_Hour_All_21z_25z"][:, :] = lsr_dict['21'] out_file.close() print("Writing to " + out_filename) - print() + print() return -def calculate_distance(obs_lat,obs_lon,forecast_y,forecast_x,projection): + +def calculate_distance(obs_lat, obs_lon, forecast_y, forecast_x, projection): 
""" Calculate the difference between forecast data points and observed data. Returns: Binary array where ones are within a 40km radius """ - x,y = projection(obs_lon,obs_lat) + x, y = projection(obs_lon, obs_lat) mask_array = np.zeros(forecast_y.shape) for index, point in enumerate(obs_lat): - y_diff = (y[index]-forecast_y)**2.0 - x_diff = (x[index]-forecast_x)**2.0 - total_dist = np.sqrt(y_diff+x_diff) + y_diff = (y[index] - forecast_y) ** 2.0 + x_diff = (x[index] - forecast_x) ** 2.0 + total_dist = np.sqrt(y_diff + x_diff) row, col = np.where(total_dist < 40234.0) - mask_array[row,col] =+ 1.0 + mask_array[row, col] = + 1.0 return mask_array + if __name__ == "__main__": - main() + main() diff --git a/hagelslag/util/make_proj_grids.py b/hagelslag/util/make_proj_grids.py index f9cac39..194071f 100644 --- a/hagelslag/util/make_proj_grids.py +++ b/hagelslag/util/make_proj_grids.py @@ -1,5 +1,5 @@ -from pyproj import Proj import numpy as np +from pyproj import Proj def main(): @@ -22,10 +22,10 @@ def read_arps_map_file(map_filename): grid_names = ['sw_lat', 'sw_lon', 'ne_lat', 'ne_lon', 'dx', 'dy'] for i in range(len(proj_names) + len(grid_names)): if i < len(proj_names): - proj_dict[proj_names[i]] = float(map_params[i+2]) + proj_dict[proj_names[i]] = float(map_params[i + 2]) else: j = i - len(proj_names) - grid_dict[grid_names[j]] = float(map_params[i+2]) + grid_dict[grid_names[j]] = float(map_params[i + 2]) return proj_dict, grid_dict @@ -70,5 +70,6 @@ def make_proj_grids(proj_dict, grid_dict): def get_proj_obj(proj_dict): return Proj(proj_dict) + if __name__ == "__main__": main() diff --git a/hagelslag/util/merge_forecast_data.py b/hagelslag/util/merge_forecast_data.py index 864405c..4a055c9 100644 --- a/hagelslag/util/merge_forecast_data.py +++ b/hagelslag/util/merge_forecast_data.py @@ -1,12 +1,13 @@ -import pandas as pd -import numpy as np -import json import argparse -from os.path import exists -from multiprocessing import Pool -from glob import glob -from datetime import datetime +import json import traceback +from datetime import datetime +from glob import glob +from multiprocessing import Pool +from os.path import exists + +import pandas as pd + def main(): parser = argparse.ArgumentParser() @@ -37,6 +38,7 @@ def output_combined_files(output): else: output[0].to_csv(out_file, mode="w", index_label="Step_ID") return + csv_files = sorted(glob(args.csv + "track_step_{0}_*.csv".format(args.ens))) print(csv_files) for csv_file in csv_files: @@ -83,20 +85,20 @@ def merge_input_csv_forecast_json(input_csv_file, forecast_json_path, condition_ for param in gamma_params: model_pred_cols.append(dist_model.replace(" ", "-") + "_" + param) pred_data = pd.DataFrame(index=input_data.index, columns=model_pred_cols, - dtype=float) + dtype=float) for track_id in track_ids: track_id_num = track_id.split("_")[-1] json_filename = full_json_path + "{0}_{1}_{2}_model_track_{3}.json".format(ens_name, - run_date, - ens_member, - track_id_num) + run_date, + ens_member, + track_id_num) json_file = open(json_filename) json_data = json.load(json_file) json_file.close() for s, step in enumerate(json_data["features"]): step_id = track_id + "_{0:02d}".format(s) for cond_model in condition_models_ns: - pred_data.loc[step_id, cond_model + "_Condition"] = step["properties"]["condition_" + cond_model] + pred_data.loc[step_id, cond_model + "_Condition"] = step["properties"]["condition_" + cond_model] for dist_model in dist_models_ns: pred_data.loc[step_id, [dist_model + "_" + p for p in gamma_params]] = 
step["properties"]["dist_" + dist_model] @@ -105,6 +107,7 @@ def merge_input_csv_forecast_json(input_csv_file, forecast_json_path, condition_ except Exception as e: print(traceback.format_exc()) raise e + + if __name__ == "__main__": main() - diff --git a/hagelslag/util/mrms_mesh_calibration_dataset.py b/hagelslag/util/mrms_mesh_calibration_dataset.py index eb86d62..bb30a19 100644 --- a/hagelslag/util/mrms_mesh_calibration_dataset.py +++ b/hagelslag/util/mrms_mesh_calibration_dataset.py @@ -1,12 +1,14 @@ -from mpl_toolkits.basemap import Basemap -import matplotlib.pyplot as plt -from make_proj_grids import * -from netCDF4 import Dataset -import pandas as pd -import numpy as np import argparse import os +import numpy as np +import pandas as pd +from pyproj import Proj +from netCDF4 import Dataset + +from make_proj_grids import make_proj_grids, read_ncar_map_file + + def main(): parser = argparse.ArgumentParser() parser.add_argument("-s", "--start_date", required=True, help="Start date in YYYY-MM-DD format") @@ -21,122 +23,118 @@ def main(): else: run_dates = pd.DatetimeIndex(start=args.start_date, end=args.start_date, freq='1D').strftime("%y%m%d") data_path = args.data_path - out_path = args.out_path + out_path = args.out_path mapfile = args.map_file maskfile = args.mask_file - - MESH_verification_data(data_path,maskfile,mapfile,run_dates,out_path) - return -def MESH_verification_data(data_path,maskfile,mapfile,run_dates,out_path,hours=[17,19,21]): + MESH_verification_data(data_path, maskfile, mapfile, run_dates, out_path) + return + + +def MESH_verification_data(data_path, maskfile, mapfile, run_dates, out_path, hours=[17, 19, 21]): """ Calculate 40 km halos around MESH values greater than a threshold value """ - hail_threshold = [25,50] + hail_threshold = [25, 50] MESH_dict = dict() - + mask_data = Dataset(maskfile) mask = mask_data.variables['usa_mask'][:] - proj_dict, grid_dict = read_ncar_map_file(mapfile) + proj_dict, grid_dict = read_ncar_map_file(mapfile) mapping_data = make_proj_grids(proj_dict, grid_dict) ML_forecast_lons = mapping_data['lon'] ML_forecast_lats = mapping_data['lat'] - m = Basemap(projection='lcc', resolution="l", - rsphere=6371229.0, - lon_0=proj_dict['lon_0'], - lat_0=proj_dict['lat_0'], - lat_1=proj_dict['lat_1'], - lat_2=proj_dict['lat_2'], - llcrnrlon=grid_dict['sw_lon'], - llcrnrlat=grid_dict['sw_lat'], - urcrnrlon=grid_dict['ne_lon'], - urcrnrlat=grid_dict['ne_lat']) + m = Proj(proj_dict) x1, y1 = m(ML_forecast_lons, ML_forecast_lats) forecast_lat = np.array(y1) forecast_lon = np.array(x1) - - for date in run_dates: + + for date in run_dates: print(date) MESH_file = data_path + 'MESH_Max_60min_00.50_20{0}-00:00_20{0}-23:00.nc'.format(date) if not os.path.exists(MESH_file): print('No MESH file') continue MESH_dataset = Dataset(MESH_file) - MESH_var = MESH_dataset.variables['MESH_Max_60min_00.50']*mask + MESH_var = MESH_dataset.variables['MESH_Max_60min_00.50'] * mask MESH_lats = MESH_dataset.variables['latitude'][:] MESH_lons = MESH_dataset.variables['longitude'][:] - - fullday_MESH = MESH_var[0:23,:,:].max(axis=0) - - for threshold in hail_threshold: - if os.path.exists(out_path+'{0}_{1}_mesh_mask.nc'.format(date,threshold)): + + fullday_MESH = MESH_var[0:23, :, :].max(axis=0) + + for threshold in hail_threshold: + if os.path.exists(out_path + '{0}_{1}_mesh_mask.nc'.format(date, threshold)): print('>{0}mm file already exists'.format(threshold)) continue print('Creating MESH mask >{0}mm'.format(threshold)) - + MESH_dict['full_day'] = np.zeros(MESH_lons.shape) 
- + thresh_row, thresh_col = np.where(fullday_MESH >= threshold) - if len(thresh_row)<1: + if len(thresh_row) < 1: print('No >{0}mm MESH values found'.format(threshold)) - continue - threshold_MESH_lats = MESH_lats[thresh_row,thresh_col] - threshold_MESH_lons = MESH_lons[thresh_row,thresh_col] - MESH_dict['full_day'] = calculate_distance(threshold_MESH_lats,threshold_MESH_lons,forecast_lat,forecast_lon,m) - + continue + threshold_MESH_lats = MESH_lats[thresh_row, thresh_col] + threshold_MESH_lons = MESH_lons[thresh_row, thresh_col] + MESH_dict['full_day'] = calculate_distance(threshold_MESH_lats, threshold_MESH_lons, forecast_lat, + forecast_lon, m) + for hour in hours: MESH_dict['{0}'.format(hour)] = np.zeros(MESH_lons.shape) - - start_hour = (hour-12) - end_hour = start_hour+4 - hourly_MESH = MESH_var[start_hour:end_hour,:,:].max(axis=0) + + start_hour = (hour - 12) + end_hour = start_hour + 4 + hourly_MESH = MESH_var[start_hour:end_hour, :, :].max(axis=0) thresh_row, thresh_col = np.where(hourly_MESH >= threshold) - if len(thresh_row)<1: - continue - threshold_MESH_lats = MESH_lats[thresh_row,thresh_col] - threshold_MESH_lons = MESH_lons[thresh_row,thresh_col] - MESH_dict['{0}'.format(hour)] = calculate_distance(threshold_MESH_lats,threshold_MESH_lons,forecast_lat,forecast_lon,m) - - #Create netcdf file - out_filename = out_path+'{0}_{1}_mesh_mask.nc'.format(date,threshold) + if len(thresh_row) < 1: + continue + threshold_MESH_lats = MESH_lats[thresh_row, thresh_col] + threshold_MESH_lons = MESH_lons[thresh_row, thresh_col] + MESH_dict['{0}'.format(hour)] = calculate_distance(threshold_MESH_lats, threshold_MESH_lons, + forecast_lat, forecast_lon, m) + + # Create netcdf file + out_filename = out_path + '{0}_{1}_mesh_mask.nc'.format(date, threshold) out_file = Dataset(out_filename, "w") out_file.createDimension("x", MESH_lons.shape[0]) out_file.createDimension("y", MESH_lons.shape[1]) out_file.createVariable("Longitude", "f4", ("x", "y")) - out_file.createVariable("Latitude", "f4",("x", "y")) + out_file.createVariable("Latitude", "f4", ("x", "y")) out_file.createVariable("24_Hour_All_12z_12z", "f4", ("x", "y")) out_file.createVariable("4_Hour_All_17z_21z", "f4", ("x", "y")) out_file.createVariable("4_Hour_All_19z_23z", "f4", ("x", "y")) out_file.createVariable("4_Hour_All_21z_25z", "f4", ("x", "y")) - out_file.variables["Longitude"][:,:] = MESH_lons - out_file.variables["Latitude"][:,:] = MESH_lats - out_file.variables["24_Hour_All_12z_12z"][:,:] = MESH_dict['full_day'] - out_file.variables["4_Hour_All_17z_21z"][:,:] = MESH_dict['17'] - out_file.variables["4_Hour_All_19z_23z"][:,:] = MESH_dict['19'] - out_file.variables["4_Hour_All_21z_25z"][:,:] = MESH_dict['21'] + out_file.variables["Longitude"][:, :] = MESH_lons + out_file.variables["Latitude"][:, :] = MESH_lats + out_file.variables["24_Hour_All_12z_12z"][:, :] = MESH_dict['full_day'] + out_file.variables["4_Hour_All_17z_21z"][:, :] = MESH_dict['17'] + out_file.variables["4_Hour_All_19z_23z"][:, :] = MESH_dict['19'] + out_file.variables["4_Hour_All_21z_25z"][:, :] = MESH_dict['21'] out_file.close() print("Writing to " + out_filename) print() - + return -def calculate_distance(obs_lat,obs_lon,forecast_lat,forecast_lon,basemap_obj): + +def calculate_distance(obs_lat, obs_lon, forecast_lat, forecast_lon, proj_obj): """ Calculate the difference between forecast data points and observed data. 
Returns: Binary array where ones are within a 30km radius """ - x, y = basemap_obj(obs_lon, obs_lat) + x, y = proj_obj(obs_lon, obs_lat) mask_array = np.zeros(forecast_lat.shape) for index, point in enumerate(obs_lat): - lat_diff = (y[index]-forecast_lat)**2.0 - lon_diff = (x[index]-forecast_lon)**2.0 - total_dist = np.sqrt(lat_diff+lon_diff) + lat_diff = (y[index] - forecast_lat) ** 2.0 + lon_diff = (x[index] - forecast_lon) ** 2.0 + total_dist = np.sqrt(lat_diff + lon_diff) row, col = np.where(total_dist < 30000.0) - mask_array[row,col] =+ 1.0 + mask_array[row, col] = + 1.0 return mask_array + if __name__ == "__main__": - main() + main() diff --git a/hagelslag/util/munkres.py b/hagelslag/util/munkres.py index 0168aeb..d1b218a 100644 --- a/hagelslag/util/munkres.py +++ b/hagelslag/util/munkres.py @@ -276,25 +276,26 @@ def permute(a, results): # Imports # --------------------------------------------------------------------------- -import sys import copy +import sys # --------------------------------------------------------------------------- # Exports # --------------------------------------------------------------------------- -__all__ = ['Munkres', 'make_cost_matrix'] +__all__ = ['Munkres', 'make_cost_matrix'] # --------------------------------------------------------------------------- # Globals # --------------------------------------------------------------------------- # Info about the module -__version__ = "1.0.7" -__author__ = "Brian Clapper, bmc@clapper.org" -__url__ = "http://software.clapper.org/munkres/" +__version__ = "1.0.7" +__author__ = "Brian Clapper, bmc@clapper.org" +__url__ = "http://software.clapper.org/munkres/" __copyright__ = "(c) 2008 Brian M. Clapper" -__license__ = "BSD-style license" +__license__ = "BSD-style license" + # --------------------------------------------------------------------------- # Classes @@ -326,7 +327,6 @@ def make_cost_matrix(profit_matrix, inversion_function): """ return make_cost_matrix(profit_matrix, inversion_function) - def pad_matrix(self, matrix, pad_value=0): """ Pad a possibly non-square matrix to make it square. 
@@ -398,12 +398,12 @@ def compute(self, cost_matrix): done = False step = 1 - steps = { 1 : self.__step1, - 2 : self.__step2, - 3 : self.__step3, - 4 : self.__step4, - 5 : self.__step5, - 6 : self.__step6 } + steps = {1: self.__step1, + 2: self.__step2, + 3: self.__step3, + 4: self.__step4, + 5: self.__step5, + 6: self.__step6} while not done: try: @@ -482,7 +482,7 @@ def __step3(self): count += 1 if count >= n: - step = 7 # done + step = 7 # done else: step = 4 @@ -542,14 +542,14 @@ def __step5(self): if row >= 0: count += 1 path[count][0] = row - path[count][1] = path[count-1][1] + path[count][1] = path[count - 1][1] else: done = True if not done: col = self.__find_prime_in_row(path[count][0]) count += 1 - path[count][0] = path[count-1][0] + path[count][0] = path[count - 1][0] path[count][1] = col self.__convert_path(path, count) @@ -649,7 +649,7 @@ def __find_prime_in_row(self, row): return col def __convert_path(self, path, count): - for i in range(count+1): + for i in range(count + 1): if self.marked[path[i][0]][path[i][1]] == 1: self.marked[path[i][0]][path[i][1]] = 0 else: @@ -668,6 +668,7 @@ def __erase_primes(self): if self.marked[i][j] == 2: self.marked[i][j] = 0 + # --------------------------------------------------------------------------- # Functions # --------------------------------------------------------------------------- @@ -707,6 +708,7 @@ def make_cost_matrix(profit_matrix, inversion_function): cost_matrix.append([inversion_function(value) for value in row]) return cost_matrix + def print_matrix(matrix, msg=None): """ Convenience function: Displays the contents of a matrix of integers. @@ -740,6 +742,7 @@ def print_matrix(matrix, msg=None): sep = ', ' sys.stdout.write(']\n') + # --------------------------------------------------------------------------- # Main # --------------------------------------------------------------------------- @@ -759,17 +762,16 @@ def print_matrix(matrix, msg=None): [300, 225, 300, 3]], 452), # expected cost - # Square - ([[10, 10, 8], - [9, 8, 1], - [9, 7, 4]], + ([[10, 10, 8], + [9, 8, 1], + [9, 7, 4]], 18), # Rectangular variant - ([[10, 10, 8, 11], - [9, 8, 1, 1], - [9, 7, 4, 10]], + ([[10, 10, 8, 11], + [9, 8, 1, 1], + [9, 7, 4, 10]], 15)] m = Munkres() diff --git a/hagelslag/util/output_tree_ensembles.py b/hagelslag/util/output_tree_ensembles.py index c54a978..051a2c8 100644 --- a/hagelslag/util/output_tree_ensembles.py +++ b/hagelslag/util/output_tree_ensembles.py @@ -2,9 +2,8 @@ """ Read a scikit-learn tree ensemble object and output the object into a human-readable text format. 
""" -import pickle import argparse - +import pickle __author__ = "David John Gagne " __copyright__ = "Copyright 2015, David John Gagne" @@ -61,10 +60,10 @@ def output_tree_ensemble(tree_ensemble_obj, output_filename, attribute_names=Non for t, tree in enumerate(tree_ensemble_obj.estimators_): print("Writing Tree {0:d}".format(t)) out_file = open(output_filename + ".{0:d}.tree", "w") - #out_file.write("Tree {0:d}\n".format(t)) + # out_file.write("Tree {0:d}\n".format(t)) tree_str = print_tree_recursive(tree.tree_, 0, attribute_names) out_file.write(tree_str) - #out_file.write("\n") + # out_file.write("\n") out_file.close() return @@ -115,5 +114,6 @@ def print_tree_recursive(tree_obj, node_index, attribute_names=None): tree_str += print_tree_recursive(tree_obj, tree_obj.children_right[node_index], attribute_names) return tree_str + if __name__ == "__main__": main() diff --git a/hagelslag/util/show_importance_ranks.py b/hagelslag/util/show_importance_ranks.py index 67f4253..c32e79f 100644 --- a/hagelslag/util/show_importance_ranks.py +++ b/hagelslag/util/show_importance_ranks.py @@ -1,7 +1,9 @@ +import argparse import pickle + import numpy as np + from hagelslag.util.Config import Config -import argparse def main(): diff --git a/hagelslag/util/storm_patch_center_coords.py b/hagelslag/util/storm_patch_center_coords.py index 7fd0abf..971e39f 100644 --- a/hagelslag/util/storm_patch_center_coords.py +++ b/hagelslag/util/storm_patch_center_coords.py @@ -1,10 +1,10 @@ -import numpy as np -import pandas as pd -from netCDF4 import Dataset, num2date -from glob import glob import argparse +from glob import glob from os.path import join +import pandas as pd +from netCDF4 import Dataset, num2date + def main(): parser = argparse.ArgumentParser() @@ -17,6 +17,7 @@ def main(): patch_center_frame = pd.concat(patch_center_list, ignore_index=True) patch_center_frame.to_csv(args.out, index=False) + def get_storm_centers(patch_file): patch_info = patch_file.split("/")[-1][:-3].split("_") run_date = pd.Timestamp(patch_info[-3][:-2]) @@ -24,16 +25,18 @@ def get_storm_centers(patch_file): patch_data = None print(run_date, member) with Dataset(patch_file) as patch_set: - num_patches = patch_set.variables["p"].size - lons = patch_set.variables["longitude"][:, 32, 32] - lats = patch_set.variables["latitude"][:, 32, 32] - valid_date = num2date(patch_set.variables["valid_date"][:], patch_set.variables["valid_date"].units) - patch_data = pd.DataFrame(dict(longitude=lons, - latitude=lats, - member=[member] * num_patches, - run_date=[run_date] * num_patches, - valid_date=valid_date), columns=["run_date", "member", "valid_date", "longitude", "latitude"]) + num_patches = patch_set.variables["p"].size + lons = patch_set.variables["longitude"][:, 32, 32] + lats = patch_set.variables["latitude"][:, 32, 32] + valid_date = num2date(patch_set.variables["valid_date"][:], patch_set.variables["valid_date"].units) + patch_data = pd.DataFrame(dict(longitude=lons, + latitude=lats, + member=[member] * num_patches, + run_date=[run_date] * num_patches, + valid_date=valid_date), + columns=["run_date", "member", "valid_date", "longitude", "latitude"]) return patch_data + if __name__ == "__main__": main() diff --git a/hagelslag/util/test_size_distributions.py b/hagelslag/util/test_size_distributions.py index 7ef8249..527f1e8 100644 --- a/hagelslag/util/test_size_distributions.py +++ b/hagelslag/util/test_size_distributions.py @@ -1,11 +1,13 @@ import json +import os +from glob import glob from multiprocessing import Pool -from scipy.stats 
import kstest, gamma + +import matplotlib.pyplot as plt import numpy as np import pandas as pd -from glob import glob -import matplotlib.pyplot as plt -import os +from scipy.stats import kstest, gamma + def main(): json_path = "/sharp/djgagne/track_data_spring2015_unique_json/" @@ -22,11 +24,12 @@ def main(): ks_total_frame.to_csv("ks_results_2015.csv", index_label="Id") return + def run_kstests(json_path, run_date, member): try: full_path = json_path + "/{0}/{1}/mesh_*.json".format(run_date, member) json_files = sorted(glob(full_path)) - ks_results = {"id":[], "ks":[]} + ks_results = {"id": [], "ks": []} for json_file in json_files: js = open(json_file) mesh_track = json.load(js) @@ -37,25 +40,27 @@ def run_kstests(json_path, run_date, member): ts = np.array(mesh_obj["properties"]["timesteps"]) mask = np.array(mesh_obj["properties"]["masks"]) vals = ts[mask == 1] - gdist = gamma.fit(vals, floc=vals.min()-0.1) + gdist = gamma.fit(vals, floc=vals.min() - 0.1) sig = kstest(vals, gamma(*gdist).cdf) ks_results["id"].append(step_id) ks_results["ks"].append(sig) if sig[1] < 0.01: - print(step_id,) - print(sig[1],gdist) + print(step_id, ) + print(sig[1], gdist) print(np.sort(vals)) - plt.figure(figsize=(8,8)) + plt.figure(figsize=(8, 8)) plt.pcolormesh(ts, alpha=0.5, cmap="YlOrRd", vmin=0, vmax=100) - pc = plt.pcolormesh(np.ma.array(ts, mask=mask==0), cmap="YlOrRd", vmin=0, vmax=100) + pc = plt.pcolormesh(np.ma.array(ts, mask=mask == 0), cmap="YlOrRd", vmin=0, vmax=100) plt.title(step_id) plt.colorbar(pc) plt.savefig(step_id + ".png", bbox_inches="tight", dpi=150) plt.close() - ks_frame = pd.DataFrame(ks_results["ks"], index=ks_results["id"],columns=["D", "p-val"]) + ks_frame = pd.DataFrame(ks_results["ks"], index=ks_results["id"], columns=["D", "p-val"]) print(ks_frame.shape[0]) except Exception as e: raise e return ks_frame + + if __name__ == "__main__": main() diff --git a/hagelslag/util/testing_sector.py b/hagelslag/util/testing_sector.py index a656a1d..45c5e46 100644 --- a/hagelslag/util/testing_sector.py +++ b/hagelslag/util/testing_sector.py @@ -1,94 +1,97 @@ -import numpy as np -from create_sector_grid_data import * from datetime import datetime - -from hagelslag.processing.ObjectMatcher import shifted_centroid_distance, start_time_distance -from hagelslag.processing.ObjectMatcher import centroid_distance, time_distance -import os -import pandas as pd + import numpy as np -from datetime import datetime +import pandas as pd +from create_sector_grid_data import * +from hagelslag.processing.ObjectMatcher import centroid_distance, time_distance +from hagelslag.processing.ObjectMatcher import shifted_centroid_distance -#date_index = pd.DatetimeIndex([pd.Timestamp.utcnow().strftime("%Y%m%d")]) +# date_index = pd.DatetimeIndex([pd.Timestamp.utcnow().strftime("%Y%m%d")]) work_path = "/ai-hail/aburke/2018_HREFv2_data/" -scratch_path= "/hail/aburke/testing_weights/figuring_out_files/" -ensemble_members = ['nam_00','nam_12','arw_00','arw_12','nssl_00','nssl_12','nmmb_00','nmmb_12'] +scratch_path = "/hail/aburke/testing_weights/figuring_out_files/" +ensemble_members = ['nam_00', 'nam_12', 'arw_00', 'arw_12', 'nssl_00', 'nssl_12', 'nmmb_00', 'nmmb_12'] -config=dict(dates=None, - start_hour=12, - end_hour=36, - watershed_variable="MAXUVV", - ensemble_name="HREFv2", - ensemble_members=ensemble_members, - model_path=work_path+'symbolic_hrefv2_data', - model_watershed_params=(8, 1, 80, 100, 60), - size_filter=12, - gaussian_window=2, - mrms_path=work_path+"MRMS/", - mrms_variable="MESH_Max_60min_00.50", - 
mrms_watershed_params=(19, 1, 100, 100, 75), - object_matcher_params=([shifted_centroid_distance], np.array([1.0]), +config = dict(dates=None, + start_hour=12, + end_hour=36, + watershed_variable="MAXUVV", + ensemble_name="HREFv2", + ensemble_members=ensemble_members, + model_path=work_path + 'symbolic_hrefv2_data', + model_watershed_params=(8, 1, 80, 100, 60), + size_filter=12, + gaussian_window=2, + mrms_path=work_path + "MRMS/", + mrms_variable="MESH_Max_60min_00.50", + mrms_watershed_params=(19, 1, 100, 100, 75), + object_matcher_params=([shifted_centroid_distance], np.array([1.0]), np.array([24000])), - - track_matcher_params=([centroid_distance, time_distance], - np.array([80000, 2])), - - storm_variables=['MAXUVV','Storm relative helicity_3000', 'Storm relative helicity_1000', - 'MAXREF','MXUPHL_5000','MAXDVV'], - potential_variables=['Precipitable water_0','Temperature_1000','Dew point temperature_1000','Geopotential Height_500','Temperature_500', - 'Dew point temperature_500','U component of wind_500','V component of wind_500', - 'Geopotential Height_700','Temperature_700', 'Dew point temperature_700','U component of wind_700', - 'V component of wind_700','Geopotential Height_850','Temperature_850', 'Dew point temperature_850', - 'U component of wind_850','V component of wind_850','MAXUW', 'MAXVW', - 'Surface lifted index','Convective available potential energy_0','Convective inhibition_0'], - - tendency_variables=[], - shape_variables=["area", "eccentricity", "major_axis_length", "minor_axis_length", "orientation", + track_matcher_params=([centroid_distance, time_distance], + np.array([80000, 2])), + + storm_variables=['MAXUVV', 'Storm relative helicity_3000', 'Storm relative helicity_1000', + 'MAXREF', 'MXUPHL_5000', 'MAXDVV'], + + potential_variables=['Precipitable water_0', 'Temperature_1000', 'Dew point temperature_1000', + 'Geopotential Height_500', 'Temperature_500', + 'Dew point temperature_500', 'U component of wind_500', 'V component of wind_500', + 'Geopotential Height_700', 'Temperature_700', 'Dew point temperature_700', + 'U component of wind_700', + 'V component of wind_700', 'Geopotential Height_850', 'Temperature_850', + 'Dew point temperature_850', + 'U component of wind_850', 'V component of wind_850', 'MAXUW', 'MAXVW', + 'Surface lifted index', 'Convective available potential energy_0', + 'Convective inhibition_0'], + + tendency_variables=[], + shape_variables=["area", "eccentricity", "major_axis_length", "minor_axis_length", "orientation", "extent"], - variable_statistics=["mean", "max", "min", "std", "skew", + variable_statistics=["mean", "max", "min", "std", "skew", "percentile_10", "percentile_50", "percentile_90"], - csv_path=scratch_path+"track_data_spring2018_MAXUVV_closest_csv/", - geojson_path=scratch_path+"track_data_spring2018_MAXUVV_closest_json/", - nc_path=scratch_path+"track_data_spring2018_MAXUVV_patch_nc/", - unique_matches=True, - patch_radius=16, - closest_matches=True, - match_steps=True, - train=False, - single_step=True, - label_type="gamma", - model_map_file="/hail/aburke/hagelslag/mapfiles/hrefv2_2018_map.txt", - mask_file="/hail/aburke/hagelslag/mapfiles/hrefv2_us_mask.nc", - run_date_format="%Y%m%d-%H%M", - sector_csv_outpath=scratch_path+'track_data_spring2018_MAXUVV_closest_csv/', - sector_nc_outpath=scratch_path+'track_data_spring2018_MAXUVV_patch_nc/', - sector_mapfile="/hail/aburke/hagelslag/mapfiles/hrefv2_sectors/C_2018_map.txt" - ) + csv_path=scratch_path + "track_data_spring2018_MAXUVV_closest_csv/", + geojson_path=scratch_path 
+ "track_data_spring2018_MAXUVV_closest_json/", + nc_path=scratch_path + "track_data_spring2018_MAXUVV_patch_nc/", + unique_matches=True, + patch_radius=16, + closest_matches=True, + match_steps=True, + train=False, + single_step=True, + label_type="gamma", + model_map_file="/hail/aburke/hagelslag/mapfiles/hrefv2_2018_map.txt", + mask_file="/hail/aburke/hagelslag/mapfiles/hrefv2_us_mask.nc", + run_date_format="%Y%m%d-%H%M", + sector_csv_outpath=scratch_path + 'track_data_spring2018_MAXUVV_closest_csv/', + sector_nc_outpath=scratch_path + 'track_data_spring2018_MAXUVV_patch_nc/', + sector_mapfile="/hail/aburke/hagelslag/mapfiles/hrefv2_sectors/C_2018_map.txt" + ) -sector='E' +sector = 'E' date_index = pd.DatetimeIndex(start="2018-05-01T00:00", end="2018-06-02T00:00", freq="1D") sector_mapfile = "/hail/aburke/hagelslag/mapfiles/hrefv2_sectors/{0}_2018_map.txt".format(sector) -ensemble_name="HREFv2" -member='arw_00' -run_date = datetime(2018,7,1) -run_date_format="%Y%m%d-%H%M" -csv_path='/hail/aburke/HREF_Scripts_and_Data/hwt_2018/track_data_spring2018_MAXUVV_closest_csv/' -nc_path='/hail/aburke/HREF_Scripts_and_Data/hwt_2018/track_data_spring2018_MAXUVV_patch_nc/' -sector_csv_outpath="/hail/aburke/testing_weights/sector_trained/{0}/track_data_spring2018_MAXUVV_closest_csv/".format(sector) -sector_nc_outpath="/hail/aburke/testing_weights/sector_trained/{0}/track_data_spring2018_MAXUVV_patch_nc/".format(sector) -patch_radius=16 +ensemble_name = "HREFv2" +member = 'arw_00' +run_date = datetime(2018, 7, 1) +run_date_format = "%Y%m%d-%H%M" +csv_path = '/hail/aburke/HREF_Scripts_and_Data/hwt_2018/track_data_spring2018_MAXUVV_closest_csv/' +nc_path = '/hail/aburke/HREF_Scripts_and_Data/hwt_2018/track_data_spring2018_MAXUVV_patch_nc/' +sector_csv_outpath = "/hail/aburke/testing_weights/sector_trained/{0}/track_data_spring2018_MAXUVV_closest_csv/".format( + sector) +sector_nc_outpath = "/hail/aburke/testing_weights/sector_trained/{0}/track_data_spring2018_MAXUVV_patch_nc/".format( + sector) +patch_radius = 16 for run_date in date_index: for member in ensemble_members: sector = SectorProcessor(sector_mapfile, - ensemble_name,member, - run_date,run_date_format) + ensemble_name, member, + run_date, run_date_format) - sector.output_sector_netcdf(nc_path,sector_nc_outpath,patch_radius,config) + sector.output_sector_netcdf(nc_path, sector_nc_outpath, patch_radius, config) - #for key in ['track_step', 'track_total']: + # for key in ['track_step', 'track_total']: # sector.output_sector_csv(csv_path,key,sector_csv_outpath) diff --git a/setup.py b/setup.py index 07508cd..5d63b04 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,7 @@ -from setuptools import setup import os +from setuptools import setup + classifiers = ['Development Status :: 4 - Beta', 'Intended Audience :: Science/Research', 'License :: OSI Approved :: MIT License', @@ -32,11 +33,11 @@ license="MIT", url="https://github.com/djgagne/hagelslag", packages=["hagelslag", "hagelslag.data", "hagelslag.processing", "hagelslag.evaluation", "hagelslag.util"], - scripts=["bin/hsdata", "bin/hsforecast", "bin/hseval", "bin/hsfileoutput", "bin/hsplotter", - "bin/hswrf3d", "bin/hsstation", "bin/hsncarpatch", "bin/hscalibration"], - data_files=[("mapfiles", ["mapfiles/ssef2013.map", - "mapfiles/ssef2014.map", - "mapfiles/ssef2015.map", + scripts=["bin/hsdata", "bin/hsforecast", "bin/hseval", "bin/hsfileoutput", "bin/hsplotter", + "bin/hswrf3d", "bin/hsstation", "bin/hsncarpatch", "bin/hscalibration"], + data_files=[("mapfiles", ["mapfiles/ssef2013.map", + 
"mapfiles/ssef2014.map", + "mapfiles/ssef2015.map", "mapfiles/ncar_grib_table.txt", "mapfiles/hrrr_map_2016.txt", "mapfiles/ncar_ensemble_map_2015.txt", From b994dfcb7381c1d155a204bef97306bbbdc863b1 Mon Sep 17 00:00:00 2001 From: David John Gagne Date: Thu, 30 Sep 2021 11:24:14 -0600 Subject: [PATCH 15/15] Addressing flake8 errors. --- .travis.yml | 15 -- demos/obj_tracking.py | 23 +-- doc/conf.py | 131 +++++++++--------- hagelslag/data/GribModelGrid.py | 4 +- hagelslag/processing/STObject.py | 2 +- hagelslag/processing/TrackProcessing.py | 9 +- hagelslag/util/lsr_calibration_dataset.py | 2 +- .../util/mrms_mesh_calibration_dataset.py | 6 +- hagelslag/util/testing_sector.py | 2 +- requirements.txt | 2 +- 10 files changed, 93 insertions(+), 103 deletions(-) delete mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index ec3da2d..0000000 --- a/.travis.yml +++ /dev/null @@ -1,15 +0,0 @@ -language: python -env: - - PYTHON_VERSION=3.8 IPYTHON_KERNEL=python3 -before_install: - - wget -q https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh - - sh Miniconda3-latest-Linux-x86_64.sh -b -p /home/travis/miniconda - - export PATH=/home/travis/miniconda/bin:$PATH -install: - - conda env create -f environment.yml - - source activate hagelslag - - echo `which python` -script: - - pytest -notifications: - email: true diff --git a/demos/obj_tracking.py b/demos/obj_tracking.py index 3b20a2a..18181ca 100755 --- a/demos/obj_tracking.py +++ b/demos/obj_tracking.py @@ -18,10 +18,12 @@ import numpy as np from scipy.ndimage import gaussian_filter, find_objects -from hagelslag.data import ModelOutput -from hagelslag.processing import STObject +from hagelslag.data.ModelOutput import ModelOutput +from hagelslag.processing.STObject import STObject from hagelslag.processing.EnhancedWatershedSegmenter import EnhancedWatershed from hagelslag.processing.ObjectMatcher import ObjectMatcher, closest_distance +from netCDF4 import Dataset + # In[2]: @@ -49,7 +51,8 @@ # data_increment (int): quantization interval. Use 1 if you don't want to quantize # max_thresh (int): values greater than maxThresh are treated as the maximum threshold # size_threshold_pixels (int): clusters smaller than this threshold are ignored. -# delta (int): maximum number of data increments the cluster is allowed to range over. Larger d results in clusters over larger scales. +# delta (int): maximum number of data increments the cluster is allowed to range over. Larger d results in +# clusters over larger scales. # From ahij's config file. 
if field == "MAX_UPDRAFT_HELICITY" or field == "UP_HELI_MAX03": @@ -61,11 +64,10 @@ levels = params['min_thresh'] * np.arange(1, 8) levels = np.append(levels, params['min_thresh'] * 15) model_watershed_params = ( -params['min_thresh'], params['step'], params['max_thresh'], params["max_size"], params["delta"]) + params['min_thresh'], params['step'], params['max_thresh'], params["max_size"], params["delta"]) end_date = start_date + timedelta(hours=0) -from netCDF4 import Dataset model_grid = ModelOutput(ensemble_name, member, @@ -149,15 +151,16 @@ def get_forecast_objects(model_grid, ew_params, min_size, gaussian_window): num_slices = len(obj_slices) model_objects.append([]) if num_slices > 0: - fig, ax = plt.subplots() - t = plt.contourf(model_grid.lon, model_grid.lat, hour_labels, np.arange(0, num_slices + 1) + 0.5, + plt.subplots() + plt.contourf(model_grid.lon, model_grid.lat, hour_labels, np.arange(0, num_slices + 1) + 0.5, extend="max", cmap="Set1", latlon=True, title=str(run_date) + " " + field + " " + str(h)) - ret = plt.savefig(odir + "enh_watershed_ex/ew{0:02d}.png".format(h)) + plt.savefig(odir + "enh_watershed_ex/ew{0:02d}.png".format(h)) for s, sl in enumerate(obj_slices): model_objects[-1].append(STObject(model_grid.data[h][sl], # np.where(hour_labels[sl] > 0, 1, 0), # For some objects (especially long, diagonal ones), the rectangular - # slice encompasses part of other objects (i.e. non-zero elements of slice). + # slice encompasses part of other objects + # (i.e. non-zero elements of slice). # We don't want them in our mask. np.where(hour_labels[sl] == s + 1, 1, 0), model_grid.x[sl], @@ -198,7 +201,7 @@ def track_forecast_objects(input_model_objects, model_grid, object_matcher): elif len(past_time_objs) > 0 and len(model_objects[h]) > 0: assignments = object_matcher.match_objects(past_time_objs, model_objects[h], h - 1, h) print("assignments:", assignments) - unpaired = range(len(model_objects[h])) + unpaired = list(range(len(model_objects[h]))) for pair in assignments: past_time_objs[pair[0]].extend(model_objects[h][pair[1]]) unpaired.remove(pair[1]) diff --git a/doc/conf.py b/doc/conf.py index 42c9f08..fca195e 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -12,28 +12,30 @@ # All configuration values have a default; values that are commented out # serve to show the default. 
-import sys import os -import shlex +import sys + from mock import Mock as MagicMock class Mock(MagicMock): @classmethod def __getattr__(cls, name): - return Mock() + return Mock() -MOCK_MODULES = ['numpy', 'scipy', 'pandas', 'skimage', 'netCDF4', 'basemap', 'matplotlib', "pyproj", "Nio", "scipy.spatial", - "pygrib", "mpl_toolkits", "mpl_toolkits.axes_grid1", "mpl_toolkits.basemap", "mpl_toolkits.basemap.pyproj", +MOCK_MODULES = ['numpy', 'scipy', 'pandas', 'skimage', 'netCDF4', 'basemap', 'matplotlib', "pyproj", "Nio", + "scipy.spatial", + "pygrib", "mpl_toolkits", "mpl_toolkits.axes_grid1", "mpl_toolkits.basemap", + "mpl_toolkits.basemap.pyproj", "sklearn", 'skimage.morphology', "scipy.ndimage", "matplotlib.pyplot", "scipy.stats", "scipy.signal", - "skimage.measure", "skimage.segmentation", "scipy.interpolate", "skimage.draw", - "mpl_toolkits.axes_grid1.inset_locator", "glue", "sklearn.linear_model", "glue.viewers", "glue.viewers.custom", + "skimage.measure", "skimage.segmentation", "scipy.interpolate", "skimage.draw", + "mpl_toolkits.axes_grid1.inset_locator", "glue", "sklearn.linear_model", "glue.viewers", + "glue.viewers.custom", "glue.viewers.custom.qt", "glue.core", "shapefile", "ncepgrib2", "glue.config", "sklearn.decomposition", "sklearn.model_selection", "arrow"] sys.modules.update((mod_name, Mock()) for mod_name in MOCK_MODULES) - # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. @@ -42,7 +44,7 @@ def __getattr__(cls, name): # -- General configuration ------------------------------------------------ # If your documentation needs a minimal Sphinx version, state it here. -#needs_sphinx = '1.0' +# needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom @@ -64,14 +66,14 @@ def __getattr__(cls, name): source_suffix = '.rst' # The encoding of source files. -#source_encoding = 'utf-8-sig' +# source_encoding = 'utf-8-sig' # The master toctree document. master_doc = 'index' # General information about the project. project = u'hagelslag' -copyright = u'2017, David John Gagne II' +copyright = u'2017-2021, David John Gagne II' author = u'David John Gagne II' # The version info for the project you're documenting, acts as replacement for @@ -92,9 +94,9 @@ def __getattr__(cls, name): # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: -#today = '' +# today = '' # Else, today_fmt is used as the format for a strftime call. -#today_fmt = '%B %d, %Y' +# today_fmt = '%B %d, %Y' # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. @@ -102,32 +104,31 @@ def __getattr__(cls, name): # The reST default role (used for this markup: `text`) to use for all # documents. -#default_role = None +# default_role = None # If true, '()' will be appended to :func: etc. cross-reference text. -#add_function_parentheses = True +# add_function_parentheses = True # If true, the current module name will be prepended to all description # unit titles (such as .. function::). -#add_module_names = True +# add_module_names = True # If true, sectionauthor and moduleauthor directives will be shown in the # output. They are ignored by default. 
-#show_authors = False +# show_authors = False # The name of the Pygments (syntax highlighting) style to use. pygments_style = 'sphinx' # A list of ignored prefixes for module index sorting. -#modindex_common_prefix = [] +# modindex_common_prefix = [] # If true, keep warnings as "system message" paragraphs in the built documents. -#keep_warnings = False +# keep_warnings = False # If true, `todo` and `todoList` produce output, else they produce nothing. todo_include_todos = False - # -- Options for HTML output ---------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for @@ -140,23 +141,23 @@ def __getattr__(cls, name): html_theme_options = {'sidebarwidth': 375} # Add any paths that contain custom themes here, relative to this directory. -#html_theme_path = [] +# html_theme_path = [] # The name for this set of Sphinx documents. If None, it defaults to # " v documentation". -#html_title = None +# html_title = None # A shorter title for the navigation bar. Default is the same as html_title. -#html_short_title = None +# html_short_title = None # The name of an image file (relative to this directory) to place at the top # of the sidebar. -#html_logo = None +# html_logo = None # The name of an image file (within the static path) to use as favicon of the # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. -#html_favicon = None +# html_favicon = None # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, @@ -166,62 +167,62 @@ def __getattr__(cls, name): # Add any extra paths that contain custom files (such as robots.txt or # .htaccess) here, relative to this directory. These files are copied # directly to the root of the documentation. -#html_extra_path = [] +# html_extra_path = [] # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, # using the given strftime format. -#html_last_updated_fmt = '%b %d, %Y' +# html_last_updated_fmt = '%b %d, %Y' # If true, SmartyPants will be used to convert quotes and dashes to # typographically correct entities. -#html_use_smartypants = True +# html_use_smartypants = True # Custom sidebar templates, maps document names to template names. -#html_sidebars = {} +# html_sidebars = {} # Additional templates that should be rendered to pages, maps page names to # template names. -#html_additional_pages = {} +# html_additional_pages = {} # If false, no module index is generated. -#html_domain_indices = True +# html_domain_indices = True # If false, no index is generated. -#html_use_index = True +# html_use_index = True # If true, the index is split into individual pages for each letter. -#html_split_index = False +# html_split_index = False # If true, links to the reST sources are added to the pages. -#html_show_sourcelink = True +# html_show_sourcelink = True # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. -#html_show_sphinx = True +# html_show_sphinx = True # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. -#html_show_copyright = True +# html_show_copyright = True # If true, an OpenSearch description file will be output, and all pages will # contain a tag referring to it. The value of this option must be the # base URL from which the finished HTML is served. -#html_use_opensearch = '' +# html_use_opensearch = '' # This is the file name suffix for HTML files (e.g. ".xhtml"). 
-#html_file_suffix = None +# html_file_suffix = None # Language to be used for generating the HTML full-text search index. # Sphinx supports the following languages: # 'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja' # 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr' -#html_search_language = 'en' +# html_search_language = 'en' # A dictionary with options for the search language support, empty by default. # Now only 'ja' uses this config value -#html_search_options = {'type': 'default'} +# html_search_options = {'type': 'default'} # The name of a javascript file (relative to the configuration directory) that # implements a search results scorer. If empty, the default will be used. -#html_search_scorer = 'scorer.js' +# html_search_scorer = 'scorer.js' # Output file base name for HTML help builder. htmlhelp_basename = 'hagelslagdoc' @@ -229,46 +230,46 @@ def __getattr__(cls, name): # -- Options for LaTeX output --------------------------------------------- latex_elements = { -# The paper size ('letterpaper' or 'a4paper'). -#'papersize': 'letterpaper', + # The paper size ('letterpaper' or 'a4paper'). + # 'papersize': 'letterpaper', -# The font size ('10pt', '11pt' or '12pt'). -#'pointsize': '10pt', + # The font size ('10pt', '11pt' or '12pt'). + # 'pointsize': '10pt', -# Additional stuff for the LaTeX preamble. -#'preamble': '', + # Additional stuff for the LaTeX preamble. + # 'preamble': '', -# Latex figure (float) alignment -#'figure_align': 'htbp', + # Latex figure (float) alignment + # 'figure_align': 'htbp', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ - (master_doc, 'hagelslag.tex', u'hagelslag Documentation', - u'David John Gagne II', 'manual'), + (master_doc, 'hagelslag.tex', u'hagelslag Documentation', + u'David John Gagne II', 'manual'), ] # The name of an image file (relative to this directory) to place at the top of # the title page. -#latex_logo = None +# latex_logo = None # For "manual" documents, if this is true, then toplevel headings are parts, # not chapters. -#latex_use_parts = False +# latex_use_parts = False # If true, show page references after internal links. -#latex_show_pagerefs = False +# latex_show_pagerefs = False # If true, show URL addresses after external links. -#latex_show_urls = False +# latex_show_urls = False # Documents to append as an appendix to all manuals. -#latex_appendices = [] +# latex_appendices = [] # If false, no module index is generated. -#latex_domain_indices = True +# latex_domain_indices = True # -- Options for manual page output --------------------------------------- @@ -281,7 +282,7 @@ def __getattr__(cls, name): ] # If true, show URL addresses after external links. -#man_show_urls = False +# man_show_urls = False # -- Options for Texinfo output ------------------------------------------- @@ -290,19 +291,19 @@ def __getattr__(cls, name): # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - (master_doc, 'hagelslag', u'hagelslag Documentation', - author, 'hagelslag', 'One line description of project.', - 'Miscellaneous'), + (master_doc, 'hagelslag', u'hagelslag Documentation', + author, 'hagelslag', 'One line description of project.', + 'Miscellaneous'), ] # Documents to append as an appendix to all manuals. -#texinfo_appendices = [] +# texinfo_appendices = [] # If false, no module index is generated. 
-#texinfo_domain_indices = True +# texinfo_domain_indices = True # How to display URL addresses: 'footnote', 'no', or 'inline'. -#texinfo_show_urls = 'footnote' +# texinfo_show_urls = 'footnote' # If true, do not generate a @detailmenu in the "Top" node's menu. -#texinfo_no_detailmenu = False +# texinfo_no_detailmenu = False diff --git a/hagelslag/data/GribModelGrid.py b/hagelslag/data/GribModelGrid.py index 7fe78e6..ee6af35 100644 --- a/hagelslag/data/GribModelGrid.py +++ b/hagelslag/data/GribModelGrid.py @@ -59,7 +59,7 @@ def __enter__(self): def format_grib_name(self, selected_variable): """ Assigns name to grib2 message number with name 'unknown'. Names based on NOAA grib2 abbreviations. - + Names: 197: RETOP: Echo Top 198: MAXREF: Hourly Maximum of Simulated Reflectivity at 1 km AGL @@ -71,7 +71,7 @@ def format_grib_name(self, selected_variable): 221: MAXDVV: Hourly Maximum of Downward Vertical Velocity in the lowest 400hPa 222: MAXUW: U Component of Hourly Maximum 10m Wind Speed 223: MAXVW: V Component of Hourly Maximum 10m Wind Speed - + Args: selected_variable(str): Name of selected variable for loading Returns: diff --git a/hagelslag/processing/STObject.py b/hagelslag/processing/STObject.py index f846d54..aa7f2fe 100644 --- a/hagelslag/processing/STObject.py +++ b/hagelslag/processing/STObject.py @@ -154,7 +154,7 @@ def trajectory(self): def get_corner(self, time): """ - Gets the corner array indices of the STObject at a given time that corresponds + Gets the corner array indices of the STObject at a given time that corresponds to the upper left corner of the bounding box for the STObject. Args: diff --git a/hagelslag/processing/TrackProcessing.py b/hagelslag/processing/TrackProcessing.py index 489d6e0..7fa4ed4 100644 --- a/hagelslag/processing/TrackProcessing.py +++ b/hagelslag/processing/TrackProcessing.py @@ -547,12 +547,13 @@ def match_single_track_dist(model_track, obs_track): def match_hail_size_step_distributions(self, model_tracks, obs_tracks, track_pairings): """ - Given a matching set of observed tracks for each model track, + Given a matching set of observed tracks for each model track, combine the hail size values and create + an observed hail size distribution. 
Args: - model_tracks: - obs_tracks: - track_pairings: + model_tracks: List of STObjects + obs_tracks: List of STObjects + track_pairings: Returns: diff --git a/hagelslag/util/lsr_calibration_dataset.py b/hagelslag/util/lsr_calibration_dataset.py index 9872482..b627c63 100644 --- a/hagelslag/util/lsr_calibration_dataset.py +++ b/hagelslag/util/lsr_calibration_dataset.py @@ -5,7 +5,7 @@ import pyproj from netCDF4 import Dataset -from make_proj_grids import * +from make_proj_grids import read_ncar_map_file, make_proj_grids def main(): diff --git a/hagelslag/util/mrms_mesh_calibration_dataset.py b/hagelslag/util/mrms_mesh_calibration_dataset.py index bb30a19..bbf133a 100644 --- a/hagelslag/util/mrms_mesh_calibration_dataset.py +++ b/hagelslag/util/mrms_mesh_calibration_dataset.py @@ -19,9 +19,9 @@ def main(): parser.add_argument("-a", "--mask_file", required=True, help="Path to the ensemble mask file") args = parser.parse_args() if args.end_date: - run_dates = pd.DatetimeIndex(start=args.start_date, end=args.end_date, freq='1D').strftime("%y%m%d") + run_dates = pd.date_range(start=args.start_date, end=args.end_date, freq='1D').strftime("%y%m%d") else: - run_dates = pd.DatetimeIndex(start=args.start_date, end=args.start_date, freq='1D').strftime("%y%m%d") + run_dates = pd.date_range(start=args.start_date, end=args.start_date, freq='1D').strftime("%y%m%d") data_path = args.data_path out_path = args.out_path mapfile = args.map_file @@ -31,7 +31,7 @@ def main(): return -def MESH_verification_data(data_path, maskfile, mapfile, run_dates, out_path, hours=[17, 19, 21]): +def MESH_verification_data(data_path, maskfile, mapfile, run_dates, out_path, hours=(17, 19, 21)): """ Calculate 40 km halos around MESH values greater than a threshold value """ diff --git a/hagelslag/util/testing_sector.py b/hagelslag/util/testing_sector.py index 45c5e46..61247d7 100644 --- a/hagelslag/util/testing_sector.py +++ b/hagelslag/util/testing_sector.py @@ -3,7 +3,7 @@ import numpy as np import pandas as pd -from create_sector_grid_data import * +from create_sector_grid_data import SectorProcessor from hagelslag.processing.ObjectMatcher import centroid_distance, time_distance from hagelslag.processing.ObjectMatcher import shifted_centroid_distance diff --git a/requirements.txt b/requirements.txt index e63baa4..2aecb80 100644 --- a/requirements.txt +++ b/requirements.txt @@ -15,4 +15,4 @@ s3fs xarray sphinx pytest - +mock
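
The calibration scripts patched above now build their map projection with pyproj.Proj(proj_dict) rather than mpl_toolkits.basemap.Basemap, and call the projection object directly to turn longitude/latitude into projected x/y coordinates in meters. The sketch below shows that usage pattern in isolation; the Lambert conformal parameters and sample points are illustrative assumptions, not values read from an actual hagelslag map file.

    import numpy as np
    from pyproj import Proj

    # Illustrative Lambert conformal parameters; the patched scripts read these
    # from a map file via read_ncar_map_file() instead of hard-coding them.
    proj_dict = {"proj": "lcc", "lon_0": -101.0, "lat_0": 38.5,
                 "lat_1": 32.0, "lat_2": 46.0, "R": 6371229.0}
    m = Proj(proj_dict)

    # Calling the Proj object converts lon/lat in degrees to x/y in meters,
    # mirroring x1, y1 = m(ML_forecast_lons, ML_forecast_lats) in the patch.
    lons = np.array([-100.0, -97.5, -95.0])
    lats = np.array([35.0, 36.2, 34.8])
    x, y = m(lons, lats)

    # Distances can then be taken directly in projected meters, as in the
    # 30 km radius test (total_dist < 30000.0) inside calculate_distance().
    dist = np.sqrt((x - x[0]) ** 2 + (y - y[0]) ** 2)
    within_30km = dist < 30000.0

The same cleanup pass swaps pd.DatetimeIndex(start=..., end=..., freq=...) for pd.date_range, the supported way to build a fixed-frequency date index in current pandas. A minimal equivalent call, with illustrative dates:

    import pandas as pd
    run_dates = pd.date_range(start="2018-05-01", end="2018-05-03", freq="1D").strftime("%y%m%d")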