Skip to content

Commit 3ad448a

Browse files
rzinkerzinke
andauthored
Iterative outlier rejection for timeseries fits now generalized (#76)
* Iterative outlier rejection for timeseries fits now generalized for GNSS and InSAR data. Applied only to GNSS TS fits for now. * Clear outputs --------- Co-authored-by: rzinke <robert.zinke@jpl.nasa.gov>
1 parent cd5d323 commit 3ad448a

File tree

4 files changed

+341
-207
lines changed

4 files changed

+341
-207
lines changed

methods/coseismic/Coseismic_Requirement_Validation.ipynb

Lines changed: 21 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@
113113
"from mintpy.objects import gnss, timeseries\n",
114114
"from mintpy.smallbaselineApp import TimeSeriesAnalysis\n",
115115
"\n",
116-
"from solid_utils.gnss_utils import remove_gnss_outliers, model_gnss_timeseries\n",
116+
"from solid_utils.fitting import IterativeOutlierFit\n",
117117
"from solid_utils.sampling import load_geo, samp_pair, profile_samples, haversine_distance\n",
118118
"from solid_utils.plotting import display_coseismic_validation, display_validation_table"
119119
]
@@ -904,35 +904,26 @@
904904
"metadata": {},
905905
"outputs": [],
906906
"source": [
907-
"# Get InSAR geometry file\n",
908-
"geom_file = ut.get_geometry_file(['incidenceAngle','azimuthAngle'],\n",
909-
" work_dir=mintpy_dir, coord='geo')\n",
907+
"# Outlier ID and removal parameters\n",
908+
"outlier_thresh = 3 # standard deviations\n",
909+
"outlier_iters = 1 # iterations\n",
910910
"\n",
911911
"# Loop through valid GNSS stations\n",
912912
"for i, site_name in enumerate(site_names):\n",
913913
" # Retrieve station information\n",
914914
" gnss_stn = gnss_stns[site_name]\n",
915-
" gnss_stn.get_los_displacement(geom_file,\n",
915+
" gnss_stn.get_los_displacement(insar_metadata,\n",
916916
" start_date=start_date,\n",
917917
" end_date=end_date)\n",
918918
"\n",
919-
" # Prepare dictionary of displacement measurements\n",
920-
" gnss_stn.dis_dict = {} # empty for now\n",
921-
" \n",
922-
" # Save unfiltered data for plotting\n",
923-
" unfilt_dates = gnss_stn.dates\n",
924-
" unfilt_dis = gnss_stn.dis_los\n",
925-
" \n",
926919
" # Outlier detection and removal\n",
927-
" remove_gnss_outliers(gnss_stn, 'LOS', model=ts_functions,\n",
928-
" threshold=3, max_iter=2, verbose=False)\n",
920+
" gnss_fit = IterativeOutlierFit(gnss_stn.dates, gnss_stn.dis_los,\n",
921+
" model=ts_functions, threshold=3,\n",
922+
" max_iter=1)\n",
929923
" \n",
930-
" # Model outlier-removed time-series (estimate method 2)\n",
931-
" dis_los_hat, m_hat, mhat_se = model_gnss_timeseries(gnss_stn, 'LOS', ts_functions)\n",
932-
" \n",
933-
" # Record station velocity\n",
934-
" gnss_stn.dis_dict['dis_mm'] = m_hat[2] * 1000\n",
935-
" gnss_stn.dis_dict['dis_error_mm'] = mhat_se[2] * 1000\n",
924+
" # Record station displacement\n",
925+
" gnss_stn.dis_dict = {'dis_mm': gnss_fit.m_hat[2] * 1000,\n",
926+
" 'dis_error_mm': gnss_fit.mhat_se[2] * 1000}\n",
936927
"\n",
937928
" # Report\n",
938929
" if i == 0 :\n",
@@ -950,20 +941,18 @@
950941
" if ax_nb == 0:\n",
951942
" fig, axes = plt.subplots(figsize=(10, 4), ncols=2)\n",
952943
"\n",
944+
" # Plot outliers\n",
945+
" if gnss_fit.n_outliers > 0:\n",
946+
" axes[ax_nb].scatter(gnss_fit.outlier_dates,\n",
947+
" 1000*gnss_fit.outlier_dis,\n",
948+
" 2**2, 'firebrick', label='outliers')\n",
949+
"\n",
953950
" # Plot filtered data and model fit\n",
954-
" axes[ax_nb].scatter(gnss_stn.dates, 1000*gnss_stn.dis_los,\n",
955-
" 3**2, 'k', label='filt data')\n",
956-
" axes[ax_nb].plot(gnss_stn.dates, 1000*dis_los_hat,\n",
957-
" 'c', linewidth=3, label='model fit')\n",
951+
" axes[ax_nb].scatter(gnss_fit.dates, 1000*gnss_fit.dis, 3**2,\n",
952+
" 'dimgrey', zorder=1)\n",
953+
" axes[ax_nb].plot(gnss_fit.dates, 1000*gnss_fit.dis_hat,\n",
954+
" 'c', linewidth=3, label='model fit', zorder=2)\n",
958955
"\n",
959-
" # Plot outliers\n",
960-
" outlier_ndxs = [i for i, date in enumerate(unfilt_dates) if date not in gnss_stn.dates]\n",
961-
" if len(outlier_ndxs) > 0:\n",
962-
" outlier_dates = np.array([unfilt_dates[i] for i in outlier_ndxs])\n",
963-
" outlier_dis = np.array([unfilt_dis[i] for i in outlier_ndxs])\n",
964-
" axes[ax_nb].scatter(outlier_dates, 1000*outlier_dis,\n",
965-
" 3**2, 'firebrick', label='outliers')\n",
966-
" \n",
967956
" # Format plot\n",
968957
" axes[ax_nb].legend()\n",
969958
" axes[ax_nb].set_xticks(gnss_stn.dates[::label_skips])\n",

methods/secular/Secular_Requirement_Validation.ipynb

Lines changed: 28 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -115,9 +115,10 @@
115115
"from mintpy.utils import ptime, readfile, time_func, utils as ut\n",
116116
"from pathlib import Path\n",
117117
"from scipy import signal\n",
118+
"\n",
119+
"from solid_utils.fitting import IterativeOutlierFit\n",
118120
"from solid_utils.plotting import display_validation, display_validation_table\n",
119121
"from solid_utils.sampling import load_geo, samp_pair, profile_samples, haversine_distance\n",
120-
"from solid_utils.gnss_utils import remove_gnss_outliers, model_gnss_timeseries\n",
121122
"\n",
122123
"#Set Global Plot Parameters\n",
123124
"plt.rcParams.update({'font.size': 12})"
@@ -686,7 +687,7 @@
686687
"outputs": [],
687688
"source": [
688689
"# GNSS processing source\n",
689-
"if 'gnss_source' in sitedata['sites']:\n",
690+
"if 'gnss_source' in sitedata['sites'][site]:\n",
690691
" gnss_source = sitedata['sites'][site]['gnss_source']\n",
691692
"else:\n",
692693
" gnss_source = 'UNR'\n",
@@ -861,6 +862,10 @@
861862
},
862863
"outputs": [],
863864
"source": [
865+
"# Outlier ID and removal parameters\n",
866+
"outlier_thresh = 3 # standard deviations\n",
867+
"outlier_iters = 1 # iterations\n",
868+
"\n",
864869
"# Loop through valid GNSS stations\n",
865870
"for i, site_name in enumerate(site_names):\n",
866871
" # Retrieve station information\n",
@@ -869,27 +874,21 @@
869874
" start_date=start_date,\n",
870875
" end_date=end_date)\n",
871876
"\n",
872-
" # Save unfiltered data for plotting\n",
873-
" unfilt_dates = gnss_stn.dates\n",
874-
" unfilt_dis = gnss_stn.dis_los\n",
875-
" \n",
876877
" # Outlier detection and removal\n",
877-
" remove_gnss_outliers(gnss_stn, 'LOS', model=ts_functions,\n",
878-
" threshold=3, max_iter=2, verbose=False)\n",
879-
" \n",
880-
" # Model outlier-removed time-series\n",
881-
" dis_los_hat, m_hat, mhat_se = model_gnss_timeseries(gnss_stn, 'LOS', ts_functions)\n",
882-
" \n",
878+
" gnss_fit = IterativeOutlierFit(gnss_stn.dates, gnss_stn.dis_los,\n",
879+
" model=ts_functions, threshold=outlier_thresh,\n",
880+
" max_iter=outlier_iters)\n",
881+
"\n",
883882
" # Record station velocity\n",
884-
" gnss_stn.vel_dict = {'vel_mm_yr': m_hat[1] * 1000,\n",
885-
" 'vel_error_mm_yr': mhat_se[1] * 1000}\n",
883+
" gnss_stn.vel_dict = {'vel_mm_yr': gnss_fit.m_hat[1] * 1000,\n",
884+
" 'vel_error_mm_yr': gnss_fit.mhat_se[1] * 1000}\n",
886885
"\n",
887886
" # Report\n",
888887
" if i == 0 :\n",
889-
" print('site velocity(mm/yr)')\n",
890-
" print('{:s} {:.1f} +- {:.2f}'.format(gnss_stn.site,\n",
891-
" gnss_stn.vel_dict['vel_mm_yr'],\n",
892-
" gnss_stn.vel_dict['vel_error_mm_yr']))\n",
888+
" print('site velocity (mm/yr)')\n",
889+
" print(f\"{gnss_stn.site:s} \"\n",
890+
" f\"{gnss_stn.vel_dict['vel_mm_yr']:.1f} \"\n",
891+
" f\"+- {gnss_stn.vel_dict['vel_error_mm_yr']:.2f}\")\n",
893892
"\n",
894893
" # Plotting parameters\n",
895894
" n_dates = len(gnss_stn.dates)\n",
@@ -900,19 +899,17 @@
900899
" if ax_nb == 0:\n",
901900
" fig, axes = plt.subplots(figsize=(10, 4), ncols=2)\n",
902901
"\n",
903-
" # Plot filtered data and model fit\n",
904-
" axes[ax_nb].scatter(gnss_stn.dates, 1000*gnss_stn.dis_los,\n",
905-
" 3**2, 'k', label='filt data')\n",
906-
" axes[ax_nb].plot(gnss_stn.dates, 1000*dis_los_hat,\n",
907-
" 'c', linewidth=3, label='model fit')\n",
908-
"\n",
909902
" # Plot outliers\n",
910-
" outlier_ndxs = [i for i, date in enumerate(unfilt_dates) if date not in gnss_stn.dates]\n",
911-
" if len(outlier_ndxs) > 0:\n",
912-
" outlier_dates = np.array([unfilt_dates[i] for i in outlier_ndxs])\n",
913-
" outlier_dis = np.array([unfilt_dis[i] for i in outlier_ndxs])\n",
914-
" axes[ax_nb].scatter(outlier_dates, 1000*outlier_dis,\n",
915-
" 3**2, 'firebrick', label='outliers')\n",
903+
" if gnss_fit.n_outliers > 0:\n",
904+
" axes[ax_nb].scatter(gnss_fit.outlier_dates,\n",
905+
" 1000*gnss_fit.outlier_dis,\n",
906+
" 2**2, 'firebrick', label='outliers')\n",
907+
"\n",
908+
" # Plot filtered data and model fit\n",
909+
" axes[ax_nb].scatter(gnss_fit.dates, 1000*gnss_fit.dis, 3**2,\n",
910+
" 'dimgrey', zorder=1)\n",
911+
" axes[ax_nb].plot(gnss_fit.dates, 1000*gnss_fit.dis_hat,\n",
912+
" 'c', linewidth=3, label='model fit', zorder=2)\n",
916913
" \n",
917914
" # Format plot\n",
918915
" axes[ax_nb].legend()\n",
@@ -922,8 +919,7 @@
922919
" rotation=80)\n",
923920
" axes[ax_nb].set_ylabel('LOS displacement (mm)')\n",
924921
" axes[ax_nb].set_title(f'{gnss_stn.site:s} ({gnss_stn.vel_dict['vel_mm_yr']:.1f} mm/yr)')\n",
925-
" fig.tight_layout()\n",
926-
" "
922+
" fig.tight_layout()\n"
927923
]
928924
},
929925
{

solid_utils/fitting.py

Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
# Author: R Zinke
2+
# March, 2025
3+
4+
import numpy as np
5+
from scipy import stats
6+
7+
from mintpy.utils import time_func, utils as ut
8+
9+
10+
class TSModelFit:
    def __init__(self, dates:np.ndarray, dis:np.ndarray, model:dict, conf=95.45):
        """Fit a time-function model to a displacement time-series.

        Parameters: dates - np.ndarray, dates as Python datetime objects
                    dis - np.ndarray, displacements
                    model - dict, time function model
                    conf - float, confidence level in percent for the
                           error envelope (default 95.45, i.e. ~2 sigma)
        Attributes: dis_hat - np.ndarray, array of predicted displacement
                              values
                    m_hat - np.ndarray, model fit parameters
                    mhat_se - np.ndarray, standard errors for model fit params
                    err_envelope - list of np.ndarray, lower and upper
                                   confidence bounds on dis_hat
        """
        # Record parameters
        self.model = model
        if len(dates) != len(dis):
            raise ValueError('Must have same number of dates and displacements')
        self.dates = dates
        self.dis = dis
        self.n = len(dates)

        # Construct design matrix from dates and model
        date_list = [date.strftime('%Y%m%d') for date in dates]
        G = time_func.get_design_matrix4time_func(date_list, self.model)

        # Invert for model parameters (least squares via pseudoinverse)
        self.m_hat = np.linalg.pinv(G).dot(self.dis)

        # Predict displacements
        self.dis_hat = np.dot(G, self.m_hat)

        # Quantify error on model parameters.
        # Residual degrees of freedom = n observations - n parameters.
        resids = self.dis - self.dis_hat
        sse = np.sum(resids**2)
        n_params = len(self.m_hat)
        dof = self.n - n_params
        pcov = sse/dof * np.linalg.inv(np.dot(G.T, G))
        self.mhat_se = np.sqrt(np.diag(pcov))

        # Propagate parameter uncertainty to predicted displacements
        dcov = G.dot(pcov).dot(G.T)
        derr = np.sqrt(np.diag(dcov))

        # Error envelope. The Student's-t quantile must use the residual
        # degrees of freedom (n - n_params), not the number of model
        # parameters as before.
        err_scale = stats.t.interval(conf/100, dof)
        err_lower = self.dis_hat + err_scale[0] * derr
        err_upper = self.dis_hat + err_scale[1] * derr
        self.err_envelope = [err_lower, err_upper]
59+
60+
class IterativeOutlierFit:
    @staticmethod
    def outliers_zscore(dis:np.ndarray, dis_hat:np.ndarray, threshold:float):
        """Identify outliers using the z-score metric.

        Compute the number of standard deviations the residuals are from
        the model prediction and return a boolean mask of values greater
        than the specified threshold.

        Parameters: dis - np.ndarray, array of displacement values
                    dis_hat - np.ndarray, array of predicted displacement
                              values
                    threshold - float, z-score value (standard deviations)
                                beyond which to exclude data
        Returns:    outlier_ndxs - np.ndarray, boolean array where True
                                   indicates an outlier
                    n_outliers - int, number of outliers
        """
        resids = dis - dis_hat
        zscores = resids / np.std(resids)
        outlier_ndxs = np.abs(zscores) > threshold
        n_outliers = int(np.sum(outlier_ndxs))

        return outlier_ndxs, n_outliers

    def __init__(self, dates, dis, model, threshold=3, max_iter=2):
        """Fit a time-series model, iteratively removing z-score outliers.

        Parameters: dates - np.ndarray, dates as Python datetime objects
                    dis - np.ndarray, displacements
                    model - dict, time function model
                    threshold - float, standard deviations beyond which
                                values are considered outliers
                    max_iter - int, maximum number of rejection iterations
                               before stopping
        Attributes: dates, dis - np.ndarray, data retained after rejection
                    outlier_dates - np.ndarray, dates of rejected points
                    outlier_dis - np.ndarray, displacements of rejected
                                  points
                    n_outliers - int, number of points actually removed
                    dis_hat - np.ndarray, array of predicted displacement
                              values
                    m_hat - np.ndarray, model fit parameters
                    mhat_se - np.ndarray, standard errors for model fit
                              params
                    err_envelope - list of np.ndarray, lower and upper
                                   confidence bounds
        """
        # Record parameters
        self.dates = dates
        self.dis = dis
        self.model = model
        self.outlier_threshold = threshold

        # Initialize rejection bookkeeping
        self.iters = 0
        self.outlier_dates = np.array([])
        self.outlier_dis = np.array([])

        # Initial fit and outlier detection
        ts_model = TSModelFit(self.dates, self.dis, self.model)
        (outlier_ndxs,
         n_outliers) = self.outliers_zscore(self.dis, ts_model.dis_hat,
                                            self.outlier_threshold)

        # Iteratively remove outliers and refit until no outliers remain
        # or the iteration budget is exhausted
        while (n_outliers > 0) and (self.iters < max_iter):
            # Move flagged points into the rejected arrays
            self.outlier_dates = np.append(self.outlier_dates,
                                           self.dates[outlier_ndxs])
            self.outlier_dis = np.append(self.outlier_dis,
                                         self.dis[outlier_ndxs])

            self.dates = self.dates[~outlier_ndxs]
            self.dis = self.dis[~outlier_ndxs]

            # Refit on the cleaned data and re-detect outliers
            ts_model = TSModelFit(self.dates, self.dis, self.model)
            (outlier_ndxs,
             n_outliers) = self.outliers_zscore(self.dis, ts_model.dis_hat,
                                                self.outlier_threshold)

            # Update iteration counter
            self.iters += 1

        # Count only the points actually removed. The previous version
        # also accumulated detections from the final iteration that were
        # never removed (when max_iter was reached), so n_outliers could
        # disagree with outlier_dates/outlier_dis used for plotting.
        self.n_outliers = len(self.outlier_dis)

        # Record final fit results
        self.m_hat = ts_model.m_hat
        self.mhat_se = ts_model.mhat_se
        self.dis_hat = ts_model.dis_hat
        self.err_envelope = ts_model.err_envelope

0 commit comments

Comments
 (0)