Skip to content

Commit b54455f

Browse files
committed
Fix Nf in D4xdata.compute_r() and improve D4xdata.plot_residuals()
1 parent 129c63b commit b54455f

File tree

4 files changed

+4617
-4410
lines changed

4 files changed

+4617
-4410
lines changed

D47crunch/__init__.py

Lines changed: 84 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1747,6 +1747,7 @@ def residuals(p):
17471747
b2 = result.params.valuesdict()[f'b2_{s}']
17481748
c2 = result.params.valuesdict()[f'c2_{s}']
17491749
r[f'D{self._4x}'] = (r[f'D{self._4x}raw'] - c - b * r[f'd{self._4x}'] - c2 * r['t'] - b2 * r['t'] * r[f'd{self._4x}']) / (a + a2 * r['t'])
1750+
17501751

17511752
self.standardization = result
17521753

@@ -2230,7 +2231,7 @@ def consolidate_samples(self):
22302231
D4x_pop = [r[f'D{self._4x}'] for r in self.samples[sample]['data']]
22312232
if len(D4x_pop) > 2:
22322233
self.samples[sample]['p_Levene'] = levene(D4x_ref_pop, D4x_pop, center = 'median')[1]
2233-
2234+
22342235
if self.standardization_method == 'pooled':
22352236
for sample in self.anchors:
22362237
self.samples[sample][f'D{self._4x}'] = self.Nominal_D4x[sample]
@@ -2270,6 +2271,10 @@ def consolidate_samples(self):
22702271
for s in weights:
22712272
self.unknowns[sample][f'session_D{self._4x}'][s] += [self.unknowns[sample][f'session_D{self._4x}'][s][1]**-2 / wsum]
22722273

2274+
for r in self:
2275+
r[f'D{self._4x}_residual'] = r[f'D{self._4x}'] - self.samples[r['Sample']][f'D{self._4x}']
2276+
2277+
22732278

22742279
def consolidate_sessions(self):
22752280
'''
@@ -2469,9 +2474,6 @@ def compute_r(self, key, samples = 'all samples', sessions = 'all sessions'):
24692474
'''
24702475
Compute the repeatability of `[r[key] for r in self]`
24712476
'''
2472-
# NB: it's debatable whether rD47 should be computed
2473-
# with Nf = len(self)-len(self.samples) instead of
2474-
# Nf = len(self) - len(self.unknwons) - 3*len(self.sessions)
24752477

24762478
if samples == 'all samples':
24772479
mysamples = [k for k in self.samples]
@@ -2486,17 +2488,43 @@ def compute_r(self, key, samples = 'all samples', sessions = 'all sessions'):
24862488
sessions = [k for k in self.sessions]
24872489

24882490
if key in ['D47', 'D48']:
2489-
chisq, Nf = 0, 0
2490-
for sample in mysamples :
2491-
X = [ r[key] for r in self if r['Sample'] == sample and r['Session'] in sessions ]
2492-
if len(X) > 1 :
2493-
chisq += np.sum([ (x-self.samples[sample][key])**2 for x in X ])
2494-
if sample in self.unknowns:
2495-
Nf += len(X) - 1
2496-
else:
2497-
Nf += len(X)
2498-
if samples in ['anchors', 'all samples']:
2499-
Nf -= sum([self.sessions[s]['Np'] for s in sessions])
2491+
# Full disclosure: the definition of Nf is tricky/debatable
2492+
G = [r for r in self if r['Sample'] in mysamples and r['Session'] in sessions]
2493+
chisq = (np.array([r[f'{key}_residual'] for r in G])**2).sum()
2494+
Nf = len(G)
2495+
# print(f'len(G) = {Nf}')
2496+
Nf -= len([s for s in mysamples if s in self.unknowns])
2497+
# print(f'{len([s for s in mysamples if s in self.unknowns])} unknown samples to consider')
2498+
for session in sessions:
2499+
Np = len([
2500+
_ for _ in self.standardization.params
2501+
if (
2502+
self.standardization.params[_].expr is not None
2503+
and (
2504+
(_[0] in 'abc' and _[1] == '_' and _[2:] == pf(session))
2505+
or (_[0] in 'abc' and _[1:3] == '2_' and _[3:] == pf(session))
2506+
)
2507+
)
2508+
])
2509+
# print(f'session {session}: {Np} parameters to consider')
2510+
Na = len({
2511+
r['Sample'] for r in self.sessions[session]['data']
2512+
if r['Sample'] in self.anchors and r['Sample'] in mysamples
2513+
})
2514+
# print(f'session {session}: {Na} different anchors in that session')
2515+
Nf -= min(Np, Na)
2516+
# print(f'Nf = {Nf}')
2517+
2518+
# for sample in mysamples :
2519+
# X = [ r[key] for r in self if r['Sample'] == sample and r['Session'] in sessions ]
2520+
# if len(X) > 1 :
2521+
# chisq += np.sum([ (x-self.samples[sample][key])**2 for x in X ])
2522+
# if sample in self.unknowns:
2523+
# Nf += len(X) - 1
2524+
# else:
2525+
# Nf += len(X)
2526+
# if samples in ['anchors', 'all samples']:
2527+
# Nf -= sum([self.sessions[s]['Np'] for s in sessions])
25002528
r = (chisq / Nf)**.5 if Nf > 0 else 0
25012529

25022530
else: # if key not in ['D47', 'D48']
@@ -2707,6 +2735,7 @@ def plot_single_session(self,
27072735

27082736
def plot_residuals(
27092737
self,
2738+
kde = False,
27102739
hist = False,
27112740
binwidth = 2/3,
27122741
dir = 'output',
@@ -2719,16 +2748,20 @@ def plot_residuals(
27192748
Plot residuals of each analysis as a function of time (actually, as a function of
27202749
the order of analyses in the `D4xdata` object)
27212750
2722-
+ `hist`: whether to add a histogram of residuals
2751+
+ `kde`: whether to add a kernel density estimate of residuals
2752+
+ `hist`: whether to add a histogram of residuals (incompatible with `kde`)
27232753
+ `histbins`: specify bin edges for the histogram
27242754
+ `dir`: the directory in which to save the plot
27252755
+ `highlight`: a list of samples to highlight
27262756
+ `colors`: a dict of `{<sample>: <color>}` for all samples
27272757
+ `figsize`: (width, height) of figure
27282758
'''
2759+
2760+
from matplotlib import ticker
2761+
27292762
# Layout
27302763
fig = ppl.figure(figsize = (8,4) if figsize is None else figsize)
2731-
if hist:
2764+
if hist or kde:
27322765
ppl.subplots_adjust(left = .08, bottom = .05, right = .98, top = .8, wspace = -0.72)
27332766
ax1, ax2 = ppl.subplot(121), ppl.subplot(1,15,15)
27342767
else:
@@ -2760,6 +2793,8 @@ def plot_residuals(
27602793

27612794
ppl.axhline(0, color = 'k', alpha = .25, lw = 0.75)
27622795

2796+
ax1.yaxis.set_major_formatter(ticker.FuncFormatter(lambda x, pos: f'${x:+.0f}$' if x else '$0$'))
2797+
27632798
session = self[0]['Session']
27642799
x1 = 0
27652800
# ymax = np.max([1e3 * (r['D47'] - self.samples[r['Sample']]['D47']) for r in self])
@@ -2792,15 +2827,15 @@ def plot_residuals(
27922827
)
27932828
if highlight and r['Sample'] not in highlight:
27942829
kw['alpha'] = 0.2
2795-
ppl.plot(k, 1e3 * (r['D47'] - self.samples[r['Sample']]['D47']), **kw)
2830+
ppl.plot(k, 1e3 * r[f'D{self._4x}_residual'], **kw)
27962831
x2 = k
27972832
x_sessions[session] = (x1+x2)/2
27982833

2799-
ppl.axhspan(-self.repeatability['r_D47']*1000, self.repeatability['r_D47']*1000, color = 'k', alpha = .05, lw = 1)
2800-
ppl.axhspan(-self.repeatability['r_D47']*1000*self.t95, self.repeatability['r_D47']*1000*self.t95, color = 'k', alpha = .05, lw = 1)
2801-
if not hist:
2802-
ppl.text(len(self), self.repeatability['r_D47']*1000, f" SD = {self.repeatability['r_D47']*1000:.1f} ppm", size = 9, alpha = 1, va = 'center')
2803-
ppl.text(len(self), self.repeatability['r_D47']*1000*self.t95, f" 95% CL = ± {self.repeatability['r_D47']*1000*self.t95:.1f} ppm", size = 9, alpha = 1, va = 'center')
2834+
ppl.axhspan(-self.repeatability[f'r_D{self._4x}']*1000, self.repeatability[f'r_D{self._4x}']*1000, color = 'k', alpha = .05, lw = 1)
2835+
ppl.axhspan(-self.repeatability[f'r_D{self._4x}']*1000*self.t95, self.repeatability[f'r_D{self._4x}']*1000*self.t95, color = 'k', alpha = .05, lw = 1)
2836+
if not (hist or kde):
2837+
ppl.text(len(self), self.repeatability[f'r_D{self._4x}']*1000, f" SD = {self.repeatability['r_D{self._4x}']*1000:.1f} ppm", size = 9, alpha = 1, va = 'center')
2838+
ppl.text(len(self), self.repeatability[f'r_D{self._4x}']*1000*self.t95, f" 95% CL = ± {self.repeatability['r_D{self._4x}']*1000*self.t95:.1f} ppm", size = 9, alpha = 1, va = 'center')
28042839

28052840
xmin, xmax, ymin, ymax = ppl.axis()
28062841
for s in x_sessions:
@@ -2816,7 +2851,7 @@ def plot_residuals(
28162851
)
28172852
)
28182853

2819-
if hist:
2854+
if hist or kde:
28202855
ppl.sca(ax2)
28212856

28222857
for s in colors:
@@ -2843,34 +2878,43 @@ def plot_residuals(
28432878
kw['label'] = 'other (N$\\,$>$\\,$1)' if one_or_more_singlets else 'other'
28442879
ppl.plot([], [], **kw)
28452880

2846-
if hist:
2881+
if hist or kde:
28472882
leg = ppl.legend(loc = 'upper right', bbox_to_anchor = (1, 1), bbox_transform=fig.transFigure, borderaxespad = 1.5, fontsize = 9)
28482883
else:
28492884
leg = ppl.legend(loc = 'lower right', bbox_to_anchor = (1, 0), bbox_transform=fig.transFigure, borderaxespad = 1.5)
28502885
leg.set_zorder(-1000)
28512886

28522887
ppl.sca(ax1)
28532888

2854-
ppl.ylabel('Δ$_{47}$ residuals (ppm)')
2889+
ppl.ylabel(f'Δ$_{{{self._4x}}}$ residuals (ppm)')
28552890
ppl.xticks([])
28562891
ppl.axis([-1, len(self), None, None])
28572892

2858-
if hist:
2893+
if hist or kde:
28592894
ppl.sca(ax2)
2860-
X = [1e3 * (r['D47'] - self.samples[r['Sample']]['D47']) for r in self if r['Sample'] in multiplets]
2861-
ppl.hist(
2862-
X,
2863-
orientation = 'horizontal',
2864-
histtype = 'stepfilled',
2865-
ec = [.4]*3,
2866-
fc = [.25]*3,
2867-
alpha = .25,
2868-
bins = np.linspace(-9e3*self.repeatability['r_D47'], 9e3*self.repeatability['r_D47'], int(18/binwidth+1)),
2869-
)
2870-
ppl.axis([None, None, ymin, ymax])
2895+
X = 1e3 * np.array([r[f'D{self._4x}_residual'] for r in self if r['Sample'] in multiplets or r['Sample'] in self.anchors])
2896+
2897+
if kde:
2898+
from scipy.stats import gaussian_kde
2899+
yi = np.linspace(ymin, ymax, 201)
2900+
xi = gaussian_kde(X).evaluate(yi)
2901+
ppl.fill_betweenx(yi, xi, xi*0, fc = (0,0,0,.15), lw = 1, ec = (.75,.75,.75,1))
2902+
# ppl.plot(xi, yi, 'k-', lw = 1)
2903+
ppl.axis([0, None, ymin, ymax])
2904+
elif hist:
2905+
ppl.hist(
2906+
X,
2907+
orientation = 'horizontal',
2908+
histtype = 'stepfilled',
2909+
ec = [.4]*3,
2910+
fc = [.25]*3,
2911+
alpha = .25,
2912+
bins = np.linspace(-9e3*self.repeatability[f'r_D{self._4x}'], 9e3*self.repeatability[f'r_D{self._4x}'], int(18/binwidth+1)),
2913+
)
2914+
ppl.axis([None, None, ymin, ymax])
28712915
ppl.text(0, 0,
2872-
f" SD = {self.repeatability['r_D47']*1000:.1f} ppm\n 95% CL = ± {self.repeatability['r_D47']*1000*self.t95:.1f} ppm",
2873-
size = 8,
2916+
f" SD = {self.repeatability[f'r_D{self._4x}']*1000:.1f} ppm\n 95% CL = ± {self.repeatability[f'r_D{self._4x}']*1000*self.t95:.1f} ppm",
2917+
size = 7.5,
28742918
alpha = 1,
28752919
va = 'center',
28762920
ha = 'left',

changelog.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,14 @@
11
# Changelog
22

3+
### Bugfix
4+
* `D4xdata.compute_r()` uses an improved computation for degrees of freedom for arbitrary subsets of sessions and/or samples, yielding more estimates of analytical repeatabilities for Δ47 and Δ48.
5+
6+
### New feature
7+
* Added `kde` option to `D4xdata.plot_residuals()`
8+
9+
### Other changes
10+
* Minor improvement to y axis tick labels) in `D4xdata.plot_residuals()`.
11+
312
## v2.1.0
413
*Released on 2023-05-14*
514

0 commit comments

Comments
 (0)