@@ -1747,6 +1747,7 @@ def residuals(p):
17471747 b2 = result .params .valuesdict ()[f'b2_{ s } ' ]
17481748 c2 = result .params .valuesdict ()[f'c2_{ s } ' ]
17491749 r [f'D{ self ._4x } ' ] = (r [f'D{ self ._4x } raw' ] - c - b * r [f'd{ self ._4x } ' ] - c2 * r ['t' ] - b2 * r ['t' ] * r [f'd{ self ._4x } ' ]) / (a + a2 * r ['t' ])
1750+
17501751
17511752 self .standardization = result
17521753
@@ -2230,7 +2231,7 @@ def consolidate_samples(self):
22302231 D4x_pop = [r [f'D{ self ._4x } ' ] for r in self .samples [sample ]['data' ]]
22312232 if len (D4x_pop ) > 2 :
22322233 self .samples [sample ]['p_Levene' ] = levene (D4x_ref_pop , D4x_pop , center = 'median' )[1 ]
2233-
2234+
22342235 if self .standardization_method == 'pooled' :
22352236 for sample in self .anchors :
22362237 self .samples [sample ][f'D{ self ._4x } ' ] = self .Nominal_D4x [sample ]
@@ -2270,6 +2271,10 @@ def consolidate_samples(self):
22702271 for s in weights :
22712272 self .unknowns [sample ][f'session_D{ self ._4x } ' ][s ] += [self .unknowns [sample ][f'session_D{ self ._4x } ' ][s ][1 ]** - 2 / wsum ]
22722273
2274+ for r in self :
2275+ r [f'D{ self ._4x } _residual' ] = r [f'D{ self ._4x } ' ] - self .samples [r ['Sample' ]][f'D{ self ._4x } ' ]
2276+
2277+
22732278
22742279 def consolidate_sessions (self ):
22752280 '''
@@ -2469,9 +2474,6 @@ def compute_r(self, key, samples = 'all samples', sessions = 'all sessions'):
24692474 '''
24702475 Compute the repeatability of `[r[key] for r in self]`
24712476 '''
2472- # NB: it's debatable whether rD47 should be computed
2473- # with Nf = len(self)-len(self.samples) instead of
2474- # Nf = len(self) - len(self.unknwons) - 3*len(self.sessions)
24752477
24762478 if samples == 'all samples' :
24772479 mysamples = [k for k in self .samples ]
@@ -2486,17 +2488,43 @@ def compute_r(self, key, samples = 'all samples', sessions = 'all sessions'):
24862488 sessions = [k for k in self .sessions ]
24872489
24882490 if key in ['D47' , 'D48' ]:
2489- chisq , Nf = 0 , 0
2490- for sample in mysamples :
2491- X = [ r [key ] for r in self if r ['Sample' ] == sample and r ['Session' ] in sessions ]
2492- if len (X ) > 1 :
2493- chisq += np .sum ([ (x - self .samples [sample ][key ])** 2 for x in X ])
2494- if sample in self .unknowns :
2495- Nf += len (X ) - 1
2496- else :
2497- Nf += len (X )
2498- if samples in ['anchors' , 'all samples' ]:
2499- Nf -= sum ([self .sessions [s ]['Np' ] for s in sessions ])
2491+ # Full disclosure: the definition of Nf is tricky/debatable
2492+ G = [r for r in self if r ['Sample' ] in mysamples and r ['Session' ] in sessions ]
2493+ chisq = (np .array ([r [f'{ key } _residual' ] for r in G ])** 2 ).sum ()
2494+ Nf = len (G )
2495+ # print(f'len(G) = {Nf}')
2496+ Nf -= len ([s for s in mysamples if s in self .unknowns ])
2497+ # print(f'{len([s for s in mysamples if s in self.unknowns])} unknown samples to consider')
2498+ for session in sessions :
2499+ Np = len ([
2500+ _ for _ in self .standardization .params
2501+ if (
2502+ self .standardization .params [_ ].expr is not None
2503+ and (
2504+ (_ [0 ] in 'abc' and _ [1 ] == '_' and _ [2 :] == pf (session ))
2505+ or (_ [0 ] in 'abc' and _ [1 :3 ] == '2_' and _ [3 :] == pf (session ))
2506+ )
2507+ )
2508+ ])
2509+ # print(f'session {session}: {Np} parameters to consider')
2510+ Na = len ({
2511+ r ['Sample' ] for r in self .sessions [session ]['data' ]
2512+ if r ['Sample' ] in self .anchors and r ['Sample' ] in mysamples
2513+ })
2514+ # print(f'session {session}: {Na} different anchors in that session')
2515+ Nf -= min (Np , Na )
2516+ # print(f'Nf = {Nf}')
2517+
2518+ # for sample in mysamples :
2519+ # X = [ r[key] for r in self if r['Sample'] == sample and r['Session'] in sessions ]
2520+ # if len(X) > 1 :
2521+ # chisq += np.sum([ (x-self.samples[sample][key])**2 for x in X ])
2522+ # if sample in self.unknowns:
2523+ # Nf += len(X) - 1
2524+ # else:
2525+ # Nf += len(X)
2526+ # if samples in ['anchors', 'all samples']:
2527+ # Nf -= sum([self.sessions[s]['Np'] for s in sessions])
25002528 r = (chisq / Nf )** .5 if Nf > 0 else 0
25012529
25022530 else : # if key not in ['D47', 'D48']
@@ -2707,6 +2735,7 @@ def plot_single_session(self,
27072735
27082736 def plot_residuals (
27092737 self ,
2738+ kde = False ,
27102739 hist = False ,
27112740 binwidth = 2 / 3 ,
27122741 dir = 'output' ,
@@ -2719,16 +2748,20 @@ def plot_residuals(
27192748 Plot residuals of each analysis as a function of time (actually, as a function of
27202749 the order of analyses in the `D4xdata` object)
27212750
2722- + `hist`: whether to add a histogram of residuals
2751+ + `kde`: whether to add a kernel density estimate of residuals
2752+ + `hist`: whether to add a histogram of residuals (incompatible with `kde`)
27232753 + `histbins`: specify bin edges for the histogram
27242754 + `dir`: the directory in which to save the plot
27252755 + `highlight`: a list of samples to highlight
27262756 + `colors`: a dict of `{<sample>: <color>}` for all samples
27272757 + `figsize`: (width, height) of figure
27282758 '''
2759+
2760+ from matplotlib import ticker
2761+
27292762 # Layout
27302763 fig = ppl .figure (figsize = (8 ,4 ) if figsize is None else figsize )
2731- if hist :
2764+ if hist or kde :
27322765 ppl .subplots_adjust (left = .08 , bottom = .05 , right = .98 , top = .8 , wspace = - 0.72 )
27332766 ax1 , ax2 = ppl .subplot (121 ), ppl .subplot (1 ,15 ,15 )
27342767 else :
@@ -2760,6 +2793,8 @@ def plot_residuals(
27602793
27612794 ppl .axhline (0 , color = 'k' , alpha = .25 , lw = 0.75 )
27622795
2796+ ax1 .yaxis .set_major_formatter (ticker .FuncFormatter (lambda x , pos : f'${ x :+.0f} $' if x else '$0$' ))
2797+
27632798 session = self [0 ]['Session' ]
27642799 x1 = 0
27652800# ymax = np.max([1e3 * (r['D47'] - self.samples[r['Sample']]['D47']) for r in self])
@@ -2792,15 +2827,15 @@ def plot_residuals(
27922827 )
27932828 if highlight and r ['Sample' ] not in highlight :
27942829 kw ['alpha' ] = 0.2
2795- ppl .plot (k , 1e3 * ( r [ 'D47' ] - self .samples [ r [ 'Sample' ]][ 'D47' ]) , ** kw )
2830+ ppl .plot (k , 1e3 * r [ f'D { self ._4x } _residual' ] , ** kw )
27962831 x2 = k
27972832 x_sessions [session ] = (x1 + x2 )/ 2
27982833
2799- ppl .axhspan (- self .repeatability ['r_D47 ' ]* 1000 , self .repeatability ['r_D47 ' ]* 1000 , color = 'k' , alpha = .05 , lw = 1 )
2800- ppl .axhspan (- self .repeatability ['r_D47 ' ]* 1000 * self .t95 , self .repeatability ['r_D47 ' ]* 1000 * self .t95 , color = 'k' , alpha = .05 , lw = 1 )
2801- if not hist :
2802- ppl .text (len (self ), self .repeatability ['r_D47 ' ]* 1000 , f" SD = { self .repeatability ['r_D47 ' ]* 1000 :.1f} ppm" , size = 9 , alpha = 1 , va = 'center' )
2803- ppl .text (len (self ), self .repeatability ['r_D47 ' ]* 1000 * self .t95 , f" 95% CL = ± { self .repeatability ['r_D47 ' ]* 1000 * self .t95 :.1f} ppm" , size = 9 , alpha = 1 , va = 'center' )
2834+ ppl .axhspan (- self .repeatability [f'r_D { self . _4x } ' ]* 1000 , self .repeatability [f'r_D { self . _4x } ' ]* 1000 , color = 'k' , alpha = .05 , lw = 1 )
2835+ ppl .axhspan (- self .repeatability [f'r_D { self . _4x } ' ]* 1000 * self .t95 , self .repeatability [f'r_D { self . _4x } ' ]* 1000 * self .t95 , color = 'k' , alpha = .05 , lw = 1 )
2836+ if not ( hist or kde ) :
2837+ ppl .text (len (self ), self .repeatability [f'r_D { self . _4x } ' ]* 1000 , f" SD = { self .repeatability ['r_D{self._4x} ' ]* 1000 :.1f} ppm" , size = 9 , alpha = 1 , va = 'center' )
2838+ ppl .text (len (self ), self .repeatability [f'r_D { self . _4x } ' ]* 1000 * self .t95 , f" 95% CL = ± { self .repeatability ['r_D{self._4x} ' ]* 1000 * self .t95 :.1f} ppm" , size = 9 , alpha = 1 , va = 'center' )
28042839
28052840 xmin , xmax , ymin , ymax = ppl .axis ()
28062841 for s in x_sessions :
@@ -2816,7 +2851,7 @@ def plot_residuals(
28162851 )
28172852 )
28182853
2819- if hist :
2854+ if hist or kde :
28202855 ppl .sca (ax2 )
28212856
28222857 for s in colors :
@@ -2843,34 +2878,43 @@ def plot_residuals(
28432878 kw ['label' ] = 'other (N$\\ ,$>$\\ ,$1)' if one_or_more_singlets else 'other'
28442879 ppl .plot ([], [], ** kw )
28452880
2846- if hist :
2881+ if hist or kde :
28472882 leg = ppl .legend (loc = 'upper right' , bbox_to_anchor = (1 , 1 ), bbox_transform = fig .transFigure , borderaxespad = 1.5 , fontsize = 9 )
28482883 else :
28492884 leg = ppl .legend (loc = 'lower right' , bbox_to_anchor = (1 , 0 ), bbox_transform = fig .transFigure , borderaxespad = 1.5 )
28502885 leg .set_zorder (- 1000 )
28512886
28522887 ppl .sca (ax1 )
28532888
2854- ppl .ylabel ('Δ$_{47 }$ residuals (ppm)' )
2889+ ppl .ylabel (f 'Δ$_{{ { self . _4x } } }$ residuals (ppm)' )
28552890 ppl .xticks ([])
28562891 ppl .axis ([- 1 , len (self ), None , None ])
28572892
2858- if hist :
2893+ if hist or kde :
28592894 ppl .sca (ax2 )
2860- X = [1e3 * (r ['D47' ] - self .samples [r ['Sample' ]]['D47' ]) for r in self if r ['Sample' ] in multiplets ]
2861- ppl .hist (
2862- X ,
2863- orientation = 'horizontal' ,
2864- histtype = 'stepfilled' ,
2865- ec = [.4 ]* 3 ,
2866- fc = [.25 ]* 3 ,
2867- alpha = .25 ,
2868- bins = np .linspace (- 9e3 * self .repeatability ['r_D47' ], 9e3 * self .repeatability ['r_D47' ], int (18 / binwidth + 1 )),
2869- )
2870- ppl .axis ([None , None , ymin , ymax ])
2895+ X = 1e3 * np .array ([r [f'D{ self ._4x } _residual' ] for r in self if r ['Sample' ] in multiplets or r ['Sample' ] in self .anchors ])
2896+
2897+ if kde :
2898+ from scipy .stats import gaussian_kde
2899+ yi = np .linspace (ymin , ymax , 201 )
2900+ xi = gaussian_kde (X ).evaluate (yi )
2901+ ppl .fill_betweenx (yi , xi , xi * 0 , fc = (0 ,0 ,0 ,.15 ), lw = 1 , ec = (.75 ,.75 ,.75 ,1 ))
2902+ # ppl.plot(xi, yi, 'k-', lw = 1)
2903+ ppl .axis ([0 , None , ymin , ymax ])
2904+ elif hist :
2905+ ppl .hist (
2906+ X ,
2907+ orientation = 'horizontal' ,
2908+ histtype = 'stepfilled' ,
2909+ ec = [.4 ]* 3 ,
2910+ fc = [.25 ]* 3 ,
2911+ alpha = .25 ,
2912+ bins = np .linspace (- 9e3 * self .repeatability [f'r_D{ self ._4x } ' ], 9e3 * self .repeatability [f'r_D{ self ._4x } ' ], int (18 / binwidth + 1 )),
2913+ )
2914+ ppl .axis ([None , None , ymin , ymax ])
28712915 ppl .text (0 , 0 ,
2872- f" SD = { self .repeatability ['r_D47 ' ]* 1000 :.1f} ppm\n 95% CL = ± { self .repeatability ['r_D47 ' ]* 1000 * self .t95 :.1f} ppm" ,
2873- size = 8 ,
2916+ f" SD = { self .repeatability [f'r_D { self . _4x } ' ]* 1000 :.1f} ppm\n 95% CL = ± { self .repeatability [f'r_D { self . _4x } ' ]* 1000 * self .t95 :.1f} ppm" ,
2917+ size = 7.5 ,
28742918 alpha = 1 ,
28752919 va = 'center' ,
28762920 ha = 'left' ,
0 commit comments