Debias wn (#1238)

ykyohei · web-flow · commit 8f3182b76c63 · 2025-05-30T12:45:23.000-04:00
* debias wn

* draft of test_psd.py

* update test

* making changes to be non-breaking

* fix

* address comments
diff --git a/sotodlib/preprocess/processes.py b/sotodlib/preprocess/processes.py
@@ -356,41 +356,55 @@ class PSDCalc(_Preprocess):
     """ Calculate the PSD of the data and add it to the Preprocessing AxisManager under the
     "psd" field.
 
+    Note: noverlap = 0 and full_output = True are recommended to get unbiased
+        median white noise estimation by Noise.
+
     Example config block::
 
       - "name : "psd"
         "signal: "signal" # optional
         "wrap": "psd" # optional
         "calc":
-          "psd_cfgs": # optional, kwargs to scipy.welch
-            "nperseg": 1024
+          "nperseg": 1024 # optional
+          "noverlap": 0 # optional
           "wrap_name": "psd" # optional
-          "subscan": False
+          "subscan": False # optional
+          "full_output": True # optional
         "save": True
 
     .. autofunction:: sotodlib.tod_ops.fft_ops.calc_psd
     """
     name = "psd"
-    
+
     def __init__(self, step_cfgs):
         self.signal = step_cfgs.get('signal', 'signal')
         self.wrap = step_cfgs.get('wrap', 'psd')
 
         super().__init__(step_cfgs)
 
     def calc_and_save(self, aman, proc_aman):
-        freqs, Pxx = tod_ops.fft_ops.calc_psd(aman, signal=aman[self.signal],
-                                              **self.calc_cfgs)
+        full_output = self.calc_cfgs.get('full_output')
+        if full_output:
+            freqs, Pxx, nseg = tod_ops.fft_ops.calc_psd(aman, signal=aman[self.signal],
+                                                        **self.calc_cfgs)
+        else:
+            freqs, Pxx = tod_ops.fft_ops.calc_psd(aman, signal=aman[self.signal],
+                                                  **self.calc_cfgs)
 
         fft_aman = core.AxisManager(aman.dets,
                                     core.OffsetAxis("nusamps", len(freqs)))
         pxx_axis_map = [(0, "dets"), (1, "nusamps")]
         if self.calc_cfgs.get('subscan', False):
             fft_aman.wrap("Pxx_ss", Pxx, pxx_axis_map+[(2, aman.subscans)])
             Pxx = np.nanmean(Pxx, axis=-1) # Mean of subscans
+            if full_output:
+                fft_aman.wrap("nseg_ss", nseg, [(0, aman.subscans)])
+                nseg = np.nansum(nseg)
 
         fft_aman.wrap("freqs", freqs, [(0,"nusamps")])
         fft_aman.wrap("Pxx", Pxx, pxx_axis_map)
+        if full_output:
+            fft_aman.wrap("nseg", nseg)
 
         self.save(proc_aman, fft_aman)
 
@@ -559,6 +573,7 @@ def calc_and_save(self, aman, proc_aman):
                 wn_f_low, wn_f_high = self.calc_cfgs.get('fwhite', (5, 10))
                 self.calc_cfgs['wn_est'] = tod_ops.fft_ops.calc_wn(aman, pxx=pxx,
                                                                    freqs=psd.freqs,
+                                                                   nseg=psd.get('nseg'),
                                                                    low_f=wn_f_low,
                                                                    high_f=wn_f_high)
             if self.calc_cfgs.get('subscan') is None:
@@ -578,6 +593,7 @@ def calc_and_save(self, aman, proc_aman):
             wn_f_high = self.calc_cfgs.get("high_f", 10)
             wn = tod_ops.fft_ops.calc_wn(aman, pxx=pxx,
                                          freqs=psd.freqs,
+                                         nseg=psd.get('nseg'),
                                          low_f=wn_f_low,
                                          high_f=wn_f_high)
             if not self.subscan:
diff --git a/sotodlib/tod_ops/fft_ops.py b/sotodlib/tod_ops/fft_ops.py
@@ -4,14 +4,15 @@
 from functools import lru_cache, partial
 from typing_extensions import Callable
 from numpy.typing import NDArray
-import sys
+import warnings
 import numdifftools as ndt
 import numpy as np
 import pyfftw
 import so3g
-from so3g.proj import Ranges, RangesMatrix
+from so3g.proj import Ranges
 from scipy.optimize import minimize
 from scipy.signal import welch
+from scipy.stats import chi2
 from sotodlib import core, hwp
 from sotodlib.tod_ops import detrend_tod
 
@@ -269,10 +270,11 @@ def calc_psd(
     max_samples=2**18,
     prefer='center',
     freq_spacing=None,
-    merge=False, 
+    merge=False,
     merge_suffix=None,
-    overwrite=True, 
+    overwrite=True,
     subscan=False,
+    full_output=False,
     **kwargs
 ):
     """Calculates the power spectrum density of an input signal using signal.welch().
@@ -295,17 +297,38 @@ def calc_psd(
         merge_suffix (str, optional): Suffix to append to the Pxx field name in aman. Defaults to None (merged as Pxx).
         overwrite (bool): if true will overwrite f, Pxx axes.
         subscan (bool): if True, compute psd on subscans.
+        full_output: if True this also outputs nseg, the number of segments used for
+            welch, for correcting bias of median white noise estimation by calc_wn.
         **kwargs: keyword args to be passed to signal.welch().
 
     Returns:
         freqs: array of frequencies corresponding to PSD calculated from welch.
         Pxx: array of PSD values.
+        nseg: number of segments used for welch. This is only returned if full_output is True.
     """
     if signal is None:
         signal = aman.signal
+
+    if ("noverlap" not in kwargs) or \
+            ("noverlap" in kwargs and kwargs["noverlap"] != 0):
+        warnings.warn('calc_wn will be biased. noverlap argument of welch '
+                      'needs to be 0 to get unbiased median white noise estimate.')
+    if not full_output:
+        warnings.warn('calc_wn will be biased. full_output argument of calc_psd '
+                      'needs to be True to get unbiased median white noise estimate.')
+
     if subscan:
-        freqs, Pxx = _calc_psd_subscan(aman, signal=signal, freq_spacing=freq_spacing, **kwargs)
+        if full_output:
+            freqs, Pxx, nseg = _calc_psd_subscan(aman, signal=signal,
+                                                 freq_spacing=freq_spacing,
+                                                 full_output=True,
+                                                 **kwargs)
+        else:
+            freqs, Pxx = _calc_psd_subscan(aman, signal=signal,
+                                           freq_spacing=freq_spacing,
+                                           **kwargs)
         axis_map_pxx = [(0, "dets"), (1, "nusamps"), (2, "subscans")]
+        axis_map_nseg = [(0, "subscans")]
     else:
         if timestamps is None:
             timestamps = aman.timestamps
@@ -334,8 +357,14 @@ def calc_psd(
                 nperseg = int(2 ** (np.around(np.log2((stop - start) / 50.0))))
             kwargs["nperseg"] = nperseg
 
+        if kwargs["nperseg"] > max_samples:
+            nseg = 1
+        else:
+            nseg = int(max_samples / kwargs["nperseg"])
+
         freqs, Pxx = welch(signal[:, start:stop], fs, **kwargs)
         axis_map_pxx = [(0, aman.dets), (1, "nusamps")]
+        axis_map_nseg = None
 
     if merge:
         if 'nusamps' not in aman:
@@ -345,19 +374,29 @@ def calc_psd(
             if len(freqs) != aman.nusamps.count:
                 raise ValueError('New freqs does not match the shape of nusamps\
                                 To avoid this, use the same value for nperseg')
-        
+
         if merge_suffix is None:
             Pxx_name = 'Pxx'
         else:
             Pxx_name = f'Pxx_{merge_suffix}'
-        
+
         if overwrite:
             if Pxx_name in aman._fields:
                 aman.move("Pxx", None)
         aman.wrap(Pxx_name, Pxx, axis_map_pxx)
-    return freqs, Pxx
 
-def _calc_psd_subscan(aman, signal=None, freq_spacing=None, **kwargs):
+        if full_output:
+            if overwrite and "nseg" in aman._fields:
+                aman.move("nseg", None)
+            aman.wrap("nseg", nseg, axis_map_nseg)
+
+    if full_output:
+        return freqs, Pxx, nseg
+    else:
+        return freqs, Pxx
+
+
+def _calc_psd_subscan(aman, signal=None, freq_spacing=None, full_output=False, **kwargs):
     """
     Calculate the power spectrum density of subscans using signal.welch().
     Data defaults to aman.signal. aman.timestamps is used for times.
@@ -378,20 +417,27 @@ def _calc_psd_subscan(aman, signal=None, freq_spacing=None, **kwargs):
             nperseg = int(2 ** (np.around(np.log2(np.median(duration_samps) / 4))))
         kwargs["nperseg"] = nperseg
 
-    Pxx = []
+    Pxx, nseg = [], []
     for iss in range(aman.subscan_info.subscans.count):
         signal_ss = get_subscan_signal(aman, signal, iss)
         axis = -1 if "axis" not in kwargs else kwargs["axis"]
-        if signal_ss.shape[axis] >= kwargs["nperseg"]:
+        nsamps = signal_ss.shape[axis]
+        if nsamps >= kwargs["nperseg"]:
             freqs, pxx_sub = welch(signal_ss, fs, **kwargs)
             Pxx.append(pxx_sub)
+            nseg.append(int(nsamps / kwargs["nperseg"]))
         else:
             Pxx.append(np.full((signal.shape[0], kwargs["nperseg"]//2+1), np.nan)) # Add nans if subscan is too short
+            nseg.append(np.nan)
+    nseg = np.array(nseg)
     Pxx = np.array(Pxx)
     Pxx = Pxx.transpose(1, 2, 0) # Dets, nusamps, subscans
-    return freqs, Pxx
+    if full_output:
+        return freqs, Pxx, nseg
+    else:
+        return freqs, Pxx
 
-def calc_wn(aman, pxx=None, freqs=None, low_f=5, high_f=10):
+def calc_wn(aman, pxx=None, freqs=None, nseg=None, low_f=5, high_f=10):
     """
     Function that calculates the white noise level as a median PSD value between
     two frequencies. Defaults to calculation of white noise between 5 and 10Hz.
@@ -408,6 +454,13 @@ def calc_wn(aman, pxx=None, freqs=None, low_f=5, high_f=10):
         freqs (1d Float array):
             frequency information related to the psd. Defaults to aman.freqs
 
+        nseg (Int or 1d Int array):
+            number of segments used for welch. Defaults to aman.nseg. This is
+            necessary for debiasing the median white noise estimate. A welch PSD
+            averaged over nseg non-overlapping segments follows a chi-square
+            distribution with 2 * nseg degrees of freedom, and the median of a
+            chi-square distribution is biased low relative to its mean.
+
         low_f (Float):
             low frequency cutoff to calculate median psd value. Defaults to 5Hz
 
@@ -424,12 +477,28 @@ def calc_wn(aman, pxx=None, freqs=None, low_f=5, high_f=10):
     if pxx is None:
         pxx = aman.Pxx
 
+    if nseg is None:
+        nseg = aman.get('nseg')
+
+    if nseg is None:
+        warnings.warn('white noise level estimated by median PSD is biased. '
+                      'nseg is necessary to debias. Need to use following '
+                      'arguments in calc_psd to get correct nseg. '
+                      '`noverlap=0, full_output=True`')
+        debias = None
+    else:
+        debias = 2 * nseg / chi2.ppf(0.5, 2 * nseg)
+
     fmsk = np.all([freqs >= low_f, freqs <= high_f], axis=0)
     if pxx.ndim == 1:
         wn2 = np.median(pxx[fmsk])
     else:
         wn2 = np.median(pxx[:, fmsk], axis=1)
-
+    if debias is not None:
+        if pxx.ndim == 3:
+            wn2 *= debias[None, :]
+        else:
+            wn2 *= debias
     wn = np.sqrt(wn2)
     return wn
 
diff --git a/tests/test_psd.py b/tests/test_psd.py
@@ -0,0 +1,117 @@
+""" Test psd calculation
+"""
+
+
+import unittest
+import numpy as np
+from numpy.fft import rfftfreq, irfft
+
+from sotodlib import core
+from sotodlib.tod_ops import detrend_tod
+from sotodlib.tod_ops.flags import get_turnaround_flags
+from sotodlib.tod_ops.fft_ops import (
+    calc_psd, calc_wn, fit_noise_model, noise_model)
+
+from .test_azss import get_scan
+
+TOL_BIAS = 0.005
+
+
+class PSDTest(unittest.TestCase):
+    def test_psd_fit(self):
+        fs = 200.
+        dets = core.LabelAxis('dets', [f'det{di:003}' for di in range(20)])
+        nsamps = 200 * 3600
+
+        aman = core.AxisManager(dets)
+        ndets = aman.dets.count
+
+        white_noise_amp_input = 50 + np.random.randn(ndets)  # W/sqrt{Hz}
+        fknee_input = 1 + 0.1 * np.random.randn(ndets)
+        alpha_input = 3 + 0.2 * np.random.randn(ndets)
+
+        freqs = rfftfreq(nsamps, d=1/fs)
+        params = [white_noise_amp_input[:, np.newaxis],
+                  fknee_input[:, np.newaxis],
+                  alpha_input[:, np.newaxis]]
+        pxx_input = noise_model(freqs, params)
+
+        pxx_input[:, 0] = 0
+
+        T = nsamps/fs
+        ft_amps = np.sqrt(pxx_input * T * fs**2 / 2)
+
+        ft_phases = np.random.uniform(0, 2 * np.pi, size=ft_amps.shape)
+        ft_coefs = ft_amps * np.exp(1.0j * ft_phases)
+        realized_noise = irfft(ft_coefs)
+        timestamps = 1700000000 + np.arange(0, realized_noise.shape[1])/fs
+        aman.add_axis(core.OffsetAxis('samps', len(timestamps)))
+        aman.wrap('timestamps', timestamps, [(0, 'samps')])
+        aman.wrap('signal', realized_noise, [(0, 'dets'), (1, 'samps')])
+
+        detrend_tod(aman)
+        freqs_output, Pxx_output = calc_psd(aman, nperseg=200*100)
+        fit_result = fit_noise_model(aman, wn_est=50, fknee_est=1.0,
+                                     alpha_est=3.3, lowf=0.05,
+                                     f_max=5, binning=True,
+                                     psdargs={'nperseg': 200*1000})
+        wnl_fit = fit_result.fit[:, 0]
+        fk_fit = fit_result.fit[:, 1]
+        alpha_fit = fit_result.fit[:, 2]
+
+        self.assertTrue(np.abs(np.median(white_noise_amp_input - wnl_fit)) < 1)
+        self.assertTrue(np.abs(np.median(fknee_input - fk_fit)) < 0.1)
+        self.assertTrue(np.abs(np.median(alpha_input - alpha_fit)) < 0.1)
+
+    def test_wn_debias(self):
+        # prep
+        timestamps, az = get_scan(
+            n_scans=20, scan_accel=0.25, scanrate=0.5, az0=0, az1=40)
+
+        nsamps = len(timestamps)
+        ndets = 100
+        np.random.seed(0)
+        signal = np.random.normal(0, 1, size=(ndets, nsamps))
+
+        dets = [f"det{i}" for i in range(ndets)]
+        aman = core.AxisManager(
+                core.LabelAxis("dets", dets),
+                core.IndexAxis("samps", nsamps)
+        )
+        aman.wrap("timestamps", timestamps, [(0, "samps")])
+        aman.wrap("signal", signal, [(0, "dets"), (1, "samps")])
+        boresight = core.AxisManager(aman.samps)
+        boresight.wrap("az", az, [(0, "samps")])
+        aman.wrap('boresight', boresight)
+        aman.wrap('flags', core.AxisManager(aman.dets, aman.samps))
+        get_turnaround_flags(aman)
+
+        # test default arguments, this is biased
+        calc_psd(aman, merge=True, nperseg=2**18)
+        wn = calc_wn(aman)
+        ratio = np.average(wn) / np.sqrt(np.average(aman.Pxx))
+        self.assertTrue(abs(ratio - 1) > TOL_BIAS)
+        # test debias, full_output=True, noverlap=0
+        freqs, Pxx, nseg = calc_psd(aman, merge=False, full_output=True,
+                                    noverlap=0, nperseg=2**18)
+        wn = calc_wn(aman, Pxx, freqs, nseg)
+        ratio = np.average(wn) / np.sqrt(np.average(Pxx))
+        self.assertAlmostEqual(ratio, 1, delta=TOL_BIAS)
+        # test quarter nperseg
+        freqs, Pxx, nseg = calc_psd(aman, merge=False, full_output=True,
+                                    noverlap=0, nperseg=2**16)
+        wn = calc_wn(aman, Pxx, freqs, nseg)
+        ratio = np.average(wn) / np.sqrt(np.average(Pxx))
+        self.assertAlmostEqual(ratio, 1, delta=TOL_BIAS)
+        # test default nperseg
+        freqs, Pxx, nseg = calc_psd(aman, merge=False, full_output=True,
+                                    noverlap=0)
+        wn = calc_wn(aman, Pxx, freqs, nseg)
+        ratio = np.average(wn) / np.sqrt(np.average(Pxx))
+        self.assertAlmostEqual(ratio, 1, delta=TOL_BIAS)
+        # test subscan
+        freqs, Pxx, nseg = calc_psd(aman, merge=False, full_output=True,
+                                    noverlap=0, subscan=True)
+        wn = calc_wn(aman, Pxx, freqs, nseg)
+        ratio = np.average(wn) / np.sqrt(np.average(Pxx))
+        self.assertAlmostEqual(ratio, 1, delta=TOL_BIAS)
diff --git a/tests/test_tod_ops.py b/tests/test_tod_ops.py