Skip to content

Commit b2eb43f

Browse files
author
trevor.stout
committed
Flag to generate additional outputs using original sample rate
1 parent 9f96c5f commit b2eb43f

2 files changed

Lines changed: 151 additions & 3 deletions

File tree

batbot/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ def pipeline(
6969
force_overwrite=False,
7070
quiet=False,
7171
plot_uncompressed_amplitude=False,
72+
include_original_sr=False,
7273
debug=False,
7374
):
7475
"""
@@ -109,6 +110,7 @@ def pipeline(
109110
force_overwrite=force_overwrite,
110111
quiet=quiet,
111112
plot_uncompressed_amplitude=plot_uncompressed_amplitude,
113+
include_original_sr=include_original_sr,
112114
debug=debug,
113115
)
114116

@@ -308,6 +310,7 @@ def example():
308310
fast_mode=False,
309311
force_overwrite=True,
310312
plot_uncompressed_amplitude=True,
313+
include_original_sr=True,
311314
)
312315
stop_time = time.time()
313316
print('Example pipeline completed in {} seconds.'.format(stop_time - start_time))

batbot/spectrogram/__init__.py

Lines changed: 148 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,7 @@ def load_stft(
253253
win_length=256,
254254
hop_length=16,
255255
fast_mode=False,
256+
use_original_sr=False,
256257
):
257258
assert exists(wav_filepath)
258259
log.debug(f'Computing spectrogram on {wav_filepath}')
@@ -265,7 +266,18 @@ def load_stft(
265266
raise OSError(f'Error loading file: {e}')
266267

267268
# Resample the waveform
268-
waveform = librosa.resample(waveform_, orig_sr=orig_sr, target_sr=sr)
269+
if not use_original_sr:
270+
waveform = librosa.resample(waveform_, orig_sr=orig_sr, target_sr=sr)
271+
else:
272+
waveform = waveform_
273+
# # define a next-power-of-2 factor to increase window and hop length
274+
# sr_factor = np.pow(2, np.ceil(np.log2(orig_sr / sr)))
275+
sr_factor = orig_sr / sr
276+
277+
sr *= sr_factor
278+
n_fft = int(np.round(n_fft * sr_factor))
279+
win_length = int(np.round(win_length * sr_factor))
280+
hop_length = int(np.round(hop_length * sr_factor))
269281

270282
# TODO: signal processing: remove DC offset, time window edges of waveform
271283

@@ -292,7 +304,7 @@ def load_stft(
292304
band_min = bands[index] - delta_f / 2.0
293305
band_max = bands[index] + delta_f / 2.0
294306
# accept bands with any part of their range within interval [FREQ_MIN, FREQ_MAX]; when use_original_sr is set, the FREQ_MAX upper limit is not applied
295-
if FREQ_MIN <= band_max and band_min <= FREQ_MAX:
307+
if FREQ_MIN <= band_max and (use_original_sr or band_min <= FREQ_MAX):
296308
goods.append(index)
297309
min_index = min(goods)
298310
max_index = max(goods)
@@ -1407,6 +1419,7 @@ def compute_wrapper(
14071419
bitdepth=16,
14081420
mask_secondary_effects=False,
14091421
plot_uncompressed_amplitude=False,
1422+
include_original_sr=False,
14101423
debug=False,
14111424
**kwargs,
14121425
):
@@ -1470,7 +1483,7 @@ def compute_wrapper(
14701483
warnings.simplefilter('ignore', category=DeprecationWarning)
14711484
# ignore warning due to aifc deprecation
14721485
stft_db, waveplot, sr, bands, duration, freq_offset, time_vec, orig_sr, max_band_idx = (
1473-
load_stft(wav_filepath, fast_mode=fast_mode)
1486+
load_stft(wav_filepath, fast_mode=fast_mode, use_original_sr=False)
14741487
)
14751488

14761489
# Apply a dynamic range to a fixed dB range
@@ -1837,6 +1850,70 @@ def compute_wrapper(
18371850
[cv2.IMWRITE_TIFF_COMPRESSION, 1],
18381851
)
18391852

1853+
# If desired, also generate uncompressed and compressed spectrograms
1854+
# without reducing the sample rate. These should have approximately the same
1855+
# step size in time and frequency as the resampled spectrograms (within a small tolerance).
1856+
if include_original_sr:
1857+
with warnings.catch_warnings():
1858+
warnings.simplefilter('ignore', category=DeprecationWarning)
1859+
# ignore warning due to aifc deprecation
1860+
(
1861+
stft_db_origsr,
1862+
_,
1863+
_,
1864+
bands_origsr,
1865+
duration_origsr,
1866+
_,
1867+
time_vec_origsr,
1868+
orig_sr,
1869+
max_band_idx_origsr,
1870+
) = load_stft(wav_filepath, fast_mode=fast_mode, use_original_sr=True)
1871+
# Apply a dynamic range to a fixed dB range
1872+
stft_db_origsr = gain_stft(stft_db_origsr, max_band_idx=max_band_idx_origsr)
1873+
1874+
# Bin the floating point data to X-bit integers (X=8 or X=16)
1875+
stft_db_origsr = normalize_stft(stft_db_origsr, None, dtype)
1876+
1877+
# Vertically flip the spectrogram, lowest frequencies on the bottom
1878+
# Convert to a C++ contiguous array for OpenCV
1879+
stft_db_origsr = np.ascontiguousarray(stft_db_origsr[::-1, :])
1880+
bands_origsr = bands_origsr[::-1]
1881+
y_step_freq_origsr = float(bands_origsr[0] - bands_origsr[1])
1882+
x_step_ms_origsr = float(1e3 * (time_vec_origsr[1] - time_vec_origsr[0]))
1883+
bands_origsr = np.around(bands_origsr).astype(np.int32).tolist()
1884+
1885+
# Allow up to 5% change in step sizes or frequency bands when comparing
1886+
# to the band-limited spectrogram.
1887+
tol = 5e-2
1888+
assert (
1889+
np.abs(x_step_ms - x_step_ms_origsr) / x_step_ms <= tol
1890+
), 'time step changed unexpectedly much when using original sample rate'
1891+
assert (
1892+
np.abs(y_step_freq - y_step_freq_origsr) / y_step_freq <= tol
1893+
), 'frequency step changed unexpectedly much when using original sample rate'
1894+
assert all(
1895+
[np.abs(x - y) / x <= tol for x, y in zip(bands, bands_origsr[-len(bands) :])]
1896+
), 'lower frequency bands changed unexpectedly much when using original sample rate'
1897+
1898+
# Create compressed spectrogram using segment start and stop times
1899+
segments_origsr = []
1900+
for segment_meta in metas:
1901+
start = int(np.round(segment_meta['segment start.ms'] / x_step_ms_origsr))
1902+
end = int(np.round(segment_meta['segment end.ms'] / x_step_ms_origsr))
1903+
segments_origsr.append(stft_db_origsr[:, start:end])
1904+
segments['stft_db_origsr'] = np.concatenate(segments_origsr, axis=1)
1905+
1906+
# Save some metadata
1907+
meta_origsr = {
1908+
'sr.hz': int(orig_sr),
1909+
'duration.ms': round(duration_origsr * 1e3, 3),
1910+
'frequencies': {
1911+
'min.hz': int(FREQ_MIN),
1912+
'max.hz': int(max(bands_origsr)),
1913+
'pixels.hz': bands_origsr,
1914+
},
1915+
}
1916+
18401917
output_paths = []
18411918
compressed_paths = []
18421919
mask_paths = []
@@ -1847,6 +1924,10 @@ def compute_wrapper(
18471924
datas = [
18481925
(output_paths, 'jpg', stft_db),
18491926
]
1927+
if not fast_mode and include_original_sr:
1928+
datas += [
1929+
(output_paths, 'origsr.jpg', stft_db_origsr),
1930+
]
18501931
if plot_uncompressed_amplitude:
18511932
datas += [
18521933
(waveplot_plots, 'waveplot.jpg', waveplot),
@@ -1855,6 +1936,10 @@ def compute_wrapper(
18551936
datas += [
18561937
(compressed_paths, 'compressed.jpg', segments['stft_db']),
18571938
]
1939+
if 'stft_db_origsr' in segments:
1940+
datas += [
1941+
(compressed_paths, 'compressed.origsr.jpg', segments['stft_db_origsr']),
1942+
]
18581943
if 'waveplot' in segments:
18591944
datas += [
18601945
(waveplot_compressed_paths, 'compressed.waveplot.jpg', segments['waveplot']),
@@ -1868,6 +1953,53 @@ def compute_wrapper(
18681953
(masked_paths, 'masked.jpg', masked),
18691954
]
18701955

1956+
# Resize (linear interpolation) the waveplots, mask, and masked images horizontally so their widths match the original-sample-rate images; heights are kept as-is
1957+
if include_original_sr:
1958+
if plot_uncompressed_amplitude:
1959+
waveplot_interp = cv2.resize(
1960+
waveplot,
1961+
(stft_db_origsr.shape[1], waveplot.shape[0]),
1962+
interpolation=cv2.INTER_LINEAR,
1963+
)
1964+
datas += [
1965+
(waveplot_plots, 'waveplot.origsr.jpg', waveplot_interp),
1966+
]
1967+
if 'waveplot' in segments:
1968+
waveplot_compressed_interp = cv2.resize(
1969+
segments['waveplot'],
1970+
(segments['stft_db_origsr'].shape[1], segments['waveplot'].shape[0]),
1971+
interpolation=cv2.INTER_LINEAR,
1972+
)
1973+
datas += [
1974+
(
1975+
waveplot_compressed_paths,
1976+
'compressed.waveplot.origsr.jpg',
1977+
waveplot_compressed_interp,
1978+
),
1979+
]
1980+
if 'costs' in segments and 'stft_db' in segments:
1981+
mask_interp = cv2.resize(
1982+
segments['costs'],
1983+
(segments['stft_db_origsr'].shape[1], segments['costs'].shape[0]),
1984+
interpolation=cv2.INTER_LINEAR,
1985+
)
1986+
masked_interp = cv2.resize(
1987+
masked,
1988+
(segments['stft_db_origsr'].shape[1], masked.shape[0]),
1989+
interpolation=cv2.INTER_LINEAR,
1990+
)
1991+
# Pad mask and masked to account for extra higher frequencies
1992+
mask_interp = np.pad(
1993+
mask_interp, ((stft_db_origsr.shape[0] - mask_interp.shape[0], 0), (0, 0))
1994+
)
1995+
masked_interp = np.pad(
1996+
masked_interp, ((stft_db_origsr.shape[0] - masked_interp.shape[0], 0), (0, 0))
1997+
)
1998+
datas += [
1999+
(mask_paths, 'mask.origsr.jpg', mask_interp),
2000+
(masked_paths, 'masked.origsr.jpg', masked_interp),
2001+
]
2002+
18712003
for accumulator, tag, data in datas:
18722004
if data.dtype != np.uint8:
18732005
data_ = data.astype(np.float32)
@@ -1924,9 +2056,22 @@ def compute_wrapper(
19242056
'width.px': segments['stft_db'].shape[1],
19252057
'height.px': segments['stft_db'].shape[0],
19262058
}
2059+
if 'stft_db_origsr' in segments:
2060+
metadata['size']['compressed_origsr'] = {
2061+
'width.px': segments['stft_db_origsr'].shape[1],
2062+
'height.px': segments['stft_db_origsr'].shape[0],
2063+
}
2064+
metadata['size']['uncompressed_origsr'] = {
2065+
'width.px': stft_db_origsr.shape[1],
2066+
'height.px': stft_db_origsr.shape[0],
2067+
}
2068+
metadata['metadata_origsr'] = meta_origsr
19272069
if 'costs' in segments and 'stft_db' in segments:
19282070
metadata['size']['mask'] = metadata['size']['compressed']
19292071
metadata['size']['masked'] = metadata['size']['compressed']
2072+
if include_original_sr:
2073+
metadata['size']['mask_origsr'] = metadata['size']['compressed_origsr']
2074+
metadata['size']['masked_origsr'] = metadata['size']['compressed_origsr']
19302075

19312076
metadata_path = f'{out_file_stem}.metadata.json'
19322077
with open(metadata_path, 'w') as metafile:

0 commit comments

Comments
 (0)