@@ -253,6 +253,7 @@ def load_stft(
253253 win_length = 256 ,
254254 hop_length = 16 ,
255255 fast_mode = False ,
256+ use_original_sr = False ,
256257):
257258 assert exists (wav_filepath )
258259 log .debug (f'Computing spectrogram on { wav_filepath } ' )
@@ -265,7 +266,18 @@ def load_stft(
265266 raise OSError (f'Error loading file: { e } ' )
266267
267268 # Resample the waveform
268- waveform = librosa .resample (waveform_ , orig_sr = orig_sr , target_sr = sr )
269+ if not use_original_sr :
270+ waveform = librosa .resample (waveform_ , orig_sr = orig_sr , target_sr = sr )
271+ else :
272+ waveform = waveform_
273+ # Disabled alternative: use a next-power-of-2 factor to scale window and hop length
274+ # sr_factor = np.power(2, np.ceil(np.log2(orig_sr / sr)))
275+ sr_factor = orig_sr / sr
276+
277+ sr *= sr_factor
278+ n_fft = int (np .round (n_fft * sr_factor ))
279+ win_length = int (np .round (win_length * sr_factor ))
280+ hop_length = int (np .round (hop_length * sr_factor ))
269281
270282 # TODO: signal processing: remove DC offset, time window edges of waveform
271283
@@ -292,7 +304,7 @@ def load_stft(
292304 band_min = bands [index ] - delta_f / 2.0
293305 band_max = bands [index ] + delta_f / 2.0
294306 # accept bands with any part of their range within interval [FREQ_MIN, FREQ_MAX]
295- if FREQ_MIN <= band_max and band_min <= FREQ_MAX :
307+ if FREQ_MIN <= band_max and ( use_original_sr or band_min <= FREQ_MAX ) :
296308 goods .append (index )
297309 min_index = min (goods )
298310 max_index = max (goods )
@@ -1407,6 +1419,7 @@ def compute_wrapper(
14071419 bitdepth = 16 ,
14081420 mask_secondary_effects = False ,
14091421 plot_uncompressed_amplitude = False ,
1422+ include_original_sr = False ,
14101423 debug = False ,
14111424 ** kwargs ,
14121425):
@@ -1470,7 +1483,7 @@ def compute_wrapper(
14701483 warnings .simplefilter ('ignore' , category = DeprecationWarning )
14711484 # ignore warning due to aifc deprecation
14721485 stft_db , waveplot , sr , bands , duration , freq_offset , time_vec , orig_sr , max_band_idx = (
1473- load_stft (wav_filepath , fast_mode = fast_mode )
1486+ load_stft (wav_filepath , fast_mode = fast_mode , use_original_sr = False )
14741487 )
14751488
14761489 # Apply a dynamic range to a fixed dB range
@@ -1837,6 +1850,70 @@ def compute_wrapper(
18371850 [cv2 .IMWRITE_TIFF_COMPRESSION , 1 ],
18381851 )
18391852
1853+ # If requested, also generate uncompressed and compressed spectrograms
1854+ # at the original sample rate (no resampling). Their time and frequency
1855+ # step sizes should match those of the resampled spectrograms.
1856+ if include_original_sr :
1857+ with warnings .catch_warnings ():
1858+ warnings .simplefilter ('ignore' , category = DeprecationWarning )
1859+ # ignore warning due to aifc deprecation
1860+ (
1861+ stft_db_origsr ,
1862+ _ ,
1863+ _ ,
1864+ bands_origsr ,
1865+ duration_origsr ,
1866+ _ ,
1867+ time_vec_origsr ,
1868+ orig_sr ,
1869+ max_band_idx_origsr ,
1870+ ) = load_stft (wav_filepath , fast_mode = fast_mode , use_original_sr = True )
1871+ # Apply a dynamic range to a fixed dB range
1872+ stft_db_origsr = gain_stft (stft_db_origsr , max_band_idx = max_band_idx_origsr )
1873+
1874+ # Bin the floating point data to X-bit integers (X=8 or X=16)
1875+ stft_db_origsr = normalize_stft (stft_db_origsr , None , dtype )
1876+
1877+ # Vertically flip the spectrogram, lowest frequencies on the bottom
1878+ # Convert to a C++ contiguous array for OpenCV
1879+ stft_db_origsr = np .ascontiguousarray (stft_db_origsr [::- 1 , :])
1880+ bands_origsr = bands_origsr [::- 1 ]
1881+ y_step_freq_origsr = float (bands_origsr [0 ] - bands_origsr [1 ])
1882+ x_step_ms_origsr = float (1e3 * (time_vec_origsr [1 ] - time_vec_origsr [0 ]))
1883+ bands_origsr = np .around (bands_origsr ).astype (np .int32 ).tolist ()
1884+
1885+ # Allow up to 5% change in step sizes or frequency bands when comparing
1886+ # to band-limited spectrogram.
1887+ tol = 5e-2
1888+ assert (
1889+ np .abs (x_step_ms - x_step_ms_origsr ) / x_step_ms <= tol
1890+ ), 'time step changed unexpectedly much when using original sample rate'
1891+ assert (
1892+ np .abs (y_step_freq - y_step_freq_origsr ) / y_step_freq <= tol
1893+ ), 'frequency step changed unexpectedly much when using original sample rate'
1894+ assert all (
1895+ [np .abs (x - y ) / x <= tol for x , y in zip (bands , bands_origsr [- len (bands ) :])]
1896+ ), 'lower frequency bands changed unexpectedly much when using original sample rate'
1897+
1898+ # Create compressed spectrogram using segment start and stop times
1899+ segments_origsr = []
1900+ for segment_meta in metas :
1901+ start = int (np .round (segment_meta ['segment start.ms' ] / x_step_ms_origsr ))
1902+ end = int (np .round (segment_meta ['segment end.ms' ] / x_step_ms_origsr ))
1903+ segments_origsr .append (stft_db_origsr [:, start :end ])
1904+ segments ['stft_db_origsr' ] = np .concatenate (segments_origsr , axis = 1 )
1905+
1906+ # Save some metadata
1907+ meta_origsr = {
1908+ 'sr.hz' : int (orig_sr ),
1909+ 'duration.ms' : round (duration_origsr * 1e3 , 3 ),
1910+ 'frequencies' : {
1911+ 'min.hz' : int (FREQ_MIN ),
1912+ 'max.hz' : int (max (bands_origsr )),
1913+ 'pixels.hz' : bands_origsr ,
1914+ },
1915+ }
1916+
18401917 output_paths = []
18411918 compressed_paths = []
18421919 mask_paths = []
@@ -1847,6 +1924,10 @@ def compute_wrapper(
18471924 datas = [
18481925 (output_paths , 'jpg' , stft_db ),
18491926 ]
1927+ if not fast_mode and include_original_sr :
1928+ datas += [
1929+ (output_paths , 'origsr.jpg' , stft_db_origsr ),
1930+ ]
18501931 if plot_uncompressed_amplitude :
18511932 datas += [
18521933 (waveplot_plots , 'waveplot.jpg' , waveplot ),
@@ -1855,6 +1936,10 @@ def compute_wrapper(
18551936 datas += [
18561937 (compressed_paths , 'compressed.jpg' , segments ['stft_db' ]),
18571938 ]
1939+ if 'stft_db_origsr' in segments :
1940+ datas += [
1941+ (compressed_paths , 'compressed.origsr.jpg' , segments ['stft_db_origsr' ]),
1942+ ]
18581943 if 'waveplot' in segments :
18591944 datas += [
18601945 (waveplot_compressed_paths , 'compressed.waveplot.jpg' , segments ['waveplot' ]),
@@ -1868,6 +1953,53 @@ def compute_wrapper(
18681953 (masked_paths , 'masked.jpg' , masked ),
18691954 ]
18701955
1956+ # Interpolate waveplots, mask, and masked images to approximately match the original sample rate images
1957+ if include_original_sr :
1958+ if plot_uncompressed_amplitude :
1959+ waveplot_interp = cv2 .resize (
1960+ waveplot ,
1961+ (stft_db_origsr .shape [1 ], waveplot .shape [0 ]),
1962+ interpolation = cv2 .INTER_LINEAR ,
1963+ )
1964+ datas += [
1965+ (waveplot_plots , 'waveplot.origsr.jpg' , waveplot_interp ),
1966+ ]
1967+ if 'waveplot' in segments :
1968+ waveplot_compressed_interp = cv2 .resize (
1969+ segments ['waveplot' ],
1970+ (segments ['stft_db_origsr' ].shape [1 ], segments ['waveplot' ].shape [0 ]),
1971+ interpolation = cv2 .INTER_LINEAR ,
1972+ )
1973+ datas += [
1974+ (
1975+ waveplot_compressed_paths ,
1976+ 'compressed.waveplot.origsr.jpg' ,
1977+ waveplot_compressed_interp ,
1978+ ),
1979+ ]
1980+ if 'costs' in segments and 'stft_db' in segments :
1981+ mask_interp = cv2 .resize (
1982+ segments ['costs' ],
1983+ (segments ['stft_db_origsr' ].shape [1 ], segments ['costs' ].shape [0 ]),
1984+ interpolation = cv2 .INTER_LINEAR ,
1985+ )
1986+ masked_interp = cv2 .resize (
1987+ masked ,
1988+ (segments ['stft_db_origsr' ].shape [1 ], masked .shape [0 ]),
1989+ interpolation = cv2 .INTER_LINEAR ,
1990+ )
1991+ # Pad mask and masked to account for extra higher frequencies
1992+ mask_interp = np .pad (
1993+ mask_interp , ((stft_db_origsr .shape [0 ] - mask_interp .shape [0 ], 0 ), (0 , 0 ))
1994+ )
1995+ masked_interp = np .pad (
1996+ masked_interp , ((stft_db_origsr .shape [0 ] - masked_interp .shape [0 ], 0 ), (0 , 0 ))
1997+ )
1998+ datas += [
1999+ (mask_paths , 'mask.origsr.jpg' , mask_interp ),
2000+ (masked_paths , 'masked.origsr.jpg' , masked_interp ),
2001+ ]
2002+
18712003 for accumulator , tag , data in datas :
18722004 if data .dtype != np .uint8 :
18732005 data_ = data .astype (np .float32 )
@@ -1924,9 +2056,22 @@ def compute_wrapper(
19242056 'width.px' : segments ['stft_db' ].shape [1 ],
19252057 'height.px' : segments ['stft_db' ].shape [0 ],
19262058 }
2059+ if 'stft_db_origsr' in segments :
2060+ metadata ['size' ]['compressed_origsr' ] = {
2061+ 'width.px' : segments ['stft_db_origsr' ].shape [1 ],
2062+ 'height.px' : segments ['stft_db_origsr' ].shape [0 ],
2063+ }
2064+ metadata ['size' ]['uncompressed_origsr' ] = {
2065+ 'width.px' : stft_db_origsr .shape [1 ],
2066+ 'height.px' : stft_db_origsr .shape [0 ],
2067+ }
2068+ metadata ['metadata_origsr' ] = meta_origsr
19272069 if 'costs' in segments and 'stft_db' in segments :
19282070 metadata ['size' ]['mask' ] = metadata ['size' ]['compressed' ]
19292071 metadata ['size' ]['masked' ] = metadata ['size' ]['compressed' ]
2072+ if include_original_sr :
2073+ metadata ['size' ]['mask_origsr' ] = metadata ['size' ]['compressed_origsr' ]
2074+ metadata ['size' ]['masked_origsr' ] = metadata ['size' ]['compressed_origsr' ]
19302075
19312076 metadata_path = f'{ out_file_stem } .metadata.json'
19322077 with open (metadata_path , 'w' ) as metafile :
0 commit comments