From 9bd11acc29954c80b9b2e4dd5c6820d91cd0e3e8 Mon Sep 17 00:00:00 2001
From: dw42CSCE
Date: Mon, 8 Sep 2025 16:30:38 -0400
Subject: [PATCH 1/3] Adding Visualizations and Split Method... haven't pushed
 in a while

---
 contourusv/detection.py | 160 +++++++++++++++++++++++++---------------
 contourusv/main.py      |  44 +++++++++--
 2 files changed, 137 insertions(+), 67 deletions(-)

diff --git a/contourusv/detection.py b/contourusv/detection.py
index 0e06b33..89f04b6 100644
--- a/contourusv/detection.py
+++ b/contourusv/detection.py
@@ -1,9 +1,31 @@
 import cv2
+import pandas as pd
+import os
+# import remove_suffix # Removed because not needed
+
+
+def draw_dashed_rect(img, pt1, pt2, color, thickness=2, dash_length=5):
+    x1, y1 = pt1
+    x2, y2 = pt2
+
+    # horizontal top
+    for x in range(x1, x2, dash_length*2):
+        cv2.line(img, (x, y1), (min(x+dash_length, x2), y1), color, thickness)
+    # horizontal bottom
+    for x in range(x1, x2, dash_length*2):
+        cv2.line(img, (x, y2), (min(x+dash_length, x2), y2), color, thickness)
+    # vertical left
+    for y in range(y1, y2, dash_length*2):
+        cv2.line(img, (x1, y), (x1, min(y+dash_length, y2)), color, thickness)
+    # vertical right
+    for y in range(y1, y2, dash_length*2):
+        cv2.line(img, (x2, y), (x2, min(y+dash_length, y2)), color, thickness)
 
 def detect_contours(cleaned_image, start_time, end_time, freq_min, freq_max,
-                    file_name, annotations, call_type_defs=None, processing="adaptive"):
+                    file_name, annotations, call_type_defs=None, processing="adaptive",):
     """
-    Detect and classify USVs in cleaned spectrogram images.
+    Detect and classify USVs in cleaned spectrogram images, with optional
+    overlay of manual annotations for evaluation.
 
     Parameters
     ----------
@@ -21,6 +43,13 @@ def detect_contours(cleaned_image, start_time, end_time, freq_min, freq_max,
         Source audio file path
     annotations : list
         List to accumulate detection annotations
+    call_type_defs : dict
+        Call type definitions
+    processing : str
+        Thresholding method ("adaptive" or "Otsu")
+    manual_annots : pd.DataFrame or None
+        Optional DataFrame of ground truth annotations with columns
+        ["begin_time", "end_time"]
 
     Returns
     -------
@@ -31,21 +60,20 @@ def detect_contours(cleaned_image, start_time, end_time, freq_min, freq_max,
     """
     if call_type_defs is None:
         call_type_defs = {
-            "22kHz": {"freq_min": 15,
-                      "freq_max": 45,
-                      "freq_span_max": 10,
-                      "duration_min": 0.03, # .03 was original
-                      "duration_max": 3.0},
-            "50kHz": {"freq_min": 40,
-                      "freq_max": 80,
-                      "freq_span_max": 10,
-                      "duration_min": 0.01,
-                      "duration_max": 0.3},
-
-        }
-
-    # Re-apply Otsu's Thresholding (Cant do if using adaptive)
-    if(processing == "Otsu"):
+            "22kHz": {"freq_min": 25,
+                      "freq_max": 45,
+                      "freq_span_max": 10,
+                      "duration_min": 0.03,
+                      "duration_max": 3.0},
+            "50kHz": {"freq_min": 40,
+                      "freq_max": 80,
+                      "freq_span_max": 40,
+                      "duration_min": 0.01,
+                      "duration_max": 0.3},
+    }
+
+    # Re-apply Otsu's Thresholding (if requested)
+    if processing == "Otsu":
         ret, thresholded_image = cv2.threshold(
             cleaned_image, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
     else:
@@ -54,60 +82,72 @@
     contours, _ = cv2.findContours(
         thresholded_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
 
-    # Initialize a list to hold the details of each detected USV
     usv_details = []
 
-    # Process each contour
+    # Process each contour (automatic detection)
     for contour in contours:
         x, y, w, h = cv2.boundingRect(contour)
-        duration_start = start_time + \
-            (x / thresholded_image.shape[1]) * (end_time -
-                                                start_time)
-        duration_end = start_time + \
-            ((x + w) /
-             thresholded_image.shape[1]) * (end_time - start_time)
-        freq_start = freq_min + \
-            (y / thresholded_image.shape[0]) * (freq_max - freq_min)
-        freq_end = freq_min + \
-            ((y + h) / thresholded_image.shape[0]) * (freq_max - freq_min)
+        duration_start = start_time + (x / thresholded_image.shape[1]) * (end_time - start_time)
+        duration_end = start_time + ((x + w) / thresholded_image.shape[1]) * (end_time - start_time)
+        freq_start = freq_min + (y / thresholded_image.shape[0]) * (freq_max - freq_min)
+        freq_end = freq_min + ((y + h) / thresholded_image.shape[0]) * (freq_max - freq_min)
         duration = duration_end - duration_start
-        # Frequency span
        freq_span = freq_end - freq_start
 
-        # For 22 kHz USVs
         for call_type, call_def in call_type_defs.items():
-            if (freq_start > call_def["freq_min"] and
-                freq_end < call_def["freq_max"] and
-                freq_span < call_def["freq_span_max"] and
-                call_def["duration_max"] >= duration >= call_def["duration_min"]):
-
-                usv_details.append({
-                    'bounding_box': (x, y, w, h),
-                    'duration': (x, x + w),
-                    'duration_start': duration_start,
-                    'duration_end': duration_end,
-                    'freq_start': freq_start,
-                    'freq_end': freq_end
-                })
-
-                annotations.append({
-                    'file': file_name.name,
-                    'begin_time': duration_start,
-                    'end_time': duration_end,
-                    'low_freq': freq_start,
-                    'high_freq': freq_end,
-                    'duration': duration,
-                    'USV_TYPE': call_type
-                })
-    # Annotate the image with the bounding boxes
-    image_with_annotations = cv2.cvtColor(
-        thresholded_image, cv2.COLOR_GRAY2BGR)
+            if (freq_start > call_def["freq_min"] and
+                    freq_end < call_def["freq_max"] and
+                    freq_span < call_def["freq_span_max"] and
+                    call_def["duration_max"] >= duration >= call_def["duration_min"]):
+
+                usv_details.append({
+                    'bounding_box': (x, y, w, h),
+                    'duration': (x, x + w),
+                    'duration_start': duration_start,
+                    'duration_end': duration_end,
+                    'freq_start': freq_start,
+                    'freq_end': freq_end
+                })
+
+                annotations.append({
+                    'file': file_name.name,
+                    'begin_time': duration_start,
+                    'end_time': duration_end,
+                    'low_freq': freq_start,
+                    'high_freq': freq_end,
+                    'duration': duration,
+                    'USV_TYPE': call_type
+                })
+
+    # Annotate detections
+    image_with_annotations = cv2.cvtColor(thresholded_image, cv2.COLOR_GRAY2BGR)
     for usv in usv_details:
         x, y, w, h = usv['bounding_box']
         cv2.rectangle(image_with_annotations, (x, y),
-                      (x + w, y + h), (0, 255, 0), 1)
+                      (x + w, y + h), (0, 255, 0), 1) # green boxes
+
+    manual_csv = str(file_name).replace(".wav", ".csv")
+
+    colors = [(255, 0, 0), (0, 0, 255), (255, 255, 0), (255, 0, 255)]
+
+    # Overlay manual annotations
+    if os.path.exists(manual_csv):
+        manual_annots = pd.read_csv(manual_csv, header=None, names=["begin_time", "end_time"])
+        manual_subset = manual_annots[
+            (manual_annots["end_time"] >= start_time) &
+            (manual_annots["begin_time"] <= end_time)
+        ].reset_index(drop=True)
+
+        for idx, row in manual_subset.iterrows():
+            # map times to x coordinates
+            x1 = int(((row["begin_time"] - start_time) / (end_time - start_time)) * thresholded_image.shape[1])
+            x2 = int(((row["end_time"] - start_time) / (end_time - start_time)) * thresholded_image.shape[1])
+            y1, y2 = 0, thresholded_image.shape[0] # full height
+
+            color = colors[idx % len(colors)] # cycle through colors
+            draw_dashed_rect(image_with_annotations, (x1, y1), (x2, y2), color, thickness=2, dash_length=5)
 
-    final_image = cv2.cvtColor(
-        image_with_annotations, cv2.COLOR_BGR2RGB)
+    final_image = cv2.cvtColor(image_with_annotations, cv2.COLOR_BGR2RGB)
 
-    return final_image, annotations
\ No newline at end of file
+    return final_image, annotations
diff --git a/contourusv/main.py b/contourusv/main.py
index bf7f8fc..fedf2fb 100644
--- a/contourusv/main.py
+++ b/contourusv/main.py
@@ -8,6 +8,7 @@
 import matplotlib.pyplot as plt
 from codecarbon import EmissionsTracker
 from scipy.signal import spectrogram, butter, filtfilt
+from scipy import ndimage
 from sklearn.decomposition import NMF, FastICA
 from preprocessing import clean_spec_imp, clean_spec_orig
 from evaluation import run_evaluation
@@ -15,6 +16,7 @@
 from generate_annotation import generate_annotations
 from sklearn.exceptions import ConvergenceWarning
 import warnings
+import cv2
 
 warnings.simplefilter("ignore", ConvergenceWarning)
 from sklearn.decomposition import FastICA
@@ -179,7 +181,7 @@ def low_pass_filter(data, cutoff, fs, order=5):
 
 
 def run_detection(root_path, file_name, experiment, trial, overlap=3,
-                  winlen=10, freq_min=15, freq_max=115, wsize=2500, th_perc=95, processing='none', overlapsize=.25):
+                  winlen=10, freq_min=15, freq_max=115, wsize=2500, th_perc=85, processing='none', overlapsize=.25):
     """
     Process audio file to detect ultrasonic vocalizations (USVs).
 
@@ -266,13 +268,41 @@ def run_detection(root_path, file_name, experiment, trial, overlap=3,
     noise_floor = np.percentile(Sxx, th_perc)
     Sxx[Sxx < noise_floor] = noise_floor
 
-    if (processing != "Otsu"):
-        Sxx = use_NMF_Small(Sxx)
+    # Current thought: Split the spectrogram into upper and lower frequency bands
+    # Lower band: use current process... might need to make AT a bit less aggressive?
+    # Upper band: need to make a less aggressive process... CLAHE maybe?
 
-    if(processing == "Otsu"):
-        cleaned_image = clean_spec_orig(Sxx)
+
+    # Find index where frequency >= 40 kHz (f is in kHz)
+    split_idx = np.argmax(f >= 40)
+
+    # Store original upper half from *raw* Sxx for later restoration
+    Sxx_high_original = Sxx[split_idx:, :]
+
+    # Process entire Sxx
+    if processing != "Otsu":
+        Sxx_processed = use_NMF_Small(Sxx)
+    else:
+        Sxx_processed = Sxx # no NMF for Otsu
+
+    # Clean entire spectrogram
+    if processing == "Otsu":
+        cleaned_image = clean_spec_orig(Sxx_processed)
     else:
-        cleaned_image = clean_spec_imp(Sxx)
+        cleaned_image = clean_spec_imp(Sxx_processed)
+
+    # Apply mild median filter to reduce noise while keeping signals
+    Sxx_high_original = ndimage.median_filter(Sxx_high_original, size=3)
+
+    # Normalize spectrogram PERFORMS POORLY ON C57
+    Sxx_high_original = cv2.normalize(Sxx_high_original, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)
+
+    min_cols = min(cleaned_image.shape[1], Sxx_high_original.shape[1])
+    cleaned_image = cleaned_image[:, :min_cols]
+    Sxx_high_norm = Sxx_high_original[:, :min_cols]
+
+    # Combine the lower band (processed) and upper band (original) into cleaned_image
+    cleaned_image[split_idx:, :] = Sxx_high_norm
 
     final_image, annotations = detect_contours(cleaned_image, start_time, end_time,
                                                freq_min, freq_max, file_name, annotations, processing=processing)
@@ -369,7 +399,7 @@ def run_detection(root_path, file_name, experiment, trial, overlap=3,
 
     # Run detection for each audio file
     for audio_file in tqdm(audio_files, desc=f"Running Detection on audio files for {experiment} {trial}"):
-        run_detection(root_path, audio_file, experiment, trial, **ac_kwargs)
+        run_detection(root_path, audio_file, experiment, trial,**ac_kwargs)
 
     # Generate ground truth annotations
     generate_annotations(experiment, trial, root_path, file_ext)
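Note (not part of the patch series): a quick way to sanity-check the `draw_dashed_rect` helper added in PATCH 1/3 is to call it on a blank canvas. This is a minimal sketch; the canvas size, box coordinates, color, and output filename are illustrative assumptions, and the flat import path mirrors the style used elsewhere in contourusv.

    import numpy as np
    import cv2
    from detection import draw_dashed_rect  # flat import, as in contourusv/main.py

    # Hypothetical 640x480 canvas; draw one dashed box in BGR red
    canvas = np.zeros((480, 640, 3), dtype=np.uint8)
    draw_dashed_rect(canvas, (50, 50), (300, 200), color=(0, 0, 255),
                     thickness=2, dash_length=5)
    cv2.imwrite("dashed_rect_demo.png", canvas)
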
From 7b2bcbf64411aa9fbea922e6ea4b8ad51ff199f8 Mon Sep 17 00:00:00 2001
From: dw42CSCE
Date: Sun, 28 Sep 2025 16:04:39 -0400
Subject: [PATCH 2/3] Adding new comments

---
 contourusv/detection.py     |  6 +++---
 contourusv/main.py          |  6 +++---
 contourusv/preprocessing.py | 13 +++----------
 3 files changed, 9 insertions(+), 16 deletions(-)

diff --git a/contourusv/detection.py b/contourusv/detection.py
index 89f04b6..db75527 100644
--- a/contourusv/detection.py
+++ b/contourusv/detection.py
@@ -72,7 +72,7 @@
                       "duration_max": 0.3},
     }
 
-    # Re-apply Otsu's Thresholding (if requested)
+    # Re-apply Otsu's Thresholding if using Otsu's method
     if processing == "Otsu":
         ret, thresholded_image = cv2.threshold(
             cleaned_image, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
@@ -84,7 +84,7 @@
 
     usv_details = []
 
-    # Process each contour (automatic detection)
+    # Process each contour
     for contour in contours:
         x, y, w, h = cv2.boundingRect(contour)
         duration_start = start_time + (x / thresholded_image.shape[1]) * (end_time - start_time)
@@ -140,7 +140,7 @@
         ].reset_index(drop=True)
 
         for idx, row in manual_subset.iterrows():
-            # map times to x coordinates
+            # map times to x coordinates for markers
             x1 = int(((row["begin_time"] - start_time) / (end_time - start_time)) * thresholded_image.shape[1])
             x2 = int(((row["end_time"] - start_time) / (end_time - start_time)) * thresholded_image.shape[1])
             y1, y2 = 0, thresholded_image.shape[0] # full height
diff --git a/contourusv/main.py b/contourusv/main.py
index fedf2fb..0bd5d2c 100644
--- a/contourusv/main.py
+++ b/contourusv/main.py
@@ -80,14 +80,14 @@ def use_NMF_Small(Sxx, num_splits=120, n_components=25):
         Sxx_part = Sxx[:, i:end_idx]  # Extract segment
 
-        # Pad small segments with zeros or mean value to ensure correct dimensions
+        # Pad small segments with zeros to ensure correct dimensions
         if Sxx_part.shape[1] < n_components:
             print(f"Padding segment {i}-{end_idx} to meet {n_components} components")
             # Padding with zeros
             padding = np.zeros((Sxx_part.shape[0], n_components - Sxx_part.shape[1]))
             Sxx_part = np.hstack((Sxx_part, padding))  # Add padding to the segment
 
-        # Use `nndsvd` if possible, otherwise fall back to `random`
+        # Use `nndsvd` if possible, otherwise fall back to `random`. This depends on the number of components and might not be enough
         init_method = 'nndsvd' if Sxx_part.shape[1] >= n_components else 'random'
         if init_method == 'nndsvd':
             max_iter = 100
@@ -101,7 +101,7 @@ def use_NMF_Small(Sxx, num_splits=120, n_components=25):
         W = model.fit_transform(Sxx_part)
         H = model.components_
 
-        # Reconstruct the matrix segment
+        # Reconstruct an approximation of the matrix segment
         reconstructed_Sxx_part = np.dot(W, H)
 
         transformed_parts.append(reconstructed_Sxx_part)
diff --git a/contourusv/preprocessing.py b/contourusv/preprocessing.py
index 998bf58..a78e3e4 100644
--- a/contourusv/preprocessing.py
+++ b/contourusv/preprocessing.py
@@ -3,13 +3,6 @@
 from scipy import ndimage
 from sklearn.decomposition import NMF
 
-# Lists for PR:
-# 1. Combine preprocessing functions into a single function with a parameter to choose between them.
-# 2. Generate f1 score graph using sb, add to poster
-# 3. Fun tests on rat_pleasant using both methods
-# 4. Push PR to main repo
-# 5. Make sure ghpages is pushing correctly
-
 def clean_spec_imp(Sxx):
     """
     Improved preprocessing of spectrogram data for USV detection, using adaptive theshholding.
@@ -40,14 +33,14 @@ def clean_spec_imp(Sxx):
         enhanced_img, 255,
         cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
         cv2.THRESH_BINARY_INV,
-        15, 15
+        15, 15 # blockSize, C
     )
 
     # best results so far, 15, 15
     # 15, 19 seems to do well
 
     # Morphological processing to connect weakly detected signals
-    # kernel = np.ones((1, 1), np.uint8)  # Moderate kernel size
+    # kernel = np.ones((1, 1), np.uint8)  # Moderate kernel size; this doesn't work well
 
     # final_img = cv2.morphologyEx(adaptive_img, cv2.MORPH_OPEN, kernel)
@@ -55,7 +48,7 @@ def clean_spec_imp(Sxx):
 
 
 
-
+# older version of clean_spec_imp
 def clean_spec_orig(Sxx):
     """
     Preprocess spectrogram data for USV detection using Otsu's threshholding and CLAHE contrast enhancement.
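Note (not part of the patch series): the padding comments touched in PATCH 2/3 are easier to see in isolation. Below is a minimal sketch of the zero-padding rule from `use_NMF_Small`, with a made-up 64-bin segment of width 10; after padding, the segment width equals `n_components`, so the `nndsvd` branch is taken.

    import numpy as np

    n_components = 25
    Sxx_part = np.abs(np.random.randn(64, 10))  # hypothetical narrow trailing segment

    if Sxx_part.shape[1] < n_components:
        # Right-pad with zero columns so NMF sees at least n_components columns
        padding = np.zeros((Sxx_part.shape[0], n_components - Sxx_part.shape[1]))
        Sxx_part = np.hstack((Sxx_part, padding))

    print(Sxx_part.shape)  # (64, 25)
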
From 01fa6b6834a6e334ad28b392d07b620731bf728b Mon Sep 17 00:00:00 2001
From: dw42CSCE
Date: Sun, 12 Oct 2025 01:24:06 -0400
Subject: [PATCH 3/3] Adding comments

---
 contourusv/detection.py | 5 +----
 contourusv/main.py      | 5 ++++-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/contourusv/detection.py b/contourusv/detection.py
index db75527..fdc0cd1 100644
--- a/contourusv/detection.py
+++ b/contourusv/detection.py
@@ -47,9 +47,6 @@ def detect_contours(cleaned_image, start_time, end_time, freq_min, freq_max,
         Call type definitions
     processing : str
         Thresholding method ("adaptive" or "Otsu")
-    manual_annots : pd.DataFrame or None
-        Optional DataFrame of ground truth annotations with columns
-        ["begin_time", "end_time"]
 
     Returns
     -------
@@ -60,7 +57,7 @@ def detect_contours(cleaned_image, start_time, end_time, freq_min, freq_max,
     """
     if call_type_defs is None:
         call_type_defs = {
-            "22kHz": {"freq_min": 25,
+            "22kHz": {"freq_min": 15, # 25 kHz works to eliminate false low call detections, but is incorrect logically
                       "freq_max": 45,
                       "freq_span_max": 10,
                       "duration_min": 0.03,
diff --git a/contourusv/main.py b/contourusv/main.py
index 0bd5d2c..4c5ab6b 100644
--- a/contourusv/main.py
+++ b/contourusv/main.py
@@ -274,7 +274,10 @@ def run_detection(root_path, file_name, experiment, trial, overlap=3,
 
 
     # Find index where frequency >= 40 kHz (f is in kHz)
-    split_idx = np.argmax(f >= 40)
+
+    split = 40
+
+    split_idx = np.argmax(f >= split)
 
     # Store original upper half from *raw* Sxx for later restoration
     Sxx_high_original = Sxx[split_idx:, :]
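
Note (not part of the patch series): the split-index computation that PATCH 3/3 parameterizes relies on `np.argmax` over a boolean array returning the index of the first True. A minimal sketch with a made-up frequency axis:

    import numpy as np

    f = np.linspace(15, 115, 201)      # kHz; illustrative axis, not from the repo
    split = 40
    split_idx = np.argmax(f >= split)  # first bin at or above the split frequency

    print(split_idx, f[split_idx])     # 50 40.0

One caveat: `np.argmax` returns 0 when no bin satisfies the condition, so this only behaves as intended while `split` lies inside the spectrogram's frequency band.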