diff --git a/.gitignore b/.gitignore index 1e7dd58..bd73232 100644 --- a/.gitignore +++ b/.gitignore @@ -128,3 +128,4 @@ dmypy.json # Pyre type checker .pyre/ +/temp1 diff --git a/app.py b/app.py index 6f10a74..3181c3f 100644 --- a/app.py +++ b/app.py @@ -1,65 +1,559 @@ import os +import json +import time +import glob from io import BytesIO +from datetime import datetime +from pathlib import Path import gradio as gr import requests +import numpy as np from PIL import Image from lang_sam import SAM_MODELS from lang_sam.server import PORT, server -def inference(sam_type, box_threshold, text_threshold, image, text_prompt): +def mask_to_polygon(mask): + """Convert a binary mask to polygon representation.""" + import cv2 + # Ensure mask is binary and in uint8 format + if mask.dtype != np.uint8: + binary_mask = (mask > 0).astype(np.uint8) * 255 + else: + binary_mask = mask + + # Find contours + contours, _ = cv2.findContours(binary_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) + + # Convert contours to polygons + polygons = [] + for contour in contours: + # Flatten the contour and convert to list + polygon = contour.flatten().tolist() + # Only add polygons with enough points + if len(polygon) >= 6: # At least 3 points (x,y pairs) + polygons.append(polygon) + + return polygons + +# def save_mask_images(masks, image_path, output_dir="annotations/masks"): +# """Save individual mask images for visualization and further processing.""" +# os.makedirs(output_dir, exist_ok=True) +# +# # Create a unique base name +# image_filename = os.path.basename(image_path) +# base_name = os.path.splitext(image_filename)[0] +# timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") +# +# mask_paths = [] +# for i, mask in enumerate(masks): +# # Convert mask to binary image +# binary_mask = (mask > 0).astype(np.uint8) * 255 +# mask_img = Image.fromarray(binary_mask) +# +# # Save mask +# mask_path = os.path.join(output_dir, f"{base_name}_mask_{i}_{timestamp}.png") +# mask_img.save(mask_path) +# mask_paths.append(mask_path) +# +# return mask_paths + +def dummy_save_mask_images(masks, image_path, output_dir="annotations/masks"): + """Dummy function that returns empty paths without saving mask images.""" + return ["" for _ in range(len(masks))] + +def save_coco_annotations(image_path, masks, boxes, scores, labels, output_dir="annotations"): + """Save annotations in COCO format with proper segmentation masks.""" + import cv2 + os.makedirs(output_dir, exist_ok=True) + + # Create the main output directory + os.makedirs(output_dir, exist_ok=True) + + # Create subdirectories for images, bounding boxes, and masks + images_dir = os.path.join(output_dir, "images") + bbox_dir = os.path.join(output_dir, "bounding_boxes") + masks_dir = os.path.join(output_dir, "masks") + + os.makedirs(images_dir, exist_ok=True) + os.makedirs(bbox_dir, exist_ok=True) + os.makedirs(masks_dir, exist_ok=True) + + # Create a filename based on the original image name + image_filename = os.path.basename(image_path) + base_name = os.path.splitext(image_filename)[0] + file_ext = os.path.splitext(image_filename)[1] + + # Create COCO data structure + coco_data = { + "info": { + "description": "COCO annotations generated by LangSAM", + "date_created": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), + }, + "images": [], + "annotations": [], + "categories": [] + } + + # Load image to get dimensions + img = Image.open(image_path) + width, height = img.size + + # Copy the image to the images directory + image_output_file = os.path.join(images_dir, f"{base_name}{file_ext}") + img.save(image_output_file) + + # Add image info + image_id = 1 + coco_data["images"].append({ + "id": image_id, + "file_name": base_name + file_ext, + "width": width, + "height": height + }) + + # Add categories based on unique labels + unique_labels = set(labels) + category_map = {} + for i, label in enumerate(unique_labels): + category_id = i + 1 + coco_data["categories"].append({ + "id": category_id, + "name": label, + "supercategory": "object" + }) + category_map[label] = category_id + + # Use dummy function instead of saving mask images + mask_paths = dummy_save_mask_images(masks, image_path) + + # Add annotations + for i, (mask, box, score, label) in enumerate(zip(masks, boxes, scores, labels)): + # Convert mask to polygon + polygons = mask_to_polygon(mask) + + if not polygons: # If no valid polygons, use bounding box + x1, y1, x2, y2 = box + polygons = [[float(x1), float(y1), float(x2), float(y1), + float(x2), float(y2), float(x1), float(y2)]] + + # Calculate area from mask + area = float(np.sum(mask > 0)) + + # Get bounding box in COCO format [x, y, width, height] + x1, y1, x2, y2 = box + coco_bbox = [float(x1), float(y1), float(x2 - x1), float(y2 - y1)] + + coco_data["annotations"].append({ + "id": i + 1, + "image_id": 1, + "category_id": category_map.get(label, 1), + "segmentation": polygons, + "area": area, + "bbox": coco_bbox, + "iscrowd": 0, + "score": float(score), + "mask_path": mask_paths[i] if i < len(mask_paths) else None + }) + + # Save the COCO JSON - one in the masks folder and one in the bounding_boxes folder + coco_masks_file = os.path.join(masks_dir, f"{base_name}.json") + coco_bbox_file = os.path.join(bbox_dir, f"{base_name}.json") + + with open(coco_masks_file, "w") as f: + json.dump(coco_data, f, indent=2) + + with open(coco_bbox_file, "w") as f: + json.dump(coco_data, f, indent=2) + + return coco_masks_file + +def save_yolo_annotations(image_path, masks, boxes, scores, labels, output_dir="annotations", custom_filename=None): + """Save annotations in YOLO format with segmentation support.""" + # Create the main output directory + os.makedirs(output_dir, exist_ok=True) + + # Create subdirectories for images, bounding boxes, and masks + images_dir = os.path.join(output_dir, "images") + bbox_dir = os.path.join(output_dir, "bounding_boxes") + masks_dir = os.path.join(output_dir, "masks") + + os.makedirs(images_dir, exist_ok=True) + os.makedirs(bbox_dir, exist_ok=True) + os.makedirs(masks_dir, exist_ok=True) + + # Load image to get dimensions + img = Image.open(image_path) + width, height = img.size + + # Create a filename based on the original image name + image_filename = os.path.basename(image_path) + base_name = os.path.splitext(image_filename)[0] + file_ext = os.path.splitext(image_filename)[1] + + # Use custom filename if provided + if custom_filename: + base_name = custom_filename + + # Create output files with consistent naming + yolo_seg_file = os.path.join(masks_dir, f"{base_name}.txt") + yolo_file = os.path.join(bbox_dir, f"{base_name}.txt") + image_output_file = os.path.join(images_dir, f"{base_name}{file_ext}") + + # Copy the image to the images directory + img.save(image_output_file) + + # Create a class mapping dictionary + unique_labels = list(set(labels)) + class_map = {label: i for i, label in enumerate(unique_labels)} + + # Return the unique labels for batch processing + # (We'll create a single classes file for the whole batch) + + # Save both standard YOLO and segmentation YOLO formats + with open(yolo_file, "w") as f_box, open(yolo_seg_file, "w") as f_seg: + for mask, box, score, label in zip(masks, boxes, scores, labels): + # Get class ID + class_id = class_map[label] + + # Convert box coordinates to YOLO format: [class_id, x_center, y_center, width, height] + x1, y1, x2, y2 = box + x_center = (x1 + x2) / 2 / width + y_center = (y1 + y2) / 2 / height + box_width = (x2 - x1) / width + box_height = (y2 - y1) / height + + # Write standard YOLO format (bounding box) + f_box.write(f"{class_id} {x_center:.6f} {y_center:.6f} {box_width:.6f} {box_height:.6f}\n") + + # Get polygon points for segmentation + polygons = mask_to_polygon(mask) + if polygons: + # Use the first polygon (usually the largest) + polygon = polygons[0] + + # Convert polygon points to normalized coordinates + normalized_polygon = [] + for i in range(0, len(polygon), 2): + if i+1 < len(polygon): + x = polygon[i] / width + y = polygon[i+1] / height + normalized_polygon.extend([x, y]) + + # Write YOLO segmentation format + # Format: class_id x1 y1 x2 y2 ... xn yn + seg_line = f"{class_id}" + for point in normalized_polygon: + seg_line += f" {point:.6f}" + f_seg.write(seg_line + "\n") + else: + # If no polygon, use the bounding box as fallback + f_seg.write(f"{class_id} {x1/width:.6f} {y1/height:.6f} {x2/width:.6f} {y1/height:.6f} " + f"{x2/width:.6f} {y2/height:.6f} {x1/width:.6f} {y2/height:.6f}\n") + + # We've commented out the mask image saving code as requested + + # Return the YOLO segmentation file path and the unique labels + return yolo_seg_file, unique_labels + +def inference(sam_type, box_threshold, text_threshold, image, text_prompt, save_format="none", output_dir=None, custom_filename=None): """Gradio function that makes a request to the /predict LitServe endpoint.""" url = f"http://localhost:{PORT}/predict" # Adjust port if needed + + # Check if image is valid + if not image or not os.path.exists(image): + print(f"Invalid image path: {image}") + return None, "Invalid image path" + + # Get the raw prediction results directly from the model + try: + # Load the image + image_pil = Image.open(image).convert("RGB") + + # Initialize the model locally for direct access + from lang_sam import LangSAM + model = LangSAM(sam_type=sam_type) + + # Make prediction + results = model.predict( + images_pil=[image_pil], + texts_prompt=[text_prompt], + box_threshold=box_threshold, + text_threshold=text_threshold + )[0] # Get the first result + + # Draw the results on the image + from lang_sam.utils import draw_image + image_array = np.asarray(image_pil) + + # Convert masks to numpy array if it's a list or handle empty case + masks = results["masks"] + if len(masks) == 0: + # Create an empty numpy array with the right shape for the image + empty_mask = np.zeros((0, image_array.shape[0], image_array.shape[1]), dtype=bool) + output_image = image_array.copy() # Just return the original image if no masks + else: + # Convert to numpy array if it's a list + if isinstance(masks, list): + masks = np.array(masks) + + output_image = draw_image( + image_array, + masks, + results["boxes"], + results["scores"], + results["labels"] + ) + output_image = Image.fromarray(np.uint8(output_image)).convert("RGB") + + # Save annotations if requested + annotation_path = "" + if save_format != "none" and len(results["masks"]) > 0: + masks = results["masks"] + boxes = results["boxes"] + scores = results["scores"] + labels = results["labels"] + + # Use custom output directory if provided + custom_output_dir = output_dir if output_dir else "annotations" + + if save_format == "coco": + annotation_path = save_coco_annotations(image, masks, boxes, scores, labels, output_dir=custom_output_dir) + print(f"Saved COCO annotations to {annotation_path}") + elif save_format == "yolo": + yolo_path, unique_labels = save_yolo_annotations( + image, masks, boxes, scores, labels, + output_dir=custom_output_dir, + custom_filename=custom_filename + ) + annotation_path = yolo_path + print(f"Saved YOLO annotations to {yolo_path}") + # Return the unique labels for batch processing + return output_image, unique_labels + + return output_image, annotation_path + + except Exception as e: + import traceback + print(f"Error in inference: {e}") + print(traceback.format_exc()) + return None, f"Error: {str(e)}" - # Prepare the multipart form data - with open(image, "rb") as img_file: - files = { - "image": img_file, - } - data = { - "sam_type": sam_type, - "box_threshold": str(box_threshold), - "text_threshold": str(text_threshold), - "text_prompt": text_prompt, - } - - try: - response = requests.post(url, files=files, data=data) - except Exception as e: - print(f"Request failed: {e}") - return None - if response.status_code == 200: +def process_folder(sam_type, box_threshold, text_threshold, folder_path, text_prompt, save_format, output_folder, progress=gr.Progress()): + """Process all images in a folder and save annotations with real-time progress updates.""" + if not folder_path or not os.path.isdir(folder_path): + return "Error: Invalid folder path" + + # Create output folder with timestamp to avoid overwriting + if not output_folder: + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + output_folder = os.path.join(folder_path, f"annotations_{timestamp}") + + # Create main output directory + os.makedirs(output_folder, exist_ok=True) + + # Get all image files in the folder - use a more robust approach + image_files = [] + for root, _, files in os.walk(folder_path): + if root == folder_path: # Only process files in the top-level directory, not subdirectories + for file in files: + # Check if the file has an image extension (case insensitive) + if any(file.lower().endswith(ext.lower()) for ext in [".jpg", ".jpeg", ".png", ".bmp", ".tiff"]): + image_files.append(os.path.join(root, file)) + + if not image_files: + return "Error: No image files found in the folder" + + total_images = len(image_files) + processed_count = 0 + failed_count = 0 + results = [] + + # Initialize progress + progress(0, desc="Starting batch processing...") + + # For collecting all unique labels across all images + all_unique_labels = set() + + # Process each image with progress updates + for i, image_file in enumerate(image_files): try: - output_image = Image.open(BytesIO(response.content)).convert("RGB") - return output_image + # Update progress + progress_percentage = (i / total_images) + progress(progress_percentage, desc=f"Processing image {i+1}/{total_images}: {os.path.basename(image_file)}") + + # Get the base name for saving annotations with matching name + base_name = os.path.splitext(os.path.basename(image_file))[0] + + # Process the image + try: + # Run inference on the image + if save_format == "yolo": + # For YOLO format, we need to collect unique labels + result, image_labels = inference( + sam_type, + box_threshold, + text_threshold, + image_file, + text_prompt, + save_format, + output_folder, + base_name # Pass the base name for consistent naming + ) + # Add these labels to our global set + if isinstance(image_labels, list): + all_unique_labels.update(image_labels) + annotation_path = image_labels # This will be the yolo_seg_file path + else: + # For COCO or other formats + result, annotation_path = inference( + sam_type, + box_threshold, + text_threshold, + image_file, + text_prompt, + save_format, + output_folder, + base_name # Pass the base name for consistent naming + ) + + # No need to copy the image here as it's handled in save_yolo_annotations + + results.append(f"Processed {i+1}/{total_images}: {os.path.basename(image_file)}") + processed_count += 1 + + # Update progress text in real-time + progress(progress_percentage, desc=f"Completed {processed_count}/{total_images} images. Processing...") + + except Exception as e: + print(f"Error processing {image_file}: {str(e)}") + failed_count += 1 + results.append(f"Failed {i+1}/{total_images}: {os.path.basename(image_file)} - {str(e)}") + continue + except Exception as e: - print(f"Failed to process response image: {e}") - return None - else: - print(f"Request failed with status code {response.status_code}: {response.text}") - return None - + results.append(f"Failed {i+1}/{total_images}: {os.path.basename(image_file)} - {str(e)}") + failed_count += 1 + + # Create a single classes file for the entire batch if using YOLO format + if save_format == "yolo" and all_unique_labels: + # Create a class mapping + class_map = {label: i for i, label in enumerate(sorted(all_unique_labels))} + + # Save the single classes file in both bounding_boxes and masks folders + bbox_dir = os.path.join(output_folder, "bounding_boxes") + masks_dir = os.path.join(output_folder, "masks") + + bbox_classes_file = os.path.join(bbox_dir, "classes.txt") + masks_classes_file = os.path.join(masks_dir, "classes.txt") + + # Write classes to both locations + for file_path in [bbox_classes_file, masks_classes_file]: + with open(file_path, "w") as f: + for label in sorted(all_unique_labels): + f.write(f"{label}\n") + + results.append(f"\nCreated classes files in both bounding_boxes and masks folders") + results.append(f"Classes: {', '.join(sorted(all_unique_labels))}") + + # Update progress to 100% + progress(1.0, desc=f"✅ Completed processing {processed_count}/{total_images} images") + + # Return summary + summary = f"✅ Processed {processed_count} images, {failed_count} failed. Results saved to {output_folder}\n\n" + "\n".join(results) + return summary with gr.Blocks(title="lang-sam") as blocks: - with gr.Row(): - sam_model_choices = gr.Dropdown(choices=list(SAM_MODELS.keys()), label="SAM Model", value="sam2.1_hiera_small") - box_threshold = gr.Slider(minimum=0.0, maximum=1.0, value=0.3, label="Box Threshold") - text_threshold = gr.Slider(minimum=0.0, maximum=1.0, value=0.25, label="Text Threshold") - with gr.Row(): - image_input = gr.Image(type="filepath", label="Input Image") - output_image = gr.Image(type="pil", label="Output Image") - text_prompt = gr.Textbox(lines=1, label="Text Prompt") - - submit_btn = gr.Button("Run Prediction") - - submit_btn.click( - fn=inference, - inputs=[sam_model_choices, box_threshold, text_threshold, image_input, text_prompt], - outputs=output_image, - ) + gr.Markdown("# Language Segment-Anything") + + with gr.Tabs(): + with gr.TabItem("Single Image Processing"): + with gr.Row(): + sam_model_choices = gr.Dropdown(choices=list(SAM_MODELS.keys()), label="SAM Model", value="sam2.1_hiera_small") + box_threshold = gr.Slider(minimum=0.0, maximum=1.0, value=0.3, label="Box Threshold") + text_threshold = gr.Slider(minimum=0.0, maximum=1.0, value=0.25, label="Text Threshold") + with gr.Row(): + image_input = gr.Image(type="filepath", label="Input Image") + output_image = gr.Image(type="pil", label="Output Image") + + with gr.Row(): + text_prompt = gr.Textbox(lines=1, label="Text Prompt") + save_format = gr.Dropdown( + choices=["none", "coco", "yolo"], + label="Save Annotations Format", + value="none", + info="Select format to save annotations" + ) + + submit_btn = gr.Button("Run Prediction") + annotation_output = gr.Textbox(label="Annotation Path", interactive=False) + + submit_btn.click( + fn=inference, + inputs=[sam_model_choices, box_threshold, text_threshold, image_input, text_prompt, save_format], + outputs=[output_image, annotation_output], + ) + + examples = [ + [ + "sam2.1_hiera_small", + 0.32, + 0.25, + os.path.join(os.path.dirname(__file__), "assets", "fruits.jpg"), + "kiwi. watermelon. blueberry.", + ], + [ + "sam2.1_hiera_small", + 0.3, + 0.25, + os.path.join(os.path.dirname(__file__), "assets", "car.jpeg"), + "wheel.", + ], + [ + "sam2.1_hiera_small", + 0.3, + 0.25, + os.path.join(os.path.dirname(__file__), "assets", "food.jpg"), + "food.", + ], + ] + + gr.Examples( + examples=examples, + inputs=[sam_model_choices, box_threshold, text_threshold, image_input, text_prompt], + outputs=output_image, + ) + + with gr.Tab("Batch Processing"): + with gr.Row(): + with gr.Column(): + folder_path = gr.Textbox(label="Input Folder Path", placeholder="Enter folder path containing images") + output_folder = gr.Textbox(label="Output Folder Path (optional)", placeholder="Leave empty to use default") + batch_sam_model = gr.Dropdown( + choices=list(SAM_MODELS.keys()), + label="SAM Model", + value="sam2.1_hiera_small", + info="Select SAM model version" + ) + batch_box_threshold = gr.Slider(minimum=0.0, maximum=1.0, value=0.3, step=0.001, label="Box Threshold") + batch_text_threshold = gr.Slider(minimum=0.0, maximum=1.0, value=0.25, step=0.001, label="Text Threshold") + batch_text_prompt = gr.Textbox(label="Text Prompt", placeholder="Enter comma-separated objects to detect") + batch_save_format = gr.Radio(["coco", "yolo", "none"], label="Save Format", value="coco") + batch_process_btn = gr.Button("Process Folder") + with gr.Column(): + batch_result = gr.Textbox(label="Processing Results", lines=10) + + # Connect the batch processing button with progress tracking + batch_process_btn.click( + fn=process_folder, + inputs=[batch_sam_model, batch_box_threshold, batch_text_threshold, folder_path, + batch_text_prompt, batch_save_format, output_folder], + outputs=[batch_result], + show_progress=True # Enable progress bar + ) examples = [ [ diff --git a/client.py b/client.py new file mode 100644 index 0000000..12b71e5 --- /dev/null +++ b/client.py @@ -0,0 +1,18 @@ + +# Copyright The Lightning AI team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import requests + +response = requests.post("http://127.0.0.1:8000/predict", json={"input": 4.0}) +print(f"Status: {response.status_code}\nResponse:\n {response.text}") diff --git a/lang_sam/server.py b/lang_sam/server.py index 1acba82..bb400bc 100644 --- a/lang_sam/server.py +++ b/lang_sam/server.py @@ -2,7 +2,7 @@ import litserve as ls import numpy as np -from fastapi import Response, UploadFile +from fastapi import Response, UploadFile, FastAPI from PIL import Image from lang_sam import LangSAM @@ -110,11 +110,76 @@ def encode_response(self, output: dict) -> Response: return Response(content=buffer.getvalue(), media_type="image/png") except StopIteration: raise ValueError("No output generated by the prediction.") + + def predict_json(self, inputs: dict) -> dict: + """Perform prediction and return raw results as JSON. + + Returns: + dict: Contains the raw prediction results (masks, boxes, scores, labels). + """ + print("Starting prediction with parameters (JSON response):") + print( + f"sam_type: {inputs['sam_type']}, \ + box_threshold: {inputs['box_threshold']}, \ + text_threshold: {inputs['text_threshold']}, \ + text_prompt: {inputs['text_prompt']}" + ) + + if inputs["sam_type"] != self.model.sam_type: + print(f"Updating SAM model type to {inputs['sam_type']}") + self.model.sam.build_model(inputs["sam_type"]) + + try: + image_pil = Image.open(BytesIO(inputs["image_bytes"])).convert("RGB") + except Exception as e: + raise ValueError(f"Invalid image data: {e}") + + results = self.model.predict( + images_pil=[image_pil], + texts_prompt=[inputs["text_prompt"]], + box_threshold=inputs["box_threshold"], + text_threshold=inputs["text_threshold"], + ) + results = results[0] + + # Convert numpy arrays to lists for JSON serialization + serializable_results = { + "masks": results["masks"].tolist() if len(results["masks"]) > 0 else [], + "boxes": results["boxes"].tolist() if len(results["boxes"]) > 0 else [], + "scores": results["scores"].tolist() if len(results["scores"]) > 0 else [], + "labels": results["labels"] if "labels" in results else [] + } + + return serializable_results + + def encode_json_response(self, output: dict) -> Response: + """Encode the prediction result into a JSON HTTP response. + + Returns: + Response: Contains the raw prediction data as JSON. + """ + from fastapi.responses import JSONResponse + return JSONResponse(content=output) lit_api = LangSAMAPI() server = ls.LitServer(lit_api) +# Add a custom endpoint for JSON response +@server.app.post("/predict_json") +async def predict_json(request: ls.Request): + """Custom endpoint to return raw prediction results as JSON.""" + try: + inputs = await lit_api.decode_request(request) + results = lit_api.predict_json(inputs) + return lit_api.encode_json_response(results) + except Exception as e: + from fastapi.responses import JSONResponse + import traceback + print(f"Error in predict_json: {e}") + print(traceback.format_exc()) + return JSONResponse(content={"error": str(e)}, status_code=500) + if __name__ == "__main__": print(f"Starting LitServe and Gradio server on port {PORT}...") diff --git a/lang_sam/utils.py b/lang_sam/utils.py index fef18ed..79a5f85 100644 --- a/lang_sam/utils.py +++ b/lang_sam/utils.py @@ -11,9 +11,21 @@ def load_image(image_path: str): def draw_image(image_rgb, masks, xyxy, probs, labels): + # Handle case where no masks are detected + if len(masks) == 0 or len(xyxy) == 0 or len(probs) == 0 or len(labels) == 0: + return image_rgb.copy() + box_annotator = sv.BoxCornerAnnotator() label_annotator = sv.LabelAnnotator() mask_annotator = sv.MaskAnnotator() + + # Convert masks to numpy array if it's a list + if isinstance(masks, list): + if len(masks) > 0: + masks = np.array(masks) + else: + return image_rgb.copy() + # Create class_id for each unique label unique_labels = list(set(labels)) class_id_map = {label: idx for idx, label in enumerate(unique_labels)} diff --git a/win_requirements.txt b/win_requirements.txt new file mode 100644 index 0000000..683feab --- /dev/null +++ b/win_requirements.txt @@ -0,0 +1,11 @@ +gradio==5.29.0 +litserve==0.2.8 +opencv-python==4.10.0.84 +pydantic>=2.9.2 +sam-2 @ git+https://github.com/facebookresearch/segment-anything-2@c2ec8e14a185632b0a5d8b161928ceb50197eddc +supervision==0.23.0 ; python_full_version > '3.10' +transformers==4.44.2 +# Windows-specific PyTorch with CUDA 12.1 support +--extra-index-url https://download.pytorch.org/whl/cu121 +torch==2.4.1 +torchvision==0.19.1