diff --git a/README.md b/README.md index 49af20c..681cc3f 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ Together, we're diving into development of an AI-powered API for **generating va We kicked off the project in February/March 2025 and aim to have the work delivered by end of June 2025. -#### *BTW: this tool is an experimental project, is not intended to be used yet by the general user/mapper to update OSM* +#### _BTW: this tool is an experimental project and is not yet intended to be used by general users/mappers to update OSM_ ## Demo @@ -98,6 +98,7 @@ You should receive a response with OSM tags: ```json { + "tag_id": "f9ed4777-e9ef-46e0-b629-004eba0d2997", "tags": [ { "key": "smoothness", @@ -113,6 +114,42 @@ You should receive a response with OSM tags: } ``` +### Upload API + +You can also upload images directly using the `/api/v1/tags/upload` endpoint: + +``` +curl --location 'http://localhost:8000/api/v1/tags/upload' \ +--form 'category="roads"' \ +--form 'lat="6.248001"' \ +--form 'lon="-75.540833"' \ +--form 'image=@"/path/to/bad_unpaved_road.jpg"' +``` + +### Curating Tags + +Once tags have been confirmed, save them under the `tag_id` returned with the generated +tags so they can be used in the future to improve accuracy: + +``` +curl --location 'http://localhost:8000/api/v1/tags/f9ed4777-e9ef-46e0-b629-004eba0d2997' \ +--header 'Content-Type: application/json' \ +--data '{ + "tags": [ + { + "key": "smoothness", + "value": "very_bad", + "confidence": 0.9497874632288248 + }, + { + "key": "surface", + "value": "unpaved", + "confidence": 0.9497874632288248 + } + ] +}' +``` + ## ChatMap ChatMap (chatmap.hotosm.org) is a simple but powerful app that enables mapping using common instant messaging apps like WhatsApp, Signal or Telegram. 
diff --git a/tagger/api/schema/tags.py b/tagger/api/schema/tags.py index a122e31..8bae00c 100644 --- a/tagger/api/schema/tags.py +++ b/tagger/api/schema/tags.py @@ -26,10 +26,9 @@ class Tags(BaseModel): class TagsResponse(BaseModel): + tag_id: str tags: List[Tags] class SaveTagsRequest(BaseModel): - category: str - image: Image tags: List[Tags] diff --git a/tagger/api/v1/tags.py b/tagger/api/v1/tags.py index 4a074fa..e27ddbc 100644 --- a/tagger/api/v1/tags.py +++ b/tagger/api/v1/tags.py @@ -5,9 +5,10 @@ from tagger.core.tags import ( download_image_url, generate_tags, + save_curated_tag_s3, save_tag_embedding, resize_image, - generate_tags_upload + generate_tags_upload, ) router = APIRouter(prefix="/tags") @@ -18,27 +19,17 @@ async def create_tags(tag: TagsRequest): return generate_tags(tag) -@router.post("/save", response_model=TagsResponse) -async def save_tags(tag: SaveTagsRequest): +@router.post("/{tag_id}") +async def save_tags(tag_id: str, tag: SaveTagsRequest): """ - Save generated tags for an image to the database. + Save curated tags for an image to S3. 
""" - - # Download image from url - base64_image = resize_image(download_image_url(tag.image.url)) - - # Generate image embedding - image_embedding_value = VISION_EMBEDDING_MODEL.image_embedding([base64_image])[0] - - # Save image embedding + tags to database - save_tag_embedding( - category=tag.category, - image_url=tag.image.url, - image_embeddings=image_embedding_value, - coordinates=tag.image.coordinates, + save_curated_tag_s3( + tag_id=tag_id, tags=tag.tags, ) + @router.post("/upload", response_model=TagsResponse) async def create_tags_from_upload( category: str = Form(), @@ -47,4 +38,3 @@ async def create_tags_from_upload( image: UploadFile = File(), ): return generate_tags_upload(category, lat, lon, image) - diff --git a/tagger/config/storage.py b/tagger/config/storage.py index b46ee68..765709d 100644 --- a/tagger/config/storage.py +++ b/tagger/config/storage.py @@ -1,6 +1,9 @@ +import os + import boto3 S3_CLIENT = boto3.resource("s3") +IMAGE_BUCKET = os.getenv("IMAGE_BUCKET", "hotosm-osm-tagger") # MinIO # S3_CLIENT = boto3.resource( diff --git a/tagger/core/tags.py b/tagger/core/tags.py index d2aceec..60f5f97 100644 --- a/tagger/core/tags.py +++ b/tagger/core/tags.py @@ -1,9 +1,10 @@ from urllib.parse import urlparse from ast import Dict from io import BytesIO -from typing import List, Dict +from typing import List, Dict, Literal import json import base64 +import uuid from PIL import Image as PILImage @@ -12,12 +13,11 @@ from sqlalchemy import desc from sqlmodel import Session, select import requests -import boto3 from tagger.api.schema.tags import Coordinates, Tags, TagsRequest, TagsResponse from tagger.config.models import JSON_OUTPUT_MODEL, VISION_EMBEDDING_MODEL, VISION_MODEL from tagger.config.db import TAGGING_DB_ENGINE -from tagger.config.storage import S3_CLIENT +from tagger.config.storage import IMAGE_BUCKET, S3_CLIENT from tagger.core.models.interface import ( ImageMessage, TextMessage, @@ -67,18 +67,12 @@ class GeneratedTagsSchema(BaseModel): ) 
-def generate_tags_from_base64(category: str, base64_image: str) -> TagsResponse: - # category = request.category - # image = request.image - - # base64_image = resize_image(download_image_url(image.url), max_size=240) - - image_embedding_value = VISION_EMBEDDING_MODEL.image_embedding([base64_image])[0] +def generate_tags_from_base64( + category: str, image_embedding_value: List[float], base64_image: str +) -> List[Tags]: # TODO: bias images by lat and lon similar_image_tags = get_similar_images(image_embedding_value, k=3) - print("SIMILAR IMAGE TAGS:", similar_image_tags) - generated_tags = VISION_MODEL.vision_completion( messages=[ TextMessage( @@ -155,11 +149,42 @@ def generate_tags_from_base64(category: str, base64_image: str) -> TagsResponse: # print("TAGS JSON:", tags_json) + return [ + Tags(key=tag.key, value=tag.value, confidence=tag.confidence) + for tag in tags_json.tags + ] + + +def _generate_and_save_tags( + category: str, + base64_image: str, + coordinates: Coordinates, +) -> TagsResponse: + """Helper function to generate and save tags for an image.""" + image_embedding_value = VISION_EMBEDDING_MODEL.image_embedding([base64_image])[0] + + tags = generate_tags_from_base64(category, image_embedding_value, base64_image) + + # Save generated tags to S3 + tag_id = str(uuid.uuid4()) + + save_generated_tag_image_s3( + category=category, + tag_id=tag_id, + image_data=BytesIO(base64.b64decode(base64_image)), + ) + + save_generated_tag_s3( + tag_id=tag_id, + category=category, + coordinates=coordinates, + tags=tags, + image_embeddings=image_embedding_value, + ) + return TagsResponse( - tags=[ - Tags(key=tag.key, value=tag.value, confidence=tag.confidence) - for tag in tags_json.tags - ] + tag_id=tag_id, + tags=tags, ) @@ -169,18 +194,25 @@ def generate_tags(request: TagsRequest) -> TagsResponse: base64_image = resize_image(download_image_url(image_url), max_size=240) - return generate_tags_from_base64(category, base64_image) + return _generate_and_save_tags( + 
category=category, + base64_image=base64_image, + coordinates=request.image.coordinates, + ) def generate_tags_upload( category: str, lat: float, lon: float, image: UploadFile ) -> TagsResponse: - # Read bytes and convert to base64 before resizing to avoid UTF-8 decode error image_data = BytesIO(image.file.read()) base64_image = resize_image(image_data, max_size=240) - return generate_tags_from_base64(category, base64_image) + return _generate_and_save_tags( + category=category, + base64_image=base64_image, + coordinates=Coordinates(lat=lat, lon=lon), + ) def download_image_url(image_url: str) -> BytesIO: @@ -260,6 +292,56 @@ def save_tag_embedding( session.commit() +def save_generated_tag_s3( + tag_id: str, + category: str, + image_embeddings: List[float], + coordinates: Coordinates, + tags: List[Tags], +): + tag = { + "tags": [tag.model_dump() for tag in tags], + "image_url": f"s3://{IMAGE_BUCKET}/generated/{category}/{tag_id}.png", + "embedding": image_embeddings, + "category": category, + "coordinates": coordinates.model_dump(), + } + + # Save tag JSON to S3 + bucket = S3_CLIENT.Bucket(IMAGE_BUCKET) + key = f"generated/{category}/{tag_id}.json" + + bucket.put_object(Key=key, Body=json.dumps(tag), ContentType="application/json") + + +def save_curated_tag_s3( + tag_id: str, + tags: List[Tags], +): + tag = { + "tags": [tag.model_dump() for tag in tags], + } + + # Save tag JSON to S3 + bucket = S3_CLIENT.Bucket(IMAGE_BUCKET) + key = f"curated/{tag_id}.json" + + bucket.put_object(Key=key, Body=json.dumps(tag), ContentType="application/json") + + +def save_generated_tag_image_s3( + category: str, + tag_id: str, + image_data: BytesIO, +): + # Build the S3 object key + key = f"generated/{category}/{tag_id}.png" + + # Save image to S3 + bucket = S3_CLIENT.Bucket(IMAGE_BUCKET) + bucket.put_object(Key=key, Body=image_data, ContentType="image/png") + + def download_image_s3(image_s3_url: str) -> BytesIO: # Parse S3 URL parsed_url = urlparse(image_s3_url)