Skip to content

Commit 41d6bb7

Browse files
authored
Merge pull request (Refactoring of VLM code structure)
Refactoring of VLM code for centralized config and training structure
2 parents cb023ea + 42334fc commit 41d6bb7

File tree

22 files changed

+209
-58
lines changed

22 files changed

+209
-58
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
 data/**/*.mp4
+data/**/*.json
+data/**/*.txt
+data/**/*.csv
 models/**/*.pth
 !**/.gitkeep
 

configs/vlm/hardware/v100.yaml

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
+training:
+  # batch_size: 8
+  # gradient_accumulation_steps: 4  # Effective batch = 32
+
+# model:
+#   torch_dtype: "float16"
+
+# accelerate:
+#   use_accelerate: true
+#   mixed_precision: "fp16"  # V100 supports fp16, NOT bf16
+#   gradient_checkpointing: true
+
+# peft:
+#   use_peft: true
+#   peft_method: "lora"
+#   r: 8
+#   alpha: 16
+#   dropout: 0.1
+
+# quantization:
+#   load_in_4bit: true
+#   bnb_4bit_quant_type: "nf4"
+#   bnb_4bit_compute_dtype: "float16"
+
+# device: "cuda"

configs/vlm/serve.yaml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
+# Configurations for serving the app
+
+model:
+  model_id: "Qwen/Qwen2.5-VL"
+
+inference:
+  batch_size: 4
+  max_batch_wait_ms: 1000
+
+server:
+  host: "0.0.0.0"
+  port: 8000

configs/vlm/train.yaml

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
+training:
+  batch_size: 8
+  learning_rate: 5e-5
+
+model:
+  model_id: "Qwen/Qwen2.5-VL"
+
+# data:
+
+logging:
+  log_level: "INFO"
+
+# checkpoint:
+#   save_steps: 1000
+#   save_total_limit: 3
+#   output_dir: "experiments"
+
+peft:
+  use_peft: false

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ dependencies = [
     "pillow>=11.3.0",
     "pydantic>=2.12.2",
     "pydantic-settings>=2.12.0",
+    "pyyaml>=6.0.3",
     "qwen-vl-utils>=0.0.14",
     "ruff>=0.13.2",
    "threaded-videocapture>=1.0.1",

scripts/perception/.gitkeep

Whitespace-only changes.

scripts/perception/train_videomae.example

Whitespace-only changes.

src/iris/config/__init__.py

Whitespace-only changes.

src/iris/perception/README.md

Lines changed: 0 additions & 13 deletions
This file was deleted.

src/iris/server/app.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,17 @@
 from fastapi import FastAPI, WebSocket, WebSocketDisconnect
 from PIL import Image
 
+from iris.server.config import ServerConfig
 from iris.server.dependencies import get_server_state
-from iris.vlm.inference.model_loader import load_model_and_processor
 from iris.vlm.inference.queue.jobs import SingleFrameJob
 from iris.vlm.inference.queue.queue import InferenceQueue
+from iris.vlm.models import load_model_and_processor
 
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
+config = ServerConfig()
+
 
 @asynccontextmanager
 async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]:
@@ -25,10 +28,12 @@ async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]:
     state = get_server_state()
 
     logger.info("Loading model...")
-    state.model, state.processor = load_model_and_processor("smolvlm2")
+    state.model, state.processor = load_model_and_processor(config.model_key)
 
     logger.info("Starting inference queue...")
-    state.queue = InferenceQueue(max_queue_size=10, num_workers=1)
+    state.queue = InferenceQueue(
+        max_queue_size=config.max_queue_size, num_workers=config.num_workers
+    )
     await state.queue.start()
 
     state.model_loaded = True

Comments (0)