Commit d0fb10b

Merge pull request #185 from bsatapat-jpg/dev
[LEADS-199] Add CPU-only PyTorch support to reduce package size
2 parents 34cbae4 + 6e09554 commit d0fb10b

4 files changed: 4220 additions & 205 deletions

README.md

Lines changed: 9 additions & 7 deletions
````diff
@@ -40,16 +40,18 @@ make install-tools
 By default, lightspeed-evaluation uses remote embedding providers (OpenAI, Gemini). If you need **local embedding models** (HuggingFace/sentence-transformers), install with:
 
 ```bash
-# Using pip
-pip install 'lightspeed-evaluation[local-embeddings]'
-```
-or
-```bash
-# Using uv (from already cloned repo for local development)
+# Using uv (from an already cloned repo) - CPU-only (default, ~2GB)
 uv sync --extra local-embeddings
+
+# Using uv (from an already cloned repo) - GPU with CUDA support (~6GB)
+cp uv-gpu.lock uv.lock && uv sync --extra local-embeddings --frozen
+
+# Using pip - CPU-only (install torch from CPU index first)
+pip install torch --index-url https://download.pytorch.org/whl/cpu
+pip install 'lightspeed-evaluation[local-embeddings]'
 ```
 
-> **Note**: Local embeddings require PyTorch and related packages (~6GB). Only install if you need `embedding.provider: huggingface` in your configuration.
+> **Note**: The `uv` commands above must be run from an already cloned repository checkout, since they use this project's `pyproject.toml`, `uv.lock`, and `uv-gpu.lock`. The default `uv.lock` uses CPU-only PyTorch (~2GB). For GPU/CUDA support, copy `uv-gpu.lock` to `uv.lock` before syncing (~6GB). When using pip, a plain `pip install` may install CUDA-dependent wheels on Linux; use the `--index-url` flag for guaranteed CPU-only installation. Only install if you need `embedding.provider: huggingface` in your configuration.
 
 #### Optional: NLP metrics
 If you want to install Ragas NLP metrics like ROUGE or Bleu install additional dependencies with:
````
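Whichever installation route is taken, it can be worth confirming which torch build actually landed in the environment. A minimal sketch (the `torch_build_variant` helper is hypothetical, not part of this commit); CPU-only PyTorch wheels report `torch.version.cuda` as `None`:

```python
# Hypothetical helper (not part of this commit): report which torch build,
# if any, is installed in the current environment.
import importlib.util

def torch_build_variant() -> str:
    """Return 'not installed', 'cpu-only', or 'cuda <version>'."""
    if importlib.util.find_spec("torch") is None:
        return "not installed"
    import torch  # safe: the module spec was found above
    # CPU-only wheels ship with torch.version.cuda set to None.
    return "cpu-only" if torch.version.cuda is None else f"cuda {torch.version.cuda}"

print(torch_build_variant())
```

Running this after a CPU-only install should print `cpu-only`; if it prints a `cuda` version after following the CPU instructions, the wheel likely came from the default index instead.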

pyproject.toml

Lines changed: 26 additions & 7 deletions
```diff
@@ -32,15 +32,14 @@ dependencies = [
 ]
 
 [project.optional-dependencies]
-# Local embedding models - includes torch (~6GB with CUDA, ~1.2GB CPU-only)
+# Local embedding models - includes torch
 # Only needed when using embedding.provider: huggingface
-# For CPU-only installation (smaller footprint), first install PyTorch CPU:
-#   pip install torch --index-url https://download.pytorch.org/whl/cpu
-#   pip install 'lightspeed-evaluation[local-embeddings]' --no-deps
-#   pip install sentence-transformers --no-deps
-# or
-#   uv sync --extra local-embeddings
+#
+# Installation options:
+#   CPU (default, ~2GB): uv sync --extra local-embeddings
+#   GPU (~6GB): cp uv-gpu.lock uv.lock && uv sync --extra local-embeddings --frozen
 local-embeddings = [
+    "torch>=2.0.0",
     "sentence-transformers>=5.1.0",
 ]
@@ -112,3 +111,23 @@ requires = ["hatchling"]
 build-backend = "hatchling.build"
 [tool.hatch.build.targets.wheel]
 packages = ["src/lightspeed_evaluation"]
+
+# UV configuration for CPU-only PyTorch (reduces package size from ~6GB to ~2GB)
+[tool.uv]
+
+[[tool.uv.index]]
+name = "pytorch-cpu"
+url = "https://download.pytorch.org/whl/cpu"
+explicit = true
+
+# Use CPU index for torch by default
+[tool.uv.sources]
+torch = { index = "pytorch-cpu" }
+
+# Lock files:
+#   uv.lock     - CPU-only PyTorch (default, ~2GB)
+#   uv-gpu.lock - Full PyTorch with CUDA (~6GB)
+#
+# Installation:
+#   CPU: uv sync --extra local-embeddings
+#   GPU: cp uv-gpu.lock uv.lock && uv sync --extra local-embeddings --frozen
```
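The CPU and CUDA torch builds that these indexes serve are told apart by PEP 440 local version identifiers (e.g. `2.4.0+cpu` from the CPU index versus a `+cu…`-tagged wheel from the default PyTorch index). A small illustrative sketch of that naming convention (the `torch_wheel_variant` helper is hypothetical, not part of this commit):

```python
# Illustrative only: classify a torch version string by its PEP 440 local
# version tag, which is how PyTorch labels CPU-only vs CUDA wheel builds.

def torch_wheel_variant(version: str) -> str:
    """Return 'cpu', 'cuda', or 'unsuffixed' for a torch version string."""
    if "+" not in version:
        return "unsuffixed"  # no local tag, e.g. a plain PyPI wheel
    local = version.split("+", 1)[1]
    if local == "cpu":        # checked before the 'cu' prefix below,
        return "cpu"          # since 'cpu' also starts with 'cu'
    if local.startswith("cu"):
        return "cuda"         # e.g. 'cu121' -> a CUDA 12.1 build
    return "unsuffixed"

print(torch_wheel_variant("2.4.0+cpu"))    # -> cpu
print(torch_wheel_variant("2.4.0+cu121"))  # -> cuda
```

Because `explicit = true` is set on the `pytorch-cpu` index, uv resolves only `torch` (via `[tool.uv.sources]`) from it; all other dependencies still come from the default index.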
