# NeMo-Retriever/nemo_retriever/pyproject.toml at a520c5df41ba0e0b90153bf5c3f0f299f3c8f4d0 · NVIDIA/NeMo-Retriever

# Build configuration: the setuptools backend, with lower bounds on the
# build-time tools.
[build-system]
build-backend = "setuptools.build_meta"
requires = [
  "setuptools>=78.1.1",
  "wheel>=0.46.2",
]

# uv project flags; both are switched on. See uv's settings reference for the
# exact meaning of `managed` and `package`.
[tool.uv]
package = true
managed = true

# Project metadata for the `nemo-retriever` distribution.
# `version` is declared dynamic, so it is not hard-coded in this table.
[project]
name = "nemo-retriever"
dynamic = ["version"]
description = "A modern RAG ingestion pipeline from Nvidia"
readme = "README.md"
requires-python = ">=3.12"
authors = [
  { name = "Jeremy Dyer", email = "jdyer@nvidia.com" },
]
license = { text = "Apache-2.0" }
classifiers = [
  "Programming Language :: Python :: 3",
  "Programming Language :: Python :: 3.12",
  "License :: OSI Approved :: Apache Software License",
  "Operating System :: OS Independent",
]
# Runtime requirements. Keep exactly one specifier per package: repeated or
# overlapping entries for the same package hide which constraint applies.
dependencies = [
  "ray[data,serve]>=2.49.0",
  "pypdfium2==4.30.0",
  "pandas>=2.0,<3",
  "tqdm>=4.66.0",
  "typer>=0.12.0",
  "pyyaml>=6.0",
  "lancedb",
  # The three nv-ingest packages are pinned to the same release candidate;
  # bump them together.
  "nv-ingest==26.03.0rc2",
  "nv-ingest-api==26.03.0rc2",
  "nv-ingest-client==26.03.0rc2",
  "fastapi>=0.114.0",
  "uvicorn[standard]>=0.30.0",
  "httpx>=0.27.0",
  "requests>=2.32.5",
  "pydantic>=2.8.0",
  "rich>=13.7.0",
  "pillow==12.1.1",
  "numpy>=1.26.0",
  "nltk==3.9.3",
  "urllib3==2.6.3",
  # NOTE(review): debugpy is listed as a runtime requirement; confirm it is
  # needed outside of development.
  "debugpy>=1.8.0",
  "python-multipart>=0.0.9",
  # transformers>=5 enables loading nvidia/parakeet-ctc-1.1b via pipeline (see
  # parakeet-ctc-1.1b README). If using llama_nemotron_embed_1b_v2, verify
  # compatibility with transformers 5 (it previously relied on HybridCache).
  "transformers>=5.0.0",
  "tokenizers>=0.20.3",
  # torch is held to the 2.9 series and torchvision to the 0.24 series.
  "torch~=2.9.1",
  "torchvision>=0.24,<0.25",
  "einops",
  "easydict",
  "addict",
  "nemotron-page-elements-v3==3.0.1",
  "nemotron-graphic-elements-v1==1.0.0",
  "nemotron-table-structure-v1==1.0.0",
  "nemotron-ocr==1.0.1",
  "markitdown",
  "timm==1.0.22",
  # Exact pin; this is the only accelerate requirement in the list.
  "accelerate==1.12.0",
  "albumentations==2.0.8",
  "scikit-learn>=1.6.0",
  "open-clip-torch==3.2.0",
  # Local ASR (Parakeet): read chunk files and resample to 16 kHz mono
  "soundfile>=0.12.0",
  "scipy>=1.11.0",
  "nvidia-ml-py",
]

# Optional extras, requested at install time as `nemo-retriever[<extra>]`.
[project.optional-dependencies]
# Packaging and test tooling.
dev = [
  "build>=1.2.2",
  "pytest>=8.0.2",
]
# Adds the cairosvg package.
svg = [
  "cairosvg>=2.7.0",
]

# Console script: the `retriever` command is bound to `main` in
# `nemo_retriever.__main__`.
[project.scripts]
retriever = "nemo_retriever.__main__:main"

# Source of the dynamic `version` field: setuptools reads it from the
# attribute path below.
# NOTE(review): the name suggests `get_build_version` is a callable; confirm
# it yields a valid version string at build time.
[tool.setuptools.dynamic]
version = { attr = "nemo_retriever.version.get_build_version" }

# Per-package sources used by uv.
[tool.uv.sources]
# nv-ingest packages: editable installs from directories one level above
# this project.
nv-ingest = { path = "../src/", editable = true }
nv-ingest-api = { path = "../api/", editable = true }
nv-ingest-client = { path = "../client/", editable = true }
# Nemotron model packages: fetched from the "test-pypi" index.
nemotron-page-elements-v3 = { index = "test-pypi" }
nemotron-graphic-elements-v1 = { index = "test-pypi" }
nemotron-table-structure-v1 = { index = "test-pypi" }
nemotron-ocr = { index = "test-pypi" }
# PyTorch packages: fetched from the "torch-cuda" index.
torch = { index = "torch-cuda" }
torchvision = { index = "torch-cuda" }

# Named package indexes referenced by the `index = "..."` entries in
# [tool.uv.sources]. Both are flagged `explicit`; see uv's index documentation
# for how explicit indexes are consulted.
[[tool.uv.index]]
name = "test-pypi"
url = "https://test.pypi.org/simple/"
explicit = true

# NOTE(review): the `cu130` path segment presumably selects CUDA 13.0 wheels;
# confirm before changing the torch or torchvision constraints.
[[tool.uv.index]]
name = "torch-cuda"
url = "https://download.pytorch.org/whl/cu130"
explicit = true

# Package discovery: setuptools looks for packages under the `src` directory.
[tool.setuptools.packages.find]
where = ["src"]