# pyproject.toml - build and packaging configuration for the nemo-retriever package.
# Build backend declaration: the package is built with setuptools, and the
# requirements below are what the build environment must provide.
[build-system]
build-backend = "setuptools.build_meta"
requires = [
    "setuptools>=78.1.1",
    "wheel>=0.46.2",
]
# Project-level settings for the uv package manager.
[tool.uv]
# Let uv manage this project (uv's `managed` setting).
managed = true
# Treat the project as an installable package rather than a dependencies-only
# (virtual) project, so uv builds and installs it into the environment.
package = true
# Core package metadata. The version is not hard-coded here: it is declared
# dynamic and supplied through [tool.setuptools.dynamic].
[project]
name = "nemo-retriever"
dynamic = ["version"]
description = "A modern RAG ingestion pipeline from Nvidia"
readme = "README.md"
requires-python = ">=3.12"
authors = [
    { name = "Jeremy Dyer", email = "jdyer@nvidia.com" },
]
# NOTE(review): the table form of `license` is deprecated by PEP 639, which the
# setuptools floor in [build-system] already supports. Consider switching to
# `license = "Apache-2.0"`; the "License ::" classifier below would then have
# to be dropped as well. Left unchanged here to keep the built metadata stable.
license = { text = "Apache-2.0" }
classifiers = [
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.12",
    "License :: OSI Approved :: Apache Software License",
    "Operating System :: OS Independent",
]
# Runtime requirements. Each distribution is listed exactly once so that a
# single specifier is the source of truth for its version constraint.
dependencies = [
    "ray[data,serve]>=2.49.0",
    "pypdfium2==4.30.0",
    "pandas>=2.0,<3",
    "tqdm>=4.66.0",
    "typer>=0.12.0",
    "pyyaml>=6.0",
    "lancedb",
    # Under uv, the nv-ingest packages are taken from local paths
    # (see [tool.uv.sources]).
    "nv-ingest==26.03.0rc2",
    "nv-ingest-api==26.03.0rc2",
    "nv-ingest-client==26.03.0rc2",
    "fastapi>=0.114.0",
    "uvicorn[standard]>=0.30.0",
    "httpx>=0.27.0",
    "requests>=2.32.5",
    "pydantic>=2.8.0",
    "rich>=13.7.0",
    "pillow==12.1.1",
    "numpy>=1.26.0",
    "nltk==3.9.3",
    "urllib3==2.6.3",
    # NOTE(review): debugpy is a debugger; confirm it is needed at runtime
    # rather than belonging in the `dev` extra.
    "debugpy>=1.8.0",
    "python-multipart>=0.0.9",
    # transformers>=5 enables loading nvidia/parakeet-ctc-1.1b via pipeline (see
    # parakeet-ctc-1.1b README). If using llama_nemotron_embed_1b_v2, verify
    # compatibility with transformers 5 (it previously relied on HybridCache).
    "transformers>=5.0.0",
    "tokenizers>=0.20.3",
    "accelerate==1.12.0",
    "torch~=2.9.1",
    "torchvision>=0.24,<0.25",
    "einops",
    "easydict",
    "addict",
    "nemotron-page-elements-v3==3.0.1",
    "nemotron-graphic-elements-v1==1.0.0",
    "nemotron-table-structure-v1==1.0.0",
    "nemotron-ocr==1.0.1",
    "markitdown",
    "timm==1.0.22",
    "albumentations==2.0.8",
    "scikit-learn>=1.6.0",
    "open-clip-torch==3.2.0",
    # Local ASR (Parakeet): read chunk files and resample to 16 kHz mono
    "soundfile>=0.12.0",
    "scipy>=1.11.0",
    "nvidia-ml-py",
]
# Optional extras, selected at install time (e.g. `nemo-retriever[svg]`).
[project.optional-dependencies]
# SVG support via cairosvg.
svg = ["cairosvg>=2.7.0"]
# Packaging and test tooling for contributors.
dev = ["build>=1.2.2", "pytest>=8.0.2"]
# Console entry points created when the package is installed.
[project.scripts]
# `retriever` on the command line invokes `main` in `nemo_retriever/__main__.py`.
retriever = "nemo_retriever.__main__:main"
# Source of the fields listed under `dynamic` in [project].
[tool.setuptools.dynamic]
# The version is read at build time from the `get_build_version` attribute of
# the `nemo_retriever.version` module.
version = { attr = "nemo_retriever.version.get_build_version" }
# Where uv fetches specific dependencies from, instead of the default index.
[tool.uv.sources]
# nv-ingest packages: editable installs from directories one level above
# this project.
nv-ingest = { path = "../src/", editable = true }
nv-ingest-api = { path = "../api/", editable = true }
nv-ingest-client = { path = "../client/", editable = true }

# Nemotron model packages: pulled from the "test-pypi" index defined below.
nemotron-page-elements-v3 = { index = "test-pypi" }
nemotron-graphic-elements-v1 = { index = "test-pypi" }
nemotron-table-structure-v1 = { index = "test-pypi" }
nemotron-ocr = { index = "test-pypi" }

# PyTorch packages: pulled from the "torch-cuda" index defined below.
torch = { index = "torch-cuda" }
torchvision = { index = "torch-cuda" }
# Additional package index: TestPyPI. Referenced by name from [tool.uv.sources].
# NOTE(review): TestPyPI is a staging index; confirm that relying on it for
# pinned runtime dependencies is intended.
[[tool.uv.index]]
name = "test-pypi"
url = "https://test.pypi.org/simple/"
# Only used for packages explicitly assigned to this index in [tool.uv.sources].
explicit = true
# Additional package index: PyTorch wheel index (the URL selects the `cu130`
# wheel set). Referenced by name from [tool.uv.sources].
[[tool.uv.index]]
name = "torch-cuda"
url = "https://download.pytorch.org/whl/cu130"
# Only used for packages explicitly assigned to this index in [tool.uv.sources].
explicit = true
# Automatic package discovery for setuptools.
[tool.setuptools.packages.find]
# Search for packages under the `src/` directory (src layout).
where = ["src"]