-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpyproject.toml
More file actions
74 lines (68 loc) · 1.91 KB
/
Copy pathpyproject.toml
File metadata and controls
74 lines (68 loc) · 1.91 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# PEP 517 build configuration.
# Only setuptools is required up front: its build backend requests `wheel`
# itself when a wheel build needs it, so listing it here would just force an
# unnecessary install for sdist-only builds.
[build-system]
requires = ["setuptools>=68.0"]
build-backend = "setuptools.build_meta"
# Core distribution metadata (PEP 621).
[project]
name = "kvboost"
version = "0.9.0"
description = "Chunk-level KV cache reuse for faster HuggingFace inference"
readme = "README.md"
license = { text = "MIT" }
requires-python = ">=3.9"
authors = [{ name = "Srihari Unnikrishnan" }]
keywords = [
    "llm",
    "inference",
    "kv-cache",
    "transformers",
    "optimization",
]
classifiers = [
    "Development Status :: 3 - Alpha",
    "Intended Audience :: Developers",
    "Intended Audience :: Science/Research",
    "Programming Language :: Python :: 3",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
]
# Runtime requirements installed with the base package (no extras selected).
dependencies = [
    "torch>=2.1.0",
    "transformers>=4.38.0",
    "accelerate>=0.27.0",
    "sentencepiece>=0.1.99",
]
# Project links published with the package metadata.
[project.urls]
Repository = "https://github.com/pythongiant/kvboost"
Documentation = "https://kvboost.readthedocs.io"
# Optional extras, installed on request as `kvboost[<extra>]`.
[project.optional-dependencies]
# Test runner (with asyncio support), an HTTP client, and the ruff linter.
dev = [
    "pytest>=7.0",
    "pytest-asyncio>=0.23",
    "httpx>=0.27",
    "ruff>=0.1.0",
]
cuda = [
    "ninja>=1.11",
    # Triton provides the 'sage' (INT8 SageAttention) and 'triton_flash'
    # kernels. They are JIT-compiled through the CUDA driver, so neither nvcc
    # nor a flash-attn-style wheel build is involved. The CUDA torch wheel on
    # Linux already ships Triton; it is declared here to make the requirement
    # explicit. The marker limits it to Linux because Triton publishes no
    # macOS/Windows wheels.
    "triton>=2.1 ; platform_system=='Linux'",
]
# The autoawq-kernels requirement only applies on Linux x86_64 (see its marker).
streaming = [
    "safetensors>=0.4",
    "huggingface_hub>=0.23",
    "autoawq-kernels>=0.0.6 ; platform_system=='Linux' and platform_machine=='x86_64'",
]
# NOTE(review): presumably the stack behind the `kvboost-server` script; confirm.
server = [
    "fastapi>=0.110.0",
    "uvicorn[standard]>=0.29.0",
    "pydantic>=2.0",
    "httpx>=0.27",
]
# Sphinx documentation toolchain: furo theme, copy button, MyST Markdown parser.
docs = [
    "sphinx>=7.0",
    "furo>=2024.0",
    "sphinx-copybutton>=0.5",
    "myst-parser>=3.0",
]
# Console entry points: installs a `kvboost-server` command that invokes
# `main` from the `kvboost.server.__main__` module.
[project.scripts]
kvboost-server = "kvboost.server.__main__:main"
# setuptools-specific packaging options.
[tool.setuptools]
# Discover importable packages under the src/ directory.
packages.find.where = ["src"]
# Include the py.typed marker file in the built kvboost package.
package-data.kvboost = ["py.typed"]