# Cargo.toml — workspace manifest (repository forked from huggingface/text-embeddings-inference).
# Workspace root manifest: lists the crates that make up this workspace.
[workspace]
# Feature resolver version 2. It is set explicitly because a workspace root
# without its own package does not infer the resolver from `edition`.
resolver = "2"
# Every crate that belongs to the workspace.
members = [
    "backends",
    "backends/candle",
    "backends/ort",
    "backends/core",
    "backends/python",
    "backends/grpc-client",
    "core",
    "router",
]
# Crates operated on when cargo is invoked from the root without selecting a
# package. Currently the same list as `members`; keep the two in sync.
default-members = [
    "backends",
    "backends/candle",
    "backends/ort",
    "backends/core",
    "backends/python",
    "backends/grpc-client",
    "core",
    "router",
]
# Package metadata shared across the workspace. A member crate picks a field
# up by declaring `<field>.workspace = true` in its own manifest.
[workspace.package]
version = "1.9.3"
authors = ["Olivier Dehaene", "Nicolas Patry", "Alvaro Bartolome"]
edition = "2021"
homepage = "https://github.com/huggingface/text-embeddings-inference"
# Dependency versions and sources shared by the member crates, listed
# alphabetically. Members reference an entry with `<name>.workspace = true`.
#
# Entries in this table cannot be marked `optional`; a member that wants an
# optional dependency adds `optional = true` on its own side. This applies to
# the CUDA-related crates below (candle-flash-attn, candle-flash-attn-v1,
# candle-cublaslt, cudarc, candle-rotary, candle-layer-norm).
[workspace.dependencies]
anyhow = "1.0.75"
# `candle` is the local name for the `candle-core` package.
candle = { version = "0.10", package = "candle-core" }
candle-cublaslt = { git = "https://github.com/spiceai/candle-cublaslt", rev = "b74d30e0a212ee1ee517b2ef2efd8d46ae7687de" }
# `*` accepts any version for the three wildcard candle crates; the source
# actually built is pinned by the git revisions under `[patch.crates-io]`.
candle-flash-attn = "*"
candle-flash-attn-v1 = { git = "https://github.com/huggingface/candle-flash-attn-v1", rev = "3f1870b0d708579904c76e41745c659c3f9fa038" }
candle-index-select-cu = { version = "0.0.1", features = ["cuda-11"], default-features = false }
candle-layer-norm = { git = "https://github.com/spiceai/candle-layer-norm", rev = "62f936a1c5c79a08008e6967297543b4c154784e" }
candle-nn = "*"
candle-rotary = { git = "https://github.com/spiceai/candle-rotary", rev = "a4c4efcd5fcb2d7f750073341f663098c2a50c98" }
candle-transformers = "*"
clap = { version = "4.1", features = ["derive", "env"] }
# Same repository and revision as the `cudarc` entry in `[patch.crates-io]`;
# update both together.
cudarc = { git = "https://github.com/EricLBuehler/cudarc", rev = "34834440ada40a7c53d46d45b65d5e42c5f5d903" }
half = { version = "2.3.1", features = ["num-traits"] }
hf-hub = { version = "0.4", features = ["tokio"], default-features = false }
intel-mkl-src = { version = "0.8", default-features = false }
metrics = "0.23"
nohash-hasher = "0.2"
num_cpus = "1.16.0"
rand = "0.9"
serde = { version = "1.0", features = ["serde_derive"] }
serde_json = "1.0"
serial_test = "2.0.0"
thiserror = "1.0"
tokenizers = { version = "0.21.0", default-features = false, features = [
    "onig",
    "esaxx_fast",
] }
tokio = { version = "1.25", features = [
    "rt",
    "rt-multi-thread",
    "parking_lot",
    "sync",
    "signal",
] }
tracing = "0.1"
# Commented-out registry-version alternatives for some of the crates above,
# kept for reference only:
# cudarc = { version = "0.13", features = [
#     "cuda-12020",
# ], default-features = false }
# candle-nn = { version = "0.8" }
# candle-transformers = { version = "0.8" }
# candle-flash-attn = { version = "0.8" }
# candle-cublaslt = { version = "0.0.1" }
# candle-layer-norm = { version = "0.0.1" }
# candle-rotary = { version = "0.0.1" }
# candle-flash-attn-v1 = { version = "0.0.1" }
# Overrides for crates that would otherwise be fetched from crates.io. Each
# entry redirects the named package to a pinned git revision for the whole
# dependency graph, including transitive uses.
[patch.crates-io]
# All candle crates come from the same fork at the same revision; bump the
# four `rev` values together.
candle = { git = "https://github.com/spiceai/candle", rev = "7de0d9fdff7dfb6289442abfba821e8d77f57c58", package = "candle-core" }
candle-flash-attn = { git = "https://github.com/spiceai/candle", rev = "7de0d9fdff7dfb6289442abfba821e8d77f57c58" }
candle-nn = { git = "https://github.com/spiceai/candle", rev = "7de0d9fdff7dfb6289442abfba821e8d77f57c58" }
candle-transformers = { git = "https://github.com/spiceai/candle", rev = "7de0d9fdff7dfb6289442abfba821e8d77f57c58" }
# NOTE(review): the original author flagged the cudarc override as a
# potential problem without recording the reason — confirm before changing.
# Alternative source that is currently disabled:
# cudarc = { git = "https://github.com/Narsil/cudarc", rev = "8b4f18b4bcd5e4b1a9daf40abc3a2e27f83f06e9" }
cudarc = { git = "https://github.com/EricLBuehler/cudarc", rev = "34834440ada40a7c53d46d45b65d5e42c5f5d903" }
# Release profile: tuned for runtime speed and a small binary, at the cost of
# longer builds.
[profile.release]
codegen-units = 1  # one codegen unit per crate
debug = 0          # no debug info
lto = "fat"        # link-time optimization across the whole dependency graph
opt-level = 3
panic = "abort"    # abort on panic instead of unwinding
strip = "symbols"  # strip symbols from the binary
# Custom profile, selected with `--profile release-debug`. It starts from
# `release` (so `opt-level` and `panic` carry over) and overrides the keys
# below to keep debug info and symbols.
[profile.release-debug]
inherits = "release"
codegen-units = 16
debug = 1       # limited debug info
lto = "thin"
strip = "none"  # keep symbols and debug info in the binary