# Cargo workspace manifest for the budtiktok crates.

# Workspace root: enumerates the member crates and picks the dependency
# feature resolver used for all of them.
[workspace]
members = [
    "crates/budtiktok-core",
    "crates/budtiktok-simd",
    "crates/budtiktok-gpu",
    "crates/budtiktok-ipc",
    "crates/budtiktok-coordinator",
    "crates/budtiktok-cli",
    "crates/budtiktok-bench",
    "crates/budtiktok-accuracy",
    "crates/budtiktok-hf-compat",
    "crates/budtiktok-python",
]
# Feature resolver version 2, set explicitly at the workspace level.
resolver = "2"

# Package metadata shared across the workspace. A member crate picks up a
# field by declaring `<field>.workspace = true` in its own manifest.
[workspace.package]
version = "0.1.0"
authors = ["LatentBud Team"]
edition = "2021"
license = "Apache-2.0"
repository = "https://github.com/latentbud/budtiktok"
# Minimum Rust toolchain version declared for the crates.
rust-version = "1.75"

# Central dependency requirements for the workspace. A member crate refers to
# an entry with `<name>.workspace = true` instead of repeating the version.
# Entries are sorted alphabetically within each group.
[workspace.dependencies]
# Core dependencies
ahash = { version = "0.8", features = ["serde"] }
anyhow = "1.0"
bytes = "1.5"
crossbeam = "0.8"
dashmap = "5.5"
flume = "0.11"
lazy_static = "1.4"
once_cell = "1.19"
parking_lot = "0.12"
rayon = "1.8"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
thiserror = "1.0"
tokio = { version = "1.35", features = ["full"] }

# Unicode
unicode-general-category = "0.6"
unicode-normalization = "0.1"
unicode-segmentation = "1.10"

# Regex (for GPT-2 pre-tokenization)
fancy-regex = "0.13"

# Data structures
aho-corasick = "1.1"
bumpalo = { version = "3.14", features = ["collections"] }
# Fast double-array Aho-Corasick (1.5-2x faster)
daachorse = "1.0"
fastrand = "2.0"
lru = "0.12"
smallvec = "1.11"

# Hashing
xxhash-rust = { version = "0.8", features = ["xxh3"] }

# Logging and tracing
tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }

# Metrics
prometheus = "0.13"

# Serialization
bincode = "1.3"
rkyv = "0.7"

# CLI
clap = { version = "4.4", features = ["derive"] }

# Testing
assert_approx_eq = "1.1"
criterion = { version = "0.5", features = ["html_reports"] }
proptest = "1.4"
rstest = "0.18"

# Benchmarking against HuggingFace
tokenizers = "0.15"

# Memory allocators
mimalloc = "0.1"
tikv-jemallocator = "0.5"

# Workspace crates (internal), referenced by path relative to this manifest
budtiktok-coordinator = { path = "crates/budtiktok-coordinator" }
budtiktok-core = { path = "crates/budtiktok-core" }
budtiktok-gpu = { path = "crates/budtiktok-gpu" }
budtiktok-hf-compat = { path = "crates/budtiktok-hf-compat" }
budtiktok-ipc = { path = "crates/budtiktok-ipc" }
budtiktok-simd = { path = "crates/budtiktok-simd" }

# Release builds: trade longer compile times for faster, smaller binaries.
[profile.release]
# A single codegen unit lets the optimizer see each crate as a whole.
codegen-units = 1
# "fat" link-time optimization runs across every crate in the dependency graph.
lto = "fat"
# Abort on panic instead of unwinding.
panic = "abort"
# Strip symbols (and with them debug info) from the final binary.
strip = true

# Custom profile: the `release` settings, but with debug info generated and
# kept in the binary.
[profile.release-with-debug]
inherits = "release"
# Undo the `strip = true` inherited from `release` so the debug info survives.
strip = false
debug = true

# Benchmark builds: the `release` settings plus debug info.
[profile.bench]
inherits = "release"
debug = true
# `release` sets `strip = true`, which this profile would otherwise inherit,
# removing the debug info requested above. Override it here, matching
# `release-with-debug`.
strip = false