forked from antirez/ds4
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathds4_ssd.c
More file actions
181 lines (155 loc) · 5.86 KB
/
ds4_ssd.c
File metadata and controls
181 lines (155 loc) · 5.86 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
#include "ds4_ssd.h"
#include <ctype.h>
#include <errno.h>
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>
#ifndef MAP_ANONYMOUS
#define MAP_ANONYMOUS MAP_ANON
#endif
/* Number of bytes in one GiB (2^30). */
static const uint64_t DS4_GIB = 1024ull * 1024ull * 1024ull;

/*
 * Parse a size given as a whole number of GiB.
 *
 * Accepted form: one or more decimal digits, optionally followed by a
 * two-letter "GB" suffix in any letter case ("8", "8GB", "8gb", "8Gb").
 * Signs, whitespace, fractions and any other suffix are rejected.
 *
 * s      NUL-terminated argument text; NULL or empty is rejected.
 * bytes  receives the value multiplied by DS4_GIB on success. It is reset to
 *        0 up front whenever it is non-NULL, so it reads 0 after a failure.
 *
 * Returns true on success. Returns false when either pointer is NULL, the
 * text is malformed, the number is zero, has 32 or more digits, or the byte
 * count would not fit in a uint64_t.
 */
bool ds4_parse_gib_arg(const char *s, uint64_t *bytes) {
    if (bytes != NULL) {
        *bytes = 0;
    }
    if (s == NULL || bytes == NULL || *s == '\0') {
        return false;
    }

    /* Length of the numeric part, i.e. the input minus an optional suffix. */
    size_t ndigits = strlen(s);
    if (ndigits > 2) {
        const char g = s[ndigits - 2];
        const char b = s[ndigits - 1];
        const bool has_suffix =
            (g == 'g' || g == 'G') && (b == 'b' || b == 'B');
        if (has_suffix) {
            ndigits -= 2;
        }
    }

    char digits[32];
    if (ndigits == 0 || ndigits >= sizeof(digits)) {
        return false;
    }
    /* Every character ahead of the suffix has to be a decimal digit. */
    if (strspn(s, "0123456789") < ndigits) {
        return false;
    }

    /* strtoull needs a terminated string that excludes the suffix. */
    memcpy(digits, s, ndigits);
    digits[ndigits] = '\0';

    errno = 0;
    const unsigned long long gib = strtoull(digits, NULL, 10);
    if (errno != 0) {
        return false;
    }
    /* Zero is not a usable size; the upper bound keeps the product in range. */
    if (gib == 0 || gib > UINT64_MAX / DS4_GIB) {
        return false;
    }

    *bytes = (uint64_t)gib * DS4_GIB;
    return true;
}
/*
 * Parse a streaming-cache size that is either an expert count or a GiB budget.
 *
 * Text ending in a two-letter "GB" suffix (any letter case) is handed to
 * ds4_parse_gib_arg() and fills *bytes; *experts stays 0. Any other text must
 * consist solely of decimal digits and fills *experts; *bytes stays 0.
 *
 * s        NUL-terminated argument text; NULL or empty is rejected.
 * experts  receives the expert count for the plain-number form.
 * bytes    receives the byte budget for the "GB" form.
 *
 * Both outputs are reset to 0 up front whenever they are non-NULL. Returns
 * true on success, false when any pointer is NULL, the text is malformed, the
 * count is zero, or the count does not fit in a uint32_t.
 */
bool ds4_parse_streaming_cache_experts_arg(const char *s,
                                           uint32_t *experts,
                                           uint64_t *bytes) {
    if (experts != NULL) {
        *experts = 0;
    }
    if (bytes != NULL) {
        *bytes = 0;
    }
    if (s == NULL || *s == '\0' || experts == NULL || bytes == NULL) {
        return false;
    }

    const size_t n = strlen(s);

    /* A trailing "GB" switches the argument to the byte-budget form. */
    if (n > 2) {
        const char g = s[n - 2];
        const char b = s[n - 1];
        if ((g == 'g' || g == 'G') && (b == 'b' || b == 'B')) {
            return ds4_parse_gib_arg(s, bytes);
        }
    }

    /* Plain form: nothing but decimal digits from start to end. */
    if (strspn(s, "0123456789") != n) {
        return false;
    }

    errno = 0;
    const unsigned long count = strtoul(s, NULL, 10);
    if (errno != 0) {
        return false;
    }
    if (count == 0 || count > UINT32_MAX) {
        return false;
    }

    *experts = (uint32_t)count;
    return true;
}
/*
 * Convert a byte budget into the number of whole experts it can hold.
 *
 * bytes             total budget in bytes.
 * per_expert_bytes  size of one expert in bytes.
 *
 * Returns bytes / per_expert_bytes rounded down. Returns 0 when either input
 * is 0, when not even one expert fits, or when the quotient is larger than
 * UINT32_MAX.
 */
uint32_t ds4_ssd_cache_experts_for_byte_budget(uint64_t bytes,
                                               uint64_t per_expert_bytes) {
    /* Guard the division; a zero budget falls out of the range test below. */
    if (per_expert_bytes == 0) {
        return 0;
    }

    const uint64_t fit = bytes / per_expert_bytes;
    return (fit >= 1 && fit <= UINT32_MAX) ? (uint32_t)fit : 0;
}
/*
 * Derive an expert-cache plan from a recommended memory budget.
 *
 * recommended_bytes  recommended budget in bytes; 0 is rejected.
 * non_routed_bytes   bytes subtracted from the model target before sizing
 *                    the cache.
 * per_expert_bytes   size of one expert in bytes; 0 is rejected.
 * max_model_experts  upper bound on the expert count; 0 means no bound.
 * out                receives the plan; zeroed first, so it is all-zero when
 *                    the inputs are rejected.
 *
 * Fields written:
 *   model_target_bytes     four fifths of recommended_bytes, rounded down.
 *   cache_bytes            model_target_bytes - non_routed_bytes, or 0 when
 *                          the target does not exceed non_routed_bytes.
 *   cache_experts          cache_bytes / per_expert_bytes, raised to at least
 *                          1, then capped by max_model_experts (if non-zero)
 *                          and by UINT32_MAX.
 *   effective_cache_bytes  cache_experts * per_expert_bytes. Because of the
 *                          minimum of one expert this can be larger than
 *                          cache_bytes.
 *
 * Returns false when out is NULL or a required input is 0; true otherwise.
 */
bool ds4_ssd_auto_cache_plan(uint64_t recommended_bytes,
                             uint64_t non_routed_bytes,
                             uint64_t per_expert_bytes,
                             uint64_t max_model_experts,
                             ds4_ssd_cache_plan *out) {
    if (!out) return false;
    memset(out, 0, sizeof(*out));
    if (recommended_bytes == 0 || per_expert_bytes == 0) return false;

    /*
     * floor(recommended * 4 / 5) without forming recommended * 4, which can
     * wrap. Splitting into quotient and remainder of 5 gives exactly the same
     * value for every input, and keeps the target below the recommendation
     * even for huge inputs instead of saturating to UINT64_MAX.
     */
    out->model_target_bytes =
        (recommended_bytes / 5ull) * 4ull +
        ((recommended_bytes % 5ull) * 4ull) / 5ull;

    if (out->model_target_bytes > non_routed_bytes) {
        out->cache_bytes = out->model_target_bytes - non_routed_bytes;
    }

    uint64_t cache_experts = out->cache_bytes / per_expert_bytes;
    /* Always plan for at least one expert, even with no spare bytes. */
    if (cache_experts == 0) cache_experts = 1;
    if (max_model_experts != 0 && cache_experts > max_model_experts) {
        cache_experts = max_model_experts;
    }
    if (cache_experts > UINT32_MAX) cache_experts = UINT32_MAX;

    out->cache_experts = (uint32_t)cache_experts;
    /*
     * Cannot wrap: the count is either 1 or no larger than
     * cache_bytes / per_expert_bytes.
     */
    out->effective_cache_bytes = cache_experts * per_expert_bytes;
    return out->cache_experts != 0;
}
/*
 * Map, touch and mlock() an anonymous region of the requested size.
 *
 * lock   receives the mapping and its size on success. It is reset to
 *        { NULL, 0 } before anything else and stays that way on failure.
 * bytes  size of the region; 0 is a successful no-op.
 *
 * Returns true on success (including bytes == 0). Returns false when lock is
 * NULL, bytes does not fit in size_t, mmap() fails, or any mlock() call
 * fails; in the failure cases after mmap() the region is unmapped again.
 * Progress and errors are reported on stderr.
 */
bool ds4_ssd_memory_lock_acquire(ds4_ssd_memory_lock *lock,
                                 uint64_t bytes) {
    if (lock == NULL) {
        return false;
    }
    lock->ptr = NULL;
    lock->bytes = 0;

    if (bytes == 0) {
        return true;
    }
    if (bytes > (uint64_t)SIZE_MAX) {
        fprintf(stderr,
                "ds4: --simulate-used-memory is too large for this process\n");
        return false;
    }

    const size_t total = (size_t)bytes;
    void *region = mmap(NULL, total, PROT_READ | PROT_WRITE,
                        MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (region == MAP_FAILED) {
        fprintf(stderr,
                "ds4: --simulate-used-memory mmap %.2f GiB failed: %s\n",
                (double)bytes / (double)DS4_GIB,
                strerror(errno));
        return false;
    }

    /* Fall back to 4 KiB pages when the system does not report a size. */
    const long sys_page = sysconf(_SC_PAGESIZE);
    const uint64_t stride = sys_page > 0 ? (uint64_t)sys_page : 4096ull;
    const uint64_t chunk_max = 256ull << 20; /* 256 MiB per mlock() call */
    volatile unsigned char *base = region;

    /*
     * Work through the region in bounded chunks: write to every page of the
     * chunk, then lock it. One huge mlock() is harder to diagnose when it
     * fails and can create long uninterruptible VM work on macOS; chunking
     * mirrors the standalone diagnostic utility.
     */
    uint64_t done = 0; /* bytes locked so far == offset of the next chunk */
    while (done < bytes) {
        const uint64_t remaining = bytes - done;
        const uint64_t span = remaining < chunk_max ? remaining : chunk_max;
        const uint64_t end = done + span;

        /* One write per page, plus the chunk's final byte. */
        for (uint64_t at = done; at < end; at += stride) {
            base[at] = (unsigned char)(at / stride);
        }
        base[end - 1u] = 1;

        if (mlock((void *)(base + done), (size_t)span) != 0) {
            fprintf(stderr,
                    "ds4: --simulate-used-memory mlock failed after %.2f/%.2f GiB: %s\n",
                    (double)done / (double)DS4_GIB,
                    (double)bytes / (double)DS4_GIB,
                    strerror(errno));
            /* Undo what was locked so far, then drop the mapping. */
            if (done != 0) {
                munlock(region, (size_t)done);
            }
            munmap(region, total);
            return false;
        }

        done = end;
    }

    lock->ptr = region;
    lock->bytes = bytes;
    fprintf(stderr,
            "ds4: simulated used memory: locked %.2f GiB before model load\n",
            (double)bytes / (double)DS4_GIB);
    return true;
}
/*
 * Unlock and unmap the region recorded in *lock, then reset it to
 * { NULL, 0 }.
 *
 * Does nothing when lock is NULL or holds no region (NULL pointer or zero
 * size). The results of munlock() and munmap() are not checked.
 */
void ds4_ssd_memory_lock_release(ds4_ssd_memory_lock *lock) {
    if (lock == NULL) {
        return;
    }
    if (lock->ptr == NULL || lock->bytes == 0) {
        return;
    }

    const size_t span = (size_t)lock->bytes;
    munlock(lock->ptr, span);
    munmap(lock->ptr, span);

    lock->bytes = 0;
    lock->ptr = NULL;
}