Skip to content

Commit 8db9e1a

Browse files
authored
Reduce memory usage (#134)
Memory usage is reduced by: * Dropping the `refs` map entirely, it is not essential for the functioning of `qcow_stream` * Dropping cached clusters as soon as they've been read * Using `Qcow_mapping` (implemented in C as a simple array) instead of `ClusterMap` to store physical-to-virtual mappings Memory usage and runtime comparisons for before/after this PR: ``` image | memory peak | runtime (> /dev/null) | runtime (> disk) ----------------------------------------------------------------------------------- 23G XO-produced image | 142mb -> 12mb | 12.1 -> 10.5 | 17.9 -> 16.4 54G image | 271mb -> 19mb | 33.5 -> 26.2 | 49.8 -> 44.1 ``` (a 200gb qcow2 image takes up 56mb of memory at peak after the PR, meaning for every additional terabyte in the image size, memory usage grows by ~120mb, and a 20tb image would take up around 2.5gb) This required changing types in the interface, so I've bumped the version to 0.13.0 as a "breaking" change (nothing except one caller in xapi uses this)
2 parents 32a223e + 9d24854 commit 8db9e1a

16 files changed

+434
-92
lines changed

CHANGES.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
## 0.13.0 (2026-02-06)
2+
- qcow_stream: Breaking changes to the interface, with the allocated data
3+
clusters now returned in Qcow_mapping.t. Great reductions in memory usage,
4+
various other optimizations (last-genius #134)
5+
16
## 0.12.3 (2026-01-28)
27
- qcow_cache: Fix skipped reads, which could lead to crashes
38
and corruption (last-genius #133)

dune-project

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,12 @@
2424
(diet (>= 0.3.0))
2525
logs
2626
lwt
27+
lwt_ppx
2728
(mirage-block (>= 3.0.0))
2829
ppx_sexp_conv
2930
prometheus
3031
sexplib
32+
(alcotest :with-test)
3133
)
3234
)
3335

lib/dune

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@
77
(name qcow_types)
88
(public_name qcow-types)
99
(modules (:standard \ qcow qcow_debug qcow_block_cache qcow_cstructs
10-
qcow_padded qcow_recycler qcow_stream))
10+
qcow_padded qcow_recycler qcow_stream
11+
qcow_mapping qcow_mapping_test))
1112
(libraries
1213
astring
1314
(re_export cstruct)
@@ -27,19 +28,29 @@
2728
(library
2829
(name qcow_stream)
2930
(public_name qcow-stream)
30-
(modules qcow_stream)
31+
(modules qcow_stream qcow_mapping)
3132
(libraries
3233
cstruct-lwt
3334
io-page
3435
lwt.unix
3536
qcow_types
3637
)
38+
(foreign_stubs
39+
(language c)
40+
(names qcow_mapping_stubs))
3741
(wrapped false)
3842
(preprocess
39-
(pps ppx_sexp_conv)
43+
(pps ppx_sexp_conv lwt_ppx)
4044
)
4145
)
4246

47+
(test
48+
(modes native)
49+
(name qcow_mapping_test)
50+
(modules qcow_mapping_test)
51+
(libraries alcotest qcow_stream)
52+
)
53+
4354
(library
4455
(name qcow)
4556
(public_name qcow)

lib/qcow_cache.ml

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -89,8 +89,11 @@ let write t cluster data =
8989

9090
let remove t cluster =
9191
if Cluster.Map.mem cluster t.clusters then
92-
Printf.fprintf stderr "Dropping cache for cluster %s\n"
93-
(Cluster.to_string cluster) ;
92+
Log.err (fun f ->
93+
f "Dropping cache for cluster %s (length is %d)\n"
94+
(Cluster.to_string cluster)
95+
(Cluster.Map.cardinal t.clusters)
96+
) ;
9497
t.clusters <- Cluster.Map.remove cluster t.clusters
9598

9699
let resize t new_size_clusters =

lib/qcow_mapping.ml

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
type t
2+
3+
external create : int64 -> t = "stub_qcow_mapping_create"
4+
5+
external extend : t -> int64 -> unit = "stub_qcow_mapping_extend"
6+
7+
external get : t -> int64 -> int64 = "stub_qcow_mapping_get"
8+
9+
external set : t -> int64 -> int64 -> unit = "stub_qcow_mapping_set"
10+
11+
external length : t -> int64 = "stub_qcow_mapping_length"
12+
13+
external get_sparse_interval_stub :
14+
t -> int64 -> int64 -> (int64 * int64 * int64) option
15+
= "stub_qcow_mapping_get_sparse_interval"
16+
17+
let get_sparse_interval t ~index ~cluster_bits =
18+
get_sparse_interval_stub t index cluster_bits
19+
20+
let to_interval_seq arr cluster_bits =
21+
let rec aux arr length index () =
22+
if index = length then
23+
Seq.Nil
24+
else
25+
match get_sparse_interval arr ~index ~cluster_bits with
26+
| None ->
27+
Seq.Nil
28+
| Some (right_index, left_cluster, right_cluster) ->
29+
Seq.Cons
30+
( (left_cluster, right_cluster)
31+
, aux arr length (Int64.succ right_index)
32+
)
33+
in
34+
aux arr (length arr) 0L

lib/qcow_mapping.mli

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
type t
2+
3+
val create : int64 -> t
4+
5+
val extend : t -> int64 -> unit
6+
7+
val get : t -> int64 -> int64
8+
9+
val set : t -> int64 -> int64 -> unit
10+
11+
val length : t -> int64
12+
13+
(** [to_interval_seq t cluster_bits] returns a sequence of allocated virtual
14+
data cluster intervals, intended to be used with sparse disks.
15+
16+
Thus, for a physical image represented by an array of physical clusters
17+
and their corresponding virtual clusters:
18+
[-1; 1; 2; 3; 7; -1; 4]
19+
20+
[to_interval_seq] will return:
21+
(1,3); (7,7); (4,4)
22+
*)
23+
val to_interval_seq : t -> int64 -> (int64 * int64) Seq.t

lib/qcow_mapping_stubs.c

Lines changed: 177 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,177 @@
1+
#include <stdint.h>
2+
#include <string.h>
3+
#include <errno.h>
4+
5+
#define CAML_NAME_SPACE
6+
#include <caml/alloc.h>
7+
#include <caml/memory.h>
8+
#include <caml/threads.h>
9+
#include <caml/fail.h>
10+
#include <caml/custom.h>
11+
12+
typedef struct array {
13+
uint64_t length;
14+
int64_t* a;
15+
} array;
16+
17+
static inline array *
18+
qcow_mapping_arr_of_val (value v)
19+
{
20+
array *arr = *(array **) Data_custom_val(v);
21+
22+
return arr;
23+
}
24+
25+
static void
26+
qcow_mapping_finalize (value v)
27+
{
28+
array *arr = qcow_mapping_arr_of_val (v);
29+
free(arr->a);
30+
free(arr);
31+
}
32+
33+
static struct custom_operations qcow_mapping_ops = {
34+
.identifier = "qcow_mapping_array",
35+
.finalize = qcow_mapping_finalize,
36+
.compare = custom_compare_default, /* Can't compare */
37+
.hash = custom_hash_default, /* Can't hash */
38+
.serialize = custom_serialize_default, /* Can't serialize */
39+
.deserialize = custom_deserialize_default, /* Can't deserialize */
40+
.compare_ext = custom_compare_ext_default, /* Can't compare */
41+
};
42+
43+
#define Arr_val(v) (*((array **) Data_custom_val(v)))
44+
45+
CAMLprim value
46+
stub_qcow_mapping_create (value length_val)
47+
{
48+
CAMLparam1(length_val);
49+
CAMLlocal1(result);
50+
array *arr = malloc(sizeof *arr);
51+
52+
arr->length = Int64_val(length_val);
53+
result = caml_alloc_custom(&qcow_mapping_ops, sizeof(array *), 0, 1);
54+
55+
caml_release_runtime_system();
56+
arr->a = malloc(sizeof(uint64_t) * arr->length);
57+
caml_acquire_runtime_system();
58+
59+
if (!arr->a)
60+
caml_failwith(strerror(errno));
61+
62+
caml_release_runtime_system();
63+
// Initialize to -1, otherwise there's no way to distinguish data clusters
64+
// from table clusters and empty clusters
65+
for (size_t i = 0; i < arr->length; i++) {
66+
arr->a[i] = -1;
67+
}
68+
caml_acquire_runtime_system();
69+
70+
Arr_val(result) = arr;
71+
72+
CAMLreturn(result);
73+
}
74+
75+
CAMLprim value
76+
stub_qcow_mapping_extend (value t_val, value length_val)
77+
{
78+
CAMLparam2(t_val, length_val);
79+
CAMLlocal1(result);
80+
array* arr = qcow_mapping_arr_of_val(t_val);
81+
82+
uint64_t new_length = Int64_val(length_val);
83+
84+
caml_release_runtime_system();
85+
arr->a = realloc(arr->a, sizeof(uint64_t) * new_length);
86+
caml_acquire_runtime_system();
87+
88+
if (!arr->a)
89+
caml_failwith(strerror(errno));
90+
91+
caml_release_runtime_system();
92+
// Initialize the newly allocated cells to -1, otherwise there's no way
93+
// to distinguish data clusters from table clusters and empty clusters
94+
for (size_t i = arr->length; i < new_length; i++) {
95+
arr->a[i] = -1;
96+
}
97+
arr->length = new_length;
98+
caml_acquire_runtime_system();
99+
100+
CAMLreturn(Val_unit);
101+
}
102+
103+
CAMLprim value
104+
stub_qcow_mapping_get (value t_val, value index_val)
105+
{
106+
CAMLparam2(t_val, index_val);
107+
CAMLlocal1(result);
108+
array* arr = qcow_mapping_arr_of_val(t_val);
109+
110+
uint64_t index = Int64_val(index_val);
111+
result = caml_copy_int64(arr->a[index]);
112+
113+
CAMLreturn(result);
114+
}
115+
116+
CAMLprim value
117+
stub_qcow_mapping_set (value t_val, value index_val, value new_val)
118+
{
119+
CAMLparam3(t_val, index_val, new_val);
120+
array* arr = qcow_mapping_arr_of_val(t_val);
121+
122+
uint64_t index = Int64_val(index_val);
123+
int64_t new = Int64_val(new_val);
124+
arr->a[index] = new;
125+
126+
CAMLreturn(Val_unit);
127+
}
128+
129+
CAMLprim value
130+
stub_qcow_mapping_length (value t_val)
131+
{
132+
CAMLparam1(t_val);
133+
CAMLlocal1(result);
134+
135+
array *arr = qcow_mapping_arr_of_val(t_val);
136+
result = caml_copy_int64(arr->length);
137+
138+
CAMLreturn(result);
139+
}
140+
141+
142+
CAMLprim value
143+
stub_qcow_mapping_get_sparse_interval (value t_val, value index_val, value cluster_bits_val)
144+
{
145+
CAMLparam3(t_val, index_val, cluster_bits_val);
146+
CAMLlocal1(result);
147+
result = caml_alloc_tuple(3);
148+
149+
array* arr = qcow_mapping_arr_of_val(t_val);
150+
151+
uint64_t cluster_bits = Int64_val(cluster_bits_val);
152+
uint64_t diff_bw_next_clusters = 1 << cluster_bits;
153+
uint64_t left_index = Int64_val(index_val);
154+
uint64_t right_index = left_index;
155+
156+
// Find the longest interval of subsequent allocated data clusters
157+
// (data clusters are subsequent if they're located right next to each
158+
// other on the virtual disk), return the index to be used in the next
159+
// search iteration and the [interval_start, interval_end] pair of virtual
160+
// clusters (both inclusive)
161+
for (size_t i = left_index+1; i < arr->length; i++, right_index++) {
162+
if (arr->a[i-1] == -1) {
163+
if (arr->a[i] == -1 && i == (arr->length-1)) {
164+
CAMLreturn(Val_none);
165+
}
166+
left_index++;
167+
} else if (arr->a[i-1] + diff_bw_next_clusters != arr->a[i]) {
168+
break;
169+
}
170+
}
171+
172+
Store_field(result, 0, caml_copy_int64(right_index));
173+
Store_field(result, 1, caml_copy_int64((arr->a[left_index]) >> cluster_bits));
174+
Store_field(result, 2, caml_copy_int64((arr->a[right_index]) >> cluster_bits));
175+
176+
CAMLreturn(caml_alloc_some(result));
177+
}

lib/qcow_mapping_test.ml

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
module Arr = Qcow_mapping
2+
3+
let ( ++ ) = Int64.add
4+
5+
let basic_test () =
6+
let arr = Arr.create 16L in
7+
8+
Alcotest.(check' int64)
9+
~msg:"wrong length" ~actual:(Arr.length arr) ~expected:16L ;
10+
11+
for i = 0 to Int64.to_int (Arr.length arr) - 1 do
12+
let i = Int64.of_int i in
13+
Arr.set arr i i
14+
done ;
15+
for i = 0 to Int64.to_int (Arr.length arr) - 1 do
16+
let i = Int64.of_int i in
17+
let v = Arr.get arr i in
18+
Alcotest.(check' int64) ~msg:"wrong value" ~actual:v ~expected:i
19+
done ;
20+
21+
Arr.extend arr 32L ;
22+
Alcotest.(check' int64)
23+
~msg:"wrong length after .extend" ~actual:(Arr.length arr) ~expected:32L ;
24+
25+
(* Check values were copied properly *)
26+
for i = 0 to 15 do
27+
let i = Int64.of_int i in
28+
let v = Arr.get arr i in
29+
Alcotest.(check' int64)
30+
~msg:"wrong value after .extend" ~actual:v ~expected:i
31+
done ;
32+
33+
for i = 0 to Int64.to_int (Arr.length arr) - 1 do
34+
let i = Int64.of_int i in
35+
Arr.set arr i (i ++ 1L)
36+
done ;
37+
for i = 0 to Int64.to_int (Arr.length arr) - 1 do
38+
let i = Int64.of_int i in
39+
let v = Arr.get arr i in
40+
Alcotest.(check' int64) ~msg:"wrong value" ~actual:v ~expected:(i ++ 1L)
41+
done ;
42+
43+
()
44+
45+
let interval_test () =
46+
(* Set a few of the cells, representing a sparse disk *)
47+
let arr = Arr.create 16L in
48+
for i = 2 to 4 do
49+
let i = Int64.of_int i in
50+
Arr.set arr i i
51+
done ;
52+
53+
(* Non-subsequent clusters next to each other *)
54+
Arr.set arr 6L 0L ;
55+
Arr.set arr 7L 6L ;
56+
Arr.set arr 8L 8L ;
57+
58+
for i = 10 to 13 do
59+
let i = Int64.of_int i in
60+
Arr.set arr i i
61+
done ;
62+
63+
(* Verify only the intervals filled with subsequent clusters
64+
are reported as populated *)
65+
let l = Arr.to_interval_seq arr 0L |> List.of_seq in
66+
Alcotest.(check' @@ list @@ pair int64 int64)
67+
~msg:"wrong interval" ~actual:l
68+
~expected:[(2L, 4L); (0L, 0L); (6L, 6L); (8L, 8L); (10L, 13L)] ;
69+
()
70+
71+
let () =
72+
Alcotest.run "Test qcow_mapping library"
73+
[
74+
( "Basic tests"
75+
, [
76+
("basic_test", `Quick, basic_test)
77+
; ("interval_test", `Quick, interval_test)
78+
]
79+
)
80+
]

0 commit comments

Comments
 (0)