
Commit 7bd82fb

Philippe Bidinger authored and meta-codesync[bot] committed
split hash set facts into smaller chunks
Summary: The hack db has this predicate, which represents a set of hashes of the input files, in no particular order. It's arbitrarily split into 15 facts (... because there are 15 indexing shards).

```
www.hack.light> :stat hack.IndexerInputsHash
hack.IndexerInputsHash.6
  count: 15
  size: 129201564 (123.22 MiB) 0.1334%
```

Each of these facts is about 10MB. We've seen queries to this predicate fail with an allocation failure error from the server. I'm guessing that making the facts smaller than the pagination limit will enable the server to paginate these results.

The `glean query` CLI has a page limit of

```
- --page-bytes: Default is 1,000,000 (1 MB)
```

This diff constructs the hashes as <1MB facts.

Reviewed By: jjuliamolin

Differential Revision: D92045104

fbshipit-source-id: fd031f98adc120bcb4793f7291616a4039877068
1 parent e771467 commit 7bd82fb
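
For a sense of the numbers, here is a minimal standalone sketch of the chunking arithmetic, assuming the Core library (the diff below uses Core-style `List.chunks_of`); the hash count and contents are invented for illustration:

```ocaml
(* Minimal sketch, assuming the Core library; the 120_000-hash shard is
   invented for illustration. Shows how many 16-byte hashes fit under the
   800KB per-fact budget and how many chunk facts a shard produces. *)
open Core

let bytes_per_hash = 16
let max_fact_bytes = 800 * 1024 (* 800KB, under the 1MB --page-bytes default *)
let max_hashes_per_fact = max_fact_bytes / bytes_per_hash (* = 51200 *)

let () =
  let fake_hashes = List.init 120_000 ~f:(fun i -> Printf.sprintf "hash%06d" i) in
  let chunks = List.chunks_of fake_hashes ~length:max_hashes_per_fact in
  (* 120_000 hashes at 51_200 per fact -> 3 chunk facts, each well under 1MB *)
  Printf.printf "hashes per fact: %d, chunk facts: %d\n"
    max_hashes_per_fact
    (List.length chunks)
```

Ignoring fact-encoding overhead, a ~10MB hash set would split into roughly a dozen such facts under this budget.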

File tree

1 file changed: +35 -7 lines changed


hphp/hack/src/typing/write_symbol_info/entrypoint.ml

Lines changed: 35 additions & 7 deletions
@@ -76,15 +76,43 @@ let write_facts_file out_dir ?(global = false) files json_chunks =
    indexing run. There are
    - symbol hash facts for incrementality (empty if gen_sym_hash isn't set)
    - the namespace aliases defined in .hhconfig *)
+
+let bytes_per_hash = 16
+
+(* input hashes are stored in facts, we try to keep the max size
+   of such fact to a reasonable size (800KB), ideally less than
+   Glean pagination limit *)
+let max_fact_bytes = 800 * 1024 (* 800KB *)
+
+let max_hashes_per_fact = max_fact_bytes / bytes_per_hash
+
 let gen_global_facts ns ~ownership ~shard_name all_hashes =
-  let fa = Fact_acc.init ~ownership in
   let list_hashes = Set.to_list all_hashes in
-  if ownership then Fact_acc.set_ownership_unit fa (Some ".hhconfig");
-  List.fold ns ~init:fa ~f:(fun fa (from, to_) ->
-      Add_fact.global_namespace_alias fa ~from ~to_ |> snd)
-  |> Add_fact.indexerInputsHash shard_name list_hashes
-  |> snd
-  |> Fact_acc.to_json
+  (* Split hashes into chunks *)
+  let hash_chunks = List.chunks_of list_hashes ~length:max_hashes_per_fact in
+  let num_chunks = List.length hash_chunks in
+  (* Generate facts for each chunk *)
+  List.concat_mapi hash_chunks ~f:(fun i chunk_hashes ->
+      let fa = Fact_acc.init ~ownership in
+      (* Only include namespace aliases in the first shard *)
+      let fa =
+        if i = 0 then begin
+          if ownership then Fact_acc.set_ownership_unit fa (Some ".hhconfig");
+          List.fold ns ~init:fa ~f:(fun fa (from, to_) ->
+              Add_fact.global_namespace_alias fa ~from ~to_ |> snd)
+        end else
+          fa
+      in
+      let chunk_shard_name =
+        if num_chunks = 1 then
+          shard_name
+        else
+          Printf.sprintf "%s%03d" shard_name i
+      in
+      fa
+      |> Add_fact.indexerInputsHash chunk_shard_name chunk_hashes
+      |> snd
+      |> Fact_acc.to_json)
 
 let write_json
     (ctx : Provider_context.t)
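
One detail worth noting in the chunk naming above: when a shard's hashes fit into a single fact, the original shard name is preserved; only multi-chunk shards get a zero-padded numeric suffix. A tiny sketch of that behaviour (the shard name and chunk counts are invented):

```ocaml
(* Sketch of the naming scheme from the diff above; shard name and chunk
   counts are invented. Single-chunk shards keep their name unchanged. *)
let chunk_shard_name ~shard_name ~num_chunks i =
  if num_chunks = 1 then
    shard_name
  else
    Printf.sprintf "%s%03d" shard_name i

let () =
  print_endline (chunk_shard_name ~shard_name:"shard7" ~num_chunks:1 0);  (* shard7 *)
  print_endline (chunk_shard_name ~shard_name:"shard7" ~num_chunks:3 0);  (* shard7000 *)
  print_endline (chunk_shard_name ~shard_name:"shard7" ~num_chunks:3 2)   (* shard7002 *)
```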
