Skip to content

Commit ce8f8ce

Browse files
authored
Merge pull request #1735 from herd/opt-atomic-pairs
[herd] More efficient computation of atomic load X stores pairs We take inspiration from the efficient computation of register read-from (see PR #1704): atomic stores are ordered according to (extended by iico) program order and an atomic load is paired with the closest atomic write that follows it.
2 parents 86b798a + af44a24 commit ce8f8ce

3 files changed

Lines changed: 103 additions & 45 deletions

File tree

herd/mem.ml

Lines changed: 71 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -901,12 +901,10 @@ struct
901901

902902
let find_first_after e set =
903903
find_first_opt (fun e' -> is_before_strict e e') set
904-
[@@warning "-32"]
904+
905905
end
906906

907-
let map_loc_find loc m =
908-
try U.LocEnv.find loc m
909-
with Not_found -> []
907+
let map_loc_find loc m = U.LocEnv.safe_find [] loc m
910908

911909
let match_reg_events add_eq es csn =
912910
let loc_loads_stores = U.collect_reg_loads_stores es in
@@ -1827,40 +1825,74 @@ let get_rf_value test read =
18271825
| _,_ -> k)
18281826
rfm E.EventRel.empty
18291827

1830-
(* Reconstruct load/store atomic pairs *)
1828+
(*
1829+
* Reconstruct load/store atomic pairs,
1830+
* By definition such a pair exists when the
1831+
* store precedes the load in generalised program order
1832+
* (_i.e._ the union of program order and of iico), and
1833+
* that there is no atomic effect to the same location
1834+
* in-between (w.r.t generalised po) the load and the store.
1835+
* Computation proceeds as follows:
1836+
* First, atomic events are grouped first by thread
1837+
* and then by location. Then, to each atomic load, we
1838+
* associate the closest generalised po successor store,
1839+
* by using a set of stores ordered by generalised po.
1840+
* We additionally check that no atomic load exists
1841+
* between the load and store. Notice that it is not possible
1842+
* to use a set of all atomic events (by a given thread and
1843+
* with a given location) ordered by po because some atomic loads
1844+
* may be unrelated.
1845+
* Finally, such atomic pairs can be spurious, that is not performed
1846+
* by a specific thread. In that case, pairs are given
1847+
* simply by the intra causality data relation.
1848+
*)
18311849

18321850
let make_atomic_load_store es =
1833-
let all = E.atomics_of es.E.events in
1834-
let atms = U.collect_atomics es in
1835-
U.LocEnv.fold
1836-
(fun _loc atms k ->
1837-
let atms =
1838-
List.filter
1839-
(fun e -> not (E.is_load e && E.is_store e))
1840-
atms in (* get rid of C RMW *)
1841-
let rs,ws = List.partition E.is_load atms in
1842-
List.fold_left
1843-
(fun k r ->
1844-
let exp = E.is_explicit r in
1845-
List.fold_left
1846-
(fun k w ->
1847-
if
1848-
S.atomic_pair_allowed r w &&
1849-
U.is_before_strict es r w &&
1850-
E.is_explicit w = exp &&
1851-
not
1852-
(E.EventSet.exists
1853-
(fun e ->
1854-
E.is_explicit e = exp &&
1855-
U.is_before_strict es r e &&
1856-
U.is_before_strict es e w)
1857-
all)
1858-
then E.EventRel.add (r,w) k
1859-
else k)
1860-
k ws)
1861-
k rs)
1862-
atms E.EventRel.empty
1863-
1851+
let atms,spurious = U.collect_atomics es in
1852+
let module StoreSet = EvtSetByPo(struct let es = es end) in
1853+
let make_atomic_pairs es k =
1854+
let rs,ws = List.partition E.is_load es in
1855+
let ws = StoreSet.of_list ws
1856+
and intervening_read er ew =
1857+
List.exists
1858+
(fun e ->
1859+
StoreSet.is_before_strict er e
1860+
&& StoreSet.is_before_strict e ew)
1861+
rs in
1862+
List.fold_left
1863+
(fun k er ->
1864+
match StoreSet.find_first_after er ws with
1865+
| Some ew ->
1866+
if
1867+
S.atomic_pair_allowed er ew
1868+
&& not (intervening_read er ew)
1869+
then
1870+
E.EventRel.add (er,ew) k
1871+
else k
1872+
| None -> k)
1873+
k rs in
1874+
let r1 =
1875+
List.map
1876+
(fun (_,m) ->
1877+
U.LocEnv.fold
1878+
(fun _loc es k ->
1879+
let exps,nexps = List.partition E.is_explicit es in
1880+
make_atomic_pairs exps @@ make_atomic_pairs nexps k)
1881+
m E.EventRel.empty)
1882+
atms |> E.EventRel.unions
1883+
and r2 =
1884+
let iico = es.E.intra_causality_data in
1885+
List.map
1886+
(fun e ->
1887+
if E.is_load e then
1888+
match
1889+
E.EventRel.succs iico e |> E.EventSet.as_singleton
1890+
with
1891+
| None -> assert false (* spurious updates are by pairs *)
1892+
| Some w -> E.EventRel.singleton (e,w)
1893+
else E.EventRel.empty)
1894+
spurious |> E.EventRel.unions in
1895+
E.EventRel.union r1 r2
18641896

18651897
(* Retrieve last store from rfmap *)
18661898
let get_max_store _test _es rfm loc =
@@ -1955,12 +1987,9 @@ let get_rf_value test read =
19551987
if C.debug.Debug_herd.mem then begin
19561988
eprintf "Observed locs: {%s}\n" (pp_locations observed_locs)
19571989
end ;
1958-
U.LocEnv.fold
1959-
(fun loc ws k ->
1960-
if keep_observed_loc loc observed_locs then
1961-
U.LocEnv.add loc ws k
1962-
else k)
1963-
loc_stores U.LocEnv.empty
1990+
U.LocEnv.filter
1991+
(fun loc _ -> keep_observed_loc loc observed_locs)
1992+
loc_stores
19641993
else loc_stores in
19651994

19661995
let possible_finals =

herd/memUtils.ml

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -387,9 +387,26 @@ let lift_proc_info i evts =
387387
and collect_stores es = collect_by_loc es E.is_store
388388
and collect_loads_non_spec es = collect_by_loc es (fun e -> E.is_load e && not_speculated es e)
389389
and collect_stores_non_spec es = collect_by_loc es (fun e -> E.is_store e && not_speculated es e)
390-
and collect_atomics es = collect_by_loc es E.is_atomic
391390
and collect_reg_loads_stores es = collect_by_loc2 es E.is_reg_load_any E.is_reg_store_any
392391

392+
let accumulate_loc_proc proc loc e =
393+
IntMap.update proc @@
394+
function
395+
| None -> Some (LocEnv.singleton loc [e])
396+
| Some m -> Some (LocEnv.accumulate loc e m)
397+
398+
let collect_atomics es =
399+
let m,sp =
400+
E.EventSet.fold
401+
(fun e (m,sp as k) ->
402+
if E.is_atomic e then
403+
match E.proc_of e with
404+
| None -> if E.is_spurious e then (m, e::sp) else k
405+
| Some proc -> accumulate_loc_proc proc (get_loc e) e m, sp
406+
else k)
407+
es.E.events (IntMap.empty,[]) in
408+
IntMap.bindings m,sp
409+
393410
let partition_events es =
394411
let env =
395412
E.EventSet.fold

herd/memUtils.mli

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ module Make : functor (S: SemExtra.S) -> sig
7070
relation *)
7171
val make_fr : S.concrete -> S.event_rel -> S.event_rel
7272

73-
(* Mapping from locations *)
73+
(* Mapping from locations, a.k.a. location maps *)
7474
module LocEnv : MyMap.S with type key = S.location
7575

7676
(* Collect various events, indexed by location *)
@@ -85,7 +85,19 @@ module Make : functor (S: SemExtra.S) -> sig
8585
val collect_stores : S.event_structure -> S.event list LocEnv.t
8686
val collect_stores_non_spec : S.event_structure -> S.event list LocEnv.t
8787
val collect_loads_non_spec : S.event_structure -> S.event list LocEnv.t
88-
val collect_atomics : S.event_structure -> S.event list LocEnv.t
88+
89+
(*
90+
* Collect atomic effects indexed by threads and by locations.
91+
* When given an event structure as argument, the function
92+
* returns a pair [(maps,evts)], where:
93+
* + [maps] is a list of location maps, one per thread.
94+
* The values of this map are the atomic effects
95+
of the given thread.
96+
* + [evts] is the list of spurious (atomic) effects.
97+
*)
98+
99+
val collect_atomics :
100+
S.event_structure -> (Proc.t * S.event list LocEnv.t) list * S.event list
89101

90102
(* Partition by location *)
91103
val partition_events : S.event_set -> S.event_set list

0 commit comments

Comments
 (0)