From 20bd900cd5e9989ddb5edd3d9fac893db7acea38 Mon Sep 17 00:00:00 2001
From: Michael Tautschnig <tautschn@amazon.com>
Date: Tue, 17 Jan 2023 15:58:57 +0000
Subject: [PATCH] Partial-order concurrency: permit data races

Data races are undefined behaviour, and we shouldn't be giving a
semantics for these. Instead, omit write-serialisation constraints when
at least one write is non-atomic.

This is work in progress as more debugging is to be done and we need to
figure out the right way to conditionally add assertions (for a
--data-race-check option).
---
 .../cbmc-concurrency/memory_barrier2/main.c   |  4 +
 .../cbmc-concurrency/memory_barrier2/msvc.c   |  4 +
 src/ansi-c/library/pthread_lib.c              |  4 +
 src/goto-symex/memory_model.cpp               | 16 +++-
 src/goto-symex/memory_model.h                 |  5 +-
 src/goto-symex/memory_model_sc.cpp            | 43 +++++++----
 src/goto-symex/memory_model_sc.h              |  2 +-
 src/goto-symex/memory_model_tso.cpp           | 75 +++++++++++++++++++
 src/goto-symex/memory_model_tso.h             |  1 +
 9 files changed, 134 insertions(+), 20 deletions(-)

diff --git a/regression/cbmc-concurrency/memory_barrier2/main.c b/regression/cbmc-concurrency/memory_barrier2/main.c
index 62c781f2f9e..92433364052 100644
--- a/regression/cbmc-concurrency/memory_barrier2/main.c
+++ b/regression/cbmc-concurrency/memory_barrier2/main.c
@@ -7,6 +7,7 @@ int x; // variable to test mutual exclusion
 volatile int flag1 = 0, flag2 = 0; // boolean flags
 
 void* thr1(void * arg) { // frontend produces 12 transitions from this thread. It would be better if it would produce only 8!
+  __CPROVER_atomic_begin();
   flag1 = 1;
   turn = 1;
 #ifdef __GNUC__
@@ -20,9 +21,11 @@ void* thr1(void * arg) { // frontend produces 12 transitions from this thread. I
   assert(x<=0);
   // end: critical section
   flag1 = 0;
+  __CPROVER_atomic_end();
 }
 
 void* thr2(void * arg) {
+  __CPROVER_atomic_begin();
   flag2 = 1;
   turn = 0;
 #ifdef __GNUC__
@@ -36,6 +39,7 @@ void* thr2(void * arg) {
   assert (x>=1);
   // end: critical section
   flag2 = 0;
+  __CPROVER_atomic_end();
 }
 
 int main()
diff --git a/regression/cbmc-concurrency/memory_barrier2/msvc.c b/regression/cbmc-concurrency/memory_barrier2/msvc.c
index b36e9a1be00..53a719a478b 100644
--- a/regression/cbmc-concurrency/memory_barrier2/msvc.c
+++ b/regression/cbmc-concurrency/memory_barrier2/msvc.c
@@ -5,7 +5,9 @@ volatile int flag1 = 0, flag2 = 0;
 void *thr1(void *arg)
 {
   flag1 = 1;
+  __CPROVER_atomic_begin();
   turn = 1;
+  __CPROVER_atomic_end();
   __asm { mfence; }
   __CPROVER_assume(!(flag2 == 1 && turn == 1));
   x = 0;
@@ -16,7 +18,9 @@ void *thr1(void *arg)
 void *thr2(void *arg)
 {
   flag2 = 1;
+  __CPROVER_atomic_begin();
   turn = 0;
+  __CPROVER_atomic_end();
   __asm { mfence; }
   __CPROVER_assume(!(flag1 == 1 && turn == 0));
   x = 1;
diff --git a/src/ansi-c/library/pthread_lib.c b/src/ansi-c/library/pthread_lib.c
index d893d6440a0..24f9cb0193e 100644
--- a/src/ansi-c/library/pthread_lib.c
+++ b/src/ansi-c/library/pthread_lib.c
@@ -356,7 +356,9 @@ __CPROVER_HIDE:;
   if((unsigned long)thread>__CPROVER_next_thread_id) return ESRCH;
   if((unsigned long)thread==__CPROVER_thread_id) return EDEADLK;
   if(value_ptr!=0) (void)**(char**)value_ptr;
+  __CPROVER_atomic_begin();
   __CPROVER_assume(__CPROVER_threads_exited[(unsigned long)thread]);
+  __CPROVER_atomic_end();
 
   return 0;
 }
@@ -604,7 +606,9 @@ __CPROVER_HIDE:;
       __CPROVER_thread_key_dtors[i](key);
   }
 #endif
+  __CPROVER_atomic_begin();
   __CPROVER_threads_exited[this_thread_id] = 1;
+  __CPROVER_atomic_end();
 }
 
 /* FUNCTION: pthread_create */
diff --git a/src/goto-symex/memory_model.cpp b/src/goto-symex/memory_model.cpp
index 9059b669b1c..69569d23950 100644
--- a/src/goto-symex/memory_model.cpp
+++ b/src/goto-symex/memory_model.cpp
@@ -48,6 +48,7 @@ void memory_model_baset::read_from(symex_target_equationt &equation)
 
   for(const auto &address : address_map)
   {
+    std::size_t r_w_property_counter = 0;
     for(const auto &read_event : address.second.reads)
     {
       exprt::operandst rf_choice_symbols;
@@ -60,7 +61,7 @@ void memory_model_baset::read_from(symex_target_equationt &equation)
         if(!po(read_event, write_event))
         {
           rf_choice_symbols.push_back(register_read_from_choice_symbol(
-            read_event, write_event, equation));
+            read_event, write_event, equation, r_w_property_counter));
         }
       }
 
@@ -83,7 +84,8 @@ void memory_model_baset::read_from(symex_target_equationt &equation)
 symbol_exprt memory_model_baset::register_read_from_choice_symbol(
   const event_it &r,
   const event_it &w,
-  symex_target_equationt &equation)
+  symex_target_equationt &equation,
+  std::size_t &property_counter)
 {
   symbol_exprt s = nondet_bool_symbol("rf");
 
@@ -106,6 +108,16 @@ symbol_exprt memory_model_baset::register_read_from_choice_symbol(
     // if r reads from w, then w must have happened before r
     add_constraint(
       equation, implies_exprt{s, before(w, r)}, "rf-order", r->source);
+#if 0
+    notequal_exprt ne{clock(w, axiomt::AX_PROPAGATION), clock(r, axiomt::AX_PROPAGATION)};
+    const std::string sym_name = id2string(r->ssa_lhs.get_identifier());
+    equation.assertion(
+      simplify_expr(and_exprt{w->guard, r->guard}, ns),
+      ne,
+      sym_name + ".r_w_data_race." + std::to_string(++property_counter),
+      "read/write data race on " + sym_name,
+      r->source);
+#endif
   }
 
   return s;
diff --git a/src/goto-symex/memory_model.h b/src/goto-symex/memory_model.h
index eb4cad75753..50e3da2f98a 100644
--- a/src/goto-symex/memory_model.h
+++ b/src/goto-symex/memory_model.h
@@ -50,11 +50,14 @@ class memory_model_baset : public partial_order_concurrencyt
   /// \param r: read event
   /// \param w: write event
   /// \param equation: symex equation where the new constraints should be added
+  /// \param property_counter: used and updated to count the number of
+  ///   read/write data race assertions
   /// \return the new choice symbol
   symbol_exprt register_read_from_choice_symbol(
     const event_it &r,
     const event_it &w,
-    symex_target_equationt &equation);
+    symex_target_equationt &equation,
+    std::size_t &property_counter);
 
   // maps thread numbers to an event list
   typedef std::map<unsigned, event_listt> per_thread_mapt;
diff --git a/src/goto-symex/memory_model_sc.cpp b/src/goto-symex/memory_model_sc.cpp
index bbf2cab2692..945193cc96e 100644
--- a/src/goto-symex/memory_model_sc.cpp
+++ b/src/goto-symex/memory_model_sc.cpp
@@ -208,6 +208,9 @@ void memory_model_sct::write_serialization_external(
     // This is quadratic in the number of writes
     // per address. Perhaps some better encoding
     // based on 'places'?
+#if 0
+    std::size_t w_w_data_race_property_counter = 0;
+#endif
     for(event_listt::const_iterator
         w_it1=a_rec.writes.begin();
         w_it1!=a_rec.writes.end();
@@ -225,23 +228,31 @@ void memory_model_sct::write_serialization_external(
            (*w_it2)->source.thread_nr)
           continue;
 
-        // ws is a total order, no two elements have the same rank
-        // s -> w_evt1 before w_evt2; !s -> w_evt2 before w_evt1
-
-        symbol_exprt s=nondet_bool_symbol("ws-ext");
-
-        // write-to-write edge
-        add_constraint(
-          equation,
-          implies_exprt(s, before(*w_it1, *w_it2)),
-          "ws-ext",
-          (*w_it1)->source);
+        // ws is a total order, no two elements in distinct atomic sections have
+        // the same rank
+        DATA_INVARIANT(
+          (*w_it1)->atomic_section_id == 0 ||
+          (*w_it1)->atomic_section_id != (*w_it2)->atomic_section_id,
+          "non-atomic writes in distinct threads cannot belong to the same "
+          "atomic section");
 
-        add_constraint(
-          equation,
-          implies_exprt(not_exprt(s), before(*w_it2, *w_it1)),
-          "ws-ext",
-          (*w_it1)->source);
+        if((*w_it1)->atomic_section_id != 0 && (*w_it2)->atomic_section_id != 0)
+        {
+          notequal_exprt ne{clock(*w_it1, axiomt::AX_PROPAGATION), clock(*w_it2, axiomt::AX_PROPAGATION)};
+          equation.constraint(ne, "ws-ext", (*w_it1)->source);
+        }
+        else
+        {
+#if 0
+           notequal_exprt ne{clock(*w_it1, axiomt::AX_PROPAGATION), clock(*w_it2, axiomt::AX_PROPAGATION)};
+          equation.assertion(
+            simplify_expr(and_exprt{(*w_it1)->guard, (*w_it2)->guard}, ns),
+            ne,
+            id2string(a_it->first) + ".w_w_data_race." + std::to_string(++w_w_data_race_property_counter),
+            "write/write data race on " + id2string(a_it->first),
+            (*w_it1)->source);
+#endif
+        }
       }
     }
   }
diff --git a/src/goto-symex/memory_model_sc.h b/src/goto-symex/memory_model_sc.h
index 82782af6e53..602e37a517e 100644
--- a/src/goto-symex/memory_model_sc.h
+++ b/src/goto-symex/memory_model_sc.h
@@ -38,7 +38,7 @@ class memory_model_sct:public memory_model_baset
     const per_thread_mapt &per_thread_map);
   void program_order(symex_target_equationt &equation);
   void from_read(symex_target_equationt &equation);
-  void write_serialization_external(symex_target_equationt &equation);
+  virtual void write_serialization_external(symex_target_equationt &equation);
 };
 
 #endif // CPROVER_GOTO_SYMEX_MEMORY_MODEL_SC_H
diff --git a/src/goto-symex/memory_model_tso.cpp b/src/goto-symex/memory_model_tso.cpp
index 54d6b58b677..d3ea3393c2d 100644
--- a/src/goto-symex/memory_model_tso.cpp
+++ b/src/goto-symex/memory_model_tso.cpp
@@ -164,3 +164,78 @@ void memory_model_tsot::program_order(
     }
   }
 }
+
+void memory_model_tsot::write_serialization_external(
+  symex_target_equationt &equation)
+{
+  for(address_mapt::const_iterator
+      a_it=address_map.begin();
+      a_it!=address_map.end();
+      a_it++)
+  {
+    const a_rect &a_rec=a_it->second;
+
+    // This is quadratic in the number of writes
+    // per address. Perhaps some better encoding
+    // based on 'places'?
+#if 0
+    std::size_t w_w_data_race_property_counter = 0;
+#endif
+    for(event_listt::const_iterator
+        w_it1=a_rec.writes.begin();
+        w_it1!=a_rec.writes.end();
+        ++w_it1)
+    {
+      event_listt::const_iterator next=w_it1;
+      ++next;
+
+      for(event_listt::const_iterator w_it2=next;
+          w_it2!=a_rec.writes.end();
+          ++w_it2)
+      {
+        // external?
+        if((*w_it1)->source.thread_nr==
+           (*w_it2)->source.thread_nr)
+          continue;
+
+        // ws is a total order, no two elements in distinct atomic sections have
+        // the same rank
+        DATA_INVARIANT(
+          (*w_it1)->atomic_section_id == 0 ||
+          (*w_it1)->atomic_section_id != (*w_it2)->atomic_section_id,
+          "non-atomic writes in distinct threads cannot belong to the same "
+          "atomic section");
+
+        if((*w_it1)->atomic_section_id != 0 && (*w_it2)->atomic_section_id != 0)
+        {
+          symbol_exprt s=nondet_bool_symbol("ws-ext");
+
+          // write-to-write edge
+          add_constraint(
+            equation,
+            implies_exprt(s, before(*w_it1, *w_it2)),
+            "ws-ext",
+            (*w_it1)->source);
+
+          add_constraint(
+            equation,
+            implies_exprt(not_exprt(s), before(*w_it2, *w_it1)),
+            "ws-ext",
+            (*w_it1)->source);
+        }
+        else
+        {
+#if 0
+           notequal_exprt ne{clock(*w_it1, axiomt::AX_PROPAGATION), clock(*w_it2, axiomt::AX_PROPAGATION)};
+          equation.assertion(
+            simplify_expr(and_exprt{(*w_it1)->guard, (*w_it2)->guard}, ns),
+            ne,
+            id2string(a_it->first) + ".w_w_data_race." + std::to_string(++w_w_data_race_property_counter),
+            "write/write data race on " + id2string(a_it->first),
+            (*w_it1)->source);
+#endif
+        }
+      }
+    }
+  }
+}
diff --git a/src/goto-symex/memory_model_tso.h b/src/goto-symex/memory_model_tso.h
index 23684fa09b3..25766c39b0f 100644
--- a/src/goto-symex/memory_model_tso.h
+++ b/src/goto-symex/memory_model_tso.h
@@ -30,6 +30,7 @@ class memory_model_tsot:public memory_model_sct
     partial_order_concurrencyt::event_it e1,
     partial_order_concurrencyt::event_it e2) const;
   void program_order(symex_target_equationt &equation);
+  void write_serialization_external(symex_target_equationt &equation) override;
 };
 
 #endif // CPROVER_GOTO_SYMEX_MEMORY_MODEL_TSO_H