Record state count info in manifest, check during CI (#122)

ahelwer · web-flow · commit b7d2d8c24338 · 2024-02-22T10:24:43.000-05:00
Signed-off-by: Andrew Helwer &lt;2n8rn1w1f@mozmail.com&gt;
diff --git a/.github/scripts/check_manifest_features.py b/.github/scripts/check_manifest_features.py
@@ -9,13 +9,16 @@
 
 from argparse import ArgumentParser
 from dataclasses import dataclass
+import logging
 import glob
 from os.path import basename, dirname, join, normpath, splitext
 from typing import Any
 import re
 import tla_utils
 from tree_sitter import Language, Parser
 
+logging.basicConfig(level=logging.INFO)
+
 def build_ts_grammar(ts_path):
     """
     Builds the tree-sitter-tlaplus grammar and constructs the parser.
@@ -199,44 +202,51 @@ def check_features(parser, queries, manifest, examples_root):
     for spec in manifest['specifications']:
         if spec['title'] == '':
             success = False
-            print(f'ERROR: Spec {spec["path"]} does not have a title')
+            logging.error(f'Spec {spec["path"]} does not have a title')
         if spec['description'] == '':
             success = False
-            print(f'ERROR: Spec {spec["path"]} does not have a description')
+            logging.error(f'Spec {spec["path"]} does not have a description')
         if not any(spec['authors']):
             success = False
-            print(f'ERROR: Spec {spec["path"]} does not have any listed authors')
+            logging.error(f'Spec {spec["path"]} does not have any listed authors')
         for module in spec['modules']:
             module_path = module['path']
             tree, text, parse_err = parse_module(examples_root, parser, module_path)
             if parse_err:
                 success = False
-                print(f'ERROR: Module {module["path"]} contains syntax errors')
+                logging.error(f'Module {module["path"]} contains syntax errors')
             expected_features = get_tree_features(tree, queries)
             actual_features = set(module['features'])
             if expected_features != actual_features:
                 success = False
-                print(
-                    f'ERROR: Module {module["path"]} has incorrect features in manifest; '
+                logging.error(
+                    f'Module {module["path"]} has incorrect features in manifest; '
                     + f'expected {list(expected_features)}, actual {list(actual_features)}'
                 )
             expected_imports = get_community_imports(examples_root, tree, text, dirname(module_path), 'proof' in expected_features, queries)
             actual_imports = set(module['communityDependencies'])
             if expected_imports != actual_imports:
                 success = False
-                print(
-                    f'ERROR: Module {module["path"]} has incorrect community dependencies in manifest; '
+                logging.error(
+                    f'Module {module["path"]} has incorrect community dependencies in manifest; '
                     + f'expected {list(expected_imports)}, actual {list(actual_imports)}'
                 )
             for model in module['models']:
                 expected_features = get_model_features(examples_root, model['path'])
                 actual_features = set(model['features'])
                 if expected_features != actual_features:
                     success = False
-                    print(
-                        f'ERROR: Model {model["path"]} has incorrect features in manifest; '
+                    logging.error(
+                        f'Model {model["path"]} has incorrect features in manifest; '
                         + f'expected {list(expected_features)}, actual {list(actual_features)}'
                     )
+                if tla_utils.has_state_count(model) and not tla_utils.is_state_count_valid(model):
+                    success = False
+                    logging.error(
+                        f'Model {model["path"]} has state count info recorded; this is '
+                        + 'only valid for exhaustive search models that complete successfully.'
+                    )
+
     return success
 
 if __name__ == '__main__':
@@ -253,9 +263,9 @@ def check_features(parser, queries, manifest, examples_root):
     queries = build_queries(TLAPLUS_LANGUAGE)
 
     if check_features(parser, queries, manifest, examples_root):
-        print('SUCCESS: metadata in manifest is correct')
+        logging.info('SUCCESS: metadata in manifest is correct')
         exit(0)
     else:
-        print("ERROR: metadata in manifest is incorrect")
+        logging.error("Metadata in manifest is incorrect")
         exit(1)
 
diff --git a/.github/scripts/check_small_models.py b/.github/scripts/check_small_models.py
@@ -55,6 +55,17 @@ def check_model(module_path, model, expected_runtime):
                 logging.error(f'Model {model_path} expected result {expected_result} but got {actual_result}')
                 logging.error(tlc_result.stdout)
                 return False
+            if tla_utils.is_state_count_valid(model) and tla_utils.has_state_count(model):
+                state_count_info = tla_utils.extract_state_count_info(tlc_result.stdout)
+                if state_count_info is None:
+                    logging.error("Failed to find state info in TLC output")
+                    logging.error(tlc_result.stdout)
+                    return False
+                if not tla_utils.is_state_count_info_correct(model, *state_count_info):
+                    logging.error("Recorded state count info differed from actual state counts:")
+                    logging.error(f"(distinct/total/depth); expected: {tla_utils.get_state_count_info(model)}, actual: {state_count_info}")
+                    logging.error(tlc_result.stdout)
+                    return False
             return True
         case TimeoutExpired():
             logging.error(f'{model_path} hit hard timeout of {hard_timeout_in_seconds} seconds')
diff --git a/.github/scripts/generate_manifest.py b/.github/scripts/generate_manifest.py
@@ -140,9 +140,10 @@ def find_corresponding_model(old_model, new_module):
     return models[0] if any(models) else None
 
 def integrate_model_info(old_model, new_model):
-    fields = ['runtime', 'size', 'mode', 'result']
+    fields = ['runtime', 'size', 'mode', 'result', 'distinctStates', 'totalStates', 'stateDepth']
     for field in fields:
-        new_model[field] = old_model[field]
+        if field in old_model:
+            new_model[field] = old_model[field]
 
 def integrate_old_manifest_into_new(old_manifest, new_manifest):
     for old_spec in old_manifest['specifications']:
@@ -180,7 +181,5 @@ def integrate_old_manifest_into_new(old_manifest, new_manifest):
     new_manifest = generate_new_manifest(examples_root, ignored_dirs, parser, queries)
     integrate_old_manifest_into_new(old_manifest, new_manifest)
 
-    # Write generated manifest to file
-    with open(manifest_path, 'w', encoding='utf-8') as new_manifest_file:
-        json.dump(new_manifest, new_manifest_file, indent=2, ensure_ascii=False)
+    tla_utils.write_json(new_manifest, manifest_path)
 
diff --git a/.github/scripts/record_model_state_space.py b/.github/scripts/record_model_state_space.py
@@ -0,0 +1,94 @@
+"""
+Records the number of unique & total states encountered by TLC for each small
+model where that info is not present, then writes it to the manifest.json.
+"""
+
+from argparse import ArgumentParser
+import logging
+from os.path import dirname, normpath
+from subprocess import CompletedProcess, TimeoutExpired
+import tla_utils
+
+parser = ArgumentParser(description='Updates manifest.json with unique & total model states for each small model.')
+parser.add_argument('--tools_jar_path', help='Path to the tla2tools.jar file', required=True)
+parser.add_argument('--tlapm_lib_path', help='Path to the TLA+ proof manager module directory; .tla files should be in this directory', required=True)
+parser.add_argument('--community_modules_jar_path', help='Path to the CommunityModules-deps.jar file', required=True)
+parser.add_argument('--manifest_path', help='Path to the tlaplus/examples manifest.json file', required=True)
+args = parser.parse_args()
+
+logging.basicConfig(level=logging.INFO)
+
+tools_jar_path = normpath(args.tools_jar_path)
+tlapm_lib_path = normpath(args.tlapm_lib_path)
+community_jar_path = normpath(args.community_modules_jar_path)
+manifest_path = normpath(args.manifest_path)
+examples_root = dirname(manifest_path)
+
+def check_model(module_path, model):
+    module_path = tla_utils.from_cwd(examples_root, module_path)
+    model_path = tla_utils.from_cwd(examples_root, model['path'])
+    logging.info(model_path)
+    hard_timeout_in_seconds = 60
+    tlc_result = tla_utils.check_model(
+        tools_jar_path,
+        module_path,
+        model_path,
+        tlapm_lib_path,
+        community_jar_path,
+        model['mode'],
+        hard_timeout_in_seconds
+    )
+    match tlc_result:
+        case CompletedProcess():
+            expected_result = model['result']
+            actual_result = tla_utils.resolve_tlc_exit_code(tlc_result.returncode)
+            if expected_result != actual_result:
+                logging.error(f'Model {model_path} expected result {expected_result} but got {actual_result}')
+                logging.error(tlc_result.stdout)
+                return False
+            state_count_info = tla_utils.extract_state_count_info(tlc_result.stdout)
+            if state_count_info is None:
+                logging.error("Failed to find state info in TLC output")
+                logging.error(tlc_result.stdout)
+                return False
+            logging.info(f'States (distinct, total, depth): {state_count_info}')
+            model['distinctStates'], model['totalStates'], model['stateDepth'] = state_count_info
+            return True
+        case TimeoutExpired():
+            logging.error(f'{model_path} hit hard timeout of {hard_timeout_in_seconds} seconds')
+            logging.error(tlc_result.output.decode('utf-8'))
+            return False
+        case _:
+            logging.error(f'Unhandled TLC result type {type(tlc_result)}: {tlc_result}')
+            return False
+
+# Ensure longest-running modules go first
+manifest = tla_utils.load_json(manifest_path)
+small_models = sorted(
+    [
+        (module['path'], model, tla_utils.parse_timespan(model['runtime']))
+        for spec in manifest['specifications']
+        for module in spec['modules']
+        for model in module['models']
+            if model['size'] == 'small'
+            and tla_utils.is_state_count_valid(model)
+            and (
+                'distinctStates' not in model
+                or 'totalStates' not in model
+                or 'stateDepth' not in model
+            )
+            # This model is nondeterministic due to use of the Random module
+            and model['path'] != 'specifications/SpanningTree/SpanTreeRandom.cfg'
+    ],
+    key = lambda m: m[2],
+    reverse=True
+)
+
+for module_path, model, _ in small_models:
+    success = check_model(module_path, model)
+    if not success:
+        exit(1)
+    tla_utils.write_json(manifest, manifest_path)
+
+exit(0)
+
diff --git a/.github/scripts/tla_utils.py b/.github/scripts/tla_utils.py
@@ -2,6 +2,7 @@
 import json
 from os.path import join, normpath, pathsep
 import subprocess
+import re
 
 def from_cwd(root, path):
     """
@@ -42,6 +43,13 @@ def load_json(path):
     with open(normpath(path), 'r', encoding='utf-8') as file:
         return json.load(file)
 
+def write_json(data, path):
+    """
+    Writes the given json to the given file.
+    """
+    with open(path, 'w', encoding='utf-8') as file:
+        json.dump(data, file, indent=2, ensure_ascii=False)
+
 def parse_timespan(unparsed):
     """
     Parses the timespan format used in the manifest.json format.
@@ -116,3 +124,47 @@ def resolve_tlc_exit_code(code):
 
     return tlc_exit_codes[code] if code in tlc_exit_codes else str(code)
 
+def is_state_count_valid(model):
+    """
+    Whether state count info could be valid for the given model.
+    """
+    return model['mode'] == 'exhaustive search' and model['result'] == 'success'
+
+def has_state_count(model):
+    """
+    Whether the given model has state count info.
+    """
+    return 'distinctStates' in model or 'totalStates' in model or 'stateDepth' in model
+
+def get_state_count_info(model):
+    """
+    Gets whatever state count info is present in the given model.
+    """
+    get_or_none = lambda key: model[key] if key in model else None
+    return (get_or_none('distinctStates'), get_or_none('totalStates'), get_or_none('stateDepth'))
+
+def is_state_count_info_correct(model, distinct_states, total_states, state_depth):
+    """
+    Whether the given state count info concords with the model.
+    """
+    expected_distinct_states, expected_total_states, expected_state_depth = get_state_count_info(model)
+    none_or_equal = lambda expected, actual: expected is None or expected == actual
+    # State depth not yet deterministic due to TLC bug: https://github.com/tlaplus/tlaplus/issues/883
+    return none_or_equal(expected_distinct_states, distinct_states) and none_or_equal(expected_total_states, total_states) #and none_or_equal(expected_state_depth, state_depth)
+
+state_count_regex = re.compile(r'(?P<total_states>\d+) states generated, (?P<distinct_states>\d+) distinct states found, 0 states left on queue.')
+state_depth_regex = re.compile(r'The depth of the complete state graph search is (?P<state_depth>\d+).')
+
+def extract_state_count_info(tlc_output):
+    """
+    Parse & extract state count info from TLC output.
+    """
+    state_count_findings = state_count_regex.search(tlc_output)
+    state_depth_findings = state_depth_regex.search(tlc_output)
+    if state_count_findings is None or state_depth_findings is None:
+        return None
+    distinct_states = int(state_count_findings.group('distinct_states'))
+    total_states = int(state_count_findings.group('total_states'))
+    state_depth = int(state_depth_findings.group('state_depth'))
+    return (distinct_states, total_states, state_depth)
+
diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
@@ -18,7 +18,8 @@ jobs:
         include:
         - { os: windows-latest }
         - { os: ubuntu-latest }
-        - { os: macos-14 }
+        # https://github.com/tlaplus/Examples/issues/119
+        #- { os: macos-14 }
       fail-fast: false
     env:
       SCRIPT_DIR: .github/scripts
diff --git a/README.md b/README.md
@@ -52,6 +52,7 @@ Here is a list of specs included in this repository, with links to the relevant
 | [Minimal Circular Substring](specifications/LeastCircularSubstring)                                 | Andrew Helwer                                       |          |             |    ✔    |     ✔     |          |
 | [Snapshot Key-Value Store](specifications/KeyValueStore)                                            | Andrew Helwer, Murat Demirbas                       |          |             |    ✔    |     ✔     |          |
 | [Chang-Roberts Algorithm for Leader Election in a Ring](specifications/chang_roberts)               | Stephan Merz                                        |          |             |    ✔    |     ✔     |          |
+| [MultiPaxos in SMR-Style](specifications/MultiPaxos-SMR)                                            | Guanzhou Hu                                         |          |             |    ✔    |     ✔     |          |
 | [Resource Allocator](specifications/allocator)                                                      | Stephan Merz                                        |          |             |         |     ✔     |          |
 | [Transitive Closure](specifications/TransitiveClosure)                                              | Stephan Merz                                        |          |             |         |     ✔     |          |
 | [Atomic Commitment Protocol](specifications/acp)                                                    | Stephan Merz                                        |          |             |         |     ✔     |          |
@@ -86,7 +87,6 @@ Here is a list of specs included in this repository, with links to the relevant
 | [RFC 3506: Voucher Transaction System](specifications/byihive)                                      | Santhosh Raju, Cherry G. Mathew, Fransisca Andriani |          |             |         |     ✔     |          |
 | [TLA⁺ Level Checking](specifications/LevelChecking)                                                 | Leslie Lamport                                      |          |             |         |           |          |
 | [Condition-Based Consensus](specifications/cbc_max)                                                 | Thanh Hai Tran, Igor Konnov, Josef Widder           |          |             |         |           |          |
-| [MultiPaxos in SMR-Style](specifications/MultiPaxos-SMR)                                            | Guanzhou Hu                                         |          |             |    ✔    |     ✔     |          |
 
 ## Examples Elsewhere
 Here is a list of specs stored in locations outside this repository, including submodules.
@@ -185,6 +185,9 @@ Otherwise, follow these directions:
      - `"safety failure"` if the model violates an invariant
      - `"liveness failure"` if the model fails to satisfy a liveness property
      - `"deadlock failure"` if the model encounters deadlock
+   - (Optional) Model state count info: distinct states, total states, and state depth
+     - These are all individually optional and only valid if your model uses exhaustive search and results in success
+     - Recording these turns your model into a powerful regression test for TLC
    - Other fields are auto-generated by the script; if you are adding an entry manually, ensure their values are present and correct (see other entries or the [`manifest-schema.json`](manifest-schema.json) file)
 
 Before submitted your changes to run in the CI, you can quickly check your [`manifest.json`](manifest.json) for errors and also check it against [`manifest-schema.json`](manifest-schema.json) at https://www.jsonschemavalidator.net/.
diff --git a/manifest-schema.json b/manifest-schema.json
@@ -55,6 +55,9 @@
                         "pattern": "^(([0-9][0-9]:[0-9][0-9]:[0-9][0-9])|unknown)$"
                       },
                       "size": {"enum": ["small", "medium", "large", "unknown"]},
+                      "distinctStates": {"type": "integer"},
+                      "totalStates": {"type": "integer"},
+                      "stateDepth": {"type": "integer"},
                       "mode": {
                         "oneOf": [
                           {
diff --git a/manifest.json b/manifest.json