martinmoldrup · martinmoldrup · Aug 25, 2025 · Aug 24, 2025 · Aug 24, 2025 · Aug 24, 2025
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -3,6 +3,9 @@ All notable changes to this project will be documented in this file.
 
 *NOTE:* Version 0.X.X might have breaking changes in bumps of the minor version number. This is because the project is still in early development and the API is not yet stable. It will still be marked clearly in the release notes.
 
+## [0.6.1] - 2025-08-25
+- 🐞 Improve the handling of comparing large snapshots with test results. Snappylapy should be working well on large data structures. Disable the pytest assertion rewrite for large comparisons.
+
 ## [0.6.0] - 2025-08-23
 - 🆕 Added: `snappylapy diff` CLI command for comparing changed test results with the snapshot. Currently it only supports VScode for showing diffs.
 - 🆕 Support generic types. Now custom objects can be used. It can serialize and deserialize any that jsonpickle supports.

diff --git a/README.md b/README.md
@@ -38,27 +38,33 @@ Snapshot testing is a powerful technique for verifying the output of your code b
 When working on a test suite for a project, it’s important to ensure tests are independent. This is to avoid situations where changes in one part of the code cause failures in tests for other unrelated areas, making it challenging to isolate and fix errors. Snappylapy addresses this by providing a mechanism to capture snapshots of your data and use them in your later tests, ensuring that each component can be tested independently, while also making sure that they are dependent enough to test the integration between them. It provides serialization and deserialization of the snapshots, making it easy to reuse them in different test cases. This is aimed at functions working with large and complex data structures (dataframes or large nested dictionaries).
 
 ### Example  
-
+
+`test_expect_snapshot_dict.py`
 ```python
 from snappylapy import Expect
-from mypackage import my_function
-
+
+def generate_dict(size: int) -> dict[str, int]:
+    """Function to test."""
+    return {f"key_{i}": i for i in range(size)}
+
 def test_snapshot_dict(expect: Expect):
     """Test snapshot with dictionary data."""
-    data: dict = my_function()
+    data: dict = generate_dict(100)
     expect(data).to_match_snapshot()
     # or expect.dict(data).to_match_snapshot()
 ```
 
 In this example, `snappylapy` captures the output of `generate_dict` and compares it against a stored snapshot. If the output changes unexpectedly, pytest will flag the test, allowing you to review the differences and ensure your code behaves as expected.
 
 Snappylapy can use the snapshots created for inputs in another test. You can think of it as automated/easier mock data generation and management.
+
+`test_expect_and_loadsnapshot.py`
 ```python
 import pytest
 from snappylapy import Expect, LoadSnapshot
 
 def test_snapshot_dict(expect: Expect):
-    """Test snapshot with dictionary data."""
+    """Test snapshot with dictionary data.****"""
     expect({
         "name": "John Doe",
         "age": 31

diff --git a/devtools/extract_examples_from_markdown.py b/devtools/extract_examples_from_markdown.py
@@ -0,0 +1,97 @@
+"""Read the markdown file and extract all the code blocks and put them into tests."""
+from dataclasses import dataclass
+import re
+import pathlib
+from toolit import tool
+
+# Regex matching the opening fence of a Python code block (optionally indented).
+codeblock_opening = r'^\s*```python'
+# Regex matching any fence line; it is only treated as a closing fence while inside a block.
+codeblock_closing = r'^\s*```'
+
+# Directory whose ``*.py`` files are scanned recursively for fenced examples.
+PATH = pathlib.Path("snappylapy")
+# Extra files that are scanned in addition to the files found under PATH.
+ADDITIONAL_PATHS = [
+    pathlib.Path(".") / "README.md",
+]
+# Root directory under which the extracted examples are written.
+PATH_SAVE_DIR = pathlib.Path(__file__).parent.parent / "tests" / "doc_examples"
+
+@dataclass
+class CodeBlock:
+    """A single code example extracted from a fenced code block."""
+
+    # The lines of the example joined with "\n", fence indentation removed.
+    text: str
+    # File name the example is written to inside the save directory.
+    filename: str
+
+class CodeBlockBuilder:
+    """Build a single code block."""
+
+    def __init__(self, opening_line: str, name: str) -> None:
+        self.lines: list[str] = []
+        self.indent_chars: str = self._extract_indent(opening_line)
+        self.name = name
+
+    def add_line(self, line_cleaned: str) -> None:
+        line_cleaned = line_cleaned[len(self.indent_chars):].strip("\n\r")
+        self.lines.append(line_cleaned)
+
+    def _extract_indent(self, line: str) -> str:
+        match = re.match(r'^\s*', line)
+        return match.group(0) if match else ""
+
+    def build(self) -> CodeBlock:
+        return CodeBlock(text="\n".join(self.lines), filename=self.name)
+
+
+
+def extract_codeblocks(filepath: pathlib.Path) -> list[CodeBlock]:
+    """Extract code blocks from a markdown file."""
+    code_blocks: list[CodeBlock] = []
+    in_code_block = False
+
+    with filepath.open("r", encoding="utf-8") as file:
+        lines = file.readlines()
+        for i, line in enumerate(lines):
+            if re.match(codeblock_opening, line):
+                in_code_block = True
+                previous_line = lines[i - 1] if i > 0 else ""
+                # Get text in between two ` chars from previous_line
+                matches = re.findall(r'`([^`]*)`', previous_line)
+                name = matches[0] if matches else f"test_example_{len(code_blocks)}"
+                code_block_builder = CodeBlockBuilder(line, name)
+            elif re.match(codeblock_closing, line) and in_code_block:
+                in_code_block = False
+                code_blocks.append(code_block_builder.build())
+            elif in_code_block:
+                code_block_builder.add_line(line)
+
+    return code_blocks
+
+def save_codeblocks(code_blocks: list[CodeBlock], save_dir: pathlib.Path) -> None:
+    """Save code blocks to files."""
+    if not code_blocks:
+        return
+    save_dir.mkdir(exist_ok=True, parents=True)
+    for block in code_blocks:
+        save_dir_file = save_dir / block.filename
+        print(f"Saving code block to {save_dir_file}")
+        with save_dir_file.open("w", encoding="utf-8") as file:
+            file.write(block.text)
+
+@tool
+def extract_examples() -> None:
+    """Extract examples from markdown and docstrings."""
+    # Delete everything in the save directory
+    # if PATH_SAVE_DIR.exists():
+    #     for file in PATH_SAVE_DIR.iterdir():
+    #         file.unlink()
+
+    for path in PATH.rglob("*.py"):
+        extract_and_save_codeblocks(path)
+
+    for path in ADDITIONAL_PATHS:
+        extract_and_save_codeblocks(path)
+
+def extract_and_save_codeblocks(path: pathlib.Path) -> None:
+    code_blocks = extract_codeblocks(path)
+    savedir = PATH_SAVE_DIR / path.with_suffix("")
+    save_codeblocks(code_blocks, savedir)
+
+
+# Allow running the extraction directly as a script.
+if __name__ == "__main__":
+    extract_examples()
diff --git a/docs/design.md b/docs/design.md
@@ -0,0 +1,24 @@
+The following is a design document for Snappylapy, outlining its target audience, guiding principles, and non-functional requirements. It serves as a reference for developers working on the project and helps ensure that the design decisions align with the overall goals of the library.
+
+# Target Audience
+The target audience for Snappylapy includes software developers working with mutable data objects, such as those involved in AI or external API endpoint calls. This audience may also include data engineers and data scientists who are writing production-quality code that requires thorough testing.
+
+> This is for the lazy developer that wants to write tests fast and manage them easily.
+
+The developer experience should be good both running tests from a test runner in an IDE and from the command line.
+
+# Guiding Principles of Design
+When making design decisions, the following principles should be considered and should guide API design, implementation, and the tone of outputs and documentation.
+
+1. Snappylapy always allows linters, type checkers and IDEs to provide useful feedback and show documentation. This means all public functions should support ctrl+space completion, type hints and well-written docstrings.
+2. Snappylapy makes all parts of the project easily extensible and provides examples of how to extend them. When users are extending the project, the principles in point 1 should still apply.
+3. Always write tests for all public functions, such that no breaking changes can be made without the tests failing. If a breaking change is made, always update the major version number. (applicable after version 1.0.0)
+4. In Snappylapy we always provide examples of how to use the library, and make the examples as simple as possible. The examples should be runnable (included in the test suite) and should cover all the main use cases of the library.
+5. The Snappylapy public APIs should be expressive and explicit over concise and implicit. This means that the API should be easy to understand and use, even if it is a bit more verbose.
+6. Always provide a way to get the raw data, even if it is not the default. This is to allow for more advanced users to use the library in ways that are not directly supported by the library.
+7. Snappylapy should perform well on large data structures. We always test whether the code performs well with large inputs (it is okay to disable some features for large inputs, but it should be quick).
+
+# Top 3 Non-Functional Requirements for Snappylapy
+1. 🎉 **Enjoyability**: Snappylapy should be fun and easy to use, providing an enjoyable and satisfying user experience.
+2. 🧩 **Extensibility**: Snappylapy should be easy to extend and should provide examples of how to extend it.
+3. 📚 **Usability**: Snappylapy should be well documented and easy to use and should provide many examples of how to use it. It should provide good error messages and helpful messages about actions that can be taken to fix the error. Snappylapy should be easy to learn, just by installing the package. The features should be easy to discover just by exploring the package api from the IDE.
diff --git a/docs/gen_ref_pages.py b/docs/gen_ref_pages.py
@@ -68,8 +68,10 @@ def generate_summary(nav: mkdocs_gen_files.Nav) -> None:
     with mkdocs_gen_files.open("reference/SUMMARY.md", "w") as nav_file:
         nav_file.writelines(nav.build_literate_nav())
 
-
+print("Generating reference documentation...")
 generate_cli_docs()
 nav = mkdocs_gen_files.Nav()
+print("Generating documentation for Python files...")
 generate_documentation_for_py_files(nav)
+print("Generating summary...")
 generate_summary(nav)
diff --git a/experimentation/plugin.py b/experimentation/plugin.py
@@ -59,8 +59,8 @@ def pytest_terminal_summary(terminalreporter, exitstatus, config) -> None:
         html_string = jinja2.Template(HTML_REPORT_TEMPLATE.read_text()).render(results=results)
         pathlib.Path(htmlpath).write_text(html_string)
         for result in results:
-            snapshot = result.expect.read_snapshot()
-            result_data = result.expect.read_test_results()
+            snapshot = result.expect._read_snapshot()
+            result_data = result.expect._read_test_results()
             # diff = dmp.diff_main(snapshot.decode(), result_data.decode())
             # dmp.diff_cleanupSemantic(diff)
             # result_html = dmp.diff_prettyHtml(diff)

diff --git a/mkdocs.yml b/mkdocs.yml
@@ -1,3 +1,5 @@
+# Create the docs using the command `uv run mkdocs build`
+
 site_name: Snappylapy
 repo_name: martinmoldrup/snappylapy
 repo_url: https://github.com/martinmoldrup/snappylapy
@@ -68,4 +70,5 @@ nav:
   - Home: index.md
   - Code Reference: reference/
   - Change Log: changelog.md
-  - CLI: cli.md
+  - CLI: cli.md
+  - Design Philosophy: design.md
diff --git a/pyproject.toml b/pyproject.toml
@@ -38,6 +38,7 @@ dependencies = [
     "jsonpickle>=1.0",
     "typer",
     "typing-extensions ; python_full_version == '3.9.*'",
+    "levenshtein>=0.27.1",
 ]
 
 [project.urls]

diff --git a/scripts/extract_examples_from_markdown.py b/scripts/extract_examples_from_markdown.py
diff --git a/snappylapy/_cli.py b/snappylapy/_cli.py
@@ -11,15 +11,28 @@
 app = typer.Typer(
     no_args_is_help=True,
     help="""
-    The CLI provides commands to initialize the repo and to update or clear test results and snapshots.
-    In the future the future the CLI will be expanded with review.
+    Welcome to the snappylapy CLI!
+
+    Use these commands to initialize your repository, update or clear test results and snapshots,
+    and review differences between your test results and snapshots using the 'diff' command.
+
+    - Run 'init' to set up your repo for snappylapy.
+    - Use 'update' to refresh snapshots with the latest test results.
+    - Use 'clear' to remove all test results and snapshots (add --force to skip confirmation).
+    - Use 'diff' to view changes between test results and snapshots in your editor.
+
+    For more details on each command, use --help after the command name.
     """,
 )
 
 
 @app.command()
 def init() -> None:
-    """Initialize repo by adding line to .gitignore."""
+    """
+    Run this command to initialize your repository for snappylapy.
+
+    This will add a line to your .gitignore file to ensure test results are not tracked by git.
+    """
     # Check if .gitignore exists
     gitignore_path = pathlib.Path(".gitignore")
     if not gitignore_path.exists():
@@ -49,7 +62,14 @@ def clear(
         help="Force deletion without confirmation",
     ),
 ) -> None:
-    """Clear all test results and snapshots, recursively, using pathlib."""
+    """
+    Use this command to clear all test results and snapshots created by snappylapy.
+
+    This will recursively delete all files and directories related to test results and snapshots.
+    Use --force to skip confirmation.
+
+    This finds and deletes all __test_results__ and __snapshots__ directories recursively across the working directory.
+    """
     directories_to_delete = DirectoryNamesUtil().get_all_directories_created_by_snappylapy()
     list_of_files_to_delete = DirectoryNamesUtil().get_all_file_paths_created_by_snappylapy()
     if not list_of_files_to_delete:
@@ -80,7 +100,13 @@ def clear(
 
 @app.command()
 def update() -> None:
-    """Update the snapshot files by copying the test results, to the snapshot directory."""
+    """
+    Use this command to update all snapshot files with the latest test results.
+
+    This will overwrite existing snapshots with current test outputs, ensuring your snapshots reflect the latest changes.
+
+    The file contents of any files in any of the __test_results__ folders will be copied to the corresponding __snapshots__ folder.
+    """  # noqa: E501
     files_test_results = DirectoryNamesUtil().get_all_file_paths_test_results()
     if not files_test_results:
         typer.echo("No files to update.")
@@ -105,7 +131,14 @@ def update() -> None:
 
 @app.command()
 def diff() -> None:
-    """Show the differences between the test results and the snapshots."""
+    """
+    Show the differences between the test results and the snapshots.
+
+    Opens all of the changed diffs in the Visual Studio Code (VSCode) editor.
+    This requires that you have VSCode installed and the `code` command available in your PATH.
+
+    More diff viewers will be supported in the future, please raise a request on github with your needs.
+    """
     files_test_results = DirectoryNamesUtil().get_all_file_paths_test_results()
     file_statuses = check_file_statuses(files_test_results)
     files_to_diff = [file for file, status in file_statuses.items() if status == FileStatus.CHANGED]