From 5f173ba2e3ebdf8369c7c7de9c339445bca75a1a Mon Sep 17 00:00:00 2001
From: bwintermann <bjarne.wintermann@uni-paderborn.de>
Date: Mon, 1 Sep 2025 09:14:48 +0200
Subject: [PATCH 01/17] Add first cache implementation related file

---
 src/finn/transformation/fpgadataflow/ip_cache.py | 2 ++
 1 file changed, 2 insertions(+)
 create mode 100644 src/finn/transformation/fpgadataflow/ip_cache.py

diff --git a/src/finn/transformation/fpgadataflow/ip_cache.py b/src/finn/transformation/fpgadataflow/ip_cache.py
new file mode 100644
index 0000000000..efa50cbcfc
--- /dev/null
+++ b/src/finn/transformation/fpgadataflow/ip_cache.py
@@ -0,0 +1,2 @@
+class IPCache:
+    pass

From 5587203f3f8ddfe4c0f1a260f3a4f7adc123d122 Mon Sep 17 00:00:00 2001
From: bwintermann <bjarne.wintermann@me.com>
Date: Wed, 3 Sep 2025 15:36:12 +0200
Subject: [PATCH 02/17] Initial commit for implementing IP caching

---
 .gitignore                                    |   3 +
 .../transformation/fpgadataflow/ip_cache.py   | 322 +++++++++++++++++-
 2 files changed, 324 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index 2d48ddac55..af5fd11f0e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -53,6 +53,9 @@ tags
 poetry.lock
 *.code-workspace
 .env
+settings.yaml
+settings.yml
+deps/
 
 # Package files
 *.egg
diff --git a/src/finn/transformation/fpgadataflow/ip_cache.py b/src/finn/transformation/fpgadataflow/ip_cache.py
index efa50cbcfc..bbff34a596 100644
--- a/src/finn/transformation/fpgadataflow/ip_cache.py
+++ b/src/finn/transformation/fpgadataflow/ip_cache.py
@@ -1,2 +1,322 @@
+"""Manage IP caching for FINN."""
+
+from __future__ import annotations
+
+import hashlib
+import json
+import numpy as np
+import shlex
+import shutil
+import subprocess
+from pathlib import Path
+from qonnx.custom_op.registry import getCustomOp
+from typing import TYPE_CHECKING, Callable, Final
+
+from finn.custom_op.fpgadataflow.hwcustomop import HWCustomOp
+from finn.custom_op.fpgadataflow.matrixvectoractivation import MVAU
+from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
+from finn.util.basic import make_build_dir
+from finn.util.deps import get_deps_path
+from finn.util.exception import FINNConfigurationError, FINNInternalError
+from finn.util.logging import log
+
+if TYPE_CHECKING:
+    from qonnx.core.modelwrapper import ModelWrapper
+
+
+CACHE_IP_DEFINITIONS: dict[type, dict[str, list[str]]] = {}
+"""Contains all node attributes that a custom operator needs to be characterized.
+Filled by the cache_ip decorator. If the field "use" is defined, these attributes are
+used to hash the op.
+>>> CACHE_IP_DEFINITIONS[my_operator]["use"] = [...]
+
+However if "ignore" is used, every attribute _except_ those listed are used.
+>>> CACHE_IP_DEFINITIONS[my_operator]["ignore"] = [...]
+"""
+
+
+def cache_ip(attributes: list[str] | None = None) -> HWCustomOp:
+    """Decorate the given custom operator to be cacheable.
+
+    Args:
+        attributes: List of the key names of all node attributes needed to
+                    identify IP cores.
+    """
+    global CACHE_IP_DEFINITIONS
+
+    def wrapper(op_cls: type) -> type:
+        assert issubclass(op_cls, HWCustomOp), (
+            f"Can only cache HWCustomOp instances, " f"but {op_cls.__name__} is not a HWCustomOP!"
+        )
+        if op_cls not in CACHE_IP_DEFINITIONS.keys():
+            CACHE_IP_DEFINITIONS[op_cls] = {}
+        if attributes is not None:
+            CACHE_IP_DEFINITIONS[op_cls]["use"] = attributes
+        else:
+            # List of fields that don't define the IP core itself,
+            # and can thus be ignored when hashing
+            ignore_fields = [
+                "code_gen_dir_ipgen",
+                "ipgen_path",
+                "ip_path",
+                "cycles_rtlsim",
+                "cycles_estimate",
+                "res_estimate",
+                "res_synth",
+                "rtlsim_so",
+                "executable_path",
+                "res_hls",
+                "code_gen_dir_cppsim",
+            ]
+            CACHE_IP_DEFINITIONS[op_cls]["ignore"] = ignore_fields
+        print(f"Added custom op {op_cls.__name__} to the cacheable IP registry!")
+        return op_cls
+
+    return wrapper
+
+
 class IPCache:
-    pass
+    """Manage IP caching."""
+
+    # TODO: Update hash functions
+    allowed_hashfuncs: Final[list[str]] = ["sha256"]
+
+    def __init__(self, cache_dir: Path, hashfunc: str) -> None:
+        """Construct a new IPCache object.
+
+        Args:
+            cache_dir: The path of the cache directory.
+            hashfunc: The name of the hash function to be used.
+        """
+        self.cache_dir = cache_dir
+        if not self.cache_dir.exists():
+            self.cache_dir.mkdir()
+        log.info(f"[IPCache] Cache directory: {self.cache_dir}")
+        if hashfunc not in dir(hashlib):
+            raise FINNConfigurationError(f"There is no hash function with the name {hashfunc}!")
+        if hashfunc not in self.allowed_hashfuncs:
+            raise FINNConfigurationError(
+                f"Hash function {hashfunc} not available for caching. "
+                f"Choose one of: {self.allowed_hashfuncs}"
+            )
+
+        self.hashfunc_name = hashfunc
+        self.hasher: Callable = getattr(hashlib, hashfunc)
+
+        # Prepare some always needed values
+        # FINN Commit
+        self.finn_commit = subprocess.run(
+            shlex.split("git rev-parse HEAD"),
+            text=True,
+            capture_output=True,
+            cwd=Path(__file__).parent,
+        ).stdout.strip()
+        log.info(f"[IPCache] FINN Commit reads: {self.finn_commit}")
+
+        # FINN HLSLIB Commit
+        self.hlslib_commit = subprocess.run(
+            shlex.split("git rev-parse HEAD"),
+            text=True,
+            capture_output=True,
+            cwd=get_deps_path() / "finn-hlslib",
+        ).stdout.strip()
+        log.info(f"[IPCache] HLSLIB Commit reads: {self.hlslib_commit}")
+
+    def _get_key(self, op: HWCustomOp, model: ModelWrapper) -> str:
+        """Return the key that can be hashed, for the given custom op.
+
+        Returns:
+            str: The human-readable key. Can be used to generate the caching
+                    hash and the metadata file packed with the cached data.
+        """
+        global CACHE_IP_DEFINITIONS
+
+        # TODO: Maybe exchange simple string concat for something more elegant at some point.
+        # TODO: Practical, because we can include the unhashed key in the directory for debugging
+        # Always use the current FINN and HLSLIB commits so that the correct versions are used
+        key = f"FINN: {self.finn_commit}\nHLSLIB: {self.hlslib_commit}\n"
+
+        # Two custom ops might need the same attributes, so add the type
+        key += "type:" + str(type(op)) + "\n"
+
+        # Add all node attributes required
+        typ = type(op)
+        if typ not in CACHE_IP_DEFINITIONS.keys():
+            raise FINNInternalError(
+                "Tried getting the hash for a non-cacheable custom operator. "
+                "Did you perhaps forget to register the op for caching via "
+                "@cache_ip(...)?"
+            )
+
+        # If both "use" and "ignore" are given, only use "use"
+        if "use" in CACHE_IP_DEFINITIONS[typ].keys():
+            keys = CACHE_IP_DEFINITIONS[typ]["use"]
+            for attr in keys:
+                data = None
+                try:
+                    data = op.get_nodeattr(attr)
+                except Exception:
+                    continue
+                try:
+                    data = str(data)
+                except Exception as e:
+                    raise FINNInternalError(
+                        f"Unable to create string-representation for node "
+                        f"attribute {attr} of custom op {op.onnx_node.name} of "
+                        f"type {type(op)}."
+                    ) from e
+                # Finally add to key
+                key += f"{attr}:{data}\n"
+
+        elif "ignore" in CACHE_IP_DEFINITIONS[typ].keys():
+            for name in op.get_nodeattr_types().keys():
+                if name not in CACHE_IP_DEFINITIONS[typ]["ignore"]:
+                    data = None
+                    try:
+                        data = op.get_nodeattr(name)
+                    except Exception:
+                        continue
+                    try:
+                        data = str(data)
+                    except Exception as e:
+                        raise FINNInternalError(
+                            f"Unable to create string-representation for node "
+                            f"attribute {name} of custom "
+                            f"op {op.onnx_node.name} of "
+                            f"type {type(op)}."
+                        ) from e
+                key += f"{name}:{data}\n"
+
+        # Add parameters if existing
+        # TODO: Extend to all custom ops that require this
+        if isinstance(op, MVAU):
+            mem_mode = None
+            try:
+                mem_mode = op.get_nodeattr("mem_mode")
+            except Exception as e:
+                raise FINNInternalError(
+                    f"Cannot cache {op.onnx_node.name} because op is of "
+                    f"type MVAU but has no mem_mode set!"
+                ) from e
+            if mem_mode in ["internal_embedded", "internal_decoupled"]:
+                # TODO: Add shape!
+                weight = np.ascontiguousarray(model.get_initializer(op.onnx_node.input[1]))
+                array_hash = self.hasher(weight.tobytes()).hexdigest()
+                key += f"weights_hash:{array_hash}\n"
+
+        # TODO: Other ops that require parameters
+        return key
+
+    def _get_hash_hex(self, key: str) -> str:
+        """Return the hex repr of the hash of the given key.
+
+        The key can be created using _get_key(...)
+        """
+        return self.hasher(key.encode("UTF-8")).hexdigest()
+
+    def _create_key_file(self, key: str, path: Path) -> None:
+        """Write the given key data into a file at the given path."""
+        with path.open("w+") as f:
+            f.write(f"Hashed using {self.hashfunc_name}. Key:\n------------------------\n")
+            f.write(key)
+
+    def _cache_dir_path(self, hashed_key: str) -> Path:
+        """Return the path to the directory matching the hashed key."""
+        return self.cache_dir / hashed_key
+
+    def _dump_nodeattrs(self, op: HWCustomOp, path: Path) -> None:
+        """Dump the custom ops node attributes at the given path as a JSON."""
+        required = ["ip_vlnv", "ipgen_path", "ip_path"]
+        d = {}
+        for name in op.get_nodeattr_types().keys():
+            if name in required:
+                try:
+                    d[name] = op.get_nodeattr(name)
+                except Exception:
+                    continue
+        with path.open("w+") as f:
+            json.dump(d, f)
+
+    def _prepare_from_cached_ip(self, op: HWCustomOp, hashed_key: str, make_copy: bool) -> None:
+        """Prepare the given custom op for usage of the given cached IP.
+
+        We have to set some node attributes normally set by HLSSynth and PrepareIP. This needs to
+        be done to use the cached IP.
+
+        Args:
+            op: The operator of which the node attributes we have to modify.
+            hashed_key: The hash hex repr of the key for this op. Used to find the cached IP.
+            make_copy: If True, first makes a copy of the cached IP in the current FINN_BUILD_DIR
+                        and sets the path towards this copy instead of the cached original.
+        """
+        log.info(f"Preparing {op.onnx_node.name} from cached IP (hashed key: {hashed_key[:10]}...)")
+        ip_dir = self._cache_dir_path(hashed_key)
+        saved_nodeattrs = {}
+
+        # Check if the cached IP really exists
+        if not ip_dir.exists():
+            raise FINNInternalError(
+                f"Cannot use hashed key {hashed_key}: " f"Cache dir {ip_dir} does not exist!"
+            )
+
+        # Read node attributes from saved directory
+        with (ip_dir / "nodeattrs.json").open("r") as f:
+            saved_nodeattrs = json.load(f)
+
+        # If needed make copy of the cached dir
+        if make_copy:
+            copied_dir = Path(make_build_dir(prefix=f"cached_code_gen_ipgen_{op.onnx_node.name}"))
+            shutil.copytree(ip_dir, copied_dir)
+            ip_dir = copied_dir
+
+        # Set node attributes correctly to point to cached directory
+        transfer_from_cached = ["ip_vlnv", "ipgen_path"]
+        for nodeattr in transfer_from_cached:
+            if nodeattr not in saved_nodeattrs.keys():
+                raise FINNInternalError(
+                    f"Tried using cached IP for {op.onnx_node.name} but "
+                    f"nodeattrs.json at {ip_dir} did not contain required "
+                    f"node attribute {nodeattr}!"
+                )
+            op.set_nodeattr(nodeattr, saved_nodeattrs[nodeattr])
+
+        # Set IP path to already synthesized IP
+        op.set_nodeattr("ip_path", str(ip_dir / "sol1" / "impl" / "ip"))
+
+    def apply_cache(self, model: ModelWrapper) -> ModelWrapper:
+        """First apply all cached IPs, then run synthesis and cache the ones not yet cached."""
+
+        # TODO: Include PrepareIP for RTL nodes (not only synthesis)!
+
+        # First Pass: Apply all cached IPs
+        log.info("[IPCache] First pass: Applying cached IPs...")
+        for node in model.graph.node:
+            op = getCustomOp(node)
+            key = self._get_key(op, model)
+            hashed_key = self._get_hash_hex(key)
+            cache_dir = self._cache_dir_path(hashed_key)
+            if cache_dir.exists():
+                log.info(f"Node {node.name} is already cached! (hashed key: {hashed_key[:10]}...)")
+                log.info("Applying cached IP...")
+                self._prepare_from_cached_ip(op, hashed_key, make_copy=True)
+
+        # Second Pass: Run synthesis and cache not yet cached nodes
+        log.info("[IPCache] Second pass: Synthesizing and caching new IPs...")
+        model = model.transform(HLSSynthIP())
+        for node in model.graph.node:
+            op = getCustomOp(node)
+            key = self._get_key(op, model)
+            hashed_key = self._get_hash_hex(key)
+            target_dir = self._cache_dir_path(hashed_key)
+            if not target_dir.exists():
+                code_gen_dir = Path(op.get_nodeattr("code_gen_dir_ipgen"))
+                if not code_gen_dir.exists():
+                    raise FINNInternalError(
+                        f"PrepareIP and/or HLSSynth for {node.name} "
+                        f"were not successful: code_gen_dir_ipgen not found!"
+                    )
+                shutil.copytree(code_gen_dir, target_dir)
+                self._create_key_file(key, target_dir / "key.txt")
+                self._dump_nodeattrs(op, target_dir / "nodeattrs.json")
+                log.info(f"Cached node {node.name}. Cached at: {target_dir} from {code_gen_dir}!")
+        return model

From e54910822d0768b9d82742f758f3232acb208ef9 Mon Sep 17 00:00:00 2001
From: bwintermann <bjarne.wintermann@uni-paderborn.de>
Date: Wed, 3 Sep 2025 17:45:14 +0200
Subject: [PATCH 03/17] Fixed several path bugs

---
 .../transformation/fpgadataflow/ip_cache.py   | 28 +++++++------------
 1 file changed, 10 insertions(+), 18 deletions(-)

diff --git a/src/finn/transformation/fpgadataflow/ip_cache.py b/src/finn/transformation/fpgadataflow/ip_cache.py
index bbff34a596..9c3d6f0b53 100644
--- a/src/finn/transformation/fpgadataflow/ip_cache.py
+++ b/src/finn/transformation/fpgadataflow/ip_cache.py
@@ -226,7 +226,7 @@ def _cache_dir_path(self, hashed_key: str) -> Path:
 
     def _dump_nodeattrs(self, op: HWCustomOp, path: Path) -> None:
         """Dump the custom ops node attributes at the given path as a JSON."""
-        required = ["ip_vlnv", "ipgen_path", "ip_path"]
+        required = ["ip_vlnv"]
         d = {}
         for name in op.get_nodeattr_types().keys():
             if name in required:
@@ -256,7 +256,7 @@ def _prepare_from_cached_ip(self, op: HWCustomOp, hashed_key: str, make_copy: bo
         # Check if the cached IP really exists
         if not ip_dir.exists():
             raise FINNInternalError(
-                f"Cannot use hashed key {hashed_key}: " f"Cache dir {ip_dir} does not exist!"
+                f"Cannot use hashed key {hashed_key}: Cache dir {ip_dir} does not exist!"
             )
 
         # Read node attributes from saved directory
@@ -266,22 +266,16 @@ def _prepare_from_cached_ip(self, op: HWCustomOp, hashed_key: str, make_copy: bo
         # If needed make copy of the cached dir
         if make_copy:
             copied_dir = Path(make_build_dir(prefix=f"cached_code_gen_ipgen_{op.onnx_node.name}"))
-            shutil.copytree(ip_dir, copied_dir)
+            shutil.copytree(ip_dir, copied_dir, dirs_exist_ok=True)
             ip_dir = copied_dir
 
         # Set node attributes correctly to point to cached directory
-        transfer_from_cached = ["ip_vlnv", "ipgen_path"]
-        for nodeattr in transfer_from_cached:
-            if nodeattr not in saved_nodeattrs.keys():
-                raise FINNInternalError(
-                    f"Tried using cached IP for {op.onnx_node.name} but "
-                    f"nodeattrs.json at {ip_dir} did not contain required "
-                    f"node attribute {nodeattr}!"
-                )
-            op.set_nodeattr(nodeattr, saved_nodeattrs[nodeattr])
-
-        # Set IP path to already synthesized IP
-        op.set_nodeattr("ip_path", str(ip_dir / "sol1" / "impl" / "ip"))
+        op.set_nodeattr("code_gen_dir_ipgen", str(ip_dir))
+        op.set_nodeattr("ip_vlnv", saved_nodeattrs["ip_vlnv"])
+        op.set_nodeattr(
+            "ip_path", str(ip_dir / f"project_{op.onnx_node.name}" / "sol1" / "impl" / "ip")
+        )
+        op.set_nodeattr("ipgen_path", str(ip_dir / f"project_{op.onnx_node.name}"))
 
     def apply_cache(self, model: ModelWrapper) -> ModelWrapper:
         """First apply all cached IPs, then run synthesis and cache the ones not yet cached."""
@@ -296,8 +290,6 @@ def apply_cache(self, model: ModelWrapper) -> ModelWrapper:
             hashed_key = self._get_hash_hex(key)
             cache_dir = self._cache_dir_path(hashed_key)
             if cache_dir.exists():
-                log.info(f"Node {node.name} is already cached! (hashed key: {hashed_key[:10]}...)")
-                log.info("Applying cached IP...")
                 self._prepare_from_cached_ip(op, hashed_key, make_copy=True)
 
         # Second Pass: Run synthesis and cache not yet cached nodes
@@ -315,7 +307,7 @@ def apply_cache(self, model: ModelWrapper) -> ModelWrapper:
                         f"PrepareIP and/or HLSSynth for {node.name} "
                         f"were not successful: code_gen_dir_ipgen not found!"
                     )
-                shutil.copytree(code_gen_dir, target_dir)
+                shutil.copytree(code_gen_dir, target_dir, dirs_exist_ok=True)
                 self._create_key_file(key, target_dir / "key.txt")
                 self._dump_nodeattrs(op, target_dir / "nodeattrs.json")
                 log.info(f"Cached node {node.name}. Cached at: {target_dir} from {code_gen_dir}!")

From 9b74125e98b14296bf055612efcd202ae47b5afa Mon Sep 17 00:00:00 2001
From: bwintermann <bjarne.wintermann@uni-paderborn.de>
Date: Thu, 4 Sep 2025 10:27:00 +0200
Subject: [PATCH 04/17] Infrastructure changes for cached IPs

---
 .gitignore                                    |  2 ++
 src/finn/builder/build_dataflow.py            |  7 +++++
 src/finn/builder/build_dataflow_config.py     |  9 ++++++
 src/finn/builder/build_dataflow_steps.py      | 10 +++++-
 src/finn/interface/interface_utils.py         | 21 +++++++++++++
 src/finn/interface/run_finn.py                | 31 +++++++++++++++++--
 .../transformation/fpgadataflow/ip_cache.py   | 19 +++++++++++-
 src/finn/util/deps.py                         | 14 +++++++++
 8 files changed, 108 insertions(+), 5 deletions(-)

diff --git a/.gitignore b/.gitignore
index af5fd11f0e..23a663410a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -56,6 +56,8 @@ poetry.lock
 settings.yaml
 settings.yml
 deps/
+FINN_TMP
+FINN_IP_CACHE
 
 # Package files
 *.egg
diff --git a/src/finn/builder/build_dataflow.py b/src/finn/builder/build_dataflow.py
index 8549685cb1..34691ba0c0 100644
--- a/src/finn/builder/build_dataflow.py
+++ b/src/finn/builder/build_dataflow.py
@@ -46,6 +46,7 @@
 
 from finn.builder.build_dataflow_config import DataflowBuildConfig, default_build_dataflow_steps
 from finn.builder.build_dataflow_steps import build_dataflow_step_lookup
+from finn.transformation.fpgadataflow.ip_cache import CACHE_IP_DEFINITIONS
 from finn.util.exception import FINNConfigurationError, FINNDataflowError, FINNError, FINNUserError
 
 
@@ -256,6 +257,12 @@ def build_dataflow_cfg(model_filename, cfg: DataflowBuildConfig):
     print(f"Final outputs will be generated in {cfg.output_dir}")
     print(f"Build log is at {cfg.output_dir}/build_dataflow.log")
 
+    # Printing all cached IPs
+    if cfg.use_ip_caching:
+        log.info("IP Caching enabled.")
+        cached_type_names = [k.__name__ for k in CACHE_IP_DEFINITIONS.keys()]
+        log.info("Caching enabled for operators: " + ", ".join(cached_type_names))
+
     # Setup done, start build flow
     try:
         # If start_step is specified, override the input model
diff --git a/src/finn/builder/build_dataflow_config.py b/src/finn/builder/build_dataflow_config.py
index 9e977897cc..87b22aa835 100644
--- a/src/finn/builder/build_dataflow_config.py
+++ b/src/finn/builder/build_dataflow_config.py
@@ -311,6 +311,15 @@ class DataflowBuildConfig(DataClassJSONMixin, DataClassYAMLMixin):
     #: If not specified it will default to synth_clk_period_ns
     hls_clk_period_ns: Optional[float] = None
 
+    #: If True, use an IP Cache to avoid unnecessary waiting
+    #: times to run HLSSynthIP() repeatedly for the same
+    #: model / configuration
+    use_ip_caching: Optional[bool] = True
+
+    #: Hash function to be used when caching the IP cores. Only
+    #: relevant if use_ip_caching = True
+    ip_cache_hashfunction: str = "sha256"
+
     #: Call CapConvolutionFIFODepths in InsertAndSetFIFODepths transform
     #: to make convolution FIFOs smaller where appropriate
     default_swg_exception: Optional[bool] = False
diff --git a/src/finn/builder/build_dataflow_steps.py b/src/finn/builder/build_dataflow_steps.py
index 19f26cbe12..60534cf530 100644
--- a/src/finn/builder/build_dataflow_steps.py
+++ b/src/finn/builder/build_dataflow_steps.py
@@ -82,6 +82,7 @@
 from finn.transformation.fpgadataflow.insert_dwc import InsertDWC
 from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO
 from finn.transformation.fpgadataflow.insert_tlastmarker import InsertTLastMarker
+from finn.transformation.fpgadataflow.ip_cache import CachedHLSSynthIP
 from finn.transformation.fpgadataflow.make_driver import (
     MakeCPPDriver,
     MakePYNQDriverInstrumentation,
@@ -527,7 +528,14 @@ def step_hw_ipgen(model: ModelWrapper, cfg: DataflowBuildConfig):
     """Run Vitis HLS synthesis on generated code for HLSBackend nodes,
     in order to generate IP blocks. For RTL nodes this step does not do anything."""
 
-    model = model.transform(HLSSynthIP())
+    # TODO: Move out of step_hw_ipgen, reorder steps
+    if cfg.use_ip_caching:
+        log.info("Using IP cache to fetch generated IPs...")
+        model = model.transform(CachedHLSSynthIP(cfg.ip_cache_hashfunction))
+    else:
+        log.info("Generating all IPs from scratch...")
+        model = model.transform(HLSSynthIP())
+
     model = model.transform(ReplaceVerilogRelPaths())
     report_dir = cfg.output_dir + "/report"
     os.makedirs(report_dir, exist_ok=True)
diff --git a/src/finn/interface/interface_utils.py b/src/finn/interface/interface_utils.py
index 085b63c814..9592ba182b 100644
--- a/src/finn/interface/interface_utils.py
+++ b/src/finn/interface/interface_utils.py
@@ -127,6 +127,27 @@ def resolve_deps_path(deps: Path | None, settings: dict) -> Path | None:
     return None
 
 
+def resolve_cache_path(cache: Path | None, settings: dict) -> Path:
+    """Resolve the path to the IP cache. Always returns a valid Path.
+
+    Resolution order is:
+    Command Line Argument -> Environment var -> Settings -> Default (finn-plus/FINN_IP_CACHE)
+    """
+    if cache is not None:
+        return cache
+    if "FINN_IP_CACHE" in os.environ.keys():
+        p = Path(os.environ["FINN_IP_CACHE"])
+        if p.is_absolute():
+            return p
+        return Path(__file__).parent.parent.parent.parent / p
+    if "FINN_IP_CACHE" in settings.keys():
+        p = Path(settings["FINN_IP_CACHE"])
+        if p.is_absolute():
+            return p
+        return Path(__file__).parent.parent.parent.parent / p
+    return Path(__file__).parent.parent.parent.parent / "FINN_IP_CACHE"
+
+
 def resolve_num_workers(num: int, settings: dict) -> int:
     """Resolve the number of workers to use. Uses 75% of cores available as default fallback"""
     if num > -1:
diff --git a/src/finn/interface/run_finn.py b/src/finn/interface/run_finn.py
index e672c4ccaa..dd3eb32a7a 100644
--- a/src/finn/interface/run_finn.py
+++ b/src/finn/interface/run_finn.py
@@ -22,6 +22,7 @@
     assert_path_valid,
     error,
     resolve_build_dir,
+    resolve_cache_path,
     resolve_deps_path,
     resolve_num_workers,
     set_synthesis_tools_paths,
@@ -52,16 +53,20 @@ def _resolve_module_path(name: str) -> str:
 
 def prepare_finn(
     deps: Path | None,
+    cache_path: Path | None,
     flow_config: Path,
     build_dir: Path | None,
     num_workers: int,
     is_test_run: bool = False,
     skip_dep_update: bool = False,
 ) -> None:
-    """Prepare a FINN environment by:
+    """Prepare a FINN environment. Leaves this process ready to run any FINN related script.
+
+    This is done by:
     0. Reading all settings and environment vars
     1. Updating all dependencies
     2. Setting all environment vars
+    3. Installing depdendencies
     """
     # Resolve settings and dependencies, error if this doesnt work
     if not settings_found():
@@ -70,6 +75,8 @@ def prepare_finn(
         sp = _resolve_settings_path()
         status(f"Using settings file at {sp}")
     settings = get_settings(force_update=True)
+
+    # Set deps envvar
     deps_path = resolve_deps_path(deps, settings)
     if deps_path is None:
         error("Dependency location could not be resolved!")
@@ -78,6 +85,12 @@ def prepare_finn(
         status(f"Using dependency path: {deps_path}")
     os.environ["FINN_DEPS"] = str(deps_path.absolute())
 
+    # Set cache envvar
+    resolved_cache_path = str(resolve_cache_path(cache_path, settings).absolute())
+    os.environ["FINN_IP_CACHE"] = resolved_cache_path
+    status(f"IP Cache set to: {resolved_cache_path}")
+
+    # Clear PYTHONPATH
     if "PYTHONPATH" not in os.environ.keys():
         os.environ["PYTHONPATH"] = ""
 
@@ -131,6 +144,7 @@ def main_group() -> None:
 @click.command(help="Build a hardware design")
 @click.option("--dependency-path", "-d", default="")
 @click.option("--build-path", "-b", help="Specify a build temp path of your choice", default="")
+@click.option("--ip-cache-path", "-c", help="Path to the FINN IP Cache directory", default="")
 @click.option(
     "--num-workers",
     "-n",
@@ -162,6 +176,7 @@ def main_group() -> None:
 def build(
     dependency_path: str,
     build_path: str,
+    ip_cache_path: str,
     num_workers: int,
     skip_dep_update: bool,
     start: str,
@@ -175,9 +190,11 @@ def build(
     assert_path_valid(config_path)
     assert_path_valid(model_path)
     dep_path = Path(dependency_path).expanduser() if dependency_path != "" else None
+    cache_path = Path(ip_cache_path).expanduser() if ip_cache_path != "" else None
     status(f"Starting FINN build with config {config_path.name} and model {model_path.name}!")
     prepare_finn(
         dep_path,
+        cache_path,
         config_path,
         build_dir,
         num_workers,
@@ -233,6 +250,7 @@ def build(
 @click.command(help="Run a script in a FINN environment")
 @click.option("--dependency-path", "-d", default="")
 @click.option("--build-path", "-b", help="Specify a build temp path of your choice", default="")
+@click.option("--ip-cache-path", "-c", help="Path to the FINN IP Cache directory", default="")
 @click.option(
     "--skip-dep-update",
     "-s",
@@ -249,14 +267,21 @@ def build(
 )
 @click.argument("script")
 def run(
-    dependency_path: str, build_path: str, skip_dep_update: bool, num_workers: int, script: str
+    dependency_path: str,
+    build_path: str,
+    ip_cache_path: str,
+    skip_dep_update: bool,
+    num_workers: int,
+    script: str,
 ) -> None:
     script_path = Path(script).expanduser()
     build_dir = Path(build_path).expanduser() if build_path != "" else None
     assert_path_valid(script_path)
     dep_path = Path(dependency_path).expanduser() if dependency_path != "" else None
+    cache_path = Path(ip_cache_path).expanduser() if ip_cache_path != "" else None
     prepare_finn(
         dep_path,
+        cache_path,
         script_path,
         build_dir,
         num_workers,
@@ -394,7 +419,7 @@ def config_set(key: str, value: str) -> None:
 @click.command(
     "create",
     help="Create a template settings file. If one exists at the given path, "
-    "its overwritten. Please enter a directory, no filename",
+    "its overwritten. Please enter a directory, not a filename",
 )
 @click.argument("path", default="~/.finn/")
 def config_create(path: str) -> None:
diff --git a/src/finn/transformation/fpgadataflow/ip_cache.py b/src/finn/transformation/fpgadataflow/ip_cache.py
index 9c3d6f0b53..21f196d82f 100644
--- a/src/finn/transformation/fpgadataflow/ip_cache.py
+++ b/src/finn/transformation/fpgadataflow/ip_cache.py
@@ -10,13 +10,14 @@
 import subprocess
 from pathlib import Path
 from qonnx.custom_op.registry import getCustomOp
+from qonnx.transformation.base import Transformation
 from typing import TYPE_CHECKING, Callable, Final
 
 from finn.custom_op.fpgadataflow.hwcustomop import HWCustomOp
 from finn.custom_op.fpgadataflow.matrixvectoractivation import MVAU
 from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
 from finn.util.basic import make_build_dir
-from finn.util.deps import get_deps_path
+from finn.util.deps import get_cache_path, get_deps_path
 from finn.util.exception import FINNConfigurationError, FINNInternalError
 from finn.util.logging import log
 
@@ -312,3 +313,19 @@ def apply_cache(self, model: ModelWrapper) -> ModelWrapper:
                 self._dump_nodeattrs(op, target_dir / "nodeattrs.json")
                 log.info(f"Cached node {node.name}. Cached at: {target_dir} from {code_gen_dir}!")
         return model
+
+
+class CachedHLSSynthIP(Transformation):
+    """HLSSynth but cached."""
+
+    # TODO: Remove / reorder steps hw_ipgen and hw_codegen
+    def __init__(self, hash_function: str) -> None:
+        """HLSSynth but cached."""
+        super().__init__()
+        self.hashfunc = hash_function
+
+    def apply(self, model: ModelWrapper) -> tuple[ModelWrapper, bool]:
+        """Apply cached HLS Synthesis."""
+        cache = IPCache(cache_dir=get_cache_path(), hashfunc=self.hashfunc)
+        model = cache.apply_cache(model)
+        return model, False
diff --git a/src/finn/util/deps.py b/src/finn/util/deps.py
index 8728481e75..44c32c6dbf 100644
--- a/src/finn/util/deps.py
+++ b/src/finn/util/deps.py
@@ -1,6 +1,8 @@
 import os
 from pathlib import Path
 
+from finn.util.exception import FINNInternalError
+
 
 def get_deps_path() -> Path:
     """Get the dependency path from the environment variable.
@@ -8,3 +10,15 @@ def get_deps_path() -> Path:
     if "FINN_DEPS" not in os.environ.keys():
         return Path.home() / ".finn" / "deps"
     return Path(os.environ["FINN_DEPS"])
+
+
+# TODO: Move to own file?
+def get_cache_path() -> Path:
+    """Return the path to the cache."""
+    if "FINN_IP_CACHE" not in os.environ.keys():
+        raise FINNInternalError(
+            "FINN_IP_CACHE environment variable not found! This may be a "
+            "bug, since the setup (run_finn.py) should always set this "
+            "variable!"
+        )
+    return Path(os.environ["FINN_IP_CACHE"])

From 9d5d782b9cc915dd8d38a790f22d402730297bfd Mon Sep 17 00:00:00 2001
From: bwintermann <bjarne.wintermann@uni-paderborn.de>
Date: Thu, 4 Sep 2025 11:38:12 +0200
Subject: [PATCH 05/17] Refactoring the cache class

---
 .../transformation/fpgadataflow/ip_cache.py   | 217 ++++++++++--------
 1 file changed, 120 insertions(+), 97 deletions(-)

diff --git a/src/finn/transformation/fpgadataflow/ip_cache.py b/src/finn/transformation/fpgadataflow/ip_cache.py
index 21f196d82f..84fd43d739 100644
--- a/src/finn/transformation/fpgadataflow/ip_cache.py
+++ b/src/finn/transformation/fpgadataflow/ip_cache.py
@@ -11,7 +11,7 @@
 from pathlib import Path
 from qonnx.custom_op.registry import getCustomOp
 from qonnx.transformation.base import Transformation
-from typing import TYPE_CHECKING, Callable, Final
+from typing import TYPE_CHECKING, Callable, Final, cast
 
 from finn.custom_op.fpgadataflow.hwcustomop import HWCustomOp
 from finn.custom_op.fpgadataflow.matrixvectoractivation import MVAU
@@ -22,6 +22,7 @@
 from finn.util.logging import log
 
 if TYPE_CHECKING:
+    from onnx import NodeProto
     from qonnx.core.modelwrapper import ModelWrapper
 
 
@@ -46,9 +47,9 @@ def cache_ip(attributes: list[str] | None = None) -> HWCustomOp:
     global CACHE_IP_DEFINITIONS
 
     def wrapper(op_cls: type) -> type:
-        assert issubclass(op_cls, HWCustomOp), (
-            f"Can only cache HWCustomOp instances, " f"but {op_cls.__name__} is not a HWCustomOP!"
-        )
+        assert issubclass(
+            op_cls, HWCustomOp
+        ), f"Can only cache HWCustomOp instances, but {op_cls.__name__} is not a HWCustomOP!"
         if op_cls not in CACHE_IP_DEFINITIONS.keys():
             CACHE_IP_DEFINITIONS[op_cls] = {}
         if attributes is not None:
@@ -77,7 +78,16 @@ def wrapper(op_cls: type) -> type:
 
 
 class IPCache:
-    """Manage IP caching."""
+    """Manage IP caching.
+
+    Application: To apply this in a normal flow, execute somewhat like this:
+    ```
+    cache = IPCache(...)
+    model = cache.apply(model)              # Apply already cached IPs
+    model = model.transform(HLSSynthIP())   # Generate IPs that weren't available
+    cache.update(model)                     # Cache the newly generated IPs too
+    ```
+    """
 
     # TODO: Update hash functions
     allowed_hashfuncs: Final[list[str]] = ["sha256"]
@@ -92,7 +102,7 @@ def __init__(self, cache_dir: Path, hashfunc: str) -> None:
         self.cache_dir = cache_dir
         if not self.cache_dir.exists():
             self.cache_dir.mkdir()
-        log.info(f"[IPCache] Cache directory: {self.cache_dir}")
+        log.info(f"Opened cache handler. Cache directory: {self.cache_dir}")
         if hashfunc not in dir(hashlib):
             raise FINNConfigurationError(f"There is no hash function with the name {hashfunc}!")
         if hashfunc not in self.allowed_hashfuncs:
@@ -112,7 +122,7 @@ def __init__(self, cache_dir: Path, hashfunc: str) -> None:
             capture_output=True,
             cwd=Path(__file__).parent,
         ).stdout.strip()
-        log.info(f"[IPCache] FINN Commit reads: {self.finn_commit}")
+        log.info(f"FINN Commit reads: {self.finn_commit}")
 
         # FINN HLSLIB Commit
         self.hlslib_commit = subprocess.run(
@@ -121,74 +131,41 @@ def __init__(self, cache_dir: Path, hashfunc: str) -> None:
             capture_output=True,
             cwd=get_deps_path() / "finn-hlslib",
         ).stdout.strip()
-        log.info(f"[IPCache] HLSLIB Commit reads: {self.hlslib_commit}")
-
-    def _get_key(self, op: HWCustomOp, model: ModelWrapper) -> str:
-        """Return the key that can be hashed, for the given custom op.
-
-        Returns:
-            str: The human-readable key. Can be used to generate the caching
-                    hash and the metadata file packed with the cached data.
-        """
-        global CACHE_IP_DEFINITIONS
+        log.info(f"HLSLIB Commit reads: {self.hlslib_commit}")
 
-        # TODO: Maybe exchange simple string concat for something more elegant at some point.
-        # TODO: Practical, because we can include the unhashed key in the directory for debugging
-        # Always use the current FINN and HLSLIB commits so that the correct versions are used
-        key = f"FINN: {self.finn_commit}\nHLSLIB: {self.hlslib_commit}\n"
-
-        # Two custom ops might need the same attributes, so add the type
-        key += "type:" + str(type(op)) + "\n"
-
-        # Add all node attributes required
+    def _get_key_part_attributes(self, op: HWCustomOp) -> str:
+        """Return the part of the key that contains attributes and their values."""
+        key_part = ""
         typ = type(op)
-        if typ not in CACHE_IP_DEFINITIONS.keys():
-            raise FINNInternalError(
-                "Tried getting the hash for a non-cacheable custom operator. "
-                "Did you perhaps forget to register the op for caching via "
-                "@cache_ip(...)?"
-            )
-
-        # If both "use" and "ignore" are given, only use "use"
+        attrs: list[str] = []
         if "use" in CACHE_IP_DEFINITIONS[typ].keys():
-            keys = CACHE_IP_DEFINITIONS[typ]["use"]
-            for attr in keys:
-                data = None
-                try:
-                    data = op.get_nodeattr(attr)
-                except Exception:
-                    continue
-                try:
-                    data = str(data)
-                except Exception as e:
-                    raise FINNInternalError(
-                        f"Unable to create string-representation for node "
-                        f"attribute {attr} of custom op {op.onnx_node.name} of "
-                        f"type {type(op)}."
-                    ) from e
-                # Finally add to key
-                key += f"{attr}:{data}\n"
-
+            attrs = CACHE_IP_DEFINITIONS[typ]["use"]
         elif "ignore" in CACHE_IP_DEFINITIONS[typ].keys():
-            for name in op.get_nodeattr_types().keys():
-                if name not in CACHE_IP_DEFINITIONS[typ]["ignore"]:
-                    data = None
-                    try:
-                        data = op.get_nodeattr(name)
-                    except Exception:
-                        continue
-                    try:
-                        data = str(data)
-                    except Exception as e:
-                        raise FINNInternalError(
-                            f"Unable to create string-representation for node "
-                            f"attribute {name} of custom "
-                            f"op {op.onnx_node.name} of "
-                            f"type {type(op)}."
-                        ) from e
-                key += f"{name}:{data}\n"
+            attrs = [
+                k
+                for k in op.get_nodeattr_types().keys()
+                if k not in CACHE_IP_DEFINITIONS[typ]["ignore"]
+            ]
+        else:
+            raise FINNInternalError("This codepath should not be reachable!")
+        for attr in attrs:
+            data = None
+            try:
+                data = op.get_nodeattr(attr)
+            except Exception:
+                continue
+            try:
+                data = str(data)
+            except Exception as e:
+                raise FINNInternalError(
+                    f"Unable to create string-representation for node "
+                    f"attribute {attr} of custom op {op.onnx_node.name} of "
+                    f"type {type(op)}."
+                ) from e
+            key_part += f"{attr}:{data}\n"
+        return key_part
 
-        # Add parameters if existing
+    def _get_key_part_parameter(self, op: HWCustomOp, model: ModelWrapper) -> str:
         # TODO: Extend to all custom ops that require this
         if isinstance(op, MVAU):
             mem_mode = None
@@ -203,15 +180,44 @@ def _get_key(self, op: HWCustomOp, model: ModelWrapper) -> str:
                 # TODO: Add shape!
                 weight = np.ascontiguousarray(model.get_initializer(op.onnx_node.input[1]))
                 array_hash = self.hasher(weight.tobytes()).hexdigest()
-                key += f"weights_hash:{array_hash}\n"
+                return f"weights_hash:{array_hash}\n"
+        return ""
+
+    def get_key(self, op: HWCustomOp, model: ModelWrapper) -> str:
+        """Return the key that can be hashed, for the given custom op.
+
+        Returns:
+            str: The human-readable key. Can be used to generate the caching
+                    hash and the metadata file packed with the cached data.
+        """
+        # TODO: Maybe exchange simple string concat for something more elegant at some point.
+        # TODO: Practical, because we can include the unhashed key in the directory for debugging
+        global CACHE_IP_DEFINITIONS
+        if type(op) not in CACHE_IP_DEFINITIONS.keys():
+            raise FINNInternalError(
+                "Tried getting the hash for a non-cacheable custom operator. "
+                "Did you perhaps forget to register the op for caching via "
+                "@cache_ip(...)?"
+            )
+
+        # Always use the current FINN and HLSLIB commits so that the correct versions are used
+        key = f"FINN: {self.finn_commit}\nHLSLIB: {self.hlslib_commit}\n"
+
+        # Two custom ops might need the same attributes, so add the type
+        key += "type:" + str(type(op)) + "\n"
+
+        # Add all node attributes required
+        key += self._get_key_part_attributes(op) + "\n"
+
+        # Add parameters if existing
+        key += self._get_key_part_parameter(op, model)
 
-        # TODO: Other ops that require parameters
         return key
 
-    def _get_hash_hex(self, key: str) -> str:
+    def get_hash_hex(self, key: str) -> str:
         """Return the hex repr of the hash of the given key.
 
-        The key can be created using _get_key(...)
+        The key can be created using get_key(...)
         """
         return self.hasher(key.encode("UTF-8")).hexdigest()
 
@@ -278,41 +284,52 @@ def _prepare_from_cached_ip(self, op: HWCustomOp, hashed_key: str, make_copy: bo
         )
         op.set_nodeattr("ipgen_path", str(ip_dir / f"project_{op.onnx_node.name}"))
 
-    def apply_cache(self, model: ModelWrapper) -> ModelWrapper:
-        """First apply all cached IPs, then run synthesis and cache the ones not yet cached."""
-
-        # TODO: Include PrepareIP for RTL nodes (not only synthesis)!
+    def _get_node_data(
+        self, node: NodeProto, model: ModelWrapper
+    ) -> tuple[HWCustomOp, str, str, Path]:
+        """Return the op, key, hashed key, cache dir path for a given node."""
+        op = getCustomOp(node)
+        key = self.get_key(op, model)
+        hashed_key = self.get_hash_hex(key)
+        return op, key, hashed_key, self._cache_dir_path(hashed_key)
+
+    def get_num_cached_ips(self, model: ModelWrapper) -> int:
+        """Return the number of cached IPs in the model."""
+        count = 0
+        for node in model.graph.node:
+            _, _, _, cache_dir = self._get_node_data(node, model)
+            if cache_dir.exists():
+                count += 1
+        return count
 
-        # First Pass: Apply all cached IPs
-        log.info("[IPCache] First pass: Applying cached IPs...")
+    def apply(self, model: ModelWrapper) -> ModelWrapper:
+        """Apply all IPs that were cached to the model and return it."""
         for node in model.graph.node:
-            op = getCustomOp(node)
-            key = self._get_key(op, model)
-            hashed_key = self._get_hash_hex(key)
-            cache_dir = self._cache_dir_path(hashed_key)
+            op, key, hashed_key, cache_dir = self._get_node_data(node, model)
             if cache_dir.exists():
                 self._prepare_from_cached_ip(op, hashed_key, make_copy=True)
+        return model
 
-        # Second Pass: Run synthesis and cache not yet cached nodes
-        log.info("[IPCache] Second pass: Synthesizing and caching new IPs...")
-        model = model.transform(HLSSynthIP())
+    def update(self, model: ModelWrapper) -> None:
+        """Check a model for generated IPs that were not yet cached, and cache them.
+
+        Requires HLSSynthIP() to be run before.
+        """
         for node in model.graph.node:
-            op = getCustomOp(node)
-            key = self._get_key(op, model)
-            hashed_key = self._get_hash_hex(key)
-            target_dir = self._cache_dir_path(hashed_key)
+            op, key, hashed_key, target_dir = self._get_node_data(node, model)
             if not target_dir.exists():
-                code_gen_dir = Path(op.get_nodeattr("code_gen_dir_ipgen"))
+                code_gen_dir = Path(cast(str, op.get_nodeattr("code_gen_dir_ipgen")))
                 if not code_gen_dir.exists():
-                    raise FINNInternalError(
-                        f"PrepareIP and/or HLSSynth for {node.name} "
-                        f"were not successful: code_gen_dir_ipgen not found!"
+                    log.warning(
+                        f"Could not cache {node.name}: code_gen_dir_ipgen not set. "
+                        f"Did HLSSynthIP() fail/was not run?"
                     )
+                    # Nothing to copy for this node: skip it so copytree does not raise.
+                    continue
                 shutil.copytree(code_gen_dir, target_dir, dirs_exist_ok=True)
                 self._create_key_file(key, target_dir / "key.txt")
                 self._dump_nodeattrs(op, target_dir / "nodeattrs.json")
                 log.info(f"Cached node {node.name}. Cached at: {target_dir} from {code_gen_dir}!")
-        return model
 
 
 class CachedHLSSynthIP(Transformation):
@@ -327,5 +342,13 @@ def __init__(self, hash_function: str) -> None:
     def apply(self, model: ModelWrapper) -> tuple[ModelWrapper, bool]:
         """Apply cached HLS Synthesis."""
         cache = IPCache(cache_dir=get_cache_path(), hashfunc=self.hashfunc)
-        model = cache.apply_cache(model)
+        log.info(
+            f"Applying cache to {cache.get_num_cached_ips(model)} "
+            f"/ {len(model.graph.node)} nodes!"
+        )
+        model = cache.apply(model)
+        log.info("Running synthesis for uncached IPs...")
+        model = model.transform(HLSSynthIP())
+        log.info("Updating cache with newly generated IPs...")
+        cache.update(model)
         return model, False

From 4d332322ca431afb6f3d451d296ce5569c2a046b Mon Sep 17 00:00:00 2001
From: bwintermann <bjarne.wintermann@uni-paderborn.de>
Date: Thu, 4 Sep 2025 13:18:43 +0200
Subject: [PATCH 06/17] Introduced new unified step for IP generation, updated
 IPCache

---
 src/finn/builder/build_dataflow.py            | 19 +++++++-
 src/finn/builder/build_dataflow_config.py     |  5 +-
 src/finn/builder/build_dataflow_steps.py      | 46 +++++++++++++++----
 .../transformation/fpgadataflow/ip_cache.py   | 45 ++++++++++++++----
 4 files changed, 92 insertions(+), 23 deletions(-)

diff --git a/src/finn/builder/build_dataflow.py b/src/finn/builder/build_dataflow.py
index 34691ba0c0..6e0e2ae816 100644
--- a/src/finn/builder/build_dataflow.py
+++ b/src/finn/builder/build_dataflow.py
@@ -260,8 +260,23 @@ def build_dataflow_cfg(model_filename, cfg: DataflowBuildConfig):
     # Printing all cached IPs
     if cfg.use_ip_caching:
         log.info("IP Caching enabled.")
-        cached_type_names = [k.__name__ for k in CACHE_IP_DEFINITIONS.keys()]
-        log.info("Caching enabled for operators: " + ", ".join(cached_type_names))
+        if cfg.verbose:
+            log.info("Caching enabled for operators: ")
+            for k, v in CACHE_IP_DEFINITIONS.items():
+                s = f"[{k.__name__}]:\n\tuse: "
+                if "use" in v.keys():
+                    s += ", ".join(v["use"])
+                else:
+                    s += "*"
+                s += "\n\tignore: "
+                if "ignore" in v.keys():
+                    if "use" not in v.keys():
+                        s += "defaults"
+                    else:
+                        s += ", ".join(v["ignore"])
+                else:
+                    s += ""
+                log.info(s)
 
     # Setup done, start build flow
     try:
diff --git a/src/finn/builder/build_dataflow_config.py b/src/finn/builder/build_dataflow_config.py
index 87b22aa835..2d201e51a0 100644
--- a/src/finn/builder/build_dataflow_config.py
+++ b/src/finn/builder/build_dataflow_config.py
@@ -136,8 +136,9 @@ class VerificationStepType(str, Enum):
     "step_minimize_bit_width",
     "step_generate_estimate_reports",
     "step_set_fifo_depths",
-    "step_hw_codegen",
-    "step_hw_ipgen",
+    "step_ip_generation",
+    # "step_hw_codegen",
+    # "step_hw_ipgen",
     "step_create_stitched_ip",
     "step_measure_rtlsim_performance",
     "step_out_of_context_synthesis",
diff --git a/src/finn/builder/build_dataflow_steps.py b/src/finn/builder/build_dataflow_steps.py
index 60534cf530..dafc7a1a5b 100644
--- a/src/finn/builder/build_dataflow_steps.py
+++ b/src/finn/builder/build_dataflow_steps.py
@@ -82,7 +82,7 @@
 from finn.transformation.fpgadataflow.insert_dwc import InsertDWC
 from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO
 from finn.transformation.fpgadataflow.insert_tlastmarker import InsertTLastMarker
-from finn.transformation.fpgadataflow.ip_cache import CachedHLSSynthIP
+from finn.transformation.fpgadataflow.ip_cache import CachedIPGen
 from finn.transformation.fpgadataflow.make_driver import (
     MakeCPPDriver,
     MakePYNQDriverInstrumentation,
@@ -516,6 +516,39 @@ def step_minimize_bit_width(model: ModelWrapper, cfg: DataflowBuildConfig):
     return model
 
 
+def _make_hls_estimate_report(model: ModelWrapper, cfg: DataflowBuildConfig) -> None:
+    report_dir = cfg.output_dir + "/report"
+    os.makedirs(report_dir, exist_ok=True)
+    estimate_layer_resources_hls = model.analysis(hls_synth_res_estimation)
+    estimate_layer_resources_hls["total"] = aggregate_dict_keys(estimate_layer_resources_hls)
+    with open(report_dir + "/estimate_layer_resources_hls.json", "w") as f:
+        json.dump(estimate_layer_resources_hls, f, indent=2)
+
+
+def step_ip_generation(model: ModelWrapper, cfg: DataflowBuildConfig) -> ModelWrapper:
+    """Unified step, that does what step_hw_codegen and step_hw_ipgen did before. (With cache!)."""
+    if cfg.use_ip_caching:
+        model = model.transform(
+            CachedIPGen(
+                cfg.ip_cache_hashfunction,
+                include_prepare_ip=True,
+                fpgapart=cfg._resolve_fpga_part(),
+                clk=cfg._resolve_hls_clk_period(),
+            )
+        )
+    else:
+        model = model.transform(PrepareIP(cfg._resolve_fpga_part(), cfg._resolve_hls_clk_period()))
+        model = model.transform(HLSSynthIP())
+    model = model.transform(ReplaceVerilogRelPaths())
+    _make_hls_estimate_report(model, cfg)
+
+    if VerificationStepType.NODE_BY_NODE_RTLSIM in cfg._resolve_verification_steps():
+        model = model.transform(PrepareRTLSim())
+        model = model.transform(SetExecMode("rtlsim"))
+        verify_step(model, cfg, "node_by_node_rtlsim", need_parent=True)
+    return model
+
+
 def step_hw_codegen(model: ModelWrapper, cfg: DataflowBuildConfig):
     """Generate Vitis HLS code to prepare HLSBackend nodes for IP generation.
     And fills RTL templates for RTLBackend nodes."""
@@ -531,19 +564,13 @@ def step_hw_ipgen(model: ModelWrapper, cfg: DataflowBuildConfig):
     # TODO: Move out of step_hw_ipgen, reorder steps
     if cfg.use_ip_caching:
         log.info("Using IP cache to fetch generated IPs...")
-        model = model.transform(CachedHLSSynthIP(cfg.ip_cache_hashfunction))
+        model = model.transform(CachedIPGen(cfg.ip_cache_hashfunction, include_prepare_ip=False))
     else:
         log.info("Generating all IPs from scratch...")
         model = model.transform(HLSSynthIP())
 
     model = model.transform(ReplaceVerilogRelPaths())
-    report_dir = cfg.output_dir + "/report"
-    os.makedirs(report_dir, exist_ok=True)
-    estimate_layer_resources_hls = model.analysis(hls_synth_res_estimation)
-    estimate_layer_resources_hls["total"] = aggregate_dict_keys(estimate_layer_resources_hls)
-    with open(report_dir + "/estimate_layer_resources_hls.json", "w") as f:
-        json.dump(estimate_layer_resources_hls, f, indent=2)
-
+    _make_hls_estimate_report(model, cfg)
     if VerificationStepType.NODE_BY_NODE_RTLSIM in cfg._resolve_verification_steps():
         model = model.transform(PrepareRTLSim())
         model = model.transform(SetExecMode("rtlsim"))
@@ -1044,6 +1071,7 @@ def step_deployment_package(model: ModelWrapper, cfg: DataflowBuildConfig):
     "step_apply_folding_config": step_apply_folding_config,
     "step_minimize_bit_width": step_minimize_bit_width,
     "step_generate_estimate_reports": step_generate_estimate_reports,
+    "step_ip_generation": step_ip_generation,
     "step_hw_codegen": step_hw_codegen,
     "step_hw_ipgen": step_hw_ipgen,
     "step_set_fifo_depths": step_set_fifo_depths,
diff --git a/src/finn/transformation/fpgadataflow/ip_cache.py b/src/finn/transformation/fpgadataflow/ip_cache.py
index 84fd43d739..4d434f3023 100644
--- a/src/finn/transformation/fpgadataflow/ip_cache.py
+++ b/src/finn/transformation/fpgadataflow/ip_cache.py
@@ -16,6 +16,7 @@
 from finn.custom_op.fpgadataflow.hwcustomop import HWCustomOp
 from finn.custom_op.fpgadataflow.matrixvectoractivation import MVAU
 from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
+from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
 from finn.util.basic import make_build_dir
 from finn.util.deps import get_cache_path, get_deps_path
 from finn.util.exception import FINNConfigurationError, FINNInternalError
@@ -37,7 +38,7 @@
 """
 
 
-def cache_ip(attributes: list[str] | None = None) -> HWCustomOp:
+def cache_ip(attributes: list[str] | None = None) -> Callable[[type], type]:
     """Decorate the given custom operator to be cacheable.
 
     Args:
@@ -52,6 +53,9 @@ def wrapper(op_cls: type) -> type:
         ), f"Can only cache HWCustomOp instances, but {op_cls.__name__} is not a HWCustomOP!"
         if op_cls not in CACHE_IP_DEFINITIONS.keys():
             CACHE_IP_DEFINITIONS[op_cls] = {}
+        else:
+            # Already marked
+            return op_cls
         if attributes is not None:
             CACHE_IP_DEFINITIONS[op_cls]["use"] = attributes
         else:
@@ -71,7 +75,6 @@ def wrapper(op_cls: type) -> type:
                 "code_gen_dir_cppsim",
             ]
             CACHE_IP_DEFINITIONS[op_cls]["ignore"] = ignore_fields
-        print(f"Added custom op {op_cls.__name__} to the cacheable IP registry!")
         return op_cls
 
     return wrapper
@@ -194,8 +197,8 @@ def get_key(self, op: HWCustomOp, model: ModelWrapper) -> str:
         # TODO: Practical, because we can include the unhashed key in the directory for debugging
         global CACHE_IP_DEFINITIONS
         if type(op) not in CACHE_IP_DEFINITIONS.keys():
-            raise FINNInternalError(
-                "Tried getting the hash for a non-cacheable custom operator. "
+            log.error(
+                "Tried getting the key for a non-cacheable custom operator. "
                 "Did you perhaps forget to register the op for caching via "
                 "@cache_ip(...)?"
             )
@@ -330,23 +333,45 @@ def update(self, model: ModelWrapper) -> None:
                 log.info(f"Cached node {node.name}. Cached at: {target_dir} from {code_gen_dir}!")
 
 
-class CachedHLSSynthIP(Transformation):
-    """HLSSynth but cached."""
+class CachedIPGen(Transformation):
+    """(PrepareIP and) HLSSynth but cached."""
+
+    def __init__(
+        self,
+        hash_function: str,
+        include_prepare_ip: bool,
+        fpgapart: str | None = None,
+        clk: float | None = None,
+    ) -> None:
+        """(PrepareIP and) HLSSynth but cached.
 
-    # TODO: Remove / reorder steps hw_ipgen and hw_codegen
-    def __init__(self, hash_function: str) -> None:
-        """HLSSynth but cached."""
+        Args:
+            hash_function: Hashfunction to use.
+            include_prepare_ip: If True, also run PrepareIP before synthesis.
+            fpgapart: Required if PrepareIP is being run.
+            clk: Required if PrepareIP is being run.
+        """
         super().__init__()
         self.hashfunc = hash_function
+        self.prepareip = include_prepare_ip
+        self.part = fpgapart
+        self.clk = clk
 
     def apply(self, model: ModelWrapper) -> tuple[ModelWrapper, bool]:
-        """Apply cached HLS Synthesis."""
+        """Apply cached HLS Synthesis (and PrepareIP)."""
         cache = IPCache(cache_dir=get_cache_path(), hashfunc=self.hashfunc)
         log.info(
             f"Applying cache to {cache.get_num_cached_ips(model)} "
             f"/ {len(model.graph.node)} nodes!"
         )
         model = cache.apply(model)
+        if self.prepareip:
+            if self.part is None or self.clk is None:
+                raise FINNInternalError(
+                    "Cannot run PrepareIP in CachedIPGen without " "fpgapart and clk being passed!"
+                )
+            log.info("Running PrepareIP for uncached IPs...")
+            model = model.transform(PrepareIP(self.part, self.clk))
         log.info("Running synthesis for uncached IPs...")
         model = model.transform(HLSSynthIP())
         log.info("Updating cache with newly generated IPs...")

From eea8347eeba6e5345643b063496d96f5da1779c5 Mon Sep 17 00:00:00 2001
From: bwintermann <bjarne.wintermann@uni-paderborn.de>
Date: Thu, 4 Sep 2025 13:48:52 +0200
Subject: [PATCH 07/17] Automatically mark all ops cacheable. Implement guards
 to make sure external parameters are considered

---
 .../custom_op/fpgadataflow/hls/__init__.py    |  6 ++++
 .../custom_op/fpgadataflow/rtl/__init__.py    |  7 ++++
 .../transformation/fpgadataflow/ip_cache.py   | 36 +++++++++++++++++--
 3 files changed, 46 insertions(+), 3 deletions(-)

diff --git a/src/finn/custom_op/fpgadataflow/hls/__init__.py b/src/finn/custom_op/fpgadataflow/hls/__init__.py
index cfb2fb8f82..8b0bf9a2c5 100644
--- a/src/finn/custom_op/fpgadataflow/hls/__init__.py
+++ b/src/finn/custom_op/fpgadataflow/hls/__init__.py
@@ -32,6 +32,7 @@
 # The base class of all generic custom operations before specializing to either
 # HLS or RTL backend
 from finn.custom_op.fpgadataflow.hwcustomop import HWCustomOp
+from finn.transformation.fpgadataflow.ip_cache import cache_ip
 
 # Dictionary of HLSBackend implementations
 custom_op = dict()
@@ -131,3 +132,8 @@ def register_custom_op(cls):
 custom_op["SplitMultiHeads_hls"] = SplitMultiHeads_hls
 custom_op["MergeMultiHeads_hls"] = MergeMultiHeads_hls
 custom_op["ReplicateStream_hls"] = ReplicateStream_hls
+
+# Apply cache to all ops
+for key in custom_op.keys():
+    if issubclass(custom_op[key], HWCustomOp):
+        custom_op[key] = cache_ip(attributes=None)(custom_op[key])
diff --git a/src/finn/custom_op/fpgadataflow/rtl/__init__.py b/src/finn/custom_op/fpgadataflow/rtl/__init__.py
index 06067a4fca..1f5e54e99a 100644
--- a/src/finn/custom_op/fpgadataflow/rtl/__init__.py
+++ b/src/finn/custom_op/fpgadataflow/rtl/__init__.py
@@ -26,6 +26,7 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+from finn.custom_op.fpgadataflow.hwcustomop import HWCustomOp
 from finn.custom_op.fpgadataflow.rtl.convolutioninputgenerator_rtl import (
     ConvolutionInputGenerator_rtl,
 )
@@ -37,6 +38,7 @@
 from finn.custom_op.fpgadataflow.rtl.streamingfifo_rtl import StreamingFIFO_rtl
 from finn.custom_op.fpgadataflow.rtl.thresholding_rtl import Thresholding_rtl
 from finn.custom_op.fpgadataflow.rtl.vectorvectoractivation_rtl import VVAU_rtl
+from finn.transformation.fpgadataflow.ip_cache import cache_ip
 
 custom_op = dict()
 
@@ -49,3 +51,8 @@
 custom_op["MVAU_rtl"] = MVAU_rtl
 custom_op["VVAU_rtl"] = VVAU_rtl
 custom_op["Thresholding_rtl"] = Thresholding_rtl
+
+# Apply cache to all ops
+for key in custom_op.keys():
+    if issubclass(custom_op[key], HWCustomOp):
+        custom_op[key] = cache_ip(attributes=None)(custom_op[key])
diff --git a/src/finn/transformation/fpgadataflow/ip_cache.py b/src/finn/transformation/fpgadataflow/ip_cache.py
index 4d434f3023..4d0adc447d 100644
--- a/src/finn/transformation/fpgadataflow/ip_cache.py
+++ b/src/finn/transformation/fpgadataflow/ip_cache.py
@@ -13,8 +13,17 @@
 from qonnx.transformation.base import Transformation
 from typing import TYPE_CHECKING, Callable, Final, cast
 
+from finn.custom_op.fpgadataflow.attention import ScaledDotProductAttention
+from finn.custom_op.fpgadataflow.attention_heads import MergeMultiHeads, SplitMultiHeads
+from finn.custom_op.fpgadataflow.channelwise_op import ChannelwiseOp
+from finn.custom_op.fpgadataflow.convolutioninputgenerator import ConvolutionInputGenerator
+from finn.custom_op.fpgadataflow.elementwise_binary import ElementwiseBinaryOperation
 from finn.custom_op.fpgadataflow.hwcustomop import HWCustomOp
+from finn.custom_op.fpgadataflow.lookup import Lookup
 from finn.custom_op.fpgadataflow.matrixvectoractivation import MVAU
+from finn.custom_op.fpgadataflow.pool import Pool
+from finn.custom_op.fpgadataflow.thresholding import Thresholding
+from finn.custom_op.fpgadataflow.vectorvectoractivation import VVAU
 from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
 from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
 from finn.util.basic import make_build_dir
@@ -180,10 +189,31 @@ def _get_key_part_parameter(self, op: HWCustomOp, model: ModelWrapper) -> str:
                     f"type MVAU but has no mem_mode set!"
                 ) from e
             if mem_mode in ["internal_embedded", "internal_decoupled"]:
-                # TODO: Add shape!
-                weight = np.ascontiguousarray(model.get_initializer(op.onnx_node.input[1]))
-                array_hash = self.hasher(weight.tobytes()).hexdigest()
+                tensor = model.get_initializer(op.onnx_node.input[1])
+                weight = np.ascontiguousarray(tensor)
+                array_hash = self.hasher(weight.tobytes())
+                # TODO: Fix typing error for next line
+                array_hash.update(str(tensor.shape).encode("UTF-8"))
+                array_hash = array_hash.hexdigest()
                 return f"weights_hash:{array_hash}\n"
+        elif isinstance(
+            op,
+            (
+                ScaledDotProductAttention,
+                SplitMultiHeads,
+                MergeMultiHeads,
+                ChannelwiseOp,
+                ConvolutionInputGenerator,
+                ElementwiseBinaryOperation,
+                Lookup,
+                Pool,
+                Thresholding,
+                VVAU,
+            ),
+        ):
+            raise NotImplementedError(
+                "Need to implement which parameters need to be " "cached for this component!"
+            )
         return ""
 
     def get_key(self, op: HWCustomOp, model: ModelWrapper) -> str:

From f1489871ec58945f700516dd2223e1af61eb0d6a Mon Sep 17 00:00:00 2001
From: bwintermann <bjarne.wintermann@uni-paderborn.de>
Date: Thu, 4 Sep 2025 18:57:35 +0200
Subject: [PATCH 08/17] Added parameter hashing for all operators that have
 external parameters

---
 .../transformation/fpgadataflow/ip_cache.py   | 82 ++++++++++++-------
 1 file changed, 51 insertions(+), 31 deletions(-)

diff --git a/src/finn/transformation/fpgadataflow/ip_cache.py b/src/finn/transformation/fpgadataflow/ip_cache.py
index 4d0adc447d..ba0ddefe60 100644
--- a/src/finn/transformation/fpgadataflow/ip_cache.py
+++ b/src/finn/transformation/fpgadataflow/ip_cache.py
@@ -11,17 +11,14 @@
 from pathlib import Path
 from qonnx.custom_op.registry import getCustomOp
 from qonnx.transformation.base import Transformation
-from typing import TYPE_CHECKING, Callable, Final, cast
+from typing import TYPE_CHECKING, Any, Callable, Final, cast
 
 from finn.custom_op.fpgadataflow.attention import ScaledDotProductAttention
-from finn.custom_op.fpgadataflow.attention_heads import MergeMultiHeads, SplitMultiHeads
 from finn.custom_op.fpgadataflow.channelwise_op import ChannelwiseOp
-from finn.custom_op.fpgadataflow.convolutioninputgenerator import ConvolutionInputGenerator
 from finn.custom_op.fpgadataflow.elementwise_binary import ElementwiseBinaryOperation
 from finn.custom_op.fpgadataflow.hwcustomop import HWCustomOp
 from finn.custom_op.fpgadataflow.lookup import Lookup
 from finn.custom_op.fpgadataflow.matrixvectoractivation import MVAU
-from finn.custom_op.fpgadataflow.pool import Pool
 from finn.custom_op.fpgadataflow.thresholding import Thresholding
 from finn.custom_op.fpgadataflow.vectorvectoractivation import VVAU
 from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
@@ -178,8 +175,18 @@ def _get_key_part_attributes(self, op: HWCustomOp) -> str:
         return key_part
 
     def _get_key_part_parameter(self, op: HWCustomOp, model: ModelWrapper) -> str:
-        # TODO: Extend to all custom ops that require this
-        if isinstance(op, MVAU):
+        """Get the key part defined by the op parameters.
+
+        If, for example, weights are embedded into the operators, they need to
+        be part of the hashed key as well.
+        """
+
+        def ndarray_to_bytes(tensor: Any) -> bytes:
+            cont = np.ascontiguousarray(tensor)
+            assert type(tensor) is np.ndarray
+            return cont.tobytes() + str(tensor.shape).encode("UTF-8")
+
+        if isinstance(op, (MVAU, VVAU)):
             mem_mode = None
             try:
                 mem_mode = op.get_nodeattr("mem_mode")
@@ -189,31 +196,44 @@ def _get_key_part_parameter(self, op: HWCustomOp, model: ModelWrapper) -> str:
                     f"type MVAU but has no mem_mode set!"
                 ) from e
             if mem_mode in ["internal_embedded", "internal_decoupled"]:
-                tensor = model.get_initializer(op.onnx_node.input[1])
-                weight = np.ascontiguousarray(tensor)
-                array_hash = self.hasher(weight.tobytes())
-                # TODO: Fix typing error for next line
-                array_hash.update(str(tensor.shape).encode("UTF-8"))
-                array_hash = array_hash.hexdigest()
-                return f"weights_hash:{array_hash}\n"
-        elif isinstance(
-            op,
-            (
-                ScaledDotProductAttention,
-                SplitMultiHeads,
-                MergeMultiHeads,
-                ChannelwiseOp,
-                ConvolutionInputGenerator,
-                ElementwiseBinaryOperation,
-                Lookup,
-                Pool,
-                Thresholding,
-                VVAU,
-            ),
-        ):
-            raise NotImplementedError(
-                "Need to implement which parameters need to be " "cached for this component!"
-            )
+                weightbytes = ndarray_to_bytes(model.get_initializer(op.onnx_node.input[1]))
+                threshbytes = ndarray_to_bytes(model.get_initializer(op.onnx_node.input[2]))
+                array_hash = self.hasher(weightbytes + threshbytes).hexdigest()
+                return f"param_hash:{array_hash}\n"
+        elif isinstance(op, (Thresholding, ChannelwiseOp, Lookup)):
+            parambytes = ndarray_to_bytes(model.get_initializer(op.onnx_node.input[1]))
+            array_hash = self.hasher(parambytes).hexdigest()
+            return f"param_hash:{array_hash}\n"
+        elif isinstance(op, (ElementwiseBinaryOperation,)):
+            parambytes0 = ndarray_to_bytes(model.get_initializer(op.onnx_node.input[0]))
+            parambytes1 = ndarray_to_bytes(model.get_initializer(op.onnx_node.input[1]))
+            array_hash = self.hasher(parambytes0 + parambytes1).hexdigest()
+            return f"param_hash:{array_hash}\n"
+        elif isinstance(op, ScaledDotProductAttention):
+            key_part = ""
+            if op.get_nodeattr("ActQKMatMul") == "thresholds":
+                thresholds = model.get_initializer(
+                    op.get_input_name_by_name("thresholds_qk_matmul")
+                )
+                hashed = self.hasher(ndarray_to_bytes(thresholds)).hexdigest()
+                key_part += f"thresholds_qk_matmul:{hashed}\n"
+            if op.get_nodeattr("ActASoftmax") == "thresholds":
+                thresholds = model.get_initializer(
+                    op.get_input_name_by_name("thresholds_a_softmax")
+                )
+                hashed = self.hasher(ndarray_to_bytes(thresholds)).hexdigest()
+                key_part += f"thresholds_a_softmax:{hashed}\n"
+            if op.get_nodeattr("ActAVMatMul") == "thresholds":
+                thresholds = model.get_initializer(
+                    op.get_input_name_by_name("thresholds_av_matmul")
+                )
+                hashed = self.hasher(ndarray_to_bytes(thresholds)).hexdigest()
+                key_part += f"thresholds_av_matmul:{hashed}\n"
+            if op.get_nodeattr("mask_mode") == "const":
+                mask = model.get_initializer(op.get_input_name_by_name("M"))
+                hashed = self.hasher(ndarray_to_bytes(mask)).hexdigest()
+                key_part += f"M:{hashed}\n"
+            return key_part
         return ""
 
     def get_key(self, op: HWCustomOp, model: ModelWrapper) -> str:

From f6c9549872142e2f78bacc062e6b0f8004857a76 Mon Sep 17 00:00:00 2001
From: bwintermann <bjarne.wintermann@uni-paderborn.de>
Date: Mon, 8 Sep 2025 15:41:25 +0200
Subject: [PATCH 09/17] Added bug fixes, first test, consideration for the
 clock and the fpgapart

---
 src/finn/builder/build_dataflow_config.py     |  11 ++
 src/finn/builder/build_dataflow_steps.py      |  38 ++++++-
 .../transformation/fpgadataflow/ip_cache.py   |  82 ++++++++++++--
 tests/infrastructure/test_ip_cache.py         | 106 ++++++++++++++++++
 4 files changed, 226 insertions(+), 11 deletions(-)
 create mode 100644 tests/infrastructure/test_ip_cache.py

diff --git a/src/finn/builder/build_dataflow_config.py b/src/finn/builder/build_dataflow_config.py
index 2d201e51a0..6d6c988381 100644
--- a/src/finn/builder/build_dataflow_config.py
+++ b/src/finn/builder/build_dataflow_config.py
@@ -321,6 +321,17 @@ class DataflowBuildConfig(DataClassJSONMixin, DataClassYAMLMixin):
     #: relevant if use_ip_caching = True
     ip_cache_hashfunction: str = "sha256"
 
+    #: If use_ip_caching is enabled, this flag determines whether
+    #: the value of _resolve_hls_clk_period() is used as part of
+    #: the cached key. Can be turned off for more cache hits, but
+    #: then delivers an IP with an outdated constraints file. This
+    #: might affect OOC Synthesis and other parts of the design, use
+    #: at your own risk.
+    cache_hls_clk_period: bool = True
+
+    #: The same as `cache_hls_clk_period`, but for the passed FPGA part.
+    cache_fpgapart: bool = True
+
     #: Call CapConvolutionFIFODepths in InsertAndSetFIFODepths transform
     #: to make convolution FIFOs smaller where appropriate
     default_swg_exception: Optional[bool] = False
diff --git a/src/finn/builder/build_dataflow_steps.py b/src/finn/builder/build_dataflow_steps.py
index dafc7a1a5b..423d20831b 100644
--- a/src/finn/builder/build_dataflow_steps.py
+++ b/src/finn/builder/build_dataflow_steps.py
@@ -115,7 +115,7 @@
 from finn.transformation.streamline.reorder import MakeMaxPoolNHWC
 from finn.transformation.streamline.round_thresholds import RoundAndClipThresholds
 from finn.util.basic import get_liveness_threshold_cycles, get_rtlsim_trace_depth
-from finn.util.exception import FINNUserError
+from finn.util.exception import FINNConfigurationError, FINNUserError
 from finn.util.logging import log
 from finn.util.test import execute_parent
 
@@ -528,12 +528,22 @@ def _make_hls_estimate_report(model: ModelWrapper, cfg: DataflowBuildConfig) ->
 def step_ip_generation(model: ModelWrapper, cfg: DataflowBuildConfig) -> ModelWrapper:
     """Unified step, that does what step_hw_codegen and step_hw_ipgen did before. (With cache!)."""
     if cfg.use_ip_caching:
+        clk = cfg._resolve_hls_clk_period()
+        if clk is None:
+            # TODO: Change into a logging error instead of an exception?
+            raise FINNConfigurationError(
+                "Please specify synth_clk_period_ns in your build "
+                "config (and optionally hls_clk_period_ns) before "
+                "generating IPs!"
+            )
         model = model.transform(
             CachedIPGen(
                 cfg.ip_cache_hashfunction,
                 include_prepare_ip=True,
+                cache_clock=cfg.cache_hls_clk_period,
                 fpgapart=cfg._resolve_fpga_part(),
-                clk=cfg._resolve_hls_clk_period(),
+                clk=clk,
+                cache_fpgapart=cfg.cache_fpgapart,
             )
         )
     else:
@@ -561,10 +571,30 @@ def step_hw_ipgen(model: ModelWrapper, cfg: DataflowBuildConfig):
     """Run Vitis HLS synthesis on generated code for HLSBackend nodes,
     in order to generate IP blocks. For RTL nodes this step does not do anything."""
 
-    # TODO: Move out of step_hw_ipgen, reorder steps
     if cfg.use_ip_caching:
         log.info("Using IP cache to fetch generated IPs...")
-        model = model.transform(CachedIPGen(cfg.ip_cache_hashfunction, include_prepare_ip=False))
+        clk = cfg._resolve_hls_clk_period()
+        if clk is None and cfg.cache_hls_clk_period:
+            log.critical(
+                "No HLS/general synthesis clock period was specified, but required for "
+                "caching (cfg.cache_hls_clk_period). Skipping caching for safety. "
+                "Executing just HLSSynthIP()..."
+            )
+            model = model.transform(HLSSynthIP())
+        else:
+            # If clk is None but we don't use it anyway, give it some placeholder value
+            if clk is None:
+                clk = 0
+            model = model.transform(
+                CachedIPGen(
+                    cfg.ip_cache_hashfunction,
+                    cache_clock=cfg.cache_hls_clk_period,
+                    include_prepare_ip=False,
+                    fpgapart=cfg._resolve_fpga_part(),
+                    clk=clk,
+                    cache_fpgapart=cfg.cache_fpgapart,
+                )
+            )
     else:
         log.info("Generating all IPs from scratch...")
         model = model.transform(HLSSynthIP())
diff --git a/src/finn/transformation/fpgadataflow/ip_cache.py b/src/finn/transformation/fpgadataflow/ip_cache.py
index ba0ddefe60..9385312781 100644
--- a/src/finn/transformation/fpgadataflow/ip_cache.py
+++ b/src/finn/transformation/fpgadataflow/ip_cache.py
@@ -26,6 +26,7 @@
 from finn.util.basic import make_build_dir
 from finn.util.deps import get_cache_path, get_deps_path
 from finn.util.exception import FINNConfigurationError, FINNInternalError
+from finn.util.fpgadataflow import is_hls_node, is_rtl_node
 from finn.util.logging import log
 
 if TYPE_CHECKING:
@@ -101,7 +102,15 @@ class IPCache:
     # TODO: Update hash functions
     allowed_hashfuncs: Final[list[str]] = ["sha256"]
 
-    def __init__(self, cache_dir: Path, hashfunc: str) -> None:
+    def __init__(
+        self,
+        cache_dir: Path,
+        hashfunc: str,
+        hls_clk_period: float,
+        cache_hls_clk: bool,
+        fpgapart: str,
+        cache_fpgapart: bool,
+    ) -> None:
         """Construct a new IPCache object.
 
         Args:
@@ -109,6 +118,8 @@ def __init__(self, cache_dir: Path, hashfunc: str) -> None:
             hashfunc: The name of the hash function to be used.
         """
         self.cache_dir = cache_dir
+        self.cache_hls_clk = cache_hls_clk
+        self.cache_fpgapart = cache_fpgapart
         if not self.cache_dir.exists():
             self.cache_dir.mkdir()
         log.info(f"Opened cache handler. Cache directory: {self.cache_dir}")
@@ -142,6 +153,10 @@ def __init__(self, cache_dir: Path, hashfunc: str) -> None:
         ).stdout.strip()
         log.info(f"HLSLIB Commit reads: {self.hlslib_commit}")
 
+        # HLS Clk and device
+        self.clk = hls_clk_period
+        self.fpgapart = fpgapart
+
     def _get_key_part_attributes(self, op: HWCustomOp) -> str:
         """Return the part of the key that contains attributes and their values."""
         key_part = ""
@@ -248,7 +263,7 @@ def get_key(self, op: HWCustomOp, model: ModelWrapper) -> str:
         global CACHE_IP_DEFINITIONS
         if type(op) not in CACHE_IP_DEFINITIONS.keys():
             log.error(
-                "Tried getting the key for a non-cacheable custom operator. "
+                f"Tried getting the key for a non-cacheable custom operator ({type(op).__name__}). "
                 "Did you perhaps forget to register the op for caching via "
                 "@cache_ip(...)?"
             )
@@ -257,7 +272,13 @@ def get_key(self, op: HWCustomOp, model: ModelWrapper) -> str:
         key = f"FINN: {self.finn_commit}\nHLSLIB: {self.hlslib_commit}\n"
 
         # Two custom ops might need the same attributes, so add the type
-        key += "type:" + str(type(op)) + "\n"
+        key += "type:" + type(op).__name__ + "\n"
+
+        # Hash synth clk period and part
+        if self.cache_hls_clk:
+            key += f"hls_clk_period_ns:{self.clk}\n"
+        if self.cache_fpgapart:
+            key += f"fpgapart:{self.fpgapart}\n"
 
         # Add all node attributes required
         key += self._get_key_part_attributes(op) + "\n"
@@ -368,9 +389,34 @@ def update(self, model: ModelWrapper) -> None:
 
         Requires HLSSynthIP() to be run before.
         """
+
+        def attribute_path_exists(name: str, op: HWCustomOp) -> bool:
+            try:
+                data = op.get_nodeattr(name)
+                if data is None or data == "":
+                    return False
+                return Path(cast(str, data)).exists()
+            except Exception:
+                return False
+
         for node in model.graph.node:
             op, key, hashed_key, target_dir = self._get_node_data(node, model)
             if not target_dir.exists():
+                # Check to make sure we only cache synthesized IPs
+                if is_hls_node(node) or is_rtl_node(node):
+                    is_done = (
+                        attribute_path_exists("code_gen_dir_ipgen", op)
+                        and attribute_path_exists("ip_path", op)
+                        and attribute_path_exists("ipgen_path", op)
+                    )
+                    if not is_done:
+                        log.warning(
+                            f"Node {node.name} is hasn't been synthesized yet. "
+                            f"Cannot cache. Skipping."
+                        )
+                        continue
+                else:
+                    log.warning(f"Cannot cache node {node.name}. Node is not a HW node!")
                 code_gen_dir = Path(cast(str, op.get_nodeattr("code_gen_dir_ipgen")))
                 if not code_gen_dir.exists():
                     log.warning(
@@ -380,7 +426,17 @@ def update(self, model: ModelWrapper) -> None:
                 shutil.copytree(code_gen_dir, target_dir, dirs_exist_ok=True)
                 self._create_key_file(key, target_dir / "key.txt")
                 self._dump_nodeattrs(op, target_dir / "nodeattrs.json")
-                log.info(f"Cached node {node.name}. Cached at: {target_dir} from {code_gen_dir}!")
+                typ = ""
+                if is_hls_node(node):
+                    typ = "HLS"
+                elif is_rtl_node(node):
+                    typ = "RTL"
+                else:
+                    typ = "unknown type"
+                log.info(
+                    f"Cached {typ} node {node.name}. "
+                    f"Cached at: {target_dir} from {code_gen_dir}!"
+                )
 
 
 class CachedIPGen(Transformation):
@@ -390,8 +446,10 @@ def __init__(
         self,
         hash_function: str,
         include_prepare_ip: bool,
-        fpgapart: str | None = None,
-        clk: float | None = None,
+        cache_clock: bool,
+        clk: float,
+        cache_fpgapart: bool,
+        fpgapart: str,
     ) -> None:
         """(PrepareIP and) HLSSynth but cached.
 
@@ -405,11 +463,20 @@ def __init__(
         self.hashfunc = hash_function
         self.prepareip = include_prepare_ip
         self.part = fpgapart
+        self.cache_part = cache_fpgapart
         self.clk = clk
+        self.cache_clock = cache_clock
 
     def apply(self, model: ModelWrapper) -> tuple[ModelWrapper, bool]:
         """Apply cached HLS Synthesis (and PrepareIP)."""
-        cache = IPCache(cache_dir=get_cache_path(), hashfunc=self.hashfunc)
+        cache = IPCache(
+            cache_dir=get_cache_path(),
+            hashfunc=self.hashfunc,
+            hls_clk_period=self.clk,
+            cache_hls_clk=self.cache_clock,
+            fpgapart=self.part,
+            cache_fpgapart=self.cache_part,
+        )
         log.info(
             f"Applying cache to {cache.get_num_cached_ips(model)} "
             f"/ {len(model.graph.node)} nodes!"
@@ -422,6 +489,7 @@ def apply(self, model: ModelWrapper) -> tuple[ModelWrapper, bool]:
                 )
             log.info("Running PrepareIP for uncached IPs...")
             model = model.transform(PrepareIP(self.part, self.clk))
+            cache.update(model)
         log.info("Running synthesis for uncached IPs...")
         model = model.transform(HLSSynthIP())
         log.info("Updating cache with newly generated IPs...")
diff --git a/tests/infrastructure/test_ip_cache.py b/tests/infrastructure/test_ip_cache.py
new file mode 100644
index 0000000000..1d43a9cf5e
--- /dev/null
+++ b/tests/infrastructure/test_ip_cache.py
@@ -0,0 +1,106 @@
+"""Test that the IP cache is working correctly. (No false positives, no collisions, speed, etc.)."""
+import pytest
+
+import numpy as np
+import os
+from qonnx.core.datatype import DataType
+from qonnx.custom_op.registry import getCustomOp
+from qonnx.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames
+from qonnx.util.basic import gen_finn_dt_tensor
+
+from finn.custom_op.fpgadataflow.hls.matrixvectoractivation_hls import MVAU_hls
+from finn.transformation.fpgadataflow.ip_cache import CachedIPGen, IPCache
+from finn.transformation.fpgadataflow.specialize_layers import SpecializeLayers
+from finn.util.basic import alveo_part_map
+from finn.util.deps import get_cache_path
+from tests.fpgadataflow.test_fpgadataflow_mvau import make_single_fclayer_modelwrapper
+
+
+@pytest.mark.parametrize("op_type", [MVAU_hls])
+@pytest.mark.parametrize("hashfunc", ["sha256"])
+@pytest.mark.parametrize("fpgapart", [alveo_part_map["U280"]])
+def test_ip_hash_key(op_type: type, hashfunc: str, fpgapart: str) -> None:
+    """Test that key generation doesnt create false positives or collisions."""
+    os.environ["FINN_IP_CACHE"] = os.environ["FINN_BUILD_DIR"]
+    if op_type is MVAU_hls:
+        # TODO: Fix gen_finn_dt_tensor issue in our QONNX (same values
+        # for subsequent calls of the function)
+        W = gen_finn_dt_tensor(DataType["UINT4"], (10, 10), seed=1)
+        T = gen_finn_dt_tensor(DataType["UINT4"], (10, 10), seed=1)
+
+        # Creating the model
+        model = make_single_fclayer_modelwrapper(
+            W, 1, 1, DataType["UINT4"], DataType["UINT4"], DataType["UINT4"], T, DataType["UINT4"]
+        )
+        model = model.transform(SpecializeLayers(fpgapart))
+        model = model.transform(GiveUniqueNodeNames())
+        model = model.transform(GiveReadableTensorNames())
+
+        # Some sanity checks
+        # Not explicitly set. If the default behaviour changes, we need to fix this to be HLS
+        assert model.graph.node[0].op_type == "MVAU_hls"
+        assert getCustomOp(model.graph.node[0]).get_nodeattr("mem_mode") in [
+            "internal_decoupled",
+            "internal_embedded",
+        ]
+
+        # Run the cache transformation
+        model = model.transform(CachedIPGen(hashfunc, True, True, 2.5, True, fpgapart))
+        cache = IPCache(get_cache_path(), hashfunc, 2.5, True, fpgapart, True)
+        original_key = cache.get_key(getCustomOp(model.graph.node[0]), model)
+
+        # Check that the hash changes with the attributes
+        for attribute in [
+            "resType",
+            "MW",
+            "MH",
+            "SIMD",
+            "PE",
+            "inputDataType",
+            "weightDataType",
+            "outputDataType",
+        ]:
+            op = getCustomOp(model.graph.node[0])
+            original_value = op.get_nodeattr(attribute)
+            if attribute in ["MW", "MH", "SIMD", "PE"]:
+                op.set_nodeattr(attribute, original_value + 1)
+            elif attribute == "resType":
+                assert original_value == "auto"
+                op.set_nodeattr(attribute, "dsp")
+            else:
+                op.set_nodeattr(attribute, "UINT6")
+            assert cache.get_key(op, model) != original_key
+            op.set_nodeattr(attribute, original_value)
+
+        # Check that the hash changes with the parameters
+        # Weights
+        new_W = gen_finn_dt_tensor(DataType["UINT4"], (10, 10), seed=2)
+        assert not np.array_equal(W, new_W)
+        weight_init = model.graph.node[0].input[1]
+        model.set_initializer(weight_init, new_W)
+        new_key = cache.get_key(getCustomOp(model.graph.node[0]), model)
+        assert original_key != new_key
+        model.set_initializer(weight_init, W)
+
+        # Thresholds
+        new_T = gen_finn_dt_tensor(DataType["UINT4"], (10, 10), seed=2)
+        assert not np.array_equal(T, new_T)
+        thresh_init = model.graph.node[0].input[2]
+        model.set_initializer(thresh_init, new_T)
+        new_key = cache.get_key(getCustomOp(model.graph.node[0]), model)
+        assert original_key != new_key
+        model.set_initializer(thresh_init, T)
+
+        # Check that the IP was cached at the correct path
+        path = cache._cache_dir_path(cache.get_hash_hex(original_key))
+        assert path.exists()
+        assert (path / "nodeattrs.json").exists()
+        assert (path / "key.txt").exists()
+        with (path / "key.txt").open("r") as f:
+            data = f.read()
+            assert "type:MVAU_hls" in data
+            assert f"Hashed using {hashfunc}" in data
+            assert original_key in data
+
+    else:
+        raise RuntimeError(f"Test for op type {op_type.__name__} not implemented!")

From e763690b360bfaae5dfe334faae5c37321f453d3 Mon Sep 17 00:00:00 2001
From: bwintermann <bjarne.wintermann@uni-paderborn.de>
Date: Mon, 8 Sep 2025 17:46:16 +0200
Subject: [PATCH 10/17] Added hash functions, check max path lengths, cleaned
 up test, refactored some functions.

---
 .../transformation/fpgadataflow/ip_cache.py   |  98 ++++---
 tests/infrastructure/test_ip_cache.py         | 254 ++++++++++++------
 2 files changed, 242 insertions(+), 110 deletions(-)

diff --git a/src/finn/transformation/fpgadataflow/ip_cache.py b/src/finn/transformation/fpgadataflow/ip_cache.py
index 9385312781..6e3fe1fd6a 100644
--- a/src/finn/transformation/fpgadataflow/ip_cache.py
+++ b/src/finn/transformation/fpgadataflow/ip_cache.py
@@ -5,9 +5,11 @@
 import hashlib
 import json
 import numpy as np
+import os
 import shlex
 import shutil
 import subprocess
+import sys
 from pathlib import Path
 from qonnx.custom_op.registry import getCustomOp
 from qonnx.transformation.base import Transformation
@@ -34,6 +36,44 @@
     from qonnx.core.modelwrapper import ModelWrapper
 
 
+# UTILITY FUNCTIONS
+def _attribute_path_exists(name: str, op: HWCustomOp) -> bool:
+    """Check that the node attribute path exists.
+    If the node attribute cannot be loaded, return False."""  # noqa
+    try:
+        data = op.get_nodeattr(name)
+        if data is None or data == "":
+            return False
+        return Path(cast(str, data)).exists()
+    except Exception:
+        return False
+
+
+def _check_path_lengths(
+    pc_name_max: int, pc_path_max: int, hashed_key: str, target_dir: Path
+) -> bool:
+    """Check if we follow the path length limits. If not return False, otherwise True."""
+    if len(hashed_key) > pc_name_max:
+        log.error(
+            f"Cannot cache an IP: The hash hex representation "
+            f"is too long to be allowed as a filename on your "
+            f"system (best effort detected limit: "
+            f"{pc_name_max}). Skipping caching."
+        )
+        return False
+    path_bytes = len(str(target_dir.absolute()).encode("UTF-8"))
+    if path_bytes > pc_path_max:
+        log.error(
+            f"Cannot cache an IP: the generated path length of "
+            f"the cache location is not allowed on your system! "
+            f"The best effort detected limit is: "
+            f"{pc_path_max} bytes, the path length is "
+            f"{path_bytes} bytes. Skipping caching."
+        )
+        return False
+    return True
+
+
 CACHE_IP_DEFINITIONS: dict[type, dict[str, list[str]]] = {}
 """Contains all node attributes that a custom operator needs to be characterized.
 Filled by the cache_ip decorator. If the field "use" is defined, these attributes are
@@ -100,7 +140,7 @@ class IPCache:
     """
 
     # TODO: Update hash functions
-    allowed_hashfuncs: Final[list[str]] = ["sha256"]
+    allowed_hashfuncs: Final[list[str]] = ["sha256", "sha512", "blake2s", "blake2b"]
 
     def __init__(
         self,
@@ -120,6 +160,17 @@ def __init__(
         self.cache_dir = cache_dir
         self.cache_hls_clk = cache_hls_clk
         self.cache_fpgapart = cache_fpgapart
+
+        # Used to check validity of cache directory names
+        if sys.platform != "win32":
+            self.max_hash_len = os.pathconf("/", "PC_NAME_MAX")
+            self.max_path_len = os.pathconf("/", "PC_PATH_MAX")
+        else:
+            # TODO: Implement filesystem checks
+            # 256 seems to be the default max path length under windows
+            self.max_hash_len = 256
+            self.max_path_len = 256
+
         if not self.cache_dir.exists():
             self.cache_dir.mkdir()
         log.info(f"Opened cache handler. Cache directory: {self.cache_dir}")
@@ -367,6 +418,15 @@ def _get_node_data(
         hashed_key = self.get_hash_hex(key)
         return op, key, hashed_key, self._cache_dir_path(hashed_key)
 
+    def _is_op_synthesized(self, op: HWCustomOp) -> bool:
+        """Return whether the given op is synthesized. This is derived from the existence and
+        validity of the paths in code_gen_dir_ipgen, ipgen_path and ip_path."""  # noqa
+        return (
+            _attribute_path_exists("code_gen_dir_ipgen", op)
+            and _attribute_path_exists("ip_path", op)
+            and _attribute_path_exists("ipgen_path", op)
+        )
+
     def get_num_cached_ips(self, model: ModelWrapper) -> int:
         """Return the number of cached IPs in the model."""
         count = 0
@@ -389,31 +449,17 @@ def update(self, model: ModelWrapper) -> None:
 
         Requires HLSSynthIP() to be run before.
         """
-
-        def attribute_path_exists(name: str, op: HWCustomOp) -> bool:
-            try:
-                data = op.get_nodeattr(name)
-                if data is None or data == "":
-                    return False
-                return Path(cast(str, data)).exists()
-            except Exception:
-                return False
-
         for node in model.graph.node:
             op, key, hashed_key, target_dir = self._get_node_data(node, model)
+            if not _check_path_lengths(
+                self.max_hash_len, self.max_path_len, hashed_key, target_dir
+            ):
+                return
             if not target_dir.exists():
                 # Check to make sure we only cache synthesized IPs
                 if is_hls_node(node) or is_rtl_node(node):
-                    is_done = (
-                        attribute_path_exists("code_gen_dir_ipgen", op)
-                        and attribute_path_exists("ip_path", op)
-                        and attribute_path_exists("ipgen_path", op)
-                    )
-                    if not is_done:
-                        log.warning(
-                            f"Node {node.name} is hasn't been synthesized yet. "
-                            f"Cannot cache. Skipping."
-                        )
+                    if not self._is_op_synthesized(op):
+                        log.warning(f"{node.name} hasn't been synthesized yet and can't be cached.")
                         continue
                 else:
                     log.warning(f"Cannot cache node {node.name}. Node is not a HW node!")
@@ -426,16 +472,8 @@ def attribute_path_exists(name: str, op: HWCustomOp) -> bool:
                 shutil.copytree(code_gen_dir, target_dir, dirs_exist_ok=True)
                 self._create_key_file(key, target_dir / "key.txt")
                 self._dump_nodeattrs(op, target_dir / "nodeattrs.json")
-                typ = ""
-                if is_hls_node(node):
-                    typ = "HLS"
-                elif is_rtl_node(node):
-                    typ = "RTL"
-                else:
-                    typ = "unknown type"
                 log.info(
-                    f"Cached {typ} node {node.name}. "
-                    f"Cached at: {target_dir} from {code_gen_dir}!"
+                    f"Cached node {node.name}. " f"Cached at: {target_dir} from {code_gen_dir}!"
                 )
 
 
diff --git a/tests/infrastructure/test_ip_cache.py b/tests/infrastructure/test_ip_cache.py
index 1d43a9cf5e..3a30f991a7 100644
--- a/tests/infrastructure/test_ip_cache.py
+++ b/tests/infrastructure/test_ip_cache.py
@@ -3,12 +3,18 @@
 
 import numpy as np
 import os
+import time
+from copy import deepcopy
+from pathlib import Path
 from qonnx.core.datatype import DataType
+from qonnx.core.modelwrapper import ModelWrapper
 from qonnx.custom_op.registry import getCustomOp
 from qonnx.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames
 from qonnx.util.basic import gen_finn_dt_tensor
+from typing import cast
 
 from finn.custom_op.fpgadataflow.hls.matrixvectoractivation_hls import MVAU_hls
+from finn.custom_op.fpgadataflow.hwcustomop import HWCustomOp
 from finn.transformation.fpgadataflow.ip_cache import CachedIPGen, IPCache
 from finn.transformation.fpgadataflow.specialize_layers import SpecializeLayers
 from finn.util.basic import alveo_part_map
@@ -16,91 +22,179 @@
 from tests.fpgadataflow.test_fpgadataflow_mvau import make_single_fclayer_modelwrapper
 
 
+def mvau_hls_create_model(fpgapart: str) -> tuple[ModelWrapper, np.ndarray, np.ndarray]:
+    """Create and sanity check a model for testing MVAU_hls caching.
+
+    Returns:
+        ModelWrapper, NDArray, NDArray: Model, weights, thresholds.
+    """
+    # TODO: Fix gen_finn_dt_tensor issue in our QONNX (same values
+    # for subsequent calls of the function)
+    W = gen_finn_dt_tensor(DataType["UINT4"], (10, 10), seed=1)
+    T = gen_finn_dt_tensor(DataType["UINT4"], (10, 10), seed=1)
+
+    # Creating the model
+    model = make_single_fclayer_modelwrapper(
+        W, 1, 1, DataType["UINT4"], DataType["UINT4"], DataType["UINT4"], T, DataType["UINT4"]
+    )
+    model = model.transform(SpecializeLayers(fpgapart))
+    model = model.transform(GiveUniqueNodeNames())
+    model = model.transform(GiveReadableTensorNames())
+
+    # Some sanity checks
+    # Not explicitly set. If the default behaviour changes, we need to fix this to be HLS
+    assert model.graph.node[0].op_type == "MVAU_hls"
+    assert getCustomOp(model.graph.node[0]).get_nodeattr("mem_mode") in [
+        "internal_decoupled",
+        "internal_embedded",
+    ]
+    return model, W, T
+
+
+def mvau_hls_specific_asserts(
+    model: ModelWrapper,
+    original_op: HWCustomOp,
+    original_cache: IPCache,
+    original_key: str,
+    W: np.ndarray,
+    T: np.ndarray,
+) -> None:  # noqa
+    """Run MVAU_hls specific asserts to validate caching."""
+    for attribute in [
+        "resType",
+        "MW",
+        "MH",
+        "SIMD",
+        "PE",
+        "inputDataType",
+        "weightDataType",
+        "outputDataType",
+    ]:
+        original_value = original_op.get_nodeattr(attribute)
+        if attribute in ["MW", "MH", "SIMD", "PE"]:
+            original_op.set_nodeattr(attribute, original_value + 1)  # type: ignore
+        elif attribute == "resType":
+            assert original_value == "auto"
+            original_op.set_nodeattr(attribute, "dsp")
+        else:
+            original_op.set_nodeattr(attribute, "UINT6")
+        assert original_cache.get_key(original_op, model) != original_key
+        original_op.set_nodeattr(attribute, original_value)
+
+    # Check that the hash changes with the parameters
+    # Weights
+    new_W = gen_finn_dt_tensor(DataType["UINT4"], (10, 10), seed=2)
+    assert not np.array_equal(W, new_W)
+    weight_init = model.graph.node[0].input[1]
+    model.set_initializer(weight_init, new_W)
+    new_key = original_cache.get_key(original_op, model)
+    assert original_key != new_key
+    model.set_initializer(weight_init, W)
+
+    # Thresholds
+    new_T = gen_finn_dt_tensor(DataType["UINT4"], (10, 10), seed=2)
+    assert not np.array_equal(T, new_T)
+    thresh_init = model.graph.node[0].input[2]
+    model.set_initializer(thresh_init, new_T)
+    new_key = original_cache.get_key(original_op, model)
+    assert original_key != new_key
+    model.set_initializer(thresh_init, T)
+
+
+def get_first_op(model: ModelWrapper) -> HWCustomOp:
+    """Return the op of the first node in the model."""
+    return getCustomOp(model.graph.node[0])
+
+
 @pytest.mark.parametrize("op_type", [MVAU_hls])
 @pytest.mark.parametrize("hashfunc", ["sha256"])
 @pytest.mark.parametrize("fpgapart", [alveo_part_map["U280"]])
-def test_ip_hash_key(op_type: type, hashfunc: str, fpgapart: str) -> None:
-    """Test that key generation doesnt create false positives or collisions."""
+@pytest.mark.parametrize("hls_clk", [2.5])
+def test_ip_hash_key(op_type: type, hashfunc: str, fpgapart: str, hls_clk: float) -> None:
+    """Test IP Caching.
+
+    To do so, we create models that we then run the cache on. We check that for
+    changes in any attribute, external parameter and clock the generated hash changes as well.
+    We also check that the generated IP is at the correct path, with all meta-information,
+    and that subsequent synthesis runs actually use the cached IP by measuring the time needed
+    to re-run synthesis on a fresh copy of the original model.
+    """
     os.environ["FINN_IP_CACHE"] = os.environ["FINN_BUILD_DIR"]
+
+    # Create the model
+    model: ModelWrapper
     if op_type is MVAU_hls:
-        # TODO: Fix gen_finn_dt_tensor issue in our QONNX (same values
-        # for subsequent calls of the function)
-        W = gen_finn_dt_tensor(DataType["UINT4"], (10, 10), seed=1)
-        T = gen_finn_dt_tensor(DataType["UINT4"], (10, 10), seed=1)
-
-        # Creating the model
-        model = make_single_fclayer_modelwrapper(
-            W, 1, 1, DataType["UINT4"], DataType["UINT4"], DataType["UINT4"], T, DataType["UINT4"]
+        model, W, T = mvau_hls_create_model(fpgapart)
+    else:
+        raise AssertionError(f"Cache test for op {op_type.__name__} not yet implemented!")
+
+    # Save a copy of the unsynthesized model for later
+    unsynth_model = deepcopy(model)
+
+    # Run the cache transformation
+    model = model.transform(
+        CachedIPGen(
+            hash_function=hashfunc,
+            include_prepare_ip=True,
+            cache_clock=True,
+            clk=hls_clk,
+            cache_fpgapart=True,
+            fpgapart=fpgapart,
         )
-        model = model.transform(SpecializeLayers(fpgapart))
-        model = model.transform(GiveUniqueNodeNames())
-        model = model.transform(GiveReadableTensorNames())
-
-        # Some sanity checks
-        # Not explicitly set. If the default behaviour changes, we need to fix this to be HLS
-        assert model.graph.node[0].op_type == "MVAU_hls"
-        assert getCustomOp(model.graph.node[0]).get_nodeattr("mem_mode") in [
-            "internal_decoupled",
-            "internal_embedded",
-        ]
-
-        # Run the cache transformation
-        model = model.transform(CachedIPGen(hashfunc, True, True, 2.5, True, fpgapart))
-        cache = IPCache(get_cache_path(), hashfunc, 2.5, True, fpgapart, True)
-        original_key = cache.get_key(getCustomOp(model.graph.node[0]), model)
-
-        # Check that the hash changes with the attributes
-        for attribute in [
-            "resType",
-            "MW",
-            "MH",
-            "SIMD",
-            "PE",
-            "inputDataType",
-            "weightDataType",
-            "outputDataType",
-        ]:
-            op = getCustomOp(model.graph.node[0])
-            original_value = op.get_nodeattr(attribute)
-            if attribute in ["MW", "MH", "SIMD", "PE"]:
-                op.set_nodeattr(attribute, original_value + 1)
-            elif attribute == "resType":
-                assert original_value == "auto"
-                op.set_nodeattr(attribute, "dsp")
-            else:
-                op.set_nodeattr(attribute, "UINT6")
-            assert cache.get_key(op, model) != original_key
-            op.set_nodeattr(attribute, original_value)
-
-        # Check that the hash changes with the parameters
-        # Weights
-        new_W = gen_finn_dt_tensor(DataType["UINT4"], (10, 10), seed=2)
-        assert not np.array_equal(W, new_W)
-        weight_init = model.graph.node[0].input[1]
-        model.set_initializer(weight_init, new_W)
-        new_key = cache.get_key(getCustomOp(model.graph.node[0]), model)
-        assert original_key != new_key
-        model.set_initializer(weight_init, W)
-
-        # Thresholds
-        new_T = gen_finn_dt_tensor(DataType["UINT4"], (10, 10), seed=2)
-        assert not np.array_equal(T, new_T)
-        thresh_init = model.graph.node[0].input[2]
-        model.set_initializer(thresh_init, new_T)
-        new_key = cache.get_key(getCustomOp(model.graph.node[0]), model)
-        assert original_key != new_key
-        model.set_initializer(thresh_init, T)
-
-        # Check that the IP was cached at the correct path
-        path = cache._cache_dir_path(cache.get_hash_hex(original_key))
-        assert path.exists()
-        assert (path / "nodeattrs.json").exists()
-        assert (path / "key.txt").exists()
-        with (path / "key.txt").open("r") as f:
-            data = f.read()
-            assert "type:MVAU_hls" in data
-            assert f"Hashed using {hashfunc}" in data
-            assert original_key in data
+    )
+    cache = IPCache(
+        cache_dir=get_cache_path(),
+        hashfunc=hashfunc,
+        hls_clk_period=hls_clk,
+        cache_hls_clk=True,
+        fpgapart=fpgapart,
+        cache_fpgapart=True,
+    )
+    original_op = get_first_op(model)
+    original_key = cache.get_key(original_op, model)
 
+    # Check that the hash changes with the attributes
+    if op_type is MVAU_hls:
+        mvau_hls_specific_asserts(model, original_op, cache, original_key, W, T)
     else:
-        raise RuntimeError(f"Test for op type {op_type.__name__} not implemented!")
+        raise AssertionError(f"{op_type.__name__} specific cache test asserts not yet implemented!")
+
+    # Check that the IP was cached at the correct path
+    path = cache._cache_dir_path(cache.get_hash_hex(original_key))
+    assert path.exists()
+    assert (path / "nodeattrs.json").exists()
+    assert (path / "key.txt").exists()
+    with (path / "key.txt").open("r") as f:
+        data = f.read()
+        assert f"type:{op_type.__name__}" in data
+        assert f"Hashed using {hashfunc}" in data
+        assert original_key in data
+
+    # Check that a different HLS clk generates a different key
+    other_clk_cache = IPCache(get_cache_path(), hashfunc, hls_clk + 1.0, True, fpgapart, True)
+    assert cache.get_key(original_op, model) != other_clk_cache.get_key(original_op, model)
+
+    # Check speed of the second call (should be much faster)
+    start: float = time.time()
+    unsynth_model = unsynth_model.transform(
+        CachedIPGen(hashfunc, True, True, hls_clk, True, fpgapart)
+    )
+    ms_elapsed = (time.time() - start) * 1000  # time.time() returns seconds
+
+    # Time in seconds that the cached transform may take.
+    # 10s should be enough, even on slow systems, but if it is clear that
+    # there isn't a bug, this can be adjusted if it leads to failing
+    # CI runs.
+    CACHE_TIME_ALLOWED = 10
+    assert ms_elapsed <= 1000 * CACHE_TIME_ALLOWED
+
+    # Check that the cached and re-used IP does exist
+    first_op = get_first_op(unsynth_model)
+    expected_ip_path = (
+        Path(cast(str, first_op.get_nodeattr("code_gen_dir_ipgen")))
+        / f"project_{first_op.onnx_node.name}"
+        / "sol1"
+        / "impl"
+        / "ip"
+    )
+    assert expected_ip_path.exists()

From bb1bdda8bafc542559722eeb42292cf0bb8340a4 Mon Sep 17 00:00:00 2001
From: bwintermann <bjarne.wintermann@uni-paderborn.de>
Date: Tue, 9 Sep 2025 14:02:31 +0200
Subject: [PATCH 11/17] Refactoring, additional test, docs, readme

---
 README.md                                     |   1 +
 src/finn/builder/build_dataflow.py            |  15 +-
 src/finn/builder/build_dataflow_config.py     |   2 -
 .../transformation/fpgadataflow/ip_cache.py   | 139 ++++++++++--------
 src/finn/util/deps.py                         |   1 -
 tests/infrastructure/test_ip_cache.py         |  79 ++++++----
 6 files changed, 132 insertions(+), 105 deletions(-)

diff --git a/README.md b/README.md
index cc11567c68..bc3d4b127c 100644
--- a/README.md
+++ b/README.md
@@ -24,6 +24,7 @@ The framework is fully open-source in order to give a higher degree of flexibili
     - Optimized C++ driver
 - Quality-of-live improvements
     - Better logging and error handling
+    - IP Caching for faster build flow times
     - Type hinting/checking
     - Alternative YAML-based build configuration
     - Containerless setup
diff --git a/src/finn/builder/build_dataflow.py b/src/finn/builder/build_dataflow.py
index 6e0e2ae816..a50c9ea14f 100644
--- a/src/finn/builder/build_dataflow.py
+++ b/src/finn/builder/build_dataflow.py
@@ -263,20 +263,11 @@ def build_dataflow_cfg(model_filename, cfg: DataflowBuildConfig):
         if cfg.verbose:
             log.info("Caching enabled for operators: ")
             for k, v in CACHE_IP_DEFINITIONS.items():
-                s = f"[{k.__name__}]:\n\tuse: "
+                log.info(f"Operator: {k}:")
                 if "use" in v.keys():
-                    s += ", ".join(v["use"])
-                else:
-                    s += "*"
-                s += "\n\tignore: "
+                    log.info("\tuse: " + ", ".join(v["use"]))
                 if "ignore" in v.keys():
-                    if "use" not in v.keys():
-                        s += "defaults"
-                    else:
-                        s += ", ".join(v["ignore"])
-                else:
-                    s += ""
-                log.info(s)
+                    log.info("\tignore: " + ", ".join(v["ignore"]))
 
     # Setup done, start build flow
     try:
diff --git a/src/finn/builder/build_dataflow_config.py b/src/finn/builder/build_dataflow_config.py
index 6d6c988381..c0b196fe6b 100644
--- a/src/finn/builder/build_dataflow_config.py
+++ b/src/finn/builder/build_dataflow_config.py
@@ -137,8 +137,6 @@ class VerificationStepType(str, Enum):
     "step_generate_estimate_reports",
     "step_set_fifo_depths",
     "step_ip_generation",
-    # "step_hw_codegen",
-    # "step_hw_ipgen",
     "step_create_stitched_ip",
     "step_measure_rtlsim_performance",
     "step_out_of_context_synthesis",
diff --git a/src/finn/transformation/fpgadataflow/ip_cache.py b/src/finn/transformation/fpgadataflow/ip_cache.py
index 6e3fe1fd6a..b70018f9ed 100644
--- a/src/finn/transformation/fpgadataflow/ip_cache.py
+++ b/src/finn/transformation/fpgadataflow/ip_cache.py
@@ -37,6 +37,12 @@
 
 
 # UTILITY FUNCTIONS
+def _ndarray_to_bytes(tensor: Any) -> bytes:
+    cont = np.ascontiguousarray(tensor)
+    assert type(tensor) is np.ndarray
+    return cont.tobytes() + str(tensor.shape).encode("UTF-8")
+
+
 def _attribute_path_exists(name: str, op: HWCustomOp) -> bool:
     """Check that the node attribute path exists.
     If the node attribute cannot be loaded, return False."""  # noqa
@@ -49,7 +55,7 @@ def _attribute_path_exists(name: str, op: HWCustomOp) -> bool:
         return False
 
 
-def _check_path_lengths(
+def _check_path_lengths_okay(
     pc_name_max: int, pc_path_max: int, hashed_key: str, target_dir: Path
 ) -> bool:
     """Check if we follow the path length limits. If not return False, otherwise True."""
@@ -130,16 +136,16 @@ def wrapper(op_cls: type) -> type:
 class IPCache:
     """Manage IP caching.
 
-    Application: To apply this in a normal flow, execute somewhat like this:
-    ```
-    cache = IPCache(...)
-    model = cache.apply(model)              # Apply already cached IPs
-    model = model.transform(HLSSynthIP())   # Generate IPs that weren't available
-    cache.update(model)                     # Cache the newly generated IPs too
-    ```
+    Public methods that are relevant for the cache's usage:
+    - `model = cache.apply(model)`: Fetch cached IPs and apply them to the model,
+        returning the new model
+    - `cache.update(model)`: Update the cache by adding synthesized IPs that are not
+        yet cached into the cache.
+    - `cache.get_key(op, model)`: Get the key (string) of the given custom op
+    - `cache.get_hash_hex(key)`: Get the hex representation of the hash of the given key.
+    - `cache.get_num_cached_ips(model)`: Get the number of cached IPs in the given model.
     """
 
-    # TODO: Update hash functions
     allowed_hashfuncs: Final[list[str]] = ["sha256", "sha512", "blake2s", "blake2b"]
 
     def __init__(
@@ -156,6 +162,10 @@ def __init__(
         Args:
             cache_dir: The path of the cache directory.
             hashfunc: The name of the hash function to be used.
+            hls_clk_period: HLS clock period in ns.
+            cache_hls_clk: Use the HLS clock as part of the key.
+            fpgapart: FPGA-part used for HLSSynth and PrepareIP.
+            cache_fpgapart: Use the fpgapart as part of the key.
         """
         self.cache_dir = cache_dir
         self.cache_hls_clk = cache_hls_clk
@@ -246,12 +256,6 @@ def _get_key_part_parameter(self, op: HWCustomOp, model: ModelWrapper) -> str:
         If, for example, weights, are embedded into the operators, they need to
         be part of the hashed key as well.
         """
-
-        def ndarray_to_bytes(tensor: Any) -> bytes:
-            cont = np.ascontiguousarray(tensor)
-            assert type(tensor) is np.ndarray
-            return cont.tobytes() + str(tensor.shape).encode("UTF-8")
-
         if isinstance(op, (MVAU, VVAU)):
             mem_mode = None
             try:
@@ -262,17 +266,17 @@ def ndarray_to_bytes(tensor: Any) -> bytes:
                     f"type MVAU but has no mem_mode set!"
                 ) from e
             if mem_mode in ["internal_embedded", "internal_decoupled"]:
-                weightbytes = ndarray_to_bytes(model.get_initializer(op.onnx_node.input[1]))
-                threshbytes = ndarray_to_bytes(model.get_initializer(op.onnx_node.input[2]))
+                weightbytes = _ndarray_to_bytes(model.get_initializer(op.onnx_node.input[1]))
+                threshbytes = _ndarray_to_bytes(model.get_initializer(op.onnx_node.input[2]))
                 array_hash = self.hasher(weightbytes + threshbytes).hexdigest()
                 return f"param_hash:{array_hash}\n"
         elif isinstance(op, (Thresholding, ChannelwiseOp, Lookup)):
-            parambytes = ndarray_to_bytes(model.get_initializer(op.onnx_node.input[1]))
+            parambytes = _ndarray_to_bytes(model.get_initializer(op.onnx_node.input[1]))
             array_hash = self.hasher(parambytes).hexdigest()
             return f"param_hash:{array_hash}\n"
         elif isinstance(op, (ElementwiseBinaryOperation,)):
-            parambytes0 = ndarray_to_bytes(model.get_initializer(op.onnx_node.input[0]))
-            parambytes1 = ndarray_to_bytes(model.get_initializer(op.onnx_node.input[1]))
+            parambytes0 = _ndarray_to_bytes(model.get_initializer(op.onnx_node.input[0]))
+            parambytes1 = _ndarray_to_bytes(model.get_initializer(op.onnx_node.input[1]))
             array_hash = self.hasher(parambytes0 + parambytes1).hexdigest()
             return f"param_hash:{array_hash}\n"
         elif isinstance(op, ScaledDotProductAttention):
@@ -281,23 +285,23 @@ def ndarray_to_bytes(tensor: Any) -> bytes:
                 thresholds = model.get_initializer(
                     op.get_input_name_by_name("thresholds_qk_matmul")
                 )
-                hashed = self.hasher(ndarray_to_bytes(thresholds)).hexdigest()
+                hashed = self.hasher(_ndarray_to_bytes(thresholds)).hexdigest()
                 key_part += f"thresholds_qk_matmul:{hashed}\n"
             if op.get_nodeattr("ActASoftmax") == "thresholds":
                 thresholds = model.get_initializer(
                     op.get_input_name_by_name("thresholds_a_softmax")
                 )
-                hashed = self.hasher(ndarray_to_bytes(thresholds)).hexdigest()
+                hashed = self.hasher(_ndarray_to_bytes(thresholds)).hexdigest()
                 key_part += f"thresholds_a_softmax:{hashed}\n"
             if op.get_nodeattr("ActAVMatMul") == "thresholds":
                 thresholds = model.get_initializer(
                     op.get_input_name_by_name("thresholds_av_matmul")
                 )
-                hashed = self.hasher(ndarray_to_bytes(thresholds)).hexdigest()
+                hashed = self.hasher(_ndarray_to_bytes(thresholds)).hexdigest()
                 key_part += f"thresholds_av_matmul:{hashed}\n"
             if op.get_nodeattr("mask_mode") == "const":
                 mask = model.get_initializer(op.get_input_name_by_name("M"))
-                hashed = self.hasher(ndarray_to_bytes(mask)).hexdigest()
+                hashed = self.hasher(_ndarray_to_bytes(mask)).hexdigest()
                 key_part += f"M:{hashed}\n"
             return key_part
         return ""
@@ -305,12 +309,23 @@ def ndarray_to_bytes(tensor: Any) -> bytes:
     def get_key(self, op: HWCustomOp, model: ModelWrapper) -> str:
         """Return the key that can be hashed, for the given custom op.
 
+        These parts are used to build the key which is then hashed for the cache:
+        - FINN commit
+        - FINN-HLSLIB commit
+        - Custom Op type
+        - (Optional) HLS clock
+        - (Optional) HLS Synthesis FPGA-part
+        - All node attributes that define a unique instance of the operator (set by @cache_ip(...))
+        - All external parameters for ops that have these (for example MVAU)
+            - These are hashed themselves for brevity, otherwise the key might be megabytes of data
+
+        **IMPORTANT**: Keep in mind that changes in this function will require caching everything
+        again.
+
         Returns:
             str: The human-readable key. Can be used to generate the caching
                     hash and the metadata file packed with the cached data.
         """
-        # TODO: Maybe exchange simple string concat for something more elegant at some point.
-        # TODO: Practical, because we can include the unhashed key in the directory for debugging
         global CACHE_IP_DEFINITIONS
         if type(op) not in CACHE_IP_DEFINITIONS.keys():
             log.error(
@@ -318,32 +333,18 @@ def get_key(self, op: HWCustomOp, model: ModelWrapper) -> str:
                 "Did you perhaps forget to register the op for caching via "
                 "@cache_ip(...)?"
             )
-
-        # Always use the current FINN and HLSLIB commits so that the correct versions are used
         key = f"FINN: {self.finn_commit}\nHLSLIB: {self.hlslib_commit}\n"
-
-        # Two custom ops might need the same attributes, so add the type
         key += "type:" + type(op).__name__ + "\n"
-
-        # Hash synth clk period and part
         if self.cache_hls_clk:
             key += f"hls_clk_period_ns:{self.clk}\n"
         if self.cache_fpgapart:
             key += f"fpgapart:{self.fpgapart}\n"
-
-        # Add all node attributes required
         key += self._get_key_part_attributes(op) + "\n"
-
-        # Add parameters if existing
         key += self._get_key_part_parameter(op, model)
-
         return key
 
     def get_hash_hex(self, key: str) -> str:
-        """Return the hex repr of the hash of the given key.
-
-        The key can be created using get_key(...)
-        """
+        """Return the hex repr of the hash of the given key."""
         return self.hasher(key.encode("UTF-8")).hexdigest()
 
     def _create_key_file(self, key: str, path: Path) -> None:
@@ -352,13 +353,22 @@ def _create_key_file(self, key: str, path: Path) -> None:
             f.write(f"Hashed using {self.hashfunc_name}. Key:\n------------------------\n")
             f.write(key)
 
-    def _cache_dir_path(self, hashed_key: str) -> Path:
-        """Return the path to the directory matching the hashed key."""
-        return self.cache_dir / hashed_key
+    def _dump_nodeattrs(
+        self, op: HWCustomOp, path: Path, additional_attributes: list[str] | None = None
+    ) -> None:
+        """Dump the custom ops node attributes at the given path as a JSON.
+
+        If a node attribute cannot be accessed, it is silently ignored.
 
-    def _dump_nodeattrs(self, op: HWCustomOp, path: Path) -> None:
-        """Dump the custom ops node attributes at the given path as a JSON."""
-        required = ["ip_vlnv"]
+        Args:
+            op: The HWCustom op of which the node attributes are the target
+            path: Where to dump the node attributes
+            additional_attributes: A list of additional attribute keys that
+                should be included in the dump.
+        """
+        if additional_attributes is None:
+            additional_attributes = []
+        required = {"ip_vlnv", *additional_attributes}
         d = {}
         for name in op.get_nodeattr_types().keys():
             if name in required:
@@ -382,7 +392,7 @@ def _prepare_from_cached_ip(self, op: HWCustomOp, hashed_key: str, make_copy: bo
                         and sets the path towards this copy instead of the cached original.
         """
         log.info(f"Preparing {op.onnx_node.name} from cached IP (hashed key: {hashed_key[:10]}...)")
-        ip_dir = self._cache_dir_path(hashed_key)
+        ip_dir = self.cache_dir / hashed_key
         saved_nodeattrs = {}
 
         # Check if the cached IP really exists
@@ -416,7 +426,7 @@ def _get_node_data(
         op = getCustomOp(node)
         key = self.get_key(op, model)
         hashed_key = self.get_hash_hex(key)
-        return op, key, hashed_key, self._cache_dir_path(hashed_key)
+        return op, key, hashed_key, self.cache_dir / hashed_key
 
     def _is_op_synthesized(self, op: HWCustomOp) -> bool:
         """Return whether the given op is synthesized. This is derived from the existence and
@@ -451,18 +461,21 @@ def update(self, model: ModelWrapper) -> None:
         """
         for node in model.graph.node:
             op, key, hashed_key, target_dir = self._get_node_data(node, model)
-            if not _check_path_lengths(
+            if not _check_path_lengths_okay(
                 self.max_hash_len, self.max_path_len, hashed_key, target_dir
             ):
                 return
+            if not (is_hls_node(node) or is_rtl_node(node)):
+                log.warning(f"Cannot cache node {node.name}. Node is not a HW node!")
+                continue
             if not target_dir.exists():
-                # Check to make sure we only cache synthesized IPs
-                if is_hls_node(node) or is_rtl_node(node):
-                    if not self._is_op_synthesized(op):
-                        log.warning(f"{node.name} hasn't been synthesized yet and can't be cached.")
-                        continue
-                else:
-                    log.warning(f"Cannot cache node {node.name}. Node is not a HW node!")
+                if not self._is_op_synthesized(op):
+                    log.warning(
+                        f"{node.name} hasn't been synthesized yet and can't be cached "
+                        f"(one of code_gen_dir_ipgen, ip_path, ipgen_path is missing or "
+                        f"invalid!)."
+                    )
+                    continue
                 code_gen_dir = Path(cast(str, op.get_nodeattr("code_gen_dir_ipgen")))
                 if not code_gen_dir.exists():
                     log.warning(
@@ -472,9 +485,7 @@ def update(self, model: ModelWrapper) -> None:
                 shutil.copytree(code_gen_dir, target_dir, dirs_exist_ok=True)
                 self._create_key_file(key, target_dir / "key.txt")
                 self._dump_nodeattrs(op, target_dir / "nodeattrs.json")
-                log.info(
-                    f"Cached node {node.name}. " f"Cached at: {target_dir} from {code_gen_dir}!"
-                )
+                log.info(f"Cached node {node.name}. Cached at: {target_dir} from {code_gen_dir}!")
 
 
 class CachedIPGen(Transformation):
@@ -484,10 +495,10 @@ def __init__(
         self,
         hash_function: str,
         include_prepare_ip: bool,
-        cache_clock: bool,
         clk: float,
-        cache_fpgapart: bool,
+        cache_clock: bool,
         fpgapart: str,
+        cache_fpgapart: bool,
     ) -> None:
         """(PrepareIP and) HLSSynth but cached.
 
@@ -495,7 +506,9 @@ def __init__(
             hash_function: Hashfunction to use.
             include_prepare_ip: If True, also run PrepareIP before synthesis.
             fpgapart: Required if PrepareIP is being run.
+            cache_fpgapart: Whether or not to use the fpgapart for the cache key
             clk: Required if PrepareIP is being run.
+            cache_clock: Whether or not to use the clock for the cache key
         """
         super().__init__()
         self.hashfunc = hash_function
@@ -523,7 +536,7 @@ def apply(self, model: ModelWrapper) -> tuple[ModelWrapper, bool]:
         if self.prepareip:
             if self.part is None or self.clk is None:
                 raise FINNInternalError(
-                    "Cannot run PrepareIP in CachedIPGen without " "fpgapart and clk being passed!"
+                    "Cannot run PrepareIP in CachedIPGen without fpgapart and clk being passed!"
                 )
             log.info("Running PrepareIP for uncached IPs...")
             model = model.transform(PrepareIP(self.part, self.clk))
diff --git a/src/finn/util/deps.py b/src/finn/util/deps.py
index 44c32c6dbf..5512b91f15 100644
--- a/src/finn/util/deps.py
+++ b/src/finn/util/deps.py
@@ -12,7 +12,6 @@ def get_deps_path() -> Path:
     return Path(os.environ["FINN_DEPS"])
 
 
-# TODO: Move to own file?
 def get_cache_path() -> Path:
     """Return the path to the cache."""
     if "FINN_IP_CACHE" not in os.environ.keys():
diff --git a/tests/infrastructure/test_ip_cache.py b/tests/infrastructure/test_ip_cache.py
index 3a30f991a7..72b7cd1822 100644
--- a/tests/infrastructure/test_ip_cache.py
+++ b/tests/infrastructure/test_ip_cache.py
@@ -1,4 +1,6 @@
 """Test that the IP cache is working correctly. (No false positives, no collisions, speed, etc.)."""
+from __future__ import annotations
+
 import pytest
 
 import numpy as np
@@ -7,43 +9,58 @@
 from copy import deepcopy
 from pathlib import Path
 from qonnx.core.datatype import DataType
-from qonnx.core.modelwrapper import ModelWrapper
 from qonnx.custom_op.registry import getCustomOp
 from qonnx.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames
 from qonnx.util.basic import gen_finn_dt_tensor
-from typing import cast
+from typing import TYPE_CHECKING, Literal, cast
 
 from finn.custom_op.fpgadataflow.hls.matrixvectoractivation_hls import MVAU_hls
-from finn.custom_op.fpgadataflow.hwcustomop import HWCustomOp
+from finn.custom_op.fpgadataflow.hlsbackend import HLSBackend
+from finn.custom_op.fpgadataflow.rtl.matrixvectoractivation_rtl import MVAU_rtl
+from finn.custom_op.fpgadataflow.rtlbackend import RTLBackend
 from finn.transformation.fpgadataflow.ip_cache import CachedIPGen, IPCache
 from finn.transformation.fpgadataflow.specialize_layers import SpecializeLayers
 from finn.util.basic import alveo_part_map
 from finn.util.deps import get_cache_path
 from tests.fpgadataflow.test_fpgadataflow_mvau import make_single_fclayer_modelwrapper
 
+if TYPE_CHECKING:
+    from qonnx.core.modelwrapper import ModelWrapper
+
+    from finn.custom_op.fpgadataflow.hwcustomop import HWCustomOp
+
 
-def mvau_hls_create_model(fpgapart: str) -> tuple[ModelWrapper, np.ndarray, np.ndarray]:
-    """Create and sanity check a model for testing MVAU_hls caching.
+def mvau_create_model(
+    fpgapart: str, mode: Literal["hls", "rtl"]
+) -> tuple[ModelWrapper, np.ndarray, np.ndarray]:
+    """Create and sanity check a model for testing MVAU caching.
 
     Returns:
         ModelWrapper, NDArray, NDArray: Model, weights, thresholds.
     """
     # TODO: Fix gen_finn_dt_tensor issue in our QONNX (same values
     # for subsequent calls of the function)
-    W = gen_finn_dt_tensor(DataType["UINT4"], (10, 10), seed=1)
-    T = gen_finn_dt_tensor(DataType["UINT4"], (10, 10), seed=1)
+    W = gen_finn_dt_tensor(DataType["INT4"], (10, 10), seed=1)
+    T = gen_finn_dt_tensor(DataType["INT4"], (10, 10), seed=1)
 
     # Creating the model
     model = make_single_fclayer_modelwrapper(
-        W, 1, 1, DataType["UINT4"], DataType["UINT4"], DataType["UINT4"], T, DataType["UINT4"]
+        W, 1, 1, DataType["INT4"], DataType["INT4"], DataType["INT4"], T, DataType["INT4"]
     )
+
+    op: HWCustomOp = getCustomOp(model.graph.node[0])
+    op.set_nodeattr("preferred_impl_style", mode)
+    if mode == "rtl":
+        # Required to set MVAU implementation to rtl
+        op.set_nodeattr("noActivation", 1)
+        op.set_nodeattr("binaryXnorMode", 0)
+
     model = model.transform(SpecializeLayers(fpgapart))
     model = model.transform(GiveUniqueNodeNames())
     model = model.transform(GiveReadableTensorNames())
 
     # Some sanity checks
-    # Not explicitly set. If the default behaviour changes, we need to fix this to be HLS
-    assert model.graph.node[0].op_type == "MVAU_hls"
+    assert model.graph.node[0].op_type == "MVAU_" + mode
     assert getCustomOp(model.graph.node[0]).get_nodeattr("mem_mode") in [
         "internal_decoupled",
         "internal_embedded",
@@ -51,15 +68,15 @@ def mvau_hls_create_model(fpgapart: str) -> tuple[ModelWrapper, np.ndarray, np.n
     return model, W, T
 
 
-def mvau_hls_specific_asserts(
+def mvau_specific_asserts(
     model: ModelWrapper,
     original_op: HWCustomOp,
     original_cache: IPCache,
     original_key: str,
     W: np.ndarray,
     T: np.ndarray,
-) -> None:  # noqa
-    """Run MVAU_hls specific asserts to validate caching."""
+) -> None:
+    """Run MVAU specific asserts to validate caching."""
     for attribute in [
         "resType",
         "MW",
@@ -106,7 +123,7 @@ def get_first_op(model: ModelWrapper) -> HWCustomOp:
     return getCustomOp(model.graph.node[0])
 
 
-@pytest.mark.parametrize("op_type", [MVAU_hls])
+@pytest.mark.parametrize("op_type", [MVAU_hls, MVAU_rtl])
 @pytest.mark.parametrize("hashfunc", ["sha256"])
 @pytest.mark.parametrize("fpgapart", [alveo_part_map["U280"]])
 @pytest.mark.parametrize("hls_clk", [2.5])
@@ -124,7 +141,9 @@ def test_ip_hash_key(op_type: type, hashfunc: str, fpgapart: str, hls_clk: float
     # Create the model
     model: ModelWrapper
     if op_type is MVAU_hls:
-        model, W, T = mvau_hls_create_model(fpgapart)
+        model, W, T = mvau_create_model(fpgapart, mode="hls")
+    elif op_type is MVAU_rtl:
+        model, W, T = mvau_create_model(fpgapart, mode="rtl")
     else:
         raise AssertionError(f"Cache test for op {op_type.__name__} not yet implemented!")
 
@@ -154,13 +173,13 @@ def test_ip_hash_key(op_type: type, hashfunc: str, fpgapart: str, hls_clk: float
     original_key = cache.get_key(original_op, model)
 
     # Check that the hash changes with the attributes
-    if op_type is MVAU_hls:
-        mvau_hls_specific_asserts(model, original_op, cache, original_key, W, T)
+    if op_type in [MVAU_hls, MVAU_rtl]:
+        mvau_specific_asserts(model, original_op, cache, original_key, W, T)
     else:
         raise AssertionError(f"{op_type.__name__} specific cache test asserts not yet implemented!")
 
     # Check that the IP was cached at the correct path
-    path = cache._cache_dir_path(cache.get_hash_hex(original_key))
+    path = cache.cache_dir / cache.get_hash_hex(original_key)
     assert path.exists()
     assert (path / "nodeattrs.json").exists()
     assert (path / "key.txt").exists()
@@ -177,7 +196,7 @@ def test_ip_hash_key(op_type: type, hashfunc: str, fpgapart: str, hls_clk: float
     # Check speed of the second call (should be much faster)
     start: float = time.time()
     unsynth_model = unsynth_model.transform(
-        CachedIPGen(hashfunc, True, True, hls_clk, True, fpgapart)
+        CachedIPGen(hashfunc, True, hls_clk, True, fpgapart, True)
     )
     ms_elapsed = time.time() - start
 
@@ -190,11 +209,17 @@ def test_ip_hash_key(op_type: type, hashfunc: str, fpgapart: str, hls_clk: float
 
     # Check that the cached and re-used IP does exist
     first_op = get_first_op(unsynth_model)
-    expected_ip_path = (
-        Path(cast(str, first_op.get_nodeattr("code_gen_dir_ipgen")))
-        / f"project_{first_op.onnx_node.name}"
-        / "sol1"
-        / "impl"
-        / "ip"
-    )
-    assert expected_ip_path.exists()
+    codegen_path = Path(cast(str, first_op.get_nodeattr("code_gen_dir_ipgen")))
+    if issubclass(op_type, HLSBackend):
+        expected_ip_path = (
+            codegen_path / f"project_{first_op.onnx_node.name}" / "sol1" / "impl" / "ip"
+        )
+        assert expected_ip_path.exists()
+    elif issubclass(op_type, RTLBackend):
+        for f in (cache.cache_dir / cache.get_hash_hex(cache.get_key(first_op, model))).iterdir():
+            assert (codegen_path / f.name).exists()  # join by name: f carries the cache path
+    else:
+        raise AssertionError(
+            f"{op_type.__name__} doesnt have either an HLS or RTL backend. "
+            f"Only test subclasses that can actually be cached!"
+        )

From 8fb44bb9aeee799361e87f732dd067e2b3e2d84e Mon Sep 17 00:00:00 2001
From: bwintermann <bjarne.wintermann@uni-paderborn.de>
Date: Fri, 12 Sep 2025 14:51:30 +0200
Subject: [PATCH 12/17] Removing ip_vlnv and gen_top_module from key
 generation. Fixed bug with standalone thresholds and wrong order of cache
 application.

---
 .../transformation/fpgadataflow/ip_cache.py   | 22 ++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/src/finn/transformation/fpgadataflow/ip_cache.py b/src/finn/transformation/fpgadataflow/ip_cache.py
index b70018f9ed..6ceb9a3791 100644
--- a/src/finn/transformation/fpgadataflow/ip_cache.py
+++ b/src/finn/transformation/fpgadataflow/ip_cache.py
@@ -2,6 +2,7 @@
 
 from __future__ import annotations
 
+import contextlib
 import hashlib
 import json
 import numpy as np
@@ -116,6 +117,8 @@ def wrapper(op_cls: type) -> type:
             # and can thus be ignored when hashing
             ignore_fields = [
                 "code_gen_dir_ipgen",
+                "gen_top_module",
+                "ip_vlnv",
                 "ipgen_path",
                 "ip_path",
                 "cycles_rtlsim",
@@ -267,7 +270,11 @@ def _get_key_part_parameter(self, op: HWCustomOp, model: ModelWrapper) -> str:
                 ) from e
             if mem_mode in ["internal_embedded", "internal_decoupled"]:
                 weightbytes = _ndarray_to_bytes(model.get_initializer(op.onnx_node.input[1]))
-                threshbytes = _ndarray_to_bytes(model.get_initializer(op.onnx_node.input[2]))
+                try:
+                    threshbytes = _ndarray_to_bytes(model.get_initializer(op.onnx_node.input[2]))
+                except IndexError:
+                    # No thresholds
+                    threshbytes = b""
                 array_hash = self.hasher(weightbytes + threshbytes).hexdigest()
                 return f"param_hash:{array_hash}\n"
         elif isinstance(op, (Thresholding, ChannelwiseOp, Lookup)):
@@ -368,7 +375,7 @@ def _dump_nodeattrs(
         """
         if additional_attributes is None:
             additional_attributes = []
-        required = {"ip_vlnv", *additional_attributes}
+        required = {"ip_vlnv", "gen_top_module", *additional_attributes}
         d = {}
         for name in op.get_nodeattr_types().keys():
             if name in required:
@@ -419,6 +426,10 @@ def _prepare_from_cached_ip(self, op: HWCustomOp, hashed_key: str, make_copy: bo
         )
         op.set_nodeattr("ipgen_path", str(ip_dir / f"project_{op.onnx_node.name}"))
 
+        # If needed insert gen_top_module. If not saved or the attr doesnt exist ignore
+        with contextlib.suppress(Exception):
+            op.set_nodeattr("gen_top_module", saved_nodeattrs["gen_top_module"])
+
     def _get_node_data(
         self, node: NodeProto, model: ModelWrapper
     ) -> tuple[HWCustomOp, str, str, Path]:
@@ -473,7 +484,7 @@ def update(self, model: ModelWrapper) -> None:
                     log.warning(
                         f"{node.name} hasn't been synthesized yet and can't be cached "
                         f"(one of code_gen_dir_ipgen, ip_path, ipgen_path is missing or "
-                        f"invalid!)."
+                        f"invalid!). Hash after synthesis will be: {hashed_key}"
                     )
                     continue
                 code_gen_dir = Path(cast(str, op.get_nodeattr("code_gen_dir_ipgen")))
@@ -541,6 +552,11 @@ def apply(self, model: ModelWrapper) -> tuple[ModelWrapper, bool]:
             log.info("Running PrepareIP for uncached IPs...")
             model = model.transform(PrepareIP(self.part, self.clk))
             cache.update(model)
+        log.info(
+            f"Applying cache to {cache.get_num_cached_ips(model)} "
+            f"/ {len(model.graph.node)} nodes!"
+        )
+        model = cache.apply(model)
         log.info("Running synthesis for uncached IPs...")
         model = model.transform(HLSSynthIP())
         log.info("Updating cache with newly generated IPs...")

From 2d7234098d01bf802ebab4f0c9d40c6eef323863 Mon Sep 17 00:00:00 2001
From: bwintermann <bjarne.wintermann@uni-paderborn.de>
Date: Fri, 12 Sep 2025 15:34:19 +0200
Subject: [PATCH 13/17] Make cache application multithreaded

---
 .../transformation/fpgadataflow/ip_cache.py   | 34 +++++++++++++------
 1 file changed, 23 insertions(+), 11 deletions(-)

diff --git a/src/finn/transformation/fpgadataflow/ip_cache.py b/src/finn/transformation/fpgadataflow/ip_cache.py
index 6ceb9a3791..b843235ac7 100644
--- a/src/finn/transformation/fpgadataflow/ip_cache.py
+++ b/src/finn/transformation/fpgadataflow/ip_cache.py
@@ -11,9 +11,11 @@
 import shutil
 import subprocess
 import sys
+from concurrent.futures import ThreadPoolExecutor
 from pathlib import Path
 from qonnx.custom_op.registry import getCustomOp
 from qonnx.transformation.base import Transformation
+from qonnx.util.basic import get_num_default_workers
 from typing import TYPE_CHECKING, Any, Callable, Final, cast
 
 from finn.custom_op.fpgadataflow.attention import ScaledDotProductAttention
@@ -386,7 +388,10 @@ def _dump_nodeattrs(
         with path.open("w+") as f:
             json.dump(d, f)
 
-    def _prepare_from_cached_ip(self, op: HWCustomOp, hashed_key: str, make_copy: bool) -> None:
+    @staticmethod
+    def _prepare_from_cached_ip(
+        op: HWCustomOp, hashed_key: str, make_copy: bool, cache_dir: Path
+    ) -> None:
         """Prepare the given custom op for usage of the given cached IP.
 
         We have to set some node attributes normally set by HLSSynth and PrepareIP. This needs to
@@ -397,9 +402,10 @@ def _prepare_from_cached_ip(self, op: HWCustomOp, hashed_key: str, make_copy: bo
             hashed_key: The hash hex repr of the key for this op. Used to find the cached IP.
             make_copy: If True, first makes a copy of the cached IP in the current FINN_BUILD_DIR
                         and sets the path towards this copy instead of the cached original.
+            cache_dir: FINN_IP_CACHE directory, as passed from the calling IPCache instance.
         """
         log.info(f"Preparing {op.onnx_node.name} from cached IP (hashed key: {hashed_key[:10]}...)")
-        ip_dir = self.cache_dir / hashed_key
+        ip_dir = cache_dir / hashed_key
         saved_nodeattrs = {}
 
         # Check if the cached IP really exists
@@ -459,10 +465,18 @@ def get_num_cached_ips(self, model: ModelWrapper) -> int:
 
     def apply(self, model: ModelWrapper) -> ModelWrapper:
         """Apply all IPs that were cached to the model and return it."""
-        for node in model.graph.node:
-            op, key, hashed_key, cache_dir = self._get_node_data(node, model)
-            if cache_dir.exists():
-                self._prepare_from_cached_ip(op, hashed_key, make_copy=True)
+        with ThreadPoolExecutor(max_workers=get_num_default_workers()) as pool:
+            for node in model.graph.node:
+                op, key, hashed_key, op_cache_dir = self._get_node_data(node, model)
+                if op_cache_dir.exists():
+                    pool.submit(
+                        IPCache._prepare_from_cached_ip,
+                        op=op,
+                        hashed_key=hashed_key,
+                        make_copy=True,
+                        cache_dir=self.cache_dir,
+                    )
+            pool.shutdown(wait=True)
         return model
 
     def update(self, model: ModelWrapper) -> None:
@@ -470,6 +484,7 @@ def update(self, model: ModelWrapper) -> None:
 
         Requires HLSSynthIP() to be run before.
         """
+        total_cached = 0
         for node in model.graph.node:
             op, key, hashed_key, target_dir = self._get_node_data(node, model)
             if not _check_path_lengths_okay(
@@ -497,6 +512,8 @@ def update(self, model: ModelWrapper) -> None:
                 self._create_key_file(key, target_dir / "key.txt")
                 self._dump_nodeattrs(op, target_dir / "nodeattrs.json")
                 log.info(f"Cached node {node.name}. Cached at: {target_dir} from {code_gen_dir}!")
+                total_cached += 1
+        log.info(f"Cached a total of {total_cached} new ops.")
 
 
 class CachedIPGen(Transformation):
@@ -552,11 +569,6 @@ def apply(self, model: ModelWrapper) -> tuple[ModelWrapper, bool]:
             log.info("Running PrepareIP for uncached IPs...")
             model = model.transform(PrepareIP(self.part, self.clk))
             cache.update(model)
-        log.info(
-            f"Applying cache to {cache.get_num_cached_ips(model)} "
-            f"/ {len(model.graph.node)} nodes!"
-        )
-        model = cache.apply(model)
         log.info("Running synthesis for uncached IPs...")
         model = model.transform(HLSSynthIP())
         log.info("Updating cache with newly generated IPs...")

From a890ec36d13a38bbac6d32d1080c068c887bbc12 Mon Sep 17 00:00:00 2001
From: bwintermann <bjarne.wintermann@uni-paderborn.de>
Date: Thu, 18 Sep 2025 11:37:51 +0200
Subject: [PATCH 14/17] Fixed cache application for RTL nodes

---
 .../transformation/fpgadataflow/ip_cache.py   | 83 +++++++++++++++----
 1 file changed, 65 insertions(+), 18 deletions(-)

diff --git a/src/finn/transformation/fpgadataflow/ip_cache.py b/src/finn/transformation/fpgadataflow/ip_cache.py
index b843235ac7..704fa91d0d 100644
--- a/src/finn/transformation/fpgadataflow/ip_cache.py
+++ b/src/finn/transformation/fpgadataflow/ip_cache.py
@@ -2,7 +2,6 @@
 
 from __future__ import annotations
 
-import contextlib
 import hashlib
 import json
 import numpy as np
@@ -11,7 +10,7 @@
 import shutil
 import subprocess
 import sys
-from concurrent.futures import ThreadPoolExecutor
+from concurrent.futures import Future, ThreadPoolExecutor
 from pathlib import Path
 from qonnx.custom_op.registry import getCustomOp
 from qonnx.transformation.base import Transformation
@@ -21,9 +20,11 @@
 from finn.custom_op.fpgadataflow.attention import ScaledDotProductAttention
 from finn.custom_op.fpgadataflow.channelwise_op import ChannelwiseOp
 from finn.custom_op.fpgadataflow.elementwise_binary import ElementwiseBinaryOperation
+from finn.custom_op.fpgadataflow.hlsbackend import HLSBackend
 from finn.custom_op.fpgadataflow.hwcustomop import HWCustomOp
 from finn.custom_op.fpgadataflow.lookup import Lookup
 from finn.custom_op.fpgadataflow.matrixvectoractivation import MVAU
+from finn.custom_op.fpgadataflow.rtlbackend import RTLBackend
 from finn.custom_op.fpgadataflow.thresholding import Thresholding
 from finn.custom_op.fpgadataflow.vectorvectoractivation import VVAU
 from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
@@ -388,6 +389,34 @@ def _dump_nodeattrs(
         with path.open("w+") as f:
             json.dump(d, f)
 
+    @staticmethod
+    def _replace_modulename(directory: Path, old: str, new: str) -> None:
+        """Replace ``old`` by ``new`` in all file/directory names and text file contents.
+
+        The tree below ``directory`` is modified in place; files that cannot be decoded
+        as text keep their contents (but may still be renamed).
+
+        Raises:
+            FINNInternalError: If ``directory`` is not a directory.
+        """
+        if not directory.is_dir():
+            raise FINNInternalError(f"Cannot replace module names in non-directory: {directory}")
+
+        # Snapshot the tree and go deepest-first: renaming a directory while rglob is
+        # still lazily descending would invalidate the paths of everything below it.
+        for obj in sorted(directory.rglob("*"), key=lambda p: len(p.parts), reverse=True):
+            if obj.is_file():
+                try:
+                    text = obj.read_text()
+                except UnicodeDecodeError:  # probably a binary file: keep its contents
+                    pass
+                else:
+                    if old in text:
+                        obj.write_text(text.replace(old, new))
+            # Rename last; everything below obj has already been handled
+            if old in obj.name:
+                obj.rename(obj.with_name(obj.name.replace(old, new)))
+
     @staticmethod
     def _prepare_from_cached_ip(
         op: HWCustomOp, hashed_key: str, make_copy: bool, cache_dir: Path
@@ -426,15 +455,26 @@ def _prepare_from_cached_ip(
 
         # Set node attributes correctly to point to cached directory
         op.set_nodeattr("code_gen_dir_ipgen", str(ip_dir))
-        op.set_nodeattr("ip_vlnv", saved_nodeattrs["ip_vlnv"])
-        op.set_nodeattr(
-            "ip_path", str(ip_dir / f"project_{op.onnx_node.name}" / "sol1" / "impl" / "ip")
-        )
-        op.set_nodeattr("ipgen_path", str(ip_dir / f"project_{op.onnx_node.name}"))
-
-        # If needed insert gen_top_module. If not saved or the attr doesnt exist ignore
-        with contextlib.suppress(Exception):
-            op.set_nodeattr("gen_top_module", saved_nodeattrs["gen_top_module"])
+        if issubclass(type(op), RTLBackend):
+            # Rename module in filenames and contents from the cached name to applied node name
+            old_module_name = saved_nodeattrs["gen_top_module"]
+            new_module_name = op.get_verilog_top_module_name()
+            if old_module_name != new_module_name:
+                log.debug(
+                    f"{op.onnx_node.name}: Replacing cached module name: {old_module_name} "
+                    f"with applied module name: {new_module_name}"
+                )
+                IPCache._replace_modulename(ip_dir, old_module_name, new_module_name)
+            op.set_nodeattr("ip_path", str(ip_dir))
+            op.set_nodeattr("ipgen_path", str(ip_dir))
+            op.set_nodeattr("gen_top_module", new_module_name)
+
+        elif issubclass(type(op), HLSBackend):
+            op.set_nodeattr("ip_vlnv", saved_nodeattrs["ip_vlnv"])
+            op.set_nodeattr(
+                "ip_path", str(ip_dir / f"project_{op.onnx_node.name}" / "sol1" / "impl" / "ip")
+            )
+            op.set_nodeattr("ipgen_path", str(ip_dir / f"project_{op.onnx_node.name}"))
 
     def _get_node_data(
         self, node: NodeProto, model: ModelWrapper
@@ -465,18 +505,25 @@ def get_num_cached_ips(self, model: ModelWrapper) -> int:
 
     def apply(self, model: ModelWrapper) -> ModelWrapper:
         """Apply all IPs that were cached to the model and return it."""
+        futures: list[Future] = []
         with ThreadPoolExecutor(max_workers=get_num_default_workers()) as pool:
             for node in model.graph.node:
-                op, key, hashed_key, op_cache_dir = self._get_node_data(node, model)
+                op, _, hashed_key, op_cache_dir = self._get_node_data(node, model)
                 if op_cache_dir.exists():
-                    pool.submit(
-                        IPCache._prepare_from_cached_ip,
-                        op=op,
-                        hashed_key=hashed_key,
-                        make_copy=True,
-                        cache_dir=self.cache_dir,
+                    futures.append(
+                        pool.submit(
+                            IPCache._prepare_from_cached_ip,
+                            op=op,
+                            hashed_key=hashed_key,
+                            make_copy=True,
+                            cache_dir=self.cache_dir,
+                        )
                     )
             pool.shutdown(wait=True)
+
+            # Raise exceptions from threads if there were any
+            for future in futures:
+                _ = future.result()
         return model
 
     def update(self, model: ModelWrapper) -> None:

From 9d7c2ddd6724d1d1dddecf7023fcc8433559f699 Mon Sep 17 00:00:00 2001
From: bwintermann <bjarne.wintermann@uni-paderborn.de>
Date: Mon, 22 Sep 2025 14:01:21 +0200
Subject: [PATCH 15/17] Added some metadata

---
 .../transformation/fpgadataflow/ip_cache.py   | 25 ++++++++++++++++---
 1 file changed, 22 insertions(+), 3 deletions(-)

diff --git a/src/finn/transformation/fpgadataflow/ip_cache.py b/src/finn/transformation/fpgadataflow/ip_cache.py
index 704fa91d0d..4278017be6 100644
--- a/src/finn/transformation/fpgadataflow/ip_cache.py
+++ b/src/finn/transformation/fpgadataflow/ip_cache.py
@@ -209,7 +209,13 @@ def __init__(
             capture_output=True,
             cwd=Path(__file__).parent,
         ).stdout.strip()
-        log.info(f"FINN Commit reads: {self.finn_commit}")
+        self.finn_commit_time = subprocess.run(
+            shlex.split("git show --quiet --format=%ai"),
+            text=True,
+            capture_output=True,
+            cwd=Path(__file__).parent,
+        ).stdout.strip()
+        log.info(f"FINN Commit reads: {self.finn_commit} (authored at: {self.finn_commit_time})")
 
         # FINN HLSLIB Commit
         self.hlslib_commit = subprocess.run(
@@ -218,7 +224,16 @@ def __init__(
             capture_output=True,
             cwd=get_deps_path() / "finn-hlslib",
         ).stdout.strip()
-        log.info(f"HLSLIB Commit reads: {self.hlslib_commit}")
+        self.hlslib_commit_time = subprocess.run(
+            shlex.split("git show --quiet --format=%ai"),
+            text=True,
+            capture_output=True,
+            cwd=get_deps_path() / "finn-hlslib",
+        ).stdout.strip()
+        log.info(
+            f"HLSLIB Commit reads: {self.hlslib_commit} "
+            f"(authored at: {self.hlslib_commit_time})"
+        )
 
         # HLS Clk and device
         self.clk = hls_clk_period
@@ -360,7 +375,11 @@ def get_hash_hex(self, key: str) -> str:
     def _create_key_file(self, key: str, path: Path) -> None:
         """Write the given key data into a file at the given path."""
         with path.open("w+") as f:
-            f.write(f"Hashed using {self.hashfunc_name}. Key:\n------------------------\n")
+            f.write(f"Hashed using {self.hashfunc_name}.\n")
+            f.write(f"Final overall hashed key: {self.get_hash_hex(key)}\n")
+            f.write(f"FINN Commit Date: {self.finn_commit_time}\n")
+            f.write(f"FINN HLSLIB Commit Date: {self.hlslib_commit_time}\n")
+            f.write("Key:\n------------------------\n")
             f.write(key)
 
     def _dump_nodeattrs(

From 6c3b2768f89f16afe677386ff9d03dea5e34bf20 Mon Sep 17 00:00:00 2001
From: bwintermann <bjarne.wintermann@me.com>
Date: Tue, 30 Sep 2025 13:38:26 +0200
Subject: [PATCH 16/17] Fix error when calling prepare_finn with missing args

---
 src/finn/interface/run_finn.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/finn/interface/run_finn.py b/src/finn/interface/run_finn.py
index dd3eb32a7a..b5ae265e20 100644
--- a/src/finn/interface/run_finn.py
+++ b/src/finn/interface/run_finn.py
@@ -310,7 +310,7 @@ def bench(bench_config: str, dependency_path: str, num_workers: int, build_path:
     console = Console()
     build_dir = Path(build_path).expanduser() if build_path != "" else None
     dep_path = Path(dependency_path).expanduser() if dependency_path != "" else None
-    prepare_finn(dep_path, Path(), build_dir, num_workers)
+    prepare_finn(dep_path, None, Path(), build_dir, num_workers)
     console.rule("RUNNING BENCHMARK")
 
     # Late import because we need prepare_finn to setup remaining dependencies first
@@ -343,7 +343,7 @@ def test(
     console = Console()
     build_dir = Path(build_path).expanduser() if build_path != "" else None
     dep_path = Path(dependency_path).expanduser() if dependency_path != "" else None
-    prepare_finn(dep_path, Path(), build_dir, num_workers, is_test_run=True)
+    prepare_finn(dep_path, None, Path(), build_dir, num_workers, is_test_run=True)
     status(f"Using {num_test_workers} test workers")
     console.rule("RUNNING TESTS")
     run_test(variant, num_test_workers)
@@ -364,7 +364,7 @@ def deps() -> None:
 )
 def update(path: str) -> None:
     dep_path = Path(path).expanduser() if path != "" else None
-    prepare_finn(dep_path, Path(), None, 1)
+    prepare_finn(deps=dep_path, cache_path=None, flow_config=Path(), build_dir=None, num_workers=1)
 
 
 @click.group(help="Manage FINN settings")

From 924137439afcbc0ea1a72b22e541d28b5b483144 Mon Sep 17 00:00:00 2001
From: Felix Jentzsch <felix.jentzsch@upb.de>
Date: Sun, 5 Oct 2025 21:28:21 +0200
Subject: [PATCH 17/17] Fix merge errors

---
 src/finn/builder/build_dataflow.py        | 12 ------------
 src/finn/builder/build_dataflow_config.py | 20 --------------------
 2 files changed, 32 deletions(-)

diff --git a/src/finn/builder/build_dataflow.py b/src/finn/builder/build_dataflow.py
index 177bae2a64..a96467d8c3 100644
--- a/src/finn/builder/build_dataflow.py
+++ b/src/finn/builder/build_dataflow.py
@@ -363,18 +363,6 @@ def build_dataflow_cfg(model_filename, cfg: DataflowBuildConfig):
                 if "ignore" in v.keys():
                     log.info("\nignore: " + ", ".join(v["ignore"]))
 
-    # Printing all cached IPs
-    if cfg.use_ip_caching:
-        log.info("IP Caching enabled.")
-        if cfg.verbose:
-            log.info("Caching enabled for operators: ")
-            for k, v in CACHE_IP_DEFINITIONS.items():
-                log.info(f"Operator: {k}:")
-                if "use" in v.keys():
-                    log.info("\tuse: " + ", ".join(v["use"]))
-                if "ignore" in v.keys():
-                    log.info("\nignore: " + ", ".join(v["use"]))
-
     # Setup done, start build flow
     try:
         # If start_step is specified, override the input model
diff --git a/src/finn/builder/build_dataflow_config.py b/src/finn/builder/build_dataflow_config.py
index 15ec6c07d4..75f6199320 100644
--- a/src/finn/builder/build_dataflow_config.py
+++ b/src/finn/builder/build_dataflow_config.py
@@ -333,26 +333,6 @@ class DataflowBuildConfig(DataClassJSONMixin, DataClassYAMLMixin):
     #: If not specified it will default to synth_clk_period_ns
     hls_clk_period_ns: Optional[float] = None
 
-    #: If True, use an IP Cache to avoid unnecessary waiting
-    #: times to run HLSSynthIP() repeatedly for the same
-    #: model / configuration
-    use_ip_caching: Optional[bool] = True
-
-    #: Hash function to be used when caching the IP cores. Only
-    #: relevant if use_ip_caching = True
-    ip_cache_hashfunction: str = "sha256"
-
-    #: If use_ip_caching is enabled, this flag determines whether
-    #: the value of _resolve_hls_clk_period() is used as part of
-    #: the cached key. Can be turned off for more cache hits, but
-    #: then delivers an IP with an outdated constraints file. This
-    #: might affect OOC Synthesis and other parts of the design, use
-    #: at your own risk.
-    cache_hls_clk_period: bool = True
-
-    #: The same as `cache_hls_clk_period`, but for the passed FPGA part.
-    cache_fpgapart: bool = True
-
     #: Use an IP Cache to re-use code-gen (PrepareIP) and HLS (HLSSynthIP)
     #: artifacts from previous runs to speed up the build process.
     use_ip_caching: bool = True