From 0ec71aa84cacb0d9feb6e16f26fb2506b1718671 Mon Sep 17 00:00:00 2001 From: auphelia Date: Mon, 25 May 2026 16:59:59 +0100 Subject: [PATCH 1/4] [Builder] First draft of introducing build phases --- src/finn/builder/build_dataflow.py | 38 ++- src/finn/builder/build_dataflow_config.py | 53 ++-- src/finn/builder/build_dataflow_phases.py | 251 ++++++++++++++++++ .../test_fpgadataflow_finnloop.py | 23 +- .../test_fpgadataflow_layernorm.py | 32 +-- 5 files changed, 322 insertions(+), 75 deletions(-) create mode 100644 src/finn/builder/build_dataflow_phases.py diff --git a/src/finn/builder/build_dataflow.py b/src/finn/builder/build_dataflow.py index 198ccd167a..94fd3d826c 100644 --- a/src/finn/builder/build_dataflow.py +++ b/src/finn/builder/build_dataflow.py @@ -45,6 +45,7 @@ DataflowBuildConfig, default_build_dataflow_steps, ) +from finn.builder.build_dataflow_phases import build_dataflow_phase_lookup from finn.builder.build_dataflow_steps import build_dataflow_step_lookup @@ -71,16 +72,43 @@ def resolve_build_steps(cfg: DataflowBuildConfig, partial: bool = True): steps = cfg.steps if steps is None: steps = default_build_dataflow_steps + + # Merge phase and step lookup dictionaries + all_steps = { + **build_dataflow_step_lookup, + **build_dataflow_phase_lookup, + } + steps_as_fxns = [] for transform_step in steps: + step_name = None + + # Get step function and name if type(transform_step) is str: - # lookup step function from step name - steps_as_fxns.append(build_dataflow_step_lookup[transform_step]) + step_name = transform_step + if transform_step in all_steps: + step_fn = all_steps[transform_step] + else: + raise ValueError(f"Unknown step or phase: {transform_step}") elif callable(transform_step): - # treat step as function to be called as-is - steps_as_fxns.append(transform_step) + step_fn = transform_step + step_name = getattr(transform_step, "__name__", None) else: - raise Exception("Could not resolve build step: " + str(transform_step)) + raise ValueError(f"Invalid step type: {type(transform_step)}") + + # Inject steps BEFORE this step + if step_name and step_name in cfg.inject_steps_before: + for injected_step in cfg.inject_steps_before[step_name]: + steps_as_fxns.append(injected_step) + + # Add the main step + steps_as_fxns.append(step_fn) + + # Inject steps AFTER this step + if step_name and step_name in cfg.inject_steps_after: + for injected_step in cfg.inject_steps_after[step_name]: + steps_as_fxns.append(injected_step) + if partial: step_names = list(map(lambda x: x.__name__, steps_as_fxns)) if cfg.start_step is None: diff --git a/src/finn/builder/build_dataflow_config.py b/src/finn/builder/build_dataflow_config.py index 96ecfeb6b7..ae2bd52c91 100644 --- a/src/finn/builder/build_dataflow_config.py +++ b/src/finn/builder/build_dataflow_config.py @@ -29,10 +29,10 @@ import numpy as np import os -from dataclasses import dataclass +from dataclasses import dataclass, field from dataclasses_json import dataclass_json from enum import Enum -from typing import Any, List, Optional +from typing import Any, Callable, Dict, List, Optional from finn.transformation.fpgadataflow.alveo_build import VitisOptStrategy from finn.util.basic import part_map, vitis_default_platform @@ -108,39 +108,20 @@ class VerificationStepType(str, Enum): #: specified order. Use the `steps` as part of build config to restrict which #: steps will be run. default_build_dataflow_steps = [ - "step_qonnx_to_finn", - "step_tidy_up", - "step_streamline", - "step_convert_to_hw", - "step_create_dataflow_partition", - "step_specialize_layers", - "step_target_fps_parallelization", - "step_apply_folding_config", - "step_minimize_bit_width", - "step_transpose_decomposition", - "step_generate_estimate_reports", - "step_hw_codegen", - "step_hw_ipgen", - "step_set_fifo_depths", - "step_create_stitched_ip", - "step_measure_rtlsim_performance", - "step_synthesize_bitfile", - "step_make_driver", - "step_deployment_package", + "phase_prepare_model", + "phase_optimize_model", + "phase_convert_to_hardware", + "phase_optimize_hardware", + "phase_build_hardware", + "phase_synthesize_hardware", ] #: List of steps to run for an estimate-only (no synthesis) dataflow build estimate_only_dataflow_steps = [ - "step_qonnx_to_finn", - "step_tidy_up", - "step_streamline", - "step_convert_to_hw", - "step_create_dataflow_partition", - "step_specialize_layers", - "step_target_fps_parallelization", - "step_apply_folding_config", - "step_minimize_bit_width", - "step_generate_estimate_reports", + "phase_prepare_model", + "phase_optimize_model", + "phase_convert_to_hardware", + "phase_optimize_hardware", ] #: List of steps to run for a dataflow build including HW code generation, but @@ -406,6 +387,16 @@ class DataflowBuildConfig: #: Warnings and info will still be printed but errors will not halt the build. mute_config_assertions: Optional[bool] = False + #: Inject custom steps after named steps/phases. + #: Dict mapping step/phase names to list of callable functions to run after that step. + #: Example: inject_steps_after={"phase_optimize_model": [my_custom_verification]} + inject_steps_after: Dict[str, List[Callable]] = field(default_factory=dict) + + #: Inject custom steps before named steps/phases. + #: Dict mapping step/phase names to list of callable functions to run before that step. + #: Example: inject_steps_before={"phase_build_hardware": [my_custom_analysis]} + inject_steps_before: Dict[str, List[Callable]] = field(default_factory=dict) + def _resolve_hls_clk_period(self): if self.hls_clk_period_ns is None: # use same clk for synth and hls if not explicitly specified diff --git a/src/finn/builder/build_dataflow_phases.py b/src/finn/builder/build_dataflow_phases.py new file mode 100644 index 0000000000..9f69f58f0d --- /dev/null +++ b/src/finn/builder/build_dataflow_phases.py @@ -0,0 +1,251 @@ +# Copyright Advanced Micro Devices, Inc. +# SPDX-License-Identifier: BSD-3-Clause + +""" +Phases for FINN dataflow builder pipeline. + +Phases group related fine-grained steps into logical build phases. +All phases internally call functions from build_dataflow_steps.py. + +Users can: +- Use phases via default_phase_build_steps +- Still use fine-grained steps +- Mix phases and fine-grained steps in custom pipelines +- Replace individual phases with custom implementations +- Inject custom steps before/after phases using inject_steps_before/after config +""" + +from qonnx.core.modelwrapper import ModelWrapper + +from finn.builder.build_dataflow_config import DataflowBuildConfig, DataflowOutputType +from finn.builder.build_dataflow_steps import ( + step_apply_folding_config, + step_convert_to_hw, + step_create_dataflow_partition, + step_create_stitched_ip, + step_deployment_package, + step_generate_estimate_reports, + step_hw_codegen, + step_hw_ipgen, + step_loop_rolling, + step_make_driver, + step_measure_rtlsim_performance, + step_minimize_bit_width, + step_qonnx_to_finn, + step_set_fifo_depths, + step_specialize_layers, + step_streamline, + step_synthesize_bitfile, + step_target_fps_parallelization, + step_tidy_up, + step_transpose_decomposition, +) + + +def phase_prepare_model(model: ModelWrapper, cfg: DataflowBuildConfig): + """Phase: Import and prepare model for FINN transformations. + + This phase handles the initial model import and cleanup, converting from + QONNX dialect to FINN and performing basic tidying operations. + + Internal steps: + - step_qonnx_to_finn: Convert QONNX dialect to FINN + - step_tidy_up: Shape/dtype inference, constant folding, cleanup + + Args: + model: Input ModelWrapper + cfg: Build configuration + + Returns: + Prepared ModelWrapper ready for optimization + """ + model = step_qonnx_to_finn(model, cfg) + model = step_tidy_up(model, cfg) + return model + + +def phase_optimize_model(model: ModelWrapper, cfg: DataflowBuildConfig): + """Phase: Apply model-specific streamlining transformations. + + This phase applies streamlining to move and absorb operations for hardware + efficiency. Streamlining is highly model-dependent and frequently customized. + + Internal steps: + - step_streamline: Apply streamlining transformations + + Note: This phase can be easily replaced with a custom streamline function + in the steps list for model-specific optimizations. + + Args: + model: Input ModelWrapper + cfg: Build configuration + + Returns: + Streamlined ModelWrapper + """ + model = step_streamline(model, cfg) + return model + + +def phase_convert_to_hardware(model: ModelWrapper, cfg: DataflowBuildConfig): + """Phase: Convert model to hardware-eligible operations and specialize. + + This phase identifies hardware-eligible operations, creates the dataflow + partition, specializes layers for the target backend (HLS/RTL), and handles + loop rolling for FINNLoop nodes. + + Internal steps: + - step_convert_to_hw: Infer hardware layer types + - step_create_dataflow_partition: Create accelerator subgraph + - step_specialize_layers: Convert to HLS or RTL variants + - step_loop_rolling: Process FINNLoop nodes (auto-detects if needed) + + Args: + model: Input ModelWrapper + cfg: Build configuration + + Returns: + ModelWrapper with hardware-specialized operations + """ + model = step_convert_to_hw(model, cfg) + model = step_create_dataflow_partition(model, cfg) + model = step_specialize_layers(model, cfg) + model = step_loop_rolling(model, cfg) + return model + + +def phase_optimize_hardware(model: ModelWrapper, cfg: DataflowBuildConfig): + """Phase: Configure parallelism, apply folding, optimize bit widths, generate reports. + + This phase configures the hardware parallelism and resource usage. It applies + folding configurations, minimizes bit widths (after folding), decomposes + transpose/shuffle operations, and generates analytical performance/resource reports. + + Internal steps: + - step_target_fps_parallelization: Auto-parallelization (if target_fps set) + - step_apply_folding_config: Apply folding configuration + - step_minimize_bit_width: Minimize weight/accumulator bit widths (after folding) + - step_transpose_decomposition: Decompose Shuffle nodes (after folding) + - step_generate_estimate_reports: Generate analytical estimates (if requested) + + Note: This is the extension point for future analytical FIFO sizing. + + Args: + model: Input ModelWrapper + cfg: Build configuration + + Returns: + ModelWrapper with optimized parallelism and resource configuration + """ + # Parallelization + if cfg.target_fps is not None: + model = step_target_fps_parallelization(model, cfg) + + # Apply folding configuration + if cfg.folding_config_file or cfg.auto_folding_config: + model = step_apply_folding_config(model, cfg) + + # Bit-width optimization (happens AFTER folding) + if cfg.minimize_bit_width: + model = step_minimize_bit_width(model, cfg) + + # Transpose/shuffle decomposition (happens AFTER folding and bit-width) + model = step_transpose_decomposition(model, cfg) + + # Report generation (step checks if ESTIMATE_REPORTS is requested) + model = step_generate_estimate_reports(model, cfg) + + return model + + +def phase_build_hardware(model: ModelWrapper, cfg: DataflowBuildConfig): + """Phase: Generate hardware code, synthesize IP blocks, size FIFOs. + + This phase generates hardware code for each layer (HLS C++ for HLS layers, + RTL/SystemVerilog for RTL layers), synthesizes IP blocks via Vitis HLS, and + sizes FIFOs. FIFO sizing is automatically skipped if FIFOs already exist in + the model (e.g., from analytical sizing). + + Internal steps: + - step_hw_codegen: Generate HLS C++ or RTL code via PrepareIP + - step_hw_ipgen: Synthesize IP blocks via HLSSynthIP + - step_set_fifo_depths: Auto or manual FIFO sizing (auto-skipped if FIFOs exist) + + Note: When analytical FIFO sizing is available (future), it would create + StreamingFIFO nodes in phase_optimize_hardware, causing this phase to + auto-skip hardware-based FIFO characterization. + + Args: + model: Input ModelWrapper + cfg: Build configuration + + Returns: + ModelWrapper with generated and synthesized IP blocks + """ + model = step_hw_codegen(model, cfg) + model = step_hw_ipgen(model, cfg) + + # FIFO sizing - auto-detect if already done (e.g., analytically) + fifo_nodes = model.get_nodes_by_op_type("StreamingFIFO") + if len(fifo_nodes) == 0 and cfg.auto_fifo_depths: + # No FIFOs yet, run characterization/rtlsim + model = step_set_fifo_depths(model, cfg) + elif len(fifo_nodes) > 0: + # FIFOs already sized (analytical or manual), skip hardware characterization + print("FIFOs already present in model, skipping step_set_fifo_depths") + + return model + + +def phase_synthesize_hardware(model: ModelWrapper, cfg: DataflowBuildConfig): + """Phase: Create final hardware artifacts (stitched IP or bitfile + deployment package). + + This phase creates the final hardware deliverables based on requested outputs. + It can generate stitched IP (including optional OOC synthesis), measure RTL + simulation performance, or create a complete bitfile with driver and deployment + package. + + Internal steps (conditional on generate_outputs): + - step_create_stitched_ip: Create stitched IP (includes OOC synth if requested) + - step_measure_rtlsim_performance: Measure RTL sim performance (optional) + - step_synthesize_bitfile: Full bitfile synthesis (if BITFILE output requested) + - step_make_driver: Generate PYNQ or C++ driver + - step_deployment_package: Package for deployment + + Note: OOC (out-of-context) synthesis happens inside step_create_stitched_ip + when DataflowOutputType.OOC_SYNTH is requested, not as a separate step. + + Args: + model: Input ModelWrapper + cfg: Build configuration + + Returns: + ModelWrapper with final hardware artifacts generated + """ + # Stitched IP generation (if requested) + # Note: OOC synthesis happens inside step_create_stitched_ip when + # DataflowOutputType.OOC_SYNTH is requested + if DataflowOutputType.STITCHED_IP in cfg.generate_outputs: + model = step_create_stitched_ip(model, cfg) + + if cfg.measure_rtlsim_performance: + model = step_measure_rtlsim_performance(model, cfg) + + # Bitfile generation (if requested) + if DataflowOutputType.BITFILE in cfg.generate_outputs: + model = step_synthesize_bitfile(model, cfg) + model = step_make_driver(model, cfg) + model = step_deployment_package(model, cfg) + + return model + + +#: Map phase name strings to phase functions +build_dataflow_phase_lookup = { + "phase_prepare_model": phase_prepare_model, + "phase_optimize_model": phase_optimize_model, + "phase_convert_to_hardware": phase_convert_to_hardware, + "phase_optimize_hardware": phase_optimize_hardware, + "phase_build_hardware": phase_build_hardware, + "phase_synthesize_hardware": phase_synthesize_hardware, +} diff --git a/tests/fpgadataflow/test_fpgadataflow_finnloop.py b/tests/fpgadataflow/test_fpgadataflow_finnloop.py index 860eb6321c..74ea433f38 100644 --- a/tests/fpgadataflow/test_fpgadataflow_finnloop.py +++ b/tests/fpgadataflow/test_fpgadataflow_finnloop.py @@ -510,23 +510,14 @@ def test_finnloop_end2end_mlo( model.save(tmp_output_dir + "/mlo_model.onnx") - # steps are skipped because test model created with HLS and RTL layers + # Use phase-based pipeline + # Steps are adjusted because test model already has HLS and RTL layers steps = [ - # "step_qonnx_to_finn", - # "step_tidy_up", - # "step_streamline", - # "step_convert_to_hw", - "step_create_dataflow_partition", - # "step_specialize_layers", - "step_loop_rolling", - # "step_target_fps_parallelization", - "step_apply_folding_config", - "step_minimize_bit_width", - "step_generate_estimate_reports", - "step_hw_codegen", - "step_hw_ipgen", - "step_set_fifo_depths", - "step_create_stitched_ip", + "step_create_dataflow_partition", # Fine-grained (model already specialized) + "phase_convert_to_hardware", # Phase (includes loop rolling) + "phase_optimize_hardware", # Phase (includes folding, bit-width, reports) + "phase_build_hardware", # Phase (includes codegen, ipgen, FIFOs) + "step_create_stitched_ip", # Fine-grained (just IP, no full synth) ] cfg = build_cfg.DataflowBuildConfig( diff --git a/tests/fpgadataflow/test_fpgadataflow_layernorm.py b/tests/fpgadataflow/test_fpgadataflow_layernorm.py index 8d3cc7bf62..15d587cc95 100644 --- a/tests/fpgadataflow/test_fpgadataflow_layernorm.py +++ b/tests/fpgadataflow/test_fpgadataflow_layernorm.py @@ -448,19 +448,12 @@ def test_hls_rtl_dsp_conflict_detection(): with open(specialize_config_file, "w") as f: json.dump(specialize_config, f) - # Build steps - includes conversion to HW layers and specialization + # Build steps using phases steps = [ - "step_convert_to_hw", - "step_create_dataflow_partition", - "step_specialize_layers", - "step_target_fps_parallelization", - "step_apply_folding_config", - "step_minimize_bit_width", - "step_generate_estimate_reports", - "step_hw_codegen", - "step_hw_ipgen", - "step_set_fifo_depths", - "step_create_stitched_ip", + "phase_convert_to_hardware", # Includes convert_to_hw, partition, specialize, loop_rolling + "phase_optimize_hardware", # Includes target_fps, folding, bit_width, reports + "phase_build_hardware", # Includes codegen, ipgen, FIFO depths + "step_create_stitched_ip", # Fine-grained (just stitched IP) ] # Request verification steps that will trigger DSP conflict detection @@ -667,17 +660,10 @@ def test_integer_hls_elementwise_no_dsp_conflict(): # Build steps - includes conversion to HW layers and specialization steps = [ - "step_convert_to_hw", - "step_create_dataflow_partition", - "step_specialize_layers", - "step_target_fps_parallelization", - "step_apply_folding_config", - "step_minimize_bit_width", - "step_generate_estimate_reports", - "step_hw_codegen", - "step_hw_ipgen", - "step_set_fifo_depths", - "step_create_stitched_ip", + "phase_convert_to_hardware", # Includes convert_to_hw, partition, specialize, loop_rolling + "phase_optimize_hardware", # Includes target_fps, folding, bit_width, reports + "phase_build_hardware", # Includes codegen, ipgen, FIFO depths + "step_create_stitched_ip", # Fine-grained (just stitched IP) ] # Request verification steps - stitched_ip_rtlsim should NOT be skipped From f2052b7357bc3a0f63755788e9209a80c2201652 Mon Sep 17 00:00:00 2001 From: auphelia Date: Tue, 26 May 2026 17:26:46 +0100 Subject: [PATCH 2/4] [Builder] Preserve intermediate model saving --- src/finn/builder/build_dataflow.py | 7 ++ src/finn/builder/build_dataflow_config.py | 6 +- src/finn/builder/build_dataflow_phases.py | 104 +++++++++++----------- tests/util/test_build_dataflow_checks.py | 4 +- 4 files changed, 65 insertions(+), 56 deletions(-) diff --git a/src/finn/builder/build_dataflow.py b/src/finn/builder/build_dataflow.py index 94fd3d826c..d7201c6514 100644 --- a/src/finn/builder/build_dataflow.py +++ b/src/finn/builder/build_dataflow.py @@ -69,6 +69,13 @@ def flush(self): def resolve_build_steps(cfg: DataflowBuildConfig, partial: bool = True): + """Resolve build steps from config, supporting both phases and fine-grained steps. + + Note: When using phase-based builds with start_step/stop_step, specify phase names + (e.g., start_step="phase_build_hardware") rather than fine-grained step names. + Phases save intermediate models for each internal step, so checkpoints like + step_hw_ipgen.onnx will exist, but the build loop operates at the phase level. + """ steps = cfg.steps if steps is None: steps = default_build_dataflow_steps diff --git a/src/finn/builder/build_dataflow_config.py b/src/finn/builder/build_dataflow_config.py index ae2bd52c91..e7348a86df 100644 --- a/src/finn/builder/build_dataflow_config.py +++ b/src/finn/builder/build_dataflow_config.py @@ -342,10 +342,14 @@ class DataflowBuildConfig: steps: Optional[List[Any]] = None #: If given, start from this step, loading the intermediate model generated - #: from the previous step (save_intermediate_models must be enabled) + #: from the previous step (save_intermediate_models must be enabled). + #: Note: When using phase-based builds (default), specify phase names + #: (e.g., "phase_build_hardware") rather than fine-grained step names. start_step: Optional[str] = None #: If given, stop at this step. + #: Note: When using phase-based builds (default), specify phase names + #: (e.g., "phase_build_hardware") rather than fine-grained step names. stop_step: Optional[str] = None #: The optional argument `max_multithreshold_bit_width` affects which Quant nodes diff --git a/src/finn/builder/build_dataflow_phases.py b/src/finn/builder/build_dataflow_phases.py index 9f69f58f0d..c8d207f556 100644 --- a/src/finn/builder/build_dataflow_phases.py +++ b/src/finn/builder/build_dataflow_phases.py @@ -15,9 +15,10 @@ - Inject custom steps before/after phases using inject_steps_before/after config """ +import os from qonnx.core.modelwrapper import ModelWrapper -from finn.builder.build_dataflow_config import DataflowBuildConfig, DataflowOutputType +from finn.builder.build_dataflow_config import DataflowBuildConfig from finn.builder.build_dataflow_steps import ( step_apply_folding_config, step_convert_to_hw, @@ -42,6 +43,26 @@ ) +def _execute_step(step_fn, model: ModelWrapper, cfg: DataflowBuildConfig): + """Execute a step and save intermediate model if configured. + + This helper allows phases to save intermediate models after each internal step, + making fine-grained checkpoints available for inspection even when using phases. + """ + model = step_fn(model, cfg) + + # Save intermediate model if requested + if cfg.save_intermediate_models: + step_name = step_fn.__name__ + chkpt_name = f"{step_name}.onnx" + intermediate_model_dir = cfg.output_dir + "/intermediate_models" + if not os.path.exists(intermediate_model_dir): + os.makedirs(intermediate_model_dir) + model.save(f"{intermediate_model_dir}/{chkpt_name}") + + return model + + def phase_prepare_model(model: ModelWrapper, cfg: DataflowBuildConfig): """Phase: Import and prepare model for FINN transformations. @@ -59,8 +80,8 @@ def phase_prepare_model(model: ModelWrapper, cfg: DataflowBuildConfig): Returns: Prepared ModelWrapper ready for optimization """ - model = step_qonnx_to_finn(model, cfg) - model = step_tidy_up(model, cfg) + model = _execute_step(step_qonnx_to_finn, model, cfg) + model = _execute_step(step_tidy_up, model, cfg) return model @@ -83,7 +104,7 @@ def phase_optimize_model(model: ModelWrapper, cfg: DataflowBuildConfig): Returns: Streamlined ModelWrapper """ - model = step_streamline(model, cfg) + model = _execute_step(step_streamline, model, cfg) return model @@ -107,10 +128,10 @@ def phase_convert_to_hardware(model: ModelWrapper, cfg: DataflowBuildConfig): Returns: ModelWrapper with hardware-specialized operations """ - model = step_convert_to_hw(model, cfg) - model = step_create_dataflow_partition(model, cfg) - model = step_specialize_layers(model, cfg) - model = step_loop_rolling(model, cfg) + model = _execute_step(step_convert_to_hw, model, cfg) + model = _execute_step(step_create_dataflow_partition, model, cfg) + model = _execute_step(step_specialize_layers, model, cfg) + model = _execute_step(step_loop_rolling, model, cfg) return model @@ -121,11 +142,11 @@ def phase_optimize_hardware(model: ModelWrapper, cfg: DataflowBuildConfig): folding configurations, minimizes bit widths (after folding), decomposes transpose/shuffle operations, and generates analytical performance/resource reports. - Internal steps: + Internal steps (each step checks its own config parameters): - step_target_fps_parallelization: Auto-parallelization (if target_fps set) - - step_apply_folding_config: Apply folding configuration - - step_minimize_bit_width: Minimize weight/accumulator bit widths (after folding) - - step_transpose_decomposition: Decompose Shuffle nodes (after folding) + - step_apply_folding_config: Apply folding configuration (if config provided) + - step_minimize_bit_width: Minimize weight/accumulator bit widths (if enabled) + - step_transpose_decomposition: Decompose Shuffle nodes - step_generate_estimate_reports: Generate analytical estimates (if requested) Note: This is the extension point for future analytical FIFO sizing. @@ -137,24 +158,11 @@ def phase_optimize_hardware(model: ModelWrapper, cfg: DataflowBuildConfig): Returns: ModelWrapper with optimized parallelism and resource configuration """ - # Parallelization - if cfg.target_fps is not None: - model = step_target_fps_parallelization(model, cfg) - - # Apply folding configuration - if cfg.folding_config_file or cfg.auto_folding_config: - model = step_apply_folding_config(model, cfg) - - # Bit-width optimization (happens AFTER folding) - if cfg.minimize_bit_width: - model = step_minimize_bit_width(model, cfg) - - # Transpose/shuffle decomposition (happens AFTER folding and bit-width) - model = step_transpose_decomposition(model, cfg) - - # Report generation (step checks if ESTIMATE_REPORTS is requested) - model = step_generate_estimate_reports(model, cfg) - + model = _execute_step(step_target_fps_parallelization, model, cfg) + model = _execute_step(step_apply_folding_config, model, cfg) + model = _execute_step(step_minimize_bit_width, model, cfg) + model = _execute_step(step_transpose_decomposition, model, cfg) + model = _execute_step(step_generate_estimate_reports, model, cfg) return model @@ -182,14 +190,14 @@ def phase_build_hardware(model: ModelWrapper, cfg: DataflowBuildConfig): Returns: ModelWrapper with generated and synthesized IP blocks """ - model = step_hw_codegen(model, cfg) - model = step_hw_ipgen(model, cfg) + model = _execute_step(step_hw_codegen, model, cfg) + model = _execute_step(step_hw_ipgen, model, cfg) # FIFO sizing - auto-detect if already done (e.g., analytically) fifo_nodes = model.get_nodes_by_op_type("StreamingFIFO") if len(fifo_nodes) == 0 and cfg.auto_fifo_depths: # No FIFOs yet, run characterization/rtlsim - model = step_set_fifo_depths(model, cfg) + model = _execute_step(step_set_fifo_depths, model, cfg) elif len(fifo_nodes) > 0: # FIFOs already sized (analytical or manual), skip hardware characterization print("FIFOs already present in model, skipping step_set_fifo_depths") @@ -205,12 +213,12 @@ def phase_synthesize_hardware(model: ModelWrapper, cfg: DataflowBuildConfig): simulation performance, or create a complete bitfile with driver and deployment package. - Internal steps (conditional on generate_outputs): + Internal steps (each step checks generate_outputs): - step_create_stitched_ip: Create stitched IP (includes OOC synth if requested) - - step_measure_rtlsim_performance: Measure RTL sim performance (optional) - - step_synthesize_bitfile: Full bitfile synthesis (if BITFILE output requested) - - step_make_driver: Generate PYNQ or C++ driver - - step_deployment_package: Package for deployment + - step_measure_rtlsim_performance: Measure RTL sim performance (if requested) + - step_synthesize_bitfile: Full bitfile synthesis (if BITFILE requested) + - step_make_driver: Generate PYNQ or C++ driver (if BITFILE requested) + - step_deployment_package: Package for deployment (if requested) Note: OOC (out-of-context) synthesis happens inside step_create_stitched_ip when DataflowOutputType.OOC_SYNTH is requested, not as a separate step. @@ -222,21 +230,11 @@ def phase_synthesize_hardware(model: ModelWrapper, cfg: DataflowBuildConfig): Returns: ModelWrapper with final hardware artifacts generated """ - # Stitched IP generation (if requested) - # Note: OOC synthesis happens inside step_create_stitched_ip when - # DataflowOutputType.OOC_SYNTH is requested - if DataflowOutputType.STITCHED_IP in cfg.generate_outputs: - model = step_create_stitched_ip(model, cfg) - - if cfg.measure_rtlsim_performance: - model = step_measure_rtlsim_performance(model, cfg) - - # Bitfile generation (if requested) - if DataflowOutputType.BITFILE in cfg.generate_outputs: - model = step_synthesize_bitfile(model, cfg) - model = step_make_driver(model, cfg) - model = step_deployment_package(model, cfg) - + model = _execute_step(step_create_stitched_ip, model, cfg) + model = _execute_step(step_measure_rtlsim_performance, model, cfg) + model = _execute_step(step_synthesize_bitfile, model, cfg) + model = _execute_step(step_make_driver, model, cfg) + model = _execute_step(step_deployment_package, model, cfg) return model diff --git a/tests/util/test_build_dataflow_checks.py b/tests/util/test_build_dataflow_checks.py index b6cdccd120..84c010ca2a 100644 --- a/tests/util/test_build_dataflow_checks.py +++ b/tests/util/test_build_dataflow_checks.py @@ -33,11 +33,11 @@ def make_test_model(build_dir): def cfg(output_dir, **kw): - """Create config that stops immediately after first step.""" + """Create config that stops immediately after first phase.""" return DataflowBuildConfig( output_dir=output_dir, synth_clk_period_ns=5.0, - stop_step="step_qonnx_to_finn", + stop_step="phase_prepare_model", generate_outputs=kw.pop("generate_outputs", [DataflowOutputType.ESTIMATE_REPORTS]), **kw ) From f264f05427c313ee18c548d66e3cf8e9ea468d56 Mon Sep 17 00:00:00 2001 From: auphelia Date: Wed, 27 May 2026 11:28:35 +0100 Subject: [PATCH 3/4] [Analysis] Add analysis pass that validates result of HW conversion --- .../validate_dataflow_conversion.py | 100 +++++++++++ src/finn/builder/build_dataflow_phases.py | 17 +- .../test_validate_dataflow_conversion.py | 160 ++++++++++++++++++ 3 files changed, 276 insertions(+), 1 deletion(-) create mode 100644 src/finn/analysis/fpgadataflow/validate_dataflow_conversion.py create mode 100644 tests/fpgadataflow/test_validate_dataflow_conversion.py diff --git a/src/finn/analysis/fpgadataflow/validate_dataflow_conversion.py b/src/finn/analysis/fpgadataflow/validate_dataflow_conversion.py new file mode 100644 index 0000000000..952edac45c --- /dev/null +++ b/src/finn/analysis/fpgadataflow/validate_dataflow_conversion.py @@ -0,0 +1,100 @@ +# Copyright Advanced Micro Devices, Inc. +# SPDX-License-Identifier: BSD-3-Clause + +"""Analysis pass to validate that model has been properly converted to fpgadataflow layers.""" + +from finn.util.fpgadataflow import is_fpgadataflow_node + + +def validate_dataflow_conversion(model): + """Validate that model has been properly converted to dataflow layers. + + Checks that either: + 1. All layers are fpgadataflow layers (ideal case), OR + 2. Fpgadataflow layers form a contiguous block in the middle of the model, + with only non-dataflow layers on the outside (partition case) + + Returns a dictionary with validation results: + - 'valid': bool indicating if validation passed + - 'message': str with validation status message + - 'unconverted_layers': list of (index, name, op_type) tuples for non-dataflow layers + - 'dataflow_block': tuple (first_index, last_index) if dataflow forms a block, else None + + Example usage in transformation: + result = model.analysis(validate_dataflow_conversion) + if not result['valid']: + raise AssertionError(result['message']) + """ + nodes = model.graph.node + fpgadataflow_nodes = [] + non_fpgadataflow_nodes = [] + + for i, node in enumerate(nodes): + if is_fpgadataflow_node(node): + fpgadataflow_nodes.append((i, node.name, node.op_type)) + else: + non_fpgadataflow_nodes.append((i, node.name, node.op_type)) + + # Case 1: All nodes are fpgadataflow (ideal) + if len(non_fpgadataflow_nodes) == 0: + return { + "valid": True, + "message": "Dataflow conversion validation: All layers are fpgadataflow layers", + "unconverted_layers": [], + "dataflow_block": None, + } + + # Case 2: Check if fpgadataflow nodes form contiguous block + if len(fpgadataflow_nodes) > 0: + dataflow_indices = [i for i, _, _ in fpgadataflow_nodes] + first_dataflow = min(dataflow_indices) + last_dataflow = max(dataflow_indices) + + # Check all indices between first and last are dataflow + for i in range(first_dataflow, last_dataflow + 1): + node = nodes[i] + if not is_fpgadataflow_node(node): + # Found non-dataflow layer inside dataflow block + unconverted_str = "\n".join( + [ + f" [{idx}] {name} (op_type: {op})" + for idx, name, op in non_fpgadataflow_nodes + ] + ) + return { + "valid": False, + "message": ( + "Non-contiguous dataflow block detected.\n" + f"Layer '{node.name}' (op_type: {node.op_type}) at position {i} " + "is not a fpgadataflow layer but is between dataflow layers.\n" + f"Dataflow block spans positions {first_dataflow} to {last_dataflow}.\n" + f"Unconverted layers:\n{unconverted_str}" + ), + "unconverted_layers": non_fpgadataflow_nodes, + "dataflow_block": (first_dataflow, last_dataflow), + } + + # Valid: fpgadataflow block in middle + return { + "valid": True, + "message": ( + "Dataflow conversion validation: Fpgadataflow layers form contiguous block " + f"(positions {first_dataflow}-{last_dataflow})" + ), + "unconverted_layers": non_fpgadataflow_nodes, + "dataflow_block": (first_dataflow, last_dataflow), + } + + # Case 3: No fpgadataflow layers at all + unconverted_str = "\n".join( + [f" [{idx}] {name} (op_type: {op})" for idx, name, op in non_fpgadataflow_nodes] + ) + return { + "valid": False, + "message": ( + "No fpgadataflow layers found in model.\n" + f"All layers remain unconverted:\n{unconverted_str}", + ), + "unconverted_layers": non_fpgadataflow_nodes, + "dataflow_block": None, + } diff --git a/src/finn/builder/build_dataflow_phases.py b/src/finn/builder/build_dataflow_phases.py index c8d207f556..7bb13df420 100644 --- a/src/finn/builder/build_dataflow_phases.py +++ b/src/finn/builder/build_dataflow_phases.py @@ -18,6 +18,9 @@ import os from qonnx.core.modelwrapper import ModelWrapper +from finn.analysis.fpgadataflow.validate_dataflow_conversion import ( + validate_dataflow_conversion, +) from finn.builder.build_dataflow_config import DataflowBuildConfig from finn.builder.build_dataflow_steps import ( step_apply_folding_config, @@ -113,13 +116,15 @@ def phase_convert_to_hardware(model: ModelWrapper, cfg: DataflowBuildConfig): This phase identifies hardware-eligible operations, creates the dataflow partition, specializes layers for the target backend (HLS/RTL), and handles - loop rolling for FINNLoop nodes. + loop rolling for FINNLoop nodes. After conversion, validates that all layers + are fpgadataflow layers or form a contiguous dataflow block. Internal steps: - step_convert_to_hw: Infer hardware layer types - step_create_dataflow_partition: Create accelerator subgraph - step_specialize_layers: Convert to HLS or RTL variants - step_loop_rolling: Process FINNLoop nodes (auto-detects if needed) + - Validation: Check dataflow conversion success Args: model: Input ModelWrapper @@ -127,11 +132,21 @@ def phase_convert_to_hardware(model: ModelWrapper, cfg: DataflowBuildConfig): Returns: ModelWrapper with hardware-specialized operations + + Raises: + AssertionError: If dataflow conversion validation fails """ model = _execute_step(step_convert_to_hw, model, cfg) model = _execute_step(step_create_dataflow_partition, model, cfg) model = _execute_step(step_specialize_layers, model, cfg) model = _execute_step(step_loop_rolling, model, cfg) + + # Validate dataflow conversion + validation_result = model.analysis(validate_dataflow_conversion) + print(validation_result["message"]) + if not validation_result["valid"]: + raise AssertionError(validation_result["message"]) + return model diff --git a/tests/fpgadataflow/test_validate_dataflow_conversion.py b/tests/fpgadataflow/test_validate_dataflow_conversion.py new file mode 100644 index 0000000000..b6933d82e6 --- /dev/null +++ b/tests/fpgadataflow/test_validate_dataflow_conversion.py @@ -0,0 +1,160 @@ +# Copyright Advanced Micro Devices, Inc. +# SPDX-License-Identifier: BSD-3-Clause + +"""Tests for dataflow conversion validation analysis pass.""" + +import pytest + +import numpy as np +from onnx import TensorProto, helper +from qonnx.core.datatype import DataType +from qonnx.core.modelwrapper import ModelWrapper +from qonnx.transformation.infer_datatypes import InferDataTypes +from qonnx.transformation.infer_shapes import InferShapes +from qonnx.util.basic import gen_finn_dt_tensor, qonnx_make_model + +from finn.analysis.fpgadataflow.validate_dataflow_conversion import ( + validate_dataflow_conversion, +) +from finn.transformation.fpgadataflow.convert_to_hw_layers import ( + InferElementwiseBinaryOperation, + InferHWSoftmax, + InferQuantizedMatrixVectorActivation, + InferThresholdingLayer, +) +from finn.util.fpgadataflow import is_fpgadataflow_node + + +def make_test_model(): + """Create a small model with different layer types for testing validation. + + Model structure (all non-fpgadataflow initially): + - Layer 0: Transpose + - Layer 1: MatMul (INT4 weights) + - Layer 2: MultiThreshold + - Layer 3: Mul + - Layer 4: Softmax + """ + inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, [1, 4, 4]) + out = helper.make_tensor_value_info("out", TensorProto.FLOAT, [1, 4, 4]) + + # Layer 0: Transpose + node0 = helper.make_node("Transpose", ["inp"], ["t0"], perm=[0, 1, 2]) + + # Layer 1: MatMul with INT4 weights + W1_data = gen_finn_dt_tensor(DataType["INT4"], (4, 4)) + W1 = helper.make_tensor("W1", TensorProto.FLOAT, [4, 4], W1_data.flatten().tolist()) + node1 = helper.make_node("MatMul", ["t0", "W1"], ["t1"]) + + # Layer 2: MultiThreshold (QONNX custom op) + # UINT4 has 16 values (0-15), so we need 15 thresholds per channel + T2_data = gen_finn_dt_tensor(DataType["INT16"], (4, 15)) + T2_data = np.sort(T2_data, axis=1) # Sort thresholds in increasing order + T2 = helper.make_tensor("T2", TensorProto.FLOAT, [4, 15], T2_data.flatten().tolist()) + node2 = helper.make_node( + "MultiThreshold", + ["t1", "T2"], + ["t2"], + domain="qonnx.custom_op.general", + out_dtype="UINT4", + data_layout="NHWC", + ) + + # Layer 3: Mul + scale_data = np.array([2.0], dtype=np.float32) + scale = helper.make_tensor("scale", TensorProto.FLOAT, [1], scale_data.tolist()) + node3 = helper.make_node("Mul", ["t2", "scale"], ["t3"]) + + # Layer 4: Softmax + node4 = helper.make_node("Softmax", ["t3"], ["out"], axis=-1) + + graph = helper.make_graph( + [node0, node1, node2, node3, node4], + "test_validation", + [inp], + [out], + initializer=[W1, T2, scale], + ) + + model = qonnx_make_model(graph) + model = ModelWrapper(model) + + # Set INT4 datatypes + model.set_tensor_datatype("inp", DataType["INT4"]) + model.set_tensor_datatype("W1", DataType["INT4"]) + model.set_tensor_datatype("T2", DataType["INT16"]) + + model = model.transform(InferShapes()) + model = model.transform(InferDataTypes()) + + return model + + +@pytest.mark.fpgadataflow +def test_validate_dataflow_conversion_scenarios(): + """Test validation through progressive conversion scenarios. + + Test plan: + 0. No conversions - should fail + 1. Convert layer 2 (MultiThreshold) → [non, non, fpga, non, non] - should pass + 2. Convert layer 4 (Softmax) → [non, non, fpga, non, fpga] - should FAIL (non-contiguous) + 3. Convert layer 3 and 1 (Mul and MatMul) → [non, fpga, fpga, fpga, fpga] - should pass + """ + + # Scenario 0: No fpgadataflow layers - should fail + print("\n--- Scenario 0: No fpgadataflow layers ---") + model = make_test_model() + result = model.analysis(validate_dataflow_conversion) + print(f"Valid: {result['valid']}") + print(f"Message: {result['message']}") + + assert result["valid"] is False, "Expected validation to fail with no fpgadataflow layers" + assert "No fpgadataflow layers found" in result["message"] + assert len(result["unconverted_layers"]) == 5 + + # Scenario 1: Convert layer 2 (MultiThreshold) → [non, non, fpga, non, non] + print("\n--- Scenario 1: Convert layer 2 (MultiThreshold) ---") + model = model.transform(InferThresholdingLayer()) + result = model.analysis(validate_dataflow_conversion) + print(f"Valid: {result['valid']}") + print(f"Message: {result['message']}") + + assert result["valid"] is True + assert "contiguous block" in result["message"].lower() + assert result["dataflow_block"] == (2, 2) + + # Scenario 2: Convert layer 4 (Softmax) → [non, non, fpga, non, fpga] - should FAIL + print("\n--- Scenario 2: Convert layer 4 (Softmax) - EXPECT FAILURE ---") + model = model.transform(InferHWSoftmax()) + result = model.analysis(validate_dataflow_conversion) + print(f"Valid: {result['valid']}") + print(f"Message: {result['message']}") + + assert ( + result["valid"] is False + ), "Expected validation to fail with non-contiguous dataflow block" + assert "Non-contiguous dataflow block detected" in result["message"] + + # Scenario 3: Convert layer 3 (Mul) and layer 1 (MatMul) → [non, fpga, fpga, fpga, fpga] + print("\n--- Scenario 3: Convert layers 3 (Mul) and 1 (MatMul) ---") + model = model.transform(InferElementwiseBinaryOperation()) + model = model.transform(InferQuantizedMatrixVectorActivation()) + result = model.analysis(validate_dataflow_conversion) + print(f"Valid: {result['valid']}") + print(f"Message: {result['message']}") + + assert result["valid"] is True + assert "contiguous block" in result["message"].lower() + assert result["dataflow_block"] == (1, 4) + + # Final verification + print("\n--- Final verification ---") + nodes = model.graph.node + fpgadataflow_count = sum(1 for node in nodes if is_fpgadataflow_node(node)) + print(f"Fpgadataflow layers: {fpgadataflow_count} / {len(nodes)}") + print(f"Total nodes: {len(nodes)}") + + # 4 out of 5 layers should be fpgadataflow (all except Transpose) + assert fpgadataflow_count == 4 + assert result["valid"] is True + assert len(result["unconverted_layers"]) == 1 # Only Transpose unconverted From 869e2a0fd58fd64dd5d13f0bbda9e033e19fe26f Mon Sep 17 00:00:00 2001 From: auphelia Date: Wed, 27 May 2026 12:43:21 +0100 Subject: [PATCH 4/4] [Analysis] validate df conv: Fix trailing comma bug in string --- src/finn/analysis/fpgadataflow/validate_dataflow_conversion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/finn/analysis/fpgadataflow/validate_dataflow_conversion.py b/src/finn/analysis/fpgadataflow/validate_dataflow_conversion.py index 952edac45c..e2a1b1cd2c 100644 --- a/src/finn/analysis/fpgadataflow/validate_dataflow_conversion.py +++ b/src/finn/analysis/fpgadataflow/validate_dataflow_conversion.py @@ -93,7 +93,7 @@ def validate_dataflow_conversion(model): "valid": False, "message": ( "No fpgadataflow layers found in model.\n" - f"All layers remain unconverted:\n{unconverted_str}", + f"All layers remain unconverted:\n{unconverted_str}" ), "unconverted_layers": non_fpgadataflow_nodes, "dataflow_block": None,