Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for multi FPGA builds #952

Draft
wants to merge 3 commits into
base: dev
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 62 additions & 0 deletions src/finn/builder/build_dataflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,10 @@
import sys
import time
import traceback
from concurrent import futures
from copy import deepcopy
from qonnx.core.modelwrapper import ModelWrapper
from qonnx.custom_op.registry import getCustomOp

from finn.builder.build_dataflow_config import (
DataflowBuildConfig,
Expand Down Expand Up @@ -108,6 +111,10 @@ def build_dataflow_cfg(model_filename, cfg: DataflowBuildConfig):
:param model_filename: ONNX model filename to build
:param cfg: Build configuration
"""
# Dispatch a distributed build if we are splitting up the model as part of the build.
if "step_split_dataflow" in step_names:
build_distributed_dataflow_cfg(model_filename, cfg)

# if start_step is specified, override the input model
if cfg.start_step is None:
print("Building dataflow accelerator from " + model_filename)
Expand Down Expand Up @@ -189,6 +196,61 @@ def build_dataflow_cfg(model_filename, cfg: DataflowBuildConfig):
return 0


def run_async_build_iter(cfg, local_cfg, node, i):
    """Run the per-partition ("local") build for one StreamingDataflowPartition.

    Intended to be dispatched via a ProcessPoolExecutor, one call per
    partition; the executor pickles the arguments, so each worker mutates
    its own copy of local_cfg.

    :param cfg: Parent (global) build configuration; only output_dir is read
    :param local_cfg: Build configuration for the local (post-split) steps
    :param node: StreamingDataflowPartition node holding the child model
    :param i: Partition index, used to namespace the output directory
    :return: Exit code of the child build (0 on success)
    """
    # TODO: Make the output dir match the rank of the partition
    local_cfg.output_dir = f"{cfg.output_dir}/{i}"

    node_inst = getCustomOp(node)
    child_model_filename = node_inst.get_nodeattr("model")

    print(f"Launching build for partition {i}")
    # Propagate the child build's exit code so the dispatcher can
    # detect a failed partition build instead of silently dropping it.
    return build_dataflow_cfg(child_model_filename, local_cfg)


def build_distributed_dataflow_cfg(model_filename, cfg: DataflowBuildConfig):
    """Dispatch a distributed (multi-FPGA) dataflow build.

    The step list is split at ``step_split_dataflow``: everything up to and
    including that step runs once as a "global" build, then the remaining
    steps run once per resulting StreamingDataflowPartition, in parallel
    worker processes (one child build per partition).

    :param model_filename: ONNX model filename to build
    :param cfg: Build configuration; must contain ``step_split_dataflow`` in
        its steps and have ``save_intermediate_models`` enabled (the child
        builds reload the post-split model from the intermediate models dir)
    :return: 0 on success, -1 on a configuration error or failed global build
    """
    steps = resolve_build_steps(cfg, partial=False)
    step_names = list(map(lambda x: x.__name__, steps))

    # TODO: Not sure if implicitly splitting up the config is the best way
    # to do this. Maybe it would be better for the user to explicitly
    # provide global and local build configs.

    split_step = "step_split_dataflow"
    if split_step not in step_names:
        print("Dataflow should be split up as part of distributed build")
        return -1

    split_idx = step_names.index(split_step) + 1

    # Global config runs steps up to and including the split step; the
    # local config runs everything after it, once per partition.
    global_cfg = deepcopy(cfg)
    global_cfg.steps = steps[:split_idx]

    local_cfg = deepcopy(cfg)
    local_cfg.steps = steps[split_idx:]

    if not cfg.save_intermediate_models:
        print("save_intermediate_models must be enabled for distributed build")
        return -1

    if not cfg.start_step or cfg.start_step in step_names[:split_idx]:
        if build_dataflow_cfg(model_filename, global_cfg) != 0:
            print("Global build failed")
            return -1

        # The global build consumed start_step; child builds start fresh.
        local_cfg.start_step = None

    # Nothing left to do if the user asked to stop within the global steps.
    if cfg.stop_step and cfg.stop_step in step_names[:split_idx]:
        return 0

    intermediate_models_dir = cfg.output_dir + "/intermediate_models"
    parent_model = ModelWrapper(f"{intermediate_models_dir}/{split_step}.onnx")

    sdp_nodes = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")
    with futures.ProcessPoolExecutor() as pool:
        jobs = [
            pool.submit(run_async_build_iter, cfg, local_cfg, node, i)
            for i, node in enumerate(sdp_nodes)
        ]
    for job in jobs:
        # result() re-raises any exception raised inside the worker, so
        # partition build crashes are surfaced instead of silently dropped.
        job.result()

    return 0


def build_dataflow_directory(path_to_cfg_dir: str):
"""Best-effort build a dataflow accelerator from the specified directory.

Expand Down
7 changes: 7 additions & 0 deletions src/finn/builder/build_dataflow_steps.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@
SplitLargeFIFOs,
)
from finn.transformation.fpgadataflow.set_folding import SetFolding
from finn.transformation.fpgadataflow.split_dataflow import SplitDataflow
from finn.transformation.fpgadataflow.synth_ooc import SynthOutOfContext
from finn.transformation.fpgadataflow.vitis_build import VitisBuild
from finn.transformation.move_reshape import RemoveCNVtoFCFlatten
Expand Down Expand Up @@ -601,6 +602,11 @@ def step_set_fifo_depths(model: ModelWrapper, cfg: DataflowBuildConfig):
return model


def step_split_dataflow(model: ModelWrapper, cfg: DataflowBuildConfig):
    """Split the dataflow graph into per-device partitions for a
    multi-FPGA build by applying the SplitDataflow transformation."""
    return model.transform(SplitDataflow())


def step_create_stitched_ip(model: ModelWrapper, cfg: DataflowBuildConfig):
"""Create stitched IP for a graph after all HLS IP blocks have been generated.
Depends on the DataflowOutputType.STITCHED_IP output product."""
Expand Down Expand Up @@ -832,6 +838,7 @@ def step_deployment_package(model: ModelWrapper, cfg: DataflowBuildConfig):
"step_hls_codegen": step_hls_codegen,
"step_hls_ipgen": step_hls_ipgen,
"step_set_fifo_depths": step_set_fifo_depths,
"step_split_dataflow": step_split_dataflow,
"step_create_stitched_ip": step_create_stitched_ip,
"step_measure_rtlsim_performance": step_measure_rtlsim_performance,
"step_make_pynq_driver": step_make_pynq_driver,
Expand Down
36 changes: 36 additions & 0 deletions src/finn/transformation/fpgadataflow/split_dataflow.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
from qonnx.core.modelwrapper import ModelWrapper
from qonnx.custom_op.registry import getCustomOp
from qonnx.transformation.base import Transformation
from qonnx.transformation.create_generic_partitions import PartitionFromLambda
from qonnx.util.basic import get_by_name

from finn.util.basic import make_build_dir

class SplitDataflow(Transformation):
    """Partition the dataflow graph by device for a multi-FPGA build.

    Nodes are grouped by their "device_id" node attribute; each group is
    extracted into a child model (saved under a dedicated build directory)
    and replaced by a StreamingDataflowPartition node in the parent graph.

    NOTE(review): assumes every non-partition node exposes a "device_id"
    attribute -- get_nodeattr will raise otherwise; confirm an upstream
    step assigns it before this transformation runs.
    """

    def __init__(self):
        super().__init__()
        # Directory that receives the per-partition child models.
        self.partition_model_dir = make_build_dir("distributed_partitions_")

    def apply(self, model):
        def assign_partition_id(node):
            # Never re-partition nodes that are already partitions.
            if node.op_type in ["GenericPartition", "StreamingDataflowPartition"]:
                return -1

            return getCustomOp(node).get_nodeattr("device_id")

        model = model.transform(
            PartitionFromLambda(
                partitioning=assign_partition_id, partition_dir=self.partition_model_dir
            )
        )

        # Rebrand the generic partitions as FINN StreamingDataflowPartitions
        # and record which device each one targets.
        # NOTE(review): device_id is reassigned from the enumeration order of
        # the created partitions -- confirm this matches the original ids.
        p_nodes = model.get_nodes_by_op_type("GenericPartition")

        for partition_ind, p_node in enumerate(p_nodes):
            p_node.op_type = "StreamingDataflowPartition"
            p_node.domain = "finn.custom_op.fpgadataflow"
            new_p_node_inst = getCustomOp(p_node)
            new_p_node_inst.set_nodeattr("device_id", partition_ind)

        # Removed debug leftover: model.save('model.onnx') wrote a
        # hard-coded file into the current working directory on every apply.

        # Single-pass transformation; no need to re-run.
        return (model, False)