-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
6f3963f
commit cf91b1f
Showing
11 changed files
with
478 additions
and
11 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,108 @@ | ||
# Copyright (c) 2024 RapidStream Design Automation, Inc. and contributors. All rights reserved. | ||
# The contributor(s) of this file has/have agreed to the RapidStream Contributor License Agreement. | ||
|
||
# ---------------------------------------------------------------------------
# Build configuration.
# All values are simply-expanded (:=), so each variable is defined before
# the first variable that refers to it.
# ---------------------------------------------------------------------------

# Repository root and the helper scripts that live under it.
ROOT_DIR      := $(shell git rev-parse --show-toplevel)
GRP_UTIL      := $(ROOT_DIR)/common/util/get_group.py
SLACK_GETTER  := $(ROOT_DIR)/common/util/get_slack.py

# Target platform / part and the kernel being built.
# NOTE(review): PLATFORM and PART_NUM name a U280 while RS_SCRIPT is
# run_u55c.py - confirm this pairing is intended.
PLATFORM      := xilinx_u280_gen3x16_xdma_1_202211_1
PART_NUM      := xcu280-fsvh2892-2L-e
KERNEL_NAME   := VecAdd
TARGET        := hw

# RapidStream entry points.
RSXX          := rapidstream
RSPYTHON      := rapidstream
RSPATH        := $(CURDIR)
RS_SCRIPT     := $(CURDIR)/run_u55c.py

# Design sources and the per-script configuration directory.
SRC_DIR       := $(CURDIR)/design
config_dir    := $(CURDIR)/design/config/$(notdir $(RS_SCRIPT))
AB_CONFIG     := $(config_dir)/floorplan_config.json
IMPL_CONFIG   := $(config_dir)/impl_config.json
LINK_CONFIG   := $(config_dir)/link_config.ini
FIX_NOC_TCL   := $(config_dir)/fix_noc.tcl

# Build outputs; everything is placed under build/<script name>.
TEMP_DIR      := $(CURDIR)/build/$(notdir $(RS_SCRIPT))
KERNEL_XO     := $(TEMP_DIR)/$(KERNEL_NAME).xo
KERNEL_XCLBIN := $(TEMP_DIR)/$(KERNEL_NAME).xclbin
DEVICE_CONFIG := $(TEMP_DIR)/device.json
RS_TARGET     := $(TEMP_DIR)/dse/solution_0/updated.xo
RS_XCLBIN     := $(TEMP_DIR)/dse/solution_0/vitis_run_hw/$(KERNEL_NAME)_$(PLATFORM).xclbin
BUILD_LOG     := $(TEMP_DIR)/build.json
TIMING_RPT    := impl_1_hw_bb_locked_timing_summary_routed.rpt
SUCCESS       := "Build Successful"

# Host-side compilation settings.
# XILINX_HLS and OPT_LEVEL are not assigned in this file; they are taken
# from the environment or the command line and expand to nothing if unset.
INCLUDE       := -I $(XILINX_HLS)/include
CFLAGS        := $(INCLUDE) $(OPT_LEVEL)
CXX           := g++
HOST          := app.exe
|
||
# Default goal: build the RapidStream-optimized xclbin, then run the slack
# helper script on $(TIMING_RPT) under $(TEMP_DIR), writing $(BUILD_LOG).
# `all` is a command name, not a file, so it is declared phony; otherwise a
# stray file named `all` would silently disable it.
.PHONY: all
all: $(RS_XCLBIN)
	$(RSXX) $(SLACK_GETTER) -d $(TEMP_DIR) -i $(TIMING_RPT) -o $(BUILD_LOG) -c clk_kernel_00_unbuffered_net -p 3.333
	echo $(SUCCESS)
|
||
# Link the RapidStream-optimized object file into an xclbin with v++.
# $(LINK_CONFIG) is read by the recipe, so it is listed as a prerequisite:
# editing the connectivity file now triggers a relink. Because of that extra
# prerequisite, the object file is passed as $< (first prerequisite) rather
# than $^.
$(RS_XCLBIN): $(RS_TARGET) $(LINK_CONFIG)
	v++ -l -t $(TARGET) \
	    --platform $(PLATFORM) \
	    --kernel $(KERNEL_NAME) \
	    --connectivity.nk $(KERNEL_NAME):1:$(KERNEL_NAME) \
	    --config $(LINK_CONFIG) \
	    --temp_dir $(TEMP_DIR) \
	    -o $@ \
	    $<
|
||
# Run rapidstream-tapaopt on the TAPA-generated .xo.
# Every file handed to the tool on the command line is a prerequisite, so a
# change to the implementation or connectivity configuration re-runs the
# optimization (previously only the floorplan config did).
# $< is the first prerequisite, i.e. $(KERNEL_XO).
$(RS_TARGET): $(KERNEL_XO) $(DEVICE_CONFIG) $(AB_CONFIG) $(IMPL_CONFIG) $(LINK_CONFIG)
	mkdir -p $(TEMP_DIR)
	cd $(RSPATH) && $(RSXX)-tapaopt \
	    --work-dir $(TEMP_DIR) \
	    --tapa-xo-path $< \
	    --device-config $(DEVICE_CONFIG) \
	    --floorplan-config $(AB_CONFIG) \
	    --implementation-config $(IMPL_CONFIG) \
	    --connectivity-ini $(LINK_CONFIG)
|
||
# Convenience alias for generating the virtual device description.
# Declared phony because `device` is a command name, not a file.
.PHONY: device
device: $(DEVICE_CONFIG)

# Regenerate the device description whenever the RapidStream script changes.
# NOTE(review): the recipe never names $@; it presumably relies on
# $(RS_SCRIPT) writing $(DEVICE_CONFIG) itself - confirm against the script.
$(DEVICE_CONFIG): $(RS_SCRIPT)
	mkdir -p $(TEMP_DIR)
	cd $(RSPATH) && $(RSPYTHON) $(RS_SCRIPT)
|
||
# Convenience alias: link the unoptimized kernel straight into an xclbin.
# Declared phony because `hw` is a command name, not a file.
.PHONY: hw
hw: $(KERNEL_XCLBIN)

# Link the TAPA-generated object file with v++.
# $(LINK_CONFIG) is read by the recipe and is therefore a prerequisite; the
# object file is passed as $< so the config file is not also given to v++ as
# an input object.
$(KERNEL_XCLBIN): $(KERNEL_XO) $(LINK_CONFIG)
	v++ -l -t $(TARGET) \
	    --platform $(PLATFORM) \
	    --kernel $(KERNEL_NAME) \
	    --connectivity.nk $(KERNEL_NAME):1:$(KERNEL_NAME) \
	    --config $(LINK_CONFIG) \
	    --temp_dir $(TEMP_DIR) \
	    -o $@ \
	    $<
|
||
# Convenience alias for building the TAPA object file.
# Declared phony because `xo` is a command name, not a file.
.PHONY: xo
xo: $(KERNEL_XO)

# The recipe pipes the compiler output through `tee`. With the default shell
# the pipeline's exit status is that of `tee`, so a failing `tapa compile`
# would be reported as success. Run this target's recipe under bash with
# pipefail so the failure propagates to make.
# (.SHELLFLAGS needs GNU Make >= 3.82; older versions ignore it and behave
# as before.)
$(KERNEL_XO): SHELL := bash
$(KERNEL_XO): .SHELLFLAGS := -o pipefail -c

# Compile the kernel source into $(KERNEL_NAME).xo inside $(TEMP_DIR),
# keeping a copy of the tool output in $(TEMP_DIR)/tapa.log.
$(KERNEL_XO): $(SRC_DIR)/$(KERNEL_NAME).cpp
	mkdir -p $(TEMP_DIR)
	cd $(TEMP_DIR) && tapa compile \
	    --top $(KERNEL_NAME) \
	    --part-num $(PART_NUM) \
	    --keep-hls-work-dir \
	    --clock-period 3.33 \
	    -o $(KERNEL_NAME).xo \
	    -f $< \
	    2>&1 | tee tapa.log
|
||
# Build and run the software simulation executable.
# Declared phony because `csim` is a command name, not a file.
.PHONY: csim
csim: $(TEMP_DIR)/main.exe
	$<

# Compile the host test bench together with the kernel source using
# `tapa g++`. The kernel file is derived from $(KERNEL_NAME) so it stays in
# step with the source used by the .xo rule instead of being hard-coded.
$(TEMP_DIR)/main.exe: $(SRC_DIR)/main.cpp $(SRC_DIR)/$(KERNEL_NAME).cpp
	mkdir -p $(TEMP_DIR)
	cd $(TEMP_DIR) && tapa g++ $^ $(INCLUDE) -o $@ -O2
|
||
|
||
# Run the group-type helper script on the importer pass output and write the
# result to $(TEMP_DIR)/module_types.csv.
# The original echoed $(RS_KERNEL_XCLBIN), which is not defined anywhere in
# this Makefile and always expanded to an empty string; $(RS_XCLBIN) is the
# defined variable with the closest name.
# NOTE(review): confirm $(RS_XCLBIN) is the path that was meant to be shown.
.PHONY: show_groups
show_groups:
	@echo $(RS_XCLBIN)
	$(RSXX) $(GRP_UTIL) -i $(TEMP_DIR)/passes/1-importer.json \
	    -o $(TEMP_DIR)/module_types.csv
|
||
|
||
# Remove build products, logs, executables and tool caches from the current
# directory. Declared phony so that a file named `clean` cannot disable it.
.PHONY: clean
clean:
	rm -rf $(TEMP_DIR) *.log
	rm -rf build
	rm -rf .Xil .run
	rm -rf *.exe
	rm -rf .ipcache
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,199 @@ | ||
<!-- | ||
Copyright (c) 2024 RapidStream Design Automation, Inc. and contributors. All rights reserved. | ||
The contributor(s) of this file has/have agreed to the RapidStream Contributor License Agreement. | ||
--> | ||
|
||
<img src="https://imagedelivery.net/AU8IzMTGgpVmEBfwPILIgw/1b565657-df33-41f9-f29e-0d539743e700/128" width="64px" alt="RapidStream Logo" /> | ||
|
||
# TAPA Design | ||
|
||
## Introduction | ||
|
||
RapidStream is fully compatible with [TAPA](https://github.com/rapidstream-org/rapidstream-tapa). | ||
In this recipe, we illustrate how to create a Xilinx object file (`.xo`) using TAPA, then optimize the `.xo` file with RapidStream, and finally utilize the optimized output in the ongoing Vitis development process. | ||
|
||
|
||
## Xilinx Object Files | ||
|
||
[Vitis compiled object files (`.xo`)](https://docs.amd.com/r/en-US/ug1393-vitis-application-acceleration/Design-Topology) are IP packages used in the AMD Vitis kernel development flow for programming the programmable logic (PL) region of target devices. These files can be [generated from HLS C++ code](https://docs.amd.com/r/en-US/ug1393-vitis-application-acceleration/Developing-PL-Kernels-using-C) using the `v++` command, [packed from RTL code](https://docs.amd.com/r/en-US/ug1393-vitis-application-acceleration/RTL-Kernel-Development-Flow), or created using third-party frameworks like [RapidStream TAPA](https://github.com/rapidstream-org/rapidstream-tapa). In this example, we use `RapidStream TAPA` to generate the `VecAdd.xo` file, but the same flow applies to object files generated through other methods. | ||
|
||
|
||
## Tutorial | ||
|
||
### Step 1: C++ Simulation | ||
|
||
Since our | ||
design calls Xilinx Libraries, we need to source the Vitis environment before running the simulation. | ||
|
||
```bash | ||
source <Vitis_install_path>/Vitis/2023.2/settings64.sh | ||
``` | ||
|
||
Before generating the `.xo` file, we recommend running a C++ simulation to verify the correctness of the design. This step is optional but highly recommended. Run the following command or `make csim` to perform C++ simulation: | ||
|
||
```bash | ||
tapa g++ design/main.cpp design/VecAdd.cpp \ | ||
-I /opt/tools/xilinx/Vitis_HLS/2023.2/include \ | ||
-o build/run_u55c.py/main.exe -O2 | ||
./build/run_u55c.py/main.exe | ||
``` | ||
|
||
You should see the following output: | ||
|
||
```bash | ||
I20241010 15:14:52.494259 4113880 task.h:66] running software simulation with TAPA library | ||
kernel time: 0.0197967 s | ||
PASS! | ||
``` | ||
|
||
### Step 2: Generate the Xilinx Object File (`.xo`) | ||
|
||
We use TAPA on top of Vitis 2023.2 to generate the `.xo` file. Run the following command or run `make xo`: | ||
|
||
```bash | ||
source <Vitis_install_path>/Vitis/2023.2/settings64.sh | ||
mkdir -p build/run_u55c.py | ||
cd build/run_u55c.py && tapa compile \ | ||
--top VecAdd \ | ||
--part-num xcu280-fsvh2892-2L-e \ | ||
--clock-period 3.33 \ | ||
-o VecAdd.xo \ | ||
-f design/VecAdd.cpp \ | ||
2>&1 | tee tapa.log | ||
``` | ||
|
||
### Step 3 (Optional): Use Vitis --link to Generate the `.xclbin` File | ||
|
||
With the `.xo` file generated, you can use `v++ --link` to generate the `.xclbin` file. Run the following command or execute `make hw`: | ||
|
||
```bash | ||
v++ -l -t hw \ | ||
--platform xilinx_u280_gen3x16_xdma_1_202211_1 \ | ||
--kernel VecAdd \ | ||
--connectivity.nk VecAdd:1:VecAdd \ | ||
--config design/link_config.ini \ | ||
--temp_dir build \ | ||
-o build/VecAdd.xclbin \ | ||
build/VecAdd.xo | ||
``` | ||
|
||
If your machine is equipped with the target FPGA device, you can deploy the design on the FPGA by running the following command: | ||
|
||
```bash | ||
./app.exe <path_to_vitis_xclbin> | ||
``` | ||
|
||
:warning: **Note**: This step can take hours to complete. We recommend using the RapidStream flow to optimize the `.xo` file instead of generating the `.xclbin` file if you are familiar with AMD Vitis flow. | ||
|
||
|
||
### Step 4: Define Virtual Device | ||
|
||
In this tutorial, we use the [Alveo U55C](https://www.amd.com/en/products/accelerators/alveo/u55c/a-u55c-p00g-pq-g.html) as an example. The device is organized into six slots, each | ||
containing 16 clock regions of logic. In actual implementations, the available slots are reduced | ||
based on the platform specifics, as some resources are reserved for shell logic. | ||
|
||
<img src="../../common/img/au55c_virtual_device.jpg" width="400px" alt="AU55C Device"/> | ||
|
||
To generate a `device.json` file that details the device features, such as slot resources and | ||
locations, you can either run the `run_u55c.py` script by invoking RapidStream as shown below or | ||
simply enter `make device` in the terminal. | ||
|
||
```bash | ||
rapidstream run_u55c.py | ||
``` | ||
|
||
|
||
### Step 5: Use Rapidstream to Optimize `.xo` Design | ||
|
||
The RapidStream flow conducts design space exploration and generates solutions by taking the TAPA-generated `.xo` file as input. | ||
The RapidStream flow for TAPA requires the following key inputs: | ||
|
||
- **tapa-xo-path**: The path to the tapa-generated `xo` file (VecAdd.xo). | ||
- **device-config**: The virtual device (`device.json`) generated in Step 4 by calling RapidStream APIs for the target platform. | ||
- **floorplan-config**: The configuration file ([floorplan_config.json](design/config/run_u55c.py/floorplan_config.json)) that guides the integrated AutoBridge in floorplanning the design. | ||
- **implementation-config**: The configuration file ([impl_config.json](design/config/run_u55c.py/impl_config.json)) that guides Vitis in implementing the design (e.g., kernel clock, `vitis_platform`, etc.). | ||
- **connectivity-ini**: The link configuration file ([link_config.ini](design/config/run_u55c.py/link_config.ini)) that specifies how the kernel interfaces are connected to the memory controller. This is | ||
the same file as the Vitis link configuration file. | ||
|
||
We encapsulate the RapidStream command for TAPA as `rapidstream-tapaopt`. | ||
You can run the command below or execute `make all` supported by our [Makefile](Makefile). | ||
|
||
```bash | ||
rapidstream-tapaopt --work-dir build/run_u55c.py \ | ||
--tapa-xo-path ./VecAdd.xo \ | ||
--device-config build/run_u55c.py/device.json \ | ||
--floorplan-config ../../design/config/run_u55c.py/floorplan_config.json \ | ||
--implementation-config ../../design/config/run_u55c.py/impl_config.json \ | ||
--connectivity-ini ../../design/config/run_u55c.py/link_config.ini | ||
``` | ||
|
||
When finished, you can locate these files using the following command: | ||
|
||
```bash | ||
find ./build/run_u55c.py/ -name *.xo | ||
``` | ||
|
||
If everything is successful, you should get at least one optimized `.xo` file located in `./build/dse/candidate_0/exported/VecAdd.xo`. | ||
|
||
### Step 7: Check the Group Module Report | ||
|
||
|
||
RapidStream mandates a clear distinction between communication and computation within user designs. | ||
|
||
- In `Group modules`, users are tasked solely with defining inter-submodule communication. For those familiar with Vivado IP Integrator flow, crafting a Group module mirrors the process of connecting IPs in IPI. RapidStream subsequently integrates appropriate pipeline registers into these Group modules. | ||
|
||
- In `Leaf modules`, users retain the flexibility to implement diverse computational patterns, as RapidStream leaves these Leaf modules unchanged. | ||
|
||
For further details, please consult the [code style](https://docs.rapidstream-da.com/required-coding-style/) section in our Documentation. | ||
|
||
To generate a report on group types, execute the commands below or run `make show_groups`: | ||
|
||
```bash | ||
rapidstream ../../common/util/get_group.py \ | ||
-i build/passes/0-imported.json \ | ||
-o build/module_types.csv | ||
``` | ||
|
||
The module types for your design can be found in `build/module_types.csv`. Below, we list the four Group modules. In this design, `VecAdd` serves as a Group module, while the other three modules are added by RapidStream. | ||
|
||
| Module Name | Group Type | | ||
|:--------------------------------:|:--------------:| | ||
| VecAdd | grouped_module | | ||
| __rs_VecAdd_aux | aux_module | | ||
| ... | verilog_module | | ||
|
||
|
||
### Step 8: Use Vitis --link with the Optimized `.xo` File | ||
|
||
With the optimized `.xo` file generated, you can use `v++ --link` to generate the `.xclbin` file. Run the following command or run `make`: | ||
|
||
```bash | ||
v++ -l -t hw \ | ||
--platform xilinx_u280_gen3x16_xdma_1_202211_1 \ | ||
--kernel VecAdd \ | ||
--connectivity.nk VecAdd:1:VecAdd \ | ||
--config design/link_config.ini \ | ||
--temp_dir build/rapidstream \ | ||
-o build/VecAdd_rs_opt.xclbin \ | ||
./build/dse/candidate_0/exported/VecAdd.xo | ||
``` | ||
|
||
|
||
To examine the timing results for each design point, use this command: | ||
|
||
```bash | ||
find ./build -name *.xclbin.info | ||
``` | ||
|
||
|
||
|
||
If your machine is equipped with the target FPGA device, you can deploy the optimized design on the FPGA by running the following command: | ||
|
||
```bash | ||
make host | ||
./app.exe <path_to_optimized_xclbin> | ||
``` | ||
|
||
## Next Step | ||
|
||
**Click here to [go back to Getting Started](../README.md)** |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
// Copyright 2024 RapidStream Design Automation, Inc. | ||
// All Rights Reserved. | ||
|
||
|
||
// Includes | ||
#include <cstdint> | ||
#include <tapa.h> | ||
#define DATA_SIZE 4096 | ||
|
||
// Copies DATA_SIZE words from `mem_in`, in index order, into `stream_out`.
//
//   mem_in     - read-only memory-mapped input buffer; elements
//                [0, DATA_SIZE) are read.
//   stream_out - output stream that receives one word per element.
void read_mem(tapa::mmap<const uint32_t> mem_in, tapa::ostream<uint32_t>& stream_out) {
  for (int idx = 0; idx < DATA_SIZE; ++idx) {
    stream_out.write(mem_in[idx]);
  }
}
|
||
// Element-wise adder: consumes DATA_SIZE words from each input stream and
// emits their sums, in order, on the output stream.
//
//   stream_in1 - first operand stream.
//   stream_in2 - second operand stream.
//   stream_out - stream that receives stream_in1[i] + stream_in2[i].
//
// The streams are taken by reference, matching read_mem / write_mem in this
// file and the TAPA convention for stream arguments; the original took them
// by value. Call sites using tapa::task().invoke(...) are unchanged.
void add_kernel(
    tapa::istream<uint32_t>& stream_in1,
    tapa::istream<uint32_t>& stream_in2,
    tapa::ostream<uint32_t>& stream_out) {
  // Compute the addition, one token from each input per iteration.
  for (int i = 0; i < DATA_SIZE; i++) {
    stream_out << (stream_in1.read() + stream_in2.read());
  }
}
|
||
// Drains DATA_SIZE words from `stream_in` and stores them, in arrival order,
// into `mem_out`.
//
//   stream_in - input stream supplying one word per element.
//   mem_out   - memory-mapped output buffer; elements [0, DATA_SIZE) are
//               written.
void write_mem(tapa::istream<uint32_t>& stream_in, tapa::mmap<uint32_t> mem_out) {
  for (int idx = 0; idx < DATA_SIZE; ++idx) {
    mem_out[idx] = stream_in.read();
  }
}
|
||
// Top-level task: out[i] = in1[i] + in2[i] for DATA_SIZE elements.
//
//   mem_in1, mem_in2 - read-only memory-mapped input vectors.
//   mem_out          - memory-mapped output vector.
//
// Four child tasks are invoked and connected through three streams:
//   read_mem(mem_in1) -> stream_in1 \
//                                    add_kernel -> stream_out -> write_mem(mem_out)
//   read_mem(mem_in2) -> stream_in2 /
void VecAdd(
    tapa::mmap<const uint32_t> mem_in1,
    tapa::mmap<const uint32_t> mem_in2,
    tapa::mmap<uint32_t> mem_out) {
  // Streams linking the reader, adder and writer tasks.
  tapa::stream<uint32_t> stream_in1("input_stream_1");
  tapa::stream<uint32_t> stream_in2("input_stream_2");
  tapa::stream<uint32_t> stream_out("output_stream");

  tapa::task()
      .invoke(read_mem, mem_in1, stream_in1)
      .invoke(read_mem, mem_in2, stream_in2)
      .invoke(add_kernel, stream_in1, stream_in2, stream_out)
      .invoke(write_mem, stream_out, mem_out);
}
Binary file not shown.
14 changes: 14 additions & 0 deletions
14
getting_started/aie_source/design/config/run_u55c.py/floorplan_config.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
{ | ||
"dse_range_max": 0.8, | ||
"dse_range_min": 0.7, | ||
"partition_strategy": "flat", | ||
"port_pre_assignments": { | ||
".*mem_in1_.*": "HBM[16]", | ||
".*mem_in2_.*": "HBM[17]", | ||
".*mem_out_.*": "HBM[18]", | ||
"ap_clk": "CLK_RST", | ||
"ap_rst_n": "CLK_RST", | ||
"interrupt": "CLK_RST", | ||
"s_axi_control_.*": "S_AXI_CONTROL" | ||
} | ||
} |
7 changes: 7 additions & 0 deletions
7
getting_started/aie_source/design/config/run_u55c.py/impl_config.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
{ | ||
"max_workers": 2, | ||
"port_to_clock_period": { | ||
"ap_clk": 3.33 | ||
}, | ||
"vitis_platform": "xilinx_u55c_gen3x16_xdma_3_202210_1" | ||
} |
4 changes: 4 additions & 0 deletions
4
getting_started/aie_source/design/config/run_u55c.py/link_config.ini
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
[connectivity] | ||
sp=VecAdd.mem_in1:HBM[16] | ||
sp=VecAdd.mem_in2:HBM[17] | ||
sp=VecAdd.mem_out:HBM[18] |
Oops, something went wrong.