Commit cf91b1f: update

vagrantxiao24 committed Nov 18, 2024 (1 parent: 6f3963f)
Showing 11 changed files with 478 additions and 11 deletions.
108 changes: 108 additions & 0 deletions getting_started/aie_source/Makefile
@@ -0,0 +1,108 @@
# Copyright (c) 2024 RapidStream Design Automation, Inc. and contributors. All rights reserved.
# The contributor(s) of this file has/have agreed to the RapidStream Contributor License Agreement.

ROOT_DIR := $(shell git rev-parse --show-toplevel)
PLATFORM := xilinx_u280_gen3x16_xdma_1_202211_1
PART_NUM := xcu280-fsvh2892-2L-e
GRP_UTIL := $(ROOT_DIR)/common/util/get_group.py
KERNEL_NAME := VecAdd
RS_SCRIPT := $(CURDIR)/run_u55c.py
SRC_DIR := $(CURDIR)/design
AB_CONFIG := $(CURDIR)/design/config/$(notdir $(RS_SCRIPT))/floorplan_config.json
IMPL_CONFIG := $(CURDIR)/design/config/$(notdir $(RS_SCRIPT))/impl_config.json
LINK_CONFIG := $(CURDIR)/design/config/$(notdir $(RS_SCRIPT))/link_config.ini
FIX_NOC_TCL := $(CURDIR)/design/config/$(notdir $(RS_SCRIPT))/fix_noc.tcl
TARGET := hw
TEMP_DIR := $(CURDIR)/build/$(notdir $(RS_SCRIPT))
KERNEL_XO := $(TEMP_DIR)/$(KERNEL_NAME).xo
KERNEL_XCLBIN := $(TEMP_DIR)/$(KERNEL_NAME).xclbin
RS_XCLBIN := $(TEMP_DIR)/dse/solution_0/vitis_run_hw/$(KERNEL_NAME)_$(PLATFORM).xclbin
INCLUDE := -I $(XILINX_HLS)/include
CFLAGS := $(INCLUDE) $(OPT_LEVEL)
CXX := g++
HOST := app.exe
RS_TARGET := $(TEMP_DIR)/dse/solution_0/updated.xo
TIMING_RPT := impl_1_hw_bb_locked_timing_summary_routed.rpt
SUCCESS := "Build Successful"
SLACK_GETTER := $(ROOT_DIR)/common/util/get_slack.py
BUILD_LOG := $(TEMP_DIR)/build.json
RSXX := rapidstream
RSPATH := $(CURDIR)
RSPYTHON := rapidstream
DEVICE_CONFIG := $(TEMP_DIR)/device.json

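# Default target: build the RapidStream-optimized .xclbin, then report the worst slack of the routed design.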
all: $(RS_XCLBIN)
$(RSXX) $(SLACK_GETTER) -d $(TEMP_DIR) -i $(TIMING_RPT) -o $(BUILD_LOG) -c clk_kernel_00_unbuffered_net -p 3.333
echo $(SUCCESS)

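# Link the RapidStream-optimized .xo into an .xclbin with v++.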
$(RS_XCLBIN): $(RS_TARGET)
v++ -l -t ${TARGET} \
--platform $(PLATFORM) \
--kernel $(KERNEL_NAME) \
--connectivity.nk $(KERNEL_NAME):1:$(KERNEL_NAME) \
--config $(LINK_CONFIG) \
--temp_dir $(TEMP_DIR) \
-o $@ \
$^

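# Run RapidStream DSE (rapidstream-tapaopt) on the TAPA-generated .xo.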
$(RS_TARGET):$(KERNEL_XO) $(DEVICE_CONFIG) $(AB_CONFIG)
mkdir -p $(TEMP_DIR)
cd $(RSPATH) && $(RSXX)-tapaopt \
--work-dir $(TEMP_DIR) \
--tapa-xo-path $< \
--device-config $(DEVICE_CONFIG) \
--floorplan-config $(AB_CONFIG) \
--implementation-config $(IMPL_CONFIG) \
--connectivity-ini $(LINK_CONFIG)

device: $(DEVICE_CONFIG)

$(DEVICE_CONFIG):$(RS_SCRIPT)
mkdir -p $(TEMP_DIR)
cd $(RSPATH) && $(RSPYTHON) $(RS_SCRIPT)

hw: $(KERNEL_XCLBIN)

$(KERNEL_XCLBIN): $(KERNEL_XO)
v++ -l -t ${TARGET} \
--platform $(PLATFORM) \
--kernel $(KERNEL_NAME) \
--connectivity.nk $(KERNEL_NAME):1:$(KERNEL_NAME) \
--config $(LINK_CONFIG) \
--temp_dir $(TEMP_DIR) \
-o $@ \
$^

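# Compile the TAPA C++ source into a Xilinx object file (.xo).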
xo:$(KERNEL_XO)

$(KERNEL_XO):$(SRC_DIR)/$(KERNEL_NAME).cpp
mkdir -p $(TEMP_DIR)
cd $(TEMP_DIR) && tapa compile \
--top $(KERNEL_NAME) \
--part-num $(PART_NUM) \
--keep-hls-work-dir \
--clock-period 3.33 \
-o $(KERNEL_NAME).xo \
-f $< \
2>&1 | tee tapa.log

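# Build and run the C++ software simulation with the TAPA library.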
csim:$(TEMP_DIR)/main.exe
$(TEMP_DIR)/main.exe

$(TEMP_DIR)/main.exe: $(SRC_DIR)/main.cpp $(SRC_DIR)/VecAdd.cpp
mkdir -p $(TEMP_DIR)
cd $(TEMP_DIR) && tapa g++ $^ $(INCLUDE) -o $@ -O2


show_groups:
	@echo $(RS_XCLBIN)
$(RSXX) $(GRP_UTIL) -i $(TEMP_DIR)/passes/1-importer.json \
-o $(TEMP_DIR)/module_types.csv


clean:
rm -rf $(TEMP_DIR) *.log
rm -rf build
rm -rf .Xil .run
rm -rf *.exe
rm -rf .ipcache
199 changes: 199 additions & 0 deletions getting_started/aie_source/README.md
@@ -0,0 +1,199 @@
<!--
Copyright (c) 2024 RapidStream Design Automation, Inc. and contributors. All rights reserved.
The contributor(s) of this file has/have agreed to the RapidStream Contributor License Agreement.
-->

<img src="https://imagedelivery.net/AU8IzMTGgpVmEBfwPILIgw/1b565657-df33-41f9-f29e-0d539743e700/128" width="64px" alt="RapidStream Logo" />

# TAPA Design

## Introduction

RapidStream is fully compatible with [TAPA](https://github.com/rapidstream-org/rapidstream-tapa).
In this recipe, we illustrate how to create a Xilinx object file (`.xo`) with TAPA, optimize the `.xo` file with RapidStream, and use the optimized output in the ongoing Vitis development flow.


## Xilinx Object Files

[Vitis compiled object files (`.xo`)](https://docs.amd.com/r/en-US/ug1393-vitis-application-acceleration/Design-Topology) are IP packages used in the AMD Vitis kernel development flow for programming the programmable logic (PL) region of target devices. These files can be [generated from HLS C++ code](https://docs.amd.com/r/en-US/ug1393-vitis-application-acceleration/Developing-PL-Kernels-using-C) using the `v++` command, [packed from RTL code](https://docs.amd.com/r/en-US/ug1393-vitis-application-acceleration/RTL-Kernel-Development-Flow), or created using third-party frameworks like [RapidStream TAPA](https://github.com/rapidstream-org/rapidstream-tapa). In this example, we use `RapidStream TAPA` to generate the `VecAdd.xo` file, but the same flow applies to object files generated through other methods.


## Tutorial

### Step 1: C++ Simulation

Since our design calls Xilinx libraries, we need to source the Vitis environment before running the simulation.

```bash
source <Vitis_install_path>/Vitis/2023.2/settings64.sh
```

Before generating the `.xo` file, we recommend running a C++ simulation to verify the correctness of the design. This step is optional but highly recommended. Run the following command or `make csim` to perform C++ simulation:

```bash
mkdir -p build/run_u55c.py
tapa g++ design/main.cpp design/VecAdd.cpp \
    -I "$XILINX_HLS/include" \
    -o build/run_u55c.py/main.exe -O2
./build/run_u55c.py/main.exe
```

You should see the following output:

```bash
I20241010 15:14:52.494259 4113880 task.h:66] running software simulation with TAPA library
kernel time: 0.0197967 s
PASS!
```
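
For reference, the host program driving this simulation typically looks like the minimal sketch below. The actual `design/main.cpp` is not shown in this diff, so treat the names and checks here as illustrative assumptions; only `VecAdd` and its `tapa::mmap` signature come from `design/VecAdd.cpp`. TAPA's `tapa::invoke` runs software simulation when the bitstream argument is empty and targets the FPGA when given an `.xclbin` path.

```cpp
// Hypothetical sketch of a TAPA host program (the actual design/main.cpp may differ).
#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

#include <tapa.h>

// Kernel signature from design/VecAdd.cpp.
void VecAdd(tapa::mmap<const uint32_t> mem_in1, tapa::mmap<const uint32_t> mem_in2,
            tapa::mmap<uint32_t> mem_out);

int main(int argc, char* argv[]) {
  constexpr int kDataSize = 4096;  // matches DATA_SIZE in VecAdd.cpp
  // Empty bitstream path -> software simulation; pass an .xclbin path for hardware.
  const std::string bitstream = argc > 1 ? argv[1] : "";

  std::vector<uint32_t> in1(kDataSize), in2(kDataSize), out(kDataSize);
  for (int i = 0; i < kDataSize; ++i) {
    in1[i] = i;
    in2[i] = 2 * i;
  }

  tapa::invoke(VecAdd, bitstream,
               tapa::read_only_mmap<const uint32_t>(in1),
               tapa::read_only_mmap<const uint32_t>(in2),
               tapa::write_only_mmap<uint32_t>(out));

  // Verify the result against the expected element-wise sum.
  for (int i = 0; i < kDataSize; ++i) {
    if (out[i] != in1[i] + in2[i]) {
      std::cout << "FAIL at index " << i << std::endl;
      return 1;
    }
  }
  std::cout << "PASS!" << std::endl;
  return 0;
}
```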

### Step 2: Generate the Xilinx Object File (`.xo`)

We use TAPA on top of Vitis 2023.2 to generate the `.xo` file. Run the following command or run `make xo`:

```bash
source <Vitis_install_path>/Vitis/2023.2/settings64.sh
mkdir -p build/run_u55c.py
cd build/run_u55c.py && tapa compile \
    --top VecAdd \
    --part-num xcu280-fsvh2892-2L-e \
    --clock-period 3.33 \
    -o VecAdd.xo \
    -f ../../design/VecAdd.cpp \
    2>&1 | tee tapa.log
```

### Step 3 (Optional): Use Vitis --link to Generate the `.xclbin` File

With the `.xo` file generated, you can use `v++ -link` to generate the `.xclbin` file. Run the following command or execute `make hw`:

```bash
v++ -l -t hw \
    --platform xilinx_u280_gen3x16_xdma_1_202211_1 \
    --kernel VecAdd \
    --connectivity.nk VecAdd:1:VecAdd \
    --config design/config/run_u55c.py/link_config.ini \
    --temp_dir build/run_u55c.py \
    -o build/run_u55c.py/VecAdd.xclbin \
    build/run_u55c.py/VecAdd.xo
```

If your machine is equipped with the target FPGA device, you can deploy the design on the FPGA by running the following command:

```bash
./app.exe <path_to_vitis_xclbin>
```

:warning: **Note**: This step can take hours to complete. If you are familiar with the AMD Vitis flow, we recommend skipping it and using the RapidStream flow to optimize the `.xo` file instead of generating this baseline `.xclbin`.


### Step 4: Define Virtual Device

In this tutorial, we use the [Alveo U55C](https://www.amd.com/en/products/accelerators/alveo/u55c/a-u55c-p00g-pq-g.html) as an example. The device is organized into six slots, each containing 16 clock regions of logic. In actual implementations, the number of available slots is reduced depending on the platform, as some resources are reserved for the shell logic.

<img src="../../common/img/au55c_virtual_device.jpg" width="400px" alt="AU55C Device"/>

To generate a `device.json` file that describes the device features, such as slot resources and locations, either run the `run_u55c.py` script by invoking RapidStream as shown below, or simply enter `make device` in the terminal.

```bash
rapidstream run_u55c.py
```


### Step 5: Use RapidStream to Optimize the `.xo` Design

The RapidStream flow takes the TAPA-generated `.xo` file as input, conducts design space exploration, and generates optimized solutions.
The RapidStream flow for TAPA requires the following key inputs:

- **tapa-xo-path**: The path to the TAPA-generated `.xo` file (`VecAdd.xo`).
- **device-config**: The virtual device (`device.json`) generated in Step 4 by calling the RapidStream APIs for the target platform.
- **floorplan-config**: The configuration file ([floorplan_config.json](design/config/run_u55c.py/floorplan_config.json)) that guides the integrated AutoBridge floorplanner.
- **implementation-config**: The configuration file ([impl_config.json](design/config/run_u55c.py/impl_config.json)) that guides Vitis during implementation (e.g., kernel clock, vitis_platform).
- **connectivity-ini**: The link configuration file ([link_config.ini](design/config/run_u55c.py/link_config.ini)) that specifies how the kernel interfaces connect to the memory controllers. It uses the same format as the Vitis link configuration file.

We encapsulate the RapidStream command for TAPA as `rapidstream-tapaopt`.
You can run the command below or execute `make all` as supported by our [Makefile](Makefile).

```bash
rapidstream-tapaopt --work-dir build/run_u55c.py \
    --tapa-xo-path build/run_u55c.py/VecAdd.xo \
    --device-config build/run_u55c.py/device.json \
    --floorplan-config design/config/run_u55c.py/floorplan_config.json \
    --implementation-config design/config/run_u55c.py/impl_config.json \
    --connectivity-ini design/config/run_u55c.py/link_config.ini
```

When the flow finishes, you can locate the generated `.xo` files using the following command:

```bash
find ./build/run_u55c.py/ -name "*.xo"
```

If everything is successful, you should get at least one optimized `.xo` file under `./build/run_u55c.py/dse/`, e.g., `solution_0/updated.xo` (the path the [Makefile](Makefile) links in Step 7).

### Step 6: Check the Group Module Report


RapidStream mandates a clear distinction between communication and computation within user designs.

- In `Group modules`, users are tasked solely with defining inter-submodule communication. For those familiar with Vivado IP Integrator flow, crafting a Group module mirrors the process of connecting IPs in IPI. RapidStream subsequently integrates appropriate pipeline registers into these Group modules.

- In `Leaf modules`, users retain the flexibility to implement diverse computational patterns, as RapidStream leaves these Leaf modules unchanged.

For further details, please consult the [code style](https://docs.rapidstream-da.com/required-coding-style/) section in our Documentation.
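
The `VecAdd` design in this recipe follows this split: the top-level `VecAdd` task in [design/VecAdd.cpp](design/VecAdd.cpp) is a Group module that only declares streams and connects tasks, while `read_mem`, `add_kernel`, and `write_mem` are Leaf modules containing the actual loops:

```cpp
// Group module: no computation, only stream declarations and task invocations.
void VecAdd(tapa::mmap<const uint32_t> mem_in1, tapa::mmap<const uint32_t> mem_in2,
            tapa::mmap<uint32_t> mem_out) {
  tapa::stream<uint32_t> stream_in1("input_stream_1");
  tapa::stream<uint32_t> stream_in2("input_stream_2");
  tapa::stream<uint32_t> stream_out("output_stream");

  tapa::task()
      .invoke(read_mem, mem_in1, stream_in1)                   // Leaf: memory -> stream
      .invoke(read_mem, mem_in2, stream_in2)                   // Leaf: memory -> stream
      .invoke(add_kernel, stream_in1, stream_in2, stream_out)  // Leaf: compute
      .invoke(write_mem, stream_out, mem_out);                 // Leaf: stream -> memory
}
```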

To generate a report on group types, execute the command below or run `make show_groups`:

```bash
rapidstream ../../common/util/get_group.py \
    -i build/run_u55c.py/passes/1-importer.json \
    -o build/run_u55c.py/module_types.csv
```

The module types for your design can be found in `build/run_u55c.py/module_types.csv`. Below, we list the Group modules: in this design, `VecAdd` serves as a Group module, while the auxiliary modules are added by RapidStream.

| Module Name | Group Type |
|:--------------------------------:|:--------------:|
| VecAdd | grouped_module |
| __rs_VecAdd_aux | aux_module |
| ... | verilog_module |


### Step 7: Use Vitis --link with the Optimized `.xo` File

With the optimized `.xo` file generated, you can use `v++ -link` to generate the `.xclbin` file. Run the following command or run `make`:

```bash
v++ -l -t hw \
    --platform xilinx_u280_gen3x16_xdma_1_202211_1 \
    --kernel VecAdd \
    --connectivity.nk VecAdd:1:VecAdd \
    --config design/config/run_u55c.py/link_config.ini \
    --temp_dir build/rapidstream \
    -o build/VecAdd_rs_opt.xclbin \
    build/run_u55c.py/dse/solution_0/updated.xo
```


To examine the timing results for each design point, use this command:

```bash
find ./build -name "*.xclbin.info"
```



If your machine is equipped with the target FPGA device, you can deploy the optimized design on the FPGA by running the following commands:

```bash
make host
./app.exe <path_to_optimized_xclbin>
```

## Next Step

**Click here to [go back to Getting Started](../README.md)**
47 changes: 47 additions & 0 deletions getting_started/aie_source/design/VecAdd.cpp
@@ -0,0 +1,47 @@
// Copyright 2024 RapidStream Design Automation, Inc.
// All Rights Reserved.


// Includes
#include <cstdint>
#include <tapa.h>
#define DATA_SIZE 4096

void read_mem(tapa::mmap<const uint32_t> mem_in, tapa::ostream<uint32_t>& stream_out) {

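  // Stream DATA_SIZE words from the memory-mapped input into the output FIFO.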
for (int i = 0; i < DATA_SIZE; i++) {
stream_out << mem_in[i];
}
}

void add_kernel(
    tapa::istream<uint32_t>& stream_in1,
    tapa::istream<uint32_t>& stream_in2,
    tapa::ostream<uint32_t>& stream_out) {

// Compute the addition
for (int i = 0; i < DATA_SIZE; i++) {
stream_out << (stream_in1.read() + stream_in2.read());
}
}

void write_mem(tapa::istream<uint32_t>& stream_in, tapa::mmap<uint32_t> mem_out) {

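  // Drain the input FIFO into the memory-mapped output.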
for (int i = 0; i < DATA_SIZE; i++) {
stream_in >> mem_out[i];
}
}

void VecAdd(tapa::mmap<const uint32_t> mem_in1, tapa::mmap<const uint32_t> mem_in2, tapa::mmap<uint32_t> mem_out) {

tapa::stream<uint32_t> stream_in1("input_stream_1");
tapa::stream<uint32_t> stream_in2("input_stream_2");
tapa::stream<uint32_t> stream_out("output_stream");

tapa::task()
.invoke(read_mem, mem_in1, stream_in1)
.invoke(read_mem, mem_in2, stream_in2)
.invoke(add_kernel, stream_in1, stream_in2, stream_out)
.invoke(write_mem, stream_out, mem_out);

}
Binary file added getting_started/aie_source/design/VecAdd.xo
Binary file not shown.
14 changes: 14 additions & 0 deletions getting_started/aie_source/design/config/run_u55c.py/floorplan_config.json
@@ -0,0 +1,14 @@
{
"dse_range_max": 0.8,
"dse_range_min": 0.7,
"partition_strategy": "flat",
"port_pre_assignments": {
".*mem_in1_.*": "HBM[16]",
".*mem_in2_.*": "HBM[17]",
".*mem_out_.*": "HBM[18]",
"ap_clk": "CLK_RST",
"ap_rst_n": "CLK_RST",
"interrupt": "CLK_RST",
"s_axi_control_.*": "S_AXI_CONTROL"
}
}
7 changes: 7 additions & 0 deletions getting_started/aie_source/design/config/run_u55c.py/impl_config.json
@@ -0,0 +1,7 @@
{
"max_workers": 2,
"port_to_clock_period": {
"ap_clk": 3.33
},
"vitis_platform": "xilinx_u55c_gen3x16_xdma_3_202210_1"
}
4 changes: 4 additions & 0 deletions getting_started/aie_source/design/config/run_u55c.py/link_config.ini
@@ -0,0 +1,4 @@
[connectivity]
sp=VecAdd.mem_in1:HBM[16]
sp=VecAdd.mem_in2:HBM[17]
sp=VecAdd.mem_out:HBM[18]