Embedded and Reconfigurable Systems M之HLS
一開始我們從簡單的開始,設計一個計數器,如下框圖:
很簡單,波形如下:(eg:RANGE = 3)
contatore.h
#ifndef CONTATORE_H
#define CONTATORE_H

#include "ap_int.h"

// Single-bit unsigned value, used here for the LED signal.
typedef ap_uint<1> bit;

// Number of calls to contatore_no_io() between two toggles of the LED.
#define RANGE 10

// Counts calls and toggles the value written to *led_output once every
// RANGE calls.
void contatore_no_io(volatile bit *led_output);

#endif // CONTATORE_H
必須包含標頭檔案 "ap_int.h"。ap_uint<5> 表示一個 5 bit 的無符號整數,ap_int<5> 則表示一個 5 bit 的有符號整數。
contatore.cpp
#include "contatore.h"
void contatore_no_io(volatile bit *led_output)
{
static unsigned int counter_value = 0;
static bit led_status = 0;
counter_value++;
if (counter_value>=RANGE) {
led_status = not(led_status);
counter_value=0;
}
*led_output = led_status;
return;
}
testbench.cpp
#include <stdio.h>
#include "contatore.h"

// Test bench: calls contatore_no_io() 100 times and prints the LED value
// produced by each call.
int main()
{
    bit led_output_variable = 0;

    for (int i = 0; i < 100; i++) {
        contatore_no_io(&led_output_variable);
        printf("Iter %d\t Led = %d\n", i, (int)led_output_variable);
    }

    printf("\n\n>>> End simulation <<<\n\n");
    return 0;
}
pragma HLS reset指令:
如果變數是靜態變數或全域性變數,則可以使用 RESET pragma 顯式地為其新增復位,或者通過 RESET pragma 的 off 選項將該變數從復位中移除。當設計中存在靜態或全域性陣列時,這可能特別有用。
ap_ovld:顧名思義,輸出的有效訊號。
Interfacce:
Latency and Interval
Latency:從輸入到輸出所經過的時鐘週期數。
Throughput/Interval:迴圈流水線操作的一個重要術語叫做Initiation Interval(II) ,它是連續迴圈迭代開始時間之間的時鐘週期數。
由下圖一目瞭然:
接下來我們通過axi_lite來控制計數器的計數週期,
counter_base_axi_lite.cpp
#include "counter_base_axi_lite.h"
// Counter whose period is programmable at run time.
//
// enable_count   0: the counter is not advanced and range_counter is stored
//                   as the new period.
//                otherwise: the counter advances by one on this call.
// range_counter  period to store while enable_count is 0.
// output_value   receives the current counter value on every call.
// led_output     receives the LED state on every call; the state is inverted
//                each time the counter reaches the stored period.
//
// enable_count and range_counter are exposed through the s_axilite bundle
// "commands"; led_output and output_value use ap_none.
void counter_base_axi_lite(bit enable_count, volatile unsigned int range_counter,
        volatile int *output_value, volatile bit *led_output)
{
#pragma HLS INTERFACE ap_none port=range_counter
#pragma HLS INTERFACE ap_none port=enable_count
#pragma HLS INTERFACE s_axilite register port=range_counter bundle=commands
#pragma HLS INTERFACE s_axilite register port=enable_count bundle=commands
#pragma HLS INTERFACE ap_none port=led_output
#pragma HLS INTERFACE ap_none port=output_value
    // Static state persists between calls.
    static unsigned int counter_value = 0;
    static bit led_status = 0;
    static unsigned int BASE_COUNT = 5;  // period used until one is programmed

    if (enable_count == 0)
    {
        // Disabled: latch the requested period; the count itself is kept.
        BASE_COUNT = range_counter;
    }
    else
    {
        counter_value++;
        if (counter_value >= BASE_COUNT)
        {
            counter_value = 0;
            led_status = !led_status;
        }
    }

    *led_output = led_status;
    *output_value = counter_value;

#ifndef __SYNTHESIS__
    // Simulation-only trace. Both values are unsigned int, hence %u.
    printf("Base count = %u\tCounter value = %u\n", BASE_COUNT, counter_value);
#endif
    return;
}
test_bench.cpp
#include "counter_base_axi_lite.h"
// Test bench for counter_base_axi_lite().
// Phase 1 calls the block with ENABLE = 0 so that RANGE is stored as the period.
// Phase 2 calls it with ENABLE = 1 and checks that the reported count equals
// i % RANGE on every iteration.
// Returns 0 when every check passes and -1 otherwise.
int main()
{
    int testbench_error = 0;
    int count_value = 0;
    bit led_output_variable = 0;
    unsigned int RANGE = 10;
    bit ENABLE = 0;

    // Phase 1: counter disabled, period being programmed.
    for (int i = 1; i < 20; i++)
    {
        counter_base_axi_lite(ENABLE, RANGE, &count_value, &led_output_variable);
        // RANGE is unsigned int, hence %u.
        printf("ENABLE = %d\tRANGE = %u\tConter value = %d\t Led = %d\n",
               (int)ENABLE, RANGE, count_value, (int)led_output_variable);
    }

    // Phase 2: counter enabled.
    ENABLE = 1;
    for (int i = 1; i < 20; i++)
    {
        counter_base_axi_lite(ENABLE, RANGE, &count_value, &led_output_variable);
        printf("ENABLE = %d\tRANGE = %u\tConter value = %d\t Led = %d\n",
               (int)ENABLE, RANGE, count_value, (int)led_output_variable);

        // Cross-check output port count_value; the casts make the unsigned
        // comparison explicit instead of relying on implicit conversion.
        if ((unsigned int)count_value != (unsigned int)i % RANGE)
            testbench_error++;
    }

    if (testbench_error == 0)
    {
        printf(">>> C simulation: OK <<<\n");
        return 0;
    }
    else
    {
        printf(">>> C simulation: FAILED <<<\n");
        return -1;
    }
}
在 Vivado 中匯入該 IP
匯入SDK:
在這個資料夾下,有這個ip的驅動程式碼:
helloworld.c
#include <stdio.h>
#include "platform.h"
#include "xcounter_base_axi_lite.h"
#include <unistd.h>
/*
 * Stops the counter, writes a new range and starts the counter again,
 * printing a progress message before each step.
 */
static void restart_counter_with_range(XCounter_base_axi_lite *instance,
                                       unsigned int range)
{
    printf("Disabling counter...\n");
    XCounter_base_axi_lite_Set_enable_count_V(instance, 0);
    printf("Setting range...\n");
    XCounter_base_axi_lite_Set_range_counter(instance, range);
    /* range is unsigned int, hence %u. */
    printf("Enabling counter with base %u...\n", range);
    XCounter_base_axi_lite_Set_enable_count_V(instance, 1);
    printf("Counter up and running\n");
}

/*
 * Initializes the counter peripheral, runs it with range 0x008FFFFF, sleeps,
 * then reprograms it with range 0x001FFFFF.
 * Returns 0 on success, or -1 if the driver could not be initialized.
 */
int main()
{
    init_platform();

    XCounter_base_axi_lite InstancePtr;
    u16 DeviceId = 0;

    /* NOTE(review): the lookup result is unused here; Initialize() presumably
     * performs its own lookup - confirm before removing this call. */
    (void)XCounter_base_axi_lite_LookupConfig(DeviceId);

    int error = XCounter_base_axi_lite_Initialize(&InstancePtr, DeviceId);
    if (error != 0) {
        /* Do not touch the peripheral through an uninitialized instance. */
        printf("Custom AXI counter: initialization failed :-(\n");
        cleanup_platform();
        return -1;
    }
    printf("Custom AXI counter: initialization OK :-)\n");

    useconds_t sleeping_time_us = 5000000;

    restart_counter_with_range(&InstancePtr, 0x008FFFFF);

    printf("Sleeping for %2.1f seconds\n", sleeping_time_us/1000000.0);
    /* NOTE(review): POSIX allows usleep() to reject values >= 1000000;
     * confirm the target's usleep() accepts this argument. */
    usleep(sleeping_time_us);

    restart_counter_with_range(&InstancePtr, 0x001FFFFF);

    cleanup_platform();
    return 0;
}
優化 in Vivado HLS
以一個普通的陣列相加為例:
/*
 * Adds up the LENGTH elements of input_array and stores the total in
 * *average_value. The stored value is the plain sum; it is not divided
 * by LENGTH.
 */
void media(volatile int input_array[LENGTH], volatile int *average_value)
{
    int running_total = 0;

    /* Elements are read once each, in ascending index order. */
    for (int idx = 0; idx < LENGTH; ++idx) {
        running_total += input_array[idx];
    }

    *average_value = running_total;
}
由圖看出需要經過19個週期才輸出一個結果;
接下來我們使用一個優化指令,#pragma HLS PIPELINE II=1
資源:
介面:
這裡的輸入陣列被實現為一個 memory 介面,所以模組會輸出使能訊號和地址訊號,並從資料埠讀入資料。
接下來我們使用一個優化指令,#pragma HLS loop UNROLL迴圈展開
綜合的資源消耗,明顯用面積換速度了。
最後給出一個VGA的例項程式碼,由hls生成:
vga.h
#include "ap_int.h“
#define BIT_OUT 4
// VGA resolution (640x480) @ 60 Hz
// sync pulse: negative logic
#define WIDTH 640
#define HEIGHT 480
// VERTICAL timing (rows)
#define VERTICAL_FRONT_PORCH 10
#define VERTICAL_SYNC_PULSE 2
#define VERTICAL_BACK_PORCH 33
// HORIZONTAL timing (clocks @ 25.175 MHz)
#define HORIZONTAL_FRONT_PORCH 16
#define HORIZONTAL_SYNC_PULSE 96
#define HORIZONTAL_BACK_PORCH 48
void vga_base(volatile ap_uint<BIT_OUT> *R, volatile ap_uint<BIT_OUT> *G,
volatile ap_uint<BIT_OUT> *B, volatile ap_uint<1> *V_SYNC,
volatile ap_uint<1> *H_SYNC);
vga.cpp
#include "vga.h"
void vga_base(volatile ap_uint<BIT_OUT> *R, volatile ap_uint<BIT_OUT> *G,
volatile ap_uint<BIT_OUT> *B, volatile ap_uint<1> *V_SYNC,
volatile ap_uint<1> *H_SYNC)
{
#pragma HLS INTERFACE ap_ctrl_none port=return
#pragma HLS INTERFACE ap_none port=R
#pragma HLS INTERFACE ap_none port=B
#pragma HLS INTERFACE ap_none port=G
#pragma HLS INTERFACE ap_none port=V_SYNC
#pragma HLS INTERFACE ap_none port=H_SYNC
const int TOTAL_V = VERTICAL_SYNC_PULSE + VERTICAL_BACK_PORCH + HEIGHT +VERTICAL_FRONT_PORCH;
const int TOTAL_H = HORIZONTAL_SYNC_PULSE + HORIZONTAL_BACK_PORCH + WIDTH +
HORIZONTAL_FRONT_PORCH;
int x,y;
ap_uint<1> V_SYNC_temp = 1;
ap_uint<1> H_SYNC_temp = 1 ;
for (y=0; y<TOTAL_V; y++)
for (x=0; x<TOTAL_H; x++)
{
#pragma HLS PIPELINE II=1
if (y<VERTICAL_SYNC_PULSE) V_SYNC_temp=0;
else V_SYNC_temp=1;
if (x<HORIZONTAL_SYNC_PULSE) H_SYNC_temp=0;
else H_SYNC_temp=1;
if ((x>= HORIZONTAL_SYNC_PULSE + HORIZONTAL_BACK_PORCH) &&
(x< HORIZONTAL_SYNC_PULSE + HORIZONTAL_BACK_PORCH + WIDTH) &&
(y>= VERTICAL_SYNC_PULSE + VERTICAL_BACK_PORCH) &&
(y< VERTICAL_SYNC_PULSE + VERTICAL_BACK_PORCH + HEIGHT))
{
// DISPLAY IMAGE
*R=0;
*G=15;
*B=0;
*V_SYNC = V_SYNC_temp;
*H_SYNC = H_SYNC_temp;
} else
{
*R=0;
*G=0;
*B=0;
*V_SYNC = V_SYNC_temp;
*H_SYNC = H_SYNC_temp;
}
}
}
最後在vivado呼叫該ip