1. 程式人生 > >Embedded and Reconfigurable Systems M之HLS

Embedded and Reconfigurable Systems M之HLS

 

一開始我們從簡單的開始,設計一個計數器,如下框圖:

很簡單,波形如下:(eg:RANGE = 3)

contatore.h
 

#ifndef CONTATORE_H
#define CONTATORE_H

#include "ap_int.h"

/* One-bit unsigned value; used for the LED line. */
typedef ap_uint<1> bit;

/* Number of calls to contatore_no_io() between two toggles of the LED. */
#define RANGE 10

/*
 * Advances the internal counter by one step and writes the current LED state
 * to *led_output.
 */
void contatore_no_io(volatile bit *led_output);

#endif /* CONTATORE_H */

必須包含標頭檔案 "ap_int.h"。ap_uint<5> 表示一個無符號的 5 bit 整數,ap_int<5> 表示一個有符號的 5 bit 整數。

contatore.cpp

#include "contatore.h"

void contatore_no_io(volatile	bit	*led_output)
{
static unsigned int	counter_value	=	0;
static bit	led_status	=	0;
counter_value++;
if	(counter_value>=RANGE)	{
	led_status	=	not(led_status);
	counter_value=0;
}
*led_output	=	led_status;
		return;
}

testbench.cpp

#include "contatore.h"
int main()
{
bit	led_output_variable=0;
for	(int	i=0;	i<100;	i++)
{
	contatore_no_io(&led_output_variable);
	printf("Iter	%d\t	Led	=	%d\n",	i,(int)led_output_variable);
}

printf("\n\n>>>	End	simulation	<<<\n\n");
return	0;
}

pragma HLS reset指令:

如果變數是靜態變數或全域性變數,則RESET pragma用於顯式新增復位,或者可以通過off pragma從復位中刪除變數。 當設計中存在靜態或全域性陣列時,這可能特別有用。

ap_ovld:顧名思義,輸出的有效訊號。

Interfacce:

Latency and Interval

Latency:從輸入到輸出所經過的時鐘週期數。
Throughput/Interval:迴圈流水線操作的一個重要術語叫做 Initiation Interval(II),它是連續兩次迴圈迭代開始時間之間的時鐘週期數。

由下圖一目瞭然:

接下來我們通過axi_lite來控制計數器的計數週期,

counter_base_axi_lite.cpp

#include "counter_base_axi_lite.h"

/*
 * LED blinker whose period can be reloaded at run time.
 *
 * enable_count  : 0  -> load mode: the period is (re)loaded from
 *                       range_counter and the counter is left untouched;
 *                 !0 -> count mode: the counter advances by one per call.
 * range_counter : new period, only sampled while enable_count == 0.
 * output_value  : receives the current counter value on every call.
 * led_output    : receives the current LED state on every call.
 *
 * In count mode, when the counter reaches the stored period it is cleared and
 * the LED state is inverted. The period is 5 until the first load.
 *
 * The pragmas place enable_count and range_counter in the s_axilite bundle
 * "commands" and expose led_output / output_value as ap_none ports.
 */
void counter_base_axi_lite(bit enable_count, volatile unsigned int range_counter,
							volatile int *output_value, volatile bit *led_output)
{

#pragma HLS INTERFACE ap_none port=range_counter
#pragma HLS INTERFACE ap_none port=enable_count
#pragma HLS INTERFACE s_axilite register port=range_counter bundle=commands
#pragma HLS INTERFACE s_axilite register port=enable_count bundle=commands
#pragma HLS INTERFACE ap_none port=led_output
#pragma HLS INTERFACE ap_none port=output_value

	static unsigned int counter_value = 0;
	static bit led_status = 0;
	static unsigned int BASE_COUNT = 5;	/* period used until one is loaded */

	if (enable_count == 0) {
		BASE_COUNT = range_counter;
	} else {
		counter_value++;
		if (counter_value >= BASE_COUNT) {
			counter_value = 0;
			led_status = !led_status;
		}
	}

	*led_output = led_status;
	*output_value = counter_value;

#ifndef __SYNTHESIS__
	/* Simulation-only trace; both values are unsigned, hence %u. */
	printf("Base count = %u\tCounter value = %u\n", BASE_COUNT, counter_value);
#endif
}

test_bench.cpp

#include "counter_base_axi_lite.h"
/*
 * C-simulation testbench for counter_base_axi_lite().
 *
 * Phase 1 calls the block with ENABLE = 0, so each call only loads RANGE as
 * the period. Phase 2 calls it with ENABLE = 1 and checks that the reported
 * counter value equals i % RANGE on every iteration.
 *
 * Returns 0 when every check passes, -1 otherwise.
 */
int main()
{
	int testbench_error = 0;
	int count_value = 0;
	bit led_output_variable = 0;
	unsigned int RANGE = 10;
	bit ENABLE = 0;

	/* Phase 1: counter disabled, period is loaded from RANGE. */
	for (int i = 1; i < 20; i++) {
		counter_base_axi_lite(ENABLE, RANGE, &count_value, &led_output_variable);
		printf("ENABLE = %d\tRANGE = %u\tConter value = %d\t Led = %d\n",(int)ENABLE, RANGE,count_value, (int)led_output_variable);
	}

	/* Phase 2: counter enabled, cross-check output port count_value. */
	ENABLE = 1;
	for (int i = 1; i < 20; i++) {
		counter_base_axi_lite(ENABLE, RANGE, &count_value, &led_output_variable);
		printf("ENABLE = %d\tRANGE = %u\tConter value = %d\t Led = %d\n",(int)ENABLE, RANGE,count_value, (int)led_output_variable);
		/* Compare in unsigned explicitly instead of relying on implicit conversion. */
		if ((unsigned int)count_value != (unsigned int)i % RANGE)
			testbench_error++;
	}

	if (testbench_error != 0) {
		printf(">>> C simulation: FAILED <<<\n");
		return -1;
	}

	printf(">>> C simulation: OK <<<\n");
	return 0;
}

在 Vivado 中匯入該 IP

匯入SDK:

在這個資料夾下,有這個ip的驅動程式碼:

helloworld.c
 

#include <stdio.h>
#include "platform.h“
#include "xcounter_base_axi_lite.h“
#include <unistd.h>
int main()
{
init_platform();
XCounter_base_axi_lite InstancePtr;
u16 DeviceId = 0;
XCounter_base_axi_lite_LookupConfig(DeviceId);
int error = XCounter_base_axi_lite_Initialize(&InstancePtr, DeviceId);
if (error == 0)
printf("Custom AXI counter: initialization OK :-)\n");
else
printf("Custom AXI counter: initialization failed :-(\n");
useconds_t sleeping_time_us = 5000000;
unsigned int RANGE = 0x008FFFFF;
printf("Disabling counter...\n");
XCounter_base_axi_lite_Set_enable_count_V(&InstancePtr, 0);
printf("Setting range...\n");
XCounter_base_axi_lite_Set_range_counter(&InstancePtr, RANGE);
printf("Enabling counter with base %d...\n", RANGE);
XCounter_base_axi_lite_Set_enable_count_V(&InstancePtr, 1);
printf("Counter up and running\n");
printf("Sleeping for %2.1f seconds\n", sleeping_time_us/1000000.0);
usleep(sleeping_time_us);
RANGE = 0x001FFFFF;
printf("Disabling counter...\n");
XCounter_base_axi_lite_Set_enable_count_V(&InstancePtr, 0);
printf("Setting range...\n");
XCounter_base_axi_lite_Set_range_counter(&InstancePtr, RANGE);
printf("Enabling counter with base %d...\n", RANGE);
XCounter_base_axi_lite_Set_enable_count_V(&InstancePtr, 1);
printf("Counter up and running\n");
cleanup_platform();
return 0;
}

優化 in Vivado HLS

以一個普通的陣列相加為例:

/*
 * Adds up the LENGTH elements of input_array and stores the total in
 * *average_value.
 *
 * NOTE(review): the names "media" / "average_value" suggest a mean, but the
 * value stored is the plain sum (no division by LENGTH) - confirm intent.
 */
void media(volatile int input_array[LENGTH], volatile int *average_value)
{
	int running_total = 0;

	for (int idx = 0; idx < LENGTH; ++idx) {
		running_total += input_array[idx];
	}

	*average_value = running_total;
}

由圖看出需要經過19個週期才輸出一個結果;

接下來我們使用一個優化指令,#pragma HLS PIPELINE II=1

資源:

介面:

這裡的輸入陣列被綜合成一個 memory 介面,所以模組會輸出使能訊號和地址訊號,並從 memory 輸入對應的資料。

接下來我們使用另一個優化指令 #pragma HLS UNROLL(迴圈展開):

綜合的資源消耗,明顯用面積換速度了。

最後給出一個VGA的例項程式碼,由hls生成:

vga.h

#include "ap_int.h“
#define BIT_OUT 4
// VGA resolution (640x480) @ 60 Hz
// sync pulse: negative logic
#define WIDTH 640
#define HEIGHT 480
// VERTICAL timing (rows)
#define VERTICAL_FRONT_PORCH 10
#define VERTICAL_SYNC_PULSE 2
#define VERTICAL_BACK_PORCH 33
// HORIZONTAL timing (clocks @ 25.175 MHz)
#define HORIZONTAL_FRONT_PORCH 16
#define HORIZONTAL_SYNC_PULSE 96
#define HORIZONTAL_BACK_PORCH 48
void vga_base(volatile ap_uint<BIT_OUT> *R, volatile ap_uint<BIT_OUT> *G,
volatile ap_uint<BIT_OUT> *B, volatile ap_uint<1> *V_SYNC,
volatile ap_uint<1> *H_SYNC);

 

vga.cpp

#include "vga.h“
void vga_base(volatile ap_uint<BIT_OUT> *R, volatile ap_uint<BIT_OUT> *G,
                volatile ap_uint<BIT_OUT> *B, volatile ap_uint<1> *V_SYNC,
                volatile ap_uint<1> *H_SYNC)
{
#pragma HLS INTERFACE ap_ctrl_none port=return
#pragma HLS INTERFACE ap_none port=R
#pragma HLS INTERFACE ap_none port=B
#pragma HLS INTERFACE ap_none port=G
#pragma HLS INTERFACE ap_none port=V_SYNC
#pragma HLS INTERFACE ap_none port=H_SYNC

const int TOTAL_V = VERTICAL_SYNC_PULSE + VERTICAL_BACK_PORCH + HEIGHT                   +VERTICAL_FRONT_PORCH;
const int TOTAL_H = HORIZONTAL_SYNC_PULSE + HORIZONTAL_BACK_PORCH + WIDTH +
HORIZONTAL_FRONT_PORCH;
int x,y;
ap_uint<1> V_SYNC_temp = 1;
ap_uint<1> H_SYNC_temp = 1 ;
for (y=0; y<TOTAL_V; y++)
for (x=0; x<TOTAL_H; x++)
{
#pragma HLS PIPELINE II=1
if (y<VERTICAL_SYNC_PULSE) V_SYNC_temp=0;
else V_SYNC_temp=1;
if (x<HORIZONTAL_SYNC_PULSE) H_SYNC_temp=0;
else H_SYNC_temp=1;
if ((x>= HORIZONTAL_SYNC_PULSE + HORIZONTAL_BACK_PORCH) &&
    (x< HORIZONTAL_SYNC_PULSE + HORIZONTAL_BACK_PORCH + WIDTH) &&
    (y>= VERTICAL_SYNC_PULSE + VERTICAL_BACK_PORCH) &&
    (y< VERTICAL_SYNC_PULSE + VERTICAL_BACK_PORCH + HEIGHT))
    {
        // DISPLAY IMAGE
        *R=0;
        *G=15;
        *B=0;
        *V_SYNC = V_SYNC_temp;
        *H_SYNC = H_SYNC_temp;
    } else
    {
        *R=0;
        *G=0;
        *B=0;
        *V_SYNC = V_SYNC_temp;
        *H_SYNC = H_SYNC_temp;
    }
}
}

最後在vivado呼叫該ip