1. 程式人生 > Neon加速的Hello world例子

Neon加速的Hello world例子

程式碼如下

#include <sys/time.h>
#include <stdlib.h>
#include <stdio.h>
#include <arm_neon.h>
#include <math.h>

#define N 100000
#define M 1000

/**
 * Elapsed time between two gettimeofday() samples, in milliseconds.
 *
 * Declared `static inline` rather than plain `inline`: in C99/C11 a plain
 * `inline` definition does not provide an external definition, so any call
 * the compiler chooses not to inline (e.g. at -O0) would fail to link.
 *
 * @param start  timestamp taken before the measured region
 * @param end    timestamp taken after the measured region
 * @return       (end - start) in milliseconds; negative if end precedes start
 */
static inline double timingExec(struct timeval start, struct timeval end){
    /* Whole seconds and the microsecond remainder are converted separately;
     * the microsecond difference may be negative and is simply added in. */
    double whole_seconds_ms = 1000.0 * (double)(end.tv_sec - start.tv_sec);
    double remainder_ms = (double)(end.tv_usec - start.tv_usec) / 1000.0;
    return whole_seconds_ms + remainder_ms;
}

/*
 * Benchmark: element-wise product of two float arrays of length N, computed
 * M times with a plain scalar loop and M times with NEON intrinsics (four
 * floats per step). Prints the accumulated absolute difference between the
 * two result arrays, both timings in milliseconds, and their ratio.
 */
int main(void) {
    /* Static storage: four N-element float arrays (about 1.6 MB in total for
     * N = 100000) are too large to place safely on the stack. */
    static float a[N], b[N], c[N], c_neon[N];
    struct timeval start, end;
    float time_scalar, time_neon;

    /* Fill both inputs with pseudo-random integer values in [0, M). */
    for (int i = 0; i < N; ++i) {
        a[i] = (float)(rand() % M);
        b[i] = (float)(rand() % M);
    }

    /* Scalar reference: the whole array is multiplied M times over. */
    gettimeofday(&start, NULL);
    for (int t = 0; t < M; ++t) {
        for (int i = 0; i < N; ++i) {
            c[i] = a[i] * b[i];
        }
    }
    gettimeofday(&end, NULL);
    time_scalar = (float)timingExec(start, end);

    /* NEON version: load, multiply and store four floats per iteration. */
    gettimeofday(&start, NULL);
    for (int t = 0; t < M; ++t) {
        int ix = 0;
        for (; ix + 4 <= N; ix += 4) {
            float32x4_t data_a = vld1q_f32(a + ix);
            float32x4_t data_b = vld1q_f32(b + ix);
            float32x4_t result = vmulq_f32(data_a, data_b);
            vst1q_f32(c_neon + ix, result);
        }
        /* Scalar tail: covers the last N % 4 elements so c_neon is fully
         * written even when N is not a multiple of four. */
        for (; ix < N; ++ix) {
            c_neon[ix] = a[ix] * b[ix];
        }
    }
    gettimeofday(&end, NULL);
    time_neon = (float)timingExec(start, end);

    /* Sanity check: sum of absolute differences between the two results. */
    float diff = 0.f;
    for (int i = 0; i < N; ++i) {
        diff += fabsf(c[i] - c_neon[i]);
    }
    printf("diff %f\n", diff);
    printf("time : %f time_neon : %f speed_up %f\n", time_scalar, time_neon, time_scalar / time_neon);
    return 0;
}

在ttx1上測得的加速比約為2.3倍。