Neon加速的Hello world例子
阿新 • 發佈:2018-12-17
程式碼如下
#include <sys/time.h> #include <stdlib.h> #include <stdio.h> #include <arm_neon.h> #include <math.h> #define N 100000 #define M 1000 inline double timingExec(struct timeval start, struct timeval end){ double timeuse = 1000.0 * (end.tv_sec - start.tv_sec) + (end.tv_usec - start.tv_usec) / 1000.0; return timeuse; } int main () { float a[N], b[N], c[N], c_neon[N]; struct timeval start, end; float time, time_neon; for(int i = 0; i < N; ++i) { a[i] = rand() % M; b[i] = rand() % M; } gettimeofday(&start, NULL); for(int t = 0; t < M; ++t) for(int i = 0; i < N; ++i) { c[i] = a[i] * b[i]; } gettimeofday(&end, NULL); time = timingExec(start, end); gettimeofday(&start, NULL); for(int t = 0; t < M; ++t) for(int ix =0; ix < N - 3; ix += 4) { float32x4_t data_a = vld1q_f32(a + ix); float32x4_t data_b = vld1q_f32(b + ix); float32x4_t result = vmulq_f32(data_a, data_b); vst1q_f32(c_neon + ix, result); } gettimeofday(&end, NULL); time_neon = timingExec(start, end); float diff = 0.f; for(int i = 0; i < N; ++i) { //printf("%d %f %f\n", i, c[i], c_neon[i]); diff += fabs(c[i] - c_neon[i]); } printf("diff %f\n", diff); printf("time : %f time_neon : %f speed_up %f\n", time, time_neon, time / time_neon); return 0; }