計算RGB影象直方圖(一)
阿新 • • 發佈:2019-01-09
RGB影象代表每個畫素佔4個位元組(RGB佔3位元組還有一個位元組空),R、G、B分量各佔一個位元組,每個分量都有256種取值可能(每個分量值是32位整數)。因此RGB影象的直方圖是一個256 * 3的陣列。
軟體演算法:
// This function computes the histogram for R, G, and B. // // image_data is a pointer to an RGBA image with 8 bits per channel // w is the width of the image in pixels // h is the height of the image in pixels // The histogram is an array of 256 bins for R, G, and B. // Each bin entry is a 32-bit unsigned integer value. unsigned int * histogram_rgba_unorm8(void *image_data, int w, int h) { unsigned char *img = (unsigned char *)image_data; unsigned int *ref_histogram_results; unsigned int *ptr; int i; // clear the histogram results buffer to zeros. // // the histogram buffer stores the histogram values for R // followed by the histogram values for G and then B. // Since there are 256 bins for an 8-bit color channel, // the histogram buffer is 256 * 3 entries in size. // Each entry is a 32-bit unsigned integer value. // ref_histogram_results = (unsigned int *)malloc(256 * 3 * sizeof(unsigned int)); ptr = ref_histogram_results; memset(ref_histogram_results, 0x0, 256 * 3 * sizeof(unsigned int)); // compute histogram for R for (i=0; i<w*h*4; i+=4) { int indx = img[i];//每一個R分量的值 ptr[indx]++; } ptr += 256;//R分量有256種可能取值,在ptr[256*3]中佔256項 // compute histogram for G for (i=1; i<w*h*4; i+=4) { int indx = img[i];//每一個G分量的值 ptr[indx]++; } ptr += 256;//G分量同樣在ptr[256*3]中佔256項 // compute histogram for B for (i=2; i<w*h*4; i+=4) { int indx = img[i];//每一個B分量的值 ptr[indx]++; } return ref_histogram_results; }
OpenCL加速實現:
首先,對每個work group都計算一個tmp_histgram[256*3]的直方圖陣列,最後都寫入histgram[num_groups*256*3]陣列中。
global_work_size[0] = ((image_width + gsize[0] - 1) / gsize[0]); global_work_size[1] = ((image_height + gsize[1] - 1) / gsize[1]); num_groups = global_work_size[0] * global_work_size[1]; global_work_size[0] *= gsize[0]; global_work_size[1] *= gsize[1]; kernel void histogram_partial_image_rgba_unorm8(image2d_t img, global uint *histogram) { int local_size = (int)get_local_size(0) * (int)get_local_size(1); int image_width = get_image_width(img); int image_height = get_image_height(img); int group_indx = (get_group_id(1) * get_num_groups(0) + get_group_id(0)) * 256 * 3;//work item在work group中的id計算出tmp_histogram偏移 int x = get_global_id(0);//img寬度 int y = get_global_id(1);//img高度 local uint tmp_histogram[256 * 3]; int tid = get_local_id(1) * get_local_size(0) + get_local_id(0)); int j = 256 * 3; int indx = 0; // clear the local buffer that will generate the partial // histogram do { if (tid < j) tmp_histogram[indx+tid] = 0; j -= local_size; indx += local_size; } while (j > 0); //每個work item負責更新num_groups組中,每組對應位置的tmp_histogram值 barrier(CLK_LOCAL_MEM_FENCE); if ((x < image_width) && (y < image_height)) { float4 clr = read_imagef(img, CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST, (float2)(x, y)); uchar indx_x, indx_y, indx_z; indx_x = convert_uchar_sat(clr.x * 255.0f); indx_y = convert_uchar_sat(clr.y * 255.0f); indx_z = convert_uchar_sat(clr.z * 255.0f); atomic_inc(&tmp_histogram[indx_x]); atomic_inc(&tmp_histogram[256+(uint)indx_y]); atomic_inc(&tmp_histogram[512+(uint)indx_z]); } barrier(CLK_LOCAL_MEM_FENCE); // copy the partial histogram to appropriate location in // histogram given by group_indx if (local_size >= (256 * 3)) { //一個work group可以寫完一個tmp_histogram,每個work item寫對應部分 if (tid < (256 * 3)) histogram[group_indx + tid] = tmp_histogram[tid]; } else { //一個work group寫不完一個tmp_histogram,256*3平均分到每個work group,每個work item寫所有256*3分的每個組中對應位置。 j = 256 * 3; indx = 0; do { if (tid < j) histogram[group_indx + indx + tid] = tmp_histogram[indx + tid]; j -= local_size; indx += local_size; } while (j > 0); } }
然後,處理前面生成的histgram[num_groups*256*3]的陣列,得到最終的直方圖陣列。
partial_global_work_size[0] = 256*3; partial_local_work_size[0] = (workgroup_size > 256) ? 256 : workgroup_size; kernel void histogram_sum_partial_results_unorm8( global uint *partial_histogram, int num_groups, global uint *histogram) { int tid = (int)get_global_id(0); int group_indx; int n = num_groups; local uint tmp_histogram[256 * 3]; tmp_histogram[tid] = partial_histogram[tid]; group_indx = 256*3; while (--n > 0) { tmp_histogram[tid] += partial_histogram[group_indx + tid];//每個work item處理所有num_groups的對應項 group_indx += 256*3; } histogram[tid] = tmp_histogram[tid];//tmp_histogram[tid]的值就是tid項最終的值 }