1. 程式人生 > >計算RGB影象直方圖(一)

計算RGB影象直方圖(一)

RGB影象代表每個畫素佔4個位元組(RGB佔3位元組還有一個位元組空),R、G、B分量各佔一個位元組,每個分量都有256種取值可能(每個分量值是32位整數)。因此RGB影象的直方圖是一個256 * 3的陣列。

軟體演算法:

// This function computes the histogram for R, G, and B.
//
// image_data is a pointer to an RGBA image with 8 bits per channel
// w is the width of the image in pixels
// h is the height of the image in pixels

// The histogram is an array of 256 bins for R, G, and B.
// Each bin entry is a 32-bit unsigned integer value.
unsigned int *
histogram_rgba_unorm8(void *image_data, int w, int h)
{
    unsigned char *img = (unsigned char *)image_data;
    unsigned int *ref_histogram_results;
    unsigned int *ptr;
    int i;
    // clear the histogram results buffer to zeros.
    //
    // the histogram buffer stores the histogram values for R
    // followed by the histogram values for G and then B.
    // Since there are 256 bins for an 8-bit color channel,
    // the histogram buffer is 256 * 3 entries in size.
    // Each entry is a 32-bit unsigned integer value.
    //
    ref_histogram_results = (unsigned int *)malloc(256 * 3 *
    sizeof(unsigned int));
    ptr = ref_histogram_results;
    memset(ref_histogram_results, 0x0, 256 * 3 *
    sizeof(unsigned int));
    // compute histogram for R
    for (i=0; i<w*h*4; i+=4)
    {
        int indx = img[i];//每一個R分量的值
        ptr[indx]++;
    }
    ptr += 256;//R分量有256種可能取值,在ptr[256*3]中佔256項
    // compute histogram for G
    for (i=1; i<w*h*4; i+=4)
    {
        int indx = img[i];//每一個G分量的值
        ptr[indx]++;
    }
    ptr += 256;//G分量同樣在ptr[256*3]中佔256項
    // compute histogram for B
    for (i=2; i<w*h*4; i+=4)
    {
        int indx = img[i];//每一個B分量的值
        ptr[indx]++;
    }
    return ref_histogram_results;
}

OpenCL加速實現:

首先,對每個work group都計算一個tmp_histgram[256*3]的直方圖陣列,最後都寫入histgram[num_groups*256*3]陣列中。 

global_work_size[0] = ((image_width + gsize[0] - 1) / gsize[0]);
global_work_size[1] = ((image_height + gsize[1] - 1) / gsize[1]);
num_groups = global_work_size[0] * global_work_size[1];
global_work_size[0] *= gsize[0];
global_work_size[1] *= gsize[1];

kernel void
histogram_partial_image_rgba_unorm8(image2d_t img,
global uint *histogram)
{
    int local_size = (int)get_local_size(0) * (int)get_local_size(1);
    int image_width = get_image_width(img);
    int image_height = get_image_height(img);
    int group_indx = (get_group_id(1) * get_num_groups(0) + get_group_id(0)) * 256 *         3;//work item在work group中的id計算出tmp_histogram偏移
    int x = get_global_id(0);//img寬度
    int y = get_global_id(1);//img高度
    local uint tmp_histogram[256 * 3];
    int tid = get_local_id(1) * get_local_size(0) + get_local_id(0));
    int j = 256 * 3;
    int indx = 0;
    // clear the local buffer that will generate the partial
    // histogram
    do
    {
        if (tid < j)
            tmp_histogram[indx+tid] = 0;
        
        j -= local_size;
        indx += local_size;
    } while (j > 0);
    //每個work item負責更新num_groups組中,每組對應位置的tmp_histogram值
    barrier(CLK_LOCAL_MEM_FENCE);

    if ((x < image_width) && (y < image_height))
    {
        float4 clr = read_imagef(img,
            CLK_NORMALIZED_COORDS_FALSE |
            CLK_ADDRESS_CLAMP_TO_EDGE |
            CLK_FILTER_NEAREST,
            (float2)(x, y));
        uchar indx_x, indx_y, indx_z;
        indx_x = convert_uchar_sat(clr.x * 255.0f);
        indx_y = convert_uchar_sat(clr.y * 255.0f);
        indx_z = convert_uchar_sat(clr.z * 255.0f);
        atomic_inc(&tmp_histogram[indx_x]);
        atomic_inc(&tmp_histogram[256+(uint)indx_y]);
        atomic_inc(&tmp_histogram[512+(uint)indx_z]);
    }
    barrier(CLK_LOCAL_MEM_FENCE);

    // copy the partial histogram to appropriate location in
    // histogram given by group_indx
    if (local_size >= (256 * 3))
    {
        //一個work group可以寫完一個tmp_histogram,每個work item寫對應部分
        if (tid < (256 * 3))
            histogram[group_indx + tid] = tmp_histogram[tid];
    }
    else
    {
        //一個work group寫不完一個tmp_histogram,256*3平均分到每個work group,每個work item寫所有256*3分的每個組中對應位置。
        j = 256 * 3;
        indx = 0;
        do
        {
            if (tid < j)
                histogram[group_indx + indx + tid] = tmp_histogram[indx + tid];
            j -= local_size;
            indx += local_size;
        } while (j > 0);
    }
}

然後,處理前面生成的histgram[num_groups*256*3]的陣列,得到最終的直方圖陣列。

partial_global_work_size[0] = 256*3;
partial_local_work_size[0] =
(workgroup_size > 256) ? 256 : workgroup_size;

kernel void
histogram_sum_partial_results_unorm8(
global uint *partial_histogram,
int num_groups,
global uint *histogram)
{
    int tid = (int)get_global_id(0);
    int group_indx;
    int n = num_groups;
    local uint tmp_histogram[256 * 3];
    tmp_histogram[tid] = partial_histogram[tid];
    group_indx = 256*3;
    while (--n > 0)
    {
        tmp_histogram[tid] += partial_histogram[group_indx + tid];//每個work item處理所有num_groups的對應項
        group_indx += 256*3;
    }
    histogram[tid] = tmp_histogram[tid];//tmp_histogram[tid]的值就是tid項最終的值
}