CUDA實現影象二次線性插值縮放
阿新 • • 發佈:2019-01-23
(Sx-0)/(SW-0)=(Dx-0)/(DW-0) (Sy-0)/(SH-0)=(Dy-0)/(DH-0)
=> Sx=Dx*SW/DW Sy=Dy*SH/DH
聚焦看看(Sx,Sy)座標點(Sx,Sy為浮點數)附近的情況;
對於近鄰取樣插值的縮放演算法,直接取Color0顏色作為縮放後點的顏色;
二次線性插值需要考慮(Sx,Sy)座標點周圍的4個顏色值Color0/Color1/Color2/Color3,
把(Sx,Sy)到A/B/C/D座標點的距離作為係數來把4個顏色混合出縮放後點的顏色;
( u=Sx-floor(Sx); v=Sy-floor(Sy); 說明:floor函式的返回值為小於等於引數的最大整數 )
二次線性插值公式為:
tmpColor0=Color0*(1-u) + Color2*u;
tmpColor1=Color1*(1-u) + Color3*u;
DstColor =tmpColor0*(1-v) + tmpColor1*v;
展開公式為:
pm0=(1-u)*(1-v);
pm1=v*(1-u);
pm2=u*(1-v);
pm3=u*v;
則顏色混合公式為:
DstColor = Color0*pm0 + Color1*pm1 + Color2*pm2 + Color3*pm3;
CUDA實現:
// Bilinear-interpolation resize kernel: produces one destination pixel per block.
//
// Launch layout: the destination pixel coordinate is taken from blockIdx.x
// (column) and blockIdx.y (row) only; threadIdx is never read, so the kernel is
// meant to be launched with a (dstWidth, dstHeight) grid of single-thread blocks.
//
// Parameters:
//   output              - device buffer receiving the scaled image. Each pixel
//                         is written with one 32-bit store, so the address
//                         blockIdx.y * pitchOutput + blockIdx.x * bytesPerPixelOutput
//                         must be 4-byte aligned.
//   input               - device buffer holding the source image; bytes 0..2 of
//                         each source pixel are read.
//   pitchOutput         - bytes per destination row.
//   pitchInput          - bytes per source row.
//   bytesPerPixelOutput - byte distance between destination pixels in a row.
//   bytesPerPixelInput  - byte distance between source pixels in a row.
//   xRatio, yRatio      - source-coordinate step per destination pixel.
//
// The three input channel bytes are blended independently and stored in the
// same order in the low three bytes of the 32-bit result; the top byte is 0xff.
__global__ void cudaTransform(Uint8 *output, Uint8 *input, Uint32 pitchOutput, Uint32 pitchInput, Uint8 bytesPerPixelOutput, Uint8 bytesPerPixelInput, float xRatio, float yRatio)
{
    // Floating-point source position for this destination pixel.
    const float srcX = xRatio * blockIdx.x;
    const float srcY = yRatio * blockIdx.y;

    // Integer part selects the top-left sample; the fraction is the blend weight.
    const int x = (int)srcX;
    const int y = (int)srcY;
    const float xDist = srcX - x;
    const float yDist = srcY - y;

    // Only step to the right/lower neighbour when it actually contributes.
    // A neighbour whose weight is exactly zero adds nothing to the result, so
    // re-using the current column/row instead gives the same output while
    // avoiding a read past the last column/row (e.g. for a 1-pixel-wide or
    // 1-pixel-high source, where the ratio is 0).
    const int xNext = (xDist > 0.0f) ? x + 1 : x;
    const int yNext = (yDist > 0.0f) ? y + 1 : y;

    // The four samples surrounding the source position.
    const Uint8 *rowTop = input + (size_t)y * pitchInput;
    const Uint8 *rowBottom = input + (size_t)yNext * pitchInput;
    const Uint8 *a = rowTop + (size_t)x * bytesPerPixelInput;         // top-left
    const Uint8 *b = rowTop + (size_t)xNext * bytesPerPixelInput;     // top-right
    const Uint8 *c = rowBottom + (size_t)x * bytesPerPixelInput;      // bottom-left
    const Uint8 *d = rowBottom + (size_t)xNext * bytesPerPixelInput;  // bottom-right

    const float xInv = 1.0f - xDist;
    const float yInv = 1.0f - yDist;

    // Blend each channel with the bilinear weights.
    // Channel byte 2 (named "blue" here).
    const float blue = (a[2]) * xInv * yInv + (b[2]) * xDist * yInv + (c[2]) * yDist * xInv + (d[2]) * (xDist * yDist);
    // Channel byte 1.
    const float green = (a[1]) * xInv * yInv + (b[1]) * xDist * yInv + (c[1]) * yDist * xInv + (d[1]) * (xDist * yDist);
    // Channel byte 0 (named "red" here).
    const float red = (a[0]) * xInv * yInv + (b[0]) * xDist * yInv + (c[0]) * yDist * xInv + (d[0]) * (xDist * yDist);

    // Pack the truncated channel values plus an opaque 0xff top byte into one 32-bit store.
    Uint8 *p = output + (size_t)blockIdx.y * pitchOutput + (size_t)blockIdx.x * bytesPerPixelOutput;
    *(Uint32*)p = 0xff000000 | ((((int)blue) << 16)) | ((((int)green) << 8)) | ((int)red);
}
// Resizes a 3-byte-per-pixel host image to a 4-byte-per-pixel host image on the
// GPU using the cudaTransform bilinear kernel.
//
// Parameters:
//   src        - host pointer to the source image, 3 bytes per pixel, with each
//                row padded to a multiple of 4 bytes (that many bytes per row,
//                times h rows, are copied to the device).
//   dst        - host pointer receiving the result, 4 bytes per pixel,
//                dstw * 4 bytes per row, dsth rows.
//   w, h       - source width and height in pixels.
//   dstw, dsth - destination width and height in pixels.
//
// Returns without doing anything when a pointer is null or any dimension is
// not positive. CUDA errors are reported through cudasafe().
void RGB24_resize32(uint8_t* src, uint8_t*dst, int w, int h, int dstw, int dsth)
{
    // Reject inputs that would produce zero-size buffers or negative ratios.
    if (!src || !dst || w <= 0 || h <= 0 || dstw <= 0 || dsth <= 0)
        return;

    const int src_nb_component = 3;
    const int dst_nb_component = 4;

    // Row sizes rounded up to a multiple of 4 bytes.
    const uint32_t src_row_bytes = (w * src_nb_component + 3) & ~3;
    const uint32_t dst_row_bytes = (dstw * dst_nb_component + 3) & ~3;

    // Total sizes in size_t so large images do not wrap 32-bit arithmetic.
    const size_t src_size = (size_t)src_row_bytes * h;
    const size_t dst_size = (size_t)dst_row_bytes * dsth;

    // The kernel samples the pixel to the right of and below each source
    // position. One extra row plus one extra pixel of zeroed slack keeps those
    // reads inside the allocation even for 1-pixel-wide or 1-pixel-high sources.
    const size_t src_pad = (size_t)src_row_bytes + src_nb_component;

    // Using (w - 1) / dstw keeps the sampled source position below the last
    // column/row so that the +1 neighbour exists for ordinary image sizes.
    const float x_ratio = ((float)(w - 1)) / dstw;
    const float y_ratio = ((float)(h - 1)) / dsth;

    // One single-thread block per destination pixel, as the kernel indexes by blockIdx.
    const dim3 grid(dstw, dsth);

    uint8_t* device_src = NULL;
    uint8_t* device_dst = NULL;

    // Copy original image
    cudasafe(cudaMalloc((void **)&device_src, src_size + src_pad), "Original image allocation ", __FILE__, __LINE__);
    cudasafe(cudaMemset(device_src + src_size, 0, src_pad), "Clear source padding ", __FILE__, __LINE__);
    cudasafe(cudaMemcpy(device_src, src, src_size, cudaMemcpyHostToDevice), "Copy original image to device ", __FILE__, __LINE__);
    cudasafe(cudaMalloc((void **)&device_dst, dst_size), "New image allocation ", __FILE__, __LINE__);

    cudaTransform<<<grid, 1>>>(device_dst, device_src, dst_row_bytes, src_row_bytes, dst_nb_component, src_nb_component, x_ratio, y_ratio);
    // A launch does not return an error itself; an invalid configuration
    // (for example a grid dimension above the device limit) shows up here.
    cudasafe(cudaGetLastError(), "Kernel launch ", __FILE__, __LINE__);

    // Copy scaled image to host (this blocking copy also waits for the kernel).
    cudasafe(cudaMemcpy(dst, device_dst, dst_size, cudaMemcpyDeviceToHost), "from device to host", __FILE__, __LINE__);

    cudasafe(cudaFree(device_src), "Free original image ", __FILE__, __LINE__);
    cudasafe(cudaFree(device_dst), "Free new image ", __FILE__, __LINE__);
}