本文測試OpenCL中讀取image數據時關於坐標的兩個問題:
- 使用float2坐標讀取
- 使用int2坐標讀取
首先完整的測試代碼如下,測試平台為SDM855:
#include <math.h>
#include <string.h>
#include <algorithm>
#include <iostream>
#include <vector>
#include <CL/cl.h>
#include "OCL/OPPOOpenCLWrapper.h"
#include "OCL/OCLUtils.h"
// Provide `uchar` as shorthand for `unsigned char`, unless some other header
// has already defined a macro with that name.
#if !defined(uchar)
#  define uchar unsigned char
#endif
// OpenCL C source of the `readtest` kernel.
//
// Only work-item (0, 0) does anything: it samples `src` at a fixed list of
// coordinates and prints the red channel scaled by 255. The `dst` argument is
// part of the kernel signature but is never written.
//
// Two samplers are used:
//   * samp1       - unnormalized coords, clamp-to-edge, LINEAR filter; used for
//                   the float2 reads so that bilinear interpolation is visible.
//   * sampNearest - same, but NEAREST filter; used for the int2 reads. The
//                   OpenCL C specification requires CLK_FILTER_NEAREST when
//                   read_imagef is called with integer coordinates, otherwise
//                   the returned values are undefined.
const char code[] = R"(
const sampler_t samp1 = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_LINEAR;
// Integer-coordinate reads are only defined with a NEAREST-filter sampler.
const sampler_t sampNearest = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;
__kernel void readtest(read_only image2d_t src, global uchar *dst)
{
    int2 coord = (int2)(get_global_id(0), get_global_id(1));
    // Print from a single work-item only, so every line appears exactly once.
    if(coord.x == 0 && coord.y == 0){
        // float2 coordinates, LINEAR filter.
        printf("(float2)(0.0, 0.0) read:%f \n", read_imagef(src, samp1, (float2)(0.0, 0.0) ).x * 255.0);
        printf("(float2)(0.0, 1.0) read:%f \n", read_imagef(src, samp1, (float2)(0.0, 1.0) ).x * 255.0);
        printf("(float2)(0.0, 1.5) read:%f \n", read_imagef(src, samp1, (float2)(0.0, 1.5) ).x * 255.0);
        printf("(float2)(0.0, 2.0) read:%f \n", read_imagef(src, samp1, (float2)(0.0, 2.0) ).x * 255.0);
        printf("(float2)(1.5, 1.5) read:%f \n", read_imagef(src, samp1, (float2)(1.5, 1.5) ).x * 255.0);
        printf("(float2)(0.5, 2.0) read:%f \n", read_imagef(src, samp1, (float2)(0.5, 2.0) ).x * 255.0);
        printf("(float2)(0.5, 2.5) read:%f \n", read_imagef(src, samp1, (float2)(0.5, 2.5) ).x * 255.0);
        printf("(float2)(1.0, 1.0) read:%f \n", read_imagef(src, samp1, (float2)(1.0, 1.0) ).x * 255.0);
        printf("(float2)(254.0, 254.0) read:%f \n", read_imagef(src, samp1, (float2)(254.0, 254.0) ).x * 255.0);
        printf("(float2)(255.0, 255.0) read:%f \n", read_imagef(src, samp1, (float2)(255.0, 255.0) ).x * 255.0);
        printf("(float2)(255.5, 255.5) read:%f \n", read_imagef(src, samp1, (float2)(255.5, 255.5) ).x * 255.0);
        printf("(float2)(256.0, 256.0) read:%f \n", read_imagef(src, samp1, (float2)(256.0, 256.0) ).x * 255.0);
        printf("(float2)(300, 300.0) read:%f \n", read_imagef(src, samp1, (float2)(300.0, 300.0) ).x * 255.0);
        // int2 coordinates, NEAREST filter.
        printf("(int2)(1, 1) read:%f \n", read_imagef(src, sampNearest, (int2)(1, 1) ).x * 255.0);
        printf("(int2)(0, 0) read:%f \n", read_imagef(src, sampNearest, (int2)(0, 0) ).x * 255.0);
        printf("(int2)(1, 2) read:%f \n", read_imagef(src, sampNearest, (int2)(1, 2) ).x * 255.0);
        printf("(int2)(254, 254) read:%f \n", read_imagef(src, sampNearest, (int2)(254, 254) ).x * 255.0);
        printf("(int2)(255, 255) read:%f \n", read_imagef(src, sampNearest, (int2)(255, 255) ).x * 255.0);
        printf("(int2)(256, 256) read:%f \n", read_imagef(src, sampNearest, (int2)(256, 256) ).x * 255.0);
        printf("(int2)(257, 257) read:%f \n", read_imagef(src, sampNearest, (int2)(257, 257) ).x * 255.0);
    }
}
)";
void testsamp05()
{
OPPOOpenCLWrapper ocl;
cl_image_format imageformat;
imageformat.image_channel_data_type = CL_UNORM_INT8;
imageformat.image_channel_order = CL_R;
cl_image_desc imagedesc;
memset(&imagedesc, 0, sizeof(imagedesc));
imagedesc.image_width = 256;
imagedesc.image_height = 256;
imagedesc.image_type = CL_MEM_OBJECT_IMAGE2D;
std::vector<uchar> data(256*256, 0);
for(int i = 0; i < 256; ++i){
for(int w = 0; w < 256; ++w){
data[i*256+w] = std::max(i, w);
}
}
cl_int err;
cl_mem src = clCreateImage(ocl.getContext(), CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, &imageformat, &imagedesc, data.data(), &err);
checkErr(err, "src");
cl_mem dst = clCreateBuffer(ocl.getContext(), CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, 256*256, NULL, &err);
checkErr(err, "dst");
const char *pcode = code;
cl_program prog = ocl.makeProgram(&pcode, sizeof(code) / sizeof(code[0]));
cl_kernel kernel = ocl.makeKernel(prog, "readtest");
clSetKernelArg(kernel , 0, sizeof(src), &src);
clSetKernelArg(kernel , 1, sizeof(dst), &dst);
size_t globalsize[] = {256, 256} ;
clEnqueueNDRangeKernel(ocl.getCommandQueue(), kernel, 2, NULL, globalsize, NULL, 0, NULL, NULL);
clFinish(ocl.getCommandQueue());
}
我們創建一個寬高都為256的image對象,每個像素的值設置為其橫、縱坐標中的較大者,即max(x, y)。數據格式為CL_UNORM_INT8,然後使用不同的坐標去讀取image對象的值。其結果顯示如下:
(float2)(0.0, 0.0) read:0.000000
(float2)(0.0, 1.0) read:0.500000
(float2)(0.0, 1.5) read:1.000000
(float2)(0.0, 2.0) read:1.500000
(float2)(1.5, 1.5) read:1.000000
(float2)(0.5, 2.0) read:1.500000
(float2)(0.5, 2.5) read:2.000000
(float2)(1.0, 1.0) read:0.750000
(float2)(254.0, 254.0) read:253.750000
(float2)(255.0, 255.0) read:254.750000
(float2)(255.5, 255.5) read:255.000000
(float2)(256.0, 256.0) read:255.000000
(float2)(300, 300.0) read:255.000000
(int2)(1, 1) read:1.000000
(int2)(0, 0) read:0.000000
(int2)(1, 2) read:2.000000
(int2)(254, 254) read:254.000000
(int2)(255, 255) read:255.000000
(int2)(256, 256) read:255.000000
(int2)(257, 257) read:255.000000
從上面的結果我們可以得知如下信息:
- 如果讀取時使用的是float2坐標,假設坐標為(w, h),那麼返回的值為(w - 0.5, h - 0.5)處的插值結果,插值方式就是常規意義上的雙線性插值,與在CPU代碼中對該圖像做雙線性插值的結果一致。當然,這也和採樣器sampler_t對象設置為CLK_FILTER_LINEAR有關;如果設置為CLK_FILTER_NEAREST,那就是最近鄰取值了。舉例來說,對於(float2)(1.0, 1.0)坐標,其插值目標為(1.0 - 0.5, 1.0 - 0.5) = (0.5, 0.5),位於(0, 0)、(0, 1)、(1, 0)、(1, 1)四個像素點的正中間,這四個像素的值分別為0、1、1、1,雙線性插值的結果即為(0 + 1 + 1 + 1) / 4 = 0.75。
- 如果讀取時使用的是int2坐標,那麼其坐標與值的關係就和在CPU中按下標處理該image一樣,超出範圍的坐標會被鉗制到邊緣像素(CLK_ADDRESS_CLAMP_TO_EDGE)。需要注意,OpenCL規範要求以整數坐標調用read_imagef時採樣器使用CLK_FILTER_NEAREST,否則返回值是未定義的。
