opencl(八)----clEnqueueNDRangeKernel、clEnqueueTask、工作组和工作项


clEnqueueNDRangeKernel

cl_int clEnqueueNDRangeKernel (    
        cl_command_queue command_queue,     //命令队列
     cl_kernel kernel,                                   //要执行的核对象
     cl_uint work_dim,                                //数据的维度
     const size_t *global_work_offset,         // 各维度上的全局ID偏移量
     const size_t *global_work_size,     //各维度上的工作项数量
     const size_t *local_work_size,      // 各维度上一个工作组中工作项的数量
     cl_uint num_events_in_wait_list,
     const cl_event *event_wait_list,
     cl_event *event
)

clEnqueueTask

cl_int clEnqueueTask (    
        cl_command_queue command_queue, //命令队列
     cl_kernel kernel,    //要执行的核对象
     cl_uint num_events_in_wait_list,  
     const cl_event *event_wait_list,   
     cl_event *event
)
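clEnqueueTask 和 clEnqueueNDRangeKernel 的功能都是将核执行命令加入命令队列。clEnqueueTask 只用单个工作项执行核,等价于以 work_dim = 1、global_work_size[0] = 1、local_work_size[0] = 1 调用 clEnqueueNDRangeKernel;而 clEnqueueNDRangeKernel 可以把数据划分给多个工作项和工作组并行处理,从而更充分地利用设备的资源。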

工作组和工作项的特点:

1、工作组中的工作项可以访问局部内存的同一块地址

2、工作组中的工作项可以进行同步

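工作项相关函数:get_work_dim()、get_global_size(dim)、get_global_id(dim)、get_global_offset(dim)、get_local_size(dim)、get_local_id(dim)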

 

 工作组相关函数:get_num_groups(dim)、get_group_id(dim)

 

 demo:

/*
 * Kernel: every work-item encodes its own IDs into one float of `output`.
 *
 * The stored value is
 *    global_id(0) * 10 + global_id(1) + local_id(0) * 0.1 + local_id(1) * 0.01
 * and it is written at the row-major slot computed from the global IDs with
 * the global offset subtracted, using get_global_size(0) as the row length.
 */
__kernel void id_check(__global float *output) {

   /* Global IDs of this work-item in dimensions 0 and 1 */
   size_t gid_x = get_global_id(0);
   size_t gid_y = get_global_id(1);

   /* IDs of this work-item inside its work-group */
   size_t lid_x = get_local_id(0);
   size_t lid_y = get_local_id(1);

   /* Subtract the global offset to obtain zero-based column and row */
   int col = gid_x - get_global_offset(0);
   int row = gid_y - get_global_offset(1);

   /* Row-major position in the output array */
   int slot = row * get_global_size(0) + col;

   /* Global IDs go before the decimal point, local IDs after it */
   float encoded = gid_x * 10.0f + gid_y * 1.0f;
   encoded += lid_x * 0.1f + lid_y * 0.01f;

   output[slot] = encoded;
}
#define _CRT_SECURE_NO_WARNINGS
#define PROGRAM_FILE "id_check.cl"
#define KERNEL_FUNC "id_check"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#ifdef MAC
#include <OpenCL/cl.h>
#else
#include <CL/cl.h>
#endif

/* Find a GPU or CPU associated with the first available platform */
// 获取平台  获取设备
cl_device_id create_device() {

   cl_platform_id platform;
   cl_device_id dev;
   int err;

   /* Identify a platform */
   err = clGetPlatformIDs(1, &platform, NULL);
   if(err < 0) {
      perror("Couldn't identify a platform");
      exit(1);
   } 

   /* Access a device */
   err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &dev, NULL);
   if(err == CL_DEVICE_NOT_FOUND) {
      err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 1, &dev, NULL);
   }
   if(err < 0) {
      perror("Couldn't access any devices");
      exit(1);   
   }

   return dev;
}

/*
 * Create an OpenCL program from a source file and build it.
 *
 * ctx      - context the program is created in
 * dev      - device whose build log is printed when the build fails
 * filename - path of the OpenCL C source file
 *
 * Returns the built program. Any failure (file access, allocation, program
 * creation, build) is fatal: a diagnostic is printed and the process exits
 * with status 1. On a build failure the build log for `dev` is printed to
 * standard output.
 */
cl_program build_program(cl_context ctx, cl_device_id dev, const char* filename) {

   cl_program program;
   FILE *program_handle;
   char *program_buffer, *program_log;
   size_t program_size, log_size;
   long file_len;
   cl_int err;

   /* Read program file and place content into buffer.
      Binary mode keeps the size reported by ftell() equal to the number of
      bytes fread() delivers; in text mode on Windows the two differ and the
      tail of the buffer would stay uninitialised. */
   program_handle = fopen(filename, "rb");
   if(program_handle == NULL) {
      perror("Couldn't find the program file");
      exit(1);
   }
   if(fseek(program_handle, 0, SEEK_END) != 0 ||
         (file_len = ftell(program_handle)) < 0) {
      perror("Couldn't determine the size of the program file");
      exit(1);
   }
   rewind(program_handle);

   program_buffer = (char*)malloc((size_t)file_len + 1);
   if(program_buffer == NULL) {
      perror("Couldn't allocate memory for the program source");
      exit(1);
   }
   /* Use the number of bytes actually read as the source length */
   program_size = fread(program_buffer, sizeof(char), (size_t)file_len,
         program_handle);
   if(ferror(program_handle)) {
      perror("Couldn't read the program file");
      exit(1);
   }
   program_buffer[program_size] = '\0';
   fclose(program_handle);

   /* Create program from file */
   program = clCreateProgramWithSource(ctx, 1,
      (const char**)&program_buffer, &program_size, &err);
   if(err < 0) {
      /* OpenCL does not set errno, so report its own error code */
      fprintf(stderr, "Couldn't create the program (OpenCL error %d)\n",
            (int)err);
      exit(1);
   }
   free(program_buffer);

   /* Build program (no device list given: build for the context's devices) */
   err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
   if(err < 0) {

      fprintf(stderr, "Couldn't build the program (OpenCL error %d)\n",
            (int)err);

      /* Find size of log and print to std output */
      log_size = 0;
      if(clGetProgramBuildInfo(program, dev, CL_PROGRAM_BUILD_LOG,
            0, NULL, &log_size) >= 0 && log_size > 0) {
         program_log = (char*) malloc(log_size + 1);
         if(program_log != NULL) {
            /* Terminate at both ends so the buffer is a valid string even
               if the second query fails */
            program_log[0] = '\0';
            program_log[log_size] = '\0';
            clGetProgramBuildInfo(program, dev, CL_PROGRAM_BUILD_LOG,
                  log_size, program_log, NULL);
            printf("%s\n", program_log);
            free(program_log);
         }
      }
      exit(1);
   }

   return program;
}

int main() {

   /* OpenCL data structures */
   cl_device_id device;
   cl_context context;
   cl_command_queue queue;
   cl_program program;
   cl_kernel kernel;
   cl_int i, err;

   /* Data and buffers */
   size_t dim = 2;
   size_t global_offset[] = {3, 5};
   size_t global_size[] = {6, 4};
   size_t local_size[] = {3, 2};
   float test[24];      
   cl_mem test_buffer;

   /* Create a device and context */
   // 获取设备
   device = create_device();
   // 获取上下文
   context = clCreateContext(NULL, 1, &device, NULL, NULL, &err);
   if(err < 0) {
      perror("Couldn't create a context");
      exit(1);   
   }

   /* Build the program and create a kernel */
   // 获取编译后的 cl_program
   program = build_program(context, device, PROGRAM_FILE);
   // 创建核
   kernel = clCreateKernel(program, KERNEL_FUNC, &err);
   if(err < 0) {
      perror("Couldn't create a kernel");
      exit(1);   
   };

   /* Create a write-only buffer to hold the output data */
   // 创建 cl_mem
   test_buffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, 
         sizeof(test), NULL, &err);
   if(err < 0) {
      perror("Couldn't create a buffer");
      exit(1);   
   };

   /* Create kernel argument */
   // 设置核参数
   err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &test_buffer);
   if(err < 0) {
      perror("Couldn't set a kernel argument");
      exit(1);   
   };

   /* Create a command queue */
   // 创建命令队列
   queue = clCreateCommandQueue(context, device, 0, &err);
   if(err < 0) {
      perror("Couldn't create a command queue");
      exit(1);   
   };

   /* Enqueue kernel */
   //Enqueues a command to execute a kernel on a device.
   /**
    *
    cl_int clEnqueueNDRangeKernel ( cl_command_queue command_queue,
      cl_kernel kernel,
      cl_uint work_dim,
      const size_t *global_work_offset,
      const size_t *global_work_size,
      const size_t *local_work_size,
      cl_uint num_events_in_wait_list,
      const cl_event *event_wait_list,
      cl_event *event)
    * **/
    // dim =2
    //size_t global_offset[] = {3, 5};
    //size_t global_size[] = {6, 4};
    //size_t local_size[] = {3, 2};
   err = clEnqueueNDRangeKernel(queue, kernel, dim, global_offset,
         global_size, local_size, 0, NULL, NULL);
   if(err < 0) {
      perror("Couldn't enqueue the kernel");
      exit(1);   
   }


   /* Read and print the result */
   // 从设备中读取结果
   err = clEnqueueReadBuffer(queue, test_buffer, CL_TRUE, 0, 
      sizeof(test), &test, 0, NULL, NULL);
   if(err < 0) {
      perror("Couldn't read the buffer");
      exit(1);   
   }

   for(i=0; i<24; i+=6) {
      printf("%.2f     %.2f     %.2f     %.2f     %.2f     %.2f\n", 
         test[i], test[i+1], test[i+2], test[i+3], test[i+4], test[i+5]);
   }

   /* Deallocate resources */
   clReleaseMemObject(test_buffer);
   clReleaseKernel(kernel);
   clReleaseCommandQueue(queue);
   clReleaseProgram(program);
   clReleaseContext(context);
   return 0;
}


免责声明!

本站转载的文章为个人学习借鉴使用,本站对版权不负任何法律责任。如果侵犯了您的隐私权益,请联系本站邮箱yoyou2525@163.com删除。



 
粤ICP备18138465号  © 2018-2025 CODEPRJ.COM