GPGPU OpenCL編程步驟與簡單實例


http://www.cnblogs.com/xudong-bupt/p/3582780.html 

1.OpenCL概念

  OpenCL是一個為異構平台編寫程序的框架,此異構平台可由CPU、GPU或其他類型的處理器組成。OpenCL由一門用於編寫kernels (在OpenCL設備上運行的函數)的語言(基於C99)和一組用於定義並控制平台的API組成。

  OpenCL提供了兩種層面的並行機制:任務並行與數據並行。

2.OpenCL與CUDA的區別

  不同點:OpenCL是通用的異構平台編程語言,為了兼顧不同設備,使用繁瑣。

      CUDA是NVIDIA公司發明的、專門用於在其GPGPU上編程的框架,使用簡單,好入門。

  相同點:都是基於任務並行與數據並行。

3.OpenCL的編程步驟

  (1)Discover and initialize the platforms

    調用兩次clGetPlatformIDs函數,第一次獲取可用的平台數量,第二次獲取一個可用的平台。

  (2)Discover and initialize the devices

    調用兩次clGetDeviceIDs函數,第一次獲取可用的設備數量,第二次獲取一個可用的設備。

  (3)Create  a context(調用clCreateContext函數)

    上下文context可能會管理多個設備device。

  (4)Create a command queue(調用clCreateCommandQueue函數)

    每個command queue只對應一個設備device(同一個設備可以創建多個command queue)。

    上下文context將命令發送到設備對應的command queue,設備就可以執行命令隊列裡的命令。

  (5)Create device buffers(調用clCreateBuffer函數)

    Buffer中保存的是數據對象,就是設備執行程序需要的數據保存在其中。

     Buffer由上下文context創建,這樣上下文管理的多個設備就會共享Buffer中的數據。

  (6)Write host data to device buffers(調用clEnqueueWriteBuffer函數)

  (7)Create and compile the program

    創建程序對象,程序對象就代表你的程序源文件或者二進制代碼數據。

  (8)Create the kernel(調用clCreateKernel函數)

    根據你的程序對象,生成kernel對象,表示設備程序的入口。

  (9)Set the kernel arguments(調用clSetKernelArg函數)

  (10)Configure the work-item structure(設置worksize)

    配置work-item的組織形式(維數,group組成等)

  (11)Enqueue the kernel for execution(調用clEnqueueNDRangeKernel函數)

    將kernel對象,以及 work-item參數放入命令隊列中進行執行。

  (12)Read  the output buffer back to the host(調用clEnqueueReadBuffer函數)

  (13)Release OpenCL resources(至此結束整個運行過程)

4.說明

  OpenCL中的核函數通常單獨放在一個.cl文件中(也可以作為字符串直接嵌入主機代碼),在運行時由主機程序讀入並編譯。

  OpenCL的編程一般步驟就是上面的13步,太長了,以至於要想做個向量加法都是那麼困難。

  不過上面的步驟前3步一般是固定的,可以單獨寫在一個.h/.cpp文件中,其他的一般也不會有什麼大的變化。

5.程序實例,向量運算

5.1通用前3個步驟,生成一個文件

  tool.h

 1 #ifndef TOOLH
 2 #define TOOLH
 3 
 4 #include <CL/cl.h>
 5 #include <string.h>
 6 #include <stdio.h>
 7 #include <stdlib.h>
 8 #include <iostream>
 9 #include <string>
10 #include <fstream>
11 using namespace std;
12 
13 /** convert the kernel file into a string */
14 int convertToString(const char *filename, std::string& s);
15 
16 /**Getting platforms and choose an available one.*/
17 int getPlatform(cl_platform_id &platform);
18 
19 /**Step 2:Query the platform and choose the first GPU device if has one.*/
20 cl_device_id *getCl_device_id(cl_platform_id &platform);
21 
22 #endif
View Code

  tool.cpp

 1 #include <CL/cl.h>
 2 #include <string.h>
 3 #include <stdio.h>
 4 #include <stdlib.h>
 5 #include <iostream>
 6 #include <string>
 7 #include <fstream>
 8 #include "tool.h"
 9 using namespace std;
10 
/**
 * Read a whole file (the kernel source) into a string.
 *
 * The file is opened in binary mode so the bytes handed to the OpenCL
 * compiler are exactly the bytes on disk.
 *
 * @param filename path of the file to read
 * @param s        receives the file contents; left untouched on failure
 * @return 0 on success, -1 if the file cannot be opened, sized or read
 */
int convertToString(const char *filename, std::string& s)
{
    if (filename == NULL)
    {
        std::cout<<"Error: failed to open file\n:"<<"(null)"<<std::endl;
        return -1;
    }

    std::ifstream file(filename, std::ios::in | std::ios::binary);
    if (!file.is_open())
    {
        std::cout<<"Error: failed to open file\n:"<<filename<<std::endl;
        return -1;
    }

    // Determine the size by seeking to the end. tellg() reports -1 on
    // failure, which must not be turned into a (huge) allocation size.
    file.seekg(0, std::ios::end);
    const std::streamoff length = file.tellg();
    if (length < 0)
    {
        std::cout<<"Error: failed to get size of file\n:"<<filename<<std::endl;
        return -1;
    }
    file.seekg(0, std::ios::beg);

    // Read straight into a std::string: no manual new[]/delete[] pair and no
    // separate terminator handling.
    std::string contents(static_cast<size_t>(length), '\0');
    if (length > 0 &&
        !file.read(&contents[0], static_cast<std::streamsize>(length)))
    {
        std::cout<<"Error: failed to read file\n:"<<filename<<std::endl;
        return -1;
    }

    s.swap(contents);
    return 0;
}
41 
42 /**Getting platforms and choose an available one.*/
43 int getPlatform(cl_platform_id &platform)
44 {
45     platform = NULL;//the chosen platform
46 
47     cl_uint numPlatforms;//the NO. of platforms
48     cl_int    status = clGetPlatformIDs(0, NULL, &numPlatforms);
49     if (status != CL_SUCCESS)
50     {
51         cout<<"Error: Getting platforms!"<<endl;
52         return -1;
53     }
54 
55     /**For clarity, choose the first available platform. */
56     if(numPlatforms > 0)
57     {
58         cl_platform_id* platforms =
59             (cl_platform_id* )malloc(numPlatforms* sizeof(cl_platform_id));
60         status = clGetPlatformIDs(numPlatforms, platforms, NULL);
61         platform = platforms[0];
62         free(platforms);
63     }
64     else
65         return -1;
66 }
67 
68 /**Step 2:Query the platform and choose the first GPU device if has one.*/
69 cl_device_id *getCl_device_id(cl_platform_id &platform)
70 {
71     cl_uint numDevices = 0;
72     cl_device_id *devices=NULL;
73     cl_int    status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &numDevices);
74     if (numDevices > 0) //GPU available.
75     {
76         devices = (cl_device_id*)malloc(numDevices * sizeof(cl_device_id));
77         status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, numDevices, devices, NULL);
78     }
79     return devices;
80 }
View Code

5.2核函數文件

  HelloWorld_Kernel.cl

/* double is an optional feature: OpenCL 1.0/1.1 compilers reject it unless
 * the cl_khr_fp64 extension is enabled explicitly. */
#pragma OPENCL EXTENSION cl_khr_fp64 : enable

/**
 * For every work-item i: out[i] = in[i] / 2.4 * (in[i] / 6).
 *
 * There is no bounds check, so the global work size must not exceed the
 * number of elements in both buffers.
 *
 * @param in  input values (read only)
 * @param out receives one result per work-item
 */
__kernel void helloworld(__global const double* in, __global double* out)
{
    /* get_global_id() returns size_t; keep the full width for indexing. */
    size_t gid = get_global_id(0);
    double x = in[gid];
    out[gid] = x / 2.4 * (x / 6);
}
View Code

 5.3主函數文件

  HelloWorld.cpp

 1 //For clarity,error checking has been omitted.
 2 #include <CL/cl.h>
 3 #include "tool.h"
 4 #include <string.h>
 5 #include <stdio.h>
 6 #include <stdlib.h>
 7 #include <iostream>
 8 #include <string>
 9 #include <fstream>
10 using namespace std;
11 
12 int main(int argc, char* argv[])
13 {
14     cl_int    status;
15     /**Step 1: Getting platforms and choose an available one(first).*/
16     cl_platform_id platform;
17     getPlatform(platform);
18 
19     /**Step 2:Query the platform and choose the first GPU device if has one.*/
20     cl_device_id *devices=getCl_device_id(platform);
21 
22     /**Step 3: Create context.*/
23     cl_context context = clCreateContext(NULL,1, devices,NULL,NULL,NULL);
24 
25     /**Step 4: Creating command queue associate with the context.*/
26     cl_command_queue commandQueue = clCreateCommandQueue(context, devices[0], 0, NULL);
27 
28     /**Step 5: Create program object */
29     const char *filename = "HelloWorld_Kernel.cl";
30     string sourceStr;
31     status = convertToString(filename, sourceStr);
32     const char *source = sourceStr.c_str();
33     size_t sourceSize[] = {strlen(source)};
34     cl_program program = clCreateProgramWithSource(context, 1, &source, sourceSize, NULL);
35 
36     /**Step 6: Build program. */
37     status=clBuildProgram(program, 1,devices,NULL,NULL,NULL);
38 
39     /**Step 7: Initial input,output for the host and create memory objects for the kernel*/
40     const int NUM=512000;
41     double* input = new double[NUM];
42     for(int i=0;i<NUM;i++)
43         input[i]=i;
44     double* output = new double[NUM];
45 
46     cl_mem inputBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR, (NUM) * sizeof(double),(void *) input, NULL);
47     cl_mem outputBuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY , NUM * sizeof(double), NULL, NULL);
48 
49     /**Step 8: Create kernel object */
50     cl_kernel kernel = clCreateKernel(program,"helloworld", NULL);
51 
52     /**Step 9: Sets Kernel arguments.*/
53     status = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&inputBuffer);
54     status = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&outputBuffer);
55 
56     /**Step 10: Running the kernel.*/
57     size_t global_work_size[1] = {NUM};
58     cl_event enentPoint;
59     status = clEnqueueNDRangeKernel(commandQueue, kernel, 1, NULL, global_work_size, NULL, 0, NULL, &enentPoint);
60     clWaitForEvents(1,&enentPoint); ///wait
61     clReleaseEvent(enentPoint);
62 
63     /**Step 11: Read the cout put back to host memory.*/
64     status = clEnqueueReadBuffer(commandQueue, outputBuffer, CL_TRUE, 0, NUM * sizeof(double), output, 0, NULL, NULL);
65     cout<<output[NUM-1]<<endl;
66 
67     /**Step 12: Clean the resources.*/
68     status = clReleaseKernel(kernel);//*Release kernel.
69     status = clReleaseProgram(program);    //Release the program object.
70     status = clReleaseMemObject(inputBuffer);//Release mem object.
71     status = clReleaseMemObject(outputBuffer);
72     status = clReleaseCommandQueue(commandQueue);//Release  Command queue.
73     status = clReleaseContext(context);//Release context.
74 
75     if (output != NULL)
76     {
77         free(output);
78         output = NULL;
79     }
80 
81     if (devices != NULL)
82     {
83         free(devices);
84         devices = NULL;
85     }
86     return 0;
87 }
View Code

 

編譯、鏈接、執行:

  g++ -I /opt/AMDAPP/include/ -o A  *.cpp -lOpenCL ; ./A

  


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM