在Ubuntu或其近親(Lubuntu、Kubuntu、Mint等)上編寫OpenCL程序也不會太難。由於本例用的是AMD APP SDK,因此需要AMD的GPU以及相關驅動。首先,去AMD官網下載GPU驅動——AMD Catalyst。如果你用的是APU並且還有一塊獨立顯卡的話,通過AMD Catalyst Control Center可以選擇使用哪個GPU。像我現在用的聯想Z475筆記本,搭載了AMD APU A6-3420M以及一塊AMD Radeon HD 7400M,但是相比較而言,還是APU自帶的6620G的GPU性能更強一些,因此我這邊設置的是採用AMD Radeon HD 6620G。
在Linux下,AMD官方的GPU驅動是.run文件,只需使用sudo sh xxx.run即可安裝。安裝時採用默認安裝即可。
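然後去developer.amd.com開發者網站下載AMD APP SDK。下載完成之後,將lib裡面的動態庫文件(xxx.so)取出來,並且把include裡的頭文件取出來。在你的OpenCL工程中把頭文件的路徑(-I)以及動態庫的路徑(-L)都設置好。在你用-l的時候,如果動態庫文件後綴名為.so.1(例如libOpenCL.so.1),那麼得把文件名後綴.1去掉,或者建立一個不帶.1的符號鏈接(例如ln -s libOpenCL.so.1 libOpenCL.so)。因為在Linux下,-lOpenCL只會查找名為libOpenCL.so或libOpenCL.a的庫文件。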
下面我們將舉一個最簡單的例子,首先看主機端代碼:
/* ============================================================================ Name : OpenCLTest.c Author : Zenny Chen Version : Copyright : Your copyright notice Description : Hello World in C, Ansi-style ============================================================================ */ #include <stdio.h> #include <string.h> #include <stdlib.h> #include <unistd.h> #include <CL/cl.h> static int GetCurrentLocationFilePath(char pDst[512], const char *filename) { if(pDst == NULL || filename == NULL) return 0; int size = readlink("/proc/self/exe", pDst, 512); while(pDst[size - 1] != '/') size--; strcpy(&pDst[size], filename); int retSize = strlen(filename) + size; pDst[retSize] = '\0'; return retSize; } int main(void) { /*Step1: Getting platforms and choose an available one.*/ cl_uint numPlatforms; //the NO. of platforms cl_int status = clGetPlatformIDs(0, NULL, &numPlatforms); if (status != CL_SUCCESS) { puts("Error: Getting platforms!"); return 0; } cl_platform_id platforms[16]; /*For clarity, choose the first available platform. */ if(numPlatforms > 0) { status = clGetPlatformIDs(numPlatforms, platforms, NULL); if(status != CL_SUCCESS) { puts("Failed to get platform IDs"); return 0; } } /*Step 2:Query the platform and choose the first GPU device if has one.Otherwise use the CPU as device.*/ cl_uint numDevices = 0; cl_device_id devices[16]; clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_GPU, 0, NULL, &numDevices); if(numDevices == 0) //no GPU available. 
{ puts("No devices available!"); return 0; } else { printf("The number of available devices is: %u\n", numDevices); clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_GPU, numDevices, devices, NULL); } /*Step 3: Create context.*/ cl_context context = clCreateContext(NULL,1, devices,NULL,NULL,NULL); /*Step 4: Creating command queue associate with the context.*/ cl_command_queue commandQueue = clCreateCommandQueue(context, devices[0], 0, NULL); /*Step 5: Create program object */ char filePath[512]; GetCurrentLocationFilePath(filePath, "test.cl"); FILE *fp = fopen(filePath, "r"); if(fp == NULL) { puts("OpenCL kernel source file open failed!"); return 0; } fseek(fp, 0, SEEK_END); long fileLength = ftell(fp); fseek(fp, 0, SEEK_SET); char *source = (char*)malloc(fileLength + 1); fread(source, 1, fileLength, fp); fclose(fp); size_t sourceSize[] = {fileLength}; cl_program program = clCreateProgramWithSource(context, 1, (const char**)&source, sourceSize, NULL); free(source); if(program == NULL) { puts("Failed to create the program!"); return 0; } /*Step 6: Build program. 
*/ status = clBuildProgram(program, 1,devices,NULL,NULL,NULL); if(status != CL_SUCCESS) { puts("Failed to build the program!"); return 0; } /*Step 7: Initial input,output for the host and create memory objects for the kernel*/ int input[128]; for(int i = 0; i < 128; i++) input[i] = i + 1; cl_mem inputBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR, sizeof(input), input, NULL); cl_mem outputBuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY , sizeof(input), NULL, NULL); /*Step 8: Create kernel object */ cl_kernel kernel = clCreateKernel(program, "test", NULL); /*Step 9: Sets Kernel arguments.*/ status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &inputBuffer); status = clSetKernelArg(kernel, 1, sizeof(cl_mem), &outputBuffer); /*Step 10: Running the kernel.*/ size_t global_work_size[1] = { 128 }; status = clEnqueueNDRangeKernel(commandQueue, kernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL); /*Step 11: Read the cout put back to host memory.*/ int output[128]; status = clEnqueueReadBuffer(commandQueue, outputBuffer, CL_TRUE, 0, sizeof(input), output, 0, NULL, NULL); /*Step 12: Clean the resources.*/ status = clReleaseKernel(kernel); //Release kernel. status = clReleaseProgram(program); //Release the program object. status = clReleaseMemObject(inputBuffer); //Release mem object. status = clReleaseMemObject(outputBuffer); status = clReleaseCommandQueue(commandQueue); //Release Command queue. status = clReleaseContext(context); //Release context. for(int i = 0; i < 128; i++) { if(output[i] != i + 2) { printf("Error occurred @%d!", i); return 0; } } puts("Pass!"); return 1; }
在編譯選項中,使用-std=gnu99或-std=gnu11(代碼中用到了在for語句內聲明循環變量的C99特性)。上述代碼為純C語言,因此即便你沒有安裝g++也完全沒關係。
下面看看內核源代碼:
/*
 ============================================================================
 Name        : test.cl
 Author      : Zenny Chen
 Version     :
 Copyright   : Your copyright notice
 Description : Simple OpenCL kernel source
 ============================================================================
 */

/*
 * Adds 1 to one element of `in` and stores the result at the same position
 * of `out`. The element handled by a work-item is selected by its global id
 * in dimension 0.
 */
__kernel void test(__global int* in, __global int* out)
{
    const int gid = get_global_id(0);
    const int value = in[gid];

    out[gid] = value + 1;
}
將此文件保存為test.cl,放在可執行文件相同路徑下,然後我們就能正常運行了。