▶ 函數 clCreateProgramWithSource 接收 OpenCL 源代碼(設備無關)來創建程序,而函數 clCreateProgramWithBinary 接收已經由函數 clBuildProgram build 過的二進制代碼(設備有關)來創建程序
● 範例代碼,還是計算兩向量和
1 #include <stdio.h> 2 #include <stdlib.h> 3 #include <string.h> 4 #include <cl.h> 5 6 const int nElement = 2048; 7 const char *binaryFileName = "D:\\Code\\OpenCL\\OpenCLProjectTemp\\OpenCLProjectTemp\\vectorAdd.bin"; 8 const char *programSource = " \ 9 __kernel void vectorAdd(__global int *A, __global int *B, __global int *C) \ 10 { \ 11 int idx = get_global_id(0); \ 12 C[idx] = A[idx] + B[idx]; \ 13 return; \ 14 } \ 15 "; 16 17 int readSource(const char* kernelPath, unsigned char **output)// 讀取文本文件,存儲為字符串,返回字符數 18 { 19 FILE *pf; 20 long int size; 21 printf("readSource, Program file: %s\n", kernelPath); 22 fopen_s(&pf, kernelPath, "rb"); 23 if (!pf) 24 { 25 printf("Open kernel file failed\n"); 26 exit(-1); 27 } 28 if (fseek(pf, 0, SEEK_END) != 0) 29 { 30 printf("Seek end of file faildd\n"); 31 exit(-1); 32 } 33 if ((size = ftell(pf)) < 0) 34 { 35 printf("Get file position failed\n"); 36 exit(-1); 37 } 38 rewind(pf); 39 if ((*output = (unsigned char *)malloc(size + 1)) == NULL) 40 { 41 printf("Allocate space failed\n"); 42 exit(-1); 43 } 44 fread(*output, 1, size, pf); 45 fclose(pf); 46 (*output)[size] = '\0'; 47 return strlen((char*)*output); 48 } 49 50 int main() 51 { 52 const size_t datasize = sizeof(int) * nElement; 53 int i, *A, *B, *C; 54 cl_int status; 55 unsigned char *programBinary; 56 FILE *pf; 57 58 A = (int*)malloc(datasize); 59 B = (int*)malloc(datasize); 60 C = (int*)malloc(datasize); 61 for (i = 0; i < nElement; A[i] = B[i] = i, i++); 62 63 cl_platform_id platform; 64 clGetPlatformIDs(1, &platform, NULL); 65 cl_device_id device; 66 clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 1, &device, NULL); 67 cl_context context = clCreateContext(NULL, 1, &device, NULL, NULL, &status); 68 cl_command_queue cmdQueue = clCreateCommandQueue(context, device, 0, &status); 69 cl_mem bufferA, bufferB, bufferC; 70 bufferA = clCreateBuffer(context, CL_MEM_READ_ONLY, datasize, NULL, &status); 71 bufferB = clCreateBuffer(context, CL_MEM_READ_ONLY, datasize, NULL, &status); 
72 bufferC = clCreateBuffer(context, CL_MEM_WRITE_ONLY, datasize, NULL, &status); 73 clEnqueueWriteBuffer(cmdQueue, bufferA, CL_FALSE, 0, datasize, A, 0, NULL, NULL); 74 clEnqueueWriteBuffer(cmdQueue, bufferB, CL_FALSE, 0, datasize, B, 0, NULL, NULL); 75 cl_program program = clCreateProgramWithSource(context, 1, &programSource, NULL, &status); 76 clBuildProgram(program, 1, &device, NULL, NULL, NULL); 77 78 // 抽取已經 build 的 program 的代碼 79 size_t programBinarySize; 80 clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(cl_device_id), &programBinarySize, NULL); // 獲取 build 的 program 的大小 81 programBinary = (unsigned char *)malloc(sizeof(unsigned char)*programBinarySize); 82 clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(unsigned char *), &programBinary, NULL); // 獲取代碼 83 84 // 將代碼寫入文件,再讀回來,這說明可以從外部文件中直接讀取已經 build 的 program 來使用 85 fopen_s(&pf, binaryFileName, "w"); 86 fwrite(programBinary, 1, programBinarySize, pf); 87 free(programBinary); 88 fclose(pf); 89 programBinarySize = readSource(binaryFileName, &programBinary); 90 91 // 使用clCreateProgramWithBinary 來建立 program 92 cl_program program2 = clCreateProgramWithBinary(context, 1, &device, &programBinarySize, (const unsigned char **)&programBinary, NULL, NULL); 93 clBuildProgram(program2, 1, &device, NULL, NULL, NULL); 94 95 cl_kernel kernel = clCreateKernel(program2, "vectorAdd", &status); 96 status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &bufferA); 97 status = clSetKernelArg(kernel, 1, sizeof(cl_mem), &bufferB); 98 status = clSetKernelArg(kernel, 2, sizeof(cl_mem), &bufferC); 99 size_t globalSize[1] = { nElement }, localSize[1] = { 256 }; 100 status = clEnqueueNDRangeKernel(cmdQueue, kernel, 1, NULL, globalSize, localSize, 0, NULL, NULL); 101 clEnqueueReadBuffer(cmdQueue, bufferC, CL_TRUE, 0, datasize, C, 0, NULL, NULL); 102 103 for (i = 0; i < nElement; i++) 104 { 105 if (C[i] != i + i) 106 break; 107 } 108 printf("Output is %s.\n", (i == nElement) ? 
"correct" : "incorrect"); 109 110 free(A); 111 free(B); 112 free(C); 113 free(programBinary); 114 clReleaseContext(context); 115 clReleaseMemObject(bufferA); 116 clReleaseMemObject(bufferB); 117 clReleaseMemObject(bufferC); 118 clReleaseCommandQueue(cmdQueue); 119 clReleaseProgram(program); 120 clReleaseProgram(program2); 121 clReleaseKernel(kernel); 122 getchar(); 123 return 0; 124 }
● 輸出結果
readSource, Program file: D:\Code\OpenCL\OpenCLProjectTemp\OpenCLProjectTemp\vectorAdd.bin Output is correct.
● 由代碼中的向量加法生成的 program 代碼文件
1 // 2 // Generated by NVIDIA NVVM Compiler 3 // 4 // Compiler Build ID: UNKNOWN 5 // Driver 6 // Based on LLVM 3.4svn 7 // 8 9 .version 6.1 10 .target sm_61, texmode_independent 11 .address_size 64 12 13 // .globl vectorAdd 14 15 .entry vectorAdd( 16 .param .u64 .ptr .global .align 4 vectorAdd_param_0, 17 .param .u64 .ptr .global .align 4 vectorAdd_param_1, 18 .param .u64 .ptr .global .align 4 vectorAdd_param_2 19 ) 20 { 21 .reg .b32 %r<10>; 22 .reg .b64 %rd<8>; 23 24 25 ld.param.u64 %rd1, [vectorAdd_param_0]; 26 ld.param.u64 %rd2, [vectorAdd_param_1]; 27 ld.param.u64 %rd3, [vectorAdd_param_2]; 28 mov.b32 %r1, %envreg3; 29 mov.u32 %r2, %ntid.x; 30 mov.u32 %r3, %ctaid.x; 31 mad.lo.s32 %r4, %r3, %r2, %r1; 32 mov.u32 %r5, %tid.x; 33 add.s32 %r6, %r4, %r5; 34 mul.wide.s32 %rd4, %r6, 4; 35 add.s64 %rd5, %rd1, %rd4; 36 ld.global.u32 %r7, [%rd5]; 37 add.s64 %rd6, %rd2, %rd4; 38 ld.global.u32 %r8, [%rd6]; 39 add.s32 %r9, %r8, %r7; 40 add.s64 %rd7, %rd3, %rd4; 41 st.global.u32 [%rd7], %r9; 42 ret; 43 }