原文地址:Android AARCH64 平台的 OpenCL 配置
Android AARCH64 平台的 OpenCL 配置
開發環境
IDE: Android Studio 3.4.1
Android: 7.1
minSdkVersion: 25
targetSdkVersion: 26
JNI CMake: 3.4.1
ABI: arm64-v8a
OpenCL: 1.2
配置 OpenCL 使用項目中的 so 庫
這里以編譯 openclTest.cpp 為 libopenclTest.so 並導入 OpenCL 的動態庫為例
注1: 下列 so 庫需要從開發板上 pull 到項目中,其中 libGLES_mali.so 用於驅動 OpenCL,其他庫為依賴庫
注2: 不同平台的驅動庫位於不同位置,可以下載 OpenCL-Z 查看
注3: set_target_properties 中第一項參數設置了生成庫的名稱,例如設置為 openclTest,則生成 libopenclTest.so 庫,這里使用了 lib_* 作為前綴,則生成liblib_*.so 庫,實際開發中要避免這種情況
查看依賴庫
objdump -x libGLES_mali.so | grep NEEDED
目錄結構
opencltest
├─ app
│ ├─ build
│ ├─ libs
│ └─ src
│ ├─ androidTest
│ ├─ main
│ │ ├─ java
│ │ │ └─ com
│ │ │ └─ example
│ │ │ └─ opencltest
│ │ │ MainActivity.java
│ │ ├─ jni #C/C++ 源碼目錄
│ │ │ └─ openclTest.cpp
│ │ ├─ jniLibs #JNI 需要調用的運行庫
│ │ │ └─ arm64-v8a #對應 ABI 版本建立文件夾
│ │ │ ├─ libbinder.so
│ │ │ ├─ libc++.so
│ │ │ ├─ libc.so
│ │ │ ├─ libcrypto.so
│ │ │ ├─ libcutils.so
│ │ │ ├─ libdl.so
│ │ │ ├─ libGLES_mali.so
│ │ │ ├─ libhardware.so
│ │ │ ├─ liblog.so
│ │ │ ├─ libm.so
│ │ │ ├─ libui.so
│ │ │ ├─ libutils.so
│ │ │ └─ libz.so
│ │ └─ res
│ └─ test
└─ gradle
CMakeLists.txt 增加配置
add_library(openclTest
SHARED
src/main/jni/openclTest.cpp )
add_library(lib_opencl SHARED IMPORTED)
set_target_properties(lib_opencl
PROPERTIES
IMPORTED_LOCATION ${PROJECT_SOURCE_DIR}/src/main/jniLibs/${ANDROID_ABI}/libGLES_mali.so )
add_library(lib_z SHARED IMPORTED)
set_target_properties(lib_z
PROPERTIES
IMPORTED_LOCATION ${PROJECT_SOURCE_DIR}/src/main/jniLibs/${ANDROID_ABI}/libz.so )
add_library(lib_log SHARED IMPORTED)
set_target_properties(lib_log
PROPERTIES
IMPORTED_LOCATION ${PROJECT_SOURCE_DIR}/src/main/jniLibs/${ANDROID_ABI}/liblog.so )
add_library(lib_utils SHARED IMPORTED)
set_target_properties(lib_utils
PROPERTIES
IMPORTED_LOCATION ${PROJECT_SOURCE_DIR}/src/main/jniLibs/${ANDROID_ABI}/libutils.so )
add_library(lib_ui SHARED IMPORTED)
set_target_properties(lib_ui
PROPERTIES
IMPORTED_LOCATION ${PROJECT_SOURCE_DIR}/src/main/jniLibs/${ANDROID_ABI}/libui.so )
add_library(lib_cutils SHARED IMPORTED)
set_target_properties(lib_cutils
PROPERTIES
IMPORTED_LOCATION ${PROJECT_SOURCE_DIR}/src/main/jniLibs/${ANDROID_ABI}/libcutils.so )
add_library(lib_binder SHARED IMPORTED)
set_target_properties(lib_binder
PROPERTIES
IMPORTED_LOCATION ${PROJECT_SOURCE_DIR}/src/main/jniLibs/${ANDROID_ABI}/libbinder.so )
add_library(lib_crypto SHARED IMPORTED)
set_target_properties(lib_crypto
PROPERTIES
IMPORTED_LOCATION ${PROJECT_SOURCE_DIR}/src/main/jniLibs/${ANDROID_ABI}/libcrypto.so )
add_library(lib_dl SHARED IMPORTED)
set_target_properties(lib_dl
PROPERTIES
IMPORTED_LOCATION ${PROJECT_SOURCE_DIR}/src/main/jniLibs/${ANDROID_ABI}/libdl.so )
add_library(lib_hardware SHARED IMPORTED)
set_target_properties(lib_hardware
PROPERTIES
IMPORTED_LOCATION ${PROJECT_SOURCE_DIR}/src/main/jniLibs/${ANDROID_ABI}/libhardware.so )
add_library(lib_c++ SHARED IMPORTED)
set_target_properties(lib_c++
PROPERTIES
IMPORTED_LOCATION ${PROJECT_SOURCE_DIR}/src/main/jniLibs/${ANDROID_ABI}/libc++.so )
add_library(lib_c SHARED IMPORTED)
set_target_properties(lib_c
PROPERTIES
IMPORTED_LOCATION ${PROJECT_SOURCE_DIR}/src/main/jniLibs/${ANDROID_ABI}/libc.so )
add_library(lib_m SHARED IMPORTED)
set_target_properties(lib_m
PROPERTIES
IMPORTED_LOCATION ${PROJECT_SOURCE_DIR}/src/main/jniLibs/${ANDROID_ABI}/libm.so )
target_link_libraries(openclTest
${log-lib}
lib_opencl )
配置 OpenCL 使用 dlopen 打開開發板上的運行庫
這種配置方法需要從 Github - KhronosGroup/OpenCL-Headers 倉庫下載 OpenCL 的頭文件,並且需要自行實現 OpenCL 的函數
注: 需要在源碼中定義
CL_TARGET_OPENCL_VERSION 120
,否則會報找不到版本號的錯誤
目錄結構
opencltest
├─ app
│ ├─ build
│ ├─ libs
│ └─ src
│ ├─ androidTest
│ ├─ main
│ │ ├─ java
│ │ │ └─ com
│ │ │ └─ example
│ │ │ └─ opencltest
│ │ │ MainActivity.java
│ │ ├─ jni #C/C++ 源碼目錄
│ │ │ ├─ dlopencl.cpp #dlopen 打開運行庫,實現 OpenCL 中的函數
│ │ │ ├─ openclTest.cpp
│ │ │ └─ include #JNI 調用的頭文件
│ │ │ ├─ dlopencl.h #定義 OpenCL 中的函數
│ │ │ └─ CL #OpenCL 需要使用的頭文件
│ │ │ ├─ cl.h
│ │ │ ├─ cl_ext.h
│ │ │ ├─ cl_gl.h
│ │ │ ├─ cl_gl_ext.h
│ │ │ ├─ cl_platform.h
│ │ │ ├─ cl_version.h
│ │ │ └─ opencl.h
│ │ └─ res
│ └─ test
└─ gradle
CMakeLists.txt 增加配置
include_directories(${PROJECT_SOURCE_DIR}/src/main/jni/include)
add_library(openclTest
SHARED
src/main/jni/openclTest.cpp )
add_library(lib_dlopencl
SHARED
src/main/jni/dlopencl.cpp )
target_link_libraries(openclTest
${log-lib}
lib_dlopencl )
需要導入的 dlopencl.h 和 dlopencl.cpp 寫在文末
OpenCL 使用
以下均以使用 dlopen 導入運行庫的方式為例
- 定義 OpenCL 版本
#define CL_TARGET_OPENCL_VERSION 120
- 包含頭文件
#include <CL/cl.h>
#include "dlopencl.h"
- 使用 Logcat 打印日志
由於 C 語言工作在 JNI 層,無法獲取控制台,導致了 printf() 函數失效,這里使用 __android_log_print 方法打印日志到 Logcat
#include <android/log.h>
#define DEBUG
#ifdef DEBUG
#define LOG "LOG-TAG"
#define LOGD(...) __android_log_print(ANDROID_LOG_DEBUG, LOG, __VA_ARGS__)
#define LOGI(...) __android_log_print(ANDROID_LOG_INFO, LOG, __VA_ARGS__)
#define LOGW(...) __android_log_print(ANDROID_LOG_WARN, LOG, __VA_ARGS__)
#define LOGE(...) __android_log_print(ANDROID_LOG_ERROR, LOG, __VA_ARGS__)
#define LOGF(...) __android_log_print(ANDROID_LOG_FATAL, LOG, __VA_ARGS__)
#else
#define LOG
#define LOGD(...)
#define LOGI(...)
#define LOGW(...)
#define LOGE(...)
#define LOGF(...)
#endif
使用方法同 printf(),例:
LOGI("Device ID: %d", device_id);
- 創建用於初始化 OpenCL 的 JNI 接口
建議在同一個文件中編寫多個操作 OpenCL 的函數,這里的初始化實際上是將操作 OpenCL 的變量建立為全局變量,通過 JNI 一次調用后其他函數再使用這些被初始化過的變量
定義變量
cl_uint num_device;
cl_uint num_platform;
cl_platform_id *platform;
cl_device_id *devices;
cl_int err;
cl_context context;
extern "C"
JNIEXPORT jint JNICALL
Java_com_example_opencltest_MainActivity_initOpencl(JNIEnv *env, jobject instance) {
initFns();
LOGI("getPlatformNum");
// 獲取可用平台數量
err = clGetPlatformIDs(0, 0, &num_platform);
platform = (cl_platform_id*)malloc(sizeof(cl_platform_id)*num_platform);
LOGI("getPlatformIDs");
// 獲取平台 ID
err = clGetPlatformIDs(num_platform, platform, nullptr);
if(err < 0) {
LOGE("clGetPlatformIDs failed");
return -1;
}
LOGI("getDeviceNum");
// 獲取可用設備數量
err = clGetDeviceIDs(platform[0], CL_DEVICE_TYPE_GPU, 0, nullptr, &num_device);
devices = (cl_device_id*)malloc(sizeof(cl_device_id)*num_device);
LOGI("getDeviceIDs");
// 獲取設備 ID
err = clGetDeviceIDs(platform[0], CL_DEVICE_TYPE_GPU, num_device, devices, nullptr);
if (err < 0) {
LOGE("clGetDeviceIDs failed");
return -1;
}
return 0;
}
使用 OpenCL 執行 Kernel 函數
通常編寫 OpenCL 中的 Kernel 函數要求單獨寫在 *.cl 文件中,調用時使用 fopen 打開,將里面的代碼作為字符串讀入之后再進行 runtime 編譯,比較麻煩,這里直接將整個 Kernel 函數寫成字符串
本例中因為測試原因沒有釋放掉創建的資源
可以使用允許正則替換的編輯器例如 VSCode,先寫好 Kernel 函數,然后使用正則表達式
(.*)
選擇所有行並將每一行作為一個參數($1),替換為"$1\\n"
,例如替換前:int a = 0;
,替換后:"int a = 0;\n"
注1: 經測試發現 OpenCL 在 PC 端允許每次調用任意數量的矢量數據,而在開發板上只允許一次調用 1.2.3.4.8.16 個
PC 端允許的矢量調用:
Integralgraph.s0
Integralgraph.s01
Integralgraph.s012
Integralgraph.s0123
Integralgraph.s01234
Integralgraph.s0123456
Integralgraph.s01234567
Integralgraph.s012345678
Integralgraph.s0123456789
Integralgraph.s0123456789a
Integralgraph.s0123456789ab
Integralgraph.s0123456789abc
Integralgraph.s0123456789abcd
Integralgraph.s0123456789abcde
Integralgraph.s0123456789abcdef
開發板允許的矢量調用:
Integralgraph.s0
Integralgraph.s01
Integralgraph.s012
Integralgraph.s0123
Integralgraph.s01234567
Integralgraph.s0123456789abcdef
所以在編寫運行於 AARCH64 架構的 OpenCL Kernel 函數時需要將被調用的矢量數組分組相加
例如在 PC 端為
TableInteg1 = (int16)(Integralgraph[index1].sf*flagx0, Integralgraph[index4].s0123456789abcde);
在開發板中需要改為
TableInteg1 = (int16)(Integralgraph[index1].sf*flagx0, Integralgraph[index4].s01234567, Integralgraph[index4].s89ab, Integralgraph[index4].scde);
注2: 測試中發現 PC 端的 OpenCL 遇到數組越界問題會直接跳過不予處理,但在 AARCH64 中會導致執行失敗
注3: AARCH64 平台上 OpenCL 能申請到的可調用內存遠小於 PC 端,如果出現輸入或輸出的數據超出 OpenCL 申請到的內存可能會導致數據輸出不完整,讀取數據時錯誤代碼返回 -14 等問題
先在 Activity 中調用 initOpencl(),初始化 OpenCL 及其平台和設備
// 定義積分圖寬度
#define CLL_IMAGE_W (400)
// 定義積分圖高度
#define CLL_IMAGE_H (80)
// 構建 Kernel 函數字符串
const char *clkernel[] = {
"__kernel void kernel_Integralgraph_45int(__global int16 * grayImage,\n"
" __global int16 * Integralgraph,\n"
" __global unsigned * const p_height) {\n"
" int x = get_global_id(0);\n"
" int height = *p_height;\n"
" int width = get_global_size(0);\n"
" __local int index, index1, index2, index3, index4, index5;\n"
" __local bool flagx0, flagxw, flagy1, flagy2;\n"
" __local int16 TableInteg1, TableInteg2, TableInteg3;\n"
" flagx0 = min(0, -x);\n"
" flagxw = min(0, x - width + 1);\n"
" for (int j = 0; j < height; j++) {\n"
" flagy1 = min(0, -j);\n"
" flagy2 = min(0, 1 - j);\n"
" index = j * width + x;\n"
" if(j==0) {\n"
" Integralgraph[index] = (int16)grayImage[index];\n"
" } else if(j==1) {\n"
" index1 = (j - 1)*width + x - 1;\n"
" index3 = (j - 1)*width + x + 1;\n"
" index4 = (j - 1)*width + x;\n"
" TableInteg1 = (int16)(Integralgraph[index1].sf*flagx0, Integralgraph[index4].s01234567, Integralgraph[index4].s89ab, Integralgraph[index4].scde);\n"
" TableInteg3 = (int16)(Integralgraph[index4].s12345678, Integralgraph[index4].s9abc, Integralgraph[index4].sdef, Integralgraph[index3].s0*flagxw);\n"
" Integralgraph[index] = (int16)((int16)TableInteg1*flagy1 + (int16)TableInteg3*flagy1 + (int16)grayImage[index4] * flagy1 + (int16)grayImage[index]);\n"
" } else {\n"
" index1 = (j - 1)*width + x - 1;\n"
" index2 = (j - 2)*width + x;\n"
" index3 = (j - 1)*width + x + 1;\n"
" index4 = (j - 1)*width + x;\n"
" TableInteg1 = (int16)(Integralgraph[index1].sf*flagx0, Integralgraph[index4].s01234567, Integralgraph[index4].s89ab, Integralgraph[index4].scde);\n"
" TableInteg2 = (int16)(Integralgraph[index2].s0*flagx0, Integralgraph[index2].s12345678, Integralgraph[index2].s9ab, Integralgraph[index2].scde, Integralgraph[index2].sf*flagxw);\n"
" TableInteg3 = (int16)(Integralgraph[index4].s12345678, Integralgraph[index4].s9abc, Integralgraph[index4].sdef, Integralgraph[index3].s0*flagxw);\n"
" Integralgraph[index] = (int16)((int16)TableInteg1*flagy1 + (int16)TableInteg3*flagy1 - (int16)TableInteg2 * flagy2 + (int16)grayImage[index4] * flagy1 + (int16)grayImage[index]);\n"
" }\n"
" }\n"
"}\n"
};
// 創建 Context
context = clCreateContext(nullptr, 1, devices, nullptr, nullptr, &err);
if(err < 0) {
LOGE("Create context failed, error code: [%d]", err);
return -1;
}
int inputData[CLL_IMAGE_W * CLL_IMAGE_H];
int outputData[CLL_IMAGE_W * CLL_IMAGE_H];
int width = CLL_IMAGE_W;
int height = CLL_IMAGE_H;
// 初始化輸入輸出數據
for (int i = 0; i < height; i++) {
for (int j = 0; j < width; j++) {
inputData[i * width + j] = 1;
outputData[i * width + j] = 0;
}
}
clock_t time_start;
clock_t time_finish;
double total_time;
char *program_log;
cl_command_queue queue;
cl_kernel kernel;
size_t log_size;
cl_program program;
// 創建命令隊列
queue = clCreateCommandQueue(context, devices[0], 0, &err);
if (err < 0) {
LOGE("Create command queue failed, error code: [%d]", err);
}
// 創建程序
program = clCreateProgramWithSource(context, sizeof(clkernel) / sizeof(clkernel[0]), clkernel, nullptr, nullptr);
// 構建/編譯程序
err = clBuildProgram(program, 0, nullptr, nullptr, nullptr, nullptr);
if (err < 0) {
clGetProgramBuildInfo(program, devices[0], CL_PROGRAM_BUILD_LOG, 0, nullptr, &log_size);
program_log = (char*)malloc(sizeof(log_size));
// 查詢構建/編譯過程中的 log
clGetProgramBuildInfo(program, devices[0], CL_PROGRAM_BUILD_LOG, log_size, program_log, nullptr);
LOGE("program_build_info: \n[%s]\n", program_log);
free(program_log);
}
// 創建 Kernel
kernel = clCreateKernel(program, "kernel_Integralgraph_45int", &err);
if (err < 0) {
LOGE("Create kernel failed, error code: [%d]", err);
}
// 創建用於輸入參數、輸出數據的內存空間
cl_mem meminput_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int) * height * width, inputData, &err);
cl_mem memoutput_buffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(int) * height * width, nullptr, &err);
cl_mem memHeight_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int), &height, &err);
// 向 Kernel 傳遞參數
err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &meminput_buffer);
err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &memoutput_buffer);
err = clSetKernelArg(kernel, 2, sizeof(cl_mem), &memHeight_buffer);
size_t global_work_offset[2] = {0, 0};
size_t localThreads[2] = {1, 1};
size_t globalThreads[2] = {(size_t)(width / 16), 1};
// 獲得程序開始執行的時間戳
time_start = clock();
// 排布工作組與工作項並執行
err = clEnqueueNDRangeKernel(queue, kernel, 2, global_work_offset, globalThreads, localThreads, 0, nullptr, nullptr);
if (err < 0) {
LOGE("Run Kernel failed, error code: [%d]", err);
}
// 獲得程序執行完成的時間戳
time_finish = clock();
// 計算運行時間
total_time = (double)(time_finish - time_start) / CLOCKS_PER_SEC;
LOGI("Total time: [%f]s", total_time);
// 從輸出的內存空間中讀取數據
err = clEnqueueReadBuffer(queue, memoutput_buffer, CL_TRUE, 0, sizeof(int) * height * width, outputData, 0, nullptr, nullptr);
if (err < 0) {
LOGE("Read buffer failed, error code: [%d]", err);
}
// 輸出數據,積分圖中的每一行拼接為一條 LOG
char outputDataTemp[2048];
for (int i = 0; i < height ; i++) {
// 清空字符串,拼接下一行
memset(outputDataTemp, 0x00, 2048);
for (int j = 0; j < width ; j++) {
// 將一行數據拼接在一個字符串中
sprintf(outputDataTemp, "%s %d", outputDataTemp, outputData[i * width + j]);
}
// LOG打印
LOGI("line [%d]\n%s", i, outputDataTemp);
}
附加
dlopencl.h
#ifndef __AOPENCL_CL_H
#define __AOPENCL_CL_H
#ifdef __APPLE__
#include <OpenCL/cl_platform.h>
#else
#include <CL/cl_platform.h>
#endif
#ifdef __cplusplus
extern "C" {
#endif
#define IAH()
//#define IAH() printf("File:%s, Line:%d\n",__FILE__, __LINE__);
void initFns();
/* Platform API */
#define clGetPlatformIDs aclGetPlatformIDs
cl_int
(*aclGetPlatformIDs)(cl_uint /* num_entries */,
cl_platform_id * /* platforms */,
cl_uint * /* num_platforms */);
#define clGetPlatformInfo aclGetPlatformInfo
cl_int
(*aclGetPlatformInfo)(cl_platform_id /* platform */,
cl_platform_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */);
/* Device APIs */
#define clGetDeviceIDs aclGetDeviceIDs
cl_int
(*aclGetDeviceIDs)(cl_platform_id /* platform */,
cl_device_type /* device_type */,
cl_uint /* num_entries */,
cl_device_id * /* devices */,
cl_uint * /* num_devices */);
#define clGetDeviceInfo aclGetDeviceInfo
cl_int
(*aclGetDeviceInfo)(cl_device_id /* device */,
cl_device_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */);
/* Context APIs */
#define clCreateContext aclCreateContext
cl_context
(*aclCreateContext)(const cl_context_properties * /* properties */,
cl_uint /* num_devices */,
const cl_device_id * /* devices */,
void (CL_CALLBACK * /* pfn_notify */)(const char *, const void *, size_t, void *),
void * /* user_data */,
cl_int * /* errcode_ret */);
#define clCreateContextFromType aclCreateContextFromType
cl_context
(*aclCreateContextFromType)(const cl_context_properties * /* properties */,
cl_device_type /* device_type */,
void (CL_CALLBACK * /* pfn_notify*/ )(const char *, const void *, size_t, void *),
void * /* user_data */,
cl_int * /* errcode_ret */);
#define clRetainContext aclRetainContext
cl_int
(*aclRetainContext)(cl_context /* context */);
#define clReleaseContext aclReleaseContext
cl_int
(*aclReleaseContext)(cl_context /* context */);
#define clGetContextInfo aclGetContextInfo
cl_int
(*aclGetContextInfo)(cl_context /* context */,
cl_context_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */);
/* Command Queue APIs */
#define clCreateCommandQueue aclCreateCommandQueue
cl_command_queue
(*aclCreateCommandQueue)(cl_context /* context */,
cl_device_id /* device */,
cl_command_queue_properties /* properties */,
cl_int * /* errcode_ret */);
#define clRetainCommandQueue aclRetainCommandQueue
cl_int
(*aclRetainCommandQueue)(cl_command_queue /* command_queue */);
#define clReleaseCommandQueue aclReleaseCommandQueue
cl_int
(*aclReleaseCommandQueue)(cl_command_queue /* command_queue */);
#define clGetCommandQueueInfo aclGetCommandQueueInfo
cl_int
(*aclGetCommandQueueInfo)(cl_command_queue /* command_queue */,
cl_command_queue_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */);
/* Memory Object APIs */
#define clCreateBuffer aclCreateBuffer
cl_mem
(*aclCreateBuffer)(cl_context /* context */,
cl_mem_flags /* flags */,
size_t /* size */,
void * /* host_ptr */,
cl_int * /* errcode_ret */);
#define clCreateSubBuffer aclCreateSubBuffer
cl_mem
(*aclCreateSubBuffer)(cl_mem /* buffer */,
cl_mem_flags /* flags */,
cl_buffer_create_type /* buffer_create_type */,
const void * /* buffer_create_info */,
cl_int * /* errcode_ret */);
#define clRetainMemObject aclRetainMemObject
cl_int
(*aclRetainMemObject)(cl_mem /* memobj */);
#define clReleaseMemObject aclReleaseMemObject
cl_int
(*aclReleaseMemObject)(cl_mem /* memobj */);
#define clGetSupportedImageFormats aclGetSupportedImageFormats
cl_int
(*aclGetSupportedImageFormats)(cl_context /* context */,
cl_mem_flags /* flags */,
cl_mem_object_type /* image_type */,
cl_uint /* num_entries */,
cl_image_format * /* image_formats */,
cl_uint * /* num_image_formats */);
#define clGetMemObjectInfo aclGetMemObjectInfo
cl_int
(*aclGetMemObjectInfo)(cl_mem /* memobj */,
cl_mem_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */);
#define clGetImageInfo aclGetImageInfo
cl_int
(*aclGetImageInfo)(cl_mem /* image */,
cl_image_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */);
#define clSetMemObjectDestructorCallback aclSetMemObjectDestructorCallback
cl_int
(*aclSetMemObjectDestructorCallback)( cl_mem /* memobj */,
void (CL_CALLBACK * /*pfn_notify*/)( cl_mem /* memobj */, void* /*user_data*/),
void * /*user_data */ );
/* Sampler APIs */
#define clCreateSampler aclCreateSampler
cl_sampler
(*aclCreateSampler)(cl_context /* context */,
cl_bool /* normalized_coords */,
cl_addressing_mode /* addressing_mode */,
cl_filter_mode /* filter_mode */,
cl_int * /* errcode_ret */);
#define clRetainSampler aclRetainSampler
cl_int
(*aclRetainSampler)(cl_sampler /* sampler */);
#define clReleaseSampler aclReleaseSampler
cl_int
(*aclReleaseSampler)(cl_sampler /* sampler */);
#define clGetSamplerInfo aclGetSamplerInfo
cl_int
(*aclGetSamplerInfo)(cl_sampler /* sampler */,
cl_sampler_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */);
/* Program Object APIs */
#define clCreateProgramWithSource aclCreateProgramWithSource
cl_program
(*aclCreateProgramWithSource)(cl_context /* context */,
cl_uint /* count */,
const char ** /* strings */,
const size_t * /* lengths */,
cl_int * /* errcode_ret */);
#define clCreateProgramWithBinary aclCreateProgramWithBinary
cl_program
(*aclCreateProgramWithBinary)(cl_context /* context */,
cl_uint /* num_devices */,
const cl_device_id * /* device_list */,
const size_t * /* lengths */,
const unsigned char ** /* binaries */,
cl_int * /* binary_status */,
cl_int * /* errcode_ret */);
#define clRetainProgram aclRetainProgram
cl_int
(*aclRetainProgram)(cl_program /* program */);
#define clReleaseProgram aclReleaseProgram
cl_int
(*aclReleaseProgram)(cl_program /* program */);
#define clBuildProgram aclBuildProgram
cl_int
(*aclBuildProgram)(cl_program /* program */,
cl_uint /* num_devices */,
const cl_device_id * /* device_list */,
const char * /* options */,
void (CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */),
void * /* user_data */);
#define clGetProgramInfo aclGetProgramInfo
cl_int
(*aclGetProgramInfo)(cl_program /* program */,
cl_program_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */);
#define clGetProgramBuildInfo aclGetProgramBuildInfo
cl_int
(*aclGetProgramBuildInfo)(cl_program /* program */,
cl_device_id /* device */,
cl_program_build_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */);
/* Kernel Object APIs */
#define clCreateKernel aclCreateKernel
cl_kernel
(*aclCreateKernel)(cl_program /* program */,
const char * /* kernel_name */,
cl_int * /* errcode_ret */);
#define clCreateKernelsInProgram aclCreateKernelsInProgram
cl_int
(*aclCreateKernelsInProgram)(cl_program /* program */,
cl_uint /* num_kernels */,
cl_kernel * /* kernels */,
cl_uint * /* num_kernels_ret */);
#define clRetainKernel aclRetainKernel
cl_int
(*aclRetainKernel)(cl_kernel /* kernel */);
#define clReleaseKernel aclReleaseKernel
cl_int
(*aclReleaseKernel)(cl_kernel /* kernel */);
#define clSetKernelArg aclSetKernelArg
cl_int
(*aclSetKernelArg)(cl_kernel /* kernel */,
cl_uint /* arg_index */,
size_t /* arg_size */,
const void * /* arg_value */);
#define clGetKernelInfo aclGetKernelInfo
cl_int
(*aclGetKernelInfo)(cl_kernel /* kernel */,
cl_kernel_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */);
#define clGetKernelWorkGroupInfo aclGetKernelWorkGroupInfo
cl_int
(*aclGetKernelWorkGroupInfo)(cl_kernel /* kernel */,
cl_device_id /* device */,
cl_kernel_work_group_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */);
/* Event Object APIs */
#define clWaitForEvents aclWaitForEvents
cl_int
(*aclWaitForEvents)(cl_uint /* num_events */,
const cl_event * /* event_list */);
#define clGetEventInfo aclGetEventInfo
cl_int
(*aclGetEventInfo)(cl_event /* event */,
cl_event_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */);
#define clCreateUserEvent aclCreateUserEvent
cl_event
(*aclCreateUserEvent)(cl_context /* context */,
cl_int * /* errcode_ret */);
#define clRetainEvent aclRetainEvent
cl_int
(*aclRetainEvent)(cl_event /* event */);
#define clReleaseEvent aclReleaseEvent
cl_int
(*aclReleaseEvent)(cl_event /* event */);
#define clSetUserEventStatus aclSetUserEventStatus
cl_int
(*aclSetUserEventStatus)(cl_event /* event */,
cl_int /* execution_status */);
#define clSetEventCallback aclSetEventCallback
cl_int
(*aclSetEventCallback)( cl_event /* event */,
cl_int /* command_exec_callback_type */,
void (CL_CALLBACK * /* pfn_notify */)(cl_event, cl_int, void *),
void * /* user_data */);
/* Profiling APIs */
#define clGetEventProfilingInfo aclGetEventProfilingInfo
cl_int
(*aclGetEventProfilingInfo)(cl_event /* event */,
cl_profiling_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */);
/* Flush and Finish APIs */
#define clFlush aclFlush
cl_int
(*aclFlush)(cl_command_queue /* command_queue */);
#define clFinish aclFinish
cl_int
(*aclFinish)(cl_command_queue /* command_queue */);
/* Enqueued Commands APIs */
#define clEnqueueReadBuffer aclEnqueueReadBuffer
cl_int
(*aclEnqueueReadBuffer)(cl_command_queue /* command_queue */,
cl_mem /* buffer */,
cl_bool /* blocking_read */,
size_t /* offset */,
size_t /* size */,
void * /* ptr */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */);
#define clEnqueueReadBufferRect aclEnqueueReadBufferRect
cl_int
(*aclEnqueueReadBufferRect)(cl_command_queue /* command_queue */,
cl_mem /* buffer */,
cl_bool /* blocking_read */,
const size_t * /* buffer_offset */,
const size_t * /* host_offset */,
const size_t * /* region */,
size_t /* buffer_row_pitch */,
size_t /* buffer_slice_pitch */,
size_t /* host_row_pitch */,
size_t /* host_slice_pitch */,
void * /* ptr */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */);
#define clEnqueueWriteBuffer aclEnqueueWriteBuffer
cl_int
(*aclEnqueueWriteBuffer)(cl_command_queue /* command_queue */,
cl_mem /* buffer */,
cl_bool /* blocking_write */,
size_t /* offset */,
size_t /* size */,
const void * /* ptr */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */);
#define clEnqueueWriteBufferRect aclEnqueueWriteBufferRect
cl_int
(*aclEnqueueWriteBufferRect)(cl_command_queue /* command_queue */,
cl_mem /* buffer */,
cl_bool /* blocking_write */,
const size_t * /* buffer_offset */,
const size_t * /* host_offset */,
const size_t * /* region */,
size_t /* buffer_row_pitch */,
size_t /* buffer_slice_pitch */,
size_t /* host_row_pitch */,
size_t /* host_slice_pitch */,
const void * /* ptr */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */);
#define clEnqueueCopyBuffer aclEnqueueCopyBuffer
cl_int
(*aclEnqueueCopyBuffer)(cl_command_queue /* command_queue */,
cl_mem /* src_buffer */,
cl_mem /* dst_buffer */,
size_t /* src_offset */,
size_t /* dst_offset */,
size_t /* size */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */);
#define clEnqueueCopyBufferRect aclEnqueueCopyBufferRect
cl_int
(*aclEnqueueCopyBufferRect)(cl_command_queue /* command_queue */,
cl_mem /* src_buffer */,
cl_mem /* dst_buffer */,
const size_t * /* src_origin */,
const size_t * /* dst_origin */,
const size_t * /* region */,
size_t /* src_row_pitch */,
size_t /* src_slice_pitch */,
size_t /* dst_row_pitch */,
size_t /* dst_slice_pitch */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */);
#define clEnqueueReadImage aclEnqueueReadImage
cl_int
(*aclEnqueueReadImage)(cl_command_queue /* command_queue */,
cl_mem /* image */,
cl_bool /* blocking_read */,
const size_t * /* origin[3] */,
const size_t * /* region[3] */,
size_t /* row_pitch */,
size_t /* slice_pitch */,
void * /* ptr */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */);
#define clEnqueueWriteImage aclEnqueueWriteImage
cl_int
(*aclEnqueueWriteImage)(cl_command_queue /* command_queue */,
cl_mem /* image */,
cl_bool /* blocking_write */,
const size_t * /* origin[3] */,
const size_t * /* region[3] */,
size_t /* input_row_pitch */,
size_t /* input_slice_pitch */,
const void * /* ptr */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */);
#define clEnqueueCopyImage aclEnqueueCopyImage
cl_int
(*aclEnqueueCopyImage)(cl_command_queue /* command_queue */,
cl_mem /* src_image */,
cl_mem /* dst_image */,
const size_t * /* src_origin[3] */,
const size_t * /* dst_origin[3] */,
const size_t * /* region[3] */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */);
#define clEnqueueCopyImageToBuffer aclEnqueueCopyImageToBuffer
cl_int
(*aclEnqueueCopyImageToBuffer)(cl_command_queue /* command_queue */,
cl_mem /* src_image */,
cl_mem /* dst_buffer */,
const size_t * /* src_origin[3] */,
const size_t * /* region[3] */,
size_t /* dst_offset */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */);
#define clEnqueueCopyBufferToImage aclEnqueueCopyBufferToImage
cl_int
(*aclEnqueueCopyBufferToImage)(cl_command_queue /* command_queue */,
cl_mem /* src_buffer */,
cl_mem /* dst_image */,
size_t /* src_offset */,
const size_t * /* dst_origin[3] */,
const size_t * /* region[3] */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */);
void *
(*aclEnqueueMapBuffer)(cl_command_queue /* command_queue */,
cl_mem /* buffer */,
cl_bool /* blocking_map */,
cl_map_flags /* map_flags */,
size_t /* offset */,
size_t /* size */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */,
cl_int * /* errcode_ret */);
void *
(*aclEnqueueMapImage)(cl_command_queue /* command_queue */,
cl_mem /* image */,
cl_bool /* blocking_map */,
cl_map_flags /* map_flags */,
const size_t * /* origin[3] */,
const size_t * /* region[3] */,
size_t * /* image_row_pitch */,
size_t * /* image_slice_pitch */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */,
cl_int * /* errcode_ret */);
#define clEnqueueUnmapMemObject aclEnqueueUnmapMemObject
cl_int
(*aclEnqueueUnmapMemObject)(cl_command_queue /* command_queue */,
cl_mem /* memobj */,
void * /* mapped_ptr */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */);
#define clEnqueueNDRangeKernel aclEnqueueNDRangeKernel
cl_int
(*aclEnqueueNDRangeKernel)(cl_command_queue /* command_queue */,
cl_kernel /* kernel */,
cl_uint /* work_dim */,
const size_t * /* global_work_offset */,
const size_t * /* global_work_size */,
const size_t * /* local_work_size */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */);
#define clEnqueueTask aclEnqueueTask
cl_int
(*aclEnqueueTask)(cl_command_queue /* command_queue */,
cl_kernel /* kernel */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */);
#define clEnqueueNativeKernel aclEnqueueNativeKernel
cl_int
(*aclEnqueueNativeKernel)(cl_command_queue /* command_queue */,
void (CL_CALLBACK * /*user_func*/)(void *),
void * /* args */,
size_t /* cb_args */,
cl_uint /* num_mem_objects */,
const cl_mem * /* mem_list */,
const void ** /* args_mem_loc */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */);
#ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS
//#warning CL_USE_DEPRECATED_OPENCL_1_0_APIS is defined. These APIs are unsupported and untested in OpenCL 1.1!
/*
* WARNING:
* This API introduces mutable state into the OpenCL implementation. It has been REMOVED
* to better facilitate thread safety. The 1.0 API is not thread safe. It is not tested by the
* OpenCL 1.1 conformance test, and consequently may not work or may not work dependably.
* It is likely to be non-performant. Use of this API is not advised. Use at your own risk.
*
* Software developers previously relying on this API are instructed to set the command queue
* properties when creating the queue, instead.
*/
#define clSetCommandQueueProperty aclSetCommandQueueProperty
cl_int
(*aclSetCommandQueueProperty)(cl_command_queue /* command_queue */,
cl_command_queue_properties /* properties */,
cl_bool /* enable */,
cl_command_queue_properties * /* old_properties */);
#endif /* CL_USE_DEPRECATED_OPENCL_1_0_APIS */
#ifdef CL_USE_DEPRECATED_OPENCL_1_1_APIS
#define clCreateImage2D aclCreateImage2D
cl_mem
(*aclCreateImage2D)(cl_context /* context */,
cl_mem_flags /* flags */,
const cl_image_format * /* image_format */,
size_t /* image_width */,
size_t /* image_height */,
size_t /* image_row_pitch */,
void * /* host_ptr */,
cl_int * /* errcode_ret */);
#define clCreateImage3D aclCreateImage3D
cl_mem
(*aclCreateImage3D)(cl_context /* context */,
cl_mem_flags /* flags */,
const cl_image_format * /* image_format */,
size_t /* image_width */,
size_t /* image_height */,
size_t /* image_depth */,
size_t /* image_row_pitch */,
size_t /* image_slice_pitch */,
void * /* host_ptr */,
cl_int * /* errcode_ret */);
#define clEnqueueMarker aclEnqueueMarker
cl_int
(*aclEnqueueMarker)(cl_command_queue /* command_queue */,
cl_event * /* event */);
#define clEnqueueWaitForEvents aclEnqueueWaitForEvents
cl_int
(*aclEnqueueWaitForEvents)(cl_command_queue /* command_queue */,
cl_uint /* num_events */,
const cl_event * /* event_list */);
#define clEnqueueBarrier aclEnqueueBarrier
cl_int
(*aclEnqueueBarrier)(cl_command_queue /* command_queue */);
#define clUnloadCompiler aclUnloadCompiler
cl_int
(*aclUnloadCompiler)(void);
void *
(*aclGetExtensionFunctionAddress)(const char * /* func_name */);
#endif
#ifdef __cplusplus
}
#endif
#endif /* __AOPENCL_CL_H */
dlopencl.cpp
根據實際情況自行修改 so_path 的路徑
#define CL_TARGET_OPENCL_VERSION 120
#include "CL/cl.h"
#include <dlfcn.h>
#include <cstdio>
#include <string.h>
#include "dlopencl.h"
int loadedCL;
void *getCLHandle() {
LOGD("get_handle");
void *res = nullptr;
char *so_path = (char*)"/system/vendor/lib64/egl/libGLES_mali.so";
res = dlopen(so_path, RTLD_LAZY);
if (res == nullptr) {
LOGD("Open library failed");
} else {
LOGD("Loaded library name: [%s]", so_path);
}
return res;
}
void initFns() {
loadedCL = 0;
void *handle = getCLHandle();
if (handle == nullptr) return;
/* Platform API */
IAH();
aclGetPlatformIDs = (cl_int (*)(cl_uint /* num_entries */,
cl_platform_id * /* platforms */,
cl_uint * /* num_platforms */)) dlsym(handle, "clGetPlatformIDs");
IAH();
aclGetPlatformInfo = (cl_int (*)(cl_platform_id /* platform */,
cl_platform_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */)) dlsym(handle, "clGetPlatformInfo");
/* Device APIs */
IAH();
aclGetDeviceIDs = (cl_int (*)(cl_platform_id /* platform */,
cl_device_type /* device_type */,
cl_uint /* num_entries */,
cl_device_id * /* devices */,
cl_uint * /* num_devices */)) dlsym(handle, "clGetDeviceIDs");
IAH();
aclGetDeviceInfo = (cl_int (*)(cl_device_id /* device */,
cl_device_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */)) dlsym(handle, "clGetDeviceInfo");
/* Context APIs */
IAH();
aclCreateContext = (cl_context (*)(const cl_context_properties * /* properties */,
cl_uint /* num_devices */,
const cl_device_id * /* devices */,
void (CL_CALLBACK * /* pfn_notify */)(const char *, const void *, size_t, void *),
void * /* user_data */,
cl_int * /* errcode_ret */)) dlsym(handle, "clCreateContext");
IAH();
aclCreateContextFromType = (cl_context (*)(const cl_context_properties * /* properties */,
cl_device_type /* device_type */,
void (CL_CALLBACK * /* pfn_notify*/ )(const char *, const void *, size_t, void *),
void * /* user_data */,
cl_int * /* errcode_ret */)) dlsym(handle, "clCreateContextFromType");
IAH();
aclRetainContext = (cl_int (*)(cl_context /* context */)) dlsym(handle, "clRetainContext");
IAH();
aclReleaseContext = (cl_int (*)(cl_context /* context */)) dlsym(handle, "clReleaseContext");
IAH();
aclGetContextInfo = (cl_int (*)(cl_context /* context */,
cl_context_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */)) dlsym(handle, "clGetContextInfo");
/* Command Queue APIs */
IAH();
aclCreateCommandQueue = (cl_command_queue (*)(cl_context /* context */,
cl_device_id /* device */,
cl_command_queue_properties /* properties */,
cl_int * /* errcode_ret */)) dlsym(handle, "clCreateCommandQueue");
IAH();
aclRetainCommandQueue = (cl_int (*)(cl_command_queue /* command_queue */)) dlsym(handle, "clRetainCommandQueue");
IAH();
aclReleaseCommandQueue = (cl_int (*)(cl_command_queue /* command_queue */)) dlsym(handle, "clReleaseCommandQueue");
IAH();
aclGetCommandQueueInfo = (cl_int (*)(cl_command_queue /* command_queue */,
cl_command_queue_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */)) dlsym(handle, "clGetCommandQueueInfo");
/* Memory Object APIs */
IAH();
aclCreateBuffer = (cl_mem (*)(cl_context /* context */,
cl_mem_flags /* flags */,
size_t /* size */,
void * /* host_ptr */,
cl_int * /* errcode_ret */)) dlsym(handle, "clCreateBuffer");
IAH();
aclCreateSubBuffer = (cl_mem (*)(cl_mem /* buffer */,
cl_mem_flags /* flags */,
cl_buffer_create_type /* buffer_create_type */,
const void * /* buffer_create_info */,
cl_int * /* errcode_ret */)) dlsym(handle, "clCreateSubBuffer");
IAH();
aclRetainMemObject = (cl_int (*)(cl_mem /* memobj */)) dlsym(handle, "clRetainMemObject");
IAH();
aclReleaseMemObject = (cl_int (*)(cl_mem /* memobj */)) dlsym(handle, "clReleaseMemObject");
IAH();
aclGetSupportedImageFormats = (cl_int (*)(cl_context /* context */,
cl_mem_flags /* flags */,
cl_mem_object_type /* image_type */,
cl_uint /* num_entries */,
cl_image_format * /* image_formats */,
cl_uint * /* num_image_formats */)) dlsym(handle, "clGetSupportedImageFormats");
IAH();
aclGetMemObjectInfo = (cl_int (*)(cl_mem /* memobj */,
cl_mem_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */)) dlsym(handle, "clGetMemObjectInfo");
IAH();
aclGetImageInfo = (cl_int (*)(cl_mem /* image */,
cl_image_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */)) dlsym(handle, "clGetImageInfo");
IAH();
aclSetMemObjectDestructorCallback = (cl_int (*)( cl_mem /* memobj */,
void (CL_CALLBACK * /*pfn_notify*/)( cl_mem /* memobj */, void* /*user_data*/),
void * /*user_data */ )) dlsym(handle, "clSetMemObjectDestructorCallback");
/* Sampler APIs */
IAH();
aclCreateSampler = (cl_sampler (*)(cl_context /* context */,
cl_bool /* normalized_coords */,
cl_addressing_mode /* addressing_mode */,
cl_filter_mode /* filter_mode */,
cl_int * /* errcode_ret */)) dlsym(handle, "clCreateSampler");
IAH();
aclRetainSampler = (cl_int (*)(cl_sampler /* sampler */)) dlsym(handle, "clRetainSampler");
IAH();
aclReleaseSampler = (cl_int (*)(cl_sampler /* sampler */)) dlsym(handle, "clReleaseSampler");
IAH();
aclGetSamplerInfo = (cl_int (*)(cl_sampler /* sampler */,
cl_sampler_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */)) dlsym(handle, "clGetSamplerInfo");
/* Program Object APIs */
IAH();
aclCreateProgramWithSource = (cl_program (*)(cl_context /* context */,
cl_uint /* count */,
const char ** /* strings */,
const size_t * /* lengths */,
cl_int * /* errcode_ret */)) dlsym(handle, "clCreateProgramWithSource");
IAH();
aclCreateProgramWithBinary = (cl_program (*)(cl_context /* context */,
cl_uint /* num_devices */,
const cl_device_id * /* device_list */,
const size_t * /* lengths */,
const unsigned char ** /* binaries */,
cl_int * /* binary_status */,
cl_int * /* errcode_ret */)) dlsym(handle, "clCreateProgramWithBinary");
IAH();
aclRetainProgram = (cl_int (*)(cl_program /* program */)) dlsym(handle, "clRetainProgram");
IAH();
aclReleaseProgram = (cl_int (*)(cl_program /* program */)) dlsym(handle, "clReleaseProgram");
IAH();
aclBuildProgram = (cl_int (*)(cl_program /* program */,
cl_uint /* num_devices */,
const cl_device_id * /* device_list */,
const char * /* options */,
void (CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */),
void * /* user_data */)) dlsym(handle, "clBuildProgram");
IAH();
aclGetProgramInfo = (cl_int (*)(cl_program /* program */,
cl_program_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */)) dlsym(handle, "clGetProgramInfo");
IAH();
aclGetProgramBuildInfo = (cl_int (*)(cl_program /* program */,
cl_device_id /* device */,
cl_program_build_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */)) dlsym(handle, "clGetProgramBuildInfo");
/* Kernel Object APIs */
IAH();
aclCreateKernel = (cl_kernel (*)(cl_program /* program */,
const char * /* kernel_name */,
cl_int * /* errcode_ret */)) dlsym(handle, "clCreateKernel");
IAH();
aclCreateKernelsInProgram = (cl_int (*)(cl_program /* program */,
cl_uint /* num_kernels */,
cl_kernel * /* kernels */,
cl_uint * /* num_kernels_ret */)) dlsym(handle, "clCreateKernelsInProgram");
IAH();
aclRetainKernel = (cl_int (*)(cl_kernel /* kernel */)) dlsym(handle, "clRetainKernel");
IAH();
aclReleaseKernel = (cl_int (*)(cl_kernel /* kernel */)) dlsym(handle, "clReleaseKernel");
IAH();
aclSetKernelArg = (cl_int (*)(cl_kernel /* kernel */,
cl_uint /* arg_index */,
size_t /* arg_size */,
const void * /* arg_value */)) dlsym(handle, "clSetKernelArg");
IAH();
aclGetKernelInfo = (cl_int (*)(cl_kernel /* kernel */,
cl_kernel_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */)) dlsym(handle, "clGetKernelInfo");
IAH();
aclGetKernelWorkGroupInfo = (cl_int (*)(cl_kernel /* kernel */,
cl_device_id /* device */,
cl_kernel_work_group_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */)) dlsym(handle, "clGetKernelWorkGroupInfo");
/* Event Object APIs */
IAH();
aclWaitForEvents = (cl_int (*)(cl_uint /* num_events */,
const cl_event * /* event_list */)) dlsym(handle, "clWaitForEvents");
IAH();
aclGetEventInfo = (cl_int (*)(cl_event /* event */,
cl_event_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */)) dlsym(handle, "clGetEventInfo");
IAH();
aclCreateUserEvent = (cl_event (*)(cl_context /* context */,
cl_int * /* errcode_ret */)) dlsym(handle, "clCreateUserEvent");
IAH();
aclRetainEvent = (cl_int (*)(cl_event /* event */)) dlsym(handle, "clRetainEvent");
IAH();
aclReleaseEvent = (cl_int (*)(cl_event /* event */)) dlsym(handle, "clReleaseEvent");
IAH();
aclSetUserEventStatus = (cl_int (*)(cl_event /* event */,
cl_int /* execution_status */)) dlsym(handle, "clSetUserEventStatus");
IAH();
aclSetEventCallback = (cl_int (*)( cl_event /* event */,
cl_int /* command_exec_callback_type */,
void (CL_CALLBACK * /* pfn_notify */)(cl_event, cl_int, void *),
void * /* user_data */)) dlsym(handle, "clSetEventCallback");
/* Profiling APIs */
IAH();
aclGetEventProfilingInfo = (cl_int (*)(cl_event /* event */,
cl_profiling_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */)) dlsym(handle, "clGetEventProfilingInfo");
/* Flush and Finish APIs */
IAH();
aclFlush = (cl_int (*)(cl_command_queue /* command_queue */)) dlsym(handle, "clFlush");
IAH();
aclFinish = (cl_int (*)(cl_command_queue /* command_queue */)) dlsym(handle, "clFinish");
/* Enqueued Commands APIs */
IAH();
aclEnqueueReadBuffer = (cl_int (*)(cl_command_queue /* command_queue */,
cl_mem /* buffer */,
cl_bool /* blocking_read */,
size_t /* offset */,
size_t /* size */,
void * /* ptr */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */)) dlsym(handle, "clEnqueueReadBuffer");
IAH();
aclEnqueueReadBufferRect = (cl_int (*)(cl_command_queue /* command_queue */,
cl_mem /* buffer */,
cl_bool /* blocking_read */,
const size_t * /* buffer_offset */,
const size_t * /* host_offset */,
const size_t * /* region */,
size_t /* buffer_row_pitch */,
size_t /* buffer_slice_pitch */,
size_t /* host_row_pitch */,
size_t /* host_slice_pitch */,
void * /* ptr */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */)) dlsym(handle, "clEnqueueReadBufferRect");
IAH();
aclEnqueueWriteBuffer = (cl_int (*)(cl_command_queue /* command_queue */,
cl_mem /* buffer */,
cl_bool /* blocking_write */,
size_t /* offset */,
size_t /* size */,
const void * /* ptr */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */)) dlsym(handle, "clEnqueueWriteBuffer");
IAH();
aclEnqueueWriteBufferRect = (cl_int (*)(cl_command_queue /* command_queue */,
cl_mem /* buffer */,
cl_bool /* blocking_write */,
const size_t * /* buffer_offset */,
const size_t * /* host_offset */,
const size_t * /* region */,
size_t /* buffer_row_pitch */,
size_t /* buffer_slice_pitch */,
size_t /* host_row_pitch */,
size_t /* host_slice_pitch */,
const void * /* ptr */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */)) dlsym(handle, "clEnqueueWriteBufferRect");
IAH();
aclEnqueueCopyBuffer = (cl_int (*)(cl_command_queue /* command_queue */,
cl_mem /* src_buffer */,
cl_mem /* dst_buffer */,
size_t /* src_offset */,
size_t /* dst_offset */,
size_t /* size */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */)) dlsym(handle, "clEnqueueCopyBuffer");
IAH();
aclEnqueueCopyBufferRect = (cl_int (*)(cl_command_queue /* command_queue */,
cl_mem /* src_buffer */,
cl_mem /* dst_buffer */,
const size_t * /* src_origin */,
const size_t * /* dst_origin */,
const size_t * /* region */,
size_t /* src_row_pitch */,
size_t /* src_slice_pitch */,
size_t /* dst_row_pitch */,
size_t /* dst_slice_pitch */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */)) dlsym(handle, "clEnqueueCopyBufferRect");
IAH();
aclEnqueueReadImage = (cl_int (*)(cl_command_queue /* command_queue */,
cl_mem /* image */,
cl_bool /* blocking_read */,
const size_t * /* origin[3] */,
const size_t * /* region[3] */,
size_t /* row_pitch */,
size_t /* slice_pitch */,
void * /* ptr */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */)) dlsym(handle, "clEnqueueReadImage");
IAH();
aclEnqueueWriteImage = (cl_int (*)(cl_command_queue /* command_queue */,
cl_mem /* image */,
cl_bool /* blocking_write */,
const size_t * /* origin[3] */,
const size_t * /* region[3] */,
size_t /* input_row_pitch */,
size_t /* input_slice_pitch */,
const void * /* ptr */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */)) dlsym(handle, "clEnqueueWriteImage");
IAH();
aclEnqueueCopyImage = (cl_int (*)(cl_command_queue /* command_queue */,
cl_mem /* src_image */,
cl_mem /* dst_image */,
const size_t * /* src_origin[3] */,
const size_t * /* dst_origin[3] */,
const size_t * /* region[3] */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */)) dlsym(handle, "clEnqueueCopyImage");
IAH();
aclEnqueueCopyImageToBuffer = (cl_int (*)(cl_command_queue /* command_queue */,
cl_mem /* src_image */,
cl_mem /* dst_buffer */,
const size_t * /* src_origin[3] */,
const size_t * /* region[3] */,
size_t /* dst_offset */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */)) dlsym(handle, "clEnqueueCopyImageToBuffer");
IAH();
aclEnqueueCopyBufferToImage = (cl_int (*)(cl_command_queue /* command_queue */,
cl_mem /* src_buffer */,
cl_mem /* dst_image */,
size_t /* src_offset */,
const size_t * /* dst_origin[3] */,
const size_t * /* region[3] */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */)) dlsym(handle, "clEnqueueCopyBufferToImage");
#if 0
void *
(*aclEnqueueMapBuffer)(cl_command_queue /* command_queue */,
cl_mem /* buffer */,
cl_bool /* blocking_map */,
cl_map_flags /* map_flags */,
size_t /* offset */,
size_t /* size */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */,
cl_int * /* errcode_ret */)) dlsym(handle,"");
void *
(*aclEnqueueMapImage)(cl_command_queue /* command_queue */,
cl_mem /* image */,
cl_bool /* blocking_map */,
cl_map_flags /* map_flags */,
const size_t * /* origin[3] */,
const size_t * /* region[3] */,
size_t * /* image_row_pitch */,
size_t * /* image_slice_pitch */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */,
cl_int * /* errcode_ret */)) dlsym(handle,"");
#endif
IAH();
aclEnqueueUnmapMemObject = (cl_int (*)(cl_command_queue /* command_queue */,
cl_mem /* memobj */,
void * /* mapped_ptr */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */)) dlsym(handle, "clEnqueueUnmapMemObject");
IAH();
aclEnqueueNDRangeKernel = (cl_int (*)(cl_command_queue /* command_queue */,
cl_kernel /* kernel */,
cl_uint /* work_dim */,
const size_t * /* global_work_offset */,
const size_t * /* global_work_size */,
const size_t * /* local_work_size */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */)) dlsym(handle, "clEnqueueNDRangeKernel");
IAH();
aclEnqueueTask = (cl_int (*)(cl_command_queue /* command_queue */,
cl_kernel /* kernel */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */)) dlsym(handle, "clEnqueueTask");
IAH();
aclEnqueueNativeKernel = (cl_int (*)(cl_command_queue /* command_queue */,
void (CL_CALLBACK * /*user_func*/)(void *),
void * /* args */,
size_t /* cb_args */,
cl_uint /* num_mem_objects */,
const cl_mem * /* mem_list */,
const void ** /* args_mem_loc */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */)) dlsym(handle, "clEnqueueNativeKernel");
#ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS
//#warning CL_USE_DEPRECATED_OPENCL_1_0_APIS is defined. These APIs are unsupported and untested in OpenCL 1.1!
/*
* WARNING:
* This API introduces mutable state into the OpenCL implementation. It has been REMOVED
* to better facilitate thread safety. The 1.0 API is not thread safe. It is not tested by the
* OpenCL 1.1 conformance test, and consequently may not work or may not work dependably.
* It is likely to be non-performant. Use of this API is not advised. Use at your own risk.
*
* Software developers previously relying on this API are instructed to set the command queue
* properties when creating the queue, instead.
*/
IAH();
aclSetCommandQueueProperty = (cl_int (*)(cl_command_queue /* command_queue */,
cl_command_queue_properties /* properties */,
cl_bool /* enable */,
cl_command_queue_properties * /* old_properties */)) dlsym(handle,"clSetCommandQueueProperty");
#endif /* CL_USE_DEPRECATED_OPENCL_1_0_APIS */
#ifdef CL_USE_DEPRECATED_OPENCL_1_1_APIS
IAH();
aclCreateImage2D = (cl_mem (*)(cl_context /* context */,
cl_mem_flags /* flags */,
const cl_image_format * /* image_format */,
size_t /* image_width */,
size_t /* image_height */,
size_t /* image_row_pitch */,
void * /* host_ptr */,
cl_int * /* errcode_ret */)) dlsym(handle,"clCreateImage2D");
IAH();
aclCreateImage3D = (cl_mem (*)(cl_context /* context */,
cl_mem_flags /* flags */,
const cl_image_format * /* image_format */,
size_t /* image_width */,
size_t /* image_height */,
size_t /* image_depth */,
size_t /* image_row_pitch */,
size_t /* image_slice_pitch */,
void * /* host_ptr */,
cl_int * /* errcode_ret */)) dlsym(handle,"clCreateImage3D");
IAH();
aclEnqueueMarker = (cl_int (*)(cl_command_queue /* command_queue */,
cl_event * /* event */)) dlsym(handle,"clEnqueueMarker");
IAH();
aclEnqueueWaitForEvents = (cl_int (*)(cl_command_queue /* command_queue */,
cl_uint /* num_events */,
const cl_event * /* event_list */)) dlsym(handle,"clEnqueueWaitForEvents");
IAH();
aclEnqueueBarrier = (cl_int (*)(cl_command_queue /* command_queue */)) dlsym(handle,"clEnqueueBarrier");
IAH();
aclUnloadCompiler = (cl_int (*)(void)) dlsym(handle,"clUnloadCompiler");
#endif
#if 0
void *
(*aclGetExtensionFunctionAddress)(const char * /* func_name */)) dlsym(handle,"");
#endif
loadedCL = 1;
}