CMSR Generated Program Example: C++ OpenCL
The following is a CMSR-generated C++ OpenCL program for a
25-million-parameter YOLO-like object detection deep neural network.
It is a huge model, so the program is long. It can be run on any
OpenCL-enabled computer or device. It includes pre- and post-processing
such as color inversion, histogram equalization, encoding, object filtering,
etc. There is no need for extra coding.
Just add it to your project along with the base class file and
shader program file. All that is needed is to call three functions:
one for initialization, one for evaluation, and one to release resources.
/* Generated by CMSR Machine Learning Studio
* Algorithm: Convolutional Neural Network (OD-CNN)
* Language: C++/OpenCL
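* Call this function to detect objects
* Arguments:
* maxOutCount: maximum number of output objects
* outClassIndex: output object class indexes
* outClassProbability: output object probabilities
* outX: output object X position
* outY: output object Y position
* outWidth: output object width
* outHeight: output object height
* blackandwhite: 1 if the input image is black and white, otherwise 0
* r0g1b2: 1 if color channel [0] is red, otherwise 0
* inputIMAGEARRAY: input pixel values, 245 (height) x 245 (width) x 3 (colors)
* areasizefilter: minimum object area, 1 for default
* Call example: model.evaluate(5, indexes, probabilities, X, Y, WID, HGT, 0, 1, imagedata, 1);
* Return value is the int number of detected objects written to the output arrays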
* Model name: Road+ 245x245 10-30 acer7
* Date generated: 2023-08-21 06:24:44
*/
#ifndef CL_TARGET_OPENCL_VERSION
#define CL_TARGET_OPENCL_VERSION 120
#endif
#include
#include
#include
#include "CMSRModel.hpp"
using namespace std;
/*
 * Runs the complete detection pipeline on one 245x245 image:
 *   1. preprocessing: color inversion followed by histogram equalization
 *      (a single histogram shared by all color channels),
 *   2. the network forward pass (evaluate0(), which reads IMAGE and fills
 *      RESULTVALUE),
 *   3. decoding of the 7x7 prediction grid (5 values per cell) and removal of
 *      duplicate boxes whose intersection-over-union reaches 0.8.
 *
 * Parameters:
 *   maxOutCount          capacity of each out* array; at most this many objects
 *                        are written
 *   outClassIndex        receives the class index of each object (always 0 here)
 *   outClassProbability  receives the score of each object
 *   outX, outY           receive the box centers (the box edges are computed as
 *                        center -/+ half the size); the 7-cell grid spans 1.0
 *   outWidth, outHeight  receive the box sizes in the same units
 *   blackandwhite        non-zero: input holds one value per pixel,
 *                        zero: three interleaved values per pixel
 *   r0g1b2               non-zero: channel 0 is red and channel 2 is blue,
 *                        zero: channel 0 is blue and channel 2 is red
 *   inputIMAGEARRAY      245*245 pixels, row after row; not modified
 *   areasizefilter       accepted for interface compatibility; this generated
 *                        model does not use it
 *
 * Returns the number of objects written, ordered by descending score.
 * Returns 0 without running the network when maxOutCount <= 0 or when any
 * pointer argument is null.
 */
extern "C"
int CMSRModel::evaluate(
    int maxOutCount, /* max output area count */
    int *outClassIndex, /* output area class index */
    float *outClassProbability, /* output area probability */
    float *outX, /* output X position */
    float *outY, /* output Y position */
    float *outWidth, /* output width */
    float *outHeight, /* output height */
    int blackandwhite, /* 1 if black and white, otherwise 0 */
    int r0g1b2, /* 1 if [][][0] is red, otherwise 0. */
    int *inputIMAGEARRAY, /* [245/height*245/width*3/colors] */
    int areasizefilter /* min object area, 1 for default */
) {
    (void)areasizefilter;

    // Nothing can be reported in these cases; previously the first detection
    // was written to the output arrays regardless of maxOutCount.
    if (maxOutCount <= 0) {
        return 0;
    }
    if (!inputIMAGEARRAY || !outClassIndex || !outClassProbability ||
        !outX || !outY || !outWidth || !outHeight) {
        return 0;
    }

    // Settings baked in by the code generator.
    const int imageSize = 245;
    const int planeSize = imageSize * imageSize;   // 60025 values per color plane
    const bool rowHeightMajor = true;
    const bool inverseColor = true;
    const float objectnessthreshold = 0.5f;
    const float iouduplicatethreshold = 0.8f;

    const int channels = blackandwhite ? 1 : 3;
    const int sampleCount = planeSize * channels;

    // Reads one input sample and applies the color inversion. Samples are
    // clamped to 0..255 first so they can safely index the 256-entry histogram.
    // The input is processed in place (two passes) instead of being copied into
    // a ~720 KB stack array.
    auto preprocessed = [&](int index) -> int {
        int value = inputIMAGEARRAY[index];
        if (value < 0) value = 0;
        if (value > 255) value = 255;
        return inverseColor ? 255 - value : value;
    };

    // histogram equalization: count, then turn the counts into a lookup table
    // of cumulative ratios scaled to 0..255
    int equalized[256] = {0};
    for (int s = 0; s < sampleCount; s++) {
        equalized[preprocessed(s)]++;
    }
    int accum = 0;
    for (int level = 0; level < 256; level++) {
        accum += equalized[level];
        equalized[level] = (int)(255.0f*(((float)accum)/((float)sampleCount)));
    }

    // ship data to the network input: three planes in the order blue, green, red
    for (int row = 0; row < imageSize; row++) {
        for (int col = 0; col < imageSize; col++) {
            const int pixel = rowHeightMajor ? (row*imageSize + col)
                                             : (col*imageSize + row);
            const int base = pixel * channels;
            const int c0 = equalized[preprocessed(base)];
            int r, g, b;
            if (blackandwhite) {
                r = c0;
                g = c0;
                b = c0;
            } else {
                const int c1 = equalized[preprocessed(base + 1)];
                const int c2 = equalized[preprocessed(base + 2)];
                g = c1;
                if (r0g1b2) {
                    r = c0;
                    b = c2;
                } else {
                    b = c0;
                    r = c2;
                }
            }
            const int target = row*imageSize + col;
            IMAGE[target] = (b+1.0f)/256.0f;
            IMAGE[planeSize + target] = (g+1.0f)/256.0f;
            IMAGE[2*planeSize + target] = (r+1.0f)/256.0f;
        }
    }

    evaluate0();

    struct BoundingBox {
        float x, y, width, height, score;
        float xleftmost, xrightmost, ytopmost, ybottommost;
        int classindex1;
        int merged;   // 1 while the box is still a candidate, 0 once suppressed
    };
    const int gridheight = 7;
    const int gridwidth = 7;
    const int valuesPerCell = 5;   // score, x, y, width, height
    BoundingBox candidates[gridheight * gridwidth];
    BoundingBox *ordered[gridheight * gridwidth];   // candidates by descending score
    int initialcount = 0;

    const float *fVALUE = RESULTVALUE;
    const float gridsizex = 1.0f / (float)gridwidth;
    const float gridsizey = 1.0f / (float)gridheight;

    for (int gy = 0; gy < gridheight; gy++) {
        for (int gx = 0; gx < gridwidth; gx++) {
            const int w = (gy*gridwidth + gx) * valuesPerCell;
            const double pc = fVALUE[w];
            const double px = ((fVALUE[w+1]-0.1f)/0.8f)*gridsizex + gridsizex*gx;
            const double py = ((fVALUE[w+2]-0.1f)/0.8f)*gridsizey + gridsizey*gy;
            double pwidth = fVALUE[w+3];
            double pheight = fVALUE[w+4];
            if (pwidth < 0.0f) pwidth = 0.0f;
            if (pheight < 0.0f) pheight = 0.0f;
            // the reported size is the square of the raw network output
            pwidth = pwidth*pwidth;
            pheight = pheight*pheight;
            if (pc < objectnessthreshold) continue;
            if (pwidth < 0.005f) continue;
            if (pheight < 0.005f) continue;

            BoundingBox *box = &candidates[initialcount];
            box->score = pc;
            box->x = px;
            box->y = py;
            box->width = pwidth;
            box->height = pheight;
            box->xleftmost = box->x - box->width/2.0f;
            box->xrightmost = box->x + box->width/2.0f;
            box->ytopmost = box->y - box->height/2.0f;
            box->ybottommost = box->y + box->height/2.0f;
            box->classindex1 = 0;   // single-class model
            box->merged = 1;

            // insertion sort: shift lower-scoring boxes down; equal scores keep
            // their discovery order
            int pos = initialcount;
            while (pos > 0 && ordered[pos-1]->score < box->score) {
                ordered[pos] = ordered[pos-1];
                pos--;
            }
            ordered[pos] = box;
            initialcount++;
        }
    }

    // Emit boxes best-first; each emitted box suppresses every later box that
    // overlaps it with IoU >= iouduplicatethreshold.
    int mergeCount = 0;
    for (int i = 0; i < initialcount && mergeCount < maxOutCount; i++) {
        const BoundingBox *box = ordered[i];
        if (box->merged == 0) {
            continue;
        }
        outClassIndex[mergeCount] = box->classindex1;
        outClassProbability[mergeCount] = box->score;
        outX[mergeCount] = box->x;
        outY[mergeCount] = box->y;
        outWidth[mergeCount] = box->width;
        outHeight[mergeCount] = box->height;
        mergeCount++;
        if (mergeCount == maxOutCount) {
            break;
        }
        for (int j = i+1; j < initialcount; j++) {
            BoundingBox *other = ordered[j];
            if (other->merged == 0) {
                continue;
            }
            // no overlap at all
            if (other->xleftmost >= box->xrightmost || other->xrightmost <= box->xleftmost) {
                continue;
            }
            if (other->ytopmost >= box->ybottommost || other->ybottommost <= box->ytopmost) {
                continue;
            }
            const float left = (box->xleftmost < other->xleftmost) ? other->xleftmost : box->xleftmost;
            const float right = (box->xrightmost < other->xrightmost) ? box->xrightmost : other->xrightmost;
            const float top = (box->ytopmost < other->ytopmost) ? other->ytopmost : box->ytopmost;
            const float bottom = (box->ybottommost < other->ybottommost) ? box->ybottommost : other->ybottommost;
            const float intersection = (right-left) * (bottom-top);
            const float area = box->width * box->height + other->width * other->height - intersection;
            const float iou = (area <= 0.0f) ? 0.0f : intersection / area;
            if (iou >= iouduplicatethreshold) {
                other->merged = 0;
            }
        }
    }
    return mergeCount;
}
int CMSRModel::initializeModel0(char *modelfilepath, char *bytedata) {
int ret;
initialize0(22, 3);
byteposition = 0;
ifstream kernelfile;
if (bytedata==NULL) {
kernelfile.open(modelfilepath, ios::binary | ios::in);
if (!kernelfile.good()) {
if (verbose) cout << "File not found: " << modelfilepath << "\n";
return -1;
}
cbuff = new char[11289600*sizeof(float)];
isFromFile = true;
} else {
cbuff = bytedata;
isFromFile = false;
}
BUFF = new float[11289600];
IMAGE = new float[180075];
RESULTVALUE = new float[246];
mergeIndex = new int[5] {0, 2450, 1, 1, 0};
memMergeIndex = clCreateBuffer(context,
CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
sizeof(cl_int) * 5,
mergeIndex, &ret);
if (isFromFile) kernelfile.read(cbuff, 3456);
loadWGT(cbuff, BUFF, 864);
memKERNEL[0] = clCreateBuffer(context,
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
sizeof(cl_float) * 864,
BUFF, &ret);
if (isFromFile) kernelfile.read(cbuff, 128);
loadWGT(cbuff, BUFF, 32);
memBIAS[0] = clCreateBuffer(context,
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
sizeof(cl_float) * 32,
BUFF, &ret);
if (isFromFile) kernelfile.read(cbuff, 73728);
loadWGT(cbuff, BUFF, 18432);
memKERNEL[1] = clCreateBuffer(context,
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
sizeof(cl_float) * 18432,
BUFF, &ret);
if (isFromFile) kernelfile.read(cbuff, 256);
loadWGT(cbuff, BUFF, 64);
memBIAS[1] = clCreateBuffer(context,
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
sizeof(cl_float) * 64,
BUFF, &ret);
if (isFromFile) kernelfile.read(cbuff, 16384);
loadWGT(cbuff, BUFF, 4096);
memKERNEL[2] = clCreateBuffer(context,
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
sizeof(cl_float) * 4096,
BUFF, &ret);
if (isFromFile) kernelfile.read(cbuff, 256);
loadWGT(cbuff, BUFF, 64);
memBIAS[2] = clCreateBuffer(context,
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
sizeof(cl_float) * 64,
BUFF, &ret);
if (isFromFile) kernelfile.read(cbuff, 147456);
loadWGT(cbuff, BUFF, 36864);
memKERNEL[3] = clCreateBuffer(context,
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
sizeof(cl_float) * 36864,
BUFF, &ret);
if (isFromFile) kernelfile.read(cbuff, 256);
loadWGT(cbuff, BUFF, 64);
memBIAS[3] = clCreateBuffer(context,
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
sizeof(cl_float) * 64,
BUFF, &ret);
if (isFromFile) kernelfile.read(cbuff, 294912);
loadWGT(cbuff, BUFF, 73728);
memKERNEL[4] = clCreateBuffer(context,
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
sizeof(cl_float) * 73728,
BUFF, &ret);
if (isFromFile) kernelfile.read(cbuff, 512);
loadWGT(cbuff, BUFF, 128);
memBIAS[4] = clCreateBuffer(context,
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
sizeof(cl_float) * 128,
BUFF, &ret);
if (isFromFile) kernelfile.read(cbuff, 65536);
loadWGT(cbuff, BUFF, 16384);
memKERNEL[5] = clCreateBuffer(context,
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
sizeof(cl_float) * 16384,
BUFF, &ret);
if (isFromFile) kernelfile.read(cbuff, 512);
loadWGT(cbuff, BUFF, 128);
memBIAS[5] = clCreateBuffer(context,
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
sizeof(cl_float) * 128,
BUFF, &ret);
if (isFromFile) kernelfile.read(cbuff, 589824);
loadWGT(cbuff, BUFF, 147456);
memKERNEL[6] = clCreateBuffer(context,
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
sizeof(cl_float) * 147456,
BUFF, &ret);
if (isFromFile) kernelfile.read(cbuff, 512);
loadWGT(cbuff, BUFF, 128);
memBIAS[6] = clCreateBuffer(context,
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
sizeof(cl_float) * 128,
BUFF, &ret);
if (isFromFile) kernelfile.read(cbuff, 65536);
loadWGT(cbuff, BUFF, 16384);
memKERNEL[7] = clCreateBuffer(context,
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
sizeof(cl_float) * 16384,
BUFF, &ret);
if (isFromFile) kernelfile.read(cbuff, 512);
loadWGT(cbuff, BUFF, 128);
memBIAS[7] = clCreateBuffer(context,
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
sizeof(cl_float) * 128,
BUFF, &ret);
if (isFromFile) kernelfile.read(cbuff, 589824);
loadWGT(cbuff, BUFF, 147456);
memKERNEL[8] = clCreateBuffer(context,
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
sizeof(cl_float) * 147456,
BUFF, &ret);
if (isFromFile) kernelfile.read(cbuff, 512);
loadWGT(cbuff, BUFF, 128);
memBIAS[8] = clCreateBuffer(context,
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
sizeof(cl_float) * 128,
BUFF, &ret);
if (isFromFile) kernelfile.read(cbuff, 1179648);
loadWGT(cbuff, BUFF, 294912);
memKERNEL[9] = clCreateBuffer(context,
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
sizeof(cl_float) * 294912,
BUFF, &ret);
if (isFromFile) kernelfile.read(cbuff, 1024);
loadWGT(cbuff, BUFF, 256);
memBIAS[9] = clCreateBuffer(context,
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
sizeof(cl_float) * 256,
BUFF, &ret);
if (isFromFile) kernelfile.read(cbuff, 262144);
loadWGT(cbuff, BUFF, 65536);
memKERNEL[10] = clCreateBuffer(context,
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
sizeof(cl_float) * 65536,
BUFF, &ret);
if (isFromFile) kernelfile.read(cbuff, 1024);
loadWGT(cbuff, BUFF, 256);
memBIAS[10] = clCreateBuffer(context,
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
sizeof(cl_float) * 256,
BUFF, &ret);
if (isFromFile) kernelfile.read(cbuff, 2359296);
loadWGT(cbuff, BUFF, 589824);
memKERNEL[11] = clCreateBuffer(context,
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
sizeof(cl_float) * 589824,
BUFF, &ret);
if (isFromFile) kernelfile.read(cbuff, 1024);
loadWGT(cbuff, BUFF, 256);
memBIAS[11] = clCreateBuffer(context,
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
sizeof(cl_float) * 256,
BUFF, &ret);
if (isFromFile) kernelfile.read(cbuff, 262144);
loadWGT(cbuff, BUFF, 65536);
memKERNEL[12] = clCreateBuffer(context,
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
sizeof(cl_float) * 65536,
BUFF, &ret);
if (isFromFile) kernelfile.read(cbuff, 1024);
loadWGT(cbuff, BUFF, 256);
memBIAS[12] = clCreateBuffer(context,
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
sizeof(cl_float) * 256,
BUFF, &ret);
if (isFromFile) kernelfile.read(cbuff, 2359296);
loadWGT(cbuff, BUFF, 589824);
memKERNEL[13] = clCreateBuffer(context,
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
sizeof(cl_float) * 589824,
BUFF, &ret);
if (isFromFile) kernelfile.read(cbuff, 1024);
loadWGT(cbuff, BUFF, 256);
memBIAS[13] = clCreateBuffer(context,
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
sizeof(cl_float) * 256,
BUFF, &ret);
if (isFromFile) kernelfile.read(cbuff, 262144);
loadWGT(cbuff, BUFF, 65536);
memKERNEL[14] = clCreateBuffer(context,
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
sizeof(cl_float) * 65536,
BUFF, &ret);
if (isFromFile) kernelfile.read(cbuff, 1024);
loadWGT(cbuff, BUFF, 256);
memBIAS[14] = clCreateBuffer(context,
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
sizeof(cl_float) * 256,
BUFF, &ret);
if (isFromFile) kernelfile.read(cbuff, 2359296);
loadWGT(cbuff, BUFF, 589824);
memKERNEL[15] = clCreateBuffer(context,
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
sizeof(cl_float) * 589824,
BUFF, &ret);
if (isFromFile) kernelfile.read(cbuff, 1024);
loadWGT(cbuff, BUFF, 256);
memBIAS[15] = clCreateBuffer(context,
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
sizeof(cl_float) * 256,
BUFF, &ret);
if (isFromFile) kernelfile.read(cbuff, 4718592);
loadWGT(cbuff, BUFF, 1179648);
memKERNEL[16] = clCreateBuffer(context,
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
sizeof(cl_float) * 1179648,
BUFF, &ret);
if (isFromFile) kernelfile.read(cbuff, 2048);
loadWGT(cbuff, BUFF, 512);
memBIAS[16] = clCreateBuffer(context,
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
sizeof(cl_float) * 512,
BUFF, &ret);
if (isFromFile) kernelfile.read(cbuff, 1048576);
loadWGT(cbuff, BUFF, 262144);
memKERNEL[17] = clCreateBuffer(context,
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
sizeof(cl_float) * 262144,
BUFF, &ret);
if (isFromFile) kernelfile.read(cbuff, 2048);
loadWGT(cbuff, BUFF, 512);
memBIAS[17] = clCreateBuffer(context,
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
sizeof(cl_float) * 512,
BUFF, &ret);
if (isFromFile) kernelfile.read(cbuff, 9437184);
loadWGT(cbuff, BUFF, 2359296);
memKERNEL[18] = clCreateBuffer(context,
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
sizeof(cl_float) * 2359296,
BUFF, &ret);
if (isFromFile) kernelfile.read(cbuff, 2048);
loadWGT(cbuff, BUFF, 512);
memBIAS[18] = clCreateBuffer(context,
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
sizeof(cl_float) * 512,
BUFF, &ret);
if (isFromFile) kernelfile.read(cbuff, 1048576);
loadWGT(cbuff, BUFF, 262144);
memKERNEL[19] = clCreateBuffer(context,
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
sizeof(cl_float) * 262144,
BUFF, &ret);
if (isFromFile) kernelfile.read(cbuff, 2048);
loadWGT(cbuff, BUFF, 512);
memBIAS[19] = clCreateBuffer(context,
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
sizeof(cl_float) * 512,
BUFF, &ret);
if (isFromFile) kernelfile.read(cbuff, 45158400);
loadWGT(cbuff, BUFF, 11289600);
memKERNEL[20] = clCreateBuffer(context,
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
sizeof(cl_float) * 11289600,
BUFF, &ret);
if (isFromFile) kernelfile.read(cbuff, 9800);
loadWGT(cbuff, BUFF, 2450);
memBIAS[20] = clCreateBuffer(context,
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
sizeof(cl_float) * 2450,
BUFF, &ret);
if (isFromFile) kernelfile.read(cbuff, 24019800);
loadWGT(cbuff, BUFF, 6004950);
memfcWEIGHT[1] = clCreateBuffer(context,
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
sizeof(cl_float) * 6004950,
BUFF, &ret);
if (isFromFile) kernelfile.read(cbuff, 2401980);
loadWGT(cbuff, BUFF, 600495);
memfcWEIGHT[2] = clCreateBuffer(context,
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
sizeof(cl_float) * 600495,
BUFF, &ret);
if (isFromFile) {
kernelfile.read(cbuff, 3);
kernelfile.close();
}
int xx = checkFileValidity(cbuff);
isProfileLoaded = true;
if (isFromFile) {
delete cbuff; cbuff = NULL;
} else {
cbuff = NULL;
}
delete BUFF; BUFF = NULL;
memVALUE0 = clCreateBuffer(context,
CL_MEM_READ_WRITE,
sizeof(cl_float) * 1889568,
NULL, &ret);
memVALUE1 = clCreateBuffer(context,
CL_MEM_READ_WRITE,
sizeof(cl_float) * 1889568,
NULL, &ret);
memfcVALUE0 = clCreateBuffer(context,
CL_MEM_READ_WRITE,
sizeof(cl_float) * 2451,
NULL, &ret);
if (xx!=0) return xx;
return 0;
}
/*
 * Forward pass: uploads IMAGE to the device, enqueues the 21 convolution
 * launches, then the pooling, merge and two fully-connected launches, and
 * finally reads 246 floats back into RESULTVALUE. Always returns 0; the
 * status codes of the OpenCL calls are not inspected.
 */
int CMSRModel::evaluate0() {
    // One row per convolution launch: the ten launch-specific integers passed
    // to setIntegers(), in call order. The first one is also the size argument
    // of setPointersAndEnqueue().
    // NOTE(review): numerically these look like {output count, output channels,
    // kernel h, kernel w, stride, input channels, input h, input w, output h,
    // output w} - confirm against the kernel source.
    static const int convArgs[21][10] = {
        {1889568,   32, 3, 3, 1,   3, 245, 245, 243, 243},
        { 937024,   64, 3, 3, 2,  32, 243, 243, 121, 121},
        { 937024,   64, 1, 1, 1,  64, 121, 121, 121, 121},
        { 906304,   64, 3, 3, 1,  64, 121, 121, 119, 119},
        { 445568,  128, 3, 3, 2,  64, 119, 119,  59,  59},
        { 445568,  128, 1, 1, 1, 128,  59,  59,  59,  59},
        { 415872,  128, 3, 3, 1, 128,  59,  59,  57,  57},
        { 415872,  128, 1, 1, 1, 128,  57,  57,  57,  57},
        { 387200,  128, 3, 3, 1, 128,  57,  57,  55,  55},
        { 186624,  256, 3, 3, 2, 128,  55,  55,  27,  27},
        { 186624,  256, 1, 1, 1, 256,  27,  27,  27,  27},
        { 160000,  256, 3, 3, 1, 256,  27,  27,  25,  25},
        { 160000,  256, 1, 1, 1, 256,  25,  25,  25,  25},
        { 135424,  256, 3, 3, 1, 256,  25,  25,  23,  23},
        { 135424,  256, 1, 1, 1, 256,  23,  23,  23,  23},
        { 112896,  256, 3, 3, 1, 256,  23,  23,  21,  21},
        {  51200,  512, 3, 3, 2, 256,  21,  21,  10,  10},
        {  51200,  512, 1, 1, 1, 512,  10,  10,  10,  10},
        {  32768,  512, 3, 3, 1, 512,  10,  10,   8,   8},
        {  12800,  512, 1, 1, 2, 512,   8,   8,   5,   5},
        {  22050, 2450, 3, 3, 1, 512,   5,   5,   3,   3}
    };

    // upload the preprocessed image (3 planes of 245*245 floats)
    clEnqueueWriteBuffer(commandQueue, memVALUE0, CL_TRUE, 0,
        180075 * sizeof(cl_float),
        IMAGE, 0, NULL, NULL);

    for (int stage = 0; stage < 21; ++stage) {
        const int *a = convArgs[stage];
        // the two work buffers alternate roles: even launches read memVALUE0
        // and write memVALUE1, odd launches the other way round
        const bool even = (stage % 2 == 0);
        auto source = even ? memVALUE0 : memVALUE1;
        auto target = even ? memVALUE1 : memVALUE0;

        setFloats(kernelCNNJocl01FwdConv00, 0, 1, 0.1f);
        setIntegers(kernelCNNJocl01FwdConv00, 1, 14, 0, 0, 1,
            a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7], a[8], a[9], 4);
        setPointersAndEnqueue(a[0], kernelCNNJocl01FwdConv00, 15, 4,
            source, memKERNEL[stage], memBIAS[stage], target);

        // only the first launch is followed by an explicit flush
        if (stage == 0) {
            clFlush(commandQueue);
        }
    }

    // pooling launch: memVALUE1 -> memVALUE0
    setIntegers(kernelCNNJocl02FwdPool00, 0, 13, 0, 1, 2450, 2450, 2450, 3, 3, 1, 1, 2, 1, 1, 1);
    setPointersAndEnqueue(2450, kernelCNNJocl02FwdPool00, 13, 2, memVALUE1, memVALUE0);

    // merge launch: memVALUE0 -> memfcVALUE0
    setFloats(kernelCNNJocl03FwdMerge01, 0, 3, 0.1f, 0.0f, 0.0f);
    setIntegers(kernelCNNJocl03FwdMerge01, 3, 6, 1, 1, 2450, 2450, 0, 0);
    setPointersAndEnqueue(2450, kernelCNNJocl03FwdMerge01, 9, 3, memMergeIndex, memVALUE0, memfcVALUE0);

    // first fully-connected launch: memfcVALUE0 -> memVALUE0
    int opr = 0;
    setFloats(kernelCNNJocl04FwdFC01, 0, 1, 0.1f);
    setIntegers(kernelCNNJocl04FwdFC01, 1, 11, opr, -1, 1, 2450, -1, -1, -1, 2451, 2450, 1, -1);
    setPointersAndEnqueue(2450, kernelCNNJocl04FwdFC01, 12, 3, memfcVALUE0, memfcWEIGHT[1], memVALUE0);

    // second fully-connected launch: memVALUE0 -> memfcVALUE0
    opr = 0;
    setFloats(kernelCNNJocl04FwdFC01, 0, 1, 0.1f);
    setIntegers(kernelCNNJocl04FwdFC01, 1, 11, opr, 7, 1, 49, 7, 7, 2, 2451, 245, 6, 1);
    setPointersAndEnqueue(49, kernelCNNJocl04FwdFC01, 12, 3, memVALUE0, memfcWEIGHT[2], memfcVALUE0);

    // wait for the queue to drain, then fetch the 246 result values
    clFinish(commandQueue);
    clEnqueueReadBuffer(commandQueue, memfcVALUE0, CL_TRUE, 0,
        246 * sizeof(cl_float),
        RESULTVALUE,
        0, NULL, NULL);
    return 0;
}
|
|