Rosella       Machine Intelligence & Data Mining

CMSR Generated Program Example: C++ OpenCL

The following is a CMSR-generated C++ OpenCL program for a 25-million-parameter YOLO-like object detection deep neural network. It is a huge model, so the program is long. It can be run on any OpenCL-enabled computer or device. It includes pre- and post-processing such as color inversion, histogram equalization, encoding, object filtering, etc., so no extra coding is needed. Just add it to your project along with the base class file and the shader program file. All that is needed is to call three functions: one for initialization, one for evaluation, and one to release resources.

/* Generated by CMSR Machine Learning Studio
 * Algorithm: Convolutional Neural Network (OD-CNN)
 * Language:  C++/OpenCL
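 * Call evaluate() to detect objects.
 * Arguments:
 *   maxOutCount: maximum number of output objects (capacity of the output arrays)
 *   outClassIndex: output object class indexes
 *   outClassProbability: output object scores
 *   outX: output object X position
 *   outY: output object Y position
 *   outWidth: output object width
 *   outHeight: output object height
 *   blackandwhite: 1 if the input image is black and white, otherwise 0
 *   r0g1b2: 1 if [][][0] is red, otherwise 0
 *   inputIMAGEARRAY: input pixels, [245/height*245/width*3/colors]
 *   areasizefilter: min object area, 1 for default
 *   Call example: model.evaluate(5, indexes, probabilities, X, Y, WID, HGT, 0, 1, imagedata, 1);
 * Return value is the number of objects written to the output arrays, ordered by descending score.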
 * Model name: Road+ 245x245 10-30 acer7
 * Date generated: 2023-08-21 06:24:44
 */

#ifndef CL_TARGET_OPENCL_VERSION
#define CL_TARGET_OPENCL_VERSION 120
#endif
#include <CL/cl.h>
#include <fstream>
#include <iostream>
#include "CMSRModel.hpp"
using namespace std;

/*
 * Detects objects in one 245x245 image.
 *
 * Steps performed here:
 *   1. Pre-processing: every input sample is inverted (255 - v) and the image
 *      is histogram-equalized over all of its samples.
 *   2. The result is written to IMAGE as three planes of 245*245 floats in
 *      the order blue, green, red, each value being (v + 1) / 256.
 *   3. evaluate0() runs the network and fills RESULTVALUE.
 *   4. RESULTVALUE is decoded as a 7x7 grid with five floats per cell
 *      (score, x, y, width, height); cells scoring below 0.5 or with a
 *      squared width/height below 0.005 are dropped.
 *   5. Remaining boxes are emitted in descending score order; a box whose IoU
 *      with an already emitted box is >= 0.8 is suppressed.
 *
 * Parameters:
 *   maxOutCount         capacity of every out* array; nothing is written when
 *                       it is <= 0.
 *   outClassIndex       receives the class index (always 0 in this model).
 *   outClassProbability receives the box score.
 *   outX, outY          receive the box centre, in units where one grid cell
 *                       is 1/7 (i.e. the whole image spans 0..1).
 *   outWidth, outHeight receive the box size (square of the raw network
 *                       output; used in the same units as outX/outY).
 *   blackandwhite       non-zero: inputIMAGEARRAY holds one sample per pixel;
 *                       zero: three samples per pixel.
 *   r0g1b2              non-zero: sample 0 of a pixel is red and sample 2 is
 *                       blue; zero: the reverse. Ignored for black and white.
 *   inputIMAGEARRAY     row-major pixels, samples of a pixel adjacent.
 *                       Samples outside 0..255 are clamped into that range.
 *   areasizefilter      accepted for interface compatibility; this function
 *                       does not read it.
 *
 * Returns the number of boxes written to the output arrays (0..maxOutCount).
 * Returns 0 when evaluate0() reports a failure.
 */
extern "C"
int CMSRModel::evaluate(
	int    maxOutCount,   /* max output area count */
	int   *outClassIndex, /* output area class index */
	float *outClassProbability, /* output area probability */
	float *outX,          /* output X position */
	float *outY,          /* output Y position */
	float *outWidth,      /* output width */
	float *outHeight,     /* output height */
	int    blackandwhite, /* 1 if black and white, otherwise 0 */
	int    r0g1b2,        /* 1 if [][][0] is red, otherwise 0. */
	int   *inputIMAGEARRAY, /* [245/height*245/width*3/colors] */
	int    areasizefilter /* min object area, 1 for default */
	) {

	const int planeSize = 60025; /* 245*245 pixels per colour plane */
	const float objectnessthreshold = 0.5f;
	const float iouduplicatethreshold = 0.8f;
	int i, j, k, w, x1, x2, x3;

	(void)areasizefilter; /* not used by this generated model */

	const int channels = blackandwhite ? 1 : 3;
	const int samplecount = planeSize * channels;

	/* Reads one input sample, clamps it to 0..255 so that it is always a
	 * valid histogram index, and applies the colour inversion. Working
	 * straight from the input avoids a 245*245*3 int scratch copy on the
	 * stack. */
	const auto invertedSample = [inputIMAGEARRAY](int index) -> int {
		int v = inputIMAGEARRAY[index];
		if (v < 0) {
			v = 0;
		} else if (v > 255) {
			v = 255;
		}
		return 255 - v;
	};

	// histogram equalization
	int evaluation[256];
	for (i=0; i < 256; i++) {
		evaluation[i] = 0;
	}
	// count histogram over every sample of every channel
	for (i=0; i < samplecount; i++) {
		evaluation[invertedSample(i)]++;
	}
	// turn the histogram into a cumulative 0..255 lookup table
	int accum = 0;
	for (i=0; i < 256; i++) {
		accum += evaluation[i];
		evaluation[i] = (int)(255.0f*(((float)accum)/((float)samplecount)));
	}

	// ship equalized data to the network input: planes are blue, green, red
	for (i=0; i < planeSize; i++) {
		int r, g, b;
		if (blackandwhite) {
			r = evaluation[invertedSample(i)];
			g = r;
			b = r;
		} else {
			const int c0 = evaluation[invertedSample(3*i)];
			const int c1 = evaluation[invertedSample(3*i+1)];
			const int c2 = evaluation[invertedSample(3*i+2)];
			if (r0g1b2) {
				r = c0;
				g = c1;
				b = c2;
			} else {
				b = c0;
				g = c1;
				r = c2;
			}
		}
		IMAGE[i]               = (b+1.0f)/256.0f;
		IMAGE[planeSize+i]     = (g+1.0f)/256.0f;
		IMAGE[2*planeSize+i]   = (r+1.0f)/256.0f;
	}

	// A failed run would leave stale values in RESULTVALUE; report nothing.
	if (evaluate0() != 0) {
		return 0;
	}

	typedef struct BoundingBox {
		float x, y, width, height, score;
		float xleftmost, xrightmost, ytopmost, ybottommost;
		int classindex1;
		int merged; /* 1 while the box is a live candidate, 0 once suppressed */
	} BoundingBox;

	const int gridheight = 7;
	const int gridwidth  = 7;
	BoundingBox PredBOXES[7][7][1];
	BoundingBox *PredBOXESlist[50]; /* at most 7*7*1 = 49 entries are used */
	int initialcount = 0;
	int mergeCount = 0;
	float *fVALUE = RESULTVALUE;
	BoundingBox *box;
	BoundingBox *box1;
	double pc, px, py, pwidth, pheight;
	const float gridsizex = 1.0f / (float)gridwidth;
	const float gridsizey = 1.0f / (float)gridheight;
	float left, right, top, bottom, iou, area;

	// decode the grid and keep PredBOXESlist sorted by descending score
	for (x1=0; x1 < gridheight; x1++) {
		for (x2=0; x2 < gridwidth; x2++) {
			for (x3=0; x3 < 1; x3++) {
				box = &PredBOXES[x1][x2][x3];
				w = x1*35+x2*5+x3*5;
				pc = fVALUE[w];
				// cell-relative offsets are stored as 0.1..0.9; map back to 0..1
				px = ((fVALUE[w+1]-0.1f)/0.8f)*gridsizex + gridsizex*x2;
				py = ((fVALUE[w+2]-0.1f)/0.8f)*gridsizey + gridsizey*x1;
				pwidth = fVALUE[w+3];
				pheight = fVALUE[w+4];
				if (pwidth < 0.0f) pwidth = 0.0f;
				if (pheight < 0.0f) pheight = 0.0f;
				pwidth = pwidth*pwidth;
				pheight = pheight*pheight;
				if (pc < objectnessthreshold) {
					continue;
				}
				if (pwidth  < 0.005f) continue;
				if (pheight < 0.005f) continue;
				box->score = pc;
				box->x = px;
				box->y = py;
				box->width = pwidth;
				box->height = pheight;
				box->xleftmost = box->x - box->width/2.0f;
				box->xrightmost = box->x + box->width/2.0f;
				box->ytopmost = box->y - box->height/2.0f;
				box->ybottommost = box->y + box->height/2.0f;
				box->classindex1 = 0;
				box->merged = 1;
				// insertion sort: bubble the new box up past lower scores
				PredBOXESlist[initialcount] = box;
				for (k = initialcount-1; k >= 0; k--) {
					if (PredBOXESlist[k]->score < box->score) {
						PredBOXESlist[k+1] = PredBOXESlist[k];
						PredBOXESlist[k] = box;
						continue;
					}
					break;
				}
				initialcount++;
			}
		}
	}

	// emit boxes best-first, suppressing near-duplicates of each emitted box.
	// The capacity test is in the loop condition so that maxOutCount <= 0
	// never writes to the output arrays.
	for (i=0; i < initialcount && mergeCount < maxOutCount; i++) {
		box = PredBOXESlist[i];
		if (box->merged==0) {
			continue;
		}
		outClassIndex[mergeCount] = box->classindex1;
		outClassProbability[mergeCount] = box->score;
		outX[mergeCount] = box->x;
		outY[mergeCount] = box->y;
		outWidth[mergeCount] = box->width;
		outHeight[mergeCount] = box->height;
		mergeCount++;
		if (mergeCount==maxOutCount) {
			break;
		}
		for (j=i+1; j < initialcount; j++) {
			box1 = PredBOXESlist[j];
			if (box1->merged==0) {
				continue;
			}
			// no overlap at all: nothing to suppress
			if (box1->xleftmost >= box->xrightmost) {
				continue;
			}
			if (box1->xrightmost <= box->xleftmost) {
				continue;
			}
			if (box1->ytopmost >= box->ybottommost) {
				continue;
			}
			if (box1->ybottommost <= box->ytopmost) {
				continue;
			}
			// intersection rectangle
			left   = (box->xleftmost < box1->xleftmost) ? box1->xleftmost : box->xleftmost;
			right  = (box->xrightmost < box1->xrightmost) ? box->xrightmost : box1->xrightmost;
			top    = (box->ytopmost < box1->ytopmost) ? box1->ytopmost : box->ytopmost;
			bottom = (box->ybottommost < box1->ybottommost) ? box->ybottommost : box1->ybottommost;
			// iou holds the intersection area first, then intersection / union
			iou = (right-left) * (bottom-top);
			area = box->width * box->height + box1->width * box1->height - iou;
			if (area <= 0.0f) {
				iou = 0.0f;
			} else {
				iou = iou / area;
			}
			if (iou >= iouduplicatethreshold) {
				box1->merged = 0;
			}
		}
	}

	return mergeCount;
}


int CMSRModel::initializeModel0(char *modelfilepath, char *bytedata) {
	int ret;

	initialize0(22, 3);

	byteposition = 0;
	ifstream kernelfile;
	if (bytedata==NULL) {
		kernelfile.open(modelfilepath, ios::binary | ios::in);
		if (!kernelfile.good()) {
			if (verbose) cout << "File not found: " << modelfilepath << "\n";
			return -1;
		}
		cbuff = new char[11289600*sizeof(float)];
		isFromFile = true;
	} else {
		cbuff = bytedata;
		isFromFile = false;
	}

	BUFF  = new float[11289600];
	IMAGE = new float[180075];
	RESULTVALUE = new float[246];

	mergeIndex = new int[5] {0, 2450, 1, 1, 0};
	memMergeIndex = clCreateBuffer(context, 
		CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
		sizeof(cl_int) * 5, 
		mergeIndex, &ret);

	if (isFromFile) kernelfile.read(cbuff, 3456);
	loadWGT(cbuff, BUFF, 864);
	memKERNEL[0] = clCreateBuffer(context,
		CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
		sizeof(cl_float) * 864, 
		BUFF, &ret);

	if (isFromFile) kernelfile.read(cbuff, 128);
	loadWGT(cbuff, BUFF, 32);
	memBIAS[0] = clCreateBuffer(context, 
		CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
		sizeof(cl_float) * 32, 
		BUFF, &ret);

	if (isFromFile) kernelfile.read(cbuff, 73728);
	loadWGT(cbuff, BUFF, 18432);
	memKERNEL[1] = clCreateBuffer(context,
		CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
		sizeof(cl_float) * 18432, 
		BUFF, &ret);

	if (isFromFile) kernelfile.read(cbuff, 256);
	loadWGT(cbuff, BUFF, 64);
	memBIAS[1] = clCreateBuffer(context, 
		CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
		sizeof(cl_float) * 64, 
		BUFF, &ret);

	if (isFromFile) kernelfile.read(cbuff, 16384);
	loadWGT(cbuff, BUFF, 4096);
	memKERNEL[2] = clCreateBuffer(context,
		CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
		sizeof(cl_float) * 4096, 
		BUFF, &ret);

	if (isFromFile) kernelfile.read(cbuff, 256);
	loadWGT(cbuff, BUFF, 64);
	memBIAS[2] = clCreateBuffer(context, 
		CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
		sizeof(cl_float) * 64, 
		BUFF, &ret);

	if (isFromFile) kernelfile.read(cbuff, 147456);
	loadWGT(cbuff, BUFF, 36864);
	memKERNEL[3] = clCreateBuffer(context,
		CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
		sizeof(cl_float) * 36864, 
		BUFF, &ret);

	if (isFromFile) kernelfile.read(cbuff, 256);
	loadWGT(cbuff, BUFF, 64);
	memBIAS[3] = clCreateBuffer(context, 
		CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
		sizeof(cl_float) * 64, 
		BUFF, &ret);

	if (isFromFile) kernelfile.read(cbuff, 294912);
	loadWGT(cbuff, BUFF, 73728);
	memKERNEL[4] = clCreateBuffer(context,
		CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
		sizeof(cl_float) * 73728, 
		BUFF, &ret);

	if (isFromFile) kernelfile.read(cbuff, 512);
	loadWGT(cbuff, BUFF, 128);
	memBIAS[4] = clCreateBuffer(context, 
		CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
		sizeof(cl_float) * 128, 
		BUFF, &ret);

	if (isFromFile) kernelfile.read(cbuff, 65536);
	loadWGT(cbuff, BUFF, 16384);
	memKERNEL[5] = clCreateBuffer(context,
		CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
		sizeof(cl_float) * 16384, 
		BUFF, &ret);

	if (isFromFile) kernelfile.read(cbuff, 512);
	loadWGT(cbuff, BUFF, 128);
	memBIAS[5] = clCreateBuffer(context, 
		CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
		sizeof(cl_float) * 128, 
		BUFF, &ret);

	if (isFromFile) kernelfile.read(cbuff, 589824);
	loadWGT(cbuff, BUFF, 147456);
	memKERNEL[6] = clCreateBuffer(context,
		CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
		sizeof(cl_float) * 147456, 
		BUFF, &ret);

	if (isFromFile) kernelfile.read(cbuff, 512);
	loadWGT(cbuff, BUFF, 128);
	memBIAS[6] = clCreateBuffer(context, 
		CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
		sizeof(cl_float) * 128, 
		BUFF, &ret);

	if (isFromFile) kernelfile.read(cbuff, 65536);
	loadWGT(cbuff, BUFF, 16384);
	memKERNEL[7] = clCreateBuffer(context,
		CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
		sizeof(cl_float) * 16384, 
		BUFF, &ret);

	if (isFromFile) kernelfile.read(cbuff, 512);
	loadWGT(cbuff, BUFF, 128);
	memBIAS[7] = clCreateBuffer(context, 
		CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
		sizeof(cl_float) * 128, 
		BUFF, &ret);

	if (isFromFile) kernelfile.read(cbuff, 589824);
	loadWGT(cbuff, BUFF, 147456);
	memKERNEL[8] = clCreateBuffer(context,
		CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
		sizeof(cl_float) * 147456, 
		BUFF, &ret);

	if (isFromFile) kernelfile.read(cbuff, 512);
	loadWGT(cbuff, BUFF, 128);
	memBIAS[8] = clCreateBuffer(context, 
		CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
		sizeof(cl_float) * 128, 
		BUFF, &ret);

	if (isFromFile) kernelfile.read(cbuff, 1179648);
	loadWGT(cbuff, BUFF, 294912);
	memKERNEL[9] = clCreateBuffer(context,
		CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
		sizeof(cl_float) * 294912, 
		BUFF, &ret);

	if (isFromFile) kernelfile.read(cbuff, 1024);
	loadWGT(cbuff, BUFF, 256);
	memBIAS[9] = clCreateBuffer(context, 
		CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
		sizeof(cl_float) * 256, 
		BUFF, &ret);

	if (isFromFile) kernelfile.read(cbuff, 262144);
	loadWGT(cbuff, BUFF, 65536);
	memKERNEL[10] = clCreateBuffer(context,
		CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
		sizeof(cl_float) * 65536, 
		BUFF, &ret);

	if (isFromFile) kernelfile.read(cbuff, 1024);
	loadWGT(cbuff, BUFF, 256);
	memBIAS[10] = clCreateBuffer(context, 
		CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
		sizeof(cl_float) * 256, 
		BUFF, &ret);

	if (isFromFile) kernelfile.read(cbuff, 2359296);
	loadWGT(cbuff, BUFF, 589824);
	memKERNEL[11] = clCreateBuffer(context,
		CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
		sizeof(cl_float) * 589824, 
		BUFF, &ret);

	if (isFromFile) kernelfile.read(cbuff, 1024);
	loadWGT(cbuff, BUFF, 256);
	memBIAS[11] = clCreateBuffer(context, 
		CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
		sizeof(cl_float) * 256, 
		BUFF, &ret);

	if (isFromFile) kernelfile.read(cbuff, 262144);
	loadWGT(cbuff, BUFF, 65536);
	memKERNEL[12] = clCreateBuffer(context,
		CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
		sizeof(cl_float) * 65536, 
		BUFF, &ret);

	if (isFromFile) kernelfile.read(cbuff, 1024);
	loadWGT(cbuff, BUFF, 256);
	memBIAS[12] = clCreateBuffer(context, 
		CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
		sizeof(cl_float) * 256, 
		BUFF, &ret);

	if (isFromFile) kernelfile.read(cbuff, 2359296);
	loadWGT(cbuff, BUFF, 589824);
	memKERNEL[13] = clCreateBuffer(context,
		CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
		sizeof(cl_float) * 589824, 
		BUFF, &ret);

	if (isFromFile) kernelfile.read(cbuff, 1024);
	loadWGT(cbuff, BUFF, 256);
	memBIAS[13] = clCreateBuffer(context, 
		CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
		sizeof(cl_float) * 256, 
		BUFF, &ret);

	if (isFromFile) kernelfile.read(cbuff, 262144);
	loadWGT(cbuff, BUFF, 65536);
	memKERNEL[14] = clCreateBuffer(context,
		CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
		sizeof(cl_float) * 65536, 
		BUFF, &ret);

	if (isFromFile) kernelfile.read(cbuff, 1024);
	loadWGT(cbuff, BUFF, 256);
	memBIAS[14] = clCreateBuffer(context, 
		CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
		sizeof(cl_float) * 256, 
		BUFF, &ret);

	if (isFromFile) kernelfile.read(cbuff, 2359296);
	loadWGT(cbuff, BUFF, 589824);
	memKERNEL[15] = clCreateBuffer(context,
		CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
		sizeof(cl_float) * 589824, 
		BUFF, &ret);

	if (isFromFile) kernelfile.read(cbuff, 1024);
	loadWGT(cbuff, BUFF, 256);
	memBIAS[15] = clCreateBuffer(context, 
		CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
		sizeof(cl_float) * 256, 
		BUFF, &ret);

	if (isFromFile) kernelfile.read(cbuff, 4718592);
	loadWGT(cbuff, BUFF, 1179648);
	memKERNEL[16] = clCreateBuffer(context,
		CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
		sizeof(cl_float) * 1179648, 
		BUFF, &ret);

	if (isFromFile) kernelfile.read(cbuff, 2048);
	loadWGT(cbuff, BUFF, 512);
	memBIAS[16] = clCreateBuffer(context, 
		CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
		sizeof(cl_float) * 512, 
		BUFF, &ret);

	if (isFromFile) kernelfile.read(cbuff, 1048576);
	loadWGT(cbuff, BUFF, 262144);
	memKERNEL[17] = clCreateBuffer(context,
		CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
		sizeof(cl_float) * 262144, 
		BUFF, &ret);

	if (isFromFile) kernelfile.read(cbuff, 2048);
	loadWGT(cbuff, BUFF, 512);
	memBIAS[17] = clCreateBuffer(context, 
		CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
		sizeof(cl_float) * 512, 
		BUFF, &ret);

	if (isFromFile) kernelfile.read(cbuff, 9437184);
	loadWGT(cbuff, BUFF, 2359296);
	memKERNEL[18] = clCreateBuffer(context,
		CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
		sizeof(cl_float) * 2359296, 
		BUFF, &ret);

	if (isFromFile) kernelfile.read(cbuff, 2048);
	loadWGT(cbuff, BUFF, 512);
	memBIAS[18] = clCreateBuffer(context, 
		CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
		sizeof(cl_float) * 512, 
		BUFF, &ret);

	if (isFromFile) kernelfile.read(cbuff, 1048576);
	loadWGT(cbuff, BUFF, 262144);
	memKERNEL[19] = clCreateBuffer(context,
		CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
		sizeof(cl_float) * 262144, 
		BUFF, &ret);

	if (isFromFile) kernelfile.read(cbuff, 2048);
	loadWGT(cbuff, BUFF, 512);
	memBIAS[19] = clCreateBuffer(context, 
		CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
		sizeof(cl_float) * 512, 
		BUFF, &ret);

	if (isFromFile) kernelfile.read(cbuff, 45158400);
	loadWGT(cbuff, BUFF, 11289600);
	memKERNEL[20] = clCreateBuffer(context,
		CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
		sizeof(cl_float) * 11289600, 
		BUFF, &ret);

	if (isFromFile) kernelfile.read(cbuff, 9800);
	loadWGT(cbuff, BUFF, 2450);
	memBIAS[20] = clCreateBuffer(context, 
		CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
		sizeof(cl_float) * 2450, 
		BUFF, &ret);

	if (isFromFile) kernelfile.read(cbuff, 24019800);
	loadWGT(cbuff, BUFF, 6004950);
	memfcWEIGHT[1] = clCreateBuffer(context, 
		CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
		sizeof(cl_float) * 6004950, 
		BUFF, &ret);

	if (isFromFile) kernelfile.read(cbuff, 2401980);
	loadWGT(cbuff, BUFF, 600495);
	memfcWEIGHT[2] = clCreateBuffer(context, 
		CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
		sizeof(cl_float) * 600495, 
		BUFF, &ret);

	if (isFromFile) {
		kernelfile.read(cbuff, 3);
		kernelfile.close();
	}
	int xx = checkFileValidity(cbuff);
	isProfileLoaded = true;
	if (isFromFile) {
		delete cbuff; cbuff = NULL;
	} else {
		cbuff = NULL;
	}
	delete BUFF; BUFF = NULL;

	memVALUE0 = clCreateBuffer(context, 
		CL_MEM_READ_WRITE,
		sizeof(cl_float) * 1889568, 
		NULL, &ret);

	memVALUE1 = clCreateBuffer(context, 
		CL_MEM_READ_WRITE,
		sizeof(cl_float) * 1889568, 
		NULL, &ret);

	memfcVALUE0 = clCreateBuffer(context, 
		CL_MEM_READ_WRITE,
		sizeof(cl_float) * 2451, 
		NULL, &ret);

	if (xx!=0) return xx;
	return 0;
}

/*
 * Runs one forward pass of the network on the data currently in IMAGE and
 * copies the first 246 floats of the final buffer into RESULTVALUE.
 *
 * The pass is: upload IMAGE to memVALUE0, 21 convolution launches that
 * alternate between memVALUE0 and memVALUE1, one pooling launch, one merge
 * launch into memfcVALUE0, two fully connected launches, then a blocking
 * read-back from memfcVALUE0.
 *
 * Returns 0 on success, otherwise the non-zero status reported by
 * clEnqueueWriteBuffer, clFinish or clEnqueueReadBuffer. On failure
 * RESULTVALUE must not be trusted.
 */
int CMSRModel::evaluate0() {
	// Per-layer integer arguments for kernelCNNJocl01FwdConv00, passed through
	// unchanged and in this order. NOTE(review): from the values the columns
	// look like {output element count, output channels, kernel h, kernel w,
	// stride, input channels, input h, input w, output h, output w} - confirm
	// against the kernel source.
	static const int convLayers[21][10] = {
		{1889568,   32, 3, 3, 1,   3, 245, 245, 243, 243},
		{ 937024,   64, 3, 3, 2,  32, 243, 243, 121, 121},
		{ 937024,   64, 1, 1, 1,  64, 121, 121, 121, 121},
		{ 906304,   64, 3, 3, 1,  64, 121, 121, 119, 119},
		{ 445568,  128, 3, 3, 2,  64, 119, 119,  59,  59},
		{ 445568,  128, 1, 1, 1, 128,  59,  59,  59,  59},
		{ 415872,  128, 3, 3, 1, 128,  59,  59,  57,  57},
		{ 415872,  128, 1, 1, 1, 128,  57,  57,  57,  57},
		{ 387200,  128, 3, 3, 1, 128,  57,  57,  55,  55},
		{ 186624,  256, 3, 3, 2, 128,  55,  55,  27,  27},
		{ 186624,  256, 1, 1, 1, 256,  27,  27,  27,  27},
		{ 160000,  256, 3, 3, 1, 256,  27,  27,  25,  25},
		{ 160000,  256, 1, 1, 1, 256,  25,  25,  25,  25},
		{ 135424,  256, 3, 3, 1, 256,  25,  25,  23,  23},
		{ 135424,  256, 1, 1, 1, 256,  23,  23,  23,  23},
		{ 112896,  256, 3, 3, 1, 256,  23,  23,  21,  21},
		{  51200,  512, 3, 3, 2, 256,  21,  21,  10,  10},
		{  51200,  512, 1, 1, 1, 512,  10,  10,  10,  10},
		{  32768,  512, 3, 3, 1, 512,  10,  10,   8,   8},
		{  12800,  512, 1, 1, 2, 512,   8,   8,   5,   5},
		{  22050, 2450, 3, 3, 1, 512,   5,   5,   3,   3}
	};
	int status;
	int opr;

	// blocking upload of the pre-processed image (3 planes of 245*245 floats)
	status = clEnqueueWriteBuffer(commandQueue, memVALUE0, CL_TRUE, 0,
		180075 * sizeof(cl_float), 
		IMAGE, 0, NULL, NULL);
	if (status != 0) {
		return status;
	}

	for (int layer = 0; layer < 21; layer++) {
		const int *a = convLayers[layer];
		setFloats(kernelCNNJocl01FwdConv00, 0, 1, 0.1f);
		setIntegers(kernelCNNJocl01FwdConv00, 1, 14, 0, 0, 1,
			a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7], a[8], a[9], 4);
		// even layers read memVALUE0 and write memVALUE1; odd layers the reverse
		if (layer % 2 == 0) {
			setPointersAndEnqueue(a[0], kernelCNNJocl01FwdConv00, 15, 4, memVALUE0, memKERNEL[layer], memBIAS[layer], memVALUE1);
		} else {
			setPointersAndEnqueue(a[0], kernelCNNJocl01FwdConv00, 15, 4, memVALUE1, memKERNEL[layer], memBIAS[layer], memVALUE0);
		}
		if (layer == 0) {
			// the queue is flushed explicitly only after the first launch
			clFlush(commandQueue);
		}
	}

	// pooling: memVALUE1 -> memVALUE0
	setIntegers(kernelCNNJocl02FwdPool00, 0, 13, 0, 1, 2450, 2450, 2450, 3, 3, 1, 1, 2, 1, 1, 1);
	setPointersAndEnqueue(2450, kernelCNNJocl02FwdPool00, 13, 2, memVALUE1, memVALUE0);

	// merge: memVALUE0 -> memfcVALUE0, driven by memMergeIndex
	setFloats(kernelCNNJocl03FwdMerge01, 0, 3, 0.1f, 0.0f, 0.0f);
	setIntegers(kernelCNNJocl03FwdMerge01, 3, 6, 1, 1, 2450, 2450, 0, 0);
	setPointersAndEnqueue(2450, kernelCNNJocl03FwdMerge01, 9, 3, memMergeIndex, memVALUE0, memfcVALUE0);

	// first fully connected layer: memfcVALUE0 -> memVALUE0
	opr = 0;
	setFloats(kernelCNNJocl04FwdFC01, 0, 1, 0.1f);
	setIntegers(kernelCNNJocl04FwdFC01, 1, 11, opr, -1, 1, 2450, -1, -1, -1, 2451, 2450, 1, -1);
	setPointersAndEnqueue(2450, kernelCNNJocl04FwdFC01, 12, 3, memfcVALUE0, memfcWEIGHT[1], memVALUE0);

	// second fully connected layer: memVALUE0 -> memfcVALUE0
	opr = 0;
	setFloats(kernelCNNJocl04FwdFC01, 0, 1, 0.1f);
	setIntegers(kernelCNNJocl04FwdFC01, 1, 11, opr, 7, 1, 49, 7, 7, 2, 2451, 245, 6, 1);
	setPointersAndEnqueue(49, kernelCNNJocl04FwdFC01, 12, 3, memVALUE0, memfcWEIGHT[2], memfcVALUE0);

	// wait for every launch above before reading the result back
	status = clFinish(commandQueue);
	if (status != 0) {
		return status;
	}
	status = clEnqueueReadBuffer(commandQueue, memfcVALUE0, CL_TRUE, 0,
		246 * sizeof(cl_float), 
		RESULTVALUE, 
		0, NULL, NULL);
	if (status != 0) {
		return status;
	}
	return 0;
}