/* Tom Gall (tom.gall@linaro.org / tom_gall@mac.com) * Copyright Linaro 2014 * Released under the terms and conditions of the license documented in the LICENSE file */ int opencl_map_input_data(opencl_sqlite_context *s) { cl_int err; s->data_gpu->d=clCreateBuffer(s->context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, s->data_cpu->rows * s->data_cpu->stride, s->data_cpu->d, &err); if(err < 0) { perror("Couldn't create data buffer object"); exit(1); } return OPENCL_SUCCESS; } int opencl_map_output_data(opencl_sqlite_context *s, int work_units_per_kernel) { s->results_gpu->r=clCreateBuffer(s->context, CL_MEM_WRITE_ONLY, s->data_cpu->rows * s->data_cpu->stride, NULL, NULL); s->results_gpu->roffsetResults_cl=clCreateBuffer(s->context, CL_MEM_WRITE_ONLY, sizeof(int) * work_units_per_kernel, NULL, NULL); return OPENCL_SUCCESS; } int opencl_map_output_summary_data(opencl_sqlite_context *s, int work_units_per_kernel, int columns) { s->results_gpu->r=clCreateBuffer(s->context, CL_MEM_WRITE_ONLY, work_units_per_kernel * sizeof(float) * columns, NULL, NULL); s->results_gpu->roffsetResults_cl = NULL; /*s->results_gpu->roffsetResults_cl=clCreateBuffer(s->context, CL_MEM_WRITE_ONLY, sizeof(int) * work_units_per_kernel, NULL, NULL); */ return OPENCL_SUCCESS; } int opencl_map_output_xsummary_data(opencl_sqlite_context *s, int work_units_per_kernel, int columns) { s->results_gpu->r=clCreateBuffer(s->context, CL_MEM_WRITE_ONLY, work_units_per_kernel * sizeof(float) * columns, NULL, NULL); s->results_gpu->roffsetResults_cl=clCreateBuffer(s->context, CL_MEM_WRITE_ONLY, sizeof(int) * work_units_per_kernel*columns, NULL, NULL); return OPENCL_SUCCESS; } //CL_MEM_ALLOC_HOST_PTR int opencl_map_output_vector_data(opencl_sqlite_context *s) { s->results_gpu->r=clCreateBuffer(s->context, CL_MEM_WRITE_ONLY, s->data_cpu->rows * sizeof(int), NULL, NULL); return OPENCL_SUCCESS; } int opencl_transfer_results(opencl_sqlite_context *s, int work_units_per_kernel, int result_stride) { cl_int err=0; int i; int *data; size_t offset=0; size_t target_offset=0; //int r[4]; int *t; int r[work_units_per_kernel]; //int * r=malloc(work_units_per_kernel * sizeof(int)); t = clEnqueueMapBuffer(s->queue, s->results_gpu->roffsetResults_cl, CL_TRUE, CL_MAP_READ, 0, work_units_per_kernel * sizeof(int), 0, NULL, NULL, &err); memcpy (r, t, work_units_per_kernel * sizeof(int)); // err = clEnqueueReadBuffer(s->queue, s->results_gpu->roffsetResults_cl, CL_TRUE, 0, // work_units_per_kernel * sizeof(int), r, 0, NULL, NULL); // int total = 2; int total =0; for (i=0; iresults_cpu->r = malloc(total * result_stride * sizeof(int)); s->results_cpu->r = malloc(100000 * result_stride * sizeof(int)); // data = (int *) s->results_cpu->r; //int *data2 = malloc(100000 * result_stride * sizeof(int)); data = clEnqueueMapBuffer(s->queue, s->results_gpu->r, CL_TRUE, CL_MAP_READ, 0, 100000 * result_stride * sizeof(int), 0, NULL, NULL, &err); memcpy(s->results_cpu->r, data, 100000 * result_stride * sizeof(int)); clEnqueueUnmapMemObject(s->queue, s->results_gpu->r, data, 0, NULL, NULL); /* err |= clEnqueueReadBuffer(s->queue, s->results_gpu->r, CL_TRUE, 0, 100000 * result_stride * sizeof(int), data2, 0, NULL, NULL); */ /* for (i = 0; i < work_units_per_kernel; i++) { err |= clEnqueueReadBuffer(s->queue, s->results_gpu->r, CL_TRUE, offset, r[i] * result_stride * sizeof(int), data, 0, NULL, NULL); offset += ((s->data_cpu->rows/work_units_per_kernel) * 7 * sizeof(int)); data += (r[i] * result_stride); } if(err < 0) { perror("Couldn't read result buffer object"); // exit(1); } */ /* for (i = 0; i < work_units_per_kernel ; i++) { data = clEnqueueMapBuffer(s->queue, s->results_gpu->r, CL_TRUE, CL_MAP_READ, 0, r[i] * result_stride * sizeof(int), 0, NULL, NULL, &err); memcpy(s->results_cpu->r, data, r[i] * result_stride * sizeof(int)); clEnqueueUnmapMemObject(s->queue, s->results_gpu->r, data, 0, NULL, NULL); offset += (r[i] * result_stride * sizeof(int)); } */ // clEnqueueUnmapMemObject(s->queue, s->results_gpu->roffsetResults_cl, r, 0, NULL, NULL); return total; } float opencl_transfer_fsummary_results(opencl_sqlite_context *s, int work_units_per_kernel) { cl_int err=0; int i; int *data; size_t offset=0; size_t target_offset=0; float r[work_units_per_kernel]; data = clEnqueueMapBuffer(s->queue, s->results_gpu->r, CL_TRUE, CL_MAP_READ, 0, work_units_per_kernel * sizeof(float), 0, NULL, NULL, &err); memcpy (r, data, work_units_per_kernel * sizeof(float)); for (i=1; i < work_units_per_kernel; i++) r[0] += r[i]; /* err = clEnqueueReadBuffer(s->queue, s->results_gpu->r, CL_TRUE, 0, work_units_per_kernel * sizeof(float), r, 0, NULL, NULL); r[0]=r[0] + r[1]+ r[2] + r[3]; */ if(err < 0) { perror("Couldn't read result buffer object"); // exit(1); } return r[0]; } int opencl_transfer_isummary_results(opencl_sqlite_context *s, int work_units_per_kernel, int columns) { cl_int err=0; int i; int *data; size_t offset=0; size_t target_offset=0; int r[work_units_per_kernel*columns]; int maxI, minI; //int * r=malloc(work_units_per_kernel * sizeof(int)); data = clEnqueueMapBuffer(s->queue, s->results_gpu->r, CL_TRUE, CL_MAP_READ, 0, work_units_per_kernel * sizeof(int) * columns, 0, NULL, NULL, &err); memcpy (r, data, work_units_per_kernel * sizeof(int) * columns); maxI = r[0]; minI=r[1]; for (i=0; i< (work_units_per_kernel * columns); i+= 2) { if (r[i] > maxI) maxI = r[i]; if (r[i+1] < minI) minI = r[i+1]; } if(err < 0) { perror("Couldn't read result buffer object"); // exit(1); } printf("max is %d min is %d\n", maxI, minI); return maxI; } int opencl_transfer_ixsummary_results(opencl_sqlite_context *s, int work_units_per_kernel, int columns) { // cl_event prof_event; // cl_ulong time_start, time_end, total_time=0; cl_int err=0; int i; int *data, *counts; int total=0, count=0; size_t offset=0; size_t target_offset=0; int r[work_units_per_kernel*columns]; int t[work_units_per_kernel]; //int * r; //=malloc(work_units_per_kernel * sizeof(int)); // err = clEnqueueReadBuffer(s->queue, s->results_gpu->r, CL_TRUE, 0, // work_units_per_kernel * sizeof(int) *columns, r, 0, NULL, NULL); counts = clEnqueueMapBuffer(s->queue, s->results_gpu->roffsetResults_cl, CL_TRUE, CL_MAP_READ, 0, work_units_per_kernel * sizeof(int), 0, NULL, NULL, &err); memcpy (t, counts, work_units_per_kernel * sizeof(int)); for (i= 0; i < work_units_per_kernel; i++) { count+=t[i]; } /* clGetEventProfilingInfo(prof_event, CL_PROFILING_COMMAND_START, sizeof(time_start), &time_start, NULL); clGetEventProfilingInfo(prof_event, CL_PROFILING_COMMAND_END, sizeof(time_end), &time_end, NULL); total_time = time_end - time_start; printf("1st dispatch time: %llu microseconds\n", total_time/1000); */ // total=r[0] + r[1]+ r[2] + r[3]; // s->results_cpu->r = malloc(work_units_per_kernel * sizeof(int) * columns); // data = (int *) s->results_cpu->r; // err |= clEnqueueReadBuffer(s->queue, s->results_gpu->r, CL_TRUE, 0, // work_units_per_kernel * columns * sizeof(int), data, // 0, NULL, NULL); data = clEnqueueMapBuffer(s->queue, s->results_gpu->r, CL_TRUE, CL_MAP_READ, 0, work_units_per_kernel * sizeof(int) * columns, 0, NULL, NULL, &err); memcpy (r, data, work_units_per_kernel * sizeof(int) * columns); for (i= 0; i < work_units_per_kernel; i++) { total+=r[i]; } if(err < 0) { perror("Couldn't read result buffer object"); // exit(1); } return (total/count); } int opencl_transfer_vector_results(opencl_sqlite_context *s) { int *data; int total=0, i, match, totalmis=0; cl_int err=0; int *valdata = (int*)s->data_cpu->d; data = clEnqueueMapBuffer(s->queue, s->results_gpu->r, CL_TRUE, CL_MAP_READ, 0, s->data_cpu->rows * sizeof(int) , 0, NULL, NULL, &err); s->results_cpu->r = malloc (s->data_cpu->rows * sizeof(int)); memcpy (s->results_cpu->r, data, s->data_cpu->rows * sizeof(int) ); // s->results_cpu->r = data; data = (int *)s->results_cpu->r; for (i= 0; i < s->data_cpu->rows; i++) { if (*data) { total++; } /* if ((valdata[i*3+1] > 60) && (valdata[(i*3)+2] < 0)) { match=1; totalmis++; } else { match=0; } if (*data && match) { total++; } else if (match && !(*data)) { printf("mismatch row %d\n", i); } */ data++; } if(err < 0) { perror("Couldn't read result buffer object"); // exit(1); } printf("cl found %d cpu found %d\n",total, totalmis); s->results_cpu->rows=total; return total; } void *opencl_result_vector_to_result_buffer(opencl_sqlite_context *s) { int *resultBuffer = malloc (sizeof (int) * s->results_cpu->rows * s->results_cpu->columns); int *r = resultBuffer; int *resultVector = s->results_cpu->r; int i = s->data_cpu->rows; for( ; i; i--) { if (*resultVector) { *r = *resultVector; r++; resultVector++; *r = *resultVector; r++; resultVector++; *r = *resultVector; r++; resultVector++; } else { r+=3; } } return resultBuffer; }