parallel processing - Issue with clEnqueueReadBuffer Function in OpenCL - in an Array Sum Sample -
Hi everyone, I'm a beginner with OpenCL, and I wrote a simple program in C that sums two arrays. Here is part of the code:
// create kernel. cl_kernel kernelsum = clcreatekernel( myprogram, "sum", &error ); // set input array. size_t arraysize = 1000; char* = ( char* ) malloc( sizeof( char ) * arraysize ); char* b = ( char* ) malloc( sizeof( char ) * arraysize ); char* c = ( char* ) malloc( sizeof( char ) * arraysize ); (int = 0; < arraysize; += 1) { a[ ] = 1; b[ ] = 2; c[ ] = -1; } // set buffers. cl_mem a_buffer = clcreatebuffer( mycontext, cl_mem_read_only | cl_mem_copy_host_ptr, arraysize * sizeof( char ), a, &error ); cl_mem b_buffer = clcreatebuffer( mycontext, cl_mem_read_only | cl_mem_copy_host_ptr, arraysize * sizeof( char ), b, &error ); cl_mem c_buffer = clcreatebuffer( mycontext, cl_mem_write_only, arraysize * sizeof( char ), null, &error ); printf( "buffers created.\n" ); // setting kernel arguments. error = clsetkernelarg( kernelsum, 0, sizeof( cl_mem ), &a_buffer ); error |= clsetkernelarg( kernelsum, 1, sizeof( cl_mem ), &b_buffer ); error |= clsetkernelarg( kernelsum, 2, sizeof( cl_mem ), &c_buffer ); printf( "arguments set.\n" ); // enqueue kernels execute. cl_event event; size_t globalworkoffset = 0; size_t globalworksize[ 1 ] = { arraysize }; size_t localworksize[ 1 ] = { 1 }; clenqueuendrangekernel( mycommandqueue, kernelsum, 1, // work_dim 0, // global work offset globalworksize, localworksize, // local work offset 0, null, &event ); printf( "kernel enqueued.\n" ); error = clenqueuereadbuffer( mycommandqueue, c_buffer, cl_true, // blocking option ( size_t ) 0, arraysize * sizeof( char ), // offset, data_size c, // host_ptr 0, null, &event ); if ( error != cl_success ) { printf( "buffer reading failed.\n" ); exit( 1 ); }
However, I got an incorrect result: all the numbers in the "c" array are zeros. I thought it had something to do with clEnqueueReadBuffer, or perhaps not. Any ideas about the issue? I'm expecting your suggestions! :-)
Your call to clEnqueueReadBuffer
does not wait for the kernel to finish. It can execute simultaneously with the kernel. Change the call to:
error = clenqueuereadbuffer( mycommandqueue, c_buffer, cl_true, // blocking option ( size_t ) 0, arraysize * sizeof( char ), // offset, data_size c, // host_ptr 1, &event, null );
This will cause clEnqueueReadBuffer
to wait for the kernel event to finish before starting to read the buffer.
Comments
Post a Comment