@@ -163,6 +163,20 @@ void host_convolution(const cl_float* in, cl_float* out, const cl_float* mask,
163163 }
164164}
165165
166+ cl_int opencl_version_contains (const cl_device_id dev ,
167+ const char * version_fragment )
168+ {
169+ char version [64 ];
170+ cl_int error = CL_SUCCESS ;
171+ OCLERROR_RET (clGetDeviceInfo (dev , CL_DEVICE_VERSION , sizeof (version ),
172+ & version , NULL ),
173+ error , fail );
174+ char * found_version = strstr (version , version_fragment );
175+ return (found_version != NULL );
176+ fail :
177+ return error ;
178+ }
179+
166180int main (int argc , char * argv [])
167181{
168182 cl_int error = CL_SUCCESS ;
@@ -199,7 +213,7 @@ int main(int argc, char* argv[])
199213 OCLERROR_PAR (dev = cl_util_get_device (dev_opts .triplet .plat_index ,
200214 dev_opts .triplet .dev_index ,
201215 dev_opts .triplet .dev_type , & error ),
202- error , dev );
216+ error , end );
203217
204218 if (!diag_opts .quiet )
205219 {
@@ -212,32 +226,66 @@ int main(int argc, char* argv[])
212226 fflush (stdout );
213227 }
214228
215- #if CL_HPP_TARGET_OPENCL_VERSION < 120
216- fprintf (stderr ,
217- "Error: OpenCL subdevices not supported before version 1.2 " );
218- exit (EXIT_FAILURE );
219- #endif
229+ if (opencl_version_contains (dev , "1.1" ))
230+ {
231+ fprintf (stderr ,
232+ "Error: OpenCL sub-devices not supported before version 1.2 " );
233+ exit (EXIT_FAILURE );
234+ }
220235
221- // Create subdevices, each with half of the compute units available.
236+ // Check if device supports fission.
237+ cl_device_partition_property * dev_props = NULL ;
238+ size_t props_size = 0 ;
239+ OCLERROR_RET (clGetDeviceInfo (dev , CL_DEVICE_PARTITION_PROPERTIES , 0 , NULL ,
240+ & props_size ),
241+ error , end );
242+ if (props_size == 0 )
243+ {
244+ fprintf (stderr , "Error: device does not support fission" );
245+ exit (EXIT_FAILURE );
246+ }
247+
248+ // Check if the "partition equally" type is supported.
249+ MEM_CHECK (dev_props = (cl_device_partition_property * )malloc (sizeof (char )
250+ * props_size ),
251+ error , end );
252+ OCLERROR_RET (clGetDeviceInfo (dev , CL_DEVICE_PARTITION_PROPERTIES ,
253+ sizeof (char ) * props_size , dev_props , NULL ),
254+ error , props );
255+ size_t prop = 0 ;
256+ for (; prop < props_size ; ++ prop )
257+ {
258+ if (dev_props [prop ] == CL_DEVICE_PARTITION_EQUALLY )
259+ {
260+ break ;
261+ }
262+ }
263+ if (prop == props_size )
264+ {
265+ fprintf (stderr , "Error: device does not partition equally" );
266+ exit (EXIT_FAILURE );
267+ }
268+
269+ // Create sub-devices, each with half of the compute units available.
222270 cl_uint max_compute_units = 0 ;
223271 OCLERROR_RET (clGetDeviceInfo (dev , CL_DEVICE_MAX_COMPUTE_UNITS ,
224272 sizeof (cl_uint ), & max_compute_units , NULL ),
225- error , dev );
273+ error , props );
226274 cl_device_partition_property subdevices_properties [] = {
227275 (cl_device_partition_property )CL_DEVICE_PARTITION_EQUALLY ,
228276 (cl_device_partition_property )(max_compute_units / 2 ), 0
229277 };
230278
231- // Initialize subdevices array with one device and then reallocate for
232- // MacOS and Windows not to complain about NULL subdevices array.
279+ // Initialize sub-devices array with one device and then reallocate for
280+ // MacOS and Windows not to complain about NULL sub-devices array.
233281 cl_uint subdev_count = 1 ;
234282 cl_device_id * subdevices =
235283 (cl_device_id * )malloc (subdev_count * sizeof (cl_device_id ));
236284
237285 OCLERROR_RET (clCreateSubDevices (dev , subdevices_properties ,
238286 max_compute_units , subdevices ,
239287 & subdev_count ),
240- error , dev );
288+ error , props );
241289
242290 if (subdev_count < 2 )
243291 {
@@ -249,11 +297,11 @@ int main(int argc, char* argv[])
249297 (cl_device_id * )realloc (subdevices , subdev_count * sizeof (cl_device_id ));
250298 OCLERROR_RET (clCreateSubDevices (dev , subdevices_properties , subdev_count ,
251299 subdevices , NULL ),
252- error , subdevs );
300+ error , subdev1 );
253301
254302 OCLERROR_PAR (context = clCreateContext (NULL , subdev_count , subdevices , NULL ,
255303 NULL , & error ),
256- error , subdevs );
304+ error , subdev1 );
257305
258306 // Read kernel file.
259307 const char * kernel_location = "./convolution.cl" ;
@@ -280,11 +328,14 @@ int main(int argc, char* argv[])
280328 // it's only necessary to add the -cl-std option for 2.0 and 3.0 OpenCL
281329 // versions.
282330 char compiler_options [1023 ] = "" ;
283- #if CL_HPP_TARGET_OPENCL_VERSION >= 300
284- strcat (compiler_options , "-cl-std=CL3.0 " );
285- #elif CL_HPP_TARGET_OPENCL_VERSION >= 200
286- strcat (compiler_options , "-cl-std=CL2.0 " );
287- #endif
331+ if (opencl_version_contains (dev , "3." ))
332+ {
333+ strcat (compiler_options , "-cl-std=CL3.0 " );
334+ }
335+ else if (opencl_version_contains (dev , "2." ))
336+ {
337+ strcat (compiler_options , "-cl-std=CL2.0 " );
338+ }
288339
289340 OCLERROR_RET (
290341 clBuildProgram (program , 2 , subdevices , compiler_options , NULL , NULL ),
@@ -356,7 +407,7 @@ int main(int argc, char* argv[])
356407 mask_dim * mask_dim , -1000 , 1000 );
357408
358409 // Create device buffers, from which we will create the subbuffers for the
359- // subdevices .
410+ // sub-devices .
360411 const size_t grid_midpoint = y_dim / 2 ;
361412 const size_t pad_grid_midpoint = pad_y_dim / 2 ;
362413
@@ -391,7 +442,7 @@ int main(int argc, char* argv[])
391442 fflush (stdout );
392443 }
393444
394- // Set up subdevices for kernel execution.
445+ // Set up sub-devices for kernel execution.
395446 const size_t half_input_bytes =
396447 sizeof (cl_float ) * pad_x_dim * (pad_grid_midpoint + 1 );
397448 const size_t input_offset =
@@ -414,7 +465,7 @@ int main(int argc, char* argv[])
414465 error , bufmask );
415466
416467 // Initialize queues for command execution on each device.
417- #if CL_HPP_TARGET_OPENCL_VERSION >= 200
468+ #if defined( CL_VERSION_2_0 ) || defined( CL_VERSION_3_0 )
418469 cl_command_queue_properties props [] = { CL_QUEUE_PROPERTIES ,
419470 CL_QUEUE_PROFILING_ENABLE , 0 };
420471 OCLERROR_PAR (sub_queues [subdevice ] = clCreateCommandQueueWithProperties (
@@ -507,7 +558,8 @@ int main(int argc, char* argv[])
507558 }
508559
509560 GET_CURRENT_TIMER (host_start )
510- host_convolution (h_input_grid , h_output_grid , h_mask , (cl_uint )x_dim , (cl_uint )y_dim );
561+ host_convolution (h_input_grid , h_output_grid , h_mask , (cl_uint )x_dim ,
562+ (cl_uint )y_dim );
511563 GET_CURRENT_TIMER (host_end )
512564 size_t host_time ;
513565 TIMER_DIFFERENCE (host_time , host_start , host_end )
@@ -588,31 +640,31 @@ int main(int argc, char* argv[])
588640event1 :
589641 OCLERROR_RET (clReleaseEvent (events [0 ]), end_error , subbufout );
590642subbufout :
591- if (subdevice = = 1 )
643+ if (subdevice > = 1 )
592644 {
593645 OCLERROR_RET (clReleaseMemObject (sub_output_grids [1 ]), end_error ,
594646 subbufout0 );
595647 }
596648subbufout0 :
597649 OCLERROR_PAR (clReleaseMemObject (sub_output_grids [0 ]), end_error , subbufin );
598650subbufin :
599- if (subdevice = = 1 )
651+ if (subdevice > = 1 )
600652 {
601653 OCLERROR_RET (clReleaseMemObject (sub_input_grids [1 ]), end_error ,
602654 subbufin0 );
603655 }
604656subbufin0 :
605657 OCLERROR_RET (clReleaseMemObject (sub_input_grids [0 ]), end_error , subqueue );
606658subqueue :
607- if (subdevice = = 1 )
659+ if (subdevice > = 1 )
608660 {
609661 OCLERROR_RET (clReleaseCommandQueue (sub_queues [1 ]), end_error ,
610662 subqueue0 );
611663 }
612664subqueue0 :
613- OCLERROR_RET (clReleaseCommandQueue (sub_queues [1 ]), end_error , conv );
665+ OCLERROR_RET (clReleaseCommandQueue (sub_queues [0 ]), end_error , conv );
614666conv :
615- if (subdevice = = 1 )
667+ if (subdevice > = 1 )
616668 {
617669 OCLERROR_RET (clReleaseKernel (convolutions [1 ]), end_error , conv0 );
618670 }
@@ -631,15 +683,19 @@ int main(int argc, char* argv[])
631683hinput :
632684 free (h_input_grid );
633685prg :
634- OCLERROR_RET (clReleaseProgram (program ), end_error , subdevs );
686+ OCLERROR_RET (clReleaseProgram (program ), end_error , ker );
635687ker :
636688 free (kernel );
637689contx :
638- OCLERROR_RET (clReleaseContext (context ), end_error , end );
690+ OCLERROR_RET (clReleaseContext (context ), end_error , subdev1 );
691+ subdev1 :
692+ OCLERROR_RET (clReleaseDevice (subdevices [1 ]), end_error , subdev0 );
693+ subdev0 :
694+ OCLERROR_RET (clReleaseDevice (subdevices [0 ]), end_error , subdevs );
639695subdevs :
640696 free (subdevices );
641- dev :
642- OCLERROR_RET ( clReleaseDevice ( dev ), end_error , end );
697+ props :
698+ free ( dev_props );
643699end :
644700 if (error ) cl_util_print_error (error );
645701 return error ;
0 commit comments