@@ -163,6 +163,13 @@ void host_convolution(const cl_float* in, cl_float* out, const cl_float* mask,
163163 }
164164}
165165
166+ cl_int opencl_version_contains (const char * dev_version ,
167+ const char * version_fragment )
168+ {
169+ char * found_version = strstr (dev_version , version_fragment );
170+ return (found_version != NULL );
171+ }
172+
166173int main (int argc , char * argv [])
167174{
168175 cl_int error = CL_SUCCESS ;
@@ -199,7 +206,13 @@ int main(int argc, char* argv[])
199206 OCLERROR_PAR (dev = cl_util_get_device (dev_opts .triplet .plat_index ,
200207 dev_opts .triplet .dev_index ,
201208 dev_opts .triplet .dev_type , & error ),
202- error , dev );
209+ error , end );
210+
211+ // Query OpenCL version supported by device.
212+ char dev_version [64 ];
213+ OCLERROR_RET (clGetDeviceInfo (dev , CL_DEVICE_VERSION , sizeof (dev_version ),
214+ & dev_version , NULL ),
215+ error , end );
203216
204217 if (!diag_opts .quiet )
205218 {
@@ -212,48 +225,88 @@ int main(int argc, char* argv[])
212225 fflush (stdout );
213226 }
214227
215- #if CL_HPP_TARGET_OPENCL_VERSION < 120
216- fprintf (stderr ,
217- "Error: OpenCL subdevices not supported before version 1.2 " );
218- exit (EXIT_FAILURE );
219- #endif
228+ if (opencl_version_contains (dev_version , "1.0" )
229+ || opencl_version_contains (dev_version , "1.1" ))
230+ {
231+ fprintf (stdout ,
232+ "This sample requires device partitioning, which is an OpenCL "
233+ "1.2 feature, but the device chosen only supports OpenCL %s. "
234+ "Please try with a different OpenCL device instead.\n" ,
235+ dev_version );
236+ exit (EXIT_SUCCESS );
237+ }
238+
239+ // Check if device supports fission.
240+ cl_device_partition_property * dev_props = NULL ;
241+ size_t props_size = 0 ;
242+ OCLERROR_RET (clGetDeviceInfo (dev , CL_DEVICE_PARTITION_PROPERTIES , 0 , NULL ,
243+ & props_size ),
244+ error , end );
245+ if (props_size == 0 )
246+ {
247+ fprintf (stdout ,
248+ "This sample requires device fission, which is a "
249+ "feature available from OpenCL 1.2 on, but the "
250+ "device chosen does not seem to support it. Please "
251+ "try with a different OpenCL device instead.\n" );
252+ exit (EXIT_SUCCESS );
253+ }
254+
255+ // Check if the "partition equally" type is supported.
256+ MEM_CHECK (dev_props = (cl_device_partition_property * )malloc (props_size ),
257+ error , end );
258+ OCLERROR_RET (clGetDeviceInfo (dev , CL_DEVICE_PARTITION_PROPERTIES ,
259+ props_size , dev_props , NULL ),
260+ error , props );
261+ size_t prop = 0 ,
262+ props_length = props_size / sizeof (cl_device_partition_property );
263+ for (; prop < props_length ; ++ prop )
264+ {
265+ if (dev_props [prop ] == CL_DEVICE_PARTITION_EQUALLY )
266+ {
267+ break ;
268+ }
269+ }
270+ if (prop == props_length )
271+ {
272+ fprintf (stdout ,
273+ "This sample requires partition equally, which is a "
274+ "partition scheme available from OpenCL 1.2 on, but "
275+ "the device chosen does not seem to support it. "
276+ "Please try with a different OpenCL device instead.\n" );
277+ exit (EXIT_SUCCESS );
278+ }
220279
221- // Create subdevices , each with half of the compute units available.
280+ // Create sub-devices , each with half of the compute units available.
222281 cl_uint max_compute_units = 0 ;
282+ cl_uint subdev_created = 0 ;
283+ const cl_uint subdev_count = 2 ;
223284 OCLERROR_RET (clGetDeviceInfo (dev , CL_DEVICE_MAX_COMPUTE_UNITS ,
224285 sizeof (cl_uint ), & max_compute_units , NULL ),
225- error , dev );
286+ error , props );
226287 cl_device_partition_property subdevices_properties [] = {
227288 (cl_device_partition_property )CL_DEVICE_PARTITION_EQUALLY ,
228- (cl_device_partition_property )(max_compute_units / 2 ), 0
289+ (cl_device_partition_property )(max_compute_units / subdev_count ), 0
229290 };
230291
231- // Initialize subdevices array with one device and then reallocate for
232- // MacOS and Windows not to complain about NULL subdevices array.
233- cl_uint subdev_count = 1 ;
234292 cl_device_id * subdevices =
235293 (cl_device_id * )malloc (subdev_count * sizeof (cl_device_id ));
236294
237- OCLERROR_RET (clCreateSubDevices (dev , subdevices_properties ,
238- max_compute_units , subdevices ,
239- & subdev_count ),
240- error , dev );
295+ OCLERROR_RET (clCreateSubDevices (dev , subdevices_properties , subdev_count ,
296+ subdevices , & subdev_created ),
297+ error , props );
241298
242- if (subdev_count < 2 )
299+ if (subdev_created < subdev_count )
243300 {
244- fprintf (stderr , "Error: OpenCL cannot create subdevices" );
301+ fprintf (stderr ,
302+ "Error: OpenCL cannot create the number of sub-devices "
303+ "requested\n" );
245304 exit (EXIT_FAILURE );
246305 }
247306
248- subdevices =
249- (cl_device_id * )realloc (subdevices , subdev_count * sizeof (cl_device_id ));
250- OCLERROR_RET (clCreateSubDevices (dev , subdevices_properties , subdev_count ,
251- subdevices , NULL ),
252- error , subdevs );
253-
254307 OCLERROR_PAR (context = clCreateContext (NULL , subdev_count , subdevices , NULL ,
255308 NULL , & error ),
256- error , subdevs );
309+ error , subdev1 );
257310
258311 // Read kernel file.
259312 const char * kernel_location = "./convolution.cl" ;
@@ -280,11 +333,14 @@ int main(int argc, char* argv[])
280333 // it's only necessary to add the -cl-std option for 2.0 and 3.0 OpenCL
281334 // versions.
282335 char compiler_options [1023 ] = "" ;
283- #if CL_HPP_TARGET_OPENCL_VERSION >= 300
284- strcat (compiler_options , "-cl-std=CL3.0 " );
285- #elif CL_HPP_TARGET_OPENCL_VERSION >= 200
286- strcat (compiler_options , "-cl-std=CL2.0 " );
287- #endif
336+ if (opencl_version_contains (dev_version , "3." ))
337+ {
338+ strcat (compiler_options , "-cl-std=CL3.0 " );
339+ }
340+ else if (opencl_version_contains (dev_version , "2." ))
341+ {
342+ strcat (compiler_options , "-cl-std=CL2.0 " );
343+ }
288344
289345 OCLERROR_RET (
290346 clBuildProgram (program , 2 , subdevices , compiler_options , NULL , NULL ),
@@ -356,7 +412,7 @@ int main(int argc, char* argv[])
356412 mask_dim * mask_dim , -1000 , 1000 );
357413
358414 // Create device buffers, from which we will create the subbuffers for the
359- // subdevices .
415+ // sub-devices .
360416 const size_t grid_midpoint = y_dim / 2 ;
361417 const size_t pad_grid_midpoint = pad_y_dim / 2 ;
362418
@@ -391,7 +447,7 @@ int main(int argc, char* argv[])
391447 fflush (stdout );
392448 }
393449
394- // Set up subdevices for kernel execution.
450+ // Set up sub-devices for kernel execution.
395451 const size_t half_input_bytes =
396452 sizeof (cl_float ) * pad_x_dim * (pad_grid_midpoint + 1 );
397453 const size_t input_offset =
@@ -414,7 +470,7 @@ int main(int argc, char* argv[])
414470 error , bufmask );
415471
416472 // Initialize queues for command execution on each device.
417- #if CL_HPP_TARGET_OPENCL_VERSION >= 200
473+ #if defined( CL_VERSION_2_0 ) || defined( CL_VERSION_3_0 )
418474 cl_command_queue_properties props [] = { CL_QUEUE_PROPERTIES ,
419475 CL_QUEUE_PROFILING_ENABLE , 0 };
420476 OCLERROR_PAR (sub_queues [subdevice ] = clCreateCommandQueueWithProperties (
@@ -507,7 +563,8 @@ int main(int argc, char* argv[])
507563 }
508564
509565 GET_CURRENT_TIMER (host_start )
510- host_convolution (h_input_grid , h_output_grid , h_mask , (cl_uint )x_dim , (cl_uint )y_dim );
566+ host_convolution (h_input_grid , h_output_grid , h_mask , (cl_uint )x_dim ,
567+ (cl_uint )y_dim );
511568 GET_CURRENT_TIMER (host_end )
512569 size_t host_time ;
513570 TIMER_DIFFERENCE (host_time , host_start , host_end )
@@ -588,31 +645,31 @@ int main(int argc, char* argv[])
588645event1 :
589646 OCLERROR_RET (clReleaseEvent (events [0 ]), end_error , subbufout );
590647subbufout :
591- if (subdevice = = 1 )
648+ if (subdevice > = 1 )
592649 {
593650 OCLERROR_RET (clReleaseMemObject (sub_output_grids [1 ]), end_error ,
594651 subbufout0 );
595652 }
596653subbufout0 :
597654 OCLERROR_PAR (clReleaseMemObject (sub_output_grids [0 ]), end_error , subbufin );
598655subbufin :
599- if (subdevice = = 1 )
656+ if (subdevice > = 1 )
600657 {
601658 OCLERROR_RET (clReleaseMemObject (sub_input_grids [1 ]), end_error ,
602659 subbufin0 );
603660 }
604661subbufin0 :
605662 OCLERROR_RET (clReleaseMemObject (sub_input_grids [0 ]), end_error , subqueue );
606663subqueue :
607- if (subdevice = = 1 )
664+ if (subdevice > = 1 )
608665 {
609666 OCLERROR_RET (clReleaseCommandQueue (sub_queues [1 ]), end_error ,
610667 subqueue0 );
611668 }
612669subqueue0 :
613- OCLERROR_RET (clReleaseCommandQueue (sub_queues [1 ]), end_error , conv );
670+ OCLERROR_RET (clReleaseCommandQueue (sub_queues [0 ]), end_error , conv );
614671conv :
615- if (subdevice = = 1 )
672+ if (subdevice > = 1 )
616673 {
617674 OCLERROR_RET (clReleaseKernel (convolutions [1 ]), end_error , conv0 );
618675 }
@@ -631,15 +688,19 @@ int main(int argc, char* argv[])
631688hinput :
632689 free (h_input_grid );
633690prg :
634- OCLERROR_RET (clReleaseProgram (program ), end_error , subdevs );
691+ OCLERROR_RET (clReleaseProgram (program ), end_error , ker );
635692ker :
636693 free (kernel );
637694contx :
638- OCLERROR_RET (clReleaseContext (context ), end_error , end );
695+ OCLERROR_RET (clReleaseContext (context ), end_error , subdev1 );
696+ subdev1 :
697+ OCLERROR_RET (clReleaseDevice (subdevices [1 ]), end_error , subdev0 );
698+ subdev0 :
699+ OCLERROR_RET (clReleaseDevice (subdevices [0 ]), end_error , subdevs );
639700subdevs :
640701 free (subdevices );
641- dev :
642- OCLERROR_RET ( clReleaseDevice ( dev ), end_error , end );
702+ props :
703+ free ( dev_props );
643704end :
644705 if (error ) cl_util_print_error (error );
645706 return error ;
0 commit comments