@@ -163,6 +163,22 @@ void host_convolution(const cl_float* in, cl_float* out, const cl_float* mask,
163163 }
164164}
165165
166+ cl_int opencl_version_contains (const cl_device_id dev ,
167+ const char * version_fragment )
168+ {
169+ char version [64 ];
170+ cl_int error = CL_SUCCESS ;
171+ OCLERROR_RET (clGetDeviceInfo (dev , CL_DEVICE_VERSION , sizeof (version ),
172+ & version , NULL ),
173+ error , fail );
174+ char * found_version = strstr (version , version_fragment );
175+ printf ("Version detected %s from version %s and fragment %s\n" ,
176+ found_version , version , version_fragment );
177+ return (found_version != NULL );
178+ fail :
179+ return error ;
180+ }
181+
166182int main (int argc , char * argv [])
167183{
168184 cl_int error = CL_SUCCESS ;
@@ -212,32 +228,66 @@ int main(int argc, char* argv[])
212228 fflush (stdout );
213229 }
214230
215- #if CL_HPP_TARGET_OPENCL_VERSION < 120
216- fprintf (stderr ,
217- "Error: OpenCL subdevices not supported before version 1.2 " );
218- exit (EXIT_FAILURE );
219- #endif
231+ if (opencl_version_contains (dev , "1.1" ))
232+ {
233+ fprintf (stderr ,
234+ "Error: OpenCL sub-devices not supported before version 1.2 " );
235+ exit (EXIT_FAILURE );
236+ }
220237
221- // Create subdevices, each with half of the compute units available.
238+ // Check if device supports fission.
239+ cl_device_partition_property * dev_props = NULL ;
240+ size_t props_size = 0 ;
241+ OCLERROR_RET (clGetDeviceInfo (dev , CL_DEVICE_PARTITION_PROPERTIES , 0 , NULL ,
242+ & props_size ),
243+ error , dev );
244+ if (props_size == 0 )
245+ {
246+ fprintf (stderr , "Error: device does not support fission" );
247+ exit (EXIT_FAILURE );
248+ }
249+
250+ // Check if the "partition equally" type is supported.
251+ MEM_CHECK (dev_props = (cl_device_partition_property * )malloc (sizeof (char )
252+ * props_size ),
253+ error , dev );
254+ OCLERROR_RET (clGetDeviceInfo (dev , CL_DEVICE_PARTITION_PROPERTIES ,
255+ sizeof (char ) * props_size , dev_props , NULL ),
256+ error , props );
257+ size_t i = 0 ;
258+ for (; i < props_size ; ++ i )
259+ {
260+ if (dev_props [i ] == CL_DEVICE_PARTITION_EQUALLY )
261+ {
262+ break ;
263+ }
264+ }
265+ if (i == props_size )
266+ {
267+ fprintf (stderr , "Error: device does not partition equally" );
268+ exit (EXIT_FAILURE );
269+ }
270+
271+ // Create sub-devices, each with half of the compute units available.
222272 cl_uint max_compute_units = 0 ;
223273 OCLERROR_RET (clGetDeviceInfo (dev , CL_DEVICE_MAX_COMPUTE_UNITS ,
224274 sizeof (cl_uint ), & max_compute_units , NULL ),
225- error , dev );
275+ error , props );
226276 cl_device_partition_property subdevices_properties [] = {
227277 (cl_device_partition_property )CL_DEVICE_PARTITION_EQUALLY ,
228278 (cl_device_partition_property )(max_compute_units / 2 ), 0
229279 };
230280
231- // Initialize subdevices array with one device and then reallocate for
232- // MacOS and Windows not to complain about NULL subdevices array.
281+ // Initialize sub-devices array with one device and then reallocate for
282+ // MacOS and Windows not to complain about NULL sub-devices array.
233283 cl_uint subdev_count = 1 ;
234284 cl_device_id * subdevices =
235285 (cl_device_id * )malloc (subdev_count * sizeof (cl_device_id ));
236286
237287 OCLERROR_RET (clCreateSubDevices (dev , subdevices_properties ,
238288 max_compute_units , subdevices ,
239289 & subdev_count ),
240- error , dev );
290+ error , props );
241291
242292 if (subdev_count < 2 )
243293 {
@@ -249,11 +299,11 @@ int main(int argc, char* argv[])
249299 (cl_device_id * )realloc (subdevices , subdev_count * sizeof (cl_device_id ));
250300 OCLERROR_RET (clCreateSubDevices (dev , subdevices_properties , subdev_count ,
251301 subdevices , NULL ),
252- error , subdevs );
302+ error , subdev1 );
253303
254304 OCLERROR_PAR (context = clCreateContext (NULL , subdev_count , subdevices , NULL ,
255305 NULL , & error ),
256- error , subdevs );
306+ error , subdev1 );
257307
258308 // Read kernel file.
259309 const char * kernel_location = "./convolution.cl" ;
@@ -280,11 +330,14 @@ int main(int argc, char* argv[])
280330 // it's only necessary to add the -cl-std option for 2.0 and 3.0 OpenCL
281331 // versions.
282332 char compiler_options [1023 ] = "" ;
283- #if CL_HPP_TARGET_OPENCL_VERSION >= 300
284- strcat (compiler_options , "-cl-std=CL3.0 " );
285- #elif CL_HPP_TARGET_OPENCL_VERSION >= 200
286- strcat (compiler_options , "-cl-std=CL2.0 " );
287- #endif
333+ if (opencl_version_contains (dev , "3." ))
334+ {
335+ strcat (compiler_options , "-cl-std=CL3.0 " );
336+ }
337+ else if (opencl_version_contains (dev , "2." ))
338+ {
339+ strcat (compiler_options , "-cl-std=CL2.0 " );
340+ }
288341
289342 OCLERROR_RET (
290343 clBuildProgram (program , 2 , subdevices , compiler_options , NULL , NULL ),
@@ -356,7 +409,7 @@ int main(int argc, char* argv[])
356409 mask_dim * mask_dim , -1000 , 1000 );
357410
358411 // Create device buffers, from which we will create the subbuffers for the
359- // subdevices .
412+ // sub-devices .
360413 const size_t grid_midpoint = y_dim / 2 ;
361414 const size_t pad_grid_midpoint = pad_y_dim / 2 ;
362415
@@ -391,7 +444,7 @@ int main(int argc, char* argv[])
391444 fflush (stdout );
392445 }
393446
394- // Set up subdevices for kernel execution.
447+ // Set up sub-devices for kernel execution.
395448 const size_t half_input_bytes =
396449 sizeof (cl_float ) * pad_x_dim * (pad_grid_midpoint + 1 );
397450 const size_t input_offset =
@@ -414,7 +467,7 @@ int main(int argc, char* argv[])
414467 error , bufmask );
415468
416469 // Initialize queues for command execution on each device.
417- #if CL_HPP_TARGET_OPENCL_VERSION >= 200
470+ #if defined( CL_VERSION_2_0 ) || defined( CL_VERSION_3_0 )
418471 cl_command_queue_properties props [] = { CL_QUEUE_PROPERTIES ,
419472 CL_QUEUE_PROFILING_ENABLE , 0 };
420473 OCLERROR_PAR (sub_queues [subdevice ] = clCreateCommandQueueWithProperties (
@@ -507,7 +560,8 @@ int main(int argc, char* argv[])
507560 }
508561
509562 GET_CURRENT_TIMER (host_start )
510- host_convolution (h_input_grid , h_output_grid , h_mask , (cl_uint )x_dim , (cl_uint )y_dim );
563+ host_convolution (h_input_grid , h_output_grid , h_mask , (cl_uint )x_dim ,
564+ (cl_uint )y_dim );
511565 GET_CURRENT_TIMER (host_end )
512566 size_t host_time ;
513567 TIMER_DIFFERENCE (host_time , host_start , host_end )
@@ -631,13 +685,19 @@ int main(int argc, char* argv[])
631685hinput :
632686 free (h_input_grid );
633687prg :
634- OCLERROR_RET (clReleaseProgram (program ), end_error , subdevs );
688+ OCLERROR_RET (clReleaseProgram (program ), end_error , ker );
635689ker :
636690 free (kernel );
637691contx :
638- OCLERROR_RET (clReleaseContext (context ), end_error , end );
692+ OCLERROR_RET (clReleaseContext (context ), end_error , subdev1 );
693+ subdev1 :
694+ OCLERROR_RET (clReleaseDevice (subdevices [1 ]), end_error , subdev0 );
695+ subdev0 :
696+ OCLERROR_RET (clReleaseDevice (subdevices [0 ]), end_error , subdevs );
639697subdevs :
640698 free (subdevices );
699+ props :
700+ free (dev_props );
641701dev :
642702 OCLERROR_RET (clReleaseDevice (dev ), end_error , end );
643703end :
0 commit comments