Skip to content

Commit b6475e4

Browse files
committed
Fixes from review
1 parent 33535f9 commit b6475e4

File tree

2 files changed

+157
-58
lines changed

2 files changed

+157
-58
lines changed

samples/core/multi-device/main.c

Lines changed: 104 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,13 @@ void host_convolution(const cl_float* in, cl_float* out, const cl_float* mask,
163163
}
164164
}
165165

166+
/*
 * Checks whether the OpenCL version reported by a device contains a given
 * version fragment (e.g. "1.1" or "3.").
 *
 * dev_version      - string returned by clGetDeviceInfo(CL_DEVICE_VERSION).
 * version_fragment - text to look for in the version number.
 *
 * Returns non-zero when the fragment is found, zero otherwise (also when
 * either argument is NULL).
 *
 * The device version string is laid out as
 * "OpenCL <major.minor> <vendor-specific information>". When the string
 * follows that layout, a match is only accepted if it lies entirely before
 * the vendor-specific part, so that driver or build numbers such as
 * "11.1.105" cannot be mistaken for the OpenCL version. Strings that do not
 * start with "OpenCL " are searched as a whole.
 */
cl_int opencl_version_contains(const char* dev_version,
                               const char* version_fragment)
{
    static const char prefix[] = "OpenCL ";
    const size_t prefix_length = sizeof(prefix) - 1;

    if (dev_version == NULL || version_fragment == NULL)
    {
        return 0;
    }

    // By default the whole string may be searched.
    size_t search_limit = strlen(dev_version);
    if (strncmp(dev_version, prefix, prefix_length) == 0)
    {
        // Stop at the space that ends the <major.minor> token (or at the end
        // of the string when there is no vendor-specific part).
        search_limit =
            prefix_length + strcspn(dev_version + prefix_length, " ");
    }

    // The first occurrence is the only candidate: any later occurrence ends
    // even further to the right.
    const char* found_version = strstr(dev_version, version_fragment);
    if (found_version == NULL)
    {
        return 0;
    }

    return (size_t)(found_version - dev_version) + strlen(version_fragment)
        <= search_limit;
}
172+
166173
int main(int argc, char* argv[])
167174
{
168175
cl_int error = CL_SUCCESS;
@@ -199,7 +206,13 @@ int main(int argc, char* argv[])
199206
OCLERROR_PAR(dev = cl_util_get_device(dev_opts.triplet.plat_index,
200207
dev_opts.triplet.dev_index,
201208
dev_opts.triplet.dev_type, &error),
202-
error, dev);
209+
error, end);
210+
211+
// Query OpenCL version supported by device.
212+
char dev_version[64];
213+
OCLERROR_RET(clGetDeviceInfo(dev, CL_DEVICE_VERSION, sizeof(dev_version),
214+
&dev_version, NULL),
215+
error, end);
203216

204217
if (!diag_opts.quiet)
205218
{
@@ -212,48 +225,88 @@ int main(int argc, char* argv[])
212225
fflush(stdout);
213226
}
214227

215-
#if CL_HPP_TARGET_OPENCL_VERSION < 120
216-
fprintf(stderr,
217-
"Error: OpenCL subdevices not supported before version 1.2 ");
218-
exit(EXIT_FAILURE);
219-
#endif
228+
if (opencl_version_contains(dev_version, "1.0")
229+
|| opencl_version_contains(dev_version, "1.1"))
230+
{
231+
fprintf(stdout,
232+
"This sample requires device partitioning, which is an OpenCL "
233+
"1.2 feature, but the device chosen only supports OpenCL %s. "
234+
"Please try with a different OpenCL device instead.\n",
235+
dev_version);
236+
exit(EXIT_SUCCESS);
237+
}
238+
239+
// Check if device supports fission.
240+
cl_device_partition_property* dev_props = NULL;
241+
size_t props_size = 0;
242+
OCLERROR_RET(clGetDeviceInfo(dev, CL_DEVICE_PARTITION_PROPERTIES, 0, NULL,
243+
&props_size),
244+
error, end);
245+
if (props_size == 0)
246+
{
247+
fprintf(stdout,
248+
"This sample requires device fission, which is a "
249+
"feature available from OpenCL 1.2 on, but the "
250+
"device chosen does not seem to support it. Please "
251+
"try with a different OpenCL device instead.\n");
252+
exit(EXIT_SUCCESS);
253+
}
254+
255+
// Check if the "partition equally" type is supported.
256+
MEM_CHECK(dev_props = (cl_device_partition_property*)malloc(props_size),
257+
error, end);
258+
OCLERROR_RET(clGetDeviceInfo(dev, CL_DEVICE_PARTITION_PROPERTIES,
259+
props_size, dev_props, NULL),
260+
error, props);
261+
size_t prop = 0,
262+
props_length = props_size / sizeof(cl_device_partition_property);
263+
for (; prop < props_length; ++prop)
264+
{
265+
if (dev_props[prop] == CL_DEVICE_PARTITION_EQUALLY)
266+
{
267+
break;
268+
}
269+
}
270+
if (prop == props_length)
271+
{
272+
fprintf(stdout,
273+
"This sample requires partition equally, which is a "
274+
"partition scheme available from OpenCL 1.2 on, but "
275+
"the device chosen does not seem to support it. "
276+
"Please try with a different OpenCL device instead.\n");
277+
exit(EXIT_SUCCESS);
278+
}
220279

221-
// Create subdevices, each with half of the compute units available.
280+
// Create sub-devices, each with half of the compute units available.
222281
cl_uint max_compute_units = 0;
282+
cl_uint subdev_created = 0;
283+
const cl_uint subdev_count = 2;
223284
OCLERROR_RET(clGetDeviceInfo(dev, CL_DEVICE_MAX_COMPUTE_UNITS,
224285
sizeof(cl_uint), &max_compute_units, NULL),
225-
error, dev);
286+
error, props);
226287
cl_device_partition_property subdevices_properties[] = {
227288
(cl_device_partition_property)CL_DEVICE_PARTITION_EQUALLY,
228-
(cl_device_partition_property)(max_compute_units / 2), 0
289+
(cl_device_partition_property)(max_compute_units / subdev_count), 0
229290
};
230291

231-
// Initialize subdevices array with one device and then reallocate for
232-
// MacOS and Windows not to complain about NULL subdevices array.
233-
cl_uint subdev_count = 1;
234292
cl_device_id* subdevices =
235293
(cl_device_id*)malloc(subdev_count * sizeof(cl_device_id));
236294

237-
OCLERROR_RET(clCreateSubDevices(dev, subdevices_properties,
238-
max_compute_units, subdevices,
239-
&subdev_count),
240-
error, dev);
295+
OCLERROR_RET(clCreateSubDevices(dev, subdevices_properties, subdev_count,
296+
subdevices, &subdev_created),
297+
error, props);
241298

242-
if (subdev_count < 2)
299+
if (subdev_created < subdev_count)
243300
{
244-
fprintf(stderr, "Error: OpenCL cannot create subdevices");
301+
fprintf(stderr,
302+
"Error: OpenCL cannot create the number of sub-devices "
303+
"requested\n");
245304
exit(EXIT_FAILURE);
246305
}
247306

248-
subdevices =
249-
(cl_device_id*)realloc(subdevices, subdev_count * sizeof(cl_device_id));
250-
OCLERROR_RET(clCreateSubDevices(dev, subdevices_properties, subdev_count,
251-
subdevices, NULL),
252-
error, subdevs);
253-
254307
OCLERROR_PAR(context = clCreateContext(NULL, subdev_count, subdevices, NULL,
255308
NULL, &error),
256-
error, subdevs);
309+
error, subdev1);
257310

258311
// Read kernel file.
259312
const char* kernel_location = "./convolution.cl";
@@ -280,11 +333,14 @@ int main(int argc, char* argv[])
280333
// it's only necessary to add the -cl-std option for 2.0 and 3.0 OpenCL
281334
// versions.
282335
char compiler_options[1023] = "";
283-
#if CL_HPP_TARGET_OPENCL_VERSION >= 300
284-
strcat(compiler_options, "-cl-std=CL3.0 ");
285-
#elif CL_HPP_TARGET_OPENCL_VERSION >= 200
286-
strcat(compiler_options, "-cl-std=CL2.0 ");
287-
#endif
336+
if (opencl_version_contains(dev_version, "3."))
337+
{
338+
strcat(compiler_options, "-cl-std=CL3.0 ");
339+
}
340+
else if (opencl_version_contains(dev_version, "2."))
341+
{
342+
strcat(compiler_options, "-cl-std=CL2.0 ");
343+
}
288344

289345
OCLERROR_RET(
290346
clBuildProgram(program, 2, subdevices, compiler_options, NULL, NULL),
@@ -356,7 +412,7 @@ int main(int argc, char* argv[])
356412
mask_dim * mask_dim, -1000, 1000);
357413

358414
// Create device buffers, from which we will create the subbuffers for the
359-
// subdevices.
415+
// sub-devices.
360416
const size_t grid_midpoint = y_dim / 2;
361417
const size_t pad_grid_midpoint = pad_y_dim / 2;
362418

@@ -391,7 +447,7 @@ int main(int argc, char* argv[])
391447
fflush(stdout);
392448
}
393449

394-
// Set up subdevices for kernel execution.
450+
// Set up sub-devices for kernel execution.
395451
const size_t half_input_bytes =
396452
sizeof(cl_float) * pad_x_dim * (pad_grid_midpoint + 1);
397453
const size_t input_offset =
@@ -414,7 +470,7 @@ int main(int argc, char* argv[])
414470
error, bufmask);
415471

416472
// Initialize queues for command execution on each device.
417-
#if CL_HPP_TARGET_OPENCL_VERSION >= 200
473+
#if defined(CL_VERSION_2_0) || defined(CL_VERSION_3_0)
418474
cl_command_queue_properties props[] = { CL_QUEUE_PROPERTIES,
419475
CL_QUEUE_PROFILING_ENABLE, 0 };
420476
OCLERROR_PAR(sub_queues[subdevice] = clCreateCommandQueueWithProperties(
@@ -507,7 +563,8 @@ int main(int argc, char* argv[])
507563
}
508564

509565
GET_CURRENT_TIMER(host_start)
510-
host_convolution(h_input_grid, h_output_grid, h_mask, (cl_uint)x_dim, (cl_uint)y_dim);
566+
host_convolution(h_input_grid, h_output_grid, h_mask, (cl_uint)x_dim,
567+
(cl_uint)y_dim);
511568
GET_CURRENT_TIMER(host_end)
512569
size_t host_time;
513570
TIMER_DIFFERENCE(host_time, host_start, host_end)
@@ -588,31 +645,31 @@ int main(int argc, char* argv[])
588645
event1:
589646
OCLERROR_RET(clReleaseEvent(events[0]), end_error, subbufout);
590647
subbufout:
591-
if (subdevice == 1)
648+
if (subdevice >= 1)
592649
{
593650
OCLERROR_RET(clReleaseMemObject(sub_output_grids[1]), end_error,
594651
subbufout0);
595652
}
596653
subbufout0:
597654
OCLERROR_PAR(clReleaseMemObject(sub_output_grids[0]), end_error, subbufin);
598655
subbufin:
599-
if (subdevice == 1)
656+
if (subdevice >= 1)
600657
{
601658
OCLERROR_RET(clReleaseMemObject(sub_input_grids[1]), end_error,
602659
subbufin0);
603660
}
604661
subbufin0:
605662
OCLERROR_RET(clReleaseMemObject(sub_input_grids[0]), end_error, subqueue);
606663
subqueue:
607-
if (subdevice == 1)
664+
if (subdevice >= 1)
608665
{
609666
OCLERROR_RET(clReleaseCommandQueue(sub_queues[1]), end_error,
610667
subqueue0);
611668
}
612669
subqueue0:
613-
OCLERROR_RET(clReleaseCommandQueue(sub_queues[1]), end_error, conv);
670+
OCLERROR_RET(clReleaseCommandQueue(sub_queues[0]), end_error, conv);
614671
conv:
615-
if (subdevice == 1)
672+
if (subdevice >= 1)
616673
{
617674
OCLERROR_RET(clReleaseKernel(convolutions[1]), end_error, conv0);
618675
}
@@ -631,15 +688,19 @@ int main(int argc, char* argv[])
631688
hinput:
632689
free(h_input_grid);
633690
prg:
634-
OCLERROR_RET(clReleaseProgram(program), end_error, subdevs);
691+
OCLERROR_RET(clReleaseProgram(program), end_error, ker);
635692
ker:
636693
free(kernel);
637694
contx:
638-
OCLERROR_RET(clReleaseContext(context), end_error, end);
695+
OCLERROR_RET(clReleaseContext(context), end_error, subdev1);
696+
subdev1:
697+
OCLERROR_RET(clReleaseDevice(subdevices[1]), end_error, subdev0);
698+
subdev0:
699+
OCLERROR_RET(clReleaseDevice(subdevices[0]), end_error, subdevs);
639700
subdevs:
640701
free(subdevices);
641-
dev:
642-
OCLERROR_RET(clReleaseDevice(dev), end_error, end);
702+
props:
703+
free(dev_props);
643704
end:
644705
if (error) cl_util_print_error(error);
645706
return error;

samples/core/multi-device/main.cpp

Lines changed: 53 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,12 @@ void host_convolution(const std::vector<cl_float> in,
9797
}
9898
}
9999

100+
// Checks whether the OpenCL version reported by a device contains a given
// version fragment (e.g. "1.1" or "3.").
//
// dev_version is expected to be the CL_DEVICE_VERSION string, laid out as
// "OpenCL <major.minor> <vendor-specific information>". When the string
// follows that layout, a match is only accepted if it lies entirely before
// the vendor-specific part, so that driver or build numbers such as
// "11.1.105" cannot be mistaken for the OpenCL version. Strings that do not
// start with "OpenCL " are searched as a whole.
bool opencl_version_contains(const cl::string& dev_version,
                             const cl::string& version_fragment)
{
    const cl::string prefix = "OpenCL ";

    // End of the searchable region; npos means "no restriction".
    auto search_limit = cl::string::npos;
    if (dev_version.find(prefix) == 0)
    {
        // Position of the space that ends the <major.minor> token, or npos
        // when there is no vendor-specific part.
        search_limit = dev_version.find(' ', prefix.size());
    }

    // The first occurrence is the only candidate: any later occurrence ends
    // even further to the right.
    const auto position = dev_version.find(version_fragment);
    if (position == cl::string::npos)
    {
        return false;
    }

    return search_limit == cl::string::npos
        || position + version_fragment.size() <= search_limit;
}
105+
100106
int main(int argc, char* argv[])
101107
{
102108
try
@@ -118,6 +124,9 @@ int main(int argc, char* argv[])
118124
dev.getInfo<CL_DEVICE_PLATFORM>()
119125
}; // https://github.com/KhronosGroup/OpenCL-CLHPP/issues/150
120126

127+
// Query OpenCL version supported by device.
128+
const std::string dev_version = dev.getInfo<CL_DEVICE_VERSION>();
129+
121130
if (!diag_opts.quiet)
122131
{
123132
std::cout << "Selected device: " << dev.getInfo<CL_DEVICE_NAME>()
@@ -133,12 +142,44 @@ int main(int argc, char* argv[])
133142
std::cout.flush();
134143
}
135144

136-
#if CL_HPP_TARGET_OPENCL_VERSION < 120
137-
std::cerr
138-
<< "Error: OpenCL subdevices not supported before version 1.2 "
139-
<< std::endl;
140-
exit(EXIT_FAILURE);
141-
#endif
145+
if (opencl_version_contains(dev_version, "1.0")
146+
|| opencl_version_contains(dev_version, "1.1"))
147+
{
148+
std::cout
149+
<< "This sample requires device partitioning, which is an "
150+
"OpenCL 1.2 feature, but the device chosen only "
151+
"supports OpenCL "
152+
<< dev_version
153+
<< ". Please try with a different OpenCL device instead."
154+
<< std::endl;
155+
exit(EXIT_SUCCESS);
156+
}
157+
158+
// Check if device supports fission.
159+
std::vector<cl_device_partition_property> dev_props =
160+
dev.getInfo<CL_DEVICE_PARTITION_PROPERTIES>();
161+
if (dev_props.size() == 0)
162+
{
163+
std::cout << "This sample requires device fission, which is a "
164+
"feature available from OpenCL 1.2 on, but the "
165+
"device chosen does not seem to support it. Please "
166+
"try with a different OpenCL device instead."
167+
<< std::endl;
168+
exit(EXIT_SUCCESS);
169+
}
170+
171+
// Check if the "partition equally" type is supported.
172+
if (std::find(dev_props.begin(), dev_props.end(),
173+
CL_DEVICE_PARTITION_EQUALLY)
174+
== dev_props.end())
175+
{
176+
std::cout << "This sample requires partition equally, which is a "
177+
"partition scheme available from OpenCL 1.2 on, but "
178+
"the device chosen does not seem to support it. "
179+
"Please try with a different OpenCL device instead."
180+
<< std::endl;
181+
exit(EXIT_SUCCESS);
182+
}
142183

143184
// Create subdevices, each with half of the compute units available.
144185
cl_uint max_compute_units = dev.getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>();
@@ -189,10 +230,10 @@ int main(int argc, char* argv[])
189230
std::string compiler_opt_str =
190231
"-cl-std=CL" + std::to_string(i) + ".0 "; // -cl-std=CLi.0
191232

192-
compiler_options += cl::string{ cl::util::opencl_c_version_contains(
193-
dev, version_str)
194-
? compiler_opt_str
195-
: "" };
233+
compiler_options +=
234+
cl::string{ opencl_version_contains(dev_version, version_str)
235+
? compiler_opt_str
236+
: "" };
196237
}
197238
program.build(subdevices, compiler_options.c_str());
198239

@@ -316,10 +357,7 @@ int main(int argc, char* argv[])
316357
std::cout.flush();
317358
}
318359

319-
auto convolution =
320-
cl::KernelFunctor<cl::Buffer, cl::Buffer, cl::Buffer, cl_uint2>(
321-
program, "convolution_3x3")
322-
.getKernel();
360+
auto convolution = cl::Kernel(program, "convolution_3x3");
323361

324362
cl::CommandQueue queue(context, subdevice,
325363
cl::QueueProperties::Profiling);
@@ -362,7 +400,7 @@ int main(int argc, char* argv[])
362400
std::cout.flush();
363401
}
364402

365-
convolutions.push_back(convolution.clone());
403+
convolutions.push_back(convolution);
366404
sub_queues.push_back(queue);
367405
sub_input_grids.push_back(sub_input_grid);
368406
sub_output_grids.push_back(sub_output_grid);

0 commit comments

Comments
 (0)