Skip to content

Commit 11c0765

Browse files
committed
Fixes from review
1 parent 419fe40 commit 11c0765

File tree

2 files changed

+103
-27
lines changed

2 files changed

+103
-27
lines changed

samples/core/multi-device/main.c

Lines changed: 83 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,22 @@ void host_convolution(const cl_float* in, cl_float* out, const cl_float* mask,
163163
}
164164
}
165165

166+
cl_int opencl_version_contains(const cl_device_id dev,
167+
const char* version_fragment)
168+
{
169+
char version[64];
170+
cl_int error = CL_SUCCESS;
171+
OCLERROR_RET(clGetDeviceInfo(dev, CL_DEVICE_VERSION, sizeof(version),
172+
&version, NULL),
173+
error, fail);
174+
char* found_version = strstr(version, version_fragment);
175+
printf("Version detected %s from version %s and fragment %s\n",
176+
found_version, version, version_fragment);
177+
return (found_version != NULL);
178+
fail:
179+
return error;
180+
}
181+
166182
int main(int argc, char* argv[])
167183
{
168184
cl_int error = CL_SUCCESS;
@@ -212,32 +228,66 @@ int main(int argc, char* argv[])
212228
fflush(stdout);
213229
}
214230

215-
#if CL_HPP_TARGET_OPENCL_VERSION < 120
216-
fprintf(stderr,
217-
"Error: OpenCL subdevices not supported before version 1.2 ");
218-
exit(EXIT_FAILURE);
219-
#endif
231+
if (opencl_version_contains(dev, "1.1"))
232+
{
233+
fprintf(stderr,
234+
"Error: OpenCL sub-devices not supported before version 1.2 ");
235+
exit(EXIT_FAILURE);
236+
}
220237

221-
// Create subdevices, each with half of the compute units available.
238+
// Check if device supports fission.
239+
cl_device_partition_property* dev_props = NULL;
240+
size_t props_size = 0;
241+
OCLERROR_RET(clGetDeviceInfo(dev, CL_DEVICE_PARTITION_PROPERTIES, 0, NULL,
242+
&props_size),
243+
error, dev);
244+
if (props_size == 0)
245+
{
246+
fprintf(stderr, "Error: device does not support fission");
247+
exit(EXIT_FAILURE);
248+
}
249+
250+
// Check if the "partition equally" type is supported.
251+
MEM_CHECK(dev_props = (cl_device_partition_property*)malloc(sizeof(char)
252+
* props_size),
253+
error, dev);
254+
OCLERROR_RET(clGetDeviceInfo(dev, CL_DEVICE_PARTITION_PROPERTIES,
255+
sizeof(char) * props_size, dev_props, NULL),
256+
error, props);
257+
size_t i = 0;
258+
for (; i < props_size; ++i)
259+
{
260+
if (dev_props[i] == CL_DEVICE_PARTITION_EQUALLY)
261+
{
262+
break;
263+
}
264+
}
265+
if (i == props_size)
266+
{
267+
fprintf(stderr, "Error: device does not partition equally");
268+
exit(EXIT_FAILURE);
269+
}
270+
271+
// Create sub-devices, each with half of the compute units available.
222272
cl_uint max_compute_units = 0;
223273
OCLERROR_RET(clGetDeviceInfo(dev, CL_DEVICE_MAX_COMPUTE_UNITS,
224274
sizeof(cl_uint), &max_compute_units, NULL),
225-
error, dev);
275+
error, props);
226276
cl_device_partition_property subdevices_properties[] = {
227277
(cl_device_partition_property)CL_DEVICE_PARTITION_EQUALLY,
228278
(cl_device_partition_property)(max_compute_units / 2), 0
229279
};
230280

231-
// Initialize subdevices array with one device and then reallocate for
232-
// MacOS and Windows not to complain about NULL subdevices array.
281+
// Initialize sub-devices array with one device and then reallocate for
282+
// MacOS and Windows not to complain about NULL sub-devices array.
233283
cl_uint subdev_count = 1;
234284
cl_device_id* subdevices =
235285
(cl_device_id*)malloc(subdev_count * sizeof(cl_device_id));
236286

237287
OCLERROR_RET(clCreateSubDevices(dev, subdevices_properties,
238288
max_compute_units, subdevices,
239289
&subdev_count),
240-
error, dev);
290+
error, props);
241291

242292
if (subdev_count < 2)
243293
{
@@ -249,11 +299,11 @@ int main(int argc, char* argv[])
249299
(cl_device_id*)realloc(subdevices, subdev_count * sizeof(cl_device_id));
250300
OCLERROR_RET(clCreateSubDevices(dev, subdevices_properties, subdev_count,
251301
subdevices, NULL),
252-
error, subdevs);
302+
error, subdev1);
253303

254304
OCLERROR_PAR(context = clCreateContext(NULL, subdev_count, subdevices, NULL,
255305
NULL, &error),
256-
error, subdevs);
306+
error, subdev1);
257307

258308
// Read kernel file.
259309
const char* kernel_location = "./convolution.cl";
@@ -280,11 +330,14 @@ int main(int argc, char* argv[])
280330
// it's only necessary to add the -cl-std option for 2.0 and 3.0 OpenCL
281331
// versions.
282332
char compiler_options[1023] = "";
283-
#if CL_HPP_TARGET_OPENCL_VERSION >= 300
284-
strcat(compiler_options, "-cl-std=CL3.0 ");
285-
#elif CL_HPP_TARGET_OPENCL_VERSION >= 200
286-
strcat(compiler_options, "-cl-std=CL2.0 ");
287-
#endif
333+
if (opencl_version_contains(dev, "3."))
334+
{
335+
strcat(compiler_options, "-cl-std=CL3.0 ");
336+
}
337+
else if (opencl_version_contains(dev, "2."))
338+
{
339+
strcat(compiler_options, "-cl-std=CL2.0 ");
340+
}
288341

289342
OCLERROR_RET(
290343
clBuildProgram(program, 2, subdevices, compiler_options, NULL, NULL),
@@ -356,7 +409,7 @@ int main(int argc, char* argv[])
356409
mask_dim * mask_dim, -1000, 1000);
357410

358411
// Create device buffers, from which we will create the subbuffers for the
359-
// subdevices.
412+
// sub-devices.
360413
const size_t grid_midpoint = y_dim / 2;
361414
const size_t pad_grid_midpoint = pad_y_dim / 2;
362415

@@ -391,7 +444,7 @@ int main(int argc, char* argv[])
391444
fflush(stdout);
392445
}
393446

394-
// Set up subdevices for kernel execution.
447+
// Set up sub-devices for kernel execution.
395448
const size_t half_input_bytes =
396449
sizeof(cl_float) * pad_x_dim * (pad_grid_midpoint + 1);
397450
const size_t input_offset =
@@ -414,7 +467,7 @@ int main(int argc, char* argv[])
414467
error, bufmask);
415468

416469
// Initialize queues for command execution on each device.
417-
#if CL_HPP_TARGET_OPENCL_VERSION >= 200
470+
#if defined(CL_VERSION_2_0) || defined(CL_VERSION_3_0)
418471
cl_command_queue_properties props[] = { CL_QUEUE_PROPERTIES,
419472
CL_QUEUE_PROFILING_ENABLE, 0 };
420473
OCLERROR_PAR(sub_queues[subdevice] = clCreateCommandQueueWithProperties(
@@ -507,7 +560,8 @@ int main(int argc, char* argv[])
507560
}
508561

509562
GET_CURRENT_TIMER(host_start)
510-
host_convolution(h_input_grid, h_output_grid, h_mask, (cl_uint)x_dim, (cl_uint)y_dim);
563+
host_convolution(h_input_grid, h_output_grid, h_mask, (cl_uint)x_dim,
564+
(cl_uint)y_dim);
511565
GET_CURRENT_TIMER(host_end)
512566
size_t host_time;
513567
TIMER_DIFFERENCE(host_time, host_start, host_end)
@@ -631,13 +685,19 @@ int main(int argc, char* argv[])
631685
hinput:
632686
free(h_input_grid);
633687
prg:
634-
OCLERROR_RET(clReleaseProgram(program), end_error, subdevs);
688+
OCLERROR_RET(clReleaseProgram(program), end_error, ker);
635689
ker:
636690
free(kernel);
637691
contx:
638-
OCLERROR_RET(clReleaseContext(context), end_error, end);
692+
OCLERROR_RET(clReleaseContext(context), end_error, subdev1);
693+
subdev1:
694+
OCLERROR_RET(clReleaseDevice(subdevices[1]), end_error, subdev0);
695+
subdev0:
696+
OCLERROR_RET(clReleaseDevice(subdevices[0]), end_error, subdevs);
639697
subdevs:
640698
free(subdevices);
699+
props:
700+
free(dev_props);
641701
dev:
642702
OCLERROR_RET(clReleaseDevice(dev), end_error, end);
643703
end:

samples/core/multi-device/main.cpp

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,25 @@ int main(int argc, char* argv[])
140140
exit(EXIT_FAILURE);
141141
#endif
142142

143+
// Check if device supports fission.
144+
std::vector<cl_device_partition_property> dev_props =
145+
dev.getInfo<CL_DEVICE_PARTITION_PROPERTIES>();
146+
if (dev_props.size() == 0)
147+
{
148+
std::cerr << "Error: device does not support fission" << std::endl;
149+
exit(EXIT_FAILURE);
150+
}
151+
152+
// Check if the "partition equally" type is supported.
153+
if (std::find(dev_props.begin(), dev_props.end(),
154+
CL_DEVICE_PARTITION_EQUALLY)
155+
== dev_props.end())
156+
{
157+
std::cerr << "Error: device does not partition equally"
158+
<< std::endl;
159+
exit(EXIT_FAILURE);
160+
}
161+
143162
// Create subdevices, each with half of the compute units available.
144163
cl_uint max_compute_units = dev.getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>();
145164
cl_device_partition_property subdevices_properties[] = {
@@ -316,10 +335,7 @@ int main(int argc, char* argv[])
316335
std::cout.flush();
317336
}
318337

319-
auto convolution =
320-
cl::KernelFunctor<cl::Buffer, cl::Buffer, cl::Buffer, cl_uint2>(
321-
program, "convolution_3x3")
322-
.getKernel();
338+
auto convolution = cl::Kernel(program, "convolution_3x3");
323339

324340
cl::CommandQueue queue(context, subdevice,
325341
cl::QueueProperties::Profiling);

0 commit comments

Comments
 (0)