Skip to content

Commit b818605

Browse files
committed
Fixes from review
1 parent 419fe40 commit b818605

File tree

2 files changed

+108
-36
lines changed

2 files changed

+108
-36
lines changed

samples/core/multi-device/main.c

Lines changed: 87 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,20 @@ void host_convolution(const cl_float* in, cl_float* out, const cl_float* mask,
163163
}
164164
}
165165

166+
cl_int opencl_version_contains(const cl_device_id dev,
167+
const char* version_fragment)
168+
{
169+
char version[64];
170+
cl_int error = CL_SUCCESS;
171+
OCLERROR_RET(clGetDeviceInfo(dev, CL_DEVICE_VERSION, sizeof(version),
172+
&version, NULL),
173+
error, fail);
174+
char* found_version = strstr(version, version_fragment);
175+
return (found_version != NULL);
176+
fail:
177+
return error;
178+
}
179+
166180
int main(int argc, char* argv[])
167181
{
168182
cl_int error = CL_SUCCESS;
@@ -199,7 +213,7 @@ int main(int argc, char* argv[])
199213
OCLERROR_PAR(dev = cl_util_get_device(dev_opts.triplet.plat_index,
200214
dev_opts.triplet.dev_index,
201215
dev_opts.triplet.dev_type, &error),
202-
error, dev);
216+
error, end);
203217

204218
if (!diag_opts.quiet)
205219
{
@@ -212,32 +226,66 @@ int main(int argc, char* argv[])
212226
fflush(stdout);
213227
}
214228

215-
#if CL_HPP_TARGET_OPENCL_VERSION < 120
216-
fprintf(stderr,
217-
"Error: OpenCL subdevices not supported before version 1.2 ");
218-
exit(EXIT_FAILURE);
219-
#endif
229+
if (opencl_version_contains(dev, "1.1"))
230+
{
231+
fprintf(stderr,
232+
"Error: OpenCL sub-devices not supported before version 1.2 ");
233+
exit(EXIT_FAILURE);
234+
}
220235

221-
// Create subdevices, each with half of the compute units available.
236+
// Check if device supports fission.
237+
cl_device_partition_property* dev_props = NULL;
238+
size_t props_size = 0;
239+
OCLERROR_RET(clGetDeviceInfo(dev, CL_DEVICE_PARTITION_PROPERTIES, 0, NULL,
240+
&props_size),
241+
error, end);
242+
if (props_size == 0)
243+
{
244+
fprintf(stderr, "Error: device does not support fission");
245+
exit(EXIT_FAILURE);
246+
}
247+
248+
// Check if the "partition equally" type is supported.
249+
MEM_CHECK(dev_props = (cl_device_partition_property*)malloc(sizeof(char)
250+
* props_size),
251+
error, end);
252+
OCLERROR_RET(clGetDeviceInfo(dev, CL_DEVICE_PARTITION_PROPERTIES,
253+
sizeof(char) * props_size, dev_props, NULL),
254+
error, props);
255+
size_t prop = 0;
256+
for (; prop < props_size; ++prop)
257+
{
258+
if (dev_props[prop] == CL_DEVICE_PARTITION_EQUALLY)
259+
{
260+
break;
261+
}
262+
}
263+
if (prop == props_size)
264+
{
265+
fprintf(stderr, "Error: device does not partition equally");
266+
exit(EXIT_FAILURE);
267+
}
268+
269+
// Create sub-devices, each with half of the compute units available.
222270
cl_uint max_compute_units = 0;
223271
OCLERROR_RET(clGetDeviceInfo(dev, CL_DEVICE_MAX_COMPUTE_UNITS,
224272
sizeof(cl_uint), &max_compute_units, NULL),
225-
error, dev);
273+
error, props);
226274
cl_device_partition_property subdevices_properties[] = {
227275
(cl_device_partition_property)CL_DEVICE_PARTITION_EQUALLY,
228276
(cl_device_partition_property)(max_compute_units / 2), 0
229277
};
230278

231-
// Initialize subdevices array with one device and then reallocate for
232-
// MacOS and Windows not to complain about NULL subdevices array.
279+
// Initialize sub-devices array with one device and then reallocate for
280+
// MacOS and Windows not to complain about NULL sub-devices array.
233281
cl_uint subdev_count = 1;
234282
cl_device_id* subdevices =
235283
(cl_device_id*)malloc(subdev_count * sizeof(cl_device_id));
236284

237285
OCLERROR_RET(clCreateSubDevices(dev, subdevices_properties,
238286
max_compute_units, subdevices,
239287
&subdev_count),
240-
error, dev);
288+
error, props);
241289

242290
if (subdev_count < 2)
243291
{
@@ -249,11 +297,11 @@ int main(int argc, char* argv[])
249297
(cl_device_id*)realloc(subdevices, subdev_count * sizeof(cl_device_id));
250298
OCLERROR_RET(clCreateSubDevices(dev, subdevices_properties, subdev_count,
251299
subdevices, NULL),
252-
error, subdevs);
300+
error, subdev1);
253301

254302
OCLERROR_PAR(context = clCreateContext(NULL, subdev_count, subdevices, NULL,
255303
NULL, &error),
256-
error, subdevs);
304+
error, subdev1);
257305

258306
// Read kernel file.
259307
const char* kernel_location = "./convolution.cl";
@@ -280,11 +328,14 @@ int main(int argc, char* argv[])
280328
// it's only necessary to add the -cl-std option for 2.0 and 3.0 OpenCL
281329
// versions.
282330
char compiler_options[1023] = "";
283-
#if CL_HPP_TARGET_OPENCL_VERSION >= 300
284-
strcat(compiler_options, "-cl-std=CL3.0 ");
285-
#elif CL_HPP_TARGET_OPENCL_VERSION >= 200
286-
strcat(compiler_options, "-cl-std=CL2.0 ");
287-
#endif
331+
if (opencl_version_contains(dev, "3."))
332+
{
333+
strcat(compiler_options, "-cl-std=CL3.0 ");
334+
}
335+
else if (opencl_version_contains(dev, "2."))
336+
{
337+
strcat(compiler_options, "-cl-std=CL2.0 ");
338+
}
288339

289340
OCLERROR_RET(
290341
clBuildProgram(program, 2, subdevices, compiler_options, NULL, NULL),
@@ -356,7 +407,7 @@ int main(int argc, char* argv[])
356407
mask_dim * mask_dim, -1000, 1000);
357408

358409
// Create device buffers, from which we will create the subbuffers for the
359-
// subdevices.
410+
// sub-devices.
360411
const size_t grid_midpoint = y_dim / 2;
361412
const size_t pad_grid_midpoint = pad_y_dim / 2;
362413

@@ -391,7 +442,7 @@ int main(int argc, char* argv[])
391442
fflush(stdout);
392443
}
393444

394-
// Set up subdevices for kernel execution.
445+
// Set up sub-devices for kernel execution.
395446
const size_t half_input_bytes =
396447
sizeof(cl_float) * pad_x_dim * (pad_grid_midpoint + 1);
397448
const size_t input_offset =
@@ -414,7 +465,7 @@ int main(int argc, char* argv[])
414465
error, bufmask);
415466

416467
// Initialize queues for command execution on each device.
417-
#if CL_HPP_TARGET_OPENCL_VERSION >= 200
468+
#if defined(CL_VERSION_2_0) || defined(CL_VERSION_3_0)
418469
cl_command_queue_properties props[] = { CL_QUEUE_PROPERTIES,
419470
CL_QUEUE_PROFILING_ENABLE, 0 };
420471
OCLERROR_PAR(sub_queues[subdevice] = clCreateCommandQueueWithProperties(
@@ -507,7 +558,8 @@ int main(int argc, char* argv[])
507558
}
508559

509560
GET_CURRENT_TIMER(host_start)
510-
host_convolution(h_input_grid, h_output_grid, h_mask, (cl_uint)x_dim, (cl_uint)y_dim);
561+
host_convolution(h_input_grid, h_output_grid, h_mask, (cl_uint)x_dim,
562+
(cl_uint)y_dim);
511563
GET_CURRENT_TIMER(host_end)
512564
size_t host_time;
513565
TIMER_DIFFERENCE(host_time, host_start, host_end)
@@ -588,31 +640,31 @@ int main(int argc, char* argv[])
588640
event1:
589641
OCLERROR_RET(clReleaseEvent(events[0]), end_error, subbufout);
590642
subbufout:
591-
if (subdevice == 1)
643+
if (subdevice >= 1)
592644
{
593645
OCLERROR_RET(clReleaseMemObject(sub_output_grids[1]), end_error,
594646
subbufout0);
595647
}
596648
subbufout0:
597649
OCLERROR_PAR(clReleaseMemObject(sub_output_grids[0]), end_error, subbufin);
598650
subbufin:
599-
if (subdevice == 1)
651+
if (subdevice >= 1)
600652
{
601653
OCLERROR_RET(clReleaseMemObject(sub_input_grids[1]), end_error,
602654
subbufin0);
603655
}
604656
subbufin0:
605657
OCLERROR_RET(clReleaseMemObject(sub_input_grids[0]), end_error, subqueue);
606658
subqueue:
607-
if (subdevice == 1)
659+
if (subdevice >= 1)
608660
{
609661
OCLERROR_RET(clReleaseCommandQueue(sub_queues[1]), end_error,
610662
subqueue0);
611663
}
612664
subqueue0:
613-
OCLERROR_RET(clReleaseCommandQueue(sub_queues[1]), end_error, conv);
665+
OCLERROR_RET(clReleaseCommandQueue(sub_queues[0]), end_error, conv);
614666
conv:
615-
if (subdevice == 1)
667+
if (subdevice >= 1)
616668
{
617669
OCLERROR_RET(clReleaseKernel(convolutions[1]), end_error, conv0);
618670
}
@@ -631,15 +683,19 @@ int main(int argc, char* argv[])
631683
hinput:
632684
free(h_input_grid);
633685
prg:
634-
OCLERROR_RET(clReleaseProgram(program), end_error, subdevs);
686+
OCLERROR_RET(clReleaseProgram(program), end_error, ker);
635687
ker:
636688
free(kernel);
637689
contx:
638-
OCLERROR_RET(clReleaseContext(context), end_error, end);
690+
OCLERROR_RET(clReleaseContext(context), end_error, subdev1);
691+
subdev1:
692+
OCLERROR_RET(clReleaseDevice(subdevices[1]), end_error, subdev0);
693+
subdev0:
694+
OCLERROR_RET(clReleaseDevice(subdevices[0]), end_error, subdevs);
639695
subdevs:
640696
free(subdevices);
641-
dev:
642-
OCLERROR_RET(clReleaseDevice(dev), end_error, end);
697+
props:
698+
free(dev_props);
643699
end:
644700
if (error) cl_util_print_error(error);
645701
return error;

samples/core/multi-device/main.cpp

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,25 @@ int main(int argc, char* argv[])
140140
exit(EXIT_FAILURE);
141141
#endif
142142

143+
// Check if device supports fission.
144+
std::vector<cl_device_partition_property> dev_props =
145+
dev.getInfo<CL_DEVICE_PARTITION_PROPERTIES>();
146+
if (dev_props.size() == 0)
147+
{
148+
std::cerr << "Error: device does not support fission" << std::endl;
149+
exit(EXIT_FAILURE);
150+
}
151+
152+
// Check if the "partition equally" type is supported.
153+
if (std::find(dev_props.begin(), dev_props.end(),
154+
CL_DEVICE_PARTITION_EQUALLY)
155+
== dev_props.end())
156+
{
157+
std::cerr << "Error: device does not partition equally"
158+
<< std::endl;
159+
exit(EXIT_FAILURE);
160+
}
161+
143162
// Create subdevices, each with half of the compute units available.
144163
cl_uint max_compute_units = dev.getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>();
145164
cl_device_partition_property subdevices_properties[] = {
@@ -316,10 +335,7 @@ int main(int argc, char* argv[])
316335
std::cout.flush();
317336
}
318337

319-
auto convolution =
320-
cl::KernelFunctor<cl::Buffer, cl::Buffer, cl::Buffer, cl_uint2>(
321-
program, "convolution_3x3")
322-
.getKernel();
338+
auto convolution = cl::Kernel(program, "convolution_3x3");
323339

324340
cl::CommandQueue queue(context, subdevice,
325341
cl::QueueProperties::Profiling);
@@ -362,7 +378,7 @@ int main(int argc, char* argv[])
362378
std::cout.flush();
363379
}
364380

365-
convolutions.push_back(convolution.clone());
381+
convolutions.push_back(convolution);
366382
sub_queues.push_back(queue);
367383
sub_input_grids.push_back(sub_input_grid);
368384
sub_output_grids.push_back(sub_output_grid);

0 commit comments

Comments
 (0)