Skip to content

Commit aefdc8d

Browse files
authored
Arm backend: Fixes for VGF models (#13681)
* Models with constants/weight data no longer crash
* Missing intermediate allocation prevented correct output
* The emulation layer required a number of extensions
* A latent bug on indexing for VGF with >1 segment
* Memory barriers for non-unified memory Vulkan targets

Change-Id: Ide2aea329981a37b7787c9c676d6a2d1fd4ff3c8

cc @digantdesai @freddan80 @per @zingo @oscarandersson8218

Signed-off-by: Rob Elliott <[email protected]>
1 parent 51de606 commit aefdc8d

File tree

3 files changed

+282
-32
lines changed

3 files changed

+282
-32
lines changed

backends/arm/runtime/VGFBackend.cpp

Lines changed: 94 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -249,7 +249,6 @@ VkResult vkml_allocate_basics(
249249
VkDevice* device,
250250
VkQueue* queue,
251251
VkCommandPool* command_pool) {
252-
const char* dev_exts[] = {"VK_ARM_tensors", "VK_ARM_data_graph"};
253252
VkResult result;
254253

255254
if (VK_SUCCESS != volkInitialize()) {
@@ -372,11 +371,103 @@ VkResult vkml_allocate_basics(
372371
.pQueuePriorities = &qp,
373372
};
374373

374+
// Query features
375+
VkPhysicalDeviceVulkan12Features available_12 = {
376+
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES,
377+
.pNext = NULL,
378+
};
379+
VkPhysicalDeviceVulkan11Features available_11 = {
380+
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES,
381+
.pNext = &available_12,
382+
};
383+
VkPhysicalDeviceFeatures2 available_2 = {
384+
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
385+
.pNext = &available_11,
386+
};
387+
vkGetPhysicalDeviceFeatures2(*physical_device, &available_2);
388+
389+
// Select features
390+
VkPhysicalDeviceShaderReplicatedCompositesFeaturesEXT features_c{
391+
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_REPLICATED_COMPOSITES_FEATURES_EXT,
392+
nullptr};
393+
features_c.shaderReplicatedComposites = true;
394+
VkPhysicalDeviceVulkan13Features features_13{
395+
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES, nullptr};
396+
features_13.synchronization2 = true;
397+
features_13.maintenance4 = true;
398+
features_13.pipelineCreationCacheControl = true;
399+
features_13.pNext = &features_c;
400+
VkPhysicalDeviceVulkan12Features features_12{
401+
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES, nullptr};
402+
features_12.hostQueryReset = true;
403+
features_12.storageBuffer8BitAccess = true;
404+
features_12.uniformAndStorageBuffer8BitAccess =
405+
available_12.uniformAndStorageBuffer8BitAccess;
406+
features_12.shaderInt8 = true;
407+
features_12.shaderFloat16 = available_12.shaderFloat16;
408+
features_12.vulkanMemoryModel = true;
409+
features_12.vulkanMemoryModelDeviceScope =
410+
available_12.vulkanMemoryModelDeviceScope;
411+
features_12.pNext = &features_13;
412+
VkPhysicalDeviceVulkan11Features features_11{
413+
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES, nullptr};
414+
features_11.storageBuffer16BitAccess = available_11.storageBuffer16BitAccess;
415+
features_11.uniformAndStorageBuffer16BitAccess =
416+
available_11.uniformAndStorageBuffer16BitAccess;
417+
features_11.pNext = &features_12;
418+
VkPhysicalDeviceTensorFeaturesARM features_tensor{
419+
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TENSOR_FEATURES_ARM, nullptr};
420+
features_tensor.shaderTensorAccess = true;
421+
features_tensor.tensors = true;
422+
features_tensor.pNext = &features_11;
423+
VkPhysicalDeviceDataGraphFeaturesARM features_graph{
424+
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DATA_GRAPH_FEATURES_ARM, nullptr};
425+
features_graph.dataGraph = true;
426+
features_graph.pNext = &features_tensor;
427+
428+
VkPhysicalDeviceFeatures device_features = {};
429+
device_features.shaderInt16 = VK_TRUE;
430+
device_features.shaderInt64 = VK_TRUE;
431+
432+
// Extension strings to enable
433+
auto dev_exts = {
434+
"VK_ARM_tensors",
435+
"VK_ARM_data_graph",
436+
"VK_KHR_maintenance4",
437+
"VK_KHR_maintenance5",
438+
"VK_KHR_deferred_host_operations",
439+
"VK_EXT_shader_replicated_composites"};
440+
441+
uint32_t exts = 0;
442+
vkEnumerateDeviceExtensionProperties(
443+
*physical_device, nullptr, &exts, nullptr);
444+
vector<VkExtensionProperties> available(exts);
445+
vkEnumerateDeviceExtensionProperties(
446+
*physical_device, nullptr, &exts, available.data());
447+
448+
vector<const char*> requested_exts;
449+
for (auto& ext : dev_exts) {
450+
bool found = false;
451+
for (auto const& ext_avail : available) {
452+
if (strcmp(ext, ext_avail.extensionName) == 0) {
453+
found = true;
454+
requested_exts.push_back(ext);
455+
}
456+
}
457+
if (found == false) {
458+
ET_LOG(Info, "Failed to find extension %s", ext);
459+
}
460+
}
461+
462+
// Create the device with our subset of features
375463
VkDeviceCreateInfo dci{VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, nullptr};
376464
dci.queueCreateInfoCount = 1;
377465
dci.pQueueCreateInfos = &queue_info;
378-
dci.enabledExtensionCount = 2;
379-
dci.ppEnabledExtensionNames = dev_exts;
466+
dci.enabledExtensionCount = requested_exts.size();
467+
dci.ppEnabledExtensionNames = requested_exts.data();
468+
;
469+
dci.pEnabledFeatures = &device_features;
470+
dci.pNext = &features_graph;
380471
result = vkCreateDevice(*physical_device, &dci, nullptr, device);
381472
if (result != VK_SUCCESS) {
382473
ET_LOG(Error, "Failed to create VkDevice");

0 commit comments

Comments
 (0)