Skip to content

Commit

Permalink
Add new option to disable cpu sync for tensors (#8490)
Browse files Browse the repository at this point in the history
* add options to disable cpu copy back

* null check properties

* only affect gpu outputs

* change name to disabletensorcpusync

* slight refactoring

* Globally enable ms-experimental ops

* change meaning of ms_experimental to mean *all* ms_experimental ops. Some experimental ops, such as the audio ops, will still be enabled globally without this flag.

* remove changes incorrectly merged

* bad merge

* add test

Co-authored-by: Sheil Kumar <[email protected]>
  • Loading branch information
smk2007 and Sheil Kumar authored Aug 27, 2021
1 parent 6a477ac commit 775f862
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 5 deletions.
23 changes: 19 additions & 4 deletions winml/lib/Api/impl/TensorBase.h
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,17 @@ struct TensorBase : TBase {
return S_OK;
}

// Reads the optional "DisableTensorCpuSync" flag from a property set.
//
// Returns false when `properties` is null, when the key is not present, or when
// the value stored under the key is null. Otherwise the stored value is queried
// for wf::IPropertyValue and its boolean payload is returned.
// NOTE(review): a stored value that is not a boolean IPropertyValue is not
// handled here; presumably the as<>/GetBoolean calls fail in that case - confirm.
bool GetDisableTensorCpuSyncFromMetadata(const wfc::IPropertySet& properties) {
  constexpr auto kDisableTensorCpuSyncKey = L"DisableTensorCpuSync";

  // No property set, or the caller did not specify the option: default to false.
  if (properties == nullptr || !properties.HasKey(kDisableTensorCpuSyncKey)) {
    return false;
  }

  // The key exists, but the boxed value itself may still be null.
  const auto boxedFlag = properties.Lookup(kDisableTensorCpuSyncKey);
  if (boxedFlag == nullptr) {
    return false;
  }

  return boxedFlag.as<wf::IPropertyValue>().GetBoolean();
}

// ILotusValueProviderPrivate::UpdateSourceResourceData
STDMETHOD(UpdateSourceResourceData)
(BindingContext& context, IValue* value) {
Expand All @@ -350,13 +361,17 @@ struct TensorBase : TBase {
// get the shape
RETURN_IF_FAILED_MSG(value->GetTensorShape(shape_), "Failed to get the tensor shape from resource!");

bool is_cpu;
bool isCpuOutput = SUCCEEDED(value->IsCpu(&is_cpu)) && is_cpu;
bool disableTensorCpuSyncProperty = GetDisableTensorCpuSyncFromMetadata(context.properties);
bool disableCpuSync = !isCpuOutput && disableTensorCpuSyncProperty;

// make sure we always have a CPU resource
if (CpuTensor() == nullptr) {
if (!disableCpuSync && CpuTensor() == nullptr) {
CpuTensor() = std::make_shared<_winml::Tensor<T>>(shape_);
}

bool is_cpu;
if (SUCCEEDED(value->IsCpu(&is_cpu)) && is_cpu) {
if (isCpuOutput) {
// Get the data pointer and size
auto buffer = CpuTensor()->buffer(false);

Expand All @@ -371,7 +386,7 @@ struct TensorBase : TBase {
// In that case the underlying buffers will not match the engine output, and they need to be flushed.
CpuTensor()->flush();
}
} else {
} else if (!disableCpuSync) {
// If we got a gpu resource, we should move the data to the cpu so accessors can retrieve the data.
// We don't need to copy the engine provided dx resource into a local copy since we always preallocate gpu
// resources for tensors. Therefore we are certain that the returned dxresource is the same as the one we passed in
Expand Down
4 changes: 3 additions & 1 deletion winml/test/scenario/cppwinrt/scenariotestscppwinrt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -729,7 +729,9 @@ static void Scenario21RunModel2ChainZ() {
std::vector<int64_t> shape = {1, 3, 720, 720};
auto outputValue = TensorFloat::Create(shape); // FeatureValueFromFeatureValueDescriptor(input, nullptr);
// now bind the(empty) output so we have a marker to chain with
binding1.Bind(output.Name(), outputValue);
PropertySet outputBindProperties;
outputBindProperties.Insert(L"DisableTensorCpuSync", wf::PropertyValue::CreateBoolean(true));
binding1.Bind(output.Name(), outputValue, outputBindProperties);
// and leave the output unbound on the second model, we will fetch it later
// run both models async
WINML_EXPECT_NO_THROW(session1.EvaluateAsync(binding1, L""));
Expand Down

0 comments on commit 775f862

Please sign in to comment.