Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,12 @@ Bottom level categories:

## Unreleased

### Bug Fixes

#### DX12

- Align copies b/w textures and buffers via a single intermediate buffer per copy when `D3D12_FEATURE_DATA_D3D12_OPTIONS13.UnrestrictedBufferTextureCopyPitchSupported` is `false`. By @ErichDonGubler in [#7721](https://github.com/gfx-rs/wgpu/pull/7721).

## v27.0.2 (2025-10-03)

### Bug Fixes
Expand Down
9 changes: 9 additions & 0 deletions cts_runner/test.lst
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,15 @@ fails-if(vulkan) webgpu:api,operation,command_buffer,copyTextureToTexture:copy_d
webgpu:api,operation,command_buffer,copyTextureToTexture:copy_depth_stencil:format="stencil8"
// Fails with OOM in CI.
fails-if(dx12) webgpu:api,operation,command_buffer,image_copy:offsets_and_sizes:*
webgpu:api,operation,command_buffer,image_copy:undefined_params:initMethod="WriteTexture";checkMethod="FullCopyT2B";dimension="1d"
webgpu:api,operation,command_buffer,image_copy:undefined_params:initMethod="WriteTexture";checkMethod="FullCopyT2B";dimension="2d"
webgpu:api,operation,command_buffer,image_copy:undefined_params:initMethod="WriteTexture";checkMethod="FullCopyT2B";dimension="3d"
webgpu:api,operation,command_buffer,image_copy:undefined_params:initMethod="WriteTexture";checkMethod="PartialCopyT2B";dimension="1d"
fails-if(dx12) webgpu:api,operation,command_buffer,image_copy:undefined_params:initMethod="WriteTexture";checkMethod="PartialCopyT2B";dimension="2d"
webgpu:api,operation,command_buffer,image_copy:undefined_params:initMethod="WriteTexture";checkMethod="PartialCopyT2B";dimension="3d"
webgpu:api,operation,command_buffer,image_copy:undefined_params:initMethod="CopyB2T";checkMethod="FullCopyT2B";dimension="1d"
webgpu:api,operation,command_buffer,image_copy:undefined_params:initMethod="CopyB2T";checkMethod="FullCopyT2B";dimension="2d"
fails-if(dx12,vulkan,metal) webgpu:api,operation,command_buffer,image_copy:undefined_params:initMethod="CopyB2T";checkMethod="FullCopyT2B";dimension="3d"
webgpu:api,operation,compute,basic:memcpy:*
//FAIL: webgpu:api,operation,compute,basic:large_dispatch:*
webgpu:api,operation,compute_pipeline,overrides:*
Expand Down
12 changes: 1 addition & 11 deletions tests/tests/wgpu-gpu/regression/issue_6827.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,17 +25,7 @@ static TEST_SCATTER: GpuTestConfiguration = GpuTestConfiguration::new()
.expect_fail(FailureCase::backend_adapter(
wgpu::Backends::METAL,
"Apple Paravirtual device", // CI on M1
))
.expect_fail(
// Unfortunately this depends on if `D3D12_FEATURE_DATA_D3D12_OPTIONS13.UnrestrictedBufferTextureCopyPitchSupported`
// is true, which we have no way to encode. This reproduces in CI though, so not too worried about it.
FailureCase::backend(wgpu::Backends::DX12)
.flaky()
.validation_error(
"D3D12_PLACED_SUBRESOURCE_FOOTPRINT::Offset must be a multiple of 512",
)
.panic("GraphicsCommandList::close failed: The parameter is incorrect"),
),
)),
)
.run_async(|ctx| async move { run_test(ctx, true).await });

Expand Down
16 changes: 16 additions & 0 deletions wgpu-hal/src/dx12/adapter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,21 @@ impl super::Adapter {
.is_ok()
};

let unrestricted_buffer_texture_copy_pitch_supported = {
let mut features13 = Direct3D12::D3D12_FEATURE_DATA_D3D12_OPTIONS13::default();
unsafe {
device.CheckFeatureSupport(
Direct3D12::D3D12_FEATURE_D3D12_OPTIONS13,
<*mut _>::cast(&mut features13),
size_of_val(&features13) as u32,
)
}
.is_ok()
&& features13
.UnrestrictedBufferTextureCopyPitchSupported
.as_bool()
};

let mut max_sampler_descriptor_heap_size =
Direct3D12::D3D12_MAX_SHADER_VISIBLE_SAMPLER_HEAP_SIZE;
{
Expand Down Expand Up @@ -304,6 +319,7 @@ impl super::Adapter {
suballocation_supported: !info.name.contains("Iris(R) Xe"),
shader_model,
max_sampler_descriptor_heap_size,
unrestricted_buffer_texture_copy_pitch_supported,
};

// Theoretically vram limited, but in practice 2^20 is the limit
Expand Down
159 changes: 148 additions & 11 deletions wgpu-hal/src/dx12/command.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ use crate::{
dxgi::{name::ObjectExt, result::HResult as _},
},
dx12::borrow_interface_temporarily,
AccelerationStructureEntries,
AccelerationStructureEntries, CommandEncoder as _,
};

fn make_box(origin: &wgt::Origin3d, size: &crate::CopyExtent) -> Direct3D12::D3D12_BOX {
Expand Down Expand Up @@ -312,6 +312,78 @@ impl super::CommandEncoder {
}
}
}

unsafe fn buf_tex_intermediate<T>(
&mut self,
region: crate::BufferTextureCopy,
tex_fmt: wgt::TextureFormat,
copy_op: impl FnOnce(&mut Self, &super::Buffer, wgt::BufferSize, crate::BufferTextureCopy) -> T,
) -> (T, super::Buffer) {
let size = {
let copy_info = region.buffer_layout.get_buffer_texture_copy_info(
tex_fmt,
region.texture_base.aspect.map(),
&region.size.into(),
);
copy_info.unwrap().bytes_in_copy
};

let size = wgt::BufferSize::new(size).unwrap();

let buffer = {
let (resource, allocation) =
super::suballocation::DeviceAllocationContext::from(&*self)
.create_buffer(&crate::BufferDescriptor {
label: None,
size: size.get(),
usage: wgt::BufferUses::COPY_SRC | wgt::BufferUses::COPY_DST,
memory_flags: crate::MemoryFlags::empty(),
})
.expect(concat!(
"internal error: ",
"failed to allocate intermediate buffer ",
"for offset alignment"
));
super::Buffer {
resource,
size: size.get(),
allocation,
}
};

let mut region = region;
region.buffer_layout.offset = 0;

unsafe {
self.transition_buffers(
[crate::BufferBarrier {
buffer: &buffer,
usage: crate::StateTransition {
from: wgt::BufferUses::empty(),
to: wgt::BufferUses::COPY_DST,
},
}]
.into_iter(),
)
};

let t = copy_op(self, &buffer, size, region);

unsafe {
self.transition_buffers(
[crate::BufferBarrier {
buffer: &buffer,
usage: crate::StateTransition {
from: wgt::BufferUses::COPY_DST,
to: wgt::BufferUses::COPY_SRC,
},
}]
.into_iter(),
)
};

(t, buffer)
}
}

impl crate::CommandEncoder for super::CommandEncoder {
Expand Down Expand Up @@ -366,6 +438,7 @@ impl crate::CommandEncoder for super::CommandEncoder {
Ok(super::CommandBuffer { raw })
}
unsafe fn reset_all<I: Iterator<Item = super::CommandBuffer>>(&mut self, command_buffers: I) {
self.intermediate_copy_bufs.clear();
for cmd_buf in command_buffers {
self.free_lists.push(cmd_buf.raw);
}
Expand Down Expand Up @@ -612,30 +685,59 @@ impl crate::CommandEncoder for super::CommandEncoder {
) where
T: Iterator<Item = crate::BufferTextureCopy>,
{
let list = self.list.as_ref().unwrap();
for r in regions {
let offset_alignment = self.shared.private_caps.texture_data_placement_alignment();

for naive_copy_region in regions {
let is_offset_aligned = naive_copy_region.buffer_layout.offset % offset_alignment == 0;
let (final_copy_region, src) = if is_offset_aligned {
(naive_copy_region, src)
} else {
let (intermediate_to_dst_region, intermediate_buf) = unsafe {
let src_offset = naive_copy_region.buffer_layout.offset;
self.buf_tex_intermediate(
naive_copy_region,
dst.format,
|this, buf, size, intermediate_to_dst_region| {
let layout = crate::BufferCopy {
src_offset,
dst_offset: 0,
size,
};
this.copy_buffer_to_buffer(src, buf, [layout].into_iter());
intermediate_to_dst_region
},
)
};
self.intermediate_copy_bufs.push(intermediate_buf);
let intermediate_buf = self.intermediate_copy_bufs.last().unwrap();
(intermediate_to_dst_region, intermediate_buf)
};

let list = self.list.as_ref().unwrap();

let src_location = Direct3D12::D3D12_TEXTURE_COPY_LOCATION {
pResource: unsafe { borrow_interface_temporarily(&src.resource) },
Type: Direct3D12::D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT,
Anonymous: Direct3D12::D3D12_TEXTURE_COPY_LOCATION_0 {
PlacedFootprint: r.to_subresource_footprint(dst.format),
PlacedFootprint: final_copy_region.to_subresource_footprint(dst.format),
},
};
let dst_location = Direct3D12::D3D12_TEXTURE_COPY_LOCATION {
pResource: unsafe { borrow_interface_temporarily(&dst.resource) },
Type: Direct3D12::D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX,
Anonymous: Direct3D12::D3D12_TEXTURE_COPY_LOCATION_0 {
SubresourceIndex: dst.calc_subresource_for_copy(&r.texture_base),
SubresourceIndex: dst
.calc_subresource_for_copy(&final_copy_region.texture_base),
},
};

let src_box = make_box(&wgt::Origin3d::ZERO, &r.size);
let src_box = make_box(&wgt::Origin3d::ZERO, &final_copy_region.size);
unsafe {
list.CopyTextureRegion(
&dst_location,
r.texture_base.origin.x,
r.texture_base.origin.y,
r.texture_base.origin.z,
final_copy_region.texture_base.origin.x,
final_copy_region.texture_base.origin.y,
final_copy_region.texture_base.origin.z,
&src_location,
Some(&src_box),
)
Expand All @@ -652,8 +754,12 @@ impl crate::CommandEncoder for super::CommandEncoder {
) where
T: Iterator<Item = crate::BufferTextureCopy>,
{
let list = self.list.as_ref().unwrap();
for r in regions {
let copy_aligned = |this: &mut Self,
src: &super::Texture,
dst: &super::Buffer,
r: crate::BufferTextureCopy| {
let list = this.list.as_ref().unwrap();

let src_location = Direct3D12::D3D12_TEXTURE_COPY_LOCATION {
pResource: unsafe { borrow_interface_temporarily(&src.resource) },
Type: Direct3D12::D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX,
Expand All @@ -673,6 +779,37 @@ impl crate::CommandEncoder for super::CommandEncoder {
unsafe {
list.CopyTextureRegion(&dst_location, 0, 0, 0, &src_location, Some(&src_box))
};
};

let offset_alignment = self.shared.private_caps.texture_data_placement_alignment();

for r in regions {
let is_offset_aligned = r.buffer_layout.offset % offset_alignment == 0;
if is_offset_aligned {
copy_aligned(self, src, dst, r)
} else {
let orig_offset = r.buffer_layout.offset;
let (intermediate_to_dst_region, src) = unsafe {
self.buf_tex_intermediate(
r,
src.format,
|this, buf, size, intermediate_region| {
copy_aligned(this, src, buf, intermediate_region);
crate::BufferCopy {
src_offset: 0,
dst_offset: orig_offset,
size,
}
},
)
};

unsafe {
self.copy_buffer_to_buffer(&src, dst, [intermediate_to_dst_region].into_iter());
}

self.intermediate_copy_bufs.push(src);
};
}
}

Expand Down
1 change: 1 addition & 0 deletions wgpu-hal/src/dx12/device.rs
Original file line number Diff line number Diff line change
Expand Up @@ -796,6 +796,7 @@ impl crate::Device for super::Device {
mem_allocator: self.mem_allocator.clone(),
rtv_pool: Arc::clone(&self.rtv_pool),
temp_rtv_handles: Vec::new(),
intermediate_copy_bufs: Vec::new(),
null_rtv_handle: self.null_rtv_handle,
list: None,
free_lists: Vec::new(),
Expand Down
19 changes: 18 additions & 1 deletion wgpu-hal/src/dx12/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,11 @@ use windows::{
core::{Free, Interface},
Win32::{
Foundation,
Graphics::{Direct3D, Direct3D12, DirectComposition, Dxgi},
Graphics::{
Direct3D,
Direct3D12::{self, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT},
DirectComposition, Dxgi,
},
System::Threading,
},
};
Expand Down Expand Up @@ -601,6 +605,17 @@ struct PrivateCapabilities {
suballocation_supported: bool,
shader_model: naga::back::hlsl::ShaderModel,
max_sampler_descriptor_heap_size: u32,
unrestricted_buffer_texture_copy_pitch_supported: bool,
}

impl PrivateCapabilities {
fn texture_data_placement_alignment(&self) -> u64 {
if self.unrestricted_buffer_texture_copy_pitch_supported {
4
} else {
D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT.into()
}
}
}

#[derive(Default)]
Expand Down Expand Up @@ -840,6 +855,8 @@ pub struct CommandEncoder {
rtv_pool: Arc<Mutex<descriptor::CpuPool>>,
temp_rtv_handles: Vec<descriptor::Handle>,

intermediate_copy_bufs: Vec<Buffer>,

null_rtv_handle: descriptor::Handle,
list: Option<Direct3D12::ID3D12GraphicsCommandList>,
free_lists: Vec<Direct3D12::ID3D12GraphicsCommandList>,
Expand Down
30 changes: 30 additions & 0 deletions wgpu-hal/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2438,6 +2438,36 @@ pub struct CopyExtent {
pub depth: u32,
}

impl From<wgt::Extent3d> for CopyExtent {
fn from(value: wgt::Extent3d) -> Self {
let wgt::Extent3d {
width,
height,
depth_or_array_layers,
} = value;
Self {
width,
height,
depth: depth_or_array_layers,
}
}
}

impl From<CopyExtent> for wgt::Extent3d {
fn from(value: CopyExtent) -> Self {
let CopyExtent {
width,
height,
depth,
} = value;
Self {
width,
height,
depth_or_array_layers: depth,
}
}
}

#[derive(Clone, Debug)]
pub struct TextureCopy {
pub src_base: TextureCopyBase,
Expand Down
Loading