Skip to content

Commit c9bd7a1

Browse files
authored
P2P tensors must be dense (#2161)
Refers to pytorch/pytorch@11a231ef528
1 parent 8342cdd commit c9bd7a1

File tree

2 files changed

+22
-1
lines changed

2 files changed

+22
-1
lines changed

src/xccl/ProcessGroupXCCL.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,9 +99,15 @@ void checkSingleTensor(
9999
C10_THROW_ERROR(ValueError, "Tensors must be XPU and dense");
100100
}
101101

102-
// Skip the following requirements for P2P operations
102+
// Check memory format
103103
if (!tensor.is_contiguous(tensor.suggest_memory_format())) {
104+
// P2P is a bit relaxed, supporting transfer of a transposed tensor
104105
if (p2p) {
106+
// But must be dense still
107+
if (!tensor.is_non_overlapping_and_dense()) {
108+
C10_THROW_ERROR(
109+
ValueError, "Tensors for P2P must be non-overlapping and dense");
110+
}
105111
TORCH_WARN_ONCE(
106112
"Detected non-contiguous tensor in P2P operations. It is user "
107113
"responsibility to guarantee that source and destination tensors have "

test/xpu/distributed/test_c10d_xccl.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -248,6 +248,21 @@ def rank_to_GPU(self):
248248
# return rank to GPU map
249249
return init_multigpu_helper(self.world_size, "xccl")
250250

@requires_xccl()
@skip_if_lt_x_gpu(2)
def test_send_recv_non_dense_tensor(self):
    """P2P send/recv of a non-dense tensor must be rejected with ValueError.

    A column slice of a 2-D tensor keeps gaps between rows in memory, so
    the resulting view is non-contiguous AND non-dense; both the sending
    and the receiving rank should see the ValueError raised by the dense
    check in checkSingleTensor.
    """
    pg = self._create_process_group_xccl()
    device = self.rank_to_GPU[self.rank][0]
    source = torch.empty((64, 64), device=device).fill_(self.rank)
    # Take a slice in col dimension, making it non-dense
    non_dense = source[:, 16:32]
    sender, receiver = 0, 1
    if self.rank == sender:
        with self.assertRaises(ValueError):
            dist.send(non_dense, dst=receiver)
    elif self.rank == receiver:
        with self.assertRaises(ValueError):
            dist.recv(non_dense, src=sender)
265+
251266
@requires_xccl()
252267
@skip_but_pass_in_sandcastle_if(
253268
torch.xpu.device_count() < 2, "XCCL test requires 2+ GPUs"

0 commit comments

Comments
 (0)