2 files changed: +22 −1 lines changed

@@ -99,9 +99,15 @@ void checkSingleTensor(
     C10_THROW_ERROR(ValueError, "Tensors must be XPU and dense");
   }
 
-  // Skip the following requirements for P2P operations
+  // Check memory format
   if (!tensor.is_contiguous(tensor.suggest_memory_format())) {
+    // P2P is a bit more relaxed and supports transfer of a transposed tensor
     if (p2p) {
+      // But the tensor must still be dense
+      if (!tensor.is_non_overlapping_and_dense()) {
+        C10_THROW_ERROR(
+            ValueError, "Tensors for P2P must be non-overlapping and dense");
+      }
       TORCH_WARN_ONCE(
           "Detected non-contiguous tensor in P2P operations. It is user "
           "responsibility to guarantee that source and destination tensors have "
@@ -248,6 +248,21 @@ def rank_to_GPU(self):
         # return rank to GPU map
         return init_multigpu_helper(self.world_size, "xccl")
 
+    @requires_xccl()
+    @skip_if_lt_x_gpu(2)
+    def test_send_recv_non_dense_tensor(self):
+        pg = self._create_process_group_xccl()
+        device = self.rank_to_GPU[self.rank][0]
+        full = torch.empty((64, 64), device=device).fill_(self.rank)
+        # Take a slice along the column dimension, making the view non-dense
+        block = full[:, 16:32]
+        if self.rank == 0:
+            with self.assertRaises(ValueError):
+                dist.send(block, dst=1)
+        elif self.rank == 1:
+            with self.assertRaises(ValueError):
+                dist.recv(block, src=0)
+
     @requires_xccl()
     @skip_but_pass_in_sandcastle_if(
         torch.xpu.device_count() < 2, "XCCL test requires 2+ GPUs"
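If a caller genuinely needs to transfer such a slice, the usual workaround (our suggestion, not something this PR adds) is to stage it through a dense buffer, reusing rank, block, and dist from the test above:

# Hypothetical workaround sketch: copy the strided view into a dense
# buffer before the P2P call, then scatter back on the receiving side.
if rank == 0:
    dist.send(block.contiguous(), dst=1)   # dense copy is safe to send
elif rank == 1:
    recv_buf = torch.empty(block.shape, device=block.device)
    dist.recv(recv_buf, src=0)
    block.copy_(recv_buf)                  # write back into the strided view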