
Commit 3b4366b

Fix CI failures for UB overlap changes (#2149)

Signed-off-by: djns99 <[email protected]>
1 parent: 67fcc15

4 files changed, +14 -6 lines


examples/pytorch/comm_gemm_overlap/te_layer_with_overlap.py

Lines changed: 5 additions & 1 deletion

@@ -264,7 +264,11 @@ def dist_print(msg, end="\n", group=nccl_world, src=0, debug=False, error=False)
         [batched_size, hidden_size],
         tp_size,
         quantization_modes=[
-            UserBufferQuantizationMode.FP8 if opts.fp8 else UserBufferQuantizationMode.NONE
+            (
+                te.module.base.UserBufferQuantizationMode.FP8
+                if opts.fp8
+                else te.module.base.UserBufferQuantizationMode.NONE
+            )
         ],
         dtype=torch.bfloat16,
         bootstrap_backend=opts.bootstrap_backend,
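The fix in all three scripts is the same: the bare UserBufferQuantizationMode name is replaced with its fully qualified path, te.module.base.UserBufferQuantizationMode, since the unqualified name was evidently no longer in scope after the UB overlap changes and broke CI. A minimal sketch of the corrected call pattern follows; the shapes, tp_size, and use_fp8 flag are illustrative assumptions, not values from the commit:

import torch
import transformer_engine.pytorch as te

use_fp8 = True  # assumed stand-in for opts.fp8

# Register userbuffer (UB) communication buffers, referring to the
# quantization-mode enum through its fully qualified module path.
te.module.base.initialize_ub(
    [1024, 4096],  # [batched_size, hidden_size]; illustrative values
    2,             # tp_size; illustrative value
    quantization_modes=[
        te.module.base.UserBufferQuantizationMode.FP8
        if use_fp8
        else te.module.base.UserBufferQuantizationMode.NONE
    ],
    dtype=torch.bfloat16,
)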

tests/pytorch/distributed/run_layer_with_overlap.py

Lines changed: 6 additions & 2 deletions

@@ -420,10 +420,14 @@ def dist_print(msg, src=None, end="\n", debug=False, error=False):
     }

     quantization_modes = [
-        UserBufferQuantizationMode.FP8 if opts.fp8 else UserBufferQuantizationMode.NONE
+        (
+            te.module.base.UserBufferQuantizationMode.FP8
+            if opts.fp8
+            else te.module.base.UserBufferQuantizationMode.NONE
+        )
     ]
     if opts.first_last_layers_bf16 and opts.fp8:
-        quantization_modes.append(UserBufferQuantizationMode.NONE)
+        quantization_modes.append(te.module.base.UserBufferQuantizationMode.NONE)

     te.module.base.initialize_ub(
         [opts.seq_length * opts.batch_size, opts.num_heads * opts.head_dim],
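A detail specific to this test: when opts.first_last_layers_bf16 is set alongside opts.fp8, the first and last layers run unquantized while the interior layers use FP8, so UB buffers must be registered for both modes. A hedged sketch of that list construction, with plain booleans standing in for the parsed options:

import transformer_engine.pytorch as te

fp8 = True                     # assumed stand-in for opts.fp8
first_last_layers_bf16 = True  # assumed stand-in for opts.first_last_layers_bf16

# FP8 buffers for the quantized interior layers...
quantization_modes = [te.module.base.UserBufferQuantizationMode.FP8]
if first_last_layers_bf16 and fp8:
    # ...plus non-quantized buffers for the bf16 boundary layers.
    quantization_modes.append(te.module.base.UserBufferQuantizationMode.NONE)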

tests/pytorch/distributed/test_fusible_ops_with_userbuffers.py

Lines changed: 2 additions & 2 deletions

@@ -508,9 +508,9 @@ def main() -> None:
         torch.distributed.get_world_size(group),
         quantization_modes=[
             (
-                UserBufferQuantizationMode.FP8
+                te.module.base.UserBufferQuantizationMode.FP8
                 if model_config.quantization is not None
-                else UserBufferQuantizationMode.NONE
+                else te.module.base.UserBufferQuantizationMode.NONE
             )
         ],
         dtype=model_config.dtype,

transformer_engine/pytorch/module/base.py

Lines changed: 1 addition & 1 deletion

@@ -473,7 +473,7 @@ def add_ub(
     fp8_buf = (name in layers_all_gather_overlap) or (
         user_ub_cfg[name].get("fp8_buf", False) and name in methods["pipeline"]
     )
-    ub_cfg.update(ub_cfgs[name])
+    ub_cfg.update(user_ub_cfg[name])
     ub_cfg["fp8_buf"] = fp8_buf
     add_ub(name, quantization_mode, **ub_cfg)
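The base.py change fixes a different bug: the merge line referenced ub_cfgs, while the surrounding code (see the fp8_buf computation just above) reads the user-supplied overrides from user_ub_cfg; the stale name presumably no longer resolved after the UB overlap refactor. A hedged sketch of the merge-then-override pattern the fixed line implements, with default_cfg as an assumed stand-in for the defaults built earlier in the function:

ub_cfg = dict(default_cfg)        # per-buffer defaults (assumed name)
ub_cfg.update(user_ub_cfg[name])  # user overrides replace the defaults
ub_cfg["fp8_buf"] = fp8_buf       # computed flag wins over user input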
