temp: enable debug_print

windreamer · windreamer · commit 0362250a74f9 · 2025-09-22T19:58:27.000+08:00
diff --git a/src/turbomind/layers/sampling_layers/GuidedDecodeMaskLayer.cc b/src/turbomind/layers/sampling_layers/GuidedDecodeMaskLayer.cc
@@ -47,13 +47,13 @@ void GuidedDecodeMaskLayer<T>::Forward(TensorMap& args)
     const auto           bitmask_size = xgrammar::GetBitmaskSize(vocab_size_padded_);
     Tensor_<int32_t>     bitmask{{bsz, bitmask_size}, kCPU};
     Tensor_<int32_t>     bitmask_device{{bsz, bitmask_size}, kDEVICE};
-    std::vector<int64_t> bitmap_shape = {bsz, bitmask_size};
+    std::vector<int64_t> bitmask_shape = {bsz, bitmask_size};
 
     DLTensor bitmask_dltensor{bitmask.data(),
                               DLDevice{kDLCPU, 0},
-                              static_cast<int32_t>(bitmap_shape.size()),
+                              bitmask.ndim(),
                               xgrammar::GetBitmaskDLType(),
-                              bitmap_shape.data(),
+                              bitmask_shape.data(),
                               nullptr,
                               0};
     bool     need_apply = false;
@@ -67,6 +67,8 @@ void GuidedDecodeMaskLayer<T>::Forward(TensorMap& args)
 
     if (need_apply) {
         Copy(bitmask, bitmask_device);
+
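+        // cudaDeviceSynchronize();  // NOTE(review): sync left disabled; presumably a temporary debugging aid - confirm or remove before merge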
         ApplyTokenBitmaskInplace(logits, bitmask_device);
     }
 }