1 file changed: +5 -3 lines changed
src/turbomind/layers/sampling_layers: +5 -3 lines changed
@@ -47,13 +47,13 @@ void GuidedDecodeMaskLayer<T>::Forward(TensorMap& args)
4747    const  auto            bitmask_size = xgrammar::GetBitmaskSize (vocab_size_padded_);
4848    Tensor_<int32_t >     bitmask{{bsz, bitmask_size}, kCPU };
4949    Tensor_<int32_t >     bitmask_device{{bsz, bitmask_size}, kDEVICE };
50-     std::vector<int64_t > bitmap_shape  = {bsz, bitmask_size};
50+     std::vector<int64_t > bitmask_shape  = {bsz, bitmask_size};
5151
5252    DLTensor bitmask_dltensor{bitmask.data (),
5353                              DLDevice{kDLCPU , 0 },
54-                               static_cast < int32_t >(bitmap_shape. size () ),
54+                               bitmask. ndim ( ),
5555                              xgrammar::GetBitmaskDLType (),
56-                               bitmap_shape .data (),
56+                               bitmask_shape .data (),
5757                              nullptr ,
5858                              0 };
5959    bool      need_apply = false ;
@@ -67,6 +67,8 @@ void GuidedDecodeMaskLayer<T>::Forward(TensorMap& args)
6767
6868    if  (need_apply) {
6969        Copy (bitmask, bitmask_device);
70+ 
71+         //  cudaDeviceSynchronize();
7072        ApplyTokenBitmaskInplace (logits, bitmask_device);
7173    }
7274}
    
 
   
 
     
   
   
          
     
  
    
     
 
    
      
     
 
     
    You can’t perform that action at this time.
  
 
    
  
     
    
      
        
     
 
       
      
     
   
 
    
    
  
 
  
 
     
    
0 commit comments