pytorch · samanklesaria · Jul 11, 2025 · Jul 11, 2025 · Jul 11, 2025 · Jul 11, 2025
@@ -30,5 +30,5 @@ fi
 
 (
     cd test
-    pytest torchaudio_unittest -k "not backend and not /io/ and not prototype and not sox and not ffmpeg and not fairseq and not hdemucs"
+    pytest torchaudio_unittest -k "not backend and not /io/ and not prototype and not sox and not ffmpeg and not fairseq and not hdemucs and not (torchscript and rnnt)"
 )
@@ -28,7 +28,6 @@ if(BUILD_RNNT)
     rnnt/compute_alphas.cpp
     rnnt/compute_betas.cpp
     rnnt/compute.cpp
-    rnnt/autograd.cpp
     )
   if (USE_CUDA)
     list(

@@ -30,4 +30,5 @@ TORCH_LIBRARY_FRAGMENT(torchaudio, m) {
       "int blank,"
       "float clamp,"
       "bool fused_log_softmax) -> (Tensor, Tensor?)");
+  m.def("torchaudio::rnnt_loss_forward", &rnnt_loss);
 }
@@ -1760,6 +1760,19 @@ def _fix_waveform_shape(
     waveform_shift = waveform_shift.view(shape[:-1] + waveform_shift.shape[-1:])
     return waveform_shift
 
+class RnntLoss(torch.autograd.Function):
+    @staticmethod
+    def forward(ctx, *args):
+        output, saved = torch.ops.torchaudio.rnnt_loss_forward(*args)
+        ctx.save_for_backward(saved)
+        return output
+
+    @staticmethod
+    def backward(ctx, dy):
+        grad = ctx.saved_tensors[0]
+        grad_out = dy.view((-1, 1, 1, 1))
+        result = grad * grad_out;
+        return (result, None, None, None, None, None, None, None)
 
 def _rnnt_loss(
     logits: Tensor,
@@ -1803,14 +1816,14 @@ def _rnnt_loss(
     if blank < 0:  # reinterpret blank index if blank < 0.
         blank = logits.shape[-1] + blank
 
-    costs, _ = torch.ops.torchaudio.rnnt_loss(
-        logits=logits,
-        targets=targets,
-        logit_lengths=logit_lengths,
-        target_lengths=target_lengths,
-        blank=blank,
-        clamp=clamp,
-        fused_log_softmax=fused_log_softmax,
+    costs = RnntLoss.apply(
+        logits,
+        targets,
+        logit_lengths,
+        target_lengths,
+        blank,
+        clamp,
+        fused_log_softmax
     )
 
     if reduction == "mean":
-Original file line number
+Diff line change
@@ Expand Up / @@ -30,5 +30,5 @@ fi @@
     (
         cd test
-        pytest torchaudio_unittest -k "not backend and not /io/ and not prototype and not sox and not ffmpeg and not fairseq and not hdemucs"
+        pytest torchaudio_unittest -k "not backend and not /io/ and not prototype and not sox and not ffmpeg and not fairseq and not hdemucs and not (torchscript and rnnt)"
     )