Only rely on CudnnRNNV3 in TF

fchollet · fchollet · commit 04cad40272cb · 2024-04-08T10:43:50.000-07:00
diff --git a/keras/backend/tensorflow/rnn.py b/keras/backend/tensorflow/rnn.py
@@ -682,13 +682,20 @@ def _cudnn_gru(
         _assert_valid_mask(mask)
         sequence_lengths = _compute_sequence_length_from_mask(mask, time_major)
     else:
-        sequence_lengths = None
+        if time_major:
+            batch_dim = tf.shape(inputs)[1]
+            max_sequence_length = tf.shape(inputs)[0]
+        else:
+            batch_dim = tf.shape(inputs)[0]
+            max_sequence_length = tf.shape(inputs)[1]
+        sequence_lengths = tf.fill([batch_dim], max_sequence_length)
 
     if not time_major and sequence_lengths is None:
         inputs = tf.transpose(inputs, perm=(1, 0, 2))
         seq_axis, batch_axis = (0, 1)
     else:
         seq_axis, batch_axis = (0, 1) if time_major else (1, 0)
+
     # For init_h, cuDNN expects one more dim of num_layers before or after batch
     # dim for time major or batch major inputs respectively
     init_h = tf.expand_dims(initial_state, axis=seq_axis)
@@ -719,49 +726,36 @@ def _cudnn_gru(
         transpose_weights=True,
     )
 
-    if sequence_lengths is not None:
-        if go_backwards:
-            # Three reversals are required. E.g.,
-            # normal input = [1, 2, 3, 0, 0]  # where 0 need to be masked
-            # reversed_input_to_cudnn = [3, 2, 1, 0, 0]
-            # output_from_cudnn = [6, 5, 4, 0, 0]
-            # expected_output = [0, 0, 6, 5 ,4]
-            inputs = tf.reverse_sequence(
-                inputs,
-                sequence_lengths,
-                seq_axis=seq_axis,
-                batch_axis=batch_axis,
-            )
-        outputs, h, _, _, _ = tf.raw_ops.CudnnRNNV3(
-            input=inputs,
-            input_h=init_h,
-            input_c=0,
-            params=params,
-            is_training=True,
-            rnn_mode="gru",
-            sequence_lengths=sequence_lengths,
-            time_major=time_major,
+    if go_backwards:
+        # Three reversals are required. E.g.,
+        # normal input = [1, 2, 3, 0, 0]  # where the 0s need to be masked
+        # reversed_input_to_cudnn = [3, 2, 1, 0, 0]
+        # output_from_cudnn = [6, 5, 4, 0, 0]
+        # expected_output = [0, 0, 6, 5, 4]
+        inputs = tf.reverse_sequence(
+            inputs,
+            sequence_lengths,
+            seq_axis=seq_axis,
+            batch_axis=batch_axis,
         )
-        if go_backwards:
-            outputs = tf.reverse_sequence(
-                outputs,
-                sequence_lengths,
-                seq_axis=seq_axis,
-                batch_axis=batch_axis,
-            )
-            outputs = tf.reverse(outputs, axis=[seq_axis])
-    else:
-        if go_backwards:
-            # Reverse axis 0 since the input is already convert to time major.
-            inputs = tf.reverse(inputs, axis=[0])
-        outputs, h, _, _ = tf.raw_ops.CudnnRNN(
-            input=inputs,
-            input_h=init_h,
-            input_c=0,
-            params=params,
-            is_training=True,
-            rnn_mode="gru",
+    outputs, h, _, _, _ = tf.raw_ops.CudnnRNNV3(
+        input=inputs,
+        input_h=init_h,
+        input_c=0,
+        params=params,
+        is_training=True,
+        rnn_mode="gru",
+        sequence_lengths=sequence_lengths,
+        time_major=time_major,
+    )
+    if go_backwards:
+        outputs = tf.reverse_sequence(
+            outputs,
+            sequence_lengths,
+            seq_axis=seq_axis,
+            batch_axis=batch_axis,
         )
+        outputs = tf.reverse(outputs, axis=[seq_axis])
 
     last_output = outputs[-1]
     if not time_major and sequence_lengths is None and return_sequences:
@@ -880,7 +874,13 @@ def _cudnn_lstm(
         _assert_valid_mask(mask)
         sequence_lengths = _compute_sequence_length_from_mask(mask, time_major)
     else:
-        sequence_lengths = None
+        if time_major:
+            batch_dim = tf.shape(inputs)[1]
+            max_sequence_length = tf.shape(inputs)[0]
+        else:
+            batch_dim = tf.shape(inputs)[0]
+            max_sequence_length = tf.shape(inputs)[1]
+        sequence_lengths = tf.fill([batch_dim], max_sequence_length)
 
     if not time_major and sequence_lengths is None:
         inputs = tf.transpose(inputs, perm=(1, 0, 2))
@@ -918,52 +918,36 @@ def _cudnn_lstm(
         transpose_weights=True,
     )
 
-    if sequence_lengths is not None:
-        if go_backwards:
-            # Three reversals are required. E.g.,
-            # normal input = [1, 2, 3, 0, 0]  # where 0 need to be masked
-            # reversed_input_to_cudnn = [3, 2, 1, 0, 0]
-            # output_from_cudnn = [6, 5, 4, 0, 0]
-            # expected_output = [0, 0, 6, 5 ,4]
-            inputs = tf.reverse_sequence(
-                inputs,
-                sequence_lengths,
-                seq_axis=seq_axis,
-                batch_axis=batch_axis,
-            )
-        outputs, h, c, _, _ = tf.raw_ops.CudnnRNNV3(
-            input=inputs,
-            input_h=init_h,
-            input_c=init_c,
-            params=params,
-            is_training=True,
-            rnn_mode="lstm",
-            sequence_lengths=sequence_lengths,
-            time_major=time_major,
+    if go_backwards:
+        # Three reversals are required. E.g.,
+        # normal input = [1, 2, 3, 0, 0]  # where the 0s need to be masked
+        # reversed_input_to_cudnn = [3, 2, 1, 0, 0]
+        # output_from_cudnn = [6, 5, 4, 0, 0]
+        # expected_output = [0, 0, 6, 5, 4]
+        inputs = tf.reverse_sequence(
+            inputs,
+            sequence_lengths,
+            seq_axis=seq_axis,
+            batch_axis=batch_axis,
         )
-        if go_backwards:
-            outputs = tf.reverse_sequence(
-                outputs,
-                sequence_lengths,
-                seq_axis=seq_axis,
-                batch_axis=batch_axis,
-            )
-            outputs = tf.reverse(outputs, axis=[seq_axis])
-    else:
-        # # Fill the array with shape [batch] with value of max timesteps.
-        # sequence_length = array_ops.fill([array_ops.shape(inputs)[1]],
-        #                                  array_ops.shape(inputs)[0])
-        if go_backwards:
-            # Reverse axis 0 since the input is already convert to time major.
-            inputs = tf.reverse(inputs, axis=[0])
-        outputs, h, c, _ = tf.raw_ops.CudnnRNN(
-            input=inputs,
-            input_h=init_h,
-            input_c=init_c,
-            params=params,
-            is_training=True,
-            rnn_mode="lstm",
+    outputs, h, c, _, _ = tf.raw_ops.CudnnRNNV3(
+        input=inputs,
+        input_h=init_h,
+        input_c=init_c,
+        params=params,
+        is_training=True,
+        rnn_mode="lstm",
+        sequence_lengths=sequence_lengths,
+        time_major=time_major,
+    )
+    if go_backwards:
+        outputs = tf.reverse_sequence(
+            outputs,
+            sequence_lengths,
+            seq_axis=seq_axis,
+            batch_axis=batch_axis,
         )
+        outputs = tf.reverse(outputs, axis=[seq_axis])
 
     last_output = outputs[-1]
     if not time_major and sequence_lengths is None and return_sequences: