diff --git a/src/transformers/modeling_tf_mobilebert.py b/src/transformers/modeling_tf_mobilebert.py index e0e2b1fd3..6eb00eb45 100644 --- a/src/transformers/modeling_tf_mobilebert.py +++ b/src/transformers/modeling_tf_mobilebert.py @@ -370,7 +370,7 @@ class TFMobileBertOutput(tf.keras.layers.Layer): hidden_states = self.dense(hidden_states) if not self.use_bottleneck: - hidden_states = self.dropout(hidden_states) + hidden_states = self.dropout(hidden_states, training=training) hidden_states = self.LayerNorm(hidden_states + residual_tensor_1) else: hidden_states = self.LayerNorm(hidden_states + residual_tensor_1)