diff --git a/tests/test_modeling_albert.py b/tests/test_modeling_albert.py
index d1da4fb6c..1859f51aa 100644
--- a/tests/test_modeling_albert.py
+++ b/tests/test_modeling_albert.py
@@ -291,13 +291,14 @@ class AlbertModelTest(ModelTesterMixin, unittest.TestCase):
 class AlbertModelIntegrationTest(unittest.TestCase):
     @slow
     def test_inference_no_head_absolute_embedding(self):
-        model = AlbertForPreTraining.from_pretrained("albert-base-v2")
+        model = AlbertModel.from_pretrained("albert-base-v2")
         input_ids = torch.tensor([[0, 345, 232, 328, 740, 140, 1695, 69, 6078, 1588, 2]])
-        output = model(input_ids)[0]
-        expected_shape = torch.Size((1, 11, 30000))
+        attention_mask = torch.tensor([[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])
+        output = model(input_ids, attention_mask=attention_mask)[0]
+        expected_shape = torch.Size((1, 11, 768))
         self.assertEqual(output.shape, expected_shape)
         expected_slice = torch.tensor(
-            [[[4.6061, 0.7321, -1.7725], [4.6061, 0.7323, -1.7727], [4.6061, 0.7323, -1.7727]]]
+            [[[-0.6513, 1.5035, -0.2766], [-0.6515, 1.5046, -0.2780], [-0.6512, 1.5049, -0.2784]]]
         )

-        self.assertTrue(torch.allclose(output[:, :3, :3], expected_slice, atol=1e-4))
+        self.assertTrue(torch.allclose(output[:, 1:4, 1:4], expected_slice, atol=1e-4))
diff --git a/tests/test_modeling_bert.py b/tests/test_modeling_bert.py
index 7d2bd9bc2..03f76c264 100755
--- a/tests/test_modeling_bert.py
+++ b/tests/test_modeling_bert.py
@@ -555,35 +555,38 @@ class BertModelIntegrationTest(unittest.TestCase):
     def test_inference_no_head_absolute_embedding(self):
         model = BertModel.from_pretrained("bert-base-uncased")
         input_ids = torch.tensor([[0, 345, 232, 328, 740, 140, 1695, 69, 6078, 1588, 2]])
-        output = model(input_ids)[0]
+        attention_mask = torch.tensor([[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])
+        output = model(input_ids, attention_mask=attention_mask)[0]
         expected_shape = torch.Size((1, 11, 768))
         self.assertEqual(output.shape, expected_shape)
-        expected_slice = torch.tensor(
-            [[[-0.0483, 0.1188, -0.0313], [-0.0606, 0.1435, 0.0199], [-0.0235, 0.1519, 0.0175]]]
-        )
+        expected_slice = torch.tensor([[[0.4249, 0.1008, 0.7531], [0.3771, 0.1188, 0.7467], [0.4152, 0.1098, 0.7108]]])

-        self.assertTrue(torch.allclose(output[:, :3, :3], expected_slice, atol=1e-4))
+        self.assertTrue(torch.allclose(output[:, 1:4, 1:4], expected_slice, atol=1e-4))

     @slow
     def test_inference_no_head_relative_embedding_key(self):
         model = BertModel.from_pretrained("zhiheng-huang/bert-base-uncased-embedding-relative-key")
         input_ids = torch.tensor([[0, 345, 232, 328, 740, 140, 1695, 69, 6078, 1588, 2]])
-        output = model(input_ids)[0]
+        attention_mask = torch.tensor([[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])
+        output = model(input_ids, attention_mask=attention_mask)[0]
         expected_shape = torch.Size((1, 11, 768))
         self.assertEqual(output.shape, expected_shape)
         expected_slice = torch.tensor(
-            [[[0.3492, 0.4126, -0.1484], [0.2274, -0.0549, 0.1623], [0.5889, 0.6797, -0.0189]]]
+            [[[0.0756, 0.3142, -0.5128], [0.3761, 0.3462, -0.5477], [0.2052, 0.3760, -0.1240]]]
         )

-        self.assertTrue(torch.allclose(output[:, :3, :3], expected_slice, atol=1e-4))
+        self.assertTrue(torch.allclose(output[:, 1:4, 1:4], expected_slice, atol=1e-4))

     @slow
     def test_inference_no_head_relative_embedding_key_query(self):
         model = BertModel.from_pretrained("zhiheng-huang/bert-base-uncased-embedding-relative-key-query")
         input_ids = torch.tensor([[0, 345, 232, 328, 740, 140, 1695, 69, 6078, 1588, 2]])
-        output = model(input_ids)[0]
+        attention_mask = torch.tensor([[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])
+        output = model(input_ids, attention_mask=attention_mask)[0]
         expected_shape = torch.Size((1, 11, 768))
         self.assertEqual(output.shape, expected_shape)
-        expected_slice = torch.tensor([[[1.1677, 0.5129, 0.9524], [0.6659, 0.5958, 0.6688], [1.1714, 0.1764, 0.6266]]])
+        expected_slice = torch.tensor(
+            [[[0.6496, 0.3784, 0.8203], [0.8148, 0.5656, 0.2636], [-0.0681, 0.5597, 0.7045]]]
+        )

-        self.assertTrue(torch.allclose(output[:, :3, :3], expected_slice, atol=1e-4))
+        self.assertTrue(torch.allclose(output[:, 1:4, 1:4], expected_slice, atol=1e-4))
diff --git a/tests/test_modeling_convbert.py b/tests/test_modeling_convbert.py
index e561245bd..610affc45 100644
--- a/tests/test_modeling_convbert.py
+++ b/tests/test_modeling_convbert.py
@@ -416,18 +416,16 @@ class ConvBertModelTest(ModelTesterMixin, unittest.TestCase):
 @require_torch
 class ConvBertModelIntegrationTest(unittest.TestCase):
     @slow
-    def test_inference_masked_lm(self):
+    def test_inference_no_head(self):
         model = ConvBertModel.from_pretrained("YituTech/conv-bert-base")
-        input_ids = torch.tensor([[0, 1, 2, 3, 4, 5]])
+        input_ids = torch.tensor([[1, 2, 3, 4, 5, 6]])
         output = model(input_ids)[0]
-        print(output[:, :3, :3])

         expected_shape = torch.Size((1, 6, 768))
         self.assertEqual(output.shape, expected_shape)

-        # TODO Replace values below with what was printed above.
         expected_slice = torch.tensor(
-            [[[-0.0348, -0.4686, -0.3064], [0.2264, -0.2699, -0.7423], [0.1032, -0.4501, -0.5828]]]
+            [[[-0.0864, -0.4898, -0.3677], [0.1434, -0.2952, -0.7640], [-0.0112, -0.4432, -0.5432]]]
         )

         self.assertTrue(torch.allclose(output[:, :3, :3], expected_slice, atol=1e-4))
diff --git a/tests/test_modeling_deberta.py b/tests/test_modeling_deberta.py
index f2af7ce43..1c66617b8 100644
--- a/tests/test_modeling_deberta.py
+++ b/tests/test_modeling_deberta.py
@@ -13,12 +13,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-
-import random
 import unittest

-import numpy as np
-
 from transformers import is_torch_available
 from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device

@@ -275,16 +271,13 @@ class DebertaModelIntegrationTest(unittest.TestCase):
     @slow
     def test_inference_no_head(self):
-        random.seed(0)
-        np.random.seed(0)
-        torch.manual_seed(0)
-        torch.cuda.manual_seed_all(0)
         model = DebertaModel.from_pretrained("microsoft/deberta-base")
         input_ids = torch.tensor([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
-        output = model(input_ids)[0]
+        attention_mask = torch.tensor([[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])
+        output = model(input_ids, attention_mask=attention_mask)[0]
         # compare the actual values for a slice.
         expected_slice = torch.tensor(
-            [[[-0.0218, -0.6641, -0.3665], [-0.3907, -0.4716, -0.6640], [0.7461, 1.2570, -0.9063]]]
+            [[[-0.5986, -0.8055, -0.8462], [1.4484, -0.9348, -0.8059], [0.3123, 0.0032, -1.4131]]]
         )

-        self.assertTrue(torch.allclose(output[:, :3, :3], expected_slice, atol=1e-4), f"{output[:, :3, :3]}")
+        self.assertTrue(torch.allclose(output[:, 1:4, 1:4], expected_slice, atol=1e-4), f"{output[:, 1:4, 1:4]}")
diff --git a/tests/test_modeling_deberta_v2.py b/tests/test_modeling_deberta_v2.py
index 1f183aa6e..718682edb 100644
--- a/tests/test_modeling_deberta_v2.py
+++ b/tests/test_modeling_deberta_v2.py
@@ -13,12 +13,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-
-import random
 import unittest

-import numpy as np
-
 from transformers import is_torch_available
 from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device

@@ -275,16 +271,13 @@ class DebertaV2ModelIntegrationTest(unittest.TestCase):
     @slow
     def test_inference_no_head(self):
-        random.seed(0)
-        np.random.seed(0)
-        torch.manual_seed(0)
-        torch.cuda.manual_seed_all(0)
         model = DebertaV2Model.from_pretrained("microsoft/deberta-v2-xlarge")
         input_ids = torch.tensor([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
-        output = model(input_ids)[0]
+        attention_mask = torch.tensor([[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])
+        output = model(input_ids, attention_mask=attention_mask)[0]
         # compare the actual values for a slice.
         expected_slice = torch.tensor(
-            [[[-0.2913, 0.2647, 0.5627], [-0.4318, 0.1389, 0.3881], [-0.2929, -0.2489, 0.3452]]]
+            [[[0.2356, 0.1948, 0.0369], [-0.1063, 0.3586, -0.5152], [-0.6399, -0.0259, -0.2525]]]
         )

-        self.assertTrue(torch.allclose(output[:, :3, :3], expected_slice, atol=1e-4), f"{output[:, :3, :3]}")
+        self.assertTrue(torch.allclose(output[:, 1:4, 1:4], expected_slice, atol=1e-4), f"{output[:, 1:4, 1:4]}")
diff --git a/tests/test_modeling_distilbert.py b/tests/test_modeling_distilbert.py
index 9b65e69f8..d6c3dc54b 100644
--- a/tests/test_modeling_distilbert.py
+++ b/tests/test_modeling_distilbert.py
@@ -256,11 +256,12 @@ class DistilBertModelIntergrationTest(unittest.TestCase):
     def test_inference_no_head_absolute_embedding(self):
         model = DistilBertModel.from_pretrained("distilbert-base-uncased")
         input_ids = torch.tensor([[0, 345, 232, 328, 740, 140, 1695, 69, 6078, 1588, 2]])
-        output = model(input_ids)[0]
+        attention_mask = torch.tensor([[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])
+        output = model(input_ids, attention_mask=attention_mask)[0]
         expected_shape = torch.Size((1, 11, 768))
         self.assertEqual(output.shape, expected_shape)
         expected_slice = torch.tensor(
-            [[[0.4026, -0.2919, 0.3902], [0.3828, -0.2129, 0.3563], [0.3919, -0.2287, 0.3438]]]
+            [[[-0.1639, 0.3299, 0.1648], [-0.1746, 0.3289, 0.1710], [-0.1884, 0.3357, 0.1810]]]
         )

-        self.assertTrue(torch.allclose(output[:, :3, :3], expected_slice, atol=1e-4))
+        self.assertTrue(torch.allclose(output[:, 1:4, 1:4], expected_slice, atol=1e-4))
diff --git a/tests/test_modeling_electra.py b/tests/test_modeling_electra.py
index 601ab6b29..88138a587 100644
--- a/tests/test_modeling_electra.py
+++ b/tests/test_modeling_electra.py
@@ -350,13 +350,14 @@ class ElectraModelTest(ModelTesterMixin, unittest.TestCase):
 class ElectraModelIntegrationTest(unittest.TestCase):
     @slow
     def test_inference_no_head_absolute_embedding(self):
-        model = ElectraForPreTraining.from_pretrained("google/electra-small-discriminator")
+        model = ElectraModel.from_pretrained("google/electra-small-discriminator")
         input_ids = torch.tensor([[0, 345, 232, 328, 740, 140, 1695, 69, 6078, 1588, 2]])
-        output = model(input_ids)[0]
-        expected_shape = torch.Size((1, 11))
+        attention_mask = torch.tensor([[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])
+        output = model(input_ids, attention_mask=attention_mask)[0]
+        expected_shape = torch.Size((1, 11, 256))
         self.assertEqual(output.shape, expected_shape)
         expected_slice = torch.tensor(
-            [[-8.9253, -4.0305, -3.9306, -3.8774, -4.1873, -4.1280, 0.9429, -4.1672, 0.9281, 0.0410, -3.4823]]
+            [[[0.4471, 0.6821, -0.3265], [0.4627, 0.5255, -0.3668], [0.4532, 0.3313, -0.4344]]]
         )

-        self.assertTrue(torch.allclose(output, expected_slice, atol=1e-4))
+        self.assertTrue(torch.allclose(output[:, 1:4, 1:4], expected_slice, atol=1e-4))
diff --git a/tests/test_modeling_mbart.py b/tests/test_modeling_mbart.py
index d51e6056b..9428acb47 100644
--- a/tests/test_modeling_mbart.py
+++ b/tests/test_modeling_mbart.py
@@ -343,7 +343,7 @@ class MBartEnroIntegrationTest(AbstractSeq2SeqIntegrationTest):
     ]
     tgt_text = [
         "Şeful ONU declară că nu există o soluţie militară în Siria",
-        'Secretarul General Ban Ki-moon declară că răspunsul său la intensificarea sprijinului militar al Rusiei pentru Siria este că "nu există o soluţie militară" la conflictul de aproape cinci ani şi că noi arme nu vor face decât să înrăutăţească violenţa şi mizeria pentru milioane de oameni.',
+        'Secretarul General Ban Ki-moon declară că răspunsul său la intensificarea sprijinului militar al Rusiei pentru Siria este că "nu există o soluţie militară" la conflictul de aproape cinci ani şi că noi arme nu vor face decât să înrăutăţească violenţa şi mizeria a milioane de oameni.',
     ]
     expected_src_tokens = [8274, 127873, 25916, 7, 8622, 2071, 438, 67485, 53, 187895, 23, 51712, 2, 250004]

@@ -359,7 +359,9 @@ class MBartEnroIntegrationTest(AbstractSeq2SeqIntegrationTest):
     @slow
     def test_enro_generate_batch(self):
-        batch: BatchEncoding = self.tokenizer(self.src_text, return_tensors="pt").to(torch_device)
+        batch: BatchEncoding = self.tokenizer(self.src_text, return_tensors="pt", padding=True, truncation=True).to(
+            torch_device
+        )
         translated_tokens = self.model.generate(**batch)
         decoded = self.tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)
         assert self.tgt_text == decoded
diff --git a/tests/test_modeling_squeezebert.py b/tests/test_modeling_squeezebert.py
index 18f41e8cf..493326157 100644
--- a/tests/test_modeling_squeezebert.py
+++ b/tests/test_modeling_squeezebert.py
@@ -278,9 +278,9 @@ class SqueezeBertModelIntegrationTest(unittest.TestCase):
     def test_inference_classification_head(self):
         model = SqueezeBertForSequenceClassification.from_pretrained("squeezebert/squeezebert-mnli")

-        input_ids = torch.tensor([[0, 29414, 232, 328, 740, 1140, 12695, 69, 13, 1588, 2]])
+        input_ids = torch.tensor([[1, 29414, 232, 328, 740, 1140, 12695, 69, 13, 1588, 2]])
         output = model(input_ids)[0]
         expected_shape = torch.Size((1, 3))
         self.assertEqual(output.shape, expected_shape)
-        expected_tensor = torch.tensor([[0.5075, 0.0682, -0.5881]])
+        expected_tensor = torch.tensor([[0.6401, -0.0349, -0.6041]])
         self.assertTrue(torch.allclose(output, expected_tensor, atol=1e-4))