From aa925a52fad9d6b98dac4c1b27f881bef7e88dad Mon Sep 17 00:00:00 2001
From: Patrick von Platen
Date: Tue, 19 May 2020 21:35:04 +0200
Subject: [PATCH] [Tests, GPU, SLOW] fix a bunch of GPU hardcoded tests in
 Pytorch (#4468)

* fix gpu slow tests in pytorch

* change model to device syntax
---
 examples/contrib/run_transfo_xl.py     |  2 +-
 src/transformers/modeling_utils.py     |  2 +-
 tests/test_modeling_ctrl.py            |  1 +
 tests/test_modeling_encoder_decoder.py |  2 +-
 tests/test_modeling_gpt2.py            |  2 ++
 tests/test_modeling_longformer.py      | 21 ++++++++++++++-------
 tests/test_modeling_openai.py          |  1 +
 tests/test_modeling_t5.py              |  3 +++
 tests/test_modeling_transfo_xl.py      |  1 +
 tests/test_modeling_xlm.py             |  3 ++-
 tests/test_modeling_xlnet.py           |  1 +
 11 files changed, 28 insertions(+), 11 deletions(-)

diff --git a/examples/contrib/run_transfo_xl.py b/examples/contrib/run_transfo_xl.py
index 84e2806a7..a28637c59 100644
--- a/examples/contrib/run_transfo_xl.py
+++ b/examples/contrib/run_transfo_xl.py
@@ -80,7 +80,7 @@ def main():
 
     # Load a pre-trained model
     model = TransfoXLLMHeadModel.from_pretrained(args.model_name)
-    model = model.to(device)
+    model.to(device)
 
     logger.info(
         "Evaluating with bsz {} tgt_len {} ext_len {} mem_len {} clamp_len {}".format(
diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py
index 8fa1c0d3d..7cd0df955 100644
--- a/src/transformers/modeling_utils.py
+++ b/src/transformers/modeling_utils.py
@@ -770,7 +770,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin):
             import torch_xla.core.xla_model as xm
 
             model = xm.send_cpu_data_to_device(model, xm.xla_device())
-            model = model.to(xm.xla_device())
+            model.to(xm.xla_device())
 
         return model
 
diff --git a/tests/test_modeling_ctrl.py b/tests/test_modeling_ctrl.py
index 098abc90c..285fba42b 100644
--- a/tests/test_modeling_ctrl.py
+++ b/tests/test_modeling_ctrl.py
@@ -219,6 +219,7 @@ class CTRLModelLanguageGenerationTest(unittest.TestCase):
     @slow
     def test_lm_generate_ctrl(self):
         model = CTRLLMHeadModel.from_pretrained("ctrl")
+        model.to(torch_device)
         input_ids = torch.tensor(
             [[11859, 0, 1611, 8]], dtype=torch.long, device=torch_device
         )  # Legal the president is
diff --git a/tests/test_modeling_encoder_decoder.py b/tests/test_modeling_encoder_decoder.py
index 57fc81734..a07c69b24 100644
--- a/tests/test_modeling_encoder_decoder.py
+++ b/tests/test_modeling_encoder_decoder.py
@@ -329,5 +329,5 @@ class EncoderDecoderModelTest(unittest.TestCase):
 
     @slow
     def test_real_bert_model_from_pretrained(self):
-        model = EncoderDecoderModel.from_pretrained("bert-base-uncased", "bert-base-uncased")
+        model = EncoderDecoderModel.from_encoder_decoder_pretrained("bert-base-uncased", "bert-base-uncased")
         self.assertIsNotNone(model)
diff --git a/tests/test_modeling_gpt2.py b/tests/test_modeling_gpt2.py
index 3d8b890f1..cdc67efee 100644
--- a/tests/test_modeling_gpt2.py
+++ b/tests/test_modeling_gpt2.py
@@ -343,6 +343,7 @@ class GPT2ModelLanguageGenerationTest(unittest.TestCase):
     @slow
     def test_lm_generate_gpt2(self):
         model = GPT2LMHeadModel.from_pretrained("gpt2")
+        model.to(torch_device)
         input_ids = torch.tensor([[464, 3290]], dtype=torch.long, device=torch_device)  # The dog
         expected_output_ids = [
             464,
@@ -372,6 +373,7 @@ class GPT2ModelLanguageGenerationTest(unittest.TestCase):
     @slow
     def test_lm_generate_distilgpt2(self):
         model = GPT2LMHeadModel.from_pretrained("distilgpt2")
+        model.to(torch_device)
         input_ids = torch.tensor([[464, 1893]], dtype=torch.long, device=torch_device)  # The president
         expected_output_ids = [
             464,
diff --git a/tests/test_modeling_longformer.py b/tests/test_modeling_longformer.py
index 6690750eb..0d7ba840d 100644
--- a/tests/test_modeling_longformer.py
+++ b/tests/test_modeling_longformer.py
@@ -214,32 +214,39 @@ class LongformerModelIntegrationTest(unittest.TestCase):
     @slow
     def test_inference_no_head(self):
         model = LongformerModel.from_pretrained("longformer-base-4096")
+        model.to(torch_device)
 
         # 'Hello world! ' repeated 1000 times
-        input_ids = torch.tensor([[0] + [20920, 232, 328, 1437] * 1000 + [2]])  # long input
+        input_ids = torch.tensor(
+            [[0] + [20920, 232, 328, 1437] * 1000 + [2]], dtype=torch.long, device=torch_device
+        )  # long input
 
         attention_mask = torch.ones(input_ids.shape, dtype=torch.long, device=input_ids.device)
         attention_mask[:, [1, 4, 21]] = 2  # Set global attention on a few random positions
 
         output = model(input_ids, attention_mask=attention_mask)[0]
 
-        expected_output_sum = torch.tensor(74585.8594)
-        expected_output_mean = torch.tensor(0.0243)
+        expected_output_sum = torch.tensor(74585.8594, device=torch_device)
+        expected_output_mean = torch.tensor(0.0243, device=torch_device)
         self.assertTrue(torch.allclose(output.sum(), expected_output_sum, atol=1e-4))
         self.assertTrue(torch.allclose(output.mean(), expected_output_mean, atol=1e-4))
 
     @slow
     def test_inference_masked_lm(self):
         model = LongformerForMaskedLM.from_pretrained("longformer-base-4096")
+        model.to(torch_device)
 
         # 'Hello world! ' repeated 1000 times
-        input_ids = torch.tensor([[0] + [20920, 232, 328, 1437] * 1000 + [2]])  # long input
+        input_ids = torch.tensor(
+            [[0] + [20920, 232, 328, 1437] * 1000 + [2]], dtype=torch.long, device=torch_device
+        )  # long input
 
         loss, prediction_scores = model(input_ids, masked_lm_labels=input_ids)
 
-        expected_loss = torch.tensor(0.0620)
-        expected_prediction_scores_sum = torch.tensor(-6.1599e08)
-        expected_prediction_scores_mean = torch.tensor(-3.0622)
+        expected_loss = torch.tensor(0.0620, device=torch_device)
+        expected_prediction_scores_sum = torch.tensor(-6.1599e08, device=torch_device)
+        expected_prediction_scores_mean = torch.tensor(-3.0622, device=torch_device)
+        input_ids = input_ids.to(torch_device)
 
         self.assertTrue(torch.allclose(loss, expected_loss, atol=1e-4))
         self.assertTrue(torch.allclose(prediction_scores.sum(), expected_prediction_scores_sum, atol=1e-4))
diff --git a/tests/test_modeling_openai.py b/tests/test_modeling_openai.py
index cd9a2cf23..83dd2a95f 100644
--- a/tests/test_modeling_openai.py
+++ b/tests/test_modeling_openai.py
@@ -227,6 +227,7 @@ class OPENAIGPTModelLanguageGenerationTest(unittest.TestCase):
     @slow
     def test_lm_generate_openai_gpt(self):
         model = OpenAIGPTLMHeadModel.from_pretrained("openai-gpt")
+        model.to(torch_device)
         input_ids = torch.tensor([[481, 4735, 544]], dtype=torch.long, device=torch_device)  # the president is
         expected_output_ids = [
             481,
diff --git a/tests/test_modeling_t5.py b/tests/test_modeling_t5.py
index f8e3114ad..fb9f450ae 100644
--- a/tests/test_modeling_t5.py
+++ b/tests/test_modeling_t5.py
@@ -444,6 +444,7 @@ class T5ModelIntegrationTests(unittest.TestCase):
         )
 
         input_ids = tok.encode(model.config.prefix + original_input, return_tensors="pt")
+        input_ids = input_ids.to(torch_device)
 
         output = model.generate(
             input_ids=input_ids,
@@ -471,6 +472,7 @@ class T5ModelIntegrationTests(unittest.TestCase):
         expected_translation = "Cette section d'images provenant de l'enregistrement infrarouge effectué par le télescope Spitzer montre un « portrait familial » de générations innombrables de étoiles : les plus anciennes sont observées sous forme de pointes bleues, alors que les « nouveau-nés » de couleur rose dans la salle des accouchements doivent être plus difficiles "
 
         input_ids = tok.encode(model.config.prefix + original_input, return_tensors="pt")
+        input_ids = input_ids.to(torch_device)
 
         output = model.generate(
             input_ids=input_ids,
@@ -498,6 +500,7 @@ class T5ModelIntegrationTests(unittest.TestCase):
         expected_translation = "Taco Bell a declarat că intenţionează să adauge 2 000 de locaţii în SUA până în 2022."
 
         input_ids = tok.encode(model.config.prefix + original_input, return_tensors="pt")
+        input_ids = input_ids.to(torch_device)
 
         output = model.generate(
             input_ids=input_ids,
diff --git a/tests/test_modeling_transfo_xl.py b/tests/test_modeling_transfo_xl.py
index c9317b7b7..343993f43 100644
--- a/tests/test_modeling_transfo_xl.py
+++ b/tests/test_modeling_transfo_xl.py
@@ -223,6 +223,7 @@ class TransfoXLModelLanguageGenerationTest(unittest.TestCase):
     @slow
     def test_lm_generate_transfo_xl_wt103(self):
         model = TransfoXLLMHeadModel.from_pretrained("transfo-xl-wt103")
+        model.to(torch_device)
         input_ids = torch.tensor(
             [
                 [
diff --git a/tests/test_modeling_xlm.py b/tests/test_modeling_xlm.py
index 6a5805c1a..cb7c6626f 100644
--- a/tests/test_modeling_xlm.py
+++ b/tests/test_modeling_xlm.py
@@ -434,6 +434,7 @@ class XLMModelLanguageGenerationTest(unittest.TestCase):
     @slow
     def test_lm_generate_xlm_mlm_en_2048(self):
         model = XLMWithLMHeadModel.from_pretrained("xlm-mlm-en-2048")
+        model.to(torch_device)
         input_ids = torch.tensor([[14, 447]], dtype=torch.long, device=torch_device)  # the president
         expected_output_ids = [
             14,
@@ -459,4 +460,4 @@ class XLMModelLanguageGenerationTest(unittest.TestCase):
         ]  # the president the president the president the president the president the president the president the president the president the president
         # TODO(PVP): this and other input_ids I tried for generation give pretty bad results. Not sure why. Model might just not be made for auto-regressive inference
         output_ids = model.generate(input_ids, do_sample=False)
-        self.assertListEqual(output_ids[0].numpy().tolist(), expected_output_ids)
+        self.assertListEqual(output_ids[0].cpu().numpy().tolist(), expected_output_ids)
diff --git a/tests/test_modeling_xlnet.py b/tests/test_modeling_xlnet.py
index e076c2c26..12b3e13f3 100644
--- a/tests/test_modeling_xlnet.py
+++ b/tests/test_modeling_xlnet.py
@@ -517,6 +517,7 @@ class XLNetModelLanguageGenerationTest(unittest.TestCase):
     @slow
     def test_lm_generate_xlnet_base_cased(self):
         model = XLNetLMHeadModel.from_pretrained("xlnet-base-cased")
+        model.to(torch_device)
         input_ids = torch.tensor(
             [
                 [
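
The hunks above repeat three small PyTorch device rules worth making explicit: nn.Module.to() moves a module's parameters in place and returns the module itself, so model.to(torch_device) needs no reassignment (the "change model to device syntax" commit note); Tensor.to() returns a new tensor, so the result must be reassigned, as in the T5 hunks; and a CUDA tensor must be copied back to the CPU before .numpy() will accept it, as in the XLM hunk. The following is a minimal standalone sketch of all three patterns, not part of the patch; torch_device here is a local stand-in for the constant the transformers test suite defines in its shared test utilities:

    import torch
    import torch.nn as nn

    # Stand-in for the test suite's shared device constant (an assumption here).
    torch_device = "cuda" if torch.cuda.is_available() else "cpu"

    model = nn.Linear(4, 4)
    model.to(torch_device)  # in place for nn.Module: reassignment is redundant

    inputs = torch.tensor([[1.0, 2.0, 3.0, 4.0]])
    inputs = inputs.to(torch_device)  # NOT in place for tensors: must reassign

    output = model(inputs)

    # .numpy() raises on a CUDA tensor, hence the .cpu() hop before comparing.
    print(output[0].detach().cpu().numpy().tolist())

Creating expected-value tensors directly on torch_device, as the Longformer hunks do, keeps the torch.allclose comparisons from mixing a GPU operand with a CPU one, which would raise a device-mismatch RuntimeError.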