Set KMP_AFFINITY and OMP_NUM_THREADS to address slow performance when many threads are in use
This commit is contained in:
Родитель
6021962790
Коммит
0d02b8a06b
|
@ -14,7 +14,6 @@ if nlp_path not in sys.path:
|
|||
sys.path.insert(0, nlp_path)
|
||||
|
||||
sys.path.insert(0, "./")
|
||||
print(sys.path)
|
||||
from utils_nlp.dataset.cnndm import CNNDMBertSumProcessedData, CNNDMSummarizationDataset
|
||||
from utils_nlp.models.transformers.extractive_summarization import (
|
||||
ExtractiveSummarizer,
|
||||
|
@ -25,7 +24,8 @@ from utils_nlp.models.transformers.extractive_summarization import (
|
|||
# os.environ["NCCL_BLOCKING_WAIT"] = "1"
|
||||
|
||||
os.environ["NCCL_IB_DISABLE"] = "0"
|
||||
|
||||
os.environ['OMP_NUM_THREADS'] = str(torch.cuda.device_count())
|
||||
os.environ["KMP_AFFINITY"] = "verbose"
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument(
|
||||
|
@ -207,6 +207,7 @@ def main_worker(local_rank, ngpus_per_node, summarizer, args):
|
|||
else:
|
||||
save_every = SAVE_EVERY
|
||||
# """
|
||||
print("starting training")
|
||||
summarizer.fit(
|
||||
ext_sum_train,
|
||||
num_gpus=world_size,
|
||||
|
|
|
@ -3,11 +3,14 @@
|
|||
|
||||
import os
|
||||
import pytest
|
||||
|
||||
import torch
|
||||
|
||||
@pytest.mark.gpu
|
||||
@pytest.mark.integration
|
||||
def test_ddp_extractive_summarization_cnndm_transformers(scripts, tmp):
|
||||
ddp_env = os.environ.copy()
|
||||
ddp_env["OMP_NUM_THREADS"] = str(torch.cuda.device_count())
|
||||
ddp_env["KMP_AFFINITY"] = "verbose"
|
||||
script = scripts["ddp_bertsumext"]
|
||||
summary_filename = "bertsumext_prediction.txt"
|
||||
import subprocess
|
||||
|
@ -27,6 +30,7 @@ def test_ddp_extractive_summarization_cnndm_transformers(scripts, tmp):
|
|||
"--summary_filename",
|
||||
summary_filename,
|
||||
],
|
||||
env=ddp_env,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE,
|
||||
)
|
||||
|
|
Загрузка…
Ссылка в новой задаче