Merge pull request #576 from microsoft/daden/bugfix

Daden/bugfix: improvements and bug fixes based on bug bash feedback
This commit is contained in:
Daisy Deng 2020-03-27 10:46:11 -04:00 committed by GitHub
Parents 806d5fb12e 2748b98ce2
Commit 2bc32034d5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 148 additions and 305 deletions

View File

@ -46,7 +46,8 @@
"metadata": {},
"source": [
"## Before you start\n",
"Set QUICK_RUN = True to run the notebook on a small subset of data and a smaller number of steps. If QUICK_RUN = False, the notebook takes about 5 hours to run on a VM with 4 16GB NVIDIA V100 GPUs. Finetuning costs around 1.5 hours and inferecing costs around 3.5 hour. Better performance can be achieved by increasing the MAX_STEPS.\n",
"\n",
"It's recommended to run this notebook on GPU machines as it's very computationally intensive. Set QUICK_RUN = True to run the notebook on a small subset of data and a smaller number of steps. If QUICK_RUN = False, the notebook takes about 5 hours to run on a VM with 4 16GB NVIDIA V100 GPUs. Finetuning costs around 1.5 hours and inferecing costs around 3.5 hour. Better performance can be achieved by increasing the MAX_STEPS.\n",
"\n",
"* **ROUGE Evalation**: To run rouge evaluation, please refer to the section of compute_rouge_perl in [summarization_evaluation.ipynb](./summarization_evaluation.ipynb) for setup.\n",
"\n",
@ -92,11 +93,18 @@
"if nlp_path not in sys.path:\n",
" sys.path.insert(0, nlp_path)\n",
"\n",
"from utils_nlp.models.transformers.abstractive_summarization_bertsum import BertSumAbs, BertSumAbsProcessor\n",
"from utils_nlp.models.transformers.abstractive_summarization_bertsum import (\n",
" BertSumAbs,\n",
" BertSumAbsProcessor,\n",
")\n",
"\n",
"from utils_nlp.dataset.cnndm import CNNDMSummarizationDataset\n",
"from utils_nlp.eval import compute_rouge_python\n",
"\n",
"from utils_nlp.models.transformers.datasets import SummarizationDataset\n",
"import nltk\n",
"from nltk import tokenize\n",
"\n",
"import pandas as pd\n",
"import pprint\n",
"import scrapbook as sb"
@ -139,8 +147,8 @@
"outputs": [],
"source": [
"train_dataset, test_dataset = CNNDMSummarizationDataset(\n",
" top_n=TOP_N, local_cache_path=DATA_PATH, prepare_extractive=False\n",
" )"
" top_n=TOP_N, local_cache_path=DATA_PATH, prepare_extractive=False\n",
")"
]
},
{
@ -190,36 +198,41 @@
"MAX_SOURCE_SEQ_LENGTH = 640\n",
"MAX_TARGET_SEQ_LENGTH = 140\n",
"\n",
"# mixed precision setting. To enable mixed precision training, follow instructions in SETUP.md. \n",
"# mixed precision setting. To enable mixed precision training, follow instructions in SETUP.md.\n",
"FP16 = False\n",
"if FP16:\n",
" FP16_OPT_LEVEL=\"O2\"\n",
" \n",
" FP16_OPT_LEVEL = \"O2\"\n",
"\n",
"# fine-tuning parameters\n",
"# batch size, unit is the number of tokens\n",
"BATCH_SIZE_PER_GPU = 3\n",
"BATCH_SIZE_PER_GPU = 1\n",
"\n",
"\n",
"# GPU used for training\n",
"NUM_GPUS = torch.cuda.device_count()\n",
"if NUM_GPUS > 0:\n",
" BATCH_SIZE = NUM_GPUS * BATCH_SIZE_PER_GPU\n",
"else:\n",
" BATCH_SIZE = 1\n",
"\n",
"\n",
"# Learning rate\n",
"LEARNING_RATE_BERT=5e-4/2.0\n",
"LEARNING_RATE_DEC=0.05/2.0\n",
"LEARNING_RATE_BERT = 5e-4 / 2.0\n",
"LEARNING_RATE_DEC = 0.05 / 2.0\n",
"\n",
"\n",
"# How often the statistics reports show up in training, unit is step.\n",
"REPORT_EVERY=10\n",
"SAVE_EVERY=500\n",
"REPORT_EVERY = 10\n",
"SAVE_EVERY = 500\n",
"\n",
"# total number of steps for training\n",
"MAX_STEPS=1e3\n",
" \n",
"if not QUICK_RUN:\n",
" MAX_STEPS=5e3\n",
"MAX_STEPS = 1e3\n",
"\n",
"WARMUP_STEPS_BERT=2000\n",
"WARMUP_STEPS_DEC=1000 \n"
"if not QUICK_RUN:\n",
" MAX_STEPS = 5e3\n",
"\n",
"WARMUP_STEPS_BERT = 2000\n",
"WARMUP_STEPS_DEC = 1000"
]
},
{
@ -253,21 +266,20 @@
},
"outputs": [],
"source": [
"\n",
"summarizer.fit(\n",
" train_dataset,\n",
" num_gpus=NUM_GPUS,\n",
" batch_size=BATCH_SIZE_PER_GPU*NUM_GPUS,\n",
" max_steps=MAX_STEPS,\n",
" learning_rate_bert=LEARNING_RATE_BERT,\n",
" learning_rate_dec=LEARNING_RATE_DEC,\n",
" warmup_steps_bert=WARMUP_STEPS_BERT,\n",
" warmup_steps_dec=WARMUP_STEPS_DEC,\n",
" save_every=SAVE_EVERY,\n",
" report_every=REPORT_EVERY*5,\n",
" fp16=FP16,\n",
" # checkpoint=\"saved checkpoint path\"\n",
")\n"
" train_dataset,\n",
" num_gpus=NUM_GPUS,\n",
" batch_size=BATCH_SIZE,\n",
" max_steps=MAX_STEPS,\n",
" learning_rate_bert=LEARNING_RATE_BERT,\n",
" learning_rate_dec=LEARNING_RATE_DEC,\n",
" warmup_steps_bert=WARMUP_STEPS_BERT,\n",
" warmup_steps_dec=WARMUP_STEPS_DEC,\n",
" save_every=SAVE_EVERY,\n",
" report_every=REPORT_EVERY * 5,\n",
" fp16=FP16,\n",
" # checkpoint=\"saved checkpoint path\"\n",
")"
]
},
{
@ -327,14 +339,19 @@
"TEST_TOP_N = 32\n",
"if not QUICK_RUN:\n",
" TEST_TOP_N = len(test_dataset)\n",
"\n",
"if NUM_GPUS:\n",
" BATCH_SIZE = NUM_GPUS * BATCH_SIZE_PER_GPU\n",
"else:\n",
" BATCH_SIZE = 1\n",
" \n",
"shortened_dataset= test_dataset.shorten(top_n=TEST_TOP_N)\n",
"shortened_dataset = test_dataset.shorten(top_n=TEST_TOP_N)\n",
"src = shortened_dataset.get_source()\n",
"reference_summaries = [\" \".join(t).rstrip(\"\\n\") for t in shortened_dataset.get_target()]\n",
"generated_summaries = summarizer.predict(\n",
" shortened_dataset, batch_size=32*4, num_gpus=NUM_GPUS\n",
" shortened_dataset, batch_size=BATCH_SIZE, num_gpus=NUM_GPUS\n",
")\n",
"assert len(generated_summaries) == len(reference_summaries)\n"
"assert len(generated_summaries) == len(reference_summaries)"
]
},
{
@ -374,13 +391,6 @@
"pprint.pprint(rouge_scores)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
@ -415,39 +425,22 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"from utils_nlp.models.transformers.datasets import SummarizationDataset\n",
"import nltk\n",
"from nltk import tokenize\n",
"\n",
"test_dataset = SummarizationDataset(\n",
" None,\n",
" source=[source],\n",
" source_preprocessing=[tokenize.sent_tokenize],\n",
" None, source=[source], source_preprocessing=[tokenize.sent_tokenize],\n",
")\n",
"generated_summaries = summarizer.predict(\n",
" test_dataset, batch_size=1, num_gpus=1\n",
")\n"
"generated_summaries = summarizer.predict(test_dataset, batch_size=1, num_gpus=NUM_GPUS)"
]
},
{
"cell_type": "code",
"execution_count": 26,
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'two employees bought , sold weapons on their own , company says . company fired workers , turned them in to atf , says it was identified in the feds are sold weapons , entirely genuine \" u . s . officials say they turned them two miles east - northeast of oakland , while donors are paid just $ 300 to $ 1 , 000 .'"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"generated_summaries[0]"
]
@ -475,9 +468,9 @@
"metadata": {
"celltoolbar": "Tags",
"kernelspec": {
"display_name": "python3.6 cm3",
"display_name": "Python (nlp_gpu)",
"language": "python",
"name": "cm3"
"name": "nlp_gpu"
},
"language_info": {
"codemirror_mode": {
@ -494,4 +487,4 @@
},
"nbformat": 4,
"nbformat_minor": 2
}
}
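
An aside on the commented-out checkpoint argument in the fit() call above: fine-tuning can be resumed from one of the intermediate checkpoints that save_every writes out. A minimal sketch, assuming such a checkpoint file exists; the path below is hypothetical and depends on your run:

# resume BertSumAbs fine-tuning from a previously saved checkpoint
# (hypothetical path; adjust to a file produced by save_every)
summarizer.fit(
    train_dataset,
    num_gpus=NUM_GPUS,
    batch_size=BATCH_SIZE,
    max_steps=MAX_STEPS,
    learning_rate_bert=LEARNING_RATE_BERT,
    learning_rate_dec=LEARNING_RATE_DEC,
    warmup_steps_bert=WARMUP_STEPS_BERT,
    warmup_steps_dec=WARMUP_STEPS_DEC,
    save_every=SAVE_EVERY,
    report_every=REPORT_EVERY * 5,
    fp16=FP16,
    checkpoint="./checkpoints/bertsumabs_step_500.pt",
)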

View File

@ -65,7 +65,7 @@
"import time\n",
"\n",
"from utils_nlp.dataset.cnndm import CNNDMSummarizationDatasetOrg\n",
"from utils_nlp.models import S2SAbsSumProcessor, S2SAbstractiveSummarizer\n",
"from utils_nlp.models.transformers.abstractive_summarization_seq2seq import S2SAbsSumProcessor, S2SAbstractiveSummarizer\n",
"from utils_nlp.eval import compute_rouge_python\n",
"\n",
"start_time = time.time()"

View File

@ -4,7 +4,11 @@ import jsonlines
import torch
from utils_nlp.models import S2SAbsSumProcessor, S2SAbstractiveSummarizer
from utils_nlp.models.transformers.abstractive_summarization_seq2seq import (
S2SAbsSumProcessor,
S2SAbstractiveSummarizer
)
from utils_nlp.eval import compute_rouge_python
parser = argparse.ArgumentParser()

View File

@ -25,7 +25,9 @@
"- Azure Machine Learning Workspace\n",
"- Azure Machine Learning SDK\n",
"\n",
"To run rouge evaluation, please refer to the section of compute_rouge_perl in [summarization_evaluation.ipynb](summarization_evaluation.ipynb). "
"To run rouge evaluation, please refer to the section of compute_rouge_perl in [summarization_evaluation.ipynb](summarization_evaluation.ipynb). \n",
"\n",
"You can run this notebook on CPU-only machines."
]
},
{
@ -84,7 +86,9 @@
" ExtSumProcessor,\n",
")\n",
"# Check core SDK version number\n",
"print(\"SDK version:\", azureml.core.VERSION)"
"print(\"SDK version:\", azureml.core.VERSION)\n",
"\n",
"import pprint"
]
},
{
@ -106,7 +110,6 @@
"RESOURCE_GROUP = \"YOUR_WORKSPACE_NAME\" # modifiy to use your own\n",
"WORKSPACE_NAME = \"YOUR_WORKSPACE_REGION\" # modifiy to use your own\n",
"\n",
"\n",
"# for creating Azure ML Compute Cluster\n",
"AMLCOMPUTE_CLUSTER_NAME = \"bertsumext\" # modifiy to use your own\n",
"NODE_COUNT = 2\n",
@ -152,7 +155,7 @@
"\n",
"##\n",
"# The number of lines at the head of data file used for preprocessing. -1 means all the lines.\n",
"TOP_N = 1000\n",
"TOP_N = 100\n",
"QUICK_RUN = True\n",
"if not QUICK_RUN:\n",
" TOP_N = -1"
@ -293,11 +296,11 @@
"outputs": [],
"source": [
"ENTRY_SCRIPT = \"extractive_summarization_cnndm_distributed_train.py\"\n",
"!mkdir -p {PROJECT_FOLDER}\n",
"!python ../../tools/generate_conda_file.py --gpu --name {CONDA_ENV_NAME}\n",
"!cp ./nlp_gpu.yaml {PROJECT_FOLDER}\n",
"!cp {ENTRY_SCRIPT} {PROJECT_FOLDER}\n",
"!cp -r ../../utils_nlp {PROJECT_FOLDER}"
"os.makedirs(PROJECT_FOLDER, exist_ok=True)\n",
"os.system(\"python ../../tools/generate_conda_file.py --gpu --name {}\".format(CONDA_ENV_NAME))\n",
"os.system(\"cp ./nlp_gpu.yaml {}\".format(PROJECT_FOLDER))\n",
"os.system(\"cp {} {}\".format(ENTRY_SCRIPT, PROJECT_FOLDER))\n",
"os.system(\"cp -r ../../utils_nlp {}\".format(PROJECT_FOLDER))"
]
},
{
@ -397,8 +400,8 @@
"metadata": {},
"outputs": [],
"source": [
"# need to clear the local output dir as the ds.download won't download if the path exists\n",
"!rm -rf {LOCAL_OUTPUT_DIR}/* "
"# need to clear the local output dir as the ds.download won't download if the path exists \n",
"os.system(\"rm -rf {}/*\".format(LOCAL_OUTPUT_DIR))"
]
},
{
@ -418,10 +421,13 @@
"metadata": {},
"outputs": [],
"source": [
"# the script uses <q> as sentence separator so it can write the prediction into the files properly\n",
"# here we need to replace <q> with \"\\n\" to prepare for evalation\n",
"# removing the ending \"\\n\" is also a preparation step for evalution.\n",
"prediction = []\n",
"with open(os.path.join(LOCAL_OUTPUT_DIR, f'{TARGET_OUTPUT_DIR}{SUMMARY_FILENAME}'), \"r\") as filehandle:\n",
" for cnt, line in enumerate(filehandle):\n",
" prediction.append(line[0:-1]) # remove the ending \"\\n\""
" prediction.append(line[0:-1].replace(\"<q>\", \"\\n\")) # remove the ending \"\\n\""
]
},
{
@ -451,7 +457,7 @@
"for i in ext_sum_test:\n",
" source.append(i[\"src_txt\"]) \n",
" temp_target.append(\" \".join(j) for j in i['tgt']) \n",
"target = [''.join(i) for i in list(temp_target)]"
"target = ['\\n'.join(i) for i in list(temp_target)]"
]
},
{
@ -498,13 +504,13 @@
"metadata": {},
"outputs": [],
"source": [
"# processor = ExtSumProcessor()\n",
"BATCH_SIZE = 32\n",
"summarizer = ExtractiveSummarizer(processor, encoder=ENCODER, cache_dir=LOCAL_CACHE_DIR)\n",
"summarizer.model.load_state_dict(\n",
" torch.load(os.path.join(LOCAL_OUTPUT_DIR, f'{TARGET_OUTPUT_DIR}{MODEL_FILENAME}'),\n",
" map_location=\"cpu\"))\n",
"\n",
"prediction = summarizer.predict(test_dataset[0:TOP_N], num_gpus=torch.cuda.device_count(), batch_size=128, sentence_separator = \"\\n\")\n",
"prediction = summarizer.predict(ext_sum_test, num_gpus=torch.cuda.device_count(), batch_size=BATCH_SIZE, sentence_separator = \"\\n\")\n",
"#\"\"\""
]
},
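
Since the predictions above now have one sentence per line after the <q> replacement, they can be scored directly. A minimal sketch of the evaluation step, assuming the prediction and target lists built in the preceding cells and the compute_rouge_python import used elsewhere in this repo:

# score the downloaded predictions against the gold summaries
from utils_nlp.eval import compute_rouge_python

rouge_scores = compute_rouge_python(cand=prediction, ref=target)
pprint.pprint(rouge_scores)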

View File

@ -35,9 +35,9 @@
"> **Tip**: If you want to run through the notebook quickly, you can set the **`QUICK_RUN`** flag in the cell below to **`True`** to run the notebook on a small subset of the data and a smaller number of epochs. \n",
"\n",
"Using only 1 NVIDIA Tesla V100 GPUs, 16GB GPU memory configuration,\n",
"- for data preprocessing, it takes around 1 minutes to preprocess the data for quick run. Otherwise it takes ~2 hours to finish the data preprocessing. This time estimation assumes that the chosen transformer model is \"distilbert-base-uncased\" and the sentence selection method is \"greedy\", which is the default. The preprocessing time can be significantly longer if the sentence selection method is \"combination\", which can achieve better model performance.\n",
"- for data preprocessing, it takes around 1 minutes to preprocess the data for quick run. Otherwise it takes ~20 minutes to finish the data preprocessing. This time estimation assumes that the chosen transformer model is \"distilbert-base-uncased\" and the sentence selection method is \"greedy\", which is the default. The preprocessing time can be significantly longer if the sentence selection method is \"combination\", which can achieve better model performance.\n",
"\n",
"- for model fine tuning, it takes around 10 minutes for quick run. Otherwise, it takes around ~3 hours to finish. This estimation assumes the chosen encoder method is \"transformer\". The model fine tuning time can be shorter if other encoder method is chosen, which may result in worse model performance. \n",
"- for model fine tuning, it takes around 2 minutes for quick run. Otherwise, it takes around ~3 hours to finish. This estimation assumes the chosen encoder method is \"transformer\". The model fine tuning time can be shorter if other encoder method is chosen, which may result in worse model performance. \n",
"\n",
"### Additional Notes\n",
"\n",
@ -106,6 +106,9 @@
" ExtSumProcessor,\n",
")\n",
"\n",
"from utils_nlp.models.transformers.datasets import SummarizationDataset\n",
"import nltk\n",
"from nltk import tokenize\n",
"\n",
"import pandas as pd\n",
"import scrapbook as sb\n",
@ -156,7 +159,10 @@
"metadata": {},
"outputs": [],
"source": [
"processor = ExtSumProcessor(model_name=MODEL_NAME)"
"# notebook parameters\n",
"# the cache data path during find tuning\n",
"CACHE_DIR = TemporaryDirectory().name\n",
"processor = ExtSumProcessor(model_name=MODEL_NAME, cache_dir=CACHE_DIR)"
]
},
{
@ -211,10 +217,8 @@
"DATA_PATH = TemporaryDirectory().name\n",
"# The number of lines at the head of data file used for preprocessing. -1 means all the lines.\n",
"TOP_N = 1000\n",
"CHUNK_SIZE=200\n",
"if not QUICK_RUN:\n",
" TOP_N = -1\n",
" CHUNK_SIZE = 2000\n"
" TOP_N = -1"
]
},
{
@ -380,13 +384,8 @@
},
"outputs": [],
"source": [
"# notebook parameters\n",
"# the cache data path during find tuning\n",
"CACHE_DIR = TemporaryDirectory().name\n",
"\n",
"\n",
"BATCH_SIZE = 20 # batch size, unit is the number of samples\n",
"MAX_POS_LENGTH = 1025\n",
"BATCH_SIZE = 5 # batch size, unit is the number of samples\n",
"MAX_POS_LENGTH = 512\n",
"if USE_PREPROCSSED_DATA: #if bertsum published data is used\n",
" BATCH_SIZE = 3000 # batch size, unit is the number of tokens\n",
" MAX_POS_LENGTH = 512\n",
@ -406,7 +405,7 @@
"REPORT_EVERY=100\n",
"\n",
"# total number of steps for training\n",
"MAX_STEPS=5e2\n",
"MAX_STEPS=1e2\n",
"# number of steps for warm up\n",
"WARMUP_STEPS=5e2\n",
" \n",
@ -621,17 +620,13 @@
"metadata": {},
"outputs": [],
"source": [
"from utils_nlp.models.transformers.datasets import SummarizationDataset\n",
"import nltk\n",
"from nltk import tokenize\n",
"\n",
"test_dataset = SummarizationDataset(\n",
" None,\n",
" source=[source],\n",
" source_preprocessing=[tokenize.sent_tokenize],\n",
" word_tokenize=nltk.word_tokenize,\n",
")\n",
"processor = ExtSumProcessor(model_name=MODEL_NAME)\n",
"processor = ExtSumProcessor(model_name=MODEL_NAME, cache_dir=CACHE_DIR)\n",
"preprocessed_dataset = processor.preprocess(test_dataset)"
]
},
@ -688,9 +683,9 @@
"metadata": {
"celltoolbar": "Tags",
"kernelspec": {
"display_name": "python3.6 cm3",
"display_name": "Python (nlp_gpu)",
"language": "python",
"name": "cm3"
"name": "nlp_gpu"
},
"language_info": {
"codemirror_mode": {

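As the timing notes in this notebook point out, the encoder choice trades fine-tuning speed against summary quality. A minimal sketch of making that choice explicit when constructing the model, assuming the ExtractiveSummarizer signature used in the AzureML notebook above:

# "transformer" is the default encoder; cheaper encoders fine-tune faster
# but may lower ROUGE scores
summarizer = ExtractiveSummarizer(processor, encoder="transformer", cache_dir=CACHE_DIR)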
View File

@ -31,7 +31,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@ -55,7 +55,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@ -73,37 +73,18 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Number of candidates: 2\n",
"Number of references: 2\n"
]
}
],
"outputs": [],
"source": [
"python_rouge_scores = compute_rouge_python(cand=summary_candidates, ref=summary_references)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"ROUGE-1: {'f': 0.7696078431372548, 'p': 0.875, 'r': 0.6904761904761905}\n",
"ROUGE-2: {'f': 0.6666666666666667, 'p': 0.7857142857142857, 'r': 0.5833333333333333}\n",
"ROUGE-L: {'f': 0.7696078431372548, 'p': 0.875, 'r': 0.6904761904761905}\n"
]
}
],
"outputs": [],
"source": [
"print(\"ROUGE-1: {}\".format(python_rouge_scores[\"rouge-1\"]))\n",
"print(\"ROUGE-2: {}\".format(python_rouge_scores[\"rouge-2\"]))\n",
@ -120,7 +101,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@ -136,21 +117,9 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Number of candidates: 2\n",
"Number of references: 2\n",
"ROUGE-1: {'f': 0.5980392156862745, 'p': 0.68125, 'r': 0.5357142857142857}\n",
"ROUGE-2: {'f': 0.325, 'p': 0.3833333333333333, 'r': 0.28431372549019607}\n",
"ROUGE-L: {'f': 0.5980392156862745, 'p': 0.68125, 'r': 0.5357142857142857}\n"
]
}
],
"outputs": [],
"source": [
"python_rouge_scores_hi = compute_rouge_python(cand=summary_candidates_hi, ref=summary_references_hi, language=\"hi\")\n",
"print(\"ROUGE-1: {}\".format(python_rouge_scores_hi[\"rouge-1\"]))\n",
@ -164,7 +133,25 @@
"source": [
"### compute_rouge_perl\n",
"To use `compute_rouge_perl`, in addition to installing the Python package `pyrouge`, you also need to go through the following setup steps on a Linux machine. \n",
"**NOTE**: Set `PYROUGE_PATH` to the root directory of the cloned `pyrouge` repo and `PYTHON_PATH` to the root directory of the conda environment where you installed `pyrouge` first."
"**NOTE**: Set `PYTHON_PATH` to the root directory of the conda environment where you installed `pyrouge` first. You can use \"conda env list\" to find the`PYTHON_PATH`. "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"python_path = !which python"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"PYTHON_PATH = \"/\".join(python_path[0].split(os.pathsep)[0].split(\"/\")[0:-2])"
]
},
{
@ -177,8 +164,8 @@
"source": [
"%%bash\n",
"git clone https://github.com/andersjo/pyrouge.git\n",
"PYROUGE_PATH=<root directory of cloned pyrouge repo> #e.g./home/hlu/notebooks/summarization/pyrouge\n",
"PYTHON_PATH=<root directory of conda environment> #e.g./data/anaconda/envs/nlp_gpu\n",
"PYROUGE_PATH=./pyrouge \n",
"PYTHON_PATH=$PYTHON_PATH #<root directory of conda environment> #e.g./data/anaconda/envs/nlp_gpu\n",
"$PYTHON_PATH/bin/pyrouge_set_rouge_path $PYROUGE_PATH/tools/ROUGE-1.5.5\n",
"\n",
"# install XML::DOM plugin, instructions https://web.archive.org/web/20171107220839/www.summarizerman.com/post/42675198985/figuring-out-rouge\n",
@ -203,160 +190,18 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2019-12-03 19:43:25,977 [MainThread ] [INFO ] Writing summaries.\n",
"2019-12-03 19:43:25,978 [MainThread ] [INFO ] Processing summaries. Saving system files to /tmp/tmpm29_bwie/system and model files to /tmp/tmpm29_bwie/model.\n",
"2019-12-03 19:43:25,979 [MainThread ] [INFO ] Processing files in /tmp/tmpf5p8odh5/rouge-tmp-2019-12-03-19-43-25/candidate/.\n",
"2019-12-03 19:43:25,980 [MainThread ] [INFO ] Processing cand.1.txt.\n",
"2019-12-03 19:43:25,981 [MainThread ] [INFO ] Processing cand.0.txt.\n",
"2019-12-03 19:43:25,982 [MainThread ] [INFO ] Saved processed files to /tmp/tmpm29_bwie/system.\n",
"2019-12-03 19:43:25,982 [MainThread ] [INFO ] Processing files in /tmp/tmpf5p8odh5/rouge-tmp-2019-12-03-19-43-25/reference/.\n",
"2019-12-03 19:43:25,983 [MainThread ] [INFO ] Processing ref.0.txt.\n",
"2019-12-03 19:43:25,984 [MainThread ] [INFO ] Processing ref.1.txt.\n",
"2019-12-03 19:43:25,985 [MainThread ] [INFO ] Saved processed files to /tmp/tmpm29_bwie/model.\n",
"2019-12-03 19:43:25,986 [MainThread ] [INFO ] Written ROUGE configuration to /tmp/tmps00p9hvz/rouge_conf.xml\n",
"2019-12-03 19:43:25,987 [MainThread ] [INFO ] Running ROUGE with command /home/hlu/notebooks/summarization/pyrouge/tools/ROUGE-1.5.5/ROUGE-1.5.5.pl -e /home/hlu/notebooks/summarization/pyrouge/tools/ROUGE-1.5.5/data -c 95 -2 -1 -U -r 1000 -n 4 -w 1.2 -a -m /tmp/tmps00p9hvz/rouge_conf.xml\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Number of candidates: 2\n",
"Number of references: 2\n",
"---------------------------------------------\n",
"1 ROUGE-1 Average_R: 0.69048 (95%-conf.int. 0.66667 - 0.71429)\n",
"1 ROUGE-1 Average_P: 0.87500 (95%-conf.int. 0.75000 - 1.00000)\n",
"1 ROUGE-1 Average_F: 0.76961 (95%-conf.int. 0.70588 - 0.83334)\n",
"---------------------------------------------\n",
"1 ROUGE-2 Average_R: 0.58333 (95%-conf.int. 0.50000 - 0.66667)\n",
"1 ROUGE-2 Average_P: 0.78571 (95%-conf.int. 0.57143 - 1.00000)\n",
"1 ROUGE-2 Average_F: 0.66666 (95%-conf.int. 0.53333 - 0.80000)\n",
"---------------------------------------------\n",
"1 ROUGE-3 Average_R: 0.51428 (95%-conf.int. 0.42857 - 0.60000)\n",
"1 ROUGE-3 Average_P: 0.75000 (95%-conf.int. 0.50000 - 1.00000)\n",
"1 ROUGE-3 Average_F: 0.60577 (95%-conf.int. 0.46154 - 0.75000)\n",
"---------------------------------------------\n",
"1 ROUGE-4 Average_R: 0.41666 (95%-conf.int. 0.33333 - 0.50000)\n",
"1 ROUGE-4 Average_P: 0.70000 (95%-conf.int. 0.40000 - 1.00000)\n",
"1 ROUGE-4 Average_F: 0.51515 (95%-conf.int. 0.36363 - 0.66667)\n",
"---------------------------------------------\n",
"1 ROUGE-L Average_R: 0.69048 (95%-conf.int. 0.66667 - 0.71429)\n",
"1 ROUGE-L Average_P: 0.87500 (95%-conf.int. 0.75000 - 1.00000)\n",
"1 ROUGE-L Average_F: 0.76961 (95%-conf.int. 0.70588 - 0.83334)\n",
"---------------------------------------------\n",
"1 ROUGE-W-1.2 Average_R: 0.44238 (95%-conf.int. 0.40075 - 0.48401)\n",
"1 ROUGE-W-1.2 Average_P: 0.84981 (95%-conf.int. 0.69963 - 1.00000)\n",
"1 ROUGE-W-1.2 Average_F: 0.58095 (95%-conf.int. 0.50960 - 0.65230)\n",
"---------------------------------------------\n",
"1 ROUGE-S* Average_R: 0.44643 (95%-conf.int. 0.41667 - 0.47619)\n",
"1 ROUGE-S* Average_P: 0.76785 (95%-conf.int. 0.53571 - 1.00000)\n",
"1 ROUGE-S* Average_F: 0.55695 (95%-conf.int. 0.46875 - 0.64516)\n",
"---------------------------------------------\n",
"1 ROUGE-SU* Average_R: 0.49790 (95%-conf.int. 0.47727 - 0.51852)\n",
"1 ROUGE-SU* Average_P: 0.80000 (95%-conf.int. 0.60000 - 1.00000)\n",
"1 ROUGE-SU* Average_F: 0.60729 (95%-conf.int. 0.53164 - 0.68293)\n",
"\n"
]
}
],
"outputs": [],
"source": [
"perl_rouge_scores = compute_rouge_perl(cand=summary_candidates, ref=summary_references)"
]
},
{
"cell_type": "code",
"execution_count": 44,
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'rouge_1_recall': 0.69048,\n",
" 'rouge_1_recall_cb': 0.66667,\n",
" 'rouge_1_recall_ce': 0.71429,\n",
" 'rouge_1_precision': 0.875,\n",
" 'rouge_1_precision_cb': 0.75,\n",
" 'rouge_1_precision_ce': 1.0,\n",
" 'rouge_1_f_score': 0.76961,\n",
" 'rouge_1_f_score_cb': 0.70588,\n",
" 'rouge_1_f_score_ce': 0.83334,\n",
" 'rouge_2_recall': 0.58333,\n",
" 'rouge_2_recall_cb': 0.5,\n",
" 'rouge_2_recall_ce': 0.66667,\n",
" 'rouge_2_precision': 0.78571,\n",
" 'rouge_2_precision_cb': 0.57143,\n",
" 'rouge_2_precision_ce': 1.0,\n",
" 'rouge_2_f_score': 0.66666,\n",
" 'rouge_2_f_score_cb': 0.53333,\n",
" 'rouge_2_f_score_ce': 0.8,\n",
" 'rouge_3_recall': 0.51428,\n",
" 'rouge_3_recall_cb': 0.42857,\n",
" 'rouge_3_recall_ce': 0.6,\n",
" 'rouge_3_precision': 0.75,\n",
" 'rouge_3_precision_cb': 0.5,\n",
" 'rouge_3_precision_ce': 1.0,\n",
" 'rouge_3_f_score': 0.60577,\n",
" 'rouge_3_f_score_cb': 0.46154,\n",
" 'rouge_3_f_score_ce': 0.75,\n",
" 'rouge_4_recall': 0.41666,\n",
" 'rouge_4_recall_cb': 0.33333,\n",
" 'rouge_4_recall_ce': 0.5,\n",
" 'rouge_4_precision': 0.7,\n",
" 'rouge_4_precision_cb': 0.4,\n",
" 'rouge_4_precision_ce': 1.0,\n",
" 'rouge_4_f_score': 0.51515,\n",
" 'rouge_4_f_score_cb': 0.36363,\n",
" 'rouge_4_f_score_ce': 0.66667,\n",
" 'rouge_l_recall': 0.69048,\n",
" 'rouge_l_recall_cb': 0.66667,\n",
" 'rouge_l_recall_ce': 0.71429,\n",
" 'rouge_l_precision': 0.875,\n",
" 'rouge_l_precision_cb': 0.75,\n",
" 'rouge_l_precision_ce': 1.0,\n",
" 'rouge_l_f_score': 0.76961,\n",
" 'rouge_l_f_score_cb': 0.70588,\n",
" 'rouge_l_f_score_ce': 0.83334,\n",
" 'rouge_w_1.2_recall': 0.44238,\n",
" 'rouge_w_1.2_recall_cb': 0.40075,\n",
" 'rouge_w_1.2_recall_ce': 0.48401,\n",
" 'rouge_w_1.2_precision': 0.84981,\n",
" 'rouge_w_1.2_precision_cb': 0.69963,\n",
" 'rouge_w_1.2_precision_ce': 1.0,\n",
" 'rouge_w_1.2_f_score': 0.58095,\n",
" 'rouge_w_1.2_f_score_cb': 0.5096,\n",
" 'rouge_w_1.2_f_score_ce': 0.6523,\n",
" 'rouge_s*_recall': 0.44643,\n",
" 'rouge_s*_recall_cb': 0.41667,\n",
" 'rouge_s*_recall_ce': 0.47619,\n",
" 'rouge_s*_precision': 0.76785,\n",
" 'rouge_s*_precision_cb': 0.53571,\n",
" 'rouge_s*_precision_ce': 1.0,\n",
" 'rouge_s*_f_score': 0.55695,\n",
" 'rouge_s*_f_score_cb': 0.46875,\n",
" 'rouge_s*_f_score_ce': 0.64516,\n",
" 'rouge_su*_recall': 0.4979,\n",
" 'rouge_su*_recall_cb': 0.47727,\n",
" 'rouge_su*_recall_ce': 0.51852,\n",
" 'rouge_su*_precision': 0.8,\n",
" 'rouge_su*_precision_cb': 0.6,\n",
" 'rouge_su*_precision_ce': 1.0,\n",
" 'rouge_su*_f_score': 0.60729,\n",
" 'rouge_su*_f_score_cb': 0.53164,\n",
" 'rouge_su*_f_score_ce': 0.68293}"
]
},
"execution_count": 44,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"perl_rouge_scores"
]
@ -372,7 +217,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "nlp_gpu",
"display_name": "Python (nlp_gpu)",
"language": "python",
"name": "nlp_gpu"
},

View File

@ -4,7 +4,11 @@
import os
import pytest
from utils_nlp.models import S2SAbsSumProcessor, S2SAbstractiveSummarizer, S2SConfig
from utils_nlp.models.transformers.abstractive_summarization_seq2seq import (
S2SAbsSumProcessor,
S2SAbstractiveSummarizer,
S2SConfig
)
from utils_nlp.models.transformers.datasets import (
IterableSummarizationDataset,

View File

@ -96,6 +96,9 @@ PIP_BASE = {
"methodtools": "methodtools",
"s2s-ft": "-e git+https://github.com/microsoft/unilm.git"
"@s2s-ft.v0.0#egg=s2s-ft&subdirectory=s2s-ft",
"requests": "requests==2.22.0",
"requests-oauthlib": "requests-oauthlib==1.2.0",
"regex": "regex==2020.2.20",
}
PIP_GPU = {}

View File

@ -1,7 +0,0 @@
from .transformers.abstractive_summarization_seq2seq import (
S2SAbsSumProcessor,
S2SAbstractiveSummarizer,
S2SAbsSumDataset,
S2SAbsSumProcessor,
S2SConfig,
)