robustdg/docs/notebooks/Preprocess.ipynb

159 строки
4.1 KiB
Plaintext

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Download Data"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Download the datasets (the directories below are relative to `root_dir`, which is set in a code cell further down):\n",
" - NIH (https://nihcc.app.box.com/v/ChestXray-NIHCC) and place it in the directory 'data/datasets/NIH/'\n",
" - CheXpert (https://stanfordmlgroup.github.io/competitions/chexpert/) and place it in the directory 'data/datasets/CheXpert-v1.0-small/'\n",
" - Kaggle (https://www.kaggle.com/c/rsna-pneumonia-detection-challenge) and place it in the directory 'data/datasets/Kaggle/'"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Resize NIH images to 224x224"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from PIL import Image\n",
"from torchvision import datasets, transforms"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Transform that rescales every image to 224x224 pixels.\n",
"resize = transforms.Compose([\n",
"    transforms.Resize((224, 224)),\n",
"])\n",
"\n",
"# Root of the robustdg checkout; the dataset paths used in the cells below\n",
"# are built from it. Set the ROBUSTDG_ROOT environment variable to point at\n",
"# your own checkout instead of editing this cell; the original path is kept\n",
"# as the fallback so existing setups keep working.\n",
"root_dir = os.environ.get('ROBUSTDG_ROOT', '/home/t-dimaha/RobustDG/robustdg')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Resize every NIH PNG image and write the result, under the same file name,\n",
"# into a sibling 'images_224' folder.\n",
"data_dir = root_dir + '/data/datasets/NIH/images/'\n",
"save_dir = root_dir + '/data/datasets/NIH/images_224/'\n",
"os.makedirs(save_dir, exist_ok=True)\n",
"\n",
"# os.walk also descends into sub-folders, so each path is built from the\n",
"# folder currently being visited (subdir) rather than from data_dir.\n",
"for subdir, dirs, files in os.walk(data_dir):\n",
"    for file in files:\n",
"        # Match on the extension only, not on any name containing 'png'.\n",
"        if file.endswith('.png'):\n",
"            # The context manager closes the source file once it is processed.\n",
"            with Image.open(os.path.join(subdir, file)) as img:\n",
"                img_resize = resize(img)\n",
"                img_resize.save(save_dir + file)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Convert Kaggle DCM images to JPG"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"import pydicom"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# Convert every Kaggle training DICOM file into a JPG with the same base name.\n",
"base_dir = root_dir + '/data/datasets/Kaggle/stage_2_train_images/'\n",
"save_dir = root_dir + '/data/datasets/Kaggle/stage_2_train_images_jpg/'\n",
"os.makedirs(save_dir, exist_ok=True)\n",
"\n",
"# os.walk also descends into sub-folders, so each path is built from the\n",
"# folder currently being visited (subdir) rather than from base_dir.\n",
"for subdir, dirs, files in os.walk(base_dir):\n",
"    for file in files:\n",
"        # Match on the extension only, not on any name containing '.dcm'.\n",
"        if file.endswith('.dcm'):\n",
"            # dcmread is the supported reader; read_file is a deprecated alias.\n",
"            ds = pydicom.dcmread(os.path.join(subdir, file))\n",
"            img = ds.pixel_array\n",
"            img_mem = Image.fromarray(img)\n",
"            # splitext drops only the final extension, so names that contain\n",
"            # extra dots are not truncated at the first one.\n",
"            img_mem.save(save_dir + os.path.splitext(file)[0] + '.jpg')"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# Convert every Kaggle test DICOM file into a JPG with the same base name.\n",
"base_dir = root_dir + '/data/datasets/Kaggle/stage_2_test_images/'\n",
"save_dir = root_dir + '/data/datasets/Kaggle/stage_2_test_images_jpg/'\n",
"os.makedirs(save_dir, exist_ok=True)\n",
"\n",
"# os.walk also descends into sub-folders, so each path is built from the\n",
"# folder currently being visited (subdir) rather than from base_dir.\n",
"for subdir, dirs, files in os.walk(base_dir):\n",
"    for file in files:\n",
"        # Match on the extension only, not on any name containing '.dcm'.\n",
"        if file.endswith('.dcm'):\n",
"            # dcmread is the supported reader; read_file is a deprecated alias.\n",
"            ds = pydicom.dcmread(os.path.join(subdir, file))\n",
"            img = ds.pixel_array\n",
"            img_mem = Image.fromarray(img)\n",
"            # splitext drops only the final extension, so names that contain\n",
"            # extra dots are not truncated at the first one.\n",
"            img_mem.save(save_dir + os.path.splitext(file)[0] + '.jpg')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "matchdg-env",
"language": "python",
"name": "matchdg-env"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.9"
}
},
"nbformat": 4,
"nbformat_minor": 4
}