159 строки
4.1 KiB
Plaintext
159 строки
4.1 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Download Data"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Download the datasets (directories are relative to the repository root, i.e. `root_dir` set below):\n",
|
|
" - NIH (https://nihcc.app.box.com/v/ChestXray-NIHCC) and place it in the directory 'data/datasets/NIH/'\n",
|
|
" - CheXpert (https://stanfordmlgroup.github.io/competitions/chexpert/) and place it in the directory 'data/datasets/CheXpert-v1.0-small/'\n",
|
|
" - Kaggle (https://www.kaggle.com/c/rsna-pneumonia-detection-challenge) and place it in the directory 'data/datasets/Kaggle/' "
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Resize NIH images to 224x224"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import os\n",
|
|
"from PIL import Image\n",
|
|
"from torchvision import datasets, transforms"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Transform pipeline that rescales an image to 224x224 pixels.\n",
|
|
"resize = transforms.Compose([\n",
|
|
"    transforms.Resize((224, 224)),\n",
|
|
"])\n",
|
|
"\n",
|
|
"# Directory that contains 'data/datasets/...'. Set the ROBUSTDG_ROOT environment\n",
|
|
"# variable to point at your own checkout; the fallback is the original author's path.\n",
|
|
"root_dir = os.environ.get('ROBUSTDG_ROOT', '/home/t-dimaha/RobustDG/robustdg')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Resize every NIH PNG under data_dir and write the result into save_dir.\n",
|
|
"data_dir = root_dir + '/data/datasets/NIH/images/'\n",
|
|
"save_dir = root_dir + '/data/datasets/NIH/images_224/'\n",
|
|
"# exist_ok keeps the cell re-runnable without a separate existence check.\n",
|
|
"os.makedirs(save_dir, exist_ok=True)\n",
|
|
"\n",
|
|
"# os.walk also descends into sub-folders, so each file is opened relative to the\n",
|
|
"# folder it was found in (subdir) rather than relative to data_dir.\n",
|
|
"for subdir, dirs, files in os.walk(data_dir):\n",
|
|
"    for file in files:\n",
|
|
"        # Test the extension only; a substring test would also match names\n",
|
|
"        # such as 'png_list.txt'.\n",
|
|
"        if not file.lower().endswith('.png'):\n",
|
|
"            continue\n",
|
|
"        # The context manager closes the source file once the copy is saved.\n",
|
|
"        with Image.open(os.path.join(subdir, file)) as img:\n",
|
|
"            img_resize = resize(img)\n",
|
|
"            # Outputs are written flat into save_dir under the original file name.\n",
|
|
"            img_resize.save(os.path.join(save_dir, file))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Convert Kaggle DCM images to JPG"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import pydicom"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Convert every Kaggle stage-2 training DICOM file under base_dir to a JPG in save_dir.\n",
|
|
"base_dir = root_dir + '/data/datasets/Kaggle/stage_2_train_images/'\n",
|
|
"save_dir = root_dir + '/data/datasets/Kaggle/stage_2_train_images_jpg/'\n",
|
|
"# exist_ok keeps the cell re-runnable without a separate existence check.\n",
|
|
"os.makedirs(save_dir, exist_ok=True)\n",
|
|
"\n",
|
|
"# os.walk also descends into sub-folders, so each file is read relative to the\n",
|
|
"# folder it was found in (subdir) rather than relative to base_dir.\n",
|
|
"for subdir, dirs, files in os.walk(base_dir):\n",
|
|
"    for file in files:\n",
|
|
"        # Test the extension only, not any occurrence of '.dcm' in the name.\n",
|
|
"        if not file.lower().endswith('.dcm'):\n",
|
|
"            continue\n",
|
|
"        # dcmread is the supported reader; read_file is a deprecated alias of it.\n",
|
|
"        ds = pydicom.dcmread(os.path.join(subdir, file))\n",
|
|
"        img_mem = Image.fromarray(ds.pixel_array)\n",
|
|
"        # splitext drops only the final extension, so a stem containing dots is kept whole.\n",
|
|
"        stem = os.path.splitext(file)[0]\n",
|
|
"        img_mem.save(os.path.join(save_dir, stem + '.jpg'))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Convert every Kaggle stage-2 test DICOM file under base_dir to a JPG in save_dir.\n",
|
|
"base_dir = root_dir + '/data/datasets/Kaggle/stage_2_test_images/'\n",
|
|
"save_dir = root_dir + '/data/datasets/Kaggle/stage_2_test_images_jpg/'\n",
|
|
"# exist_ok keeps the cell re-runnable without a separate existence check.\n",
|
|
"os.makedirs(save_dir, exist_ok=True)\n",
|
|
"\n",
|
|
"# os.walk also descends into sub-folders, so each file is read relative to the\n",
|
|
"# folder it was found in (subdir) rather than relative to base_dir.\n",
|
|
"for subdir, dirs, files in os.walk(base_dir):\n",
|
|
"    for file in files:\n",
|
|
"        # Test the extension only, not any occurrence of '.dcm' in the name.\n",
|
|
"        if not file.lower().endswith('.dcm'):\n",
|
|
"            continue\n",
|
|
"        # dcmread is the supported reader; read_file is a deprecated alias of it.\n",
|
|
"        ds = pydicom.dcmread(os.path.join(subdir, file))\n",
|
|
"        img_mem = Image.fromarray(ds.pixel_array)\n",
|
|
"        # splitext drops only the final extension, so a stem containing dots is kept whole.\n",
|
|
"        stem = os.path.splitext(file)[0]\n",
|
|
"        img_mem.save(os.path.join(save_dir, stem + '.jpg'))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "matchdg-env",
|
|
"language": "python",
|
|
"name": "matchdg-env"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.7.9"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 4
|
|
}
|