Mirror of https://github.com/microsoft/Chestist.git
adding model
Parent: ea6624902d
Commit: 3632d72d61
CNNModel.py
@@ -0,0 +1,27 @@
import os
import numpy as np
import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from sklearn.metrics import roc_auc_score
import torchvision


class CNNModel(nn.Module):
    """DenseNet-121 backbone with a multi-label sigmoid classification head."""

    def __init__(self, classCount, isTrained):
        super(CNNModel, self).__init__()
        # Load DenseNet-121, optionally with ImageNet weights
        self.cnnmodel = torchvision.models.densenet121(pretrained=isTrained)
        # Replace the ImageNet classifier with a classCount-way sigmoid head
        kernelCount = self.cnnmodel.classifier.in_features
        self.cnnmodel.classifier = nn.Sequential(nn.Linear(kernelCount, classCount), nn.Sigmoid())

    def forward(self, x):
        x = self.cnnmodel(x)
        return x
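A quick smoke test for the wrapper above (not part of the commit): a hedged sketch assuming a CPU-only environment and random weights, checking that a batch of 224x224 RGB inputs yields one sigmoid probability per class.

import torch
from CNNModel import CNNModel

# Two fake RGB images at the 224x224 crop size used elsewhere in this commit
model = CNNModel(classCount=14, isTrained=False)  # isTrained=False avoids downloading weights
dummy = torch.randn(2, 3, 224, 224)
with torch.no_grad():
    probs = model(dummy)
print(probs.shape)               # torch.Size([2, 14])
print(probs.min(), probs.max())  # sigmoid outputs lie in (0, 1)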
DatasetGenerator.py
@@ -0,0 +1,116 @@
import os
import pandas as pd
import numpy as np
from PIL import Image
import torch
from torch.utils.data import Dataset
import torchvision
from torch.utils import data
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
# Alias the AzureML Dataset so it does not shadow torch.utils.data.Dataset
from azureml.core import Workspace, Datastore, Dataset as AmlDataset
from azureml.core.authentication import InteractiveLoginAuthentication


class DatasetGenerator(Dataset):

    def __init__(self, pathImageDirectory, pathDatasetFile, listImages, labelList, transform, csvFilePath):

        self.listImagePaths = []
        self.listImageLabels = []
        self.transform = transform

        # Build the label vectors from the NIH CSV for the given image file names
        labelList = self.createLists(listImages)
        print("listImageLabels")
        print(labelList)

        # Images are expected directly under pathImageDirectory (the dataset mount),
        # matching how the callers build their path lists
        for i, image in enumerate(listImages):
            listImages[i] = pathImageDirectory + "/" + image
            self.listImagePaths.append(listImages[i])

        for label in labelList:
            self.listImageLabels.append(label)

    def __getitem__(self, idx):

        image_index = self.listImagePaths[idx]
        img = Image.open(image_index).convert('RGB')
        imageLabel = torch.FloatTensor(self.listImageLabels[idx])

        if self.transform is not None:
            imageData = self.transform(img)
        else:
            # Fall back to a plain tensor so the DataLoader can still collate the batch
            imageData = transforms.ToTensor()(img)

        return imageData, imageLabel

    def __len__(self):

        return len(self.listImagePaths)

    def createLists(self, images):  # just image file names, not paths

        #-------------------- SETTINGS: AML WORKSPACE AND DATASTORE
        interactive_auth = InteractiveLoginAuthentication(tenant_id=os.environ['TENANT_ID'])
        ws = Workspace(
            subscription_id=os.environ['SUBSCRIPTION_ID'],
            resource_group=os.environ['RESOURCE_GROUP'],
            workspace_name=os.environ['WORKSPACE_NAME'],
            auth=interactive_auth
        )
        datastore = Datastore.get(ws, datastore_name=os.environ['DATASTORE_NAME'])

        #-------------------- SETTINGS: MOUNTING THE DATASET TO MAKE IT AVAILABLE
        chestist_data = AmlDataset.get_by_name(ws, os.environ['DATASET_NAME_CSV'])
        mountPoint = chestist_data.mount()
        mountPoint.start()
        mountFolder = mountPoint.mount_point
        patientDataFiltered = pd.read_csv(f"{mountFolder}/Data_Entry_2017_v2020 (1).csv", header=0)
        patientDataFiltered = patientDataFiltered.dropna()
        # Keep only the rows whose 'Image Index' (first column) is in the requested image list
        patientDataFiltered = patientDataFiltered[patientDataFiltered.isin(images).iloc[:, 0]]

        images = patientDataFiltered['Image Index'].tolist()
        patientDataFiltered['Finding Labels'] = patientDataFiltered['Finding Labels'].replace('No Finding', '')
        all_labels = ['Emphysema', 'Hernia', 'Pneumonia', 'Edema', 'Fibrosis', 'Pleural_Thickening', 'Mass', 'Atelectasis', 'Nodule', 'Effusion', 'Infiltration', 'Pneumothorax', 'Consolidation', 'Cardiomegaly']
        print("all_labels", all_labels)

        # Perform one-hot encoding based on the diseases listed above
        all_labels = [x for x in all_labels if len(x) > 0]
        for c_label in all_labels:
            if len(c_label) > 1:  # leave out empty labels
                patientDataFiltered[c_label] = patientDataFiltered['Finding Labels'].map(lambda finding: 1.0 if c_label in finding else 0)

        labelPatients = []
        for index, rows in patientDataFiltered.iterrows():
            labels = []
            for label in all_labels:
                if label in rows:
                    labels.append(int(rows[label]))
                else:
                    labels.append(0)
            labelPatients.append(labels)

        return labelPatients
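A small self-contained sketch of the one-hot encoding step inside createLists (not part of the commit); the two rows below are made-up examples, but the column names match the NIH Data_Entry CSV this class reads.

import pandas as pd

# Hypothetical rows mimicking the NIH metadata CSV ('|' separates multiple findings)
df = pd.DataFrame({
    'Image Index': ['00000001_000.png', '00000002_000.png'],
    'Finding Labels': ['Cardiomegaly|Effusion', 'No Finding'],
})
df['Finding Labels'] = df['Finding Labels'].replace('No Finding', '')

all_labels = ['Emphysema', 'Hernia', 'Pneumonia', 'Edema', 'Fibrosis', 'Pleural_Thickening',
              'Mass', 'Atelectasis', 'Nodule', 'Effusion', 'Infiltration', 'Pneumothorax',
              'Consolidation', 'Cardiomegaly']
for c_label in all_labels:
    df[c_label] = df['Finding Labels'].map(lambda finding: 1.0 if c_label in finding else 0.0)

# Each row becomes a 14-element multi-hot vector, in the order given by all_labels
print(df[all_labels].values.tolist())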
@@ -0,0 +1,201 @@
import numpy as np
import pandas as pd
import os
import shutil
import cv2
import matplotlib.pyplot as plt
import seaborn as sns
import azureml.core
from azureml.core import Workspace, Datastore, Dataset
from azureml.core.authentication import InteractiveLoginAuthentication
# #%matplotlib inline
from TrainerTester import TrainerTester
import random
import time
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import transforms
from PIL import Image
from sklearn.metrics import roc_auc_score


# Environment variables expected by this script (values intentionally left blank):
# %env TENANT_ID=
# %env SUBSCRIPTION_ID=
# %env RESOURCE_GROUP=
# %env WORKSPACE_NAME=
# %env DATASTORE_NAME=
# %env DATASET_NAME=
# %env DATASET_NAME_CSV=
# %env IMAGES_SUBFOLDER=


def main():
    runTrain()


def runTrain():
    # Train the model end to end

    CNNMODEL = 'CNNModel'
    timestampTime = time.strftime("%H%M%S")
    timestampDate = time.strftime("%d%m%Y")
    timestampLaunch = timestampDate + '-' + timestampTime

    #-------------------- SETTINGS: AML WORKSPACE AND DATASTORE
    interactive_auth = InteractiveLoginAuthentication(tenant_id=os.environ['TENANT_ID'])
    ws = Workspace(
        subscription_id=os.environ['SUBSCRIPTION_ID'],
        resource_group=os.environ['RESOURCE_GROUP'],
        workspace_name=os.environ['WORKSPACE_NAME'],
        auth=interactive_auth
    )
    datastore = Datastore.get(ws, datastore_name=os.environ['DATASTORE_NAME'])

    #-------------------- SETTINGS: MOUNTING THE IMAGE DATASET
    chestist_data = Dataset.get_by_name(ws, os.environ['DATASET_NAME'])
    mountPoint = chestist_data.mount()
    mountPoint.start()
    pathDirData = mountPoint.mount_point

    #-------------------- SETTINGS: MOUNTING THE METADATA CSV DATASET
    chestist_data_csv = Dataset.get_by_name(ws, os.environ['DATASET_NAME_CSV'])
    mountPointCsv = chestist_data_csv.mount()
    mountPointCsv.start()
    csv = mountPointCsv.mount_point

    #-------------------- SPLIT DATA
    patient_data = pd.read_csv(f"{csv}/Data_Entry_2017_v2020 (1).csv", header=0)
    patient_data = patient_data.dropna()
    lenDataset = int(patient_data.shape[0])
    # Positional indices into patient_data; removing sampled indices keeps the splits disjoint
    indexDataset = list(range(lenDataset))

    # Tiny fractions of the dataset for a first smoke-test run
    trainPercentage = (0.02 * lenDataset) / 100
    devPercentage = (0.01 * lenDataset) / 100
    valPercentage = (0.01 * lenDataset) / 100

    train = random.sample(indexDataset, int(trainPercentage))
    indexDataset = [i for i in indexDataset if i not in train]

    dev = random.sample(indexDataset, int(devPercentage))
    indexDataset = [i for i in indexDataset if i not in dev]

    val = random.sample(indexDataset, int(valPercentage))
    indexDataset = [i for i in indexDataset if i not in val]

    train_dataframe = patient_data.iloc[train, :]
    dev_dataframe = patient_data.iloc[dev, :]
    val_dataframe = patient_data.iloc[val, :]

    # Full image paths are built against the image mount (pathDirData)
    trainListImages = train_dataframe['Image Index'].tolist()
    listImagePathsTrain = []
    for i, image in enumerate(trainListImages):
        trainListImages[i] = pathDirData + "/" + image
        listImagePathsTrain.append(trainListImages[i])

    valListImages = val_dataframe['Image Index'].tolist()
    listImagePathsVal = []
    for i, image in enumerate(valListImages):
        valListImages[i] = pathDirData + "/" + image
        listImagePathsVal.append(valListImages[i])

    devListImages = dev_dataframe['Image Index'].tolist()
    listImagesPathDev = []
    for i, image in enumerate(devListImages):
        devListImages[i] = pathDirData + "/" + image
        listImagesPathDev.append(devListImages[i])

    #---- Neural network parameters: type of network, is it pre-trained on ImageNet, number of classes
    nnArchitecture = CNNMODEL
    nnIsTrained = True
    nnClassCount = 14  # 14 different labels to be detected

    #---- Training settings: batch size, maximum number of epochs
    trBatchSize = 16  # Might change! This is for the first iteration
    trMaxEpoch = 1    # Same here, this could change

    #---- Parameters related to image transforms: size of the down-scaled image, cropped image
    imgtransResize = 256
    imgtransCrop = 224

    # Path to save the trained model
    pathModel = 'm-' + timestampLaunch + '.pth.tar'

    print('Architecture selected to train =', nnArchitecture)
    print('pathDirData', pathDirData)
    print('listDir', os.listdir(pathDirData))
    TrainerTester.trainer(pathDirData, listImagePathsTrain, listImagePathsVal, listImagesPathDev, nnArchitecture, nnIsTrained, nnClassCount, trBatchSize, trMaxEpoch, imgtransResize, imgtransCrop, timestampLaunch, None)

    print('Testing the trained model...')
    TrainerTester.tester(pathDirData, listImagesPathDev, pathModel, nnArchitecture, nnClassCount, nnIsTrained, trBatchSize, imgtransResize, imgtransCrop, timestampLaunch)


def runTest():
    # Test a previously trained model

    timestampTime = time.strftime("%H%M%S")
    timestampDate = time.strftime("%d%m%Y")
    timestampLaunch = timestampDate + '-' + timestampTime

    #-------------------- SETTINGS: AML WORKSPACE AND DATASTORE
    interactive_auth = InteractiveLoginAuthentication(tenant_id=os.environ['TENANT_ID'])
    ws = Workspace(
        subscription_id=os.environ['SUBSCRIPTION_ID'],
        resource_group=os.environ['RESOURCE_GROUP'],
        workspace_name=os.environ['WORKSPACE_NAME'],
        auth=interactive_auth
    )
    datastore = Datastore.get(ws, datastore_name=os.environ['DATASTORE_NAME'])

    #-------------------- SETTINGS: MOUNTING THE DATASET TO MAKE IT AVAILABLE
    chestist_data = Dataset.get_by_name(ws, os.environ['DATASET_NAME'])
    mountPoint = chestist_data.mount()
    mountPoint.start()
    mountFolder = mountPoint.mount_point
    files = os.listdir(mountFolder + os.environ['IMAGES_SUBFOLDER'])  # Need to generalize for the whole dataset

    pathDirData = files
    pathFileTest = ''  # Paths of images for test
    nnArchitecture = 'CNNModel'
    nnIsTrained = True
    nnClassCount = 14
    trBatchSize = 16
    imgtransResize = 256
    imgtransCrop = 224

    pathModel = 'm-' + timestampLaunch + '.pth.tar'  # path of the model to test, needs to change

    TrainerTester.tester(pathDirData, pathFileTest, pathModel, nnArchitecture, nnClassCount, nnIsTrained, trBatchSize, imgtransResize, imgtransCrop, timestampLaunch)


if __name__ == '__main__':
    main()
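A hedged setup sketch for this entry point (not part of the commit): the environment variable names come from the script above, while every value is a placeholder to be replaced with your own Azure ML details before running the training script.

import os

# Placeholder values; substitute your own Azure ML workspace and dataset names
os.environ['TENANT_ID'] = '<tenant-guid>'
os.environ['SUBSCRIPTION_ID'] = '<subscription-guid>'
os.environ['RESOURCE_GROUP'] = '<resource-group>'
os.environ['WORKSPACE_NAME'] = '<workspace-name>'
os.environ['DATASTORE_NAME'] = '<datastore-name>'
os.environ['DATASET_NAME'] = '<registered-image-dataset>'
os.environ['DATASET_NAME_CSV'] = '<registered-metadata-csv-dataset>'
os.environ['IMAGES_SUBFOLDER'] = '/<images-subfolder>'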
TrainerTester.py
@@ -0,0 +1,324 @@
import os
import numpy as np
import time
import sys
import re
import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
import torch.nn.functional as tfunc
from torch.utils.data import DataLoader
from torch.optim.lr_scheduler import ReduceLROnPlateau
from tempfile import TemporaryFile
import torch.nn.functional as func
from CNNModel import CNNModel
from DatasetGenerator import DatasetGenerator
from azureml.core import Workspace, Datastore, Dataset
from azureml.core.authentication import InteractiveLoginAuthentication
from sklearn.metrics import roc_auc_score


class TrainerTester():

    def trainer(pathDirData, pathFileTrain, pathFileVal, pathFileTest, nnArchitecture, nnIsTrained, nnClassCount, trBatchSize, trMaxEpoch, transResize, transCrop, launchTimestamp, checkpoint):

        #-------------------- SETTINGS: NETWORK ARCHITECTURE
        if nnArchitecture == 'CNNModel': model = CNNModel(nnClassCount, nnIsTrained).cuda()
        model = torch.nn.DataParallel(model).cuda()

        #-------------------- SETTINGS: AML WORKSPACE AND DATASTORE
        interactive_auth = InteractiveLoginAuthentication(tenant_id=os.environ['TENANT_ID'])
        ws = Workspace(
            subscription_id=os.environ['SUBSCRIPTION_ID'],
            resource_group=os.environ['RESOURCE_GROUP'],
            workspace_name=os.environ['WORKSPACE_NAME'],
            auth=interactive_auth
        )
        datastore = Datastore.get(ws, datastore_name=os.environ['DATASTORE_NAME'])

        #-------------------- SETTINGS: MOUNTING THE DATASET TO MAKE IT AVAILABLE
        chestist_data = Dataset.get_by_name(ws, os.environ['DATASET_NAME_CSV'])
        mountPoint = chestist_data.mount()
        mountPoint.start()
        mountFolder = mountPoint.mount_point
        files = os.listdir(mountFolder)  # Need to generalize for the whole dataset
        csvFilePath = mountFolder  # Path for the csv file with the labels

        #-------------------- SETTINGS: DATA TRANSFORMS (IMAGE SETTINGS)
        # Using the ImageNet mean and std is common practice; they were computed over millions of
        # images. New statistics could be computed for this dataset instead.
        normalize = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        transformList = []
        transformList.append(transforms.RandomResizedCrop(transCrop))  # randomizes scale as well
        transformList.append(transforms.RandomHorizontalFlip())        # random flip for augmentation
        transformList.append(transforms.ToTensor())                    # converts to tensor
        transformList.append(normalize)
        transformSequence = transforms.Compose(transformList)

        # Lists of image file names for train, validation and test
        listImagesTrain = []
        listImagesVal = []
        listImagesTest = []

        for imagePath in pathFileTrain:
            listImagesTrain.append(os.path.basename(imagePath))

        for imagePath in pathFileVal:
            listImagesVal.append(os.path.basename(imagePath))

        for imagePath in pathFileTest:
            listImagesTest.append(os.path.basename(imagePath))

        labelList = []
        #-------------------- DATASET BUILDERS
        datasetTrain = DatasetGenerator(pathImageDirectory=pathDirData, pathDatasetFile=pathFileTrain, listImages=listImagesTrain, labelList=labelList, transform=transformSequence, csvFilePath=csvFilePath)
        datasetVal = DatasetGenerator(pathImageDirectory=pathDirData, pathDatasetFile=pathFileVal, listImages=listImagesVal, labelList=labelList, transform=transformSequence, csvFilePath=csvFilePath)
        dataLoaderTrain = DataLoader(dataset=datasetTrain, batch_size=trBatchSize, shuffle=True, num_workers=24, pin_memory=True)
        dataLoaderVal = DataLoader(dataset=datasetVal, batch_size=trBatchSize, shuffle=False, num_workers=24, pin_memory=True)

        #-------------------- SETTINGS: OPTIMIZER & SCHEDULER
        optimizer = optim.Adam(model.parameters(), lr=0.0001, betas=(0.9, 0.999), eps=1e-08, weight_decay=1e-5)
        scheduler = ReduceLROnPlateau(optimizer, factor=0.1, patience=5, mode='min')

        #-------------------- SETTINGS: LOSS
        # Default reduction ('mean') matches the deprecated size_average=True
        loss = torch.nn.BCELoss()

        #---- Load checkpoint
        if checkpoint is not None:
            modelCheckpoint = torch.load(checkpoint)
            model.load_state_dict(modelCheckpoint['state_dict'], strict=False)
            optimizer.load_state_dict(modelCheckpoint['optimizer'])

        #---- TRAIN THE NETWORK
        lossMIN = 100000

        for epochID in range(0, trMaxEpoch):

            timestampTime = time.strftime("%H%M%S")
            timestampDate = time.strftime("%d%m%Y")
            timestampSTART = timestampDate + '-' + timestampTime

            TrainerTester.epochTrain(model, dataLoaderTrain, optimizer, scheduler, trMaxEpoch, nnClassCount, loss)
            lossVal, losstensor = TrainerTester.epochVal(model, dataLoaderVal, optimizer, scheduler, trMaxEpoch, nnClassCount, loss)

            timestampTime = time.strftime("%H%M%S")
            timestampDate = time.strftime("%d%m%Y")
            timestampEND = timestampDate + '-' + timestampTime

            scheduler.step(losstensor.item())

            # Save a checkpoint whenever the validation loss improves
            if lossVal < lossMIN:
                lossMIN = lossVal
                torch.save({'epoch': epochID + 1, 'state_dict': model.state_dict(), 'best_loss': lossMIN, 'optimizer': optimizer.state_dict()}, 'm-' + launchTimestamp + '.pth.tar')
                print('Epoch [' + str(epochID + 1) + '] [save] [' + timestampEND + '] loss= ' + str(lossVal))
            else:
                print('Epoch [' + str(epochID + 1) + '] [----] [' + timestampEND + '] loss= ' + str(lossVal))

    #--------------------------------------------------------------------------------

    def epochTrain(model, dataLoader, optimizer, scheduler, epochMax, classCount, loss):

        model.train()

        for batchID, (input, target) in enumerate(dataLoader):

            target = target.cuda(non_blocking=True)

            varOutput = model(input)
            lossvalue = loss(varOutput, target)

            optimizer.zero_grad()
            lossvalue.backward()
            optimizer.step()

    #--------------------------------------------------------------------------------

    def epochVal(model, dataLoader, optimizer, scheduler, epochMax, classCount, loss):

        with torch.no_grad():
            model.eval()

            lossVal = 0
            lossValNorm = 0
            losstensorMean = 0

            for i, (input, target) in enumerate(dataLoader):

                target = target.cuda(non_blocking=True)

                varOutput = model(input)

                losstensor = loss(varOutput, target)
                losstensorMean += losstensor

                lossVal += losstensor.item()
                lossValNorm += 1

            outLoss = lossVal / lossValNorm
            losstensorMean = losstensorMean / lossValNorm

            return outLoss, losstensorMean

    #--------------------------------------------------------------------------------

    def computeAUROC(dataGT, dataPRED, classCount):

        outAUROC = []

        datanpGT = dataGT.cpu().numpy()
        datanpPRED = dataPRED.cpu().numpy()

        for i in range(classCount):
            try:
                outAUROC.append(roc_auc_score(datanpGT[:, i], datanpPRED[:, i]))
            except ValueError:
                # A class whose ground truth contains a single value has no defined AUROC; skip it
                pass

        return outAUROC

    #--------------------------------------------------------------------------------

    def tester(pathDirData, pathFileTest, pathModel, nnArchitecture, nnClassCount, nnIsTrained, trBatchSize, transResize, transCrop, launchTimeStamp):

        CLASS_NAMES = ['Atelectasis', 'Cardiomegaly', 'Effusion', 'Infiltration', 'Mass', 'Nodule', 'Pneumonia', 'Pneumothorax', 'Consolidation', 'Edema', 'Emphysema', 'Fibrosis', 'Pleural_Thickening', 'Hernia']
        cudnn.benchmark = True

        if nnArchitecture == 'CNNModel': model = CNNModel(nnClassCount, nnIsTrained).cuda()

        model = torch.nn.DataParallel(model).cuda()

        checkpoint = torch.load(pathModel)
        state_dict = checkpoint['state_dict']
        remove_data_parallel = False  # Change if you don't want to use nn.DataParallel(model)

        # Older DenseNet checkpoints store 'norm.1'/'conv.2' style keys;
        # rewrite them to the current 'norm1'/'conv2' naming
        pattern = re.compile(
            r'^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\.(?:weight|bias|running_mean|running_var))$')
        for key in list(state_dict.keys()):
            match = pattern.match(key)
            new_key = match.group(1) + match.group(2) if match else key
            new_key = new_key[7:] if remove_data_parallel else new_key
            state_dict[new_key] = state_dict[key]
            # Delete old key only if modified.
            if match or remove_data_parallel:
                del state_dict[key]

        model.load_state_dict(checkpoint['state_dict'], strict=False)

        #-------------------- SETTINGS: DATA TRANSFORMS, TEN CROPS
        normalize = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

        #-------------------- SETTINGS: DATASET BUILDERS
        transformList = []
        transformList.append(transforms.Resize(transResize))
        transformList.append(transforms.TenCrop(transCrop))
        transformList.append(transforms.Lambda(lambda crops: torch.stack([transforms.ToTensor()(crop) for crop in crops])))
        transformList.append(transforms.Lambda(lambda crops: torch.stack([normalize(crop) for crop in crops])))
        transformSequence = transforms.Compose(transformList)

        interactive_auth = InteractiveLoginAuthentication(tenant_id=os.environ['TENANT_ID'])
        ws = Workspace(
            subscription_id=os.environ['SUBSCRIPTION_ID'],
            resource_group=os.environ['RESOURCE_GROUP'],
            workspace_name=os.environ['WORKSPACE_NAME'],
            auth=interactive_auth
        )
        datastore = Datastore.get(ws, datastore_name=os.environ['DATASTORE_NAME'])

        #-------------------- SETTINGS: MOUNTING THE DATASET TO MAKE IT AVAILABLE
        chestist_data = Dataset.get_by_name(ws, os.environ['DATASET_NAME'])
        mountPoint = chestist_data.mount()
        mountPoint.start()
        mountFolder = mountPoint.mount_point
        pathDirData = mountFolder

        listImagesTest = []
        for imagePath in pathFileTest:
            listImagesTest.append(os.path.basename(imagePath))
        csvFilePath = ""
        labelList = []
        datasetTest = DatasetGenerator(pathImageDirectory=pathDirData, pathDatasetFile=pathFileTest, listImages=listImagesTest, labelList=labelList, transform=transformSequence, csvFilePath=csvFilePath)
        dataLoaderTest = DataLoader(dataset=datasetTest, batch_size=trBatchSize, num_workers=8, shuffle=False, pin_memory=True)

        with torch.no_grad():

            outGT = torch.FloatTensor().cuda()
            outPRED = torch.FloatTensor().cuda()

            model.eval()

            for i, (input, target) in enumerate(dataLoaderTest):

                target = target.cuda()
                outGT = torch.cat((outGT, target), 0)

                # Ten-crop: each image contributes n_crops crops that are scored and averaged
                bs, n_crops, c, h, w = input.size()
                varInput = input.view(-1, c, h, w).cuda()

                out = model(varInput)
                outMean = out.view(bs, n_crops, -1).mean(1)

                outPRED = torch.cat((outPRED, outMean.data), 0)

            aurocIndividual = TrainerTester.computeAUROC(outGT, outPRED, nnClassCount)
            aurocMean = np.array(aurocIndividual).mean()
            print('AUROC mean ', aurocMean)

            for i in range(0, len(aurocIndividual)):
                print(CLASS_NAMES[i], ' ', aurocIndividual[i])

        return
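A minimal sketch of the ten-crop evaluation pattern used in tester above (not part of the commit), run on CPU with an untrained CNNModel and a synthetic image purely to illustrate the reshape-and-average step.

import torch
import torchvision.transforms as transforms
from PIL import Image
from CNNModel import CNNModel

# Ten-crop transform as in tester(), applied to a single placeholder image on CPU
normalize = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
tenCrop = transforms.Compose([
    transforms.Resize(256),
    transforms.TenCrop(224),
    transforms.Lambda(lambda crops: torch.stack([normalize(transforms.ToTensor()(c)) for c in crops])),
])

img = Image.new('RGB', (1024, 1024))             # placeholder image instead of a real X-ray
batch = tenCrop(img).unsqueeze(0)                # shape: (bs=1, n_crops=10, 3, 224, 224)

model = CNNModel(classCount=14, isTrained=False).eval()  # random weights, CPU only
with torch.no_grad():
    bs, n_crops, c, h, w = batch.size()
    out = model(batch.view(-1, c, h, w))         # score all crops at once: (bs*n_crops, 14)
    probs = out.view(bs, n_crops, -1).mean(1)    # average over the 10 crops: (bs, 14)
print(probs.shape)  # torch.Size([1, 14])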