Π·Π΅Ρ€ΠΊΠ°Π»ΠΎ ΠΈΠ·
1
0
Π€ΠΎΡ€ΠΊΠ½ΡƒΡ‚ΡŒ 0
## Describe your changes
Consistent dataloader for benchmark
Transforms.RandomCrop/Flip will generate different tensors every time the
dataloader creation function is called. This brings inconsistent model
accuracy measurements.

This PR is a workaround used to ensure the input data is consistent for the
torch model and the onnx model.


## Checklist before requesting a review
- [ ] Add unit tests for this change.
- [ ] Make sure all tests can pass.
- [ ] Update documents if necessary.
- [ ] Format your code by running `pre-commit run --all-files`
- [ ] Is this a user-facing change? If yes, give a description of this
change to be included in the release notes.

## (Optional) Issue link
This commit is contained in:
trajep 2023-10-19 10:48:56 +08:00 ΠΊΠΎΠΌΠΌΠΈΡ‚ ΠΏΡ€ΠΎΠΈΠ·Π²Ρ‘Π» GitHub
Π ΠΎΠ΄ΠΈΡ‚Π΅Π»ΡŒ ab70b6cd3a
ΠšΠΎΠΌΠΌΠΈΡ‚ 2b5aef171a
НС Π½Π°ΠΉΠ΄Π΅Π½ ΠΊΠ»ΡŽΡ‡, ΡΠΎΠΎΡ‚Π²Π΅Ρ‚ΡΡ‚Π²ΡƒΡŽΡ‰ΠΈΠΉ Π΄Π°Π½Π½ΠΎΠΉ подписи
Π˜Π΄Π΅Π½Ρ‚ΠΈΡ„ΠΈΠΊΠ°Ρ‚ΠΎΡ€ ΠΊΠ»ΡŽΡ‡Π° GPG: 4AEE18F83AFDEB23
2 ΠΈΠ·ΠΌΠ΅Π½Ρ‘Π½Π½Ρ‹Ρ… Ρ„Π°ΠΉΠ»ΠΎΠ²: 32 Π΄ΠΎΠ±Π°Π²Π»Π΅Π½ΠΈΠΉ ΠΈ 12 ΡƒΠ΄Π°Π»Π΅Π½ΠΈΠΉ

ΠŸΡ€ΠΎΡΠΌΠΎΡ‚Ρ€Π΅Ρ‚ΡŒ Ρ„Π°ΠΉΠ»

@ -56,12 +56,13 @@ def update_lr(optimizer, lr):
def prepare_model(num_epochs=1, models_dir="models", data_dir="data"):
seed = 0
# seed everything to 0 for reproducibility, https://pytorch.org/docs/stable/notes/randomness.html
random.seed(0)
np.random.seed(0)
torch.manual_seed(0)
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
# the following are needed only for GPU
torch.cuda.manual_seed(0)
torch.cuda.manual_seed(seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

ΠŸΡ€ΠΎΡΠΌΠΎΡ‚Ρ€Π΅Ρ‚ΡŒ Ρ„Π°ΠΉΠ»

@ -2,6 +2,7 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
import torch
import torchmetrics
from onnxruntime.quantization.calibrate import CalibrationDataReader
@ -17,6 +18,15 @@ from olive.model import OliveModel
# Common Dataset
# -------------------------------------------------------------------------
seed = 0
# seed everything to 0 for reproducibility, https://pytorch.org/docs/stable/notes/randomness.html
# do not set random seed and np.random.seed for aml test, since it will cause aml job name conflict
torch.manual_seed(seed)
# the following are needed only for GPU
torch.cuda.manual_seed(seed)
# make cuDNN deterministic so repeated runs produce identical results
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
class CIFAR10DataSet:
def __init__(
@ -31,10 +41,15 @@ class CIFAR10DataSet:
def setup(self, stage: str):
    """Build the train/validation CIFAR-10 datasets with augmentation transforms.

    Args:
        stage: lightning-style stage name (unused here; kept for interface
            compatibility with the caller).
    """
    transform = transforms.Compose(
        [
            transforms.Pad(4),
            transforms.RandomHorizontalFlip(),
            transforms.RandomCrop(32),
            transforms.ToTensor(),
        ]
    )
    self.train_dataset = CIFAR10(root=self.train_path, train=True, transform=transform, download=False)
    # val uses train=False so validation data is disjoint from training data
    self.val_dataset = CIFAR10(root=self.vld_path, train=False, transform=transform, download=False)
class PytorchResNetDataset(Dataset):
@ -71,8 +86,7 @@ def post_process(output):
def create_dataloader(data_dir, batch_size, *args, **kwargs):
    """Create the validation DataLoader used for benchmark evaluation.

    Uses the full validation dataset (no random_split) so repeated calls yield
    the same data, keeping torch-model and onnx-model accuracy measurements
    consistent.

    Args:
        data_dir: root directory of the CIFAR-10 data.
        batch_size: number of samples per batch.

    Returns:
        A DataLoader over the validation set; incomplete final batch dropped.
    """
    cifar10_dataset = CIFAR10DataSet(data_dir)
    return DataLoader(PytorchResNetDataset(cifar10_dataset.val_dataset), batch_size=batch_size, drop_last=True)
# -------------------------------------------------------------------------
@ -83,11 +97,17 @@ def create_dataloader(data_dir, batch_size, *args, **kwargs):
class ResnetCalibrationDataReader(CalibrationDataReader):
    """Feeds CIFAR-10 training batches to onnxruntime static quantization.

    Draws batches from the train dataloader and stops after 500 batches (or
    when the dataloader is exhausted, whichever comes first).
    """

    def __init__(self, data_dir: str, batch_size: int = 16):
        super().__init__()
        # train dataloader so calibration data matches the training distribution
        self.iterator = iter(create_train_dataloader(data_dir, batch_size))
        # cap on how many calibration batches get_next will emit
        self.sample_counter = 500

    def get_next(self) -> dict:
        """Return the next calibration feed dict, or None when done.

        Returns:
            {"input": <numpy batch>} for the next batch, or None once the
            sample cap is reached or the iterator is exhausted.
        """
        if self.sample_counter <= 0:
            return None
        try:
            item = {"input": next(self.iterator)[0].numpy()}
            # only count down after a successful fetch
            self.sample_counter -= 1
            return item
        except Exception:
            # exhausted iterator (StopIteration) or a bad batch: signal "done"
            return None
@ -161,8 +181,7 @@ def create_qat_config():
def create_train_dataloader(data_dir, batchsize, *args, **kwargs):
    """Create the training DataLoader used for QAT and calibration.

    Uses the full training dataset (no random_split) so repeated calls yield
    the same data, keeping accuracy measurements consistent across runs.

    Args:
        data_dir: root directory of the CIFAR-10 data.
        batchsize: number of samples per batch.

    Returns:
        A DataLoader over the training set; incomplete final batch dropped.
    """
    cifar10_dataset = CIFAR10DataSet(data_dir)
    return DataLoader(PytorchResNetDataset(cifar10_dataset.train_dataset), batch_size=batchsize, drop_last=True)
# -------------------------------------------------------------------------