зеркало из https://github.com/microsoft/nni.git
Merge from dogfood branch to master
This commit is contained in:
Родитель
98530fd247
Коммит
8314d6eec5
128
Makefile
128
Makefile
|
@ -1,10 +1,10 @@
|
||||||
BIN_PATH ?= /usr/bin
|
BIN_PATH ?= ${HOME}/.local/bin
|
||||||
NODE_PATH ?= /usr/share
|
INSTALL_PREFIX ?= ${HOME}/.local
|
||||||
EXAMPLE_PATH ?= /usr/share/nni/examples
|
PIP_MODE ?= --user
|
||||||
|
EXAMPLES_PATH ?= ${HOME}/nni/examples
|
||||||
SRC_DIR := ${PWD}
|
WHOAMI := $(shell whoami)
|
||||||
|
.PHONY: build install uninstall dev-install
|
||||||
.PHONY: build install uninstall
|
YARN := $(INSTALL_PREFIX)/yarn/bin/yarn
|
||||||
|
|
||||||
build:
|
build:
|
||||||
### Building NNI Manager ###
|
### Building NNI Manager ###
|
||||||
|
@ -21,50 +21,124 @@ build:
|
||||||
|
|
||||||
|
|
||||||
install:
|
install:
|
||||||
mkdir -p $(NODE_PATH)/nni
|
ifneq ('$(HOME)', '/root')
|
||||||
mkdir -p $(EXAMPLE_PATH)
|
ifeq (${WHOAMI}, root)
|
||||||
|
### Sorry, sudo make install is not supported ###
|
||||||
|
exit 1
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
|
mkdir -p $(BIN_PATH)
|
||||||
|
mkdir -p $(INSTALL_PREFIX)/nni
|
||||||
|
mkdir -p $(EXAMPLES_PATH)
|
||||||
|
|
||||||
### Installing NNI Manager ###
|
### Installing NNI Manager ###
|
||||||
cp -rT src/nni_manager/dist $(NODE_PATH)/nni/nni_manager
|
cp -rT src/nni_manager/dist $(INSTALL_PREFIX)/nni/nni_manager
|
||||||
cp -rT src/nni_manager/node_modules $(NODE_PATH)/nni/nni_manager/node_modules
|
cp -rT src/nni_manager/node_modules $(INSTALL_PREFIX)/nni/nni_manager/node_modules
|
||||||
|
|
||||||
### Installing Web UI ###
|
### Installing Web UI ###
|
||||||
cp -rT src/webui/build $(NODE_PATH)/nni/webui
|
cp -rT src/webui/build $(INSTALL_PREFIX)/nni/webui
|
||||||
ln -sf $(NODE_PATH)/nni/nni_manager/node_modules/serve/bin/serve.js $(BIN_PATH)/serve
|
ln -sf $(INSTALL_PREFIX)/nni/nni_manager/node_modules/serve/bin/serve.js $(BIN_PATH)/serve
|
||||||
|
|
||||||
### Installing Python SDK dependencies ###
|
### Installing Python SDK dependencies ###
|
||||||
pip3 install -r src/sdk/pynni/requirements.txt
|
pip3 install $(PIP_MODE) -r src/sdk/pynni/requirements.txt
|
||||||
### Installing Python SDK ###
|
### Installing Python SDK ###
|
||||||
cd src/sdk/pynni && python3 setup.py install
|
cd src/sdk/pynni && python3 setup.py install $(PIP_MODE)
|
||||||
|
|
||||||
### Installing nnictl ###
|
### Installing nnictl ###
|
||||||
cd tools && python3 setup.py install
|
cd tools && python3 setup.py install $(PIP_MODE)
|
||||||
|
|
||||||
echo '#!/bin/sh' > $(BIN_PATH)/nnimanager
|
echo '#!/bin/sh' > $(BIN_PATH)/nnimanager
|
||||||
echo 'cd $(NODE_PATH)/nni/nni_manager && node main.js $$@' >> $(BIN_PATH)/nnimanager
|
echo 'cd $(INSTALL_PREFIX)/nni/nni_manager && node main.js $$@' >> $(BIN_PATH)/nnimanager
|
||||||
chmod +x $(BIN_PATH)/nnimanager
|
chmod +x $(BIN_PATH)/nnimanager
|
||||||
|
|
||||||
install -m 755 tools/nnictl $(BIN_PATH)/nnictl
|
echo '#!/bin/sh' > $(BIN_PATH)/nnictl
|
||||||
|
echo 'NNI_MANAGER=$(BIN_PATH)/nnimanager WEB_UI_FOLDER=$(INSTALL_PREFIX)/nni/webui python3 -m nnicmd.nnictl $$@' >> $(BIN_PATH)/nnictl
|
||||||
|
chmod +x $(BIN_PATH)/nnictl
|
||||||
|
|
||||||
### Installing examples ###
|
### Installing examples ###
|
||||||
cp -rT examples $(EXAMPLE_PATH)
|
cp -rT examples $(EXAMPLES_PATH)
|
||||||
|
|
||||||
|
|
||||||
|
pip-install:
|
||||||
|
ifneq ('$(HOME)', '/root')
|
||||||
|
ifeq (${WHOAMI}, root)
|
||||||
|
### Sorry, sudo make install is not supported ###
|
||||||
|
exit 1
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
|
### Prepare Node.js ###
|
||||||
|
wget https://nodejs.org/dist/v10.9.0/node-v10.9.0-linux-x64.tar.xz
|
||||||
|
tar xf node-v10.9.0-linux-x64.tar.xz
|
||||||
|
cp -rT node-v10.9.0-linux-x64 $(INSTALL_PREFIX)/node
|
||||||
|
|
||||||
|
### Prepare Yarn 1.9.4 ###
|
||||||
|
wget https://github.com/yarnpkg/yarn/releases/download/v1.9.4/yarn-v1.9.4.tar.gz
|
||||||
|
tar xf yarn-v1.9.4.tar.gz
|
||||||
|
cp -rT yarn-v1.9.4 $(INSTALL_PREFIX)/yarn
|
||||||
|
|
||||||
|
### Building NNI Manager ###
|
||||||
|
cd src/nni_manager && $(YARN) && $(YARN) build
|
||||||
|
|
||||||
|
### Building Web UI ###
|
||||||
|
cd src/webui && $(YARN) && $(YARN) build
|
||||||
|
|
||||||
|
mkdir -p $(BIN_PATH)
|
||||||
|
mkdir -p $(INSTALL_PREFIX)/nni
|
||||||
|
|
||||||
|
### Installing NNI Manager ###
|
||||||
|
cp -rT src/nni_manager/dist $(INSTALL_PREFIX)/nni/nni_manager
|
||||||
|
cp -rT src/nni_manager/node_modules $(INSTALL_PREFIX)/nni/nni_manager/node_modules
|
||||||
|
echo '#!/bin/sh' > $(BIN_PATH)/nnimanager
|
||||||
|
echo 'cd $(INSTALL_PREFIX)/nni/nni_manager && node main.js $$@' >> $(BIN_PATH)/nnimanager
|
||||||
|
chmod +x $(BIN_PATH)/nnimanager
|
||||||
|
|
||||||
|
### Installing Web UI ###
|
||||||
|
cp -rT src/webui/build $(INSTALL_PREFIX)/nni/webui
|
||||||
|
ln -sf $(INSTALL_PREFIX)/nni/nni_manager/node_modules/serve/bin/serve.js $(BIN_PATH)/serve
|
||||||
|
|
||||||
|
### Installing examples ###
|
||||||
|
cp -rT examples $(EXAMPLES_PATH)
|
||||||
|
|
||||||
|
|
||||||
dev-install:
|
dev-install:
|
||||||
|
mkdir -p $(BIN_PATH)
|
||||||
|
mkdir -p $(INSTALL_PREFIX)/nni
|
||||||
|
|
||||||
|
### Installing NNI Manager ###
|
||||||
|
ln -sf $(INSTALL_PREFIX)/nni/nni_manager $(PWD)/src/nni_manager/dist
|
||||||
|
ln -sf $(INSTALL_PREFIX)/nni/nni_manager/node_modules $(PWD)/src/nni_manager/node_modules
|
||||||
|
|
||||||
|
### Installing Web UI ###
|
||||||
|
ln -sf $(INSTALL_PREFIX)/nni/webui $(PWD)/src/webui
|
||||||
|
ln -sf $(INSTALL_PREFIX)/nni/nni_manager/node_modules/serve/bin/serve.js $(BIN_PATH)/serve
|
||||||
|
|
||||||
### Installing Python SDK dependencies ###
|
### Installing Python SDK dependencies ###
|
||||||
pip3 install --user -r src/sdk/pynni/requirements.txt
|
pip3 install $(PIP_MODE) -r src/sdk/pynni/requirements.txt
|
||||||
### Installing Python SDK ###
|
### Installing Python SDK ###
|
||||||
cd src/sdk/pynni && pip3 install --user -e .
|
cd src/sdk/pynni && pip3 install $(PIP_MODE) -e .
|
||||||
|
|
||||||
### Installing nnictl ###
|
### Installing nnictl ###
|
||||||
cd tools && pip3 install --user -e .
|
cd tools && pip3 install $(PIP_MODE) -e .
|
||||||
|
|
||||||
|
echo '#!/bin/sh' > $(BIN_PATH)/nnimanager
|
||||||
|
echo 'cd $(INSTALL_PREFIX)/nni/nni_manager && node main.js $$@' >> $(BIN_PATH)/nnimanager
|
||||||
|
chmod +x $(BIN_PATH)/nnimanager
|
||||||
|
|
||||||
|
echo '#!/bin/sh' > $(BIN_PATH)/nnictl
|
||||||
|
echo 'NNI_MANAGER=$(BIN_PATH)/nnimanager python3 -m nnicmd.nnictl $$@' >> $(BIN_PATH)/nnictl
|
||||||
|
chmod +x $(BIN_PATH)/nnictl
|
||||||
|
|
||||||
|
### Installing examples ###
|
||||||
|
ln -sf $(EXAMPLES_PATH) $(PWD)/examples
|
||||||
|
|
||||||
|
|
||||||
uninstall:
|
uninstall:
|
||||||
-rm -r $(EXAMPLE_PATH)
|
|
||||||
-rm -r $(NODE_PATH)/nni
|
|
||||||
-pip3 uninstall -y nnictl
|
|
||||||
-pip3 uninstall -y nni
|
-pip3 uninstall -y nni
|
||||||
-rm $(BIN_PATH)/nnictl
|
-pip3 uninstall -y nnictl
|
||||||
-rm $(BIN_PATH)/nnimanager
|
-rm -r $(INSTALL_PREFIX)/nni
|
||||||
|
-rm -r $(EXAMPLES_PATH)
|
||||||
-rm $(BIN_PATH)/serve
|
-rm $(BIN_PATH)/serve
|
||||||
|
-rm $(BIN_PATH)/nnimanager
|
||||||
|
-rm $(BIN_PATH)/nnictl
|
||||||
|
|
58
README.md
58
README.md
|
@ -1,21 +1,53 @@
|
||||||
# Introduction
|
# Introduction
|
||||||
Neural Network Intelligence(NNI) is a light package for supporting hyper-parameter tuning or neural architecture search.
|
|
||||||
It could easily run in different environments, such as: local/remote machine/cloud.
|
|
||||||
And it offers a new annotation language for user to conveniently design search space.
|
|
||||||
Also user could write code using any language or any machine learning framework.
|
|
||||||
|
|
||||||
# Getting Started
|
NNI (Neural Network Intelligence) is a toolkit to help users running automated machine learning experiments.
|
||||||
TODO: Guide users through getting your code up and running on their own system. In this section you can talk about:
|
The tool dispatches and runs trial jobs generated by tuning algorithms to search for the best neural architecture and/or hyper-parameters in different environments (e.g. local machine, remote servers, Cloud).
|
||||||
1. Installation process
|
|
||||||
2. Software dependencies
|
```
|
||||||
3. Latest releases
|
AutoML experiment Training Services
|
||||||
4. API references
|
┌────────┐ ┌────────────────────────┐ ┌────────────────┐
|
||||||
|
│ nnictl │ ─────> │ nni_manager │ │ Local Machine │
|
||||||
|
└────────┘ │ sdk/tuner │ └────────────────┘
|
||||||
|
│ hyperopt_tuner │
|
||||||
|
│ evolution_tuner │ trial jobs ┌────────────────┐
|
||||||
|
│ ... │ ────────> │ Remote Servers │
|
||||||
|
├────────────────────────┤ └────────────────┘
|
||||||
|
│ trial job source code │
|
||||||
|
│ sdk/annotation │ ┌────────────────┐
|
||||||
|
├────────────────────────┤ │ Yarn,K8s, │
|
||||||
|
│ nni_board │ │ ... │
|
||||||
|
└────────────────────────┘ └────────────────┘
|
||||||
|
```
|
||||||
|
## **Who should consider using NNI**
|
||||||
|
* You want to try different AutoML algorithms for your training code (model) on your local machine
|
||||||
|
* You want to run AutoML trial jobs in different environments to speed up search (e.g. remote servers, Cloud)
|
||||||
|
* As a researcher and data scientist, you want to implement your own AutoML algorithms and compare with other algorithms
|
||||||
|
* As a ML platform owner, you want to support AutoML in your platform
|
||||||
|
|
||||||
|
# Getting Started with NNI
|
||||||
|
|
||||||
|
## **Installation**
|
||||||
|
Install through python pip
|
||||||
|
* requirements: python >= 3.5
|
||||||
|
```
|
||||||
|
pip3 install -v --user git+https://github.com/Microsoft/NeuralNetworkIntelligence.git
|
||||||
|
source ~/.bashrc
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
## **Quick start: run an experiment at local**
|
||||||
|
Requirements:
|
||||||
|
* with NNI installed on your machine.
|
||||||
|
|
||||||
|
Run the following command to create an experiment for [mnist]
|
||||||
|
```bash
|
||||||
|
nnictl create --config ~/nni/examples/trials/mnist-annotation/config.yaml
|
||||||
|
```
|
||||||
|
This command will start the experiment and WebUI. The WebUI endpoint will be shown in the output of this command (for example, `http://localhost:8080`). Open this URL using your browsers. You can analyze your experiment through WebUI, or open trials' tensorboard. Please refer to [here](docs/GetStarted.md) for the GetStarted tutorial.
|
||||||
|
|
||||||
# Build and Test
|
|
||||||
TODO: Describe and show how to build your code and run the tests.
|
|
||||||
|
|
||||||
# Contribute
|
# Contribute
|
||||||
TODO: Explain how other users and developers can contribute to make your code better.
|
NNI is designed as an automatic searching framework with high extensibility. NNI has a very clear modular design. Contributing more tuner/assessor algorithms, training services, SDKs are really welcome. Please refer to [here](docs/ToContribute.md) for how to contribute.
|
||||||
|
|
||||||
# Privacy Statement
|
# Privacy Statement
|
||||||
The [Microsoft Enterprise and Developer Privacy Statement](https://privacy.microsoft.com/en-us/privacystatement) describes the privacy statement of this software.
|
The [Microsoft Enterprise and Developer Privacy Statement](https://privacy.microsoft.com/en-us/privacystatement) describes the privacy statement of this software.
|
||||||
|
|
|
@ -0,0 +1,88 @@
|
||||||
|
# Customized Tuner for Experts
|
||||||
|
|
||||||
|
*Tuner receives results from Trial as a metric to evaluate the performance of a specific parameter/architecture configuration, and sends the next hyper-parameter or architecture configuration to Trial.*
|
||||||
|
|
||||||
|
So, if a user wants to implement a customized Tuner, he/she only needs to:
|
||||||
|
|
||||||
|
1) Inherit a tuner of a base Tuner class
|
||||||
|
2) Implement receive_trial_result and generate_parameter function
|
||||||
|
3) Write a script to run Tuner
|
||||||
|
|
||||||
|
Here is an example:
|
||||||
|
|
||||||
|
**1) Inherit a tuner of a base Tuner class**
|
||||||
|
```python
|
||||||
|
from nni.tuner import Tuner
|
||||||
|
|
||||||
|
class CustomizedTuner(Tuner):
|
||||||
|
def __init__(self, ...):
|
||||||
|
...
|
||||||
|
```
|
||||||
|
|
||||||
|
**2) Implement receive_trial_result and generate_parameter function**
|
||||||
|
```python
|
||||||
|
from nni.tuner import Tuner
|
||||||
|
|
||||||
|
class CustomizedTuner(Tuner):
|
||||||
|
def __init__(self, ...):
|
||||||
|
...
|
||||||
|
|
||||||
|
def receive_trial_result(self, parameter_id, parameters, reward):
|
||||||
|
'''
|
||||||
|
Record an observation of the objective function and Train
|
||||||
|
parameter_id: int
|
||||||
|
parameters: object created by 'generate_parameters()'
|
||||||
|
reward: object reported by trial
|
||||||
|
'''
|
||||||
|
# your code implements here.
|
||||||
|
...
|
||||||
|
|
||||||
|
def generate_parameters(self, parameter_id):
|
||||||
|
'''
|
||||||
|
Returns a set of trial (hyper-)parameters, as a serializable object
|
||||||
|
parameter_id: int
|
||||||
|
'''
|
||||||
|
# your code implements here.
|
||||||
|
return your_parameters
|
||||||
|
...
|
||||||
|
```
|
||||||
|
```receive_trial_result``` will receive ```parameter_id, parameters, reward``` as input. The ```reward``` object the Tuner receives is exactly the same reward that the Trial sent.
|
||||||
|
|
||||||
|
The ```your_parameters``` returned from the ```generate_parameters``` function will be packaged as a JSON object by the NNI SDK. The NNI SDK will unpack the JSON object so the Trial will receive the exact same ```your_parameters``` from the Tuner.
|
||||||
|
|
||||||
|
For example:
|
||||||
|
If you implement the ```generate_parameters``` like this:
|
||||||
|
```python
|
||||||
|
def generate_parameters(self, parameter_id):
|
||||||
|
'''
|
||||||
|
Returns a set of trial (hyper-)parameters, as a serializable object
|
||||||
|
parameter_id: int
|
||||||
|
'''
|
||||||
|
# your code implements here.
|
||||||
|
return {"dropout": 0.3, "learning_rate": 0.4}
|
||||||
|
```
|
||||||
|
It means your Tuner will always generate parameters ```{"dropout": 0.3, "learning_rate": 0.4}```. The Trial will then receive ```{"dropout": 0.3, "learning_rate": 0.4}``` by using the ```nni.get_parameters()``` API from the NNI SDK. After the Trial's training finishes, it will send the result to the Tuner by calling ```nni.report_final_result(0.93)```. Then the ```receive_trial_result``` function will receive these parameters like:
|
||||||
|
```
|
||||||
|
parameter_id = 82347
|
||||||
|
parameters = {"dropout": 0.3, "learning_rate": 0.4}
|
||||||
|
reward = 0.93
|
||||||
|
```
|
||||||
|
|
||||||
|
**3) Configure your customized tuner in experiment yaml config file**
|
||||||
|
|
||||||
|
NNI needs to locate your customized tuner class and instantiate the class, so you need to specify the location of the customized tuner class and pass literal values as parameters to the \_\_init__ constructor.
|
||||||
|
```yaml
|
||||||
|
tuner:
|
||||||
|
codeDir: /home/abc/mytuner
|
||||||
|
classFileName: my_customized_tuner.py
|
||||||
|
className: CustomizedTuner
|
||||||
|
# Any parameter need to pass to your tuner class __init__ constructor
|
||||||
|
# can be specified in this optional classArgs field, for example
|
||||||
|
classArgs:
|
||||||
|
arg1: value1
|
||||||
|
```
|
||||||
|
|
||||||
|
More detail example you could see:
|
||||||
|
> * [evolution-tuner](../src/sdk/pynni/nni/evolution_tuner)
|
||||||
|
> * [hyperopt-tuner](../src/sdk/pynni/nni/hyperopt_tuner)
|
||||||
|
> * [evolution-based-customized-tuner](../examples/tuners/ga_customer_tuner)
|
|
@ -18,17 +18,19 @@ trainingServicePlatform: local
|
||||||
# choice: true, false
|
# choice: true, false
|
||||||
useAnnotation: true
|
useAnnotation: true
|
||||||
tuner:
|
tuner:
|
||||||
tunerName: TPE
|
builtinTunerName: TPE
|
||||||
optimizationMode: Maximize
|
classArgs:
|
||||||
|
optimize_mode: maximize
|
||||||
assessor:
|
assessor:
|
||||||
assessorName: Medianstop
|
builtinAssessorName: Medianstop
|
||||||
optimizationMode: Maximize
|
classArgs:
|
||||||
|
optimize_mode: maximize
|
||||||
trial:
|
trial:
|
||||||
trialCommand: python mnist.py
|
command: python mnist.py
|
||||||
trialCodeDir: /usr/share/nni/examples/trials/mnist-annotation
|
codeDir: /usr/share/nni/examples/trials/mnist-annotation
|
||||||
trialGpuNum: 0
|
gpuNum: 0
|
||||||
```
|
```
|
||||||
For our built-in assessors, you need to fill two fields: `assessorName` which chooses NNI provided assessors (refer to [here]() for built-in assessors), `optimizationMode` which includes Maximize and Minimize (you want to maximize or minimize your trial result).
|
For our built-in assessors, you need to fill two fields: `builtinAssessorName` which chooses NNI provided assessors (refer to [here]() for built-in assessors), `optimize_mode` which includes maximize and minimize (you want to maximize or minimize your trial result).
|
||||||
|
|
||||||
## Using user customized Assessor
|
## Using user customized Assessor
|
||||||
You can also write your own assessor following the guidance [here](). For example, you wrote an assessor for `examples/trials/mnist-annotation`. You should prepare the yaml configure below:
|
You can also write your own assessor following the guidance [here](). For example, you wrote an assessor for `examples/trials/mnist-annotation`. You should prepare the yaml configure below:
|
||||||
|
@ -46,15 +48,25 @@ trainingServicePlatform: local
|
||||||
# choice: true, false
|
# choice: true, false
|
||||||
useAnnotation: true
|
useAnnotation: true
|
||||||
tuner:
|
tuner:
|
||||||
tunerName: TPE
|
# Possible values: TPE, Random, Anneal, Evolution
|
||||||
optimizationMode: Maximize
|
builtinTunerName: TPE
|
||||||
|
classArgs:
|
||||||
|
optimize_mode: maximize
|
||||||
assessor:
|
assessor:
|
||||||
assessorCommand: your_command
|
# Your assessor code directory
|
||||||
assessorCodeDir: /path/of/your/asessor
|
codeDir:
|
||||||
assessorGpuNum: 0
|
# Name of the file which contains your assessor class
|
||||||
|
classFileName:
|
||||||
|
# Your assessor class name, must be a subclass of nni.Assessor
|
||||||
|
className:
|
||||||
|
# Parameter names and literal values you want to pass to
|
||||||
|
# the __init__ constructor of your assessor class
|
||||||
|
classArgs:
|
||||||
|
arg1: value1
|
||||||
|
gpuNum: 0
|
||||||
trial:
|
trial:
|
||||||
trialCommand: python mnist.py
|
command: python mnist.py
|
||||||
trialCodeDir: /usr/share/nni/examples/trials/mnist-annotation
|
codeDir: /usr/share/nni/examples/trials/mnist-annotation
|
||||||
trialGpuNum: 0
|
gpuNum: 0
|
||||||
```
|
```
|
||||||
You only need to fill three field: `assessorCommand`, `assessorCodeDir` and `assessorGpuNum`.
|
You need to fill: `codeDir`, `classFileName`, `className`, and pass parameters to \_\_init__ constructor through `classArgs` field if the \_\_init__ constructor of your assessor class has required parameters.
|
||||||
|
|
|
@ -19,14 +19,15 @@ searchSpacePath:
|
||||||
useAnnotation:
|
useAnnotation:
|
||||||
tuner:
|
tuner:
|
||||||
#choice: TPE, Random, Anneal, Evolution
|
#choice: TPE, Random, Anneal, Evolution
|
||||||
tunerName:
|
builtinTunerName:
|
||||||
#choice: Maximize, Minimize
|
classArgs:
|
||||||
optimizationMode:
|
#choice: maximize, minimize
|
||||||
tunerGpuNum:
|
optimize_mode:
|
||||||
|
gpuNum:
|
||||||
trial:
|
trial:
|
||||||
trialCommand:
|
command:
|
||||||
trialCodeDir:
|
codeDir:
|
||||||
trialGpuNum:
|
gpuNum:
|
||||||
#machineList can be empty if the platform is local
|
#machineList can be empty if the platform is local
|
||||||
machineList:
|
machineList:
|
||||||
- ip:
|
- ip:
|
||||||
|
@ -48,20 +49,22 @@ searchSpacePath:
|
||||||
useAnnotation:
|
useAnnotation:
|
||||||
tuner:
|
tuner:
|
||||||
#choice: TPE, Random, Anneal, Evolution
|
#choice: TPE, Random, Anneal, Evolution
|
||||||
tunerName:
|
builtinTunerName:
|
||||||
#choice: Maximize, Minimize
|
classArgs:
|
||||||
optimizationMode:
|
#choice: maximize, minimize
|
||||||
tunerGpuNum:
|
optimize_mode:
|
||||||
|
gpuNum:
|
||||||
assessor:
|
assessor:
|
||||||
#choice: Medianstop
|
#choice: Medianstop
|
||||||
assessorName:
|
builtinAssessorName:
|
||||||
#choice: Maximize, Minimize
|
classArgs:
|
||||||
optimizationMode:
|
#choice: maximize, minimize
|
||||||
assessorGpuNum:
|
optimize_mode:
|
||||||
|
gpuNum:
|
||||||
trial:
|
trial:
|
||||||
trialCommand:
|
command:
|
||||||
trialCodeDir:
|
codeDir:
|
||||||
trialGpuNum:
|
gpuNum:
|
||||||
#machineList can be empty if the platform is local
|
#machineList can be empty if the platform is local
|
||||||
machineList:
|
machineList:
|
||||||
- ip:
|
- ip:
|
||||||
|
@ -82,20 +85,22 @@ trainingServicePlatform:
|
||||||
useAnnotation:
|
useAnnotation:
|
||||||
tuner:
|
tuner:
|
||||||
#choice: TPE, Random, Anneal, Evolution
|
#choice: TPE, Random, Anneal, Evolution
|
||||||
tunerName:
|
builtinTunerName:
|
||||||
#choice: Maximize, Minimize
|
classArgs:
|
||||||
optimizationMode:
|
#choice: maximize, minimize
|
||||||
tunerGpuNum:
|
optimize_mode:
|
||||||
|
gpuNum:
|
||||||
assessor:
|
assessor:
|
||||||
#choice: Medianstop
|
#choice: Medianstop
|
||||||
assessorName:
|
builtinAssessorName:
|
||||||
#choice: Maximize, Minimize
|
classArgs:
|
||||||
optimizationMode:
|
#choice: maximize, minimize
|
||||||
assessorGpuNum:
|
optimize_mode:
|
||||||
|
gpuNum:
|
||||||
trial:
|
trial:
|
||||||
trialCommand:
|
command:
|
||||||
trialCodeDir:
|
codeDir:
|
||||||
trialGpuNum:
|
gpuNum:
|
||||||
#machineList can be empty if the platform is local
|
#machineList can be empty if the platform is local
|
||||||
machineList:
|
machineList:
|
||||||
- ip:
|
- ip:
|
||||||
|
@ -108,11 +113,13 @@ machineList:
|
||||||
* Description
|
* Description
|
||||||
|
|
||||||
__authorName__ is the name of the author who create the experiment.
|
__authorName__ is the name of the author who create the experiment.
|
||||||
|
TBD: add default value
|
||||||
|
|
||||||
* __experimentName__
|
* __experimentName__
|
||||||
* Description
|
* Description
|
||||||
|
|
||||||
__experimentName__ is the name of the experiment you created.
|
__experimentName__ is the name of the experiment you created.
|
||||||
|
TBD: add default value
|
||||||
|
|
||||||
* __trialConcurrency__
|
* __trialConcurrency__
|
||||||
* Description
|
* Description
|
||||||
|
@ -155,61 +162,73 @@ machineList:
|
||||||
* __tuner__
|
* __tuner__
|
||||||
* Description
|
* Description
|
||||||
|
|
||||||
__tuner__ specifies the tuner algorithm you use to run an experiment, there are two kinds of ways to set tuner. One way is to use tuner provided by nni sdk, you just need to set __tunerName__ and __optimizationMode__. Another way is to use your own tuner file, and you need to set __tunerCommand__, __tunerCwd__.
|
__tuner__ specifies the tuner algorithm you use to run an experiment, there are two kinds of ways to set tuner. One way is to use tuner provided by nni sdk, you just need to set __builtinTunerName__ and __classArgs__. Another way is to use your own tuner file, and you need to set __codeDirectory__, __classFileName__, __className__ and __classArgs__.
|
||||||
* __tunerName__ and __optimizationMode__
|
* __builtinTunerName__ and __classArgs__
|
||||||
* __tunerName__
|
* __builtinTunerName__
|
||||||
|
|
||||||
__tunerName__ specifies the name of system tuner you want to use, nni sdk provides four kinds of tuner, including {__TPE__, __Random__, __Anneal__, __Evolution__}
|
__builtinTunerName__ specifies the name of system tuner you want to use, nni sdk provides four kinds of tuner, including {__TPE__, __Random__, __Anneal__, __Evolution__}
|
||||||
* __optimizationMode__
|
* __classArgs__
|
||||||
|
|
||||||
__optimizationMode__ specifies the optimization mode of tuner algorithm, including {__Maximize__, __Minimize__}
|
__classArgs__ specifies the arguments of tuner algorithm
|
||||||
* __tunerCommand__ and __tunerCwd__
|
* __codeDir__, __classFileName__, __className__ and __classArgs__
|
||||||
* __tunerCommand__
|
* __codeDir__
|
||||||
|
|
||||||
__tunerCommand__ specifies the command you want to use to run your own tuner file, for example {__python3 mytuner.py__}
|
__codeDir__ specifies the directory of tuner code.
|
||||||
* __tunerCwd__
|
* __classFileName__
|
||||||
|
|
||||||
__tunerCwd__ specifies the working directory of your own tuner file, which is the path of your own tuner file.
|
__classFileName__ specifies the name of tuner file.
|
||||||
* __tunerGpuNum__
|
* __className__
|
||||||
|
|
||||||
|
__className__ specifies the name of tuner class.
|
||||||
|
* __classArgs__
|
||||||
|
|
||||||
|
__classArgs__ specifies the arguments of tuner algorithm.
|
||||||
|
* __gpuNum__
|
||||||
|
|
||||||
__tunerGPUNum__ specifies the gpu number you want to use to run the tuner process. The value of this field should be a positive number.
|
__gpuNum__ specifies the number of gpus you want to use to run the tuner process. The value of this field should be a nonnegative number.
|
||||||
|
|
||||||
Note: you could only specify one way to set tuner, for example, you could set {tunerName, optimizationMode} or {tunerCommand, tunerCwd}, and you could not set them both.
|
Note: you could only specify one way to set the tuner; for example, you could set {builtinTunerName, classArgs} or {codeDir, classFileName, className, classArgs}, but not both.
|
||||||
|
|
||||||
* __assessor__
|
* __assessor__
|
||||||
|
|
||||||
* Description
|
* Description
|
||||||
|
|
||||||
__assessor__ specifies the assessor algorithm you use to run experiment, there are two kinds of ways to set assessor. One way is to use assessor provided by nni sdk, you just need to set __assessorName__ and __optimizationMode__. Another way is to use your own assessor file, and you need to set __assessorCommand__, __assessorCwd__.
|
__assessor__ specifies the assessor algorithm you use to run an experiment. There are two ways to set an assessor. One way is to use an assessor provided by the nni sdk; you just need to set __builtinAssessorName__ and __classArgs__. Another way is to use your own assessor file, and you need to set __codeDir__, __classFileName__, __className__ and __classArgs__.
|
||||||
* __assessorName__ and __optimizationMode__
|
* __builtinAssessorName__ and __classArgs__
|
||||||
* __assessorName__
|
* __builtinAssessorName__
|
||||||
|
|
||||||
__assessorName__ specifies the name of system assessor you want to use, nni sdk provides one kind of assessor, which is {__Medianstop__}.
|
__builtinAssessorName__ specifies the name of system assessor you want to use, nni sdk provides four kinds of tuner, including {__TPE__, __Random__, __Anneal__, __Evolution__}
|
||||||
* __optimizationMode__
|
* __classArgs__
|
||||||
|
|
||||||
__optimizationMode__ specifies the optimization mode of tuner algorithm, including {__Maximize__, __Minimize__}
|
__classArgs__ specifies the arguments of the assessor algorithm
|
||||||
* __assessorCommand__ and __assessorCwd__
|
* __codeDir__, __classFileName__, __className__ and __classArgs__
|
||||||
* __assessorCommand__
|
* __codeDir__
|
||||||
|
|
||||||
__assessorCommand__ specifies the command you want to use to run your own assessor file, for example {__python3 myassessor.py__}
|
__codeDir__ specifies the directory of assessor code.
|
||||||
* __assessorCwd__
|
* __classFileName__
|
||||||
|
|
||||||
__assessorCwd__ specifies the working directory of your own assessor file, which is the path of your own assessor file.
|
__classFileName__ specifies the name of the assessor file.
|
||||||
* __assessorGpuNum__
|
* __className__
|
||||||
|
|
||||||
|
__className__ specifies the name of the assessor class.
|
||||||
|
* __classArgs__
|
||||||
|
|
||||||
|
__classArgs__ specifies the arguments of the assessor algorithm.
|
||||||
|
* __gpuNum__
|
||||||
|
|
||||||
__assessorGPUNum__ specifies the gpu number you want to use to run the assessor process. The value of this field should be a positive number.
|
__gpuNum__ specifies the number of gpus you want to use to run the assessor process. The value of this field should be a nonnegative number.
|
||||||
|
|
||||||
Note: you could only specify one way to set assessor, for example, you could set {assessorName, optimizationMode} or {assessorCommand, assessorCwd}, and you could not set them both.If you do not want to use assessor, you just need to leave assessor empty or remove assessor in your config file.
|
Note: you could only specify one way to set the assessor; for example, you could set {builtinAssessorName, classArgs} or {codeDir, classFileName, className, classArgs}, but not both. If you do not want to use an assessor, just leave the assessor field empty or remove it from your config file.
|
||||||
* __trial__
|
* __trial__
|
||||||
* __trialCommand__
|
* __command__
|
||||||
|
|
||||||
__trialCommand__ specifies the command to run trial process.
|
__command__ specifies the command to run trial process.
|
||||||
* __trialCodeDir__
|
* __codeDir__
|
||||||
|
|
||||||
__trialCodeDir__ specifies the directory of your own trial file.
|
__codeDir__ specifies the directory of your own trial file.
|
||||||
* __trialGpuNum__
|
* __gpuNum__
|
||||||
|
|
||||||
__trialGpuNum__ specifies the num of gpu you want to use to run your trial process.
|
__gpuNum__ specifies the num of gpu you want to use to run your trial process. Default value is 0.
|
||||||
* __machineList__
|
* __machineList__
|
||||||
|
|
||||||
__machineList__ should be set if you set __trainingServicePlatform__=remote, or it could be empty.
|
__machineList__ should be set if you set __trainingServicePlatform__=remote, or it could be empty.
|
||||||
|
@ -228,6 +247,17 @@ machineList:
|
||||||
|
|
||||||
__passwd__ specifies the password of your account.
|
__passwd__ specifies the password of your account.
|
||||||
|
|
||||||
|
* __sshKeyPath__
|
||||||
|
|
||||||
|
If you want to use ssh key to login remote machine, you could set __sshKeyPath__ in config file. __sshKeyPath__ is the path of ssh key file, which should be valid.
|
||||||
|
|
||||||
|
Note: if you set passwd and sshKeyPath simultaneously, nni will try passwd.
|
||||||
|
|
||||||
|
* __passphrase__
|
||||||
|
|
||||||
|
__passphrase__ is used to protect ssh key, which could be empty if you don't have passphrase.
|
||||||
|
|
||||||
|
|
||||||
## Examples
|
## Examples
|
||||||
* __local mode__
|
* __local mode__
|
||||||
|
|
||||||
|
@ -244,14 +274,15 @@ trainingServicePlatform: local
|
||||||
useAnnotation: true
|
useAnnotation: true
|
||||||
tuner:
|
tuner:
|
||||||
#choice: TPE, Random, Anneal, Evolution
|
#choice: TPE, Random, Anneal, Evolution
|
||||||
tunerName: TPE
|
builtinTunerName: TPE
|
||||||
#choice: Maximize, Minimize
|
classArgs:
|
||||||
optimizationMode: Maximize
|
#choice: maximize, minimize
|
||||||
tunerGpuNum: 0
|
optimize_mode: maximize
|
||||||
|
gpuNum: 0
|
||||||
trial:
|
trial:
|
||||||
trialCommand: python3 mnist.py
|
command: python3 mnist.py
|
||||||
trialCodeDir: /nni/mnist
|
codeDir: /nni/mnist
|
||||||
trialGpuNum: 0
|
gpuNum: 0
|
||||||
```
|
```
|
||||||
|
|
||||||
If you want to use assessor, you could add assessor configuration in your file.
|
If you want to use assessor, you could add assessor configuration in your file.
|
||||||
|
@ -268,20 +299,22 @@ searchSpacePath: /nni/search_space.json
|
||||||
useAnnotation: false
|
useAnnotation: false
|
||||||
tuner:
|
tuner:
|
||||||
#choice: TPE, Random, Anneal, Evolution
|
#choice: TPE, Random, Anneal, Evolution
|
||||||
tunerName: TPE
|
builtinTunerName: TPE
|
||||||
#choice: Maximize, Minimize
|
classArgs:
|
||||||
optimizationMode: Maximize
|
#choice: maximize, minimize
|
||||||
tunerGpuNum: 0
|
optimize_mode: maximize
|
||||||
|
gpuNum: 0
|
||||||
assessor:
|
assessor:
|
||||||
#choice: Medianstop
|
#choice: Medianstop
|
||||||
assessorName: Medianstop
|
builtinAssessorName: Medianstop
|
||||||
#choice: Maximize, Minimize
|
classArgs:
|
||||||
optimizationMode: Maximize
|
#choice: maximize, minimize
|
||||||
assessorGpuNum: 0
|
optimize_mode: maximize
|
||||||
|
gpuNum: 0
|
||||||
trial:
|
trial:
|
||||||
trialCommand: python3 mnist.py
|
command: python3 mnist.py
|
||||||
trialCodeDir: /nni/mnist
|
codeDir: /nni/mnist
|
||||||
trialGpuNum: 0
|
gpuNum: 0
|
||||||
```
|
```
|
||||||
|
|
||||||
Or you could specify your own tuner and assessor file as following:
|
Or you could specify your own tuner and assessor file as following:
|
||||||
|
@ -297,17 +330,25 @@ searchSpacePath: /nni/search_space.json
|
||||||
#choice: true, false
|
#choice: true, false
|
||||||
useAnnotation: false
|
useAnnotation: false
|
||||||
tuner:
|
tuner:
|
||||||
tunerCommand: python3 mytuner.py
|
codeDir: /nni/tuner
|
||||||
tunerCwd: /nni/tuner
|
classFileName: mytuner.py
|
||||||
tunerGpuNum: 0
|
className: MyTuner
|
||||||
|
classArgs:
|
||||||
|
#choice: maximize, minimize
|
||||||
|
optimize_mode: maximize
|
||||||
|
gpuNum: 0
|
||||||
assessor:
|
assessor:
|
||||||
assessorCommand: python3 myassessor.py
|
codeDir: /nni/assessor
|
||||||
assessorCwd: /nni/assessor
|
classFileName: myassessor.py
|
||||||
assessorGpuNum: 0
|
className: MyAssessor
|
||||||
|
classArgs:
|
||||||
|
#choice: maximize, minimize
|
||||||
|
optimize_mode: maximize
|
||||||
|
gpuNum: 0
|
||||||
trial:
|
trial:
|
||||||
trialCommand: python3 mnist.py
|
command: python3 mnist.py
|
||||||
trialCodeDir: /nni/mnist
|
codeDir: /nni/mnist
|
||||||
trialGpuNum: 0
|
gpuNum: 0
|
||||||
```
|
```
|
||||||
|
|
||||||
* __remote mode__
|
* __remote mode__
|
||||||
|
@ -326,14 +367,15 @@ searchSpacePath: /nni/search_space.json
|
||||||
useAnnotation: false
|
useAnnotation: false
|
||||||
tuner:
|
tuner:
|
||||||
#choice: TPE, Random, Anneal, Evolution
|
#choice: TPE, Random, Anneal, Evolution
|
||||||
tunerName: TPE
|
builtinTunerName: TPE
|
||||||
#choice: Maximize, Minimize
|
classArgs:
|
||||||
optimizationMode: Maximize
|
#choice: maximize, minimize
|
||||||
tunerGpuNum: 0
|
optimize_mode: maximize
|
||||||
|
gpuNum: 0
|
||||||
trial:
|
trial:
|
||||||
trialCommand: python3 mnist.py
|
command: python3 mnist.py
|
||||||
trialCodeDir: /nni/mnist
|
codeDir: /nni/mnist
|
||||||
trialGpuNum: 0
|
gpuNum: 0
|
||||||
#machineList can be empty if the platform is local
|
#machineList can be empty if the platform is local
|
||||||
machineList:
|
machineList:
|
||||||
- ip: 10.10.10.10
|
- ip: 10.10.10.10
|
||||||
|
@ -347,5 +389,6 @@ machineList:
|
||||||
- ip: 10.10.10.12
|
- ip: 10.10.10.12
|
||||||
port: 22
|
port: 22
|
||||||
username: test
|
username: test
|
||||||
passwd: test
|
sshKeyPath: /nni/sshkey
|
||||||
|
passphrase: qwert
|
||||||
```
|
```
|
|
@ -1,60 +1,26 @@
|
||||||
**Getting Started with NNI**
|
**Getting Started with NNI**
|
||||||
===
|
===
|
||||||
NNI (Neural Network Intelligence) is a toolkit to help users run automated machine learning experiments.
|
|
||||||
The tool dispatches and runs trial jobs generated by tuning algorithms to search for the best neural architecture and/or hyper-parameters in different environments (e.g. local machine, remote servers, Cloud).
|
|
||||||
|
|
||||||
```
|
## **Installation**
|
||||||
AutoML experiment Training Services
|
* __Dependencies__
|
||||||
┌────────┐ ┌────────────────────────┐ ┌────────────────┐
|
|
||||||
│ nnictl │ ─────> │ nni_manager │ │ Local Machine │
|
|
||||||
└────────┘ │ sdk/tuner │ └────────────────┘
|
|
||||||
│ hyperopt_tuner │
|
|
||||||
│ evlution_tuner │ trail jobs ┌────────────────┐
|
|
||||||
│ ... │ ────────> │ Remote Servers │
|
|
||||||
├────────────────────────┤ └────────────────┘
|
|
||||||
│ trail job source code │
|
|
||||||
│ sdk/annotation │ ┌────────────────┐
|
|
||||||
├────────────────────────┤ │ Yarn,K8s, │
|
|
||||||
│ nni_board │ │ ... │
|
|
||||||
└────────────────────────┘ └────────────────┘
|
|
||||||
```
|
|
||||||
## **Who should consider using NNI**
|
|
||||||
* You want to try different AutoML algorithms for your training code (model) at local
|
|
||||||
* You want to run AutoML trial jobs in different environments to speed up search (e.g. remote servers, Cloud)
|
|
||||||
* As a researcher and data scientist, you want to implement your own AutoML algorithms and compare them with other algorithms
|
|
||||||
* As a ML platform owner, you want to support AutoML in your platform
|
|
||||||
|
|
||||||
## **Setup**
|
python >= 3.5
|
||||||
* __Dependencies__
|
|
||||||
nni requires:
|
|
||||||
```
|
|
||||||
python >= 3.5
|
|
||||||
node >= 10.9.0
|
|
||||||
yarn >= 1.9.4
|
|
||||||
```
|
|
||||||
Before install nni, please make sure you have installed python environment correctly.
|
|
||||||
* __User installation__
|
|
||||||
|
|
||||||
* clone nni repository
|
python pip should also be correctly installed. You could use "which pip" or "pip -V" to check in Linux.
|
||||||
|
|
TBD: For now, we don't support virtual environments.
|
||||||
|
|
||||||
|
* __Install NNI through pip__
|
||||||
|
|
||||||
|
pip3 install -v --user git+https://github.com/Microsoft/NeuralNetworkIntelligence.git
|
||||||
|
source ~/.bashrc
|
||||||
|
|
||||||
|
* __Install NNI through source code__
|
||||||
|
|
||||||
git clone https://github.com/Microsoft/NeuralNetworkIntelligence
|
git clone https://github.com/Microsoft/NeuralNetworkIntelligence
|
||||||
|
cd NeuralNetworkIntelligence
|
||||||
|
chmod +x install.sh
|
||||||
|
source install.sh
|
||||||
|
|
||||||
* run install.sh
|
|
||||||
|
|
||||||
cd NeuralNetworkIntelligence
|
|
||||||
sh ./install.sh
|
|
||||||
|
|
||||||
For more details about installation, please refer to [Installation instructions](Installation.md).
|
|
||||||
|
|
||||||
## **Quick start: run an experiment at local**
|
|
||||||
Requirements:
|
|
||||||
* local environment setup [TODO]
|
|
||||||
|
|
||||||
Run the following command to create an experiment for [mnist]
|
|
||||||
```bash
|
|
||||||
nnictl create --config /usr/share/nni/examples/trials/mnist-annotation/config.yml
|
|
||||||
```
|
|
||||||
This command will start the experiment and WebUI. The WebUI endpoint will be shown in the output of this command (for example, `http://localhost:8080`). Open this URL using your browsers. You can analyze your experiment through WebUI, or open trials' tensorboard.
|
|
||||||
|
|
||||||
## **Quick start: run a customized experiment**
|
## **Quick start: run a customized experiment**
|
||||||
An experiment is to run multiple trial jobs, each trial job tries a configuration which includes a specific neural architecture (or model) and hyper-parameter values. To run an experiment through NNI, you should:
|
An experiment is to run multiple trial jobs, each trial job tries a configuration which includes a specific neural architecture (or model) and hyper-parameter values. To run an experiment through NNI, you should:
|
||||||
|
@ -64,48 +30,54 @@ An experiment is to run multiple trial jobs, each trial job tries a configuratio
|
||||||
* Provide a yaml experiment configure file
|
* Provide a yaml experiment configure file
|
||||||
* (optional) Provide or choose an assessor
|
* (optional) Provide or choose an assessor
|
||||||
|
|
||||||
**Prepare trial**: Let's use a simple trial example, e.g. mnist, provided by NNI. After you installed NNI, NNI examples have been put in /usr/share/nni/examples, run `ls /usr/share/nni/examples/trials` to see all the trial examples. You can simply execute the following command to run the NNI mnist example:
|
**Prepare trial**: Let's use a simple trial example, e.g. mnist, provided by NNI. After you installed NNI, NNI examples have been put in ~/nni/examples, run `ls ~/nni/examples/trials` to see all the trial examples. You can simply execute the following command to run the NNI mnist example:
|
||||||
|
|
||||||
python /usr/share/nni/examples/trials/mnist-annotation/mnist.py
|
python ~/nni/examples/trials/mnist-annotation/mnist.py
|
||||||
|
|
||||||
This command will be filled in the yaml configure file below. Please refer to [here]() for how to write your own trial.
|
This command will be filled in the yaml configure file below. Please refer to [here]() for how to write your own trial.
|
||||||
|
|
||||||
**Prepare tuner**: NNI supports several popular automl algorithms, including Random Search, Tree of Parzen Estimators (TPE), Bayesian Optimization etc. Users can write their own tuner (refer to [here]()), but for simplicity, here we can choose a tuner provided by NNI as below:
|
**Prepare tuner**: NNI supports several popular automl algorithms, including Random Search, Tree of Parzen Estimators (TPE), Evolution algorithm etc. Users can write their own tuner (refer to [here]()), but for simplicity, here we choose a tuner provided by NNI as below:
|
||||||
|
|
||||||
tunerName: TPE
|
tunerName: TPE
|
||||||
optimizationMode: maximize
|
optimizationMode: maximize
|
||||||
|
|
||||||
*tunerName* is used to specify a tuner in NNI, *optimizationMode* is to indicate whether you want to maximize or minimize your trial's result.
|
*tunerName* is used to specify a tuner in NNI, *optimizationMode* is to indicate whether you want to maximize or minimize your trial's result.
|
||||||
|
|
||||||
**Prepare configure file**: Since you have already known which trial code you are going to run and which tuner you are going to use, it is time to prepare the yaml configure file. NNI provides a demo configure file for each trial example, `cat /usr/share/nni/examples/trials/mnist-annotation/config.yml` to see it. Its content is basically shown below:
|
**Prepare configure file**: Since you have already known which trial code you are going to run and which tuner you are going to use, it is time to prepare the yaml configure file. NNI provides a demo configure file for each trial example, `cat ~/nni/examples/trials/mnist-annotation/config.yml` to see it. Its content is basically shown below:
|
||||||
|
|
||||||
```
|
```
|
||||||
authorName: your_name
|
authorName: your_name
|
||||||
experimentName: auto_mnist
|
experimentName: auto_mnist
|
||||||
|
|
||||||
# how many trials could be concurrently running
|
# how many trials could be concurrently running
|
||||||
trialConcurrency: 2
|
trialConcurrency: 2
|
||||||
|
|
||||||
# maximum experiment running duration
|
# maximum experiment running duration
|
||||||
maxExecDuration: 3h
|
maxExecDuration: 3h
|
||||||
|
|
||||||
# empty means never stop
|
# empty means never stop
|
||||||
maxTrialNum: 100
|
maxTrialNum: 100
|
||||||
|
|
||||||
# choice: local, remote
|
# choice: local, remote
|
||||||
trainingServicePlatform: local
|
trainingServicePlatform: local
|
||||||
|
|
||||||
# choice: true, false
|
# choice: true, false
|
||||||
useAnnotation: true
|
useAnnotation: true
|
||||||
tuner:
|
tuner:
|
||||||
tunerName: TPE
|
builtinTunerName: TPE
|
||||||
optimizationMode: Maximize
|
classArgs:
|
||||||
|
optimize_mode: maximize
|
||||||
trial:
|
trial:
|
||||||
trialCommand: python mnist.py
|
command: python mnist.py
|
||||||
trialCodeDir: /usr/share/nni/examples/trials/mnist-annotation
|
codeDir: ~/nni/examples/trials/mnist-annotation
|
||||||
trialGpuNum: 0
|
gpuNum: 0
|
||||||
```
|
```
|
||||||
|
|
||||||
Here *useAnnotation* is true because this trial example uses our python annotation (refer to [here]() for details). For trial, we should provide *trialCommand* which is the command to run the trial, provide *trialCodeDir* where the trial code is. The command will be executed in this directory. We should also provide how many GPUs a trial requires.
|
Here *useAnnotation* is true because this trial example uses our python annotation (refer to [here]() for details). For trial, we should provide *trialCommand* which is the command to run the trial, provide *trialCodeDir* where the trial code is. The command will be executed in this directory. We should also provide how many GPUs a trial requires.
|
||||||
|
|
||||||
With all these steps done, we can run the experiment with the following command:
|
With all these steps done, we can run the experiment with the following command:
|
||||||
|
|
||||||
nnictl create --config /usr/share/nni/examples/trials/mnist-annotation/config.yml
|
nnictl create --config ~/nni/examples/trials/mnist-annotation/config.yml
|
||||||
|
|
||||||
You can refer to [here](NNICTLDOC.md) for more usage guide of *nnictl* command line tool.
|
You can refer to [here](NNICTLDOC.md) for more usage guide of *nnictl* command line tool.
|
||||||
|
|
||||||
|
@ -118,11 +90,8 @@ The experiment has been running now, NNI provides WebUI for you to view experime
|
||||||
* [Tuners supported by NNI.](../src/sdk/pynni/nni/README.md)
|
* [Tuners supported by NNI.](../src/sdk/pynni/nni/README.md)
|
||||||
* [How to enable early stop (i.e. assessor) in an experiment?](EnableAssessor.md)
|
* [How to enable early stop (i.e. assessor) in an experiment?](EnableAssessor.md)
|
||||||
* [How to run an experiment on multiple machines?](RemoteMachineMode.md)
|
* [How to run an experiment on multiple machines?](RemoteMachineMode.md)
|
||||||
* [How to write a customized tuner?](../examples/tuners/README.md)
|
* [How to write a customized tuner?](CustomizedTuner.md)
|
||||||
* [How to write a customized assessor?](../examples/assessors/README.md)
|
* [How to write a customized assessor?](../examples/assessors/README.md)
|
||||||
* [How to resume an experiment?]()
|
* [How to resume an experiment?](NNICTLDOC.md)
|
||||||
* [Tutorial of the command tool *nnictl*.](NNICTLDOC.md)
|
* [Tutorial of the command tool *nnictl*.](NNICTLDOC.md)
|
||||||
* [How to use *nnictl* to control multiple experiments?]()
|
* [How to use *nnictl* to control multiple experiments?]()
|
||||||
|
|
||||||
## How to contribute
|
|
||||||
TBD
|
|
||||||
|
|
|
@ -8,12 +8,10 @@ nnictl support commands:
|
||||||
```
|
```
|
||||||
nnictl create
|
nnictl create
|
||||||
nnictl stop
|
nnictl stop
|
||||||
nnictl create
|
|
||||||
nnictl update
|
nnictl update
|
||||||
nnictl resume
|
nnictl resume
|
||||||
nnictl trial
|
nnictl trial
|
||||||
nnictl webui
|
nnictl webui
|
||||||
nnictl rest
|
|
||||||
nnictl experiment
|
nnictl experiment
|
||||||
nnictl config
|
nnictl config
|
||||||
nnictl log
|
nnictl log
|
||||||
|
@ -72,7 +70,7 @@ nnictl log
|
||||||
* __nnictl update searchspace__
|
* __nnictl update searchspace__
|
||||||
* Description
|
* Description
|
||||||
|
|
||||||
You can use this command to update an experiment's searchspace.
|
You can use this command to update an experiment's search space.
|
||||||
|
|
||||||
* Usage
|
* Usage
|
||||||
|
|
||||||
|
@ -200,15 +198,7 @@ nnictl log
|
||||||
* Usage
|
* Usage
|
||||||
|
|
||||||
nnictl config show
|
nnictl config show
|
||||||
|
|
||||||
### Manage restful server
|
|
||||||
* __nnictl rest check__
|
|
||||||
* Description
|
|
||||||
|
|
||||||
Check the status of restful server
|
|
||||||
* Usage
|
|
||||||
|
|
||||||
nnictl rest check
|
|
||||||
|
|
||||||
### Manage log
|
### Manage log
|
||||||
* __nnictl log stdout__
|
* __nnictl log stdout__
|
||||||
|
|
|
@ -0,0 +1,25 @@
|
||||||
|
# Release 0.1.0 - 9/15/2018
|
||||||
|
|
||||||
|
Initial release of Neural Network Intelligence (NNI).
|
||||||
|
|
||||||
|
## Major Features
|
||||||
|
* Installation and Deployment
|
||||||
|
* Support pip install and source codes install
|
||||||
|
* Support training services on local mode(including Multi-GPU mode) as well as multi-machines mode
|
||||||
|
* Tuners, Accessors and Trial
|
||||||
|
* Support AutoML algorithms including: hyperopt_tpe, hyperopt_annealing, hyperopt_random, and evolution_tuner
|
||||||
|
* Support assessor(early stop) algorithms including: medianstop algorithm
|
||||||
|
* Provide Python API for user defined tuners and accessors
|
||||||
|
* Provide Python API for user to wrap trial code as NNI deployable codes
|
||||||
|
* Experiments
|
||||||
|
* Provide a command line toolkit 'nnictl' for experiments management
|
||||||
|
* Provide a web UI for viewing experiments details and managing experiments
|
||||||
|
* Continuous Integration
|
||||||
|
* Support CI by providing out-of-box integration with [travis-ci](https://github.com/travis-ci) on ubuntu
|
||||||
|
* Others
|
||||||
|
* Support simple GPU job scheduling
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -10,6 +10,18 @@ NNI supports running an experiment on multiple machines, called remote machine m
|
||||||
## Setup environment
|
## Setup environment
|
||||||
Install NNI on each of your machines following the install guide [here](GetStarted.md).
|
Install NNI on each of your machines following the install guide [here](GetStarted.md).
|
||||||
|
|
||||||
|
For remote machines that are used only to run trials but not the nnictl, you can just install python SDK:
|
||||||
|
|
||||||
|
* __Install python SDK through pip__
|
||||||
|
|
||||||
|
pip3 install --user git+https://github.com/Microsoft/NeuralNetworkIntelligence.git#subdirectory=src/sdk/pynni
|
||||||
|
|
||||||
|
* __Install python SDK through source code__
|
||||||
|
|
||||||
|
git clone https://github.com/Microsoft/NeuralNetworkIntelligence
|
||||||
|
cd src/sdk/pynni
|
||||||
|
python3 setup.py install
|
||||||
|
|
||||||
## Run an experiment
|
## Run an experiment
|
||||||
Still using `examples/trials/mnist-annotation` as an example here. The yaml file you need is shown below:
|
Still using `examples/trials/mnist-annotation` as an example here. The yaml file you need is shown below:
|
||||||
```
|
```
|
||||||
|
@ -26,12 +38,13 @@ trainingServicePlatform: local
|
||||||
# choice: true, false
|
# choice: true, false
|
||||||
useAnnotation: true
|
useAnnotation: true
|
||||||
tuner:
|
tuner:
|
||||||
tunerName: TPE
|
builtinTunerName: TPE
|
||||||
optimizationMode: Maximize
|
classArgs:
|
||||||
|
optimize_mode: maximize
|
||||||
trial:
|
trial:
|
||||||
trialCommand: python mnist.py
|
command: python mnist.py
|
||||||
trialCodeDir: /usr/share/nni/examples/trials/mnist-annotation
|
codeDir: /usr/share/nni/examples/trials/mnist-annotation
|
||||||
trialGpuNum: 0
|
gpuNum: 0
|
||||||
#machineList can be empty if the platform is local
|
#machineList can be empty if the platform is local
|
||||||
machineList:
|
machineList:
|
||||||
- ip: 10.1.1.1
|
- ip: 10.1.1.1
|
||||||
|
|
|
@ -0,0 +1,59 @@
|
||||||
|
## How to define search space?
|
||||||
|
|
||||||
|
### Hyper-parameter Search Space
|
||||||
|
|
||||||
|
* A search space configure example as follow:
|
||||||
|
|
||||||
|
```python
|
||||||
|
{
|
||||||
|
"dropout_rate":{"_type":"uniform","_value":[0.1,0.5]},
|
||||||
|
"conv_size":{"_type":"choice","_value":[2,3,5,7]},
|
||||||
|
"hidden_size":{"_type":"choice","_value":[124, 512, 1024]},
|
||||||
|
"batch_size":{"_type":"choice","_value":[50, 250, 500]},
|
||||||
|
"learning_rate":{"_type":"uniform","_value":[0.0001, 0.1]}
|
||||||
|
}
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
|
The example defines ```dropout_rate``` as a variable whose prior distribution is uniform, with values ranging from ```0.1``` to ```0.5```.
|
||||||
|
The tuner will sample parameters/architecture by understanding the search space first.
|
||||||
|
|
||||||
|
User should define the name of variable, type and candidate value of variable.
|
||||||
|
The candidate type and value for variable is here:
|
||||||
|
|
||||||
|
* {"_type":"choice","_value":options}
|
||||||
|
|
* Which means the variable value is one of the options, which should be a list. The elements of options can themselves be [nested] stochastic expressions. In this case, the stochastic choices that only appear in some of the options become conditional parameters.
|
||||||
|
<br/>
|
||||||
|
* {"_type":"randint","_value":[upper]}
|
||||||
|
* Which means the variable value is a random integer in the range [0, upper). The semantics of this distribution is that there is no more correlation in the loss function between nearby integer values, as compared with more distant integer values. This is an appropriate distribution for describing random seeds for example. If the loss function is probably more correlated for nearby integer values, then you should probably use one of the "quantized" continuous distributions, such as either quniform, qloguniform, qnormal or qlognormal.
|
||||||
|
<br/>
|
||||||
|
* {"_type":"uniform","_value":[low, high]}
|
||||||
|
* Which means the variable value is a value uniformly between low and high.
|
||||||
|
* When optimizing, this variable is constrained to a two-sided interval.
|
||||||
|
<br/>
|
||||||
|
* {"_type":"quniform","_value":[low, high, q]}
|
||||||
|
* Which means the variable value is a value like round(uniform(low, high) / q) * q
|
||||||
|
* Suitable for a discrete value with respect to which the objective is still somewhat "smooth", but which should be bounded both above and below.
|
||||||
|
<br/>
|
||||||
|
* {"_type":"loguniform","_value":[low, high]}
|
||||||
|
* Which means the variable value is a value drawn according to exp(uniform(low, high)) so that the logarithm of the return value is uniformly distributed.
|
||||||
|
* When optimizing, this variable is constrained to the interval [exp(low), exp(high)].
|
||||||
|
<br/>
|
||||||
|
* {"_type":"qloguniform","_value":[low, high, q]}
|
||||||
|
* Which means the variable value is a value like round(exp(uniform(low, high)) / q) * q
|
||||||
|
* Suitable for a discrete variable with respect to which the objective is "smooth" and gets smoother with the size of the value, but which should be bounded both above and below.
|
||||||
|
<br/>
|
||||||
|
* {"_type":"normal","_value":[label, mu, sigma]}
|
||||||
|
* Which means the variable value is a real value that's normally-distributed with mean mu and standard deviation sigma. When optimizing, this is an unconstrained variable.
|
||||||
|
<br/>
|
||||||
|
* {"_type":"qnormal","_value":[label, mu, sigma, q]}
|
||||||
|
* Which means the variable value is a value like round(normal(mu, sigma) / q) * q
|
||||||
|
* Suitable for a discrete variable that probably takes a value around mu, but is fundamentally unbounded.
|
||||||
|
<br/>
|
||||||
|
* {"_type":"lognormal","_value":[label, mu, sigma]}
|
||||||
|
* Which means the variable value is a value drawn according to exp(normal(mu, sigma)) so that the logarithm of the return value is normally distributed. When optimizing, this variable is constrained to be positive.
|
||||||
|
<br/>
|
||||||
|
* {"_type":"qlognormal","_value":[label, mu, sigma, q]}
|
||||||
|
* Which means the variable value is a value like round(exp(normal(mu, sigma)) / q) * q
|
||||||
|
* Suitable for a discrete variable with respect to which the objective is smooth and gets smoother with the size of the variable, which is bounded from one side.
|
||||||
|
<br/>
|
|
@ -0,0 +1,3 @@
|
||||||
|
## How to contribute
|
||||||
|
|
||||||
|
TBD
|
|
@ -2,63 +2,7 @@
|
||||||
===
|
===
|
||||||
There would be only a few changes on your existing trial(model) code to make the code runnable on NNI. We provide two approaches for you to modify your code: `Python annotation` and `NNI APIs for trial`
|
There would be only a few changes on your existing trial(model) code to make the code runnable on NNI. We provide two approaches for you to modify your code: `Python annotation` and `NNI APIs for trial`
|
||||||
|
|
||||||
## Python annotation
|
## NNI APIs
|
||||||
We designed a new syntax for users to annotation which variable they want to tune and in what range they want to tune the variable. Also, they can annotate which variable they want to report as intermediate result to `assessor`, and which variable to report as the final result (e.g. model accuracy) to `tuner`. A really appealing feature of our python annotation is that it exists as comments in your code, which means you can run your code as before without NNI. Let's look at an example, below is a piece of tensorflow code:
|
|
||||||
```
|
|
||||||
with tf.Session() as sess:
|
|
||||||
sess.run(tf.global_variables_initializer())
|
|
||||||
batch_size = 128
|
|
||||||
for i in range(10000):
|
|
||||||
batch = mnist.train.next_batch(batch_size)
|
|
||||||
dropout_rate = 0.5
|
|
||||||
mnist_network.train_step.run(feed_dict={mnist_network.images: batch[0],
|
|
||||||
mnist_network.labels: batch[1],
|
|
||||||
mnist_network.keep_prob: dropout_rate})
|
|
||||||
if i % 100 == 0:
|
|
||||||
test_acc = mnist_network.accuracy.eval(
|
|
||||||
feed_dict={mnist_network.images: mnist.test.images,
|
|
||||||
mnist_network.labels: mnist.test.labels,
|
|
||||||
mnist_network.keep_prob: 1.0})
|
|
||||||
|
|
||||||
test_acc = mnist_network.accuracy.eval(
|
|
||||||
feed_dict={mnist_network.images: mnist.test.images,
|
|
||||||
mnist_network.labels: mnist.test.labels,
|
|
||||||
mnist_network.keep_prob: 1.0})
|
|
||||||
```
|
|
||||||
|
|
||||||
Let's say you want to tune batch\_size and dropout\_rate, and report test\_acc every 100 steps, at last report test\_acc as final result. With our python annotation, your code would look like below:
|
|
||||||
```
|
|
||||||
with tf.Session() as sess:
|
|
||||||
sess.run(tf.global_variables_initializer())
|
|
||||||
"""@nni.variable(nni.choice(50, 250, 500), name=batch_size)"""
|
|
||||||
batch_size = 128
|
|
||||||
for i in range(10000):
|
|
||||||
batch = mnist.train.next_batch(batch_size)
|
|
||||||
"""@nni.variable(nni.choice(1, 5), name=dropout_rate)"""
|
|
||||||
dropout_rate = 0.5
|
|
||||||
mnist_network.train_step.run(feed_dict={mnist_network.images: batch[0],
|
|
||||||
mnist_network.labels: batch[1],
|
|
||||||
mnist_network.keep_prob: dropout_rate})
|
|
||||||
if i % 100 == 0:
|
|
||||||
test_acc = mnist_network.accuracy.eval(
|
|
||||||
feed_dict={mnist_network.images: mnist.test.images,
|
|
||||||
mnist_network.labels: mnist.test.labels,
|
|
||||||
mnist_network.keep_prob: 1.0})
|
|
||||||
"""@nni.report_intermediate_result(test_acc)"""
|
|
||||||
|
|
||||||
test_acc = mnist_network.accuracy.eval(
|
|
||||||
feed_dict={mnist_network.images: mnist.test.images,
|
|
||||||
mnist_network.labels: mnist.test.labels,
|
|
||||||
mnist_network.keep_prob: 1.0})
|
|
||||||
"""@nni.report_final_result(test_acc)"""
|
|
||||||
```
|
|
||||||
|
|
||||||
Simply adding four lines would make your code runnable on NNI. You can still run your code independently. `@nni.variable` works on its next line assignment, and `@nni.report_intermediate_result`/`@nni.report_final_result` would send the data to assessor/tuner at that line. Please refer to [here](../tools/annotation/README.md) for more annotation syntax and more powerful usage. In the yaml configure file, you need one line to enable Python annotation:
|
|
||||||
```
|
|
||||||
useAnnotation: true
|
|
||||||
```
|
|
||||||
|
|
||||||
## NNI APIs for trial
|
|
||||||
We also support NNI APIs for trial code. By using this approach, you should first prepare a search space file. An example is shown below:
|
We also support NNI APIs for trial code. By using this approach, you should first prepare a search space file. An example is shown below:
|
||||||
```
|
```
|
||||||
{
|
{
|
||||||
|
@ -68,18 +12,21 @@ We also support NNI APIs for trial code. By using this approach, you should firs
|
||||||
"learning_rate":{"_type":"uniform","_value":[0.0001, 0.1]}
|
"learning_rate":{"_type":"uniform","_value":[0.0001, 0.1]}
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
You can refer to [here]() for the tutorial of search space.
|
You can refer to [here](SearchSpaceSpec.md) for the tutorial of search space.
|
||||||
|
|
||||||
Then, include `import nni` in your trial code to use APIs. Using the line:
|
Then, include `import nni` in your trial code to use NNI APIs. Using the line:
|
||||||
```
|
```
|
||||||
RECEIVED_PARAMS = nni.get_parameters()
|
RECEIVED_PARAMS = nni.get_parameters()
|
||||||
```
|
```
|
||||||
to get hyper-parameters' values assigned by tuner. `RECEIVED_PARAMS` is a json object, for example:
|
to get hyper-parameters' values assigned by tuner. `RECEIVED_PARAMS` is an object, for example:
|
||||||
```
|
```
|
||||||
{'conv_size': 2, 'hidden_size': 124, 'learning_rate': 0.0307, 'dropout_rate': 0.2029}
|
{"conv_size": 2, "hidden_size": 124, "learning_rate": 0.0307, "dropout_rate": 0.2029}
|
||||||
```
|
```
|
||||||
|
|
||||||
On the other hand, you can use the API: `nni.report_intermediate_result(accuracy)` to send `accuracy` to assessor. And use `nni.report_final_result(accuracy)` to send `accuracy` to tuner. Here `accuracy` could be any python data type, but **NOTE that if you use built-in tuner/assessor, `accuracy` should be a number (e.g. float, int)**.
|
On the other hand, you can use the API: `nni.report_intermediate_result(accuracy)` to send `accuracy` to assessor. And use `nni.report_final_result(accuracy)` to send `accuracy` to tuner. Here `accuracy` could be any python data type, but **NOTE that if you use built-in tuner/assessor, `accuracy` should be a numerical variable(e.g. float, int)**.
|
||||||
|
|
||||||
|
The assessor will decide which trial should early stop based on the history performance of trial(intermediate result of one trial).
|
||||||
|
The tuner will generate next parameters/architecture based on the explore history(final result of all trials).
|
||||||
|
|
||||||
In the yaml configure file, you need two lines to enable NNI APIs:
|
In the yaml configure file, you need two lines to enable NNI APIs:
|
||||||
```
|
```
|
||||||
|
@ -87,4 +34,44 @@ useAnnotation: false
|
||||||
searchSpacePath: /path/to/your/search_space.json
|
searchSpacePath: /path/to/your/search_space.json
|
||||||
```
|
```
|
||||||
|
|
||||||
You can refer to [here](../examples/trials/README.md) for more information about how to write trial code using NNI APIs.
|
You can refer to [here](../examples/trials/README.md) for more information about how to write trial code using NNI APIs.
|
||||||
|
|
||||||
|
## NNI Annotation
|
||||||
|
We designed a new syntax for users to annotate the variables they want to tune and in what range they want to tune the variables. Also, they can annotate which variable they want to report as intermediate result to `assessor`, and which variable to report as the final result (e.g. model accuracy) to `tuner`. A really appealing feature of our NNI annotation is that it exists as comments in your code, which means you can run your code as before without NNI. Let's look at an example, below is a piece of tensorflow code:
|
||||||
|
```diff
|
||||||
|
with tf.Session() as sess:
|
||||||
|
sess.run(tf.global_variables_initializer())
|
||||||
|
+ """@nni.variable(nni.choice(50, 250, 500), name=batch_size)"""
|
||||||
|
batch_size = 128
|
||||||
|
for i in range(10000):
|
||||||
|
batch = mnist.train.next_batch(batch_size)
|
||||||
|
+ """@nni.variable(nni.choice(1, 5), name=dropout_rate)"""
|
||||||
|
dropout_rate = 0.5
|
||||||
|
mnist_network.train_step.run(feed_dict={mnist_network.images: batch[0],
|
||||||
|
mnist_network.labels: batch[1],
|
||||||
|
mnist_network.keep_prob: dropout_rate})
|
||||||
|
if i % 100 == 0:
|
||||||
|
test_acc = mnist_network.accuracy.eval(
|
||||||
|
feed_dict={mnist_network.images: mnist.test.images,
|
||||||
|
mnist_network.labels: mnist.test.labels,
|
||||||
|
mnist_network.keep_prob: 1.0})
|
||||||
|
+ """@nni.report_intermediate_result(test_acc)"""
|
||||||
|
|
||||||
|
test_acc = mnist_network.accuracy.eval(
|
||||||
|
feed_dict={mnist_network.images: mnist.test.images,
|
||||||
|
mnist_network.labels: mnist.test.labels,
|
||||||
|
mnist_network.keep_prob: 1.0})
|
||||||
|
+ """@nni.report_final_result(test_acc)"""
|
||||||
|
```
|
||||||
|
|
||||||
|
Let's say you want to tune batch\_size and dropout\_rate, and report test\_acc every 100 steps, at last report test\_acc as final result. With our NNI annotation, your code would look like below:
|
||||||
|
|
||||||
|
|
||||||
|
Simply adding four lines would make your code runnable on NNI. You can still run your code independently. `@nni.variable` works on its next line assignment, and `@nni.report_intermediate_result`/`@nni.report_final_result` would send the data to assessor/tuner at that line. Please refer to [here](../tools/annotation/README.md) for more annotation syntax and more powerful usage. In the yaml configure file, you need one line to enable NNI annotation:
|
||||||
|
```
|
||||||
|
useAnnotation: true
|
||||||
|
```
|
||||||
|
|
||||||
|
For users to correctly leverage NNI annotation, we briefly introduce how NNI annotation works here: NNI precompiles users' trial code to find all the annotations each of which is one line with `"""@nni` at the head of the line. Then NNI replaces each annotation with a corresponding NNI API at the location where the annotation is.
|
||||||
|
|
||||||
|
**Note that: in your trial code, you can use either one of NNI APIs and NNI annotation, but not both of them simultaneously.**
|
|
@ -9,17 +9,21 @@ searchSpacePath:
|
||||||
#choice: true, false
|
#choice: true, false
|
||||||
useAnnotation:
|
useAnnotation:
|
||||||
tuner:
|
tuner:
|
||||||
tunerCommand:
|
#choice: TPE, Random, Anneal, Evolution
|
||||||
tunerCwd:
|
builtinTunerName:
|
||||||
tunerGpuNum:
|
classArgs:
|
||||||
|
#choice: maximize, minimize
|
||||||
|
optimize_mode:
|
||||||
assessor:
|
assessor:
|
||||||
assessorCommand:
|
#choice: Medianstop
|
||||||
assessorCwd:
|
builtinAssessorName:
|
||||||
assessorGpuNum:
|
classArgs:
|
||||||
|
#choice: maximize, minimize
|
||||||
|
optimize_mode:
|
||||||
trial:
|
trial:
|
||||||
trialCommand:
|
command:
|
||||||
trialCodeDir:
|
codeDir:
|
||||||
trialGpuNum:
|
gpuNum:
|
||||||
#machineList can be empty if the platform is local
|
#machineList can be empty if the platform is local
|
||||||
machineList:
|
machineList:
|
||||||
- ip:
|
- ip:
|
||||||
|
|
|
@ -8,9 +8,12 @@ trainingServicePlatform: local
|
||||||
#choice: true, false
|
#choice: true, false
|
||||||
useAnnotation: false
|
useAnnotation: false
|
||||||
tuner:
|
tuner:
|
||||||
tunerCommand: python3 __main__.py
|
codeDir: ~/nni/examples/tuners/ga_customer_tuner
|
||||||
tunerCwd: /usr/share/nni/examples/tuners/ga_customer_tuner
|
classFileName: customer_tuner.py
|
||||||
|
className: CustomerTuner
|
||||||
|
classArgs:
|
||||||
|
optimize_mode: maximize
|
||||||
trial:
|
trial:
|
||||||
trialCommand: python3 trial.py
|
command: python3 trial.py
|
||||||
trialCodeDir: /usr/share/nni/examples/trials/ga_squad
|
codeDir: ~/nni/examples/trials/ga_squad
|
||||||
trialGpuNum: 0
|
gpuNum: 0
|
|
@ -9,10 +9,11 @@ trainingServicePlatform: local
|
||||||
useAnnotation: true
|
useAnnotation: true
|
||||||
tuner:
|
tuner:
|
||||||
#choice: TPE, Random, Anneal, Evolution
|
#choice: TPE, Random, Anneal, Evolution
|
||||||
tunerName: TPE
|
builtinTunerName: TPE
|
||||||
#choice: Maximize, Minimize
|
classArgs:
|
||||||
optimizationMode: Maximize
|
#choice: maximize, minimize
|
||||||
|
optimize_mode: maximize
|
||||||
trial:
|
trial:
|
||||||
trialCommand: python3 mnist.py
|
command: python3 mnist.py
|
||||||
trialCodeDir: /usr/share/nni/examples/trials/mnist-annotation
|
codeDir: ~/nni/examples/trials/mnist-annotation
|
||||||
trialGpuNum: 0
|
gpuNum: 0
|
|
@ -5,15 +5,16 @@ maxExecDuration: 1h
|
||||||
maxTrialNum: 1
|
maxTrialNum: 1
|
||||||
#choice: local, remote
|
#choice: local, remote
|
||||||
trainingServicePlatform: local
|
trainingServicePlatform: local
|
||||||
searchSpacePath: /usr/share/nni/examples/trials/mnist-keras/search_space.json
|
searchSpacePath: ~/nni/examples/trials/mnist-keras/search_space.json
|
||||||
#choice: true, false
|
#choice: true, false
|
||||||
useAnnotation: false
|
useAnnotation: false
|
||||||
tuner:
|
tuner:
|
||||||
#choice: TPE, Random, Anneal, Evolution
|
#choice: TPE, Random, Anneal, Evolution
|
||||||
tunerName: TPE
|
builtinTunerName: TPE
|
||||||
#choice: Maximize, Minimize
|
classArgs:
|
||||||
optimizationMode: Maximize
|
#choice: maximize, minimize
|
||||||
|
optimize_mode: maximize
|
||||||
trial:
|
trial:
|
||||||
trialCommand: python3 mnist-keras.py
|
command: python3 mnist-keras.py
|
||||||
trialCodeDir: /usr/share/nni/examples/trials/mnist-keras
|
codeDir: ~/nni/examples/trials/mnist-keras
|
||||||
trialGpuNum: 0
|
gpuNum: 0
|
|
@ -84,7 +84,7 @@ class SendMetrics(keras.callbacks.Callback):
|
||||||
Run on end of each epoch
|
Run on end of each epoch
|
||||||
'''
|
'''
|
||||||
LOG.debug(logs)
|
LOG.debug(logs)
|
||||||
nni.report_intermediate_result(logs)
|
nni.report_intermediate_result(logs['acc'])
|
||||||
|
|
||||||
def train(args, params):
|
def train(args, params):
|
||||||
'''
|
'''
|
||||||
|
|
|
@ -9,10 +9,11 @@ trainingServicePlatform: local
|
||||||
useAnnotation: true
|
useAnnotation: true
|
||||||
tuner:
|
tuner:
|
||||||
#choice: TPE, Random, Anneal, Evolution
|
#choice: TPE, Random, Anneal, Evolution
|
||||||
tunerName: TPE
|
builtinTunerName: TPE
|
||||||
#choice: Maximize, Minimize
|
classArgs:
|
||||||
optimizationMode: Maximize
|
#choice: maximize, minimize
|
||||||
|
optimize_mode: maximize
|
||||||
trial:
|
trial:
|
||||||
trialCommand: python3 mnist.py
|
command: python3 mnist.py
|
||||||
trialCodeDir: /usr/share/nni/examples/trials/mnist-smartparam
|
codeDir: ~/nni/examples/trials/mnist-smartparam
|
||||||
trialGpuNum: 0
|
gpuNum: 0
|
|
@ -5,15 +5,16 @@ maxExecDuration: 1h
|
||||||
maxTrialNum: 1
|
maxTrialNum: 1
|
||||||
#choice: local, remote
|
#choice: local, remote
|
||||||
trainingServicePlatform: local
|
trainingServicePlatform: local
|
||||||
searchSpacePath: /usr/share/nni/examples/trials/mnist/search_space.json
|
searchSpacePath: ~/nni/examples/trials/mnist/search_space.json
|
||||||
#choice: true, false
|
#choice: true, false
|
||||||
useAnnotation: false
|
useAnnotation: false
|
||||||
tuner:
|
tuner:
|
||||||
#choice: TPE, Random, Anneal, Evolution
|
#choice: TPE, Random, Anneal, Evolution
|
||||||
tunerName: TPE
|
builtinTunerName: TPE
|
||||||
#choice: Maximize, Minimize
|
classArgs:
|
||||||
optimizationMode: Maximize
|
#choice: maximize, minimize
|
||||||
|
optimize_mode: maximize
|
||||||
trial:
|
trial:
|
||||||
trialCommand: python3 mnist.py
|
command: python3 mnist.py
|
||||||
trialCodeDir: /usr/share/nni/examples/trials/mnist
|
codeDir: ~/nni/examples/trials/mnist
|
||||||
trialGpuNum: 0
|
gpuNum: 0
|
|
@ -5,20 +5,22 @@ maxExecDuration: 1h
|
||||||
maxTrialNum: 1
|
maxTrialNum: 1
|
||||||
#choice: local, remote
|
#choice: local, remote
|
||||||
trainingServicePlatform: local
|
trainingServicePlatform: local
|
||||||
searchSpacePath: /usr/share/nni/examples/trials/mnist/search_space.json
|
searchSpacePath: ~/nni/examples/trials/mnist/search_space.json
|
||||||
#choice: true, false
|
#choice: true, false
|
||||||
useAnnotation: false
|
useAnnotation: false
|
||||||
tuner:
|
tuner:
|
||||||
#choice: TPE, Random, Anneal, Evolution
|
#choice: TPE, Random, Anneal, Evolution
|
||||||
tunerName: TPE
|
builtinTunerName: TPE
|
||||||
#choice: Maximize, Minimize
|
classArgs:
|
||||||
optimizationMode: Maximize
|
#choice: maximize, minimize
|
||||||
|
optimize_mode: maximize
|
||||||
assessor:
|
assessor:
|
||||||
#choice: Medianstop
|
#choice: Medianstop
|
||||||
assessorName: Medianstop
|
builtinAssessorName: Medianstop
|
||||||
#choice: Maximize, Minimize
|
classArgs:
|
||||||
optimizationMode: Maximize
|
#choice: maximize, minimize
|
||||||
|
optimize_mode: maximize
|
||||||
trial:
|
trial:
|
||||||
trialCommand: python3 mnist.py
|
command: python3 mnist.py
|
||||||
trialCodeDir: /usr/share/nni/examples/trials/mnist
|
codeDir: ~/nni/examples/trials/mnist
|
||||||
trialGpuNum: 0
|
gpuNum: 0
|
|
@ -0,0 +1,20 @@
|
||||||
|
authorName: default
|
||||||
|
experimentName: example_pytorch_cifar10
|
||||||
|
trialConcurrency: 1
|
||||||
|
maxExecDuration: 100h
|
||||||
|
maxTrialNum: 1
|
||||||
|
#choice: local, remote
|
||||||
|
trainingServicePlatform: local
|
||||||
|
searchSpacePath: ~/nni/examples/trials/pytorch_cifar10/search_space.json
|
||||||
|
#choice: true, false
|
||||||
|
useAnnotation: false
|
||||||
|
tuner:
|
||||||
|
#choice: TPE, Random, Anneal, Evolution
|
||||||
|
builtinTunerName: TPE
|
||||||
|
classArgs:
|
||||||
|
#choice: maximize, minimize
|
||||||
|
optimize_mode: maximize
|
||||||
|
trial:
|
||||||
|
command: python3 main.py
|
||||||
|
codeDir: ~/nni/examples/trials/pytorch_cifar10
|
||||||
|
gpuNum: 1
|
|
@ -0,0 +1,193 @@
|
||||||
|
'''Train CIFAR10 with PyTorch.'''
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
|
import torch
|
||||||
|
import torch.nn as nn
|
||||||
|
import torch.optim as optim
|
||||||
|
import torch.nn.functional as F
|
||||||
|
import torch.backends.cudnn as cudnn
|
||||||
|
|
||||||
|
import torchvision
|
||||||
|
import torchvision.transforms as transforms
|
||||||
|
|
||||||
|
import os
|
||||||
|
import argparse
|
||||||
|
import logging
|
||||||
|
|
||||||
|
from models import *
|
||||||
|
from utils import progress_bar
|
||||||
|
|
||||||
|
import nni
|
||||||
|
|
||||||
|
_logger = logging.getLogger("cifar10_pytorch_automl")
|
||||||
|
|
||||||
|
trainloader = None
|
||||||
|
testloader = None
|
||||||
|
net = None
|
||||||
|
criterion = None
|
||||||
|
optimizer = None
|
||||||
|
device = 'cuda' if torch.cuda.is_available() else 'cpu'
|
||||||
|
best_acc = 0.0 # best test accuracy
|
||||||
|
start_epoch = 0 # start from epoch 0 or last checkpoint epoch
|
||||||
|
|
||||||
|
def prepare(args):
|
||||||
|
global trainloader
|
||||||
|
global testloader
|
||||||
|
global net
|
||||||
|
global criterion
|
||||||
|
global optimizer
|
||||||
|
|
||||||
|
# Data
|
||||||
|
print('==> Preparing data..')
|
||||||
|
transform_train = transforms.Compose([
|
||||||
|
transforms.RandomCrop(32, padding=4),
|
||||||
|
transforms.RandomHorizontalFlip(),
|
||||||
|
transforms.ToTensor(),
|
||||||
|
transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
|
||||||
|
])
|
||||||
|
|
||||||
|
transform_test = transforms.Compose([
|
||||||
|
transforms.ToTensor(),
|
||||||
|
transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
|
||||||
|
])
|
||||||
|
|
||||||
|
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
|
||||||
|
trainloader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True, num_workers=2)
|
||||||
|
|
||||||
|
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)
|
||||||
|
testloader = torch.utils.data.DataLoader(testset, batch_size=100, shuffle=False, num_workers=2)
|
||||||
|
|
||||||
|
#classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
|
||||||
|
|
||||||
|
# Model
|
||||||
|
print('==> Building model..')
|
||||||
|
if args['model'] == 'vgg':
|
||||||
|
net = VGG('VGG19')
|
||||||
|
if args['model'] == 'resnet18':
|
||||||
|
net = ResNet18()
|
||||||
|
if args['model'] == 'googlenet':
|
||||||
|
net = GoogLeNet()
|
||||||
|
if args['model'] == 'densenet121':
|
||||||
|
net = DenseNet121()
|
||||||
|
if args['model'] == 'mobilenet':
|
||||||
|
net = MobileNet()
|
||||||
|
if args['model'] == 'dpn92':
|
||||||
|
net = DPN92()
|
||||||
|
if args['model'] == 'shufflenetg2':
|
||||||
|
net = ShuffleNetG2()
|
||||||
|
if args['model'] == 'senet18':
|
||||||
|
net = SENet18()
|
||||||
|
|
||||||
|
net = net.to(device)
|
||||||
|
if device == 'cuda':
|
||||||
|
net = torch.nn.DataParallel(net)
|
||||||
|
cudnn.benchmark = True
|
||||||
|
|
||||||
|
criterion = nn.CrossEntropyLoss()
|
||||||
|
#optimizer = optim.SGD(net.parameters(), lr=args['lr'], momentum=0.9, weight_decay=5e-4)
|
||||||
|
|
||||||
|
if args['optimizer'] == 'SGD':
|
||||||
|
optimizer = optim.SGD(net.parameters(), lr=args['lr'], momentum=0.9, weight_decay=5e-4)
|
||||||
|
if args['optimizer'] == 'Adadelta':
|
||||||
|
optimizer = optim.Adadelta(net.parameters(), lr=args['lr'])
|
||||||
|
if args['optimizer'] == 'Adagrad':
|
||||||
|
optimizer = optim.Adagrad(net.parameters(), lr=args['lr'])
|
||||||
|
if args['optimizer'] == 'Adam':
|
||||||
|
optimizer = optim.Adam(net.parameters(), lr=args['lr'])
|
||||||
|
if args['optimizer'] == 'Adamax':
|
||||||
|
optimizer = optim.Adam(net.parameters(), lr=args['lr'])
|
||||||
|
|
||||||
|
|
||||||
|
# Training
|
||||||
|
def train(epoch):
|
||||||
|
global trainloader
|
||||||
|
global testloader
|
||||||
|
global net
|
||||||
|
global criterion
|
||||||
|
global optimizer
|
||||||
|
|
||||||
|
print('\nEpoch: %d' % epoch)
|
||||||
|
net.train()
|
||||||
|
train_loss = 0
|
||||||
|
correct = 0
|
||||||
|
total = 0
|
||||||
|
for batch_idx, (inputs, targets) in enumerate(trainloader):
|
||||||
|
inputs, targets = inputs.to(device), targets.to(device)
|
||||||
|
optimizer.zero_grad()
|
||||||
|
outputs = net(inputs)
|
||||||
|
loss = criterion(outputs, targets)
|
||||||
|
loss.backward()
|
||||||
|
optimizer.step()
|
||||||
|
|
||||||
|
train_loss += loss.item()
|
||||||
|
_, predicted = outputs.max(1)
|
||||||
|
total += targets.size(0)
|
||||||
|
correct += predicted.eq(targets).sum().item()
|
||||||
|
|
||||||
|
acc = 100.*correct/total
|
||||||
|
|
||||||
|
progress_bar(batch_idx, len(trainloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
|
||||||
|
% (train_loss/(batch_idx+1), 100.*correct/total, correct, total))
|
||||||
|
|
||||||
|
def test(epoch):
|
||||||
|
global best_acc
|
||||||
|
global trainloader
|
||||||
|
global testloader
|
||||||
|
global net
|
||||||
|
global criterion
|
||||||
|
global optimizer
|
||||||
|
|
||||||
|
net.eval()
|
||||||
|
test_loss = 0
|
||||||
|
correct = 0
|
||||||
|
total = 0
|
||||||
|
with torch.no_grad():
|
||||||
|
for batch_idx, (inputs, targets) in enumerate(testloader):
|
||||||
|
inputs, targets = inputs.to(device), targets.to(device)
|
||||||
|
outputs = net(inputs)
|
||||||
|
loss = criterion(outputs, targets)
|
||||||
|
|
||||||
|
test_loss += loss.item()
|
||||||
|
_, predicted = outputs.max(1)
|
||||||
|
total += targets.size(0)
|
||||||
|
correct += predicted.eq(targets).sum().item()
|
||||||
|
|
||||||
|
acc = 100.*correct/total
|
||||||
|
|
||||||
|
progress_bar(batch_idx, len(testloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
|
||||||
|
% (test_loss/(batch_idx+1), 100.*correct/total, correct, total))
|
||||||
|
|
||||||
|
# Save checkpoint.
|
||||||
|
acc = 100.*correct/total
|
||||||
|
if acc > best_acc:
|
||||||
|
print('Saving..')
|
||||||
|
state = {
|
||||||
|
'net': net.state_dict(),
|
||||||
|
'acc': acc,
|
||||||
|
'epoch': epoch,
|
||||||
|
}
|
||||||
|
if not os.path.isdir('checkpoint'):
|
||||||
|
os.mkdir('checkpoint')
|
||||||
|
torch.save(state, './checkpoint/ckpt.t7')
|
||||||
|
best_acc = acc
|
||||||
|
return acc, best_acc
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
try:
|
||||||
|
RCV_CONFIG = nni.get_parameters()
|
||||||
|
#RCV_CONFIG = {'lr': 0.1, 'optimizer': 'Adam', 'model':'senet18'}
|
||||||
|
_logger.debug(RCV_CONFIG)
|
||||||
|
|
||||||
|
prepare(RCV_CONFIG)
|
||||||
|
acc = 0.0
|
||||||
|
best_acc = 0.0
|
||||||
|
for epoch in range(start_epoch, start_epoch+200):
|
||||||
|
train(epoch)
|
||||||
|
acc, best_acc = test(epoch)
|
||||||
|
nni.report_intermediate_result(acc)
|
||||||
|
|
||||||
|
nni.report_final_result(best_acc)
|
||||||
|
except Exception as exception:
|
||||||
|
_logger.exception(exception)
|
||||||
|
raise
|
|
@ -0,0 +1,11 @@
|
||||||
|
from .vgg import *
|
||||||
|
from .densenet import *
|
||||||
|
from .dpn import *
|
||||||
|
from .googlenet import *
|
||||||
|
from .lenet import *
|
||||||
|
from .mobilenet import *
|
||||||
|
from .pnasnet import *
|
||||||
|
from .resnet import *
|
||||||
|
from .senet import *
|
||||||
|
from .shufflenet import *
|
||||||
|
|
|
@ -0,0 +1,107 @@
|
||||||
|
'''DenseNet in PyTorch.'''
|
||||||
|
import math
|
||||||
|
|
||||||
|
import torch
|
||||||
|
import torch.nn as nn
|
||||||
|
import torch.nn.functional as F
|
||||||
|
|
||||||
|
|
||||||
|
class Bottleneck(nn.Module):
|
||||||
|
def __init__(self, in_planes, growth_rate):
|
||||||
|
super(Bottleneck, self).__init__()
|
||||||
|
self.bn1 = nn.BatchNorm2d(in_planes)
|
||||||
|
self.conv1 = nn.Conv2d(in_planes, 4*growth_rate, kernel_size=1, bias=False)
|
||||||
|
self.bn2 = nn.BatchNorm2d(4*growth_rate)
|
||||||
|
self.conv2 = nn.Conv2d(4*growth_rate, growth_rate, kernel_size=3, padding=1, bias=False)
|
||||||
|
|
||||||
|
def forward(self, x):
|
||||||
|
out = self.conv1(F.relu(self.bn1(x)))
|
||||||
|
out = self.conv2(F.relu(self.bn2(out)))
|
||||||
|
out = torch.cat([out,x], 1)
|
||||||
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
class Transition(nn.Module):
|
||||||
|
def __init__(self, in_planes, out_planes):
|
||||||
|
super(Transition, self).__init__()
|
||||||
|
self.bn = nn.BatchNorm2d(in_planes)
|
||||||
|
self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=1, bias=False)
|
||||||
|
|
||||||
|
def forward(self, x):
|
||||||
|
out = self.conv(F.relu(self.bn(x)))
|
||||||
|
out = F.avg_pool2d(out, 2)
|
||||||
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
class DenseNet(nn.Module):
|
||||||
|
def __init__(self, block, nblocks, growth_rate=12, reduction=0.5, num_classes=10):
|
||||||
|
super(DenseNet, self).__init__()
|
||||||
|
self.growth_rate = growth_rate
|
||||||
|
|
||||||
|
num_planes = 2*growth_rate
|
||||||
|
self.conv1 = nn.Conv2d(3, num_planes, kernel_size=3, padding=1, bias=False)
|
||||||
|
|
||||||
|
self.dense1 = self._make_dense_layers(block, num_planes, nblocks[0])
|
||||||
|
num_planes += nblocks[0]*growth_rate
|
||||||
|
out_planes = int(math.floor(num_planes*reduction))
|
||||||
|
self.trans1 = Transition(num_planes, out_planes)
|
||||||
|
num_planes = out_planes
|
||||||
|
|
||||||
|
self.dense2 = self._make_dense_layers(block, num_planes, nblocks[1])
|
||||||
|
num_planes += nblocks[1]*growth_rate
|
||||||
|
out_planes = int(math.floor(num_planes*reduction))
|
||||||
|
self.trans2 = Transition(num_planes, out_planes)
|
||||||
|
num_planes = out_planes
|
||||||
|
|
||||||
|
self.dense3 = self._make_dense_layers(block, num_planes, nblocks[2])
|
||||||
|
num_planes += nblocks[2]*growth_rate
|
||||||
|
out_planes = int(math.floor(num_planes*reduction))
|
||||||
|
self.trans3 = Transition(num_planes, out_planes)
|
||||||
|
num_planes = out_planes
|
||||||
|
|
||||||
|
self.dense4 = self._make_dense_layers(block, num_planes, nblocks[3])
|
||||||
|
num_planes += nblocks[3]*growth_rate
|
||||||
|
|
||||||
|
self.bn = nn.BatchNorm2d(num_planes)
|
||||||
|
self.linear = nn.Linear(num_planes, num_classes)
|
||||||
|
|
||||||
|
def _make_dense_layers(self, block, in_planes, nblock):
|
||||||
|
layers = []
|
||||||
|
for i in range(nblock):
|
||||||
|
layers.append(block(in_planes, self.growth_rate))
|
||||||
|
in_planes += self.growth_rate
|
||||||
|
return nn.Sequential(*layers)
|
||||||
|
|
||||||
|
def forward(self, x):
|
||||||
|
out = self.conv1(x)
|
||||||
|
out = self.trans1(self.dense1(out))
|
||||||
|
out = self.trans2(self.dense2(out))
|
||||||
|
out = self.trans3(self.dense3(out))
|
||||||
|
out = self.dense4(out)
|
||||||
|
out = F.avg_pool2d(F.relu(self.bn(out)), 4)
|
||||||
|
out = out.view(out.size(0), -1)
|
||||||
|
out = self.linear(out)
|
||||||
|
return out
|
||||||
|
|
||||||
|
def DenseNet121():
|
||||||
|
return DenseNet(Bottleneck, [6,12,24,16], growth_rate=32)
|
||||||
|
|
||||||
|
def DenseNet169():
|
||||||
|
return DenseNet(Bottleneck, [6,12,32,32], growth_rate=32)
|
||||||
|
|
||||||
|
def DenseNet201():
|
||||||
|
return DenseNet(Bottleneck, [6,12,48,32], growth_rate=32)
|
||||||
|
|
||||||
|
def DenseNet161():
|
||||||
|
return DenseNet(Bottleneck, [6,12,36,24], growth_rate=48)
|
||||||
|
|
||||||
|
def densenet_cifar():
|
||||||
|
return DenseNet(Bottleneck, [6,12,24,16], growth_rate=12)
|
||||||
|
|
||||||
|
def test():
|
||||||
|
net = densenet_cifar()
|
||||||
|
x = torch.randn(1,3,32,32)
|
||||||
|
y = net(x)
|
||||||
|
print(y)
|
||||||
|
|
||||||
|
# test()
|
|
@ -0,0 +1,98 @@
|
||||||
|
'''Dual Path Networks in PyTorch.'''
|
||||||
|
import torch
|
||||||
|
import torch.nn as nn
|
||||||
|
import torch.nn.functional as F
|
||||||
|
|
||||||
|
|
||||||
|
class Bottleneck(nn.Module):
|
||||||
|
def __init__(self, last_planes, in_planes, out_planes, dense_depth, stride, first_layer):
|
||||||
|
super(Bottleneck, self).__init__()
|
||||||
|
self.out_planes = out_planes
|
||||||
|
self.dense_depth = dense_depth
|
||||||
|
|
||||||
|
self.conv1 = nn.Conv2d(last_planes, in_planes, kernel_size=1, bias=False)
|
||||||
|
self.bn1 = nn.BatchNorm2d(in_planes)
|
||||||
|
self.conv2 = nn.Conv2d(in_planes, in_planes, kernel_size=3, stride=stride, padding=1, groups=32, bias=False)
|
||||||
|
self.bn2 = nn.BatchNorm2d(in_planes)
|
||||||
|
self.conv3 = nn.Conv2d(in_planes, out_planes+dense_depth, kernel_size=1, bias=False)
|
||||||
|
self.bn3 = nn.BatchNorm2d(out_planes+dense_depth)
|
||||||
|
|
||||||
|
self.shortcut = nn.Sequential()
|
||||||
|
if first_layer:
|
||||||
|
self.shortcut = nn.Sequential(
|
||||||
|
nn.Conv2d(last_planes, out_planes+dense_depth, kernel_size=1, stride=stride, bias=False),
|
||||||
|
nn.BatchNorm2d(out_planes+dense_depth)
|
||||||
|
)
|
||||||
|
|
||||||
|
def forward(self, x):
|
||||||
|
out = F.relu(self.bn1(self.conv1(x)))
|
||||||
|
out = F.relu(self.bn2(self.conv2(out)))
|
||||||
|
out = self.bn3(self.conv3(out))
|
||||||
|
x = self.shortcut(x)
|
||||||
|
d = self.out_planes
|
||||||
|
out = torch.cat([x[:,:d,:,:]+out[:,:d,:,:], x[:,d:,:,:], out[:,d:,:,:]], 1)
|
||||||
|
out = F.relu(out)
|
||||||
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
class DPN(nn.Module):
|
||||||
|
def __init__(self, cfg):
|
||||||
|
super(DPN, self).__init__()
|
||||||
|
in_planes, out_planes = cfg['in_planes'], cfg['out_planes']
|
||||||
|
num_blocks, dense_depth = cfg['num_blocks'], cfg['dense_depth']
|
||||||
|
|
||||||
|
self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
|
||||||
|
self.bn1 = nn.BatchNorm2d(64)
|
||||||
|
self.last_planes = 64
|
||||||
|
self.layer1 = self._make_layer(in_planes[0], out_planes[0], num_blocks[0], dense_depth[0], stride=1)
|
||||||
|
self.layer2 = self._make_layer(in_planes[1], out_planes[1], num_blocks[1], dense_depth[1], stride=2)
|
||||||
|
self.layer3 = self._make_layer(in_planes[2], out_planes[2], num_blocks[2], dense_depth[2], stride=2)
|
||||||
|
self.layer4 = self._make_layer(in_planes[3], out_planes[3], num_blocks[3], dense_depth[3], stride=2)
|
||||||
|
self.linear = nn.Linear(out_planes[3]+(num_blocks[3]+1)*dense_depth[3], 10)
|
||||||
|
|
||||||
|
def _make_layer(self, in_planes, out_planes, num_blocks, dense_depth, stride):
|
||||||
|
strides = [stride] + [1]*(num_blocks-1)
|
||||||
|
layers = []
|
||||||
|
for i,stride in enumerate(strides):
|
||||||
|
layers.append(Bottleneck(self.last_planes, in_planes, out_planes, dense_depth, stride, i==0))
|
||||||
|
self.last_planes = out_planes + (i+2) * dense_depth
|
||||||
|
return nn.Sequential(*layers)
|
||||||
|
|
||||||
|
def forward(self, x):
|
||||||
|
out = F.relu(self.bn1(self.conv1(x)))
|
||||||
|
out = self.layer1(out)
|
||||||
|
out = self.layer2(out)
|
||||||
|
out = self.layer3(out)
|
||||||
|
out = self.layer4(out)
|
||||||
|
out = F.avg_pool2d(out, 4)
|
||||||
|
out = out.view(out.size(0), -1)
|
||||||
|
out = self.linear(out)
|
||||||
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
def DPN26():
|
||||||
|
cfg = {
|
||||||
|
'in_planes': (96,192,384,768),
|
||||||
|
'out_planes': (256,512,1024,2048),
|
||||||
|
'num_blocks': (2,2,2,2),
|
||||||
|
'dense_depth': (16,32,24,128)
|
||||||
|
}
|
||||||
|
return DPN(cfg)
|
||||||
|
|
||||||
|
def DPN92():
|
||||||
|
cfg = {
|
||||||
|
'in_planes': (96,192,384,768),
|
||||||
|
'out_planes': (256,512,1024,2048),
|
||||||
|
'num_blocks': (3,4,20,3),
|
||||||
|
'dense_depth': (16,32,24,128)
|
||||||
|
}
|
||||||
|
return DPN(cfg)
|
||||||
|
|
||||||
|
|
||||||
|
def test():
|
||||||
|
net = DPN92()
|
||||||
|
x = torch.randn(1,3,32,32)
|
||||||
|
y = net(x)
|
||||||
|
print(y)
|
||||||
|
|
||||||
|
# test()
|
|
@ -0,0 +1,107 @@
|
||||||
|
'''GoogLeNet with PyTorch.'''
|
||||||
|
import torch
|
||||||
|
import torch.nn as nn
|
||||||
|
import torch.nn.functional as F
|
||||||
|
|
||||||
|
|
||||||
|
class Inception(nn.Module):
|
||||||
|
def __init__(self, in_planes, n1x1, n3x3red, n3x3, n5x5red, n5x5, pool_planes):
|
||||||
|
super(Inception, self).__init__()
|
||||||
|
# 1x1 conv branch
|
||||||
|
self.b1 = nn.Sequential(
|
||||||
|
nn.Conv2d(in_planes, n1x1, kernel_size=1),
|
||||||
|
nn.BatchNorm2d(n1x1),
|
||||||
|
nn.ReLU(True),
|
||||||
|
)
|
||||||
|
|
||||||
|
# 1x1 conv -> 3x3 conv branch
|
||||||
|
self.b2 = nn.Sequential(
|
||||||
|
nn.Conv2d(in_planes, n3x3red, kernel_size=1),
|
||||||
|
nn.BatchNorm2d(n3x3red),
|
||||||
|
nn.ReLU(True),
|
||||||
|
nn.Conv2d(n3x3red, n3x3, kernel_size=3, padding=1),
|
||||||
|
nn.BatchNorm2d(n3x3),
|
||||||
|
nn.ReLU(True),
|
||||||
|
)
|
||||||
|
|
||||||
|
# 1x1 conv -> 5x5 conv branch
|
||||||
|
self.b3 = nn.Sequential(
|
||||||
|
nn.Conv2d(in_planes, n5x5red, kernel_size=1),
|
||||||
|
nn.BatchNorm2d(n5x5red),
|
||||||
|
nn.ReLU(True),
|
||||||
|
nn.Conv2d(n5x5red, n5x5, kernel_size=3, padding=1),
|
||||||
|
nn.BatchNorm2d(n5x5),
|
||||||
|
nn.ReLU(True),
|
||||||
|
nn.Conv2d(n5x5, n5x5, kernel_size=3, padding=1),
|
||||||
|
nn.BatchNorm2d(n5x5),
|
||||||
|
nn.ReLU(True),
|
||||||
|
)
|
||||||
|
|
||||||
|
# 3x3 pool -> 1x1 conv branch
|
||||||
|
self.b4 = nn.Sequential(
|
||||||
|
nn.MaxPool2d(3, stride=1, padding=1),
|
||||||
|
nn.Conv2d(in_planes, pool_planes, kernel_size=1),
|
||||||
|
nn.BatchNorm2d(pool_planes),
|
||||||
|
nn.ReLU(True),
|
||||||
|
)
|
||||||
|
|
||||||
|
def forward(self, x):
|
||||||
|
y1 = self.b1(x)
|
||||||
|
y2 = self.b2(x)
|
||||||
|
y3 = self.b3(x)
|
||||||
|
y4 = self.b4(x)
|
||||||
|
return torch.cat([y1,y2,y3,y4], 1)
|
||||||
|
|
||||||
|
|
||||||
|
class GoogLeNet(nn.Module):
|
||||||
|
def __init__(self):
|
||||||
|
super(GoogLeNet, self).__init__()
|
||||||
|
self.pre_layers = nn.Sequential(
|
||||||
|
nn.Conv2d(3, 192, kernel_size=3, padding=1),
|
||||||
|
nn.BatchNorm2d(192),
|
||||||
|
nn.ReLU(True),
|
||||||
|
)
|
||||||
|
|
||||||
|
self.a3 = Inception(192, 64, 96, 128, 16, 32, 32)
|
||||||
|
self.b3 = Inception(256, 128, 128, 192, 32, 96, 64)
|
||||||
|
|
||||||
|
self.maxpool = nn.MaxPool2d(3, stride=2, padding=1)
|
||||||
|
|
||||||
|
self.a4 = Inception(480, 192, 96, 208, 16, 48, 64)
|
||||||
|
self.b4 = Inception(512, 160, 112, 224, 24, 64, 64)
|
||||||
|
self.c4 = Inception(512, 128, 128, 256, 24, 64, 64)
|
||||||
|
self.d4 = Inception(512, 112, 144, 288, 32, 64, 64)
|
||||||
|
self.e4 = Inception(528, 256, 160, 320, 32, 128, 128)
|
||||||
|
|
||||||
|
self.a5 = Inception(832, 256, 160, 320, 32, 128, 128)
|
||||||
|
self.b5 = Inception(832, 384, 192, 384, 48, 128, 128)
|
||||||
|
|
||||||
|
self.avgpool = nn.AvgPool2d(8, stride=1)
|
||||||
|
self.linear = nn.Linear(1024, 10)
|
||||||
|
|
||||||
|
def forward(self, x):
|
||||||
|
out = self.pre_layers(x)
|
||||||
|
out = self.a3(out)
|
||||||
|
out = self.b3(out)
|
||||||
|
out = self.maxpool(out)
|
||||||
|
out = self.a4(out)
|
||||||
|
out = self.b4(out)
|
||||||
|
out = self.c4(out)
|
||||||
|
out = self.d4(out)
|
||||||
|
out = self.e4(out)
|
||||||
|
out = self.maxpool(out)
|
||||||
|
out = self.a5(out)
|
||||||
|
out = self.b5(out)
|
||||||
|
out = self.avgpool(out)
|
||||||
|
out = out.view(out.size(0), -1)
|
||||||
|
out = self.linear(out)
|
||||||
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
def test():
|
||||||
|
net = GoogLeNet()
|
||||||
|
x = torch.randn(1,3,32,32)
|
||||||
|
y = net(x)
|
||||||
|
print(y.size())
|
||||||
|
|
||||||
|
# test()
|
|
@ -0,0 +1,23 @@
|
||||||
|
'''LeNet in PyTorch.'''
|
||||||
|
import torch.nn as nn
|
||||||
|
import torch.nn.functional as F
|
||||||
|
|
||||||
|
class LeNet(nn.Module):
|
||||||
|
def __init__(self):
|
||||||
|
super(LeNet, self).__init__()
|
||||||
|
self.conv1 = nn.Conv2d(3, 6, 5)
|
||||||
|
self.conv2 = nn.Conv2d(6, 16, 5)
|
||||||
|
self.fc1 = nn.Linear(16*5*5, 120)
|
||||||
|
self.fc2 = nn.Linear(120, 84)
|
||||||
|
self.fc3 = nn.Linear(84, 10)
|
||||||
|
|
||||||
|
def forward(self, x):
|
||||||
|
out = F.relu(self.conv1(x))
|
||||||
|
out = F.max_pool2d(out, 2)
|
||||||
|
out = F.relu(self.conv2(out))
|
||||||
|
out = F.max_pool2d(out, 2)
|
||||||
|
out = out.view(out.size(0), -1)
|
||||||
|
out = F.relu(self.fc1(out))
|
||||||
|
out = F.relu(self.fc2(out))
|
||||||
|
out = self.fc3(out)
|
||||||
|
return out
|
|
@ -0,0 +1,61 @@
|
||||||
|
'''MobileNet in PyTorch.
|
||||||
|
|
||||||
|
See the paper "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications"
|
||||||
|
for more details.
|
||||||
|
'''
|
||||||
|
import torch
|
||||||
|
import torch.nn as nn
|
||||||
|
import torch.nn.functional as F
|
||||||
|
|
||||||
|
|
||||||
|
class Block(nn.Module):
|
||||||
|
'''Depthwise conv + Pointwise conv'''
|
||||||
|
def __init__(self, in_planes, out_planes, stride=1):
|
||||||
|
super(Block, self).__init__()
|
||||||
|
self.conv1 = nn.Conv2d(in_planes, in_planes, kernel_size=3, stride=stride, padding=1, groups=in_planes, bias=False)
|
||||||
|
self.bn1 = nn.BatchNorm2d(in_planes)
|
||||||
|
self.conv2 = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False)
|
||||||
|
self.bn2 = nn.BatchNorm2d(out_planes)
|
||||||
|
|
||||||
|
def forward(self, x):
|
||||||
|
out = F.relu(self.bn1(self.conv1(x)))
|
||||||
|
out = F.relu(self.bn2(self.conv2(out)))
|
||||||
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
class MobileNet(nn.Module):
|
||||||
|
# (128,2) means conv planes=128, conv stride=2, by default conv stride=1
|
||||||
|
cfg = [64, (128,2), 128, (256,2), 256, (512,2), 512, 512, 512, 512, 512, (1024,2), 1024]
|
||||||
|
|
||||||
|
def __init__(self, num_classes=10):
|
||||||
|
super(MobileNet, self).__init__()
|
||||||
|
self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1, bias=False)
|
||||||
|
self.bn1 = nn.BatchNorm2d(32)
|
||||||
|
self.layers = self._make_layers(in_planes=32)
|
||||||
|
self.linear = nn.Linear(1024, num_classes)
|
||||||
|
|
||||||
|
def _make_layers(self, in_planes):
|
||||||
|
layers = []
|
||||||
|
for x in self.cfg:
|
||||||
|
out_planes = x if isinstance(x, int) else x[0]
|
||||||
|
stride = 1 if isinstance(x, int) else x[1]
|
||||||
|
layers.append(Block(in_planes, out_planes, stride))
|
||||||
|
in_planes = out_planes
|
||||||
|
return nn.Sequential(*layers)
|
||||||
|
|
||||||
|
def forward(self, x):
|
||||||
|
out = F.relu(self.bn1(self.conv1(x)))
|
||||||
|
out = self.layers(out)
|
||||||
|
out = F.avg_pool2d(out, 2)
|
||||||
|
out = out.view(out.size(0), -1)
|
||||||
|
out = self.linear(out)
|
||||||
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
def test():
|
||||||
|
net = MobileNet()
|
||||||
|
x = torch.randn(1,3,32,32)
|
||||||
|
y = net(x)
|
||||||
|
print(y.size())
|
||||||
|
|
||||||
|
# test()
|
|
@ -0,0 +1,86 @@
|
||||||
|
'''MobileNetV2 in PyTorch.
|
||||||
|
|
||||||
|
See the paper "Inverted Residuals and Linear Bottlenecks:
|
||||||
|
Mobile Networks for Classification, Detection and Segmentation" for more details.
|
||||||
|
'''
|
||||||
|
import torch
|
||||||
|
import torch.nn as nn
|
||||||
|
import torch.nn.functional as F
|
||||||
|
|
||||||
|
|
||||||
|
class Block(nn.Module):
|
||||||
|
'''expand + depthwise + pointwise'''
|
||||||
|
def __init__(self, in_planes, out_planes, expansion, stride):
|
||||||
|
super(Block, self).__init__()
|
||||||
|
self.stride = stride
|
||||||
|
|
||||||
|
planes = expansion * in_planes
|
||||||
|
self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, stride=1, padding=0, bias=False)
|
||||||
|
self.bn1 = nn.BatchNorm2d(planes)
|
||||||
|
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, groups=planes, bias=False)
|
||||||
|
self.bn2 = nn.BatchNorm2d(planes)
|
||||||
|
self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False)
|
||||||
|
self.bn3 = nn.BatchNorm2d(out_planes)
|
||||||
|
|
||||||
|
self.shortcut = nn.Sequential()
|
||||||
|
if stride == 1 and in_planes != out_planes:
|
||||||
|
self.shortcut = nn.Sequential(
|
||||||
|
nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False),
|
||||||
|
nn.BatchNorm2d(out_planes),
|
||||||
|
)
|
||||||
|
|
||||||
|
def forward(self, x):
|
||||||
|
out = F.relu(self.bn1(self.conv1(x)))
|
||||||
|
out = F.relu(self.bn2(self.conv2(out)))
|
||||||
|
out = self.bn3(self.conv3(out))
|
||||||
|
out = out + self.shortcut(x) if self.stride==1 else out
|
||||||
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
class MobileNetV2(nn.Module):
    '''MobileNetV2 adapted for CIFAR10 (32x32 inputs, 10 classes by default).'''
    # (expansion, out_planes, num_blocks, stride)
    cfg = [(1, 16, 1, 1),
           (6, 24, 2, 1),  # NOTE: change stride 2 -> 1 for CIFAR10
           (6, 32, 3, 2),
           (6, 64, 4, 2),
           (6, 96, 3, 1),
           (6, 160, 3, 2),
           (6, 320, 1, 1)]

    def __init__(self, num_classes=10):
        super(MobileNetV2, self).__init__()
        # NOTE: change conv1 stride 2 -> 1 for CIFAR10
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(32)
        self.layers = self._make_layers(in_planes=32)
        # Final 1x1 conv widens 320 -> 1280 channels before the classifier.
        self.conv2 = nn.Conv2d(320, 1280, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn2 = nn.BatchNorm2d(1280)
        self.linear = nn.Linear(1280, num_classes)

    def _make_layers(self, in_planes):
        # Builds the stack of inverted-residual blocks described by cfg.
        # Only the first block of each stage carries the configured stride.
        layers = []
        for expansion, out_planes, num_blocks, stride in self.cfg:
            strides = [stride] + [1]*(num_blocks-1)
            for stride in strides:
                layers.append(Block(in_planes, out_planes, expansion, stride))
                in_planes = out_planes
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layers(out)
        out = F.relu(self.bn2(self.conv2(out)))
        # NOTE: change pooling kernel_size 7 -> 4 for CIFAR10
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out
|
||||||
|
|
||||||
|
|
||||||
|
def test():
    '''Smoke test: forward a dummy CIFAR-sized batch and print the output shape.'''
    model = MobileNetV2()
    batch = torch.randn(2, 3, 32, 32)
    result = model(batch)
    print(result.size())
|
||||||
|
|
||||||
|
# test()
|
|
@ -0,0 +1,125 @@
|
||||||
|
'''PNASNet in PyTorch.
|
||||||
|
|
||||||
|
Paper: Progressive Neural Architecture Search
|
||||||
|
'''
|
||||||
|
import torch
|
||||||
|
import torch.nn as nn
|
||||||
|
import torch.nn.functional as F
|
||||||
|
|
||||||
|
|
||||||
|
class SepConv(nn.Module):
    '''Separable Convolution: depthwise conv (groups=in_planes) + BatchNorm.'''
    def __init__(self, in_planes, out_planes, kernel_size, stride):
        super(SepConv, self).__init__()
        # Padding keeps spatial size unchanged at stride 1 for odd kernels.
        self.conv1 = nn.Conv2d(in_planes, out_planes,
                               kernel_size, stride,
                               padding=(kernel_size-1)//2,
                               bias=False, groups=in_planes)
        self.bn1 = nn.BatchNorm2d(out_planes)

    def forward(self, x):
        # No activation here; the cells apply ReLU after combining branches.
        return self.bn1(self.conv1(x))
|
||||||
|
|
||||||
|
|
||||||
|
class CellA(nn.Module):
    '''PNASNet cell A: a 7x7 separable-conv branch plus a max-pool branch, summed.'''
    def __init__(self, in_planes, out_planes, stride=1):
        super(CellA, self).__init__()
        self.stride = stride
        self.sep_conv1 = SepConv(in_planes, out_planes, kernel_size=7, stride=stride)
        if stride==2:
            # At stride 2 the pooled branch needs a 1x1 conv to match channels.
            self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False)
            self.bn1 = nn.BatchNorm2d(out_planes)

    def forward(self, x):
        y1 = self.sep_conv1(x)
        y2 = F.max_pool2d(x, kernel_size=3, stride=self.stride, padding=1)
        if self.stride==2:
            y2 = self.bn1(self.conv1(y2))
        # Branches are summed, then activated.
        return F.relu(y1+y2)
|
||||||
|
|
||||||
|
class CellB(nn.Module):
    '''PNASNet cell B: two summed branch pairs, concatenated then reduced 2C -> C.'''
    def __init__(self, in_planes, out_planes, stride=1):
        super(CellB, self).__init__()
        self.stride = stride
        # Left branch
        self.sep_conv1 = SepConv(in_planes, out_planes, kernel_size=7, stride=stride)
        self.sep_conv2 = SepConv(in_planes, out_planes, kernel_size=3, stride=stride)
        # Right branch
        self.sep_conv3 = SepConv(in_planes, out_planes, kernel_size=5, stride=stride)
        if stride==2:
            # At stride 2 the pooled path needs a 1x1 conv to match channels.
            self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False)
            self.bn1 = nn.BatchNorm2d(out_planes)
        # Reduce channels
        self.conv2 = nn.Conv2d(2*out_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn2 = nn.BatchNorm2d(out_planes)

    def forward(self, x):
        # Left branch
        y1 = self.sep_conv1(x)
        y2 = self.sep_conv2(x)
        # Right branch
        y3 = F.max_pool2d(x, kernel_size=3, stride=self.stride, padding=1)
        if self.stride==2:
            y3 = self.bn1(self.conv1(y3))
        y4 = self.sep_conv3(x)
        # Concat & reduce channels
        b1 = F.relu(y1+y2)
        b2 = F.relu(y3+y4)
        y = torch.cat([b1,b2], 1)
        return F.relu(self.bn2(self.conv2(y)))
|
||||||
|
|
||||||
|
class PNASNet(nn.Module):
    '''Progressive-NAS network: three cell stages separated by two downsampling cells.

    Args:
        cell_type: cell class to instantiate (CellA or CellB).
        num_cells: nominal cells per stage.  NOTE(review): _make_layer below is
            called with a hard-coded num_cells=6, so this argument is unused.
        num_planes: channel count of the first stage; doubled at each
            downsampling step.
    '''
    def __init__(self, cell_type, num_cells, num_planes):
        super(PNASNet, self).__init__()
        self.in_planes = num_planes
        self.cell_type = cell_type

        self.conv1 = nn.Conv2d(3, num_planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(num_planes)

        self.layer1 = self._make_layer(num_planes, num_cells=6)
        self.layer2 = self._downsample(num_planes*2)
        self.layer3 = self._make_layer(num_planes*2, num_cells=6)
        self.layer4 = self._downsample(num_planes*4)
        self.layer5 = self._make_layer(num_planes*4, num_cells=6)

        self.linear = nn.Linear(num_planes*4, 10)

    def _make_layer(self, planes, num_cells):
        # A stage of stride-1 cells at constant width.
        layers = []
        for _ in range(num_cells):
            layers.append(self.cell_type(self.in_planes, planes, stride=1))
            self.in_planes = planes
        return nn.Sequential(*layers)

    def _downsample(self, planes):
        # A single stride-2 cell halves spatial size and sets the new width.
        layer = self.cell_type(self.in_planes, planes, stride=2)
        self.in_planes = planes
        return layer

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.layer5(out)
        # 32x32 input downsampled twice -> 8x8 feature map; global pool it.
        out = F.avg_pool2d(out, 8)
        out = self.linear(out.view(out.size(0), -1))
        return out
|
||||||
|
|
||||||
|
|
||||||
|
def PNASNetA():
    '''PNASNet built from CellA: 6 cells per stage, 44 initial planes.'''
    return PNASNet(cell_type=CellA, num_cells=6, num_planes=44)

def PNASNetB():
    '''PNASNet built from CellB: 6 cells per stage, 32 initial planes.'''
    return PNASNet(cell_type=CellB, num_cells=6, num_planes=32)
|
||||||
|
|
||||||
|
|
||||||
|
def test():
    '''Smoke test: forward a single dummy image through PNASNetB and print it.'''
    model = PNASNetB()
    batch = torch.randn(1, 3, 32, 32)
    result = model(batch)
    print(result)
|
||||||
|
|
||||||
|
# test()
|
|
@ -0,0 +1,118 @@
|
||||||
|
'''Pre-activation ResNet in PyTorch.
|
||||||
|
|
||||||
|
Reference:
|
||||||
|
[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
|
||||||
|
Identity Mappings in Deep Residual Networks. arXiv:1603.05027
|
||||||
|
'''
|
||||||
|
import torch
|
||||||
|
import torch.nn as nn
|
||||||
|
import torch.nn.functional as F
|
||||||
|
|
||||||
|
|
||||||
|
class PreActBlock(nn.Module):
    '''Pre-activation version of the BasicBlock.'''
    expansion = 1  # output channels = expansion * planes

    def __init__(self, in_planes, planes, stride=1):
        super(PreActBlock, self).__init__()
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)

        # Projection shortcut only when the shape changes; its absence is
        # detected with hasattr() in forward (identity path otherwise).
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False)
            )

    def forward(self, x):
        # Pre-activation: BN + ReLU come before each conv.
        out = F.relu(self.bn1(x))
        # Shortcut is taken from the pre-activated tensor.
        shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x
        out = self.conv1(out)
        out = self.conv2(F.relu(self.bn2(out)))
        out += shortcut
        return out
|
||||||
|
|
||||||
|
|
||||||
|
class PreActBottleneck(nn.Module):
    '''Pre-activation version of the original Bottleneck module.'''
    expansion = 4  # output channels = expansion * planes

    def __init__(self, in_planes, planes, stride=1):
        super(PreActBottleneck, self).__init__()
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False)

        # Projection shortcut only when the shape changes; absence detected
        # with hasattr() in forward (identity path otherwise).
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False)
            )

    def forward(self, x):
        # Pre-activation: BN + ReLU before each conv.
        out = F.relu(self.bn1(x))
        # Shortcut is taken from the pre-activated tensor.
        shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x
        out = self.conv1(out)
        out = self.conv2(F.relu(self.bn2(out)))
        out = self.conv3(F.relu(self.bn3(out)))
        out += shortcut
        return out
|
||||||
|
|
||||||
|
|
||||||
|
class PreActResNet(nn.Module):
    '''Pre-activation ResNet for CIFAR10 (32x32 inputs, 10 classes by default).'''
    def __init__(self, block, num_blocks, num_classes=10):
        super(PreActResNet, self).__init__()
        self.in_planes = 64

        # No BN/ReLU after conv1: pre-activation blocks do it themselves.
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512*block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        # First block of each stage carries the stride; the rest are stride 1.
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        out = self.conv1(x)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        # 32x32 input downsampled 3x -> 4x4 feature map; global pool it.
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out
|
||||||
|
|
||||||
|
|
||||||
|
def PreActResNet18():
    '''PreAct ResNet-18: basic blocks, stage depths [2, 2, 2, 2].'''
    depths = [2, 2, 2, 2]
    return PreActResNet(PreActBlock, depths)

def PreActResNet34():
    '''PreAct ResNet-34: basic blocks, stage depths [3, 4, 6, 3].'''
    depths = [3, 4, 6, 3]
    return PreActResNet(PreActBlock, depths)

def PreActResNet50():
    '''PreAct ResNet-50: bottleneck blocks, stage depths [3, 4, 6, 3].'''
    depths = [3, 4, 6, 3]
    return PreActResNet(PreActBottleneck, depths)

def PreActResNet101():
    '''PreAct ResNet-101: bottleneck blocks, stage depths [3, 4, 23, 3].'''
    depths = [3, 4, 23, 3]
    return PreActResNet(PreActBottleneck, depths)

def PreActResNet152():
    '''PreAct ResNet-152: bottleneck blocks, stage depths [3, 8, 36, 3].'''
    depths = [3, 8, 36, 3]
    return PreActResNet(PreActBottleneck, depths)
|
||||||
|
|
||||||
|
|
||||||
|
def test():
    '''Smoke test: forward one dummy image through PreActResNet18.'''
    model = PreActResNet18()
    result = model(torch.randn(1, 3, 32, 32))
    print(result.size())
|
||||||
|
|
||||||
|
# test()
|
|
@ -0,0 +1,121 @@
|
||||||
|
'''ResNet in PyTorch.
|
||||||
|
|
||||||
|
For Pre-activation ResNet, see 'preact_resnet.py'.
|
||||||
|
|
||||||
|
Reference:
|
||||||
|
[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
|
||||||
|
Deep Residual Learning for Image Recognition. arXiv:1512.03385
|
||||||
|
'''
|
||||||
|
import torch
|
||||||
|
import torch.nn as nn
|
||||||
|
import torch.nn.functional as F
|
||||||
|
|
||||||
|
|
||||||
|
class BasicBlock(nn.Module):
    '''Standard ResNet basic block: two 3x3 convs with a residual shortcut.'''
    expansion = 1  # output channels = expansion * planes

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        # Identity shortcut; replaced with a 1x1 projection when shape changes.
        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out
|
||||||
|
|
||||||
|
|
||||||
|
class Bottleneck(nn.Module):
    '''ResNet bottleneck block: 1x1 reduce, 3x3, 1x1 expand (x4), with shortcut.'''
    expansion = 4  # output channels = expansion * planes

    def __init__(self, in_planes, planes, stride=1):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(self.expansion*planes)

        # Identity shortcut; replaced with a 1x1 projection when shape changes.
        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out
|
||||||
|
|
||||||
|
|
||||||
|
class ResNet(nn.Module):
    '''ResNet for CIFAR10 (32x32 inputs, 10 classes by default).'''
    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet, self).__init__()
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512*block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        # First block of each stage carries the stride; the rest are stride 1.
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        # 32x32 input downsampled 3x -> 4x4 feature map; global pool it.
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out
|
||||||
|
|
||||||
|
|
||||||
|
def ResNet18():
    '''ResNet-18: basic blocks, stage depths [2, 2, 2, 2].'''
    depths = [2, 2, 2, 2]
    return ResNet(BasicBlock, depths)

def ResNet34():
    '''ResNet-34: basic blocks, stage depths [3, 4, 6, 3].'''
    depths = [3, 4, 6, 3]
    return ResNet(BasicBlock, depths)

def ResNet50():
    '''ResNet-50: bottleneck blocks, stage depths [3, 4, 6, 3].'''
    depths = [3, 4, 6, 3]
    return ResNet(Bottleneck, depths)

def ResNet101():
    '''ResNet-101: bottleneck blocks, stage depths [3, 4, 23, 3].'''
    depths = [3, 4, 23, 3]
    return ResNet(Bottleneck, depths)

def ResNet152():
    '''ResNet-152: bottleneck blocks, stage depths [3, 8, 36, 3].'''
    depths = [3, 8, 36, 3]
    return ResNet(Bottleneck, depths)
|
||||||
|
|
||||||
|
|
||||||
|
def test():
    '''Smoke test: forward one dummy image through ResNet18.'''
    model = ResNet18()
    result = model(torch.randn(1, 3, 32, 32))
    print(result.size())
|
||||||
|
|
||||||
|
# test()
|
|
@ -0,0 +1,95 @@
|
||||||
|
'''ResNeXt in PyTorch.
|
||||||
|
|
||||||
|
See the paper "Aggregated Residual Transformations for Deep Neural Networks" for more details.
|
||||||
|
'''
|
||||||
|
import torch
|
||||||
|
import torch.nn as nn
|
||||||
|
import torch.nn.functional as F
|
||||||
|
|
||||||
|
|
||||||
|
class Block(nn.Module):
    '''Grouped convolution block.'''
    expansion = 2  # output channels = expansion * cardinality * bottleneck_width

    def __init__(self, in_planes, cardinality=32, bottleneck_width=4, stride=1):
        super(Block, self).__init__()
        group_width = cardinality * bottleneck_width
        self.conv1 = nn.Conv2d(in_planes, group_width, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(group_width)
        # Grouped 3x3 conv: `cardinality` parallel transformation paths.
        self.conv2 = nn.Conv2d(group_width, group_width, kernel_size=3, stride=stride, padding=1, groups=cardinality, bias=False)
        self.bn2 = nn.BatchNorm2d(group_width)
        self.conv3 = nn.Conv2d(group_width, self.expansion*group_width, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(self.expansion*group_width)

        # Identity shortcut; replaced with a 1x1 projection when shape changes.
        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*group_width:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*group_width, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*group_width)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out
|
||||||
|
|
||||||
|
|
||||||
|
class ResNeXt(nn.Module):
    '''ResNeXt for CIFAR10: three stages of grouped-conv blocks (layer4 disabled).'''
    def __init__(self, num_blocks, cardinality, bottleneck_width, num_classes=10):
        super(ResNeXt, self).__init__()
        self.cardinality = cardinality
        self.bottleneck_width = bottleneck_width
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(num_blocks[0], 1)
        self.layer2 = self._make_layer(num_blocks[1], 2)
        self.layer3 = self._make_layer(num_blocks[2], 2)
        # self.layer4 = self._make_layer(num_blocks[3], 2)
        # After 3 stages bottleneck_width has doubled twice, so the final
        # width is expansion(2) * cardinality * width * 4 = c * w * 8.
        self.linear = nn.Linear(cardinality*bottleneck_width*8, num_classes)

    def _make_layer(self, num_blocks, stride):
        # First block of each stage carries the stride; the rest are stride 1.
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(Block(self.in_planes, self.cardinality, self.bottleneck_width, stride))
            self.in_planes = Block.expansion * self.cardinality * self.bottleneck_width
        # Increase bottleneck_width by 2 after each stage.
        self.bottleneck_width *= 2
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        # out = self.layer4(out)
        # 32x32 input downsampled twice -> 8x8 feature map; global pool it.
        out = F.avg_pool2d(out, 8)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out
|
||||||
|
|
||||||
|
|
||||||
|
def ResNeXt29_2x64d():
    '''ResNeXt-29: cardinality 2, bottleneck width 64.'''
    return ResNeXt(num_blocks=[3, 3, 3], cardinality=2, bottleneck_width=64)

def ResNeXt29_4x64d():
    '''ResNeXt-29: cardinality 4, bottleneck width 64.'''
    return ResNeXt(num_blocks=[3, 3, 3], cardinality=4, bottleneck_width=64)

def ResNeXt29_8x64d():
    '''ResNeXt-29: cardinality 8, bottleneck width 64.'''
    return ResNeXt(num_blocks=[3, 3, 3], cardinality=8, bottleneck_width=64)

def ResNeXt29_32x4d():
    '''ResNeXt-29: cardinality 32, bottleneck width 4.'''
    return ResNeXt(num_blocks=[3, 3, 3], cardinality=32, bottleneck_width=4)
|
||||||
|
|
||||||
|
def test_resnext():
    '''Smoke test: forward one dummy image through ResNeXt29_2x64d.'''
    model = ResNeXt29_2x64d()
    batch = torch.randn(1, 3, 32, 32)
    result = model(batch)
    print(result.size())
|
||||||
|
|
||||||
|
# test_resnext()
|
|
@ -0,0 +1,121 @@
|
||||||
|
'''SENet in PyTorch.
|
||||||
|
|
||||||
|
SENet is the winner of ImageNet-2017. The paper is not released yet.
|
||||||
|
'''
|
||||||
|
import torch
|
||||||
|
import torch.nn as nn
|
||||||
|
import torch.nn.functional as F
|
||||||
|
|
||||||
|
|
||||||
|
class BasicBlock(nn.Module):
    '''ResNet BasicBlock with a Squeeze-and-Excitation (SE) channel gate.'''
    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        # Identity shortcut; replaced with a 1x1 projection when shape changes.
        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes)
            )

        # SE layers: 1x1 convs act as per-channel fully-connected layers
        # (reduction ratio 16).  Use nn.Conv2d instead of nn.Linear.
        self.fc1 = nn.Conv2d(planes, planes//16, kernel_size=1)
        self.fc2 = nn.Conv2d(planes//16, planes, kernel_size=1)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        # Squeeze: global average pool down to 1x1 per channel.
        w = F.avg_pool2d(out, out.size(2))
        w = F.relu(self.fc1(w))
        # torch.sigmoid replaces the deprecated F.sigmoid (removed alias).
        w = torch.sigmoid(self.fc2(w))
        # Excitation: per-channel rescaling via broadcasting.
        out = out * w

        out += self.shortcut(x)
        out = F.relu(out)
        return out
|
||||||
|
|
||||||
|
|
||||||
|
class PreActBlock(nn.Module):
    '''Pre-activation ResNet block with a Squeeze-and-Excitation (SE) gate.'''
    def __init__(self, in_planes, planes, stride=1):
        super(PreActBlock, self).__init__()
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)

        # Projection shortcut only when shape changes; its absence is
        # detected with hasattr() in forward (identity path otherwise).
        if stride != 1 or in_planes != planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride, bias=False)
            )

        # SE layers: 1x1 convs act as per-channel fully-connected layers
        # (reduction ratio 16).
        self.fc1 = nn.Conv2d(planes, planes//16, kernel_size=1)
        self.fc2 = nn.Conv2d(planes//16, planes, kernel_size=1)

    def forward(self, x):
        # Pre-activation: BN + ReLU before each conv.
        out = F.relu(self.bn1(x))
        shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x
        out = self.conv1(out)
        out = self.conv2(F.relu(self.bn2(out)))

        # Squeeze: global average pool down to 1x1 per channel.
        w = F.avg_pool2d(out, out.size(2))
        w = F.relu(self.fc1(w))
        # torch.sigmoid replaces the deprecated F.sigmoid (removed alias).
        w = torch.sigmoid(self.fc2(w))
        # Excitation: per-channel rescaling via broadcasting.
        out = out * w

        out += shortcut
        return out
|
||||||
|
|
||||||
|
|
||||||
|
class SENet(nn.Module):
    '''SENet for CIFAR10: a ResNet-style stack whose blocks carry SE gates.'''
    def __init__(self, block, num_blocks, num_classes=10):
        super(SENet, self).__init__()
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        # NOTE: assumes block expansion of 1 (512 output channels).
        self.linear = nn.Linear(512, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        # First block of each stage carries the stride; the rest are stride 1.
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        # 32x32 input downsampled 3x -> 4x4 feature map; global pool it.
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out
|
||||||
|
|
||||||
|
|
||||||
|
def SENet18():
    '''SE-ResNet-18: pre-activation SE blocks, stage depths [2, 2, 2, 2].'''
    return SENet(PreActBlock, [2, 2, 2, 2])
|
||||||
|
|
||||||
|
|
||||||
|
def test():
    '''Smoke test: forward one dummy image through SENet18.'''
    model = SENet18()
    result = model(torch.randn(1, 3, 32, 32))
    print(result.size())
|
||||||
|
|
||||||
|
# test()
|
|
@ -0,0 +1,109 @@
|
||||||
|
'''ShuffleNet in PyTorch.
|
||||||
|
|
||||||
|
See the paper "ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices" for more details.
|
||||||
|
'''
|
||||||
|
import torch
|
||||||
|
import torch.nn as nn
|
||||||
|
import torch.nn.functional as F
|
||||||
|
|
||||||
|
|
||||||
|
class ShuffleBlock(nn.Module):
    '''Channel shuffle: [N,C,H,W] -> [N,g,C/g,H,W] -> [N,C/g,g,H,W] -> [N,C,H,W].'''
    def __init__(self, groups):
        super(ShuffleBlock, self).__init__()
        self.groups = groups  # number of channel groups to interleave

    def forward(self, x):
        '''Interleave channels across the g groups (assumes C % g == 0).'''
        N, C, H, W = x.size()
        g = self.groups
        # Integer division is required: under Python 3, C/g is a float and
        # Tensor.view() rejects non-integer sizes.
        return x.view(N, g, C // g, H, W).permute(0, 2, 1, 3, 4).contiguous().view(N, C, H, W)
|
||||||
|
|
||||||
|
|
||||||
|
class Bottleneck(nn.Module):
    '''ShuffleNet unit: grouped 1x1 -> channel shuffle -> 3x3 depthwise -> grouped 1x1.

    At stride 2 the average-pooled input is concatenated to the output
    (channel growth); at stride 1 a plain residual addition is used.
    '''
    def __init__(self, in_planes, out_planes, stride, groups):
        super(Bottleneck, self).__init__()
        self.stride = stride

        # Bottleneck width is a quarter of the output width.  Integer
        # division is required: out_planes/4 is a float under Python 3 and
        # nn.Conv2d rejects non-integer channel counts.
        mid_planes = out_planes // 4
        # The first stage (in_planes == 24) uses no grouping on the first
        # 1x1 conv, following the ShuffleNet paper.
        g = 1 if in_planes == 24 else groups
        self.conv1 = nn.Conv2d(in_planes, mid_planes, kernel_size=1, groups=g, bias=False)
        self.bn1 = nn.BatchNorm2d(mid_planes)
        self.shuffle1 = ShuffleBlock(groups=g)
        # groups=mid_planes -> depthwise convolution.
        self.conv2 = nn.Conv2d(mid_planes, mid_planes, kernel_size=3, stride=stride, padding=1, groups=mid_planes, bias=False)
        self.bn2 = nn.BatchNorm2d(mid_planes)
        self.conv3 = nn.Conv2d(mid_planes, out_planes, kernel_size=1, groups=groups, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        self.shortcut = nn.Sequential()
        if stride == 2:
            self.shortcut = nn.Sequential(nn.AvgPool2d(3, stride=2, padding=1))

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.shuffle1(out)
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        res = self.shortcut(x)
        # stride 2: concatenate (channels grow); stride 1: residual sum.
        out = F.relu(torch.cat([out, res], 1)) if self.stride == 2 else F.relu(out + res)
        return out
|
||||||
|
|
||||||
|
|
||||||
|
class ShuffleNet(nn.Module):
    '''ShuffleNet for CIFAR10, built from a cfg dict.

    cfg keys: 'out_planes' (per-stage widths), 'num_blocks' (per-stage
    depths), 'groups' (group count for the grouped 1x1 convs).
    '''
    def __init__(self, cfg):
        super(ShuffleNet, self).__init__()
        out_planes = cfg['out_planes']
        num_blocks = cfg['num_blocks']
        groups = cfg['groups']

        self.conv1 = nn.Conv2d(3, 24, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(24)
        self.in_planes = 24
        self.layer1 = self._make_layer(out_planes[0], num_blocks[0], groups)
        self.layer2 = self._make_layer(out_planes[1], num_blocks[1], groups)
        self.layer3 = self._make_layer(out_planes[2], num_blocks[2], groups)
        self.linear = nn.Linear(out_planes[2], 10)

    def _make_layer(self, out_planes, num_blocks, groups):
        layers = []
        for i in range(num_blocks):
            # First block of each stage downsamples and concatenates the
            # shortcut, so its conv output is reduced by the shortcut's
            # channel count (cat_planes) to reach out_planes after concat.
            stride = 2 if i == 0 else 1
            cat_planes = self.in_planes if i == 0 else 0
            layers.append(Bottleneck(self.in_planes, out_planes-cat_planes, stride=stride, groups=groups))
            self.in_planes = out_planes
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        # 32x32 input downsampled 3x -> 4x4 feature map; global pool it.
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out
|
||||||
|
|
||||||
|
|
||||||
|
def ShuffleNetG2():
    '''ShuffleNet with group count 2.'''
    return ShuffleNet({
        'out_planes': [200, 400, 800],
        'num_blocks': [4, 8, 4],
        'groups': 2,
    })

def ShuffleNetG3():
    '''ShuffleNet with group count 3.'''
    return ShuffleNet({
        'out_planes': [240, 480, 960],
        'num_blocks': [4, 8, 4],
        'groups': 3,
    })
|
||||||
|
|
||||||
|
|
||||||
|
def test():
    '''Smoke test: forward one dummy image through ShuffleNetG2 and print it.'''
    model = ShuffleNetG2()
    batch = torch.randn(1, 3, 32, 32)
    result = model(batch)
    print(result)
|
||||||
|
|
||||||
|
# test()
|
|
@ -0,0 +1,47 @@
|
||||||
|
'''VGG11/13/16/19 in Pytorch.'''
|
||||||
|
import torch
|
||||||
|
import torch.nn as nn
|
||||||
|
|
||||||
|
|
||||||
|
# VGG configurations: integers are conv output channel counts,
# 'M' inserts a 2x2 max-pooling layer.
cfg = {
    'VGG11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'VGG19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}
|
||||||
|
|
||||||
|
|
||||||
|
class VGG(nn.Module):
    '''VGG (with BatchNorm) for CIFAR10; vgg_name selects a layout from cfg.'''
    def __init__(self, vgg_name):
        super(VGG, self).__init__()
        self.features = self._make_layers(cfg[vgg_name])
        # Five 'M' poolings reduce 32x32 -> 1x1, so the flattened width is 512.
        self.classifier = nn.Linear(512, 10)

    def forward(self, x):
        out = self.features(x)
        out = out.view(out.size(0), -1)
        out = self.classifier(out)
        return out

    def _make_layers(self, cfg):
        # Translate the config list into Conv+BN+ReLU / MaxPool layers.
        layers = []
        in_channels = 3
        for x in cfg:
            if x == 'M':
                layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
            else:
                layers += [nn.Conv2d(in_channels, x, kernel_size=3, padding=1),
                           nn.BatchNorm2d(x),
                           nn.ReLU(inplace=True)]
                in_channels = x
        # Trailing no-op pool (kernel 1, stride 1) kept for parity with the
        # reference implementation.
        layers += [nn.AvgPool2d(kernel_size=1, stride=1)]
        return nn.Sequential(*layers)
|
||||||
|
|
||||||
|
|
||||||
|
def test():
    '''Smoke test: forward a dummy batch through VGG11 and print the shape.'''
    model = VGG('VGG11')
    batch = torch.randn(2, 3, 32, 32)
    result = model(batch)
    print(result.size())
|
||||||
|
|
||||||
|
# test()
|
|
@ -0,0 +1,6 @@
|
||||||
|
This example requires PyTorch.
The PyTorch package to install should be chosen based on your Python and CUDA versions.
|
||||||
|
|
||||||
|
For example, in an environment with Python 3.5 and CUDA 8.0, use the following commands to install PyTorch:
|
||||||
|
pip3 install http://download.pytorch.org/whl/cu80/torch-0.4.1-cp35-cp35m-linux_x86_64.whl
|
||||||
|
pip3 install torchvision
|
|
@ -0,0 +1,5 @@
|
||||||
|
{
|
||||||
|
"lr":{"_type":"choice", "_value":[0.1, 0.01, 0.001, 0.0001]},
|
||||||
|
"optimizer":{"_type":"choice", "_value":["SGD", "Adadelta", "Adagrad", "Adam", "Adamax"]},
|
||||||
|
"model":{"_type":"choice", "_value":["vgg", "resnet18", "googlenet", "densenet121", "mobilenet", "dpn92", "senet18"]}
|
||||||
|
}
|
|
@ -0,0 +1,127 @@
|
||||||
|
'''Some helper functions for PyTorch, including:
|
||||||
|
- get_mean_and_std: calculate the mean and std value of dataset.
|
||||||
|
- msr_init: net parameter initialization.
|
||||||
|
- progress_bar: progress bar mimic xlua.progress.
|
||||||
|
'''
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
import math
|
||||||
|
|
||||||
|
import torch.nn as nn
|
||||||
|
import torch.nn.init as init
|
||||||
|
|
||||||
|
|
||||||
|
def get_mean_and_std(dataset):
|
||||||
|
'''Compute the mean and std value of dataset.'''
|
||||||
|
dataloader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=True, num_workers=2)
|
||||||
|
mean = torch.zeros(3)
|
||||||
|
std = torch.zeros(3)
|
||||||
|
print('==> Computing mean and std..')
|
||||||
|
for inputs, targets in dataloader:
|
||||||
|
for i in range(3):
|
||||||
|
mean[i] += inputs[:,i,:,:].mean()
|
||||||
|
std[i] += inputs[:,i,:,:].std()
|
||||||
|
mean.div_(len(dataset))
|
||||||
|
std.div_(len(dataset))
|
||||||
|
return mean, std
|
||||||
|
|
||||||
|
def init_params(net):
|
||||||
|
'''Init layer parameters.'''
|
||||||
|
for m in net.modules():
|
||||||
|
if isinstance(m, nn.Conv2d):
|
||||||
|
init.kaiming_normal(m.weight, mode='fan_out')
|
||||||
|
if m.bias:
|
||||||
|
init.constant(m.bias, 0)
|
||||||
|
elif isinstance(m, nn.BatchNorm2d):
|
||||||
|
init.constant(m.weight, 1)
|
||||||
|
init.constant(m.bias, 0)
|
||||||
|
elif isinstance(m, nn.Linear):
|
||||||
|
init.normal(m.weight, std=1e-3)
|
||||||
|
if m.bias:
|
||||||
|
init.constant(m.bias, 0)
|
||||||
|
|
||||||
|
term_width = 0
|
||||||
|
try:
|
||||||
|
_, term_width = os.popen('stty size', 'r').read().split()
|
||||||
|
except Exception as exception:
|
||||||
|
term_width = 200
|
||||||
|
term_width = int(term_width)
|
||||||
|
|
||||||
|
TOTAL_BAR_LENGTH = 65.
|
||||||
|
last_time = time.time()
|
||||||
|
begin_time = last_time
|
||||||
|
def progress_bar(current, total, msg=None):
|
||||||
|
global last_time, begin_time
|
||||||
|
if current == 0:
|
||||||
|
begin_time = time.time() # Reset for new bar.
|
||||||
|
|
||||||
|
cur_len = int(TOTAL_BAR_LENGTH*current/total)
|
||||||
|
rest_len = int(TOTAL_BAR_LENGTH - cur_len) - 1
|
||||||
|
|
||||||
|
sys.stdout.write(' [')
|
||||||
|
for i in range(cur_len):
|
||||||
|
sys.stdout.write('=')
|
||||||
|
sys.stdout.write('>')
|
||||||
|
for i in range(rest_len):
|
||||||
|
sys.stdout.write('.')
|
||||||
|
sys.stdout.write(']')
|
||||||
|
|
||||||
|
cur_time = time.time()
|
||||||
|
step_time = cur_time - last_time
|
||||||
|
last_time = cur_time
|
||||||
|
tot_time = cur_time - begin_time
|
||||||
|
|
||||||
|
L = []
|
||||||
|
L.append(' Step: %s' % format_time(step_time))
|
||||||
|
L.append(' | Tot: %s' % format_time(tot_time))
|
||||||
|
if msg:
|
||||||
|
L.append(' | ' + msg)
|
||||||
|
|
||||||
|
msg = ''.join(L)
|
||||||
|
sys.stdout.write(msg)
|
||||||
|
for i in range(term_width-int(TOTAL_BAR_LENGTH)-len(msg)-3):
|
||||||
|
sys.stdout.write(' ')
|
||||||
|
|
||||||
|
# Go back to the center of the bar.
|
||||||
|
for i in range(term_width-int(TOTAL_BAR_LENGTH/2)+2):
|
||||||
|
sys.stdout.write('\b')
|
||||||
|
sys.stdout.write(' %d/%d ' % (current+1, total))
|
||||||
|
|
||||||
|
if current < total-1:
|
||||||
|
sys.stdout.write('\r')
|
||||||
|
else:
|
||||||
|
sys.stdout.write('\n')
|
||||||
|
sys.stdout.flush()
|
||||||
|
|
||||||
|
def format_time(seconds):
|
||||||
|
days = int(seconds / 3600/24)
|
||||||
|
seconds = seconds - days*3600*24
|
||||||
|
hours = int(seconds / 3600)
|
||||||
|
seconds = seconds - hours*3600
|
||||||
|
minutes = int(seconds / 60)
|
||||||
|
seconds = seconds - minutes*60
|
||||||
|
secondsf = int(seconds)
|
||||||
|
seconds = seconds - secondsf
|
||||||
|
millis = int(seconds*1000)
|
||||||
|
|
||||||
|
f = ''
|
||||||
|
i = 1
|
||||||
|
if days > 0:
|
||||||
|
f += str(days) + 'D'
|
||||||
|
i += 1
|
||||||
|
if hours > 0 and i <= 2:
|
||||||
|
f += str(hours) + 'h'
|
||||||
|
i += 1
|
||||||
|
if minutes > 0 and i <= 2:
|
||||||
|
f += str(minutes) + 'm'
|
||||||
|
i += 1
|
||||||
|
if secondsf > 0 and i <= 2:
|
||||||
|
f += str(secondsf) + 's'
|
||||||
|
i += 1
|
||||||
|
if millis > 0 and i <= 2:
|
||||||
|
f += str(millis) + 'ms'
|
||||||
|
i += 1
|
||||||
|
if f == '':
|
||||||
|
f = '0ms'
|
||||||
|
return f
|
|
@ -0,0 +1,15 @@
|
||||||
|
# How to use ga_customer_tuner?
|
||||||
|
This tuner is a customized tuner which only suitable for trial whose code path is "~/nni/examples/trials/ga_squad",
|
||||||
|
type `cd ~/nni/examples/trials/ga_squad` and check readme.md to get more information for ga_squad trial.
|
||||||
|
|
||||||
|
# config
|
||||||
|
If you want to use ga_customer_tuner in your experiment, you could set config file as following format:
|
||||||
|
|
||||||
|
```
|
||||||
|
tuner:
|
||||||
|
codeDir: ~/nni/examples/tuners/ga_customer_tuner
|
||||||
|
classFileName: customer_tuner.py
|
||||||
|
className: CustomerTuner
|
||||||
|
classArgs:
|
||||||
|
optimize_mode: maximize
|
||||||
|
```
|
15
install.sh
15
install.sh
|
@ -1,12 +1,19 @@
|
||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
INSTALL_PREFIX=${HOME}/.local
|
||||||
|
mkdir -p ${INSTALL_PREFIX}
|
||||||
wget -4 -nc https://nodejs.org/dist/v10.9.0/node-v10.9.0-linux-x64.tar.xz --header "Referer: nodejs.org"
|
wget -4 -nc https://nodejs.org/dist/v10.9.0/node-v10.9.0-linux-x64.tar.xz --header "Referer: nodejs.org"
|
||||||
tar -xf 'node-v10.9.0-linux-x64.tar.xz'
|
tar -xf 'node-v10.9.0-linux-x64.tar.xz'
|
||||||
sudo cp -rf node-v10.9.0-linux-x64/* /usr/local/node/
|
cp -rT node-v10.9.0-linux-x64 ${INSTALL_PREFIX}/node
|
||||||
rm -rf node-v10.9.0-linux-x64*
|
rm -rf node-v10.9.0-linux-x64*
|
||||||
wget -4 -nc https://github.com/yarnpkg/yarn/releases/download/v1.9.4/yarn-v1.9.4.tar.gz
|
wget -4 -nc https://github.com/yarnpkg/yarn/releases/download/v1.9.4/yarn-v1.9.4.tar.gz
|
||||||
tar -xf 'yarn-v1.9.4.tar.gz'
|
tar -xf 'yarn-v1.9.4.tar.gz'
|
||||||
sudo cp -rf yarn-v1.9.4/* /usr/local/yarn/
|
cp -rT yarn-v1.9.4 ${INSTALL_PREFIX}/yarn
|
||||||
rm -rf yarn-v1.9.4*
|
rm -rf yarn-v1.9.4*
|
||||||
export PATH=/usr/local/node/bin:/usr/local/yarn/bin:$PATH
|
NODE_BIN=${INSTALL_PREFIX}/node/bin
|
||||||
|
YARN_BIN=${INSTALL_PREFIX}/yarn/bin
|
||||||
|
export PATH=${INSTALL_PREFIX}/node/bin:${INSTALL_PREFIX}/yarn/bin:$PATH
|
||||||
|
echo $PATH|grep -q ${NODE_BIN} || echo "export PATH=${NODE_BIN}:\${PATH}" >> ${HOME}/.bashrc
|
||||||
|
echo $PATH|grep -q ${YARN_BIN} || echo "export PATH=${YARN_BIN}:\${PATH}" >> ${HOME}/.bashrc
|
||||||
|
source ${HOME}/.bashrc
|
||||||
make
|
make
|
||||||
sudo make install
|
make install
|
|
@ -0,0 +1,96 @@
|
||||||
|
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||||
|
#
|
||||||
|
# MIT License
|
||||||
|
#
|
||||||
|
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
|
||||||
|
# associated documentation files (the "Software"), to deal in the Software without restriction,
|
||||||
|
# including without limitation the rights to use, copy, modify, merge, publish, distribute,
|
||||||
|
# sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
|
||||||
|
# furnished to do so, subject to the following conditions:
|
||||||
|
#
|
||||||
|
# The above copyright notice and this permission notice shall be included in all copies or
|
||||||
|
# substantial portions of the Software.
|
||||||
|
#
|
||||||
|
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
|
||||||
|
# NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
|
||||||
|
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
|
||||||
|
# OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
# ==================================================================================================
|
||||||
|
|
||||||
|
|
||||||
|
import os
|
||||||
|
from setuptools import setup, find_packages
|
||||||
|
from setuptools.command.install import install
|
||||||
|
from subprocess import Popen
|
||||||
|
|
||||||
|
def read(fname):
|
||||||
|
return open(os.path.join(os.path.dirname(__file__), fname)).read()
|
||||||
|
|
||||||
|
class CustomInstallCommand(install):
|
||||||
|
'''a customized install class in pip module'''
|
||||||
|
def makeInstall(self):
|
||||||
|
'''execute make pip-install command'''
|
||||||
|
cmds = ['make', 'pip-install']
|
||||||
|
process = Popen(cmds)
|
||||||
|
if process.wait() != 0:
|
||||||
|
print('Error: Make Install Failed')
|
||||||
|
exit(-1)
|
||||||
|
|
||||||
|
def writeEnvironmentVariables(self, variable_name):
|
||||||
|
'''write an environment variable into ~/.bashrc'''
|
||||||
|
paths = os.getenv("PATH").split(':')
|
||||||
|
bin_path = os.path.join(os.getenv('HOME'),'.local/'+variable_name+'/bin')
|
||||||
|
|
||||||
|
if bin_path not in paths:
|
||||||
|
bashrc_path = os.path.join(os.getenv('HOME'), '.bashrc')
|
||||||
|
process = Popen('echo export PATH=' + bin_path + ':\$PATH >> ' + bashrc_path, shell=True)
|
||||||
|
if process.wait() != 0:
|
||||||
|
print('Error: Write Environment Variables Failed')
|
||||||
|
exit(-1)
|
||||||
|
|
||||||
|
def run(self):
|
||||||
|
install.run(self)
|
||||||
|
self.makeInstall()
|
||||||
|
self.writeEnvironmentVariables('node')
|
||||||
|
self.writeEnvironmentVariables('yarn')
|
||||||
|
|
||||||
|
setup(
|
||||||
|
name = 'NNI',
|
||||||
|
version = '0.0.1',
|
||||||
|
author = 'Microsoft NNI Team',
|
||||||
|
author_email = 'nni@microsoft.com',
|
||||||
|
description = 'Neural Network Intelligence project',
|
||||||
|
long_description = read('docs/NNICTLDOC.md'),
|
||||||
|
license = 'MIT',
|
||||||
|
url = 'https://msrasrg.visualstudio.com/NeuralNetworkIntelligence',
|
||||||
|
|
||||||
|
packages = find_packages('src/sdk/pynni', exclude=['tests']) + find_packages('tools'),
|
||||||
|
package_dir = {
|
||||||
|
'annotation': 'tools/annotation',
|
||||||
|
'nni': 'src/sdk/pynni/nni',
|
||||||
|
'nnicmd': 'tools/nnicmd'
|
||||||
|
},
|
||||||
|
python_requires = '>=3.5',
|
||||||
|
install_requires = [
|
||||||
|
'astor',
|
||||||
|
'json_tricks',
|
||||||
|
'numpy',
|
||||||
|
'psutil',
|
||||||
|
'pymc3',
|
||||||
|
'pyyaml',
|
||||||
|
'requests',
|
||||||
|
'scipy'
|
||||||
|
|
||||||
|
],
|
||||||
|
dependency_links = [
|
||||||
|
'git+https://github.com/hyperopt/hyperopt.git',
|
||||||
|
],
|
||||||
|
|
||||||
|
cmdclass={
|
||||||
|
'install': CustomInstallCommand
|
||||||
|
},
|
||||||
|
entry_points={
|
||||||
|
'console_scripts': ['nnictl = nnicmd.nnictl:parse_args']
|
||||||
|
}
|
||||||
|
)
|
|
@ -32,16 +32,22 @@ interface ExperimentParams {
|
||||||
maxTrialNum: number;
|
maxTrialNum: number;
|
||||||
searchSpace: string;
|
searchSpace: string;
|
||||||
tuner: {
|
tuner: {
|
||||||
tunerCommand: string;
|
className: string;
|
||||||
tunerCwd: string;
|
builtinTunerName?: string;
|
||||||
tunerCheckpointDirectory: string;
|
codeDir?: string;
|
||||||
tunerGpuNum?: number;
|
classArgs?: any;
|
||||||
|
classFileName?: string;
|
||||||
|
checkpointDir: string;
|
||||||
|
gpuNum?: number;
|
||||||
};
|
};
|
||||||
assessor?: {
|
assessor?: {
|
||||||
assessorCommand: string;
|
className: string;
|
||||||
assessorCwd: string;
|
builtinAssessorName?: string;
|
||||||
assessorCheckpointDirectory: string;
|
codeDir?: string;
|
||||||
assessorGpuNum?: number;
|
classArgs?: any;
|
||||||
|
classFileName?: string;
|
||||||
|
checkpointDir: string;
|
||||||
|
gpuNum?: number;
|
||||||
};
|
};
|
||||||
clusterMetaData?: {
|
clusterMetaData?: {
|
||||||
key: string;
|
key: string;
|
||||||
|
|
|
@ -105,6 +105,8 @@ abstract class TrainingService {
|
||||||
public abstract addTrialJobMetricListener(listener: (metric: TrialJobMetric) => void): void;
|
public abstract addTrialJobMetricListener(listener: (metric: TrialJobMetric) => void): void;
|
||||||
public abstract removeTrialJobMetricListener(listener: (metric: TrialJobMetric) => void): void;
|
public abstract removeTrialJobMetricListener(listener: (metric: TrialJobMetric) => void): void;
|
||||||
public abstract submitTrialJob(form: JobApplicationForm): Promise<TrialJobDetail>;
|
public abstract submitTrialJob(form: JobApplicationForm): Promise<TrialJobDetail>;
|
||||||
|
public abstract updateTrialJob(trialJobId: string, form: JobApplicationForm): Promise<TrialJobDetail>;
|
||||||
|
public abstract get isMultiPhaseJobSupported(): boolean;
|
||||||
public abstract cancelTrialJob(trialJobId: string): Promise<void>;
|
public abstract cancelTrialJob(trialJobId: string): Promise<void>;
|
||||||
public abstract setClusterMetadata(key: string, value: string): Promise<void>;
|
public abstract setClusterMetadata(key: string, value: string): Promise<void>;
|
||||||
public abstract getClusterMetadata(key: string): Promise<string>;
|
public abstract getClusterMetadata(key: string): Promise<string>;
|
||||||
|
|
|
@ -28,7 +28,7 @@ import { Container } from 'typescript-ioc';
|
||||||
import * as util from 'util';
|
import * as util from 'util';
|
||||||
|
|
||||||
import { Database, DataStore } from './datastore';
|
import { Database, DataStore } from './datastore';
|
||||||
import { ExperimentStartupInfo, setExperimentStartupInfo, getExperimentId } from './experimentStartupInfo';
|
import { ExperimentStartupInfo, getExperimentId, setExperimentStartupInfo } from './experimentStartupInfo';
|
||||||
import { Manager } from './manager';
|
import { Manager } from './manager';
|
||||||
import { TrainingService } from './trainingService';
|
import { TrainingService } from './trainingService';
|
||||||
|
|
||||||
|
@ -127,6 +127,63 @@ function parseArg(names: string[]): string {
|
||||||
return '';
|
return '';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generate command line to start advisor process which runs tuner and assessor
|
||||||
|
* @param tuner : For builtin tuner:
|
||||||
|
* {
|
||||||
|
* className: 'EvolutionTuner'
|
||||||
|
* classArgs: {
|
||||||
|
* optimize_mode: 'maximize',
|
||||||
|
* population_size: 3
|
||||||
|
* }
|
||||||
|
* }
|
||||||
|
* customized:
|
||||||
|
* {
|
||||||
|
* codeDir: '/tmp/mytuner'
|
||||||
|
* classFile: 'best_tuner.py'
|
||||||
|
* className: 'BestTuner'
|
||||||
|
* classArgs: {
|
||||||
|
* optimize_mode: 'maximize',
|
||||||
|
* population_size: 3
|
||||||
|
* }
|
||||||
|
* }
|
||||||
|
*
|
||||||
|
* @param assessor: similiar as tuner
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
function getMsgDispatcherCommand(tuner: any, assessor: any): string {
|
||||||
|
let command: string = `python3 -m nni --tuner_class_name ${tuner.className}`;
|
||||||
|
|
||||||
|
if (tuner.classArgs !== undefined) {
|
||||||
|
command += ` --tuner_args ${JSON.stringify(JSON.stringify(tuner.classArgs))}`;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (tuner.codeDir !== undefined && tuner.codeDir.length > 1) {
|
||||||
|
command += ` --tuner_directory ${tuner.codeDir}`;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (tuner.classFileName !== undefined && tuner.classFileName.length > 1) {
|
||||||
|
command += ` --tuner_class_filename ${tuner.classFileName}`;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (assessor !== undefined && assessor.className !== undefined) {
|
||||||
|
command += ` --assessor_class_name ${assessor.className}`;
|
||||||
|
if (assessor.classArgs !== undefined) {
|
||||||
|
command += ` --assessor_args ${JSON.stringify(JSON.stringify(assessor.classArgs))}`;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (assessor.codeDir !== undefined && assessor.codeDir.length > 1) {
|
||||||
|
command += ` --assessor_directory ${assessor.codeDir}`;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (assessor.classFileName !== undefined && assessor.classFileName.length > 1) {
|
||||||
|
command += ` --assessor_class_filename ${assessor.classFileName}`;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return command;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Initialize a pseudo experiment environment for unit test.
|
* Initialize a pseudo experiment environment for unit test.
|
||||||
* Must be paired with `cleanupUnitTest()`.
|
* Must be paired with `cleanupUnitTest()`.
|
||||||
|
@ -161,5 +218,5 @@ function cleanupUnitTest(): void {
|
||||||
Container.restore(ExperimentStartupInfo);
|
Container.restore(ExperimentStartupInfo);
|
||||||
}
|
}
|
||||||
|
|
||||||
export { getLogDir, getExperimentRootDir, getDefaultDatabaseDir, mkDirP, delay, prepareUnitTest,
|
export { getMsgDispatcherCommand, getLogDir, getExperimentRootDir, getDefaultDatabaseDir, mkDirP, delay, prepareUnitTest,
|
||||||
parseArg, cleanupUnitTest, uniqueString };
|
parseArg, cleanupUnitTest, uniqueString };
|
||||||
|
|
|
@ -135,16 +135,8 @@ class IpcInterface {
|
||||||
* Create IPC proxy for tuner process
|
* Create IPC proxy for tuner process
|
||||||
* @param process_ the tuner process
|
* @param process_ the tuner process
|
||||||
*/
|
*/
|
||||||
function createTunerInterface(process: ChildProcess): IpcInterface {
|
function createDispatcherInterface(process: ChildProcess): IpcInterface {
|
||||||
return new IpcInterface(process, CommandType.TUNER_COMMANDS);
|
return new IpcInterface(process, new Set([...CommandType.TUNER_COMMANDS, ...CommandType.ASSESSOR_COMMANDS]));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
export { IpcInterface, createDispatcherInterface };
|
||||||
* Create IPC proxy for assessor process
|
|
||||||
* @param process_ the assessor process
|
|
||||||
*/
|
|
||||||
function createAssessorInterface(process: ChildProcess): IpcInterface {
|
|
||||||
return new IpcInterface(process, CommandType.ASSESSOR_COMMANDS);
|
|
||||||
}
|
|
||||||
|
|
||||||
export { IpcInterface, createTunerInterface, createAssessorInterface };
|
|
||||||
|
|
|
@ -185,6 +185,9 @@ class NNIDataStore implements DataStore {
|
||||||
// assume data is stored by time ASC order
|
// assume data is stored by time ASC order
|
||||||
for (const record of trialJobEvents) {
|
for (const record of trialJobEvents) {
|
||||||
let jobInfo: TrialJobInfo | undefined;
|
let jobInfo: TrialJobInfo | undefined;
|
||||||
|
if (record.trialJobId === undefined || record.trialJobId.length < 1) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
if (map.has(record.trialJobId)) {
|
if (map.has(record.trialJobId)) {
|
||||||
jobInfo = map.get(record.trialJobId);
|
jobInfo = map.get(record.trialJobId);
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -34,12 +34,12 @@ import {
|
||||||
import {
|
import {
|
||||||
TrainingService, TrialJobApplicationForm, TrialJobDetail, TrialJobMetric, TrialJobStatus
|
TrainingService, TrialJobApplicationForm, TrialJobDetail, TrialJobMetric, TrialJobStatus
|
||||||
} from '../common/trainingService';
|
} from '../common/trainingService';
|
||||||
import { delay , getLogDir} from '../common/utils';
|
import { delay , getLogDir, getMsgDispatcherCommand} from '../common/utils';
|
||||||
import {
|
import {
|
||||||
ADD_CUSTOMIZED_TRIAL_JOB, KILL_TRIAL_JOB, NEW_TRIAL_JOB, NO_MORE_TRIAL_JOBS, REPORT_METRIC_DATA,
|
ADD_CUSTOMIZED_TRIAL_JOB, KILL_TRIAL_JOB, NEW_TRIAL_JOB, NO_MORE_TRIAL_JOBS, REPORT_METRIC_DATA,
|
||||||
REQUEST_TRIAL_JOBS, TERMINATE, TRIAL_END, UPDATE_SEARCH_SPACE
|
REQUEST_TRIAL_JOBS, TERMINATE, TRIAL_END, UPDATE_SEARCH_SPACE
|
||||||
} from './commands';
|
} from './commands';
|
||||||
import { createAssessorInterface, createTunerInterface, IpcInterface } from './ipcInterface';
|
import { createDispatcherInterface, IpcInterface } from './ipcInterface';
|
||||||
import { TrialJobMaintainerEvent, TrialJobs } from './trialJobs';
|
import { TrialJobMaintainerEvent, TrialJobs } from './trialJobs';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -47,8 +47,7 @@ import { TrialJobMaintainerEvent, TrialJobs } from './trialJobs';
|
||||||
*/
|
*/
|
||||||
class NNIManager implements Manager {
|
class NNIManager implements Manager {
|
||||||
private trainingService: TrainingService;
|
private trainingService: TrainingService;
|
||||||
private tuner: IpcInterface | undefined;
|
private dispatcher: IpcInterface | undefined;
|
||||||
private assessor: IpcInterface | undefined;
|
|
||||||
private trialJobsMaintainer: TrialJobs | undefined;
|
private trialJobsMaintainer: TrialJobs | undefined;
|
||||||
private currSubmittedTrialNum: number; // need to be recovered
|
private currSubmittedTrialNum: number; // need to be recovered
|
||||||
private trialConcurrencyReduction: number;
|
private trialConcurrencyReduction: number;
|
||||||
|
@ -56,9 +55,7 @@ class NNIManager implements Manager {
|
||||||
private log: Logger;
|
private log: Logger;
|
||||||
private dataStore: DataStore;
|
private dataStore: DataStore;
|
||||||
private experimentProfile: ExperimentProfile;
|
private experimentProfile: ExperimentProfile;
|
||||||
// TO DO: could use struct here
|
private dispatcherPid: number;
|
||||||
private tunerPid: number;
|
|
||||||
private assessorPid: number;
|
|
||||||
|
|
||||||
constructor() {
|
constructor() {
|
||||||
this.currSubmittedTrialNum = 0;
|
this.currSubmittedTrialNum = 0;
|
||||||
|
@ -67,8 +64,7 @@ class NNIManager implements Manager {
|
||||||
const experimentId: string = getExperimentId();
|
const experimentId: string = getExperimentId();
|
||||||
this.trainingService = component.get(TrainingService);
|
this.trainingService = component.get(TrainingService);
|
||||||
assert(this.trainingService);
|
assert(this.trainingService);
|
||||||
this.tunerPid = 0;
|
this.dispatcherPid = 0;
|
||||||
this.assessorPid = 0;
|
|
||||||
|
|
||||||
this.log = getLogger();
|
this.log = getLogger();
|
||||||
this.dataStore = component.get(DataStore);
|
this.dataStore = component.get(DataStore);
|
||||||
|
@ -84,9 +80,9 @@ class NNIManager implements Manager {
|
||||||
maxTrialNum: 0, // maxTrialNum includes all the submitted trial jobs
|
maxTrialNum: 0, // maxTrialNum includes all the submitted trial jobs
|
||||||
searchSpace: '',
|
searchSpace: '',
|
||||||
tuner: {
|
tuner: {
|
||||||
tunerCommand: '',
|
className: '',
|
||||||
tunerCwd: '',
|
classArgs: {},
|
||||||
tunerCheckpointDirectory: ''
|
checkpointDir: ''
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -134,21 +130,15 @@ class NNIManager implements Manager {
|
||||||
this.experimentProfile.params = expParams;
|
this.experimentProfile.params = expParams;
|
||||||
await this.storeExperimentProfile();
|
await this.storeExperimentProfile();
|
||||||
this.log.debug('Setup tuner...');
|
this.log.debug('Setup tuner...');
|
||||||
this.setupTuner(
|
|
||||||
expParams.tuner.tunerCommand,
|
|
||||||
expParams.tuner.tunerCwd,
|
|
||||||
'start',
|
|
||||||
expParams.tuner.tunerCheckpointDirectory);
|
|
||||||
|
|
||||||
if (expParams.assessor !== undefined) {
|
const dispatcherCommand: string = getMsgDispatcherCommand(expParams.tuner, expParams.assessor);
|
||||||
this.log.debug('Setup assessor...');
|
console.log(`dispatcher command: ${dispatcherCommand}`);
|
||||||
this.setupAssessor(
|
this.setupTuner(
|
||||||
expParams.assessor.assessorCommand,
|
//expParams.tuner.tunerCommand,
|
||||||
expParams.assessor.assessorCwd,
|
dispatcherCommand,
|
||||||
'start',
|
undefined,
|
||||||
expParams.assessor.assessorCheckpointDirectory
|
'start',
|
||||||
);
|
expParams.tuner.checkpointDir);
|
||||||
}
|
|
||||||
|
|
||||||
this.experimentProfile.startTime = new Date();
|
this.experimentProfile.startTime = new Date();
|
||||||
await this.storeExperimentProfile();
|
await this.storeExperimentProfile();
|
||||||
|
@ -164,20 +154,13 @@ class NNIManager implements Manager {
|
||||||
this.experimentProfile = await this.dataStore.getExperimentProfile(experimentId);
|
this.experimentProfile = await this.dataStore.getExperimentProfile(experimentId);
|
||||||
const expParams: ExperimentParams = this.experimentProfile.params;
|
const expParams: ExperimentParams = this.experimentProfile.params;
|
||||||
|
|
||||||
|
const dispatcherCommand: string = getMsgDispatcherCommand(expParams.tuner, expParams.assessor);
|
||||||
|
console.log(`dispatcher command: ${dispatcherCommand}`);
|
||||||
this.setupTuner(
|
this.setupTuner(
|
||||||
expParams.tuner.tunerCommand,
|
dispatcherCommand,
|
||||||
expParams.tuner.tunerCwd,
|
undefined,
|
||||||
'resume',
|
'resume',
|
||||||
expParams.tuner.tunerCheckpointDirectory);
|
expParams.tuner.checkpointDir);
|
||||||
|
|
||||||
if (expParams.assessor !== undefined) {
|
|
||||||
this.setupAssessor(
|
|
||||||
expParams.assessor.assessorCommand,
|
|
||||||
expParams.assessor.assessorCwd,
|
|
||||||
'resume',
|
|
||||||
expParams.assessor.assessorCheckpointDirectory
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
const allTrialJobs: TrialJobInfo[] = await this.dataStore.listTrialJobs();
|
const allTrialJobs: TrialJobInfo[] = await this.dataStore.listTrialJobs();
|
||||||
|
|
||||||
|
@ -204,7 +187,7 @@ class NNIManager implements Manager {
|
||||||
// TO DO: move timeout value to constants file
|
// TO DO: move timeout value to constants file
|
||||||
const delay1: Promise<{}> = new Promise((resolve: Function, reject: Function): void => {
|
const delay1: Promise<{}> = new Promise((resolve: Function, reject: Function): void => {
|
||||||
timeoutId = setTimeout(
|
timeoutId = setTimeout(
|
||||||
() => { reject(new Error('TrainingService setClusterMetadata timeout.')); },
|
() => { reject(new Error('TrainingService setClusterMetadata timeout. Please check your config file.')); },
|
||||||
10000);
|
10000);
|
||||||
});
|
});
|
||||||
await Promise.race([delay1, this.trainingService.setClusterMetadata(key, value)]).finally(() => {
|
await Promise.race([delay1, this.trainingService.setClusterMetadata(key, value)]).finally(() => {
|
||||||
|
@ -248,8 +231,8 @@ class NNIManager implements Manager {
|
||||||
return this.dataStore.listTrialJobs(status);
|
return this.dataStore.listTrialJobs(status);
|
||||||
}
|
}
|
||||||
|
|
||||||
private setupTuner(command: string, cwd: string, mode: 'start' | 'resume', dataDirectory: string): void {
|
private setupTuner(command: string, cwd: string | undefined, mode: 'start' | 'resume', dataDirectory: string): void {
|
||||||
if (this.tuner !== undefined) {
|
if (this.dispatcher !== undefined) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
const stdio: (string | NodeJS.WriteStream)[] = ['ignore', process.stdout, process.stderr, 'pipe', 'pipe'];
|
const stdio: (string | NodeJS.WriteStream)[] = ['ignore', process.stdout, process.stderr, 'pipe', 'pipe'];
|
||||||
|
@ -270,36 +253,8 @@ class NNIManager implements Manager {
|
||||||
},
|
},
|
||||||
shell: true
|
shell: true
|
||||||
});
|
});
|
||||||
this.tunerPid = tunerProc.pid;
|
this.dispatcherPid = tunerProc.pid;
|
||||||
this.tuner = createTunerInterface(tunerProc);
|
this.dispatcher = createDispatcherInterface(tunerProc);
|
||||||
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
private setupAssessor(command: string, cwd: string, mode: 'start' | 'resume', dataDirectory: string): void {
|
|
||||||
if (this.assessor !== undefined) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
const stdio: (string | NodeJS.WriteStream)[] = ['ignore', process.stdout, process.stderr, 'pipe', 'pipe'];
|
|
||||||
let newCwd: string;
|
|
||||||
if (cwd === undefined || cwd === '') {
|
|
||||||
newCwd = getLogDir();
|
|
||||||
} else {
|
|
||||||
newCwd = cwd;
|
|
||||||
}
|
|
||||||
// TO DO: add CUDA_VISIBLE_DEVICES
|
|
||||||
const assessorProc: ChildProcess = spawn(command, [], {
|
|
||||||
stdio,
|
|
||||||
cwd: newCwd,
|
|
||||||
env: {
|
|
||||||
NNI_MODE: mode,
|
|
||||||
NNI_CHECKPOINT_DIRECTORY: dataDirectory,
|
|
||||||
NNI_LOG_DIRECTORY: getLogDir()
|
|
||||||
},
|
|
||||||
shell: true
|
|
||||||
});
|
|
||||||
this.assessorPid = assessorProc.pid;
|
|
||||||
this.assessor = createAssessorInterface(assessorProc);
|
|
||||||
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -307,10 +262,10 @@ class NNIManager implements Manager {
|
||||||
private updateTrialConcurrency(trialConcurrency: number): void {
|
private updateTrialConcurrency(trialConcurrency: number): void {
|
||||||
// TO DO: this method can only be called after startExperiment/resumeExperiment
|
// TO DO: this method can only be called after startExperiment/resumeExperiment
|
||||||
if (trialConcurrency > this.experimentProfile.params.trialConcurrency) {
|
if (trialConcurrency > this.experimentProfile.params.trialConcurrency) {
|
||||||
if (this.tuner === undefined) {
|
if (this.dispatcher === undefined) {
|
||||||
throw new Error('Error: tuner has to be initialized');
|
throw new Error('Error: tuner has to be initialized');
|
||||||
}
|
}
|
||||||
this.tuner.sendCommand(
|
this.dispatcher.sendCommand(
|
||||||
REQUEST_TRIAL_JOBS,
|
REQUEST_TRIAL_JOBS,
|
||||||
String(trialConcurrency - this.experimentProfile.params.trialConcurrency)
|
String(trialConcurrency - this.experimentProfile.params.trialConcurrency)
|
||||||
);
|
);
|
||||||
|
@ -333,45 +288,31 @@ class NNIManager implements Manager {
|
||||||
}
|
}
|
||||||
|
|
||||||
private updateSearchSpace(searchSpace: string): void {
|
private updateSearchSpace(searchSpace: string): void {
|
||||||
if (this.tuner === undefined) {
|
if (this.dispatcher === undefined) {
|
||||||
throw new Error('Error: tuner has not been setup');
|
throw new Error('Error: tuner has not been setup');
|
||||||
}
|
}
|
||||||
this.tuner.sendCommand(UPDATE_SEARCH_SPACE, searchSpace);
|
this.dispatcher.sendCommand(UPDATE_SEARCH_SPACE, searchSpace);
|
||||||
this.experimentProfile.params.searchSpace = searchSpace;
|
this.experimentProfile.params.searchSpace = searchSpace;
|
||||||
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
private async experimentDoneCleanUp(): Promise<void> {
|
private async experimentDoneCleanUp(): Promise<void> {
|
||||||
if (this.tuner === undefined) {
|
if (this.dispatcher === undefined) {
|
||||||
throw new Error('Error: tuner has not been setup');
|
throw new Error('Error: tuner has not been setup');
|
||||||
}
|
}
|
||||||
this.tuner.sendCommand(TERMINATE);
|
this.dispatcher.sendCommand(TERMINATE);
|
||||||
if (this.assessor !== undefined) {
|
|
||||||
this.assessor.sendCommand(TERMINATE);
|
|
||||||
}
|
|
||||||
let tunerAlive: boolean = true;
|
let tunerAlive: boolean = true;
|
||||||
let assessorAlive: boolean = true;
|
|
||||||
// gracefully terminate tuner and assessor here, wait at most 30 seconds.
|
// gracefully terminate tuner and assessor here, wait at most 30 seconds.
|
||||||
for (let i: number = 0; i < 30; i++) {
|
for (let i: number = 0; i < 30; i++) {
|
||||||
if (!tunerAlive && !assessorAlive) { break; }
|
if (!tunerAlive) { break; }
|
||||||
try {
|
try {
|
||||||
await cpp.exec(`kill -0 ${this.tunerPid}`);
|
await cpp.exec(`kill -0 ${this.dispatcherPid}`);
|
||||||
} catch (error) { tunerAlive = false; }
|
} catch (error) { tunerAlive = false; }
|
||||||
if (this.assessor !== undefined) {
|
|
||||||
try {
|
|
||||||
await cpp.exec(`kill -0 ${this.assessorPid}`);
|
|
||||||
} catch (error) { assessorAlive = false; }
|
|
||||||
} else {
|
|
||||||
assessorAlive = false;
|
|
||||||
}
|
|
||||||
await delay(1000);
|
await delay(1000);
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
await cpp.exec(`kill ${this.tunerPid}`);
|
await cpp.exec(`kill ${this.dispatcherPid}`);
|
||||||
if (this.assessorPid !== undefined) {
|
|
||||||
await cpp.exec(`kill ${this.assessorPid}`);
|
|
||||||
}
|
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
// this.tunerPid does not exist, do nothing here
|
// this.tunerPid does not exist, do nothing here
|
||||||
}
|
}
|
||||||
|
@ -408,25 +349,18 @@ class NNIManager implements Manager {
|
||||||
return this.dataStore.storeExperimentProfile(this.experimentProfile);
|
return this.dataStore.storeExperimentProfile(this.experimentProfile);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// tslint:disable-next-line:max-func-body-length
|
||||||
private runInternal(): Promise<void> {
|
private runInternal(): Promise<void> {
|
||||||
// TO DO: cannot run this method more than once in one NNIManager instance
|
// TO DO: cannot run this method more than once in one NNIManager instance
|
||||||
if (this.tuner === undefined) {
|
if (this.dispatcher === undefined) {
|
||||||
throw new Error('Error: tuner has not been setup');
|
throw new Error('Error: tuner has not been setup');
|
||||||
}
|
}
|
||||||
this.trainingService.addTrialJobMetricListener(async (metric: TrialJobMetric) => {
|
this.trainingService.addTrialJobMetricListener(async (metric: TrialJobMetric) => {
|
||||||
await this.dataStore.storeMetricData(metric.id, metric.data);
|
await this.dataStore.storeMetricData(metric.id, metric.data);
|
||||||
if (this.tuner === undefined) {
|
if (this.dispatcher === undefined) {
|
||||||
throw new Error('Error: tuner has not been setup');
|
throw new Error('Error: tuner has not been setup');
|
||||||
}
|
}
|
||||||
this.tuner.sendCommand(REPORT_METRIC_DATA, metric.data);
|
this.dispatcher.sendCommand(REPORT_METRIC_DATA, metric.data);
|
||||||
if (this.assessor !== undefined) {
|
|
||||||
try {
|
|
||||||
this.assessor.sendCommand(REPORT_METRIC_DATA, metric.data);
|
|
||||||
} catch (error) {
|
|
||||||
this.log.critical(`ASSESSOR ERROR: ${error.message}`);
|
|
||||||
this.log.critical(`ASSESSOR ERROR: ${error.stack}`);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
});
|
});
|
||||||
|
|
||||||
this.trialJobsMaintainer = new TrialJobs(
|
this.trialJobsMaintainer = new TrialJobs(
|
||||||
|
@ -439,7 +373,7 @@ class NNIManager implements Manager {
|
||||||
} else {
|
} else {
|
||||||
this.log.debug(`Job event: ${event}`);
|
this.log.debug(`Job event: ${event}`);
|
||||||
}
|
}
|
||||||
if (this.tuner === undefined) {
|
if (this.dispatcher === undefined) {
|
||||||
throw new Error('Error: tuner has not been setup');
|
throw new Error('Error: tuner has not been setup');
|
||||||
}
|
}
|
||||||
switch (event) {
|
switch (event) {
|
||||||
|
@ -453,15 +387,13 @@ class NNIManager implements Manager {
|
||||||
if (this.currSubmittedTrialNum < this.experimentProfile.params.maxTrialNum) {
|
if (this.currSubmittedTrialNum < this.experimentProfile.params.maxTrialNum) {
|
||||||
if (this.customizedTrials.length > 0) {
|
if (this.customizedTrials.length > 0) {
|
||||||
const hyperParams: string | undefined = this.customizedTrials.shift();
|
const hyperParams: string | undefined = this.customizedTrials.shift();
|
||||||
this.tuner.sendCommand(ADD_CUSTOMIZED_TRIAL_JOB, hyperParams);
|
this.dispatcher.sendCommand(ADD_CUSTOMIZED_TRIAL_JOB, hyperParams);
|
||||||
} else {
|
} else {
|
||||||
this.tuner.sendCommand(REQUEST_TRIAL_JOBS, '1');
|
this.dispatcher.sendCommand(REQUEST_TRIAL_JOBS, '1');
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (this.assessor !== undefined) {
|
this.dispatcher.sendCommand(TRIAL_END, JSON.stringify({trial_job_id: trialJobDetail.id, event: event}));
|
||||||
this.assessor.sendCommand(TRIAL_END, JSON.stringify({trial_job_id: trialJobDetail.id, event: event}));
|
|
||||||
}
|
|
||||||
await this.dataStore.storeTrialJobEvent(event, trialJobDetail.id, undefined, trialJobDetail.url);
|
await this.dataStore.storeTrialJobEvent(event, trialJobDetail.id, undefined, trialJobDetail.url);
|
||||||
break;
|
break;
|
||||||
case 'RUNNING':
|
case 'RUNNING':
|
||||||
|
@ -478,15 +410,14 @@ class NNIManager implements Manager {
|
||||||
});
|
});
|
||||||
|
|
||||||
// TO DO: we should send INITIALIZE command to tuner if user's tuner needs to run init method in tuner
|
// TO DO: we should send INITIALIZE command to tuner if user's tuner needs to run init method in tuner
|
||||||
// TO DO: we should send INITIALIZE command to assessor if user's tuner needs to run init method in tuner
|
this.log.debug(`Send tuner command: update search space: ${this.experimentProfile.params.searchSpace}`);
|
||||||
this.log.debug(`Send tuner command: update search space: ${this.experimentProfile.params.searchSpace}`)
|
this.dispatcher.sendCommand(UPDATE_SEARCH_SPACE, this.experimentProfile.params.searchSpace);
|
||||||
this.tuner.sendCommand(UPDATE_SEARCH_SPACE, this.experimentProfile.params.searchSpace);
|
|
||||||
if (this.trialConcurrencyReduction !== 0) {
|
if (this.trialConcurrencyReduction !== 0) {
|
||||||
return Promise.reject(new Error('Error: cannot modify trialConcurrency before startExperiment'));
|
return Promise.reject(new Error('Error: cannot modify trialConcurrency before startExperiment'));
|
||||||
}
|
}
|
||||||
this.log.debug(`Send tuner command: ${this.experimentProfile.params.trialConcurrency}`)
|
this.log.debug(`Send tuner command: ${this.experimentProfile.params.trialConcurrency}`)
|
||||||
this.tuner.sendCommand(REQUEST_TRIAL_JOBS, String(this.experimentProfile.params.trialConcurrency));
|
this.dispatcher.sendCommand(REQUEST_TRIAL_JOBS, String(this.experimentProfile.params.trialConcurrency));
|
||||||
this.tuner.onCommand(async (commandType: string, content: string) => {
|
this.dispatcher.onCommand(async (commandType: string, content: string) => {
|
||||||
this.log.info(`Command from tuner: ${commandType}, ${content}`);
|
this.log.info(`Command from tuner: ${commandType}, ${content}`);
|
||||||
if (this.trialJobsMaintainer === undefined) {
|
if (this.trialJobsMaintainer === undefined) {
|
||||||
throw new Error('Error: trialJobsMaintainer not initialized');
|
throw new Error('Error: trialJobsMaintainer not initialized');
|
||||||
|
@ -501,8 +432,7 @@ class NNIManager implements Manager {
|
||||||
};
|
};
|
||||||
const trialJobDetail: TrialJobDetail = await this.trainingService.submitTrialJob(trialJobAppForm);
|
const trialJobDetail: TrialJobDetail = await this.trainingService.submitTrialJob(trialJobAppForm);
|
||||||
this.trialJobsMaintainer.setTrialJob(trialJobDetail.id, Object.assign({}, trialJobDetail));
|
this.trialJobsMaintainer.setTrialJob(trialJobDetail.id, Object.assign({}, trialJobDetail));
|
||||||
// TO DO: to uncomment
|
assert(trialJobDetail.status === 'WAITING');
|
||||||
//assert(trialJobDetail.status === 'WAITING');
|
|
||||||
await this.dataStore.storeTrialJobEvent(trialJobDetail.status, trialJobDetail.id, content, trialJobDetail.url);
|
await this.dataStore.storeTrialJobEvent(trialJobDetail.status, trialJobDetail.id, content, trialJobDetail.url);
|
||||||
if (this.currSubmittedTrialNum === this.experimentProfile.params.maxTrialNum) {
|
if (this.currSubmittedTrialNum === this.experimentProfile.params.maxTrialNum) {
|
||||||
this.trialJobsMaintainer.setNoMoreTrials();
|
this.trialJobsMaintainer.setNoMoreTrials();
|
||||||
|
@ -512,19 +442,13 @@ class NNIManager implements Manager {
|
||||||
case NO_MORE_TRIAL_JOBS:
|
case NO_MORE_TRIAL_JOBS:
|
||||||
this.trialJobsMaintainer.setNoMoreTrials();
|
this.trialJobsMaintainer.setNoMoreTrials();
|
||||||
break;
|
break;
|
||||||
|
case KILL_TRIAL_JOB:
|
||||||
|
await this.trainingService.cancelTrialJob(JSON.parse(content));
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
throw new Error('Error: unsupported command type from tuner');
|
throw new Error(`Error: unsupported command type: [${commandType}]`);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
if (this.assessor !== undefined) {
|
|
||||||
this.assessor.onCommand(async (commandType: string, content: string) => {
|
|
||||||
if (commandType === KILL_TRIAL_JOB) {
|
|
||||||
await this.trainingService.cancelTrialJob(JSON.parse(content));
|
|
||||||
} else {
|
|
||||||
throw new Error('Error: unsupported command type from assessor');
|
|
||||||
}
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
return this.trialJobsMaintainer.run();
|
return this.trialJobsMaintainer.run();
|
||||||
}
|
}
|
||||||
|
|
|
@ -69,10 +69,9 @@ describe('Unit test for dataStore', () => {
|
||||||
}
|
}
|
||||||
}`,
|
}`,
|
||||||
tuner: {
|
tuner: {
|
||||||
tunerCommand: 'python3 tunner.py',
|
className: 'testTuner',
|
||||||
tunerCwd: '/tmp',
|
checkpointDir: '/tmp/cp',
|
||||||
tunerCheckpointDirectory: '/tmp/cp',
|
gpuNum: 0
|
||||||
tunerGpuNum: 0
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
id: 'exp123',
|
id: 'exp123',
|
||||||
|
|
|
@ -20,6 +20,4 @@ from nni.assessor import Assessor, AssessResult
|
||||||
|
|
||||||
class DummyAssessor(Assessor):
|
class DummyAssessor(Assessor):
|
||||||
def assess_trial(self, trial_job_id, trial_history):
|
def assess_trial(self, trial_job_id, trial_history):
|
||||||
return AssessResult.Good
|
return AssessResult.Good
|
||||||
|
|
||||||
DummyAssessor().run()
|
|
|
@ -0,0 +1,35 @@
|
||||||
|
# Copyright (c) Microsoft Corporation
|
||||||
|
# All rights reserved.
|
||||||
|
#
|
||||||
|
# MIT License
|
||||||
|
#
|
||||||
|
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
|
||||||
|
# documentation files (the "Software"), to deal in the Software without restriction, including without limitation
|
||||||
|
# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
|
||||||
|
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
||||||
|
# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
||||||
|
#
|
||||||
|
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
|
||||||
|
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
|
||||||
|
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
|
||||||
|
|
||||||
|
from nni.tuner import Tuner
|
||||||
|
|
||||||
|
class DummyTuner(Tuner):
|
||||||
|
def generate_parameters(self, parameter_id):
|
||||||
|
return 'unit-test-parm'
|
||||||
|
|
||||||
|
def generate_multiple_parameters(self, parameter_id_list):
|
||||||
|
return ['unit-test-param1', 'unit-test-param2']
|
||||||
|
|
||||||
|
def receive_trial_result(self, parameter_id, parameters, reward):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def receive_customized_trial_result(self, parameter_id, parameters, reward):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def update_search_space(self, search_space):
|
||||||
|
pass
|
|
@ -24,7 +24,7 @@ import { ChildProcess, spawn } from 'child_process';
|
||||||
import { Deferred } from 'ts-deferred';
|
import { Deferred } from 'ts-deferred';
|
||||||
import { cleanupUnitTest, prepareUnitTest } from '../../common/utils';
|
import { cleanupUnitTest, prepareUnitTest } from '../../common/utils';
|
||||||
import * as CommandType from '../commands';
|
import * as CommandType from '../commands';
|
||||||
import { createAssessorInterface, createTunerInterface, IpcInterface } from '../ipcInterface';
|
import { createDispatcherInterface, IpcInterface } from '../ipcInterface';
|
||||||
|
|
||||||
let sentCommands: {[key: string]: string}[] = [];
|
let sentCommands: {[key: string]: string}[] = [];
|
||||||
const receivedCommands: {[key: string]: string}[] = [];
|
const receivedCommands: {[key: string]: string}[] = [];
|
||||||
|
@ -52,27 +52,27 @@ function runProcess(): Promise<Error | null> {
|
||||||
});
|
});
|
||||||
|
|
||||||
// create IPC interface
|
// create IPC interface
|
||||||
const assessor: IpcInterface = createAssessorInterface(proc);
|
const dispatcher: IpcInterface = createDispatcherInterface(proc);
|
||||||
assessor.onCommand((commandType: string, content: string): void => {
|
dispatcher.onCommand((commandType: string, content: string): void => {
|
||||||
receivedCommands.push({ commandType, content });
|
receivedCommands.push({ commandType, content });
|
||||||
});
|
});
|
||||||
|
|
||||||
// Command #1: ok
|
// Command #1: ok
|
||||||
assessor.sendCommand('IN');
|
dispatcher.sendCommand('IN');
|
||||||
|
|
||||||
// Command #2: ok
|
// Command #2: ok
|
||||||
assessor.sendCommand('ME', '123');
|
dispatcher.sendCommand('ME', '123');
|
||||||
|
|
||||||
// Command #3: too long
|
// Command #3: too long
|
||||||
try {
|
try {
|
||||||
assessor.sendCommand('ME', 'x'.repeat(1_000_000));
|
dispatcher.sendCommand('ME', 'x'.repeat(1_000_000));
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
commandTooLong = error;
|
commandTooLong = error;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Command #4: not assessor command
|
// Command #4: FE is not tuner/assessor command, test the exception type of send non-valid command
|
||||||
try {
|
try {
|
||||||
assessor.sendCommand('GE', '1');
|
dispatcher.sendCommand('FE', '1');
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
rejectCommandType = error;
|
rejectCommandType = error;
|
||||||
}
|
}
|
||||||
|
|
|
@ -22,18 +22,34 @@
|
||||||
import * as assert from 'assert';
|
import * as assert from 'assert';
|
||||||
import { ChildProcess, spawn } from 'child_process';
|
import { ChildProcess, spawn } from 'child_process';
|
||||||
import { Deferred } from 'ts-deferred';
|
import { Deferred } from 'ts-deferred';
|
||||||
import { cleanupUnitTest, prepareUnitTest } from '../../common/utils';
|
import { cleanupUnitTest, prepareUnitTest, getMsgDispatcherCommand } from '../../common/utils';
|
||||||
import * as CommandType from '../commands';
|
import * as CommandType from '../commands';
|
||||||
import { createAssessorInterface, IpcInterface } from '../ipcInterface';
|
import { createDispatcherInterface, IpcInterface } from '../ipcInterface';
|
||||||
|
|
||||||
let assessor: IpcInterface | undefined;
|
let dispatcher: IpcInterface | undefined;
|
||||||
let procExit: boolean = false;
|
let procExit: boolean = false;
|
||||||
let procError: boolean = false;
|
let procError: boolean = false;
|
||||||
|
|
||||||
function startProcess(): void {
|
function startProcess(): void {
|
||||||
// create fake assessor process
|
// create fake assessor process
|
||||||
const stdio: {}[] = ['ignore', 'pipe', process.stderr, 'pipe', 'pipe'];
|
const stdio: {}[] = ['ignore', 'pipe', process.stderr, 'pipe', 'pipe'];
|
||||||
const proc: ChildProcess = spawn('python3 dummy_assessor.py', [], { stdio, cwd: 'core/test', shell: true });
|
|
||||||
|
const dispatcherCmd : string = getMsgDispatcherCommand(
|
||||||
|
// Mock tuner config
|
||||||
|
{
|
||||||
|
className: 'DummyTuner',
|
||||||
|
codeDir: './',
|
||||||
|
classFileName: 'dummy_tuner.py'
|
||||||
|
},
|
||||||
|
// Mock assessor config
|
||||||
|
{
|
||||||
|
className: 'DummyAssessor',
|
||||||
|
codeDir: './',
|
||||||
|
classFileName: 'dummy_assessor.py'
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
const proc: ChildProcess = spawn(dispatcherCmd, [], { stdio, cwd: 'core/test', shell: true });
|
||||||
|
|
||||||
proc.on('error', (error: Error): void => {
|
proc.on('error', (error: Error): void => {
|
||||||
procExit = true;
|
procExit = true;
|
||||||
|
@ -43,10 +59,10 @@ function startProcess(): void {
|
||||||
procExit = true;
|
procExit = true;
|
||||||
procError = (code !== 0);
|
procError = (code !== 0);
|
||||||
});
|
});
|
||||||
|
|
||||||
// create IPC interface
|
// create IPC interface
|
||||||
assessor = createAssessorInterface(proc);
|
dispatcher = createDispatcherInterface(proc);
|
||||||
(<IpcInterface>assessor).onCommand((commandType: string, content: string): void => {
|
(<IpcInterface>dispatcher).onCommand((commandType: string, content: string): void => {
|
||||||
console.log(commandType, content); // tslint:disable-line:no-console
|
console.log(commandType, content); // tslint:disable-line:no-console
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
@ -62,13 +78,13 @@ describe('core/ipcInterface.terminate', (): void => {
|
||||||
});
|
});
|
||||||
|
|
||||||
it('normal', () => {
|
it('normal', () => {
|
||||||
(<IpcInterface>assessor).sendCommand(
|
(<IpcInterface>dispatcher).sendCommand(
|
||||||
CommandType.REPORT_METRIC_DATA,
|
CommandType.REPORT_METRIC_DATA,
|
||||||
'{"trial_job_id":"A","type":"periodical","value":1}');
|
'{"trial_job_id":"A","type":"PERIODICAL","value":1,"sequence":123}');
|
||||||
|
|
||||||
const deferred: Deferred<void> = new Deferred<void>();
|
const deferred: Deferred<void> = new Deferred<void>();
|
||||||
setTimeout(
|
setTimeout(
|
||||||
() => {
|
() => {
|
||||||
assert.ok(!procExit);
|
assert.ok(!procExit);
|
||||||
assert.ok(!procError);
|
assert.ok(!procError);
|
||||||
deferred.resolve();
|
deferred.resolve();
|
||||||
|
@ -79,7 +95,7 @@ describe('core/ipcInterface.terminate', (): void => {
|
||||||
});
|
});
|
||||||
|
|
||||||
it('terminate', () => {
|
it('terminate', () => {
|
||||||
(<IpcInterface>assessor).sendCommand(CommandType.TERMINATE);
|
(<IpcInterface>dispatcher).sendCommand(CommandType.TERMINATE);
|
||||||
|
|
||||||
const deferred: Deferred<void> = new Deferred<void>();
|
const deferred: Deferred<void> = new Deferred<void>();
|
||||||
setTimeout(
|
setTimeout(
|
||||||
|
@ -88,7 +104,7 @@ describe('core/ipcInterface.terminate', (): void => {
|
||||||
assert.ok(!procError);
|
assert.ok(!procError);
|
||||||
deferred.resolve();
|
deferred.resolve();
|
||||||
},
|
},
|
||||||
1000);
|
2000);
|
||||||
|
|
||||||
return deferred.promise;
|
return deferred.promise;
|
||||||
});
|
});
|
||||||
|
|
|
@ -30,7 +30,6 @@ const testTrainingServiceProvider: Provider = {
|
||||||
};
|
};
|
||||||
|
|
||||||
class MockedTrainingService extends TrainingService {
|
class MockedTrainingService extends TrainingService {
|
||||||
|
|
||||||
public mockedMetaDataValue: string = "default";
|
public mockedMetaDataValue: string = "default";
|
||||||
public jobDetail1: TrialJobDetail = {
|
public jobDetail1: TrialJobDetail = {
|
||||||
id: '1234',
|
id: '1234',
|
||||||
|
@ -93,6 +92,14 @@ class MockedTrainingService extends TrainingService {
|
||||||
return deferred.promise;
|
return deferred.promise;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public updateTrialJob(trialJobId: string, form: TrialJobApplicationForm): Promise<TrialJobDetail> {
|
||||||
|
throw new MethodNotImplementedError();
|
||||||
|
}
|
||||||
|
|
||||||
|
public get isMultiPhaseJobSupported(): boolean {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
public cancelTrialJob(trialJobId: string): Promise<void> {
|
public cancelTrialJob(trialJobId: string): Promise<void> {
|
||||||
const deferred = new Deferred<void>();
|
const deferred = new Deferred<void>();
|
||||||
if(trialJobId === '1234' || trialJobId === '3456'){
|
if(trialJobId === '1234' || trialJobId === '3456'){
|
||||||
|
@ -125,7 +132,7 @@ class MockedTrainingService extends TrainingService {
|
||||||
}
|
}
|
||||||
|
|
||||||
public cleanUp(): Promise<void> {
|
public cleanUp(): Promise<void> {
|
||||||
throw new MethodNotImplementedError();
|
return Promise.resolve();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -56,16 +56,17 @@ describe('Unit test for nnimanager', function () {
|
||||||
maxTrialNum: 2,
|
maxTrialNum: 2,
|
||||||
searchSpace: '{"x":1}',
|
searchSpace: '{"x":1}',
|
||||||
tuner: {
|
tuner: {
|
||||||
tunerCommand: 'python3 hyperopt.py',
|
className: 'EvolutionTuner',
|
||||||
tunerCwd: 'core/test',
|
classArgs: {
|
||||||
tunerCheckpointDirectory: '',
|
optimize_mode: 'maximize'
|
||||||
tunerGpuNum: 1
|
},
|
||||||
|
checkpointDir: '',
|
||||||
|
gpuNum: 1
|
||||||
},
|
},
|
||||||
assessor: {
|
assessor: {
|
||||||
assessorCommand: 'python3 dummy_assessor.py',
|
className: 'MedianstopAssessor',
|
||||||
assessorCwd: 'core/test',
|
checkpointDir: '',
|
||||||
assessorCheckpointDirectory: '',
|
gpuNum: 1
|
||||||
assessorGpuNum: 1
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -38,10 +38,9 @@ const expParams1: ExperimentParams = {
|
||||||
maxTrialNum: 5,
|
maxTrialNum: 5,
|
||||||
searchSpace: 'SS',
|
searchSpace: 'SS',
|
||||||
tuner: {
|
tuner: {
|
||||||
tunerCommand: './tuner.sh',
|
className: 'testTuner',
|
||||||
tunerCwd: '.',
|
checkpointDir: '/tmp',
|
||||||
tunerCheckpointDirectory: '/tmp',
|
gpuNum: 0
|
||||||
tunerGpuNum: 0
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -53,14 +52,12 @@ const expParams2: ExperimentParams = {
|
||||||
maxTrialNum: 5,
|
maxTrialNum: 5,
|
||||||
searchSpace: '',
|
searchSpace: '',
|
||||||
tuner: {
|
tuner: {
|
||||||
tunerCommand: 'python tuner.py',
|
className: 'testTuner',
|
||||||
tunerCwd: '/tmp',
|
checkpointDir: '/tmp'
|
||||||
tunerCheckpointDirectory: '/tmp'
|
|
||||||
},
|
},
|
||||||
assessor: {
|
assessor: {
|
||||||
assessorCommand: 'python assessor.py',
|
className: 'testAssessor',
|
||||||
assessorCwd: '/tmp',
|
checkpointDir: '/tmp'
|
||||||
assessorCheckpointDirectory: '/tmp'
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -37,7 +37,7 @@ export const testManagerProvider: Provider = {
|
||||||
};
|
};
|
||||||
|
|
||||||
export class MockedNNIManager extends Manager {
|
export class MockedNNIManager extends Manager {
|
||||||
public updateExperimentProfile(experimentProfile: ExperimentProfile, updateType: ProfileUpdateType ): Promise<void> {
|
public updateExperimentProfile(experimentProfile: ExperimentProfile, updateType: ProfileUpdateType): Promise<void> {
|
||||||
return Promise.resolve();
|
return Promise.resolve();
|
||||||
}
|
}
|
||||||
public getTrialJobStatistics(): Promise<TrialJobStatistics[]> {
|
public getTrialJobStatistics(): Promise<TrialJobStatistics[]> {
|
||||||
|
@ -103,23 +103,15 @@ export class MockedNNIManager extends Manager {
|
||||||
return deferred.promise;
|
return deferred.promise;
|
||||||
}
|
}
|
||||||
|
|
||||||
public getTrialJob(trialJobId: string): Promise<TrialJobDetail> {
|
public getTrialJob(trialJobId: string): Promise<TrialJobInfo> {
|
||||||
const deferred: Deferred<TrialJobDetail> = new Deferred<TrialJobDetail>();
|
const deferred: Deferred<TrialJobInfo> = new Deferred<TrialJobInfo>();
|
||||||
const jobDetail: TrialJobDetail = {
|
const jobInfo: TrialJobInfo = {
|
||||||
id: '1234',
|
id: '1234',
|
||||||
status: 'SUCCEEDED',
|
status: 'SUCCEEDED',
|
||||||
submitTime: new Date(),
|
|
||||||
startTime: new Date(),
|
startTime: new Date(),
|
||||||
endTime: new Date(),
|
endTime: new Date()
|
||||||
tags: ['test'],
|
|
||||||
// tslint:disable-next-line:no-http-string
|
|
||||||
url: 'http://test',
|
|
||||||
workingDirectory: '/tmp/mocked',
|
|
||||||
form: {
|
|
||||||
jobType: 'TRIAL'
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
deferred.resolve(jobDetail);
|
deferred.resolve(jobInfo);
|
||||||
|
|
||||||
return deferred.promise;
|
return deferred.promise;
|
||||||
}
|
}
|
||||||
|
@ -139,9 +131,8 @@ export class MockedNNIManager extends Manager {
|
||||||
maxTrialNum: 3,
|
maxTrialNum: 3,
|
||||||
searchSpace: '{lr: 0.01}',
|
searchSpace: '{lr: 0.01}',
|
||||||
tuner: {
|
tuner: {
|
||||||
tunerCommand: 'python3 tuner.py',
|
className: 'testTuner',
|
||||||
tunerCwd: '/tmp/tunner',
|
checkpointDir: ''
|
||||||
tunerCheckpointDirectory: ''
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
id: '2345',
|
id: '2345',
|
||||||
|
|
|
@ -116,7 +116,7 @@ describe('Unit test for rest server', () => {
|
||||||
}
|
}
|
||||||
|
|
||||||
const req: request.Options = {
|
const req: request.Options = {
|
||||||
uri: `${ROOT_URL}/experiment`,
|
uri: `${ROOT_URL}/experiment?update_type=TRIAL_CONCURRENCY`,
|
||||||
method: 'PUT',
|
method: 'PUT',
|
||||||
json: true,
|
json: true,
|
||||||
body: profile
|
body: profile
|
||||||
|
@ -141,7 +141,7 @@ describe('Unit test for rest server', () => {
|
||||||
body: {
|
body: {
|
||||||
exception_test_key: 'test'
|
exception_test_key: 'test'
|
||||||
}
|
}
|
||||||
}
|
};
|
||||||
request(req, (err: Error, res: request.Response) => {
|
request(req, (err: Error, res: request.Response) => {
|
||||||
if (err) {
|
if (err) {
|
||||||
assert.fail(err.message);
|
assert.fail(err.message);
|
||||||
|
@ -158,7 +158,7 @@ describe('Unit test for rest server', () => {
|
||||||
method: 'PUT',
|
method: 'PUT',
|
||||||
json: true,
|
json: true,
|
||||||
body: {
|
body: {
|
||||||
MACHINE_LIST: [{
|
machine_list: [{
|
||||||
ip: '10.10.10.101',
|
ip: '10.10.10.101',
|
||||||
port: 22,
|
port: 22,
|
||||||
username: 'test',
|
username: 'test',
|
||||||
|
@ -170,7 +170,7 @@ describe('Unit test for rest server', () => {
|
||||||
passwd: '1234'
|
passwd: '1234'
|
||||||
}]
|
}]
|
||||||
}
|
}
|
||||||
}
|
};
|
||||||
request(req, (err: Error, res: request.Response) => {
|
request(req, (err: Error, res: request.Response) => {
|
||||||
if (err) {
|
if (err) {
|
||||||
assert.fail(err.message);
|
assert.fail(err.message);
|
||||||
|
@ -180,29 +180,4 @@ describe('Unit test for rest server', () => {
|
||||||
done();
|
done();
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
it('Test POST experiment', (done: Mocha.Done) => {
|
|
||||||
const req: request.Options = {
|
|
||||||
uri: `${ROOT_URL}/experiment`,
|
|
||||||
method: 'POST',
|
|
||||||
json: true,
|
|
||||||
body: {
|
|
||||||
author: 'test',
|
|
||||||
trial: {
|
|
||||||
entrypoint: 'python',
|
|
||||||
args: 'mnist.py'
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
// tslint:disable-next-line:no-any
|
|
||||||
request(req, (err: Error, res: request.Response, body: any) => {
|
|
||||||
if (err) {
|
|
||||||
assert.fail(err.message);
|
|
||||||
} else {
|
|
||||||
expect(res.statusCode).to.equal(200);
|
|
||||||
expect(body.experiment_id).to.equal('id-1234');
|
|
||||||
}
|
|
||||||
done();
|
|
||||||
});
|
|
||||||
});
|
|
||||||
});
|
});
|
||||||
|
|
|
@ -25,7 +25,7 @@ import { EventEmitter } from 'events';
|
||||||
import * as fs from 'fs';
|
import * as fs from 'fs';
|
||||||
import * as path from 'path';
|
import * as path from 'path';
|
||||||
import * as ts from 'tail-stream';
|
import * as ts from 'tail-stream';
|
||||||
import { NNIError, NNIErrorNames } from '../../common/errors';
|
import { MethodNotImplementedError, NNIError, NNIErrorNames } from '../../common/errors';
|
||||||
import { getLogger, Logger } from '../../common/log';
|
import { getLogger, Logger } from '../../common/log';
|
||||||
import { TrialConfig } from '../common/trialConfig';
|
import { TrialConfig } from '../common/trialConfig';
|
||||||
import { TrialConfigMetadataKey } from '../common/trialConfigMetadataKey';
|
import { TrialConfigMetadataKey } from '../common/trialConfigMetadataKey';
|
||||||
|
@ -205,6 +205,22 @@ class LocalTrainingService implements TrainingService {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Update trial job for multi-phase
|
||||||
|
* @param trialJobId trial job id
|
||||||
|
* @param form job application form
|
||||||
|
*/
|
||||||
|
public updateTrialJob(trialJobId: string, form: JobApplicationForm): Promise<TrialJobDetail> {
|
||||||
|
throw new MethodNotImplementedError();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Is multiphase job supported in current training service
|
||||||
|
*/
|
||||||
|
public get isMultiPhaseJobSupported(): boolean {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
public async cancelTrialJob(trialJobId: string): Promise<void> {
|
public async cancelTrialJob(trialJobId: string): Promise<void> {
|
||||||
this.log.info(`cancelTrialJob: ${trialJobId}`);
|
this.log.info(`cancelTrialJob: ${trialJobId}`);
|
||||||
const trialJob: LocalTrialJobDetail | undefined = this.jobMap.get(trialJobId);
|
const trialJob: LocalTrialJobDetail | undefined = this.jobMap.get(trialJobId);
|
||||||
|
@ -309,7 +325,7 @@ class LocalTrainingService implements TrainingService {
|
||||||
runScriptLines.push(`export ${variable.key}=${variable.value}`);
|
runScriptLines.push(`export ${variable.key}=${variable.value}`);
|
||||||
}
|
}
|
||||||
runScriptLines.push(
|
runScriptLines.push(
|
||||||
`eval ${this.localTrailConfig.command} 2>${path.join(trialJobDetail.workingDirectory, '.nni', 'stderr')}`,
|
`eval ${this.localTrailConfig.command} 2>${path.join(trialJobDetail.workingDirectory, 'stderr')}`,
|
||||||
`echo $? \`date +%s%3N\` >${path.join(trialJobDetail.workingDirectory, '.nni', 'state')}`);
|
`echo $? \`date +%s%3N\` >${path.join(trialJobDetail.workingDirectory, '.nni', 'state')}`);
|
||||||
|
|
||||||
await cpp.exec(`mkdir -p ${trialJobDetail.workingDirectory}`);
|
await cpp.exec(`mkdir -p ${trialJobDetail.workingDirectory}`);
|
||||||
|
|
|
@ -82,7 +82,12 @@ export class MetricsCollector {
|
||||||
private getTrialJobIdsGroupByRmMeta(status: TrialJobStatus[]): Map<RemoteMachineMeta, string[]> {
|
private getTrialJobIdsGroupByRmMeta(status: TrialJobStatus[]): Map<RemoteMachineMeta, string[]> {
|
||||||
const map: Map<RemoteMachineMeta, string[]> = new Map<RemoteMachineMeta, string[]>();
|
const map: Map<RemoteMachineMeta, string[]> = new Map<RemoteMachineMeta, string[]>();
|
||||||
this.trialJobsMap.forEach((trialJob, id) => {
|
this.trialJobsMap.forEach((trialJob, id) => {
|
||||||
if (status.includes(trialJob.status)) {
|
let reservedTrialJobIds : string[] = [];
|
||||||
|
if(trialJob.rmMeta !== undefined
|
||||||
|
&& trialJob.rmMeta.gpuReservation !== undefined) {
|
||||||
|
reservedTrialJobIds = Array.from(trialJob.rmMeta.gpuReservation.values());
|
||||||
|
}
|
||||||
|
if (reservedTrialJobIds.includes(id) || status.includes(trialJob.status)) {
|
||||||
if (map.has(trialJob.rmMeta)) {
|
if (map.has(trialJob.rmMeta)) {
|
||||||
const ids = map.get(trialJob.rmMeta);
|
const ids = map.get(trialJob.rmMeta);
|
||||||
if (ids !== undefined && !ids.includes(id)) {
|
if (ids !== undefined && !ids.includes(id)) {
|
||||||
|
@ -93,7 +98,7 @@ export class MetricsCollector {
|
||||||
|
|
||||||
// If the remote machine has jobs reserve GPU, also put that jobs into list to get metrics data
|
// If the remote machine has jobs reserve GPU, also put that jobs into list to get metrics data
|
||||||
if(trialJob.rmMeta.gpuReservation !== undefined) {
|
if(trialJob.rmMeta.gpuReservation !== undefined) {
|
||||||
const concatJobIds : string[] = initJobIds.concat(Array.from(trialJob.rmMeta.gpuReservation.values()));
|
const concatJobIds : string[] = initJobIds.concat(reservedTrialJobIds);
|
||||||
initJobIds = concatJobIds.filter((item, pos) => concatJobIds.indexOf(item) === pos);
|
initJobIds = concatJobIds.filter((item, pos) => concatJobIds.indexOf(item) === pos);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -31,16 +31,21 @@ export class RemoteMachineMeta {
|
||||||
public readonly ip : string;
|
public readonly ip : string;
|
||||||
public readonly port : number;
|
public readonly port : number;
|
||||||
public readonly username : string;
|
public readonly username : string;
|
||||||
public readonly passwd: string;
|
public readonly passwd?: string;
|
||||||
|
public readonly sshKeyPath?: string;
|
||||||
|
public readonly passphrase?: string;
|
||||||
public gpuSummary : GPUSummary | undefined;
|
public gpuSummary : GPUSummary | undefined;
|
||||||
/* GPU Reservation info, the key is GPU index, the value is the job id which reserves this GPU*/
|
/* GPU Reservation info, the key is GPU index, the value is the job id which reserves this GPU*/
|
||||||
public gpuReservation : Map<number, string>;
|
public gpuReservation : Map<number, string>;
|
||||||
|
|
||||||
constructor(ip : string, port : number, username : string, passwd : string) {
|
constructor(ip : string, port : number, username : string, passwd : string,
|
||||||
|
sshKeyPath : string, passphrase : string) {
|
||||||
this.ip = ip;
|
this.ip = ip;
|
||||||
this.port = port;
|
this.port = port;
|
||||||
this.username = username;
|
this.username = username;
|
||||||
this.passwd = passwd;
|
this.passwd = passwd;
|
||||||
|
this.sshKeyPath = sshKeyPath;
|
||||||
|
this.passphrase = passphrase;
|
||||||
this.gpuReservation = new Map<number, string>();
|
this.gpuReservation = new Map<number, string>();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -24,11 +24,11 @@ import { EventEmitter } from 'events';
|
||||||
import * as fs from 'fs';
|
import * as fs from 'fs';
|
||||||
import * as os from 'os';
|
import * as os from 'os';
|
||||||
import * as path from 'path';
|
import * as path from 'path';
|
||||||
import { Client } from 'ssh2';
|
import { Client, ConnectConfig } from 'ssh2';
|
||||||
import { Deferred } from 'ts-deferred';
|
import { Deferred } from 'ts-deferred';
|
||||||
import { String } from 'typescript-string-operations';
|
import { String } from 'typescript-string-operations';
|
||||||
import * as component from '../../common/component';
|
import * as component from '../../common/component';
|
||||||
import { NNIError, NNIErrorNames } from '../../common/errors';
|
import { MethodNotImplementedError, NNIError, NNIErrorNames } from '../../common/errors';
|
||||||
import { getExperimentId } from '../../common/experimentStartupInfo';
|
import { getExperimentId } from '../../common/experimentStartupInfo';
|
||||||
import { getLogger, Logger } from '../../common/log';
|
import { getLogger, Logger } from '../../common/log';
|
||||||
import { ObservableTimer } from '../../common/observableTimer';
|
import { ObservableTimer } from '../../common/observableTimer';
|
||||||
|
@ -195,6 +195,22 @@ class RemoteMachineTrainingService implements TrainingService {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Update trial job for multi-phase
|
||||||
|
* @param trialJobId trial job id
|
||||||
|
* @param form job application form
|
||||||
|
*/
|
||||||
|
public updateTrialJob(trialJobId: string, form: JobApplicationForm): Promise<TrialJobDetail> {
|
||||||
|
throw new MethodNotImplementedError();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Is multiphase job supported in current training service
|
||||||
|
*/
|
||||||
|
public get isMultiPhaseJobSupported(): boolean {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Cancel trial job
|
* Cancel trial job
|
||||||
* @param trialJobId ID of trial job
|
* @param trialJobId ID of trial job
|
||||||
|
@ -290,6 +306,24 @@ class RemoteMachineTrainingService implements TrainingService {
|
||||||
let connectedRMNum: number = 0;
|
let connectedRMNum: number = 0;
|
||||||
rmMetaList.forEach((rmMeta: RemoteMachineMeta) => {
|
rmMetaList.forEach((rmMeta: RemoteMachineMeta) => {
|
||||||
const conn: Client = new Client();
|
const conn: Client = new Client();
|
||||||
|
let connectConfig: ConnectConfig = {
|
||||||
|
host: rmMeta.ip,
|
||||||
|
port: rmMeta.port,
|
||||||
|
username: rmMeta.username };
|
||||||
|
if (rmMeta.passwd) {
|
||||||
|
connectConfig.password = rmMeta.passwd;
|
||||||
|
} else if(rmMeta.sshKeyPath) {
|
||||||
|
if(!fs.existsSync(rmMeta.sshKeyPath)) {
|
||||||
|
//SSh key path is not a valid file, reject
|
||||||
|
deferred.reject(new Error(`${rmMeta.sshKeyPath} does not exist.`));
|
||||||
|
}
|
||||||
|
const privateKey: string = fs.readFileSync(rmMeta.sshKeyPath, 'utf8');
|
||||||
|
|
||||||
|
connectConfig.privateKey = privateKey;
|
||||||
|
connectConfig.passphrase = rmMeta.passphrase;
|
||||||
|
} else {
|
||||||
|
deferred.reject(new Error(`No valid passwd or sshKeyPath is configed.`));
|
||||||
|
}
|
||||||
this.machineSSHClientMap.set(rmMeta, conn);
|
this.machineSSHClientMap.set(rmMeta, conn);
|
||||||
conn.on('ready', async () => {
|
conn.on('ready', async () => {
|
||||||
await this.initRemoteMachineOnConnected(rmMeta, conn);
|
await this.initRemoteMachineOnConnected(rmMeta, conn);
|
||||||
|
@ -299,12 +333,7 @@ class RemoteMachineTrainingService implements TrainingService {
|
||||||
}).on('error', (err: Error) => {
|
}).on('error', (err: Error) => {
|
||||||
// SSH connection error, reject with error message
|
// SSH connection error, reject with error message
|
||||||
deferred.reject(new Error(err.message));
|
deferred.reject(new Error(err.message));
|
||||||
}).connect({
|
}).connect(connectConfig);
|
||||||
host: rmMeta.ip,
|
|
||||||
port: rmMeta.port,
|
|
||||||
username: rmMeta.username,
|
|
||||||
password: rmMeta.passwd
|
|
||||||
});
|
|
||||||
});
|
});
|
||||||
|
|
||||||
return deferred.promise;
|
return deferred.promise;
|
||||||
|
@ -402,7 +431,7 @@ class RemoteMachineTrainingService implements TrainingService {
|
||||||
(typeof cuda_visible_device === 'string' && cuda_visible_device.length > 0) ?
|
(typeof cuda_visible_device === 'string' && cuda_visible_device.length > 0) ?
|
||||||
`CUDA_VISIBLE_DEVICES=${cuda_visible_device} ` : `CUDA_VISIBLE_DEVICES=" " `,
|
`CUDA_VISIBLE_DEVICES=${cuda_visible_device} ` : `CUDA_VISIBLE_DEVICES=" " `,
|
||||||
this.trialConfig.command,
|
this.trialConfig.command,
|
||||||
path.join(trialWorkingFolder, '.nni', 'stderr'),
|
path.join(trialWorkingFolder, 'stderr'),
|
||||||
path.join(trialWorkingFolder, '.nni', 'code'));
|
path.join(trialWorkingFolder, '.nni', 'code'));
|
||||||
|
|
||||||
//create tmp trial working folder locally.
|
//create tmp trial working folder locally.
|
||||||
|
|
|
@ -0,0 +1,131 @@
|
||||||
|
# Copyright (c) Microsoft Corporation
|
||||||
|
# All rights reserved.
|
||||||
|
#
|
||||||
|
# MIT License
|
||||||
|
#
|
||||||
|
# Permission is hereby granted, free of charge,
|
||||||
|
# to any person obtaining a copy of this software and associated
|
||||||
|
# documentation files (the "Software"), to deal in the Software without restriction,
|
||||||
|
# including without limitation the rights to use, copy, modify, merge, publish,
|
||||||
|
# distribute, sublicense, and/or sell copies of the Software, and
|
||||||
|
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
||||||
|
# The above copyright notice and this permission notice shall be included
|
||||||
|
# in all copies or substantial portions of the Software.
|
||||||
|
#
|
||||||
|
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
|
||||||
|
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
|
||||||
|
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
'''
|
||||||
|
__main__.py
|
||||||
|
'''
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import argparse
|
||||||
|
import logging
|
||||||
|
import json
|
||||||
|
import importlib
|
||||||
|
|
||||||
|
from nni.msg_dispatcher import MsgDispatcher
|
||||||
|
from nni.hyperopt_tuner.hyperopt_tuner import HyperoptTuner
|
||||||
|
from nni.evolution_tuner.evolution_tuner import EvolutionTuner
|
||||||
|
from nni.medianstop_assessor.medianstop_assessor import MedianstopAssessor
|
||||||
|
|
||||||
|
logger = logging.getLogger('nni.main')
|
||||||
|
logger.debug('START')
|
||||||
|
|
||||||
|
BUILT_IN_CLASS_NAMES = ['HyperoptTuner', 'EvolutionTuner', 'MedianstopAssessor']
|
||||||
|
|
||||||
|
def create_builtin_class_instance(classname, jsonstr_args):
|
||||||
|
if jsonstr_args:
|
||||||
|
class_args = json.loads(jsonstr_args)
|
||||||
|
instance = eval(classname)(**class_args)
|
||||||
|
else:
|
||||||
|
instance = eval(classname)()
|
||||||
|
return instance
|
||||||
|
|
||||||
|
def create_customized_class_instance(class_dir, class_filename, classname, jsonstr_args):
|
||||||
|
if not os.path.isfile(os.path.join(class_dir, class_filename)):
|
||||||
|
raise ValueError('Class file not found: {}'.format(os.path.join(class_dir, class_filename)))
|
||||||
|
sys.path.append(class_dir)
|
||||||
|
module_name = class_filename.split('.')[0]
|
||||||
|
class_module = importlib.import_module(module_name)
|
||||||
|
class_constructor = getattr(class_module, classname)
|
||||||
|
if jsonstr_args:
|
||||||
|
class_args = json.loads(jsonstr_args)
|
||||||
|
instance = class_constructor(**class_args)
|
||||||
|
else:
|
||||||
|
instance = class_constructor()
|
||||||
|
return instance
|
||||||
|
|
||||||
|
def parse_args():
|
||||||
|
parser = argparse.ArgumentParser(description='parse command line parameters.')
|
||||||
|
parser.add_argument('--tuner_class_name', type=str, required=True,
|
||||||
|
help='Tuner class name, the class must be a subclass of nni.Tuner')
|
||||||
|
parser.add_argument('--tuner_args', type=str, required=False,
|
||||||
|
help='Parameters pass to tuner __init__ constructor')
|
||||||
|
parser.add_argument('--tuner_directory', type=str, required=False,
|
||||||
|
help='Tuner directory')
|
||||||
|
parser.add_argument('--tuner_class_filename', type=str, required=False,
|
||||||
|
help='Tuner class file path')
|
||||||
|
|
||||||
|
parser.add_argument('--assessor_class_name', type=str, required=False,
|
||||||
|
help='Assessor class name, the class must be a subclass of nni.Assessor')
|
||||||
|
parser.add_argument('--assessor_args', type=str, required=False,
|
||||||
|
help='Parameters pass to assessor __init__ constructor')
|
||||||
|
parser.add_argument('--assessor_directory', type=str, required=False,
|
||||||
|
help='Assessor directory')
|
||||||
|
parser.add_argument('--assessor_class_filename', type=str, required=False,
|
||||||
|
help='Assessor class file path')
|
||||||
|
|
||||||
|
flags, _ = parser.parse_known_args()
|
||||||
|
return flags
|
||||||
|
|
||||||
|
def main():
|
||||||
|
'''
|
||||||
|
main function.
|
||||||
|
'''
|
||||||
|
|
||||||
|
args = parse_args()
|
||||||
|
|
||||||
|
tuner = None
|
||||||
|
assessor = None
|
||||||
|
|
||||||
|
if args.tuner_class_name is None:
|
||||||
|
raise ValueError('Tuner must be specified')
|
||||||
|
if args.tuner_class_name in BUILT_IN_CLASS_NAMES:
|
||||||
|
tuner = create_builtin_class_instance(args.tuner_class_name, args.tuner_args)
|
||||||
|
else:
|
||||||
|
tuner = create_customized_class_instance(args.tuner_directory, args.tuner_class_filename, args.tuner_class_name, args.tuner_args)
|
||||||
|
|
||||||
|
if args.assessor_class_name:
|
||||||
|
if args.assessor_class_name in BUILT_IN_CLASS_NAMES:
|
||||||
|
assessor = create_builtin_class_instance(args.assessor_class_name, args.assessor_args)
|
||||||
|
else:
|
||||||
|
assessor = create_customized_class_instance(args.assessor_directory, \
|
||||||
|
args.assessor_class_filename, args.assessor_class_name, args.assessor_args)
|
||||||
|
|
||||||
|
if tuner is None:
|
||||||
|
raise AssertionError('Failed to create Tuner instance')
|
||||||
|
|
||||||
|
dispatcher = MsgDispatcher(tuner, assessor)
|
||||||
|
|
||||||
|
try:
|
||||||
|
dispatcher.run()
|
||||||
|
tuner._on_exit()
|
||||||
|
if assessor is not None:
|
||||||
|
assessor._on_exit()
|
||||||
|
except Exception as exception:
|
||||||
|
logger.exception(exception)
|
||||||
|
tuner._on_error()
|
||||||
|
if assessor is not None:
|
||||||
|
assessor._on_error()
|
||||||
|
raise
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
try:
|
||||||
|
main()
|
||||||
|
except Exception as exception:
|
||||||
|
logger.exception(exception)
|
||||||
|
raise
|
|
@ -19,27 +19,18 @@
|
||||||
# ==================================================================================================
|
# ==================================================================================================
|
||||||
|
|
||||||
|
|
||||||
from collections import defaultdict
|
|
||||||
from enum import Enum
|
|
||||||
import logging
|
import logging
|
||||||
import os
|
from enum import Enum
|
||||||
|
|
||||||
import json_tricks
|
from .recoverable import Recoverable
|
||||||
|
|
||||||
from .common import init_logger
|
|
||||||
from .protocol import CommandType, send, receive
|
|
||||||
|
|
||||||
|
|
||||||
init_logger('assessor.log')
|
|
||||||
_logger = logging.getLogger(__name__)
|
_logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class AssessResult(Enum):
|
class AssessResult(Enum):
|
||||||
Good = True
|
Good = True
|
||||||
Bad = False
|
Bad = False
|
||||||
|
|
||||||
|
class Assessor(Recoverable):
|
||||||
class Assessor:
|
|
||||||
# pylint: disable=no-self-use,unused-argument
|
# pylint: disable=no-self-use,unused-argument
|
||||||
|
|
||||||
def assess_trial(self, trial_job_id, trial_history):
|
def assess_trial(self, trial_job_id, trial_history):
|
||||||
|
@ -57,101 +48,22 @@ class Assessor:
|
||||||
"""
|
"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def load_checkpoint(self, path):
|
def load_checkpoint(self):
|
||||||
"""Load the checkpoint of assessor.
|
"""Load the checkpoint of assessr.
|
||||||
path: checkpoint directory of assessor
|
path: checkpoint directory for assessor
|
||||||
"""
|
"""
|
||||||
_logger.info('Load checkpoint ignored by assessor')
|
checkpoin_path = self.get_checkpoint_path()
|
||||||
|
_logger.info('Load checkpoint ignored by assessor, checkpoint path: %s' % checkpoin_path)
|
||||||
|
|
||||||
def save_checkpoint(self, path):
|
def save_checkpoint(self):
|
||||||
"""Save the checkpoint of assessor.
|
"""Save the checkpoint of assessor.
|
||||||
path: checkpoint directory of assessor
|
path: checkpoint directory for assessor
|
||||||
"""
|
"""
|
||||||
_logger.info('Save checkpoint ignored by assessor')
|
checkpoin_path = self.get_checkpoint_path()
|
||||||
|
_logger.info('Save checkpoint ignored by assessor, checkpoint path: %s' % checkpoin_path)
|
||||||
|
|
||||||
def request_save_checkpoint(self):
|
def _on_exit(self):
|
||||||
"""Request to save the checkpoint of assessor
|
pass
|
||||||
"""
|
|
||||||
self.save_checkpoint(os.getenv('NNI_CHECKPOINT_DIRECTORY'))
|
|
||||||
|
|
||||||
def run(self):
|
def _on_error(self):
|
||||||
"""Run the assessor.
|
pass
|
||||||
This function will never return unless raise.
|
|
||||||
"""
|
|
||||||
mode = os.getenv('NNI_MODE')
|
|
||||||
if mode == 'resume':
|
|
||||||
self.load_checkpoint(os.getenv('NNI_CHECKPOINT_DIRECTORY'))
|
|
||||||
while _handle_request(self):
|
|
||||||
pass
|
|
||||||
_logger.info('Terminated by NNI manager')
|
|
||||||
|
|
||||||
|
|
||||||
_trial_history = defaultdict(dict)
|
|
||||||
'''key: trial job ID; value: intermediate results, mapping from sequence number to data'''
|
|
||||||
|
|
||||||
_ended_trials = set()
|
|
||||||
'''trial_job_id of all ended trials.
|
|
||||||
We need this because NNI manager may send metrics after reporting a trial ended.
|
|
||||||
TODO: move this logic to NNI manager
|
|
||||||
'''
|
|
||||||
|
|
||||||
def _sort_history(history):
|
|
||||||
ret = [ ]
|
|
||||||
for i, _ in enumerate(history):
|
|
||||||
if i in history:
|
|
||||||
ret.append(history[i])
|
|
||||||
else:
|
|
||||||
break
|
|
||||||
return ret
|
|
||||||
|
|
||||||
def _handle_request(assessor):
|
|
||||||
_logger.debug('waiting receive_message')
|
|
||||||
|
|
||||||
command, data = receive()
|
|
||||||
|
|
||||||
_logger.debug(command)
|
|
||||||
_logger.debug(data)
|
|
||||||
|
|
||||||
if command is CommandType.Terminate:
|
|
||||||
return False
|
|
||||||
|
|
||||||
data = json_tricks.loads(data)
|
|
||||||
|
|
||||||
if command is CommandType.ReportMetricData:
|
|
||||||
if data['type'] != 'PERIODICAL':
|
|
||||||
return True
|
|
||||||
|
|
||||||
trial_job_id = data['trial_job_id']
|
|
||||||
if trial_job_id in _ended_trials:
|
|
||||||
return True
|
|
||||||
|
|
||||||
history = _trial_history[trial_job_id]
|
|
||||||
history[data['sequence']] = data['value']
|
|
||||||
ordered_history = _sort_history(history)
|
|
||||||
if len(ordered_history) < data['sequence']: # no user-visible update since last time
|
|
||||||
return True
|
|
||||||
|
|
||||||
result = assessor.assess_trial(trial_job_id, ordered_history)
|
|
||||||
if isinstance(result, bool):
|
|
||||||
result = AssessResult.Good if result else AssessResult.Bad
|
|
||||||
elif not isinstance(result, AssessResult):
|
|
||||||
msg = 'Result of Assessor.assess_trial must be an object of AssessResult, not %s'
|
|
||||||
raise RuntimeError(msg % type(result))
|
|
||||||
|
|
||||||
if result is AssessResult.Bad:
|
|
||||||
_logger.debug('BAD, kill %s', trial_job_id)
|
|
||||||
send(CommandType.KillTrialJob, json_tricks.dumps(trial_job_id))
|
|
||||||
else:
|
|
||||||
_logger.debug('GOOD')
|
|
||||||
|
|
||||||
elif command is CommandType.TrialEnd:
|
|
||||||
trial_job_id = data['trial_job_id']
|
|
||||||
_ended_trials.add(trial_job_id)
|
|
||||||
if trial_job_id in _trial_history:
|
|
||||||
_trial_history.pop(trial_job_id)
|
|
||||||
assessor.trial_end(trial_job_id, data['event'] == 'SUCCEEDED')
|
|
||||||
|
|
||||||
else:
|
|
||||||
raise AssertionError('Unsupported command: %s' % command)
|
|
||||||
|
|
||||||
return True
|
|
||||||
|
|
|
@ -26,7 +26,7 @@ class MedianstopAssessor(Assessor):
|
||||||
if the trial’s best objective value by step S is strictly worse than the median value
|
if the trial’s best objective value by step S is strictly worse than the median value
|
||||||
of the running averages of all completed trials’ objectives reported up to step S
|
of the running averages of all completed trials’ objectives reported up to step S
|
||||||
'''
|
'''
|
||||||
def __init__(self, start_step, optimize_mode):
|
def __init__(self, optimize_mode='maximize', start_step=0):
|
||||||
self.start_step = start_step
|
self.start_step = start_step
|
||||||
self.running_history = dict()
|
self.running_history = dict()
|
||||||
self.completed_avg_history = dict()
|
self.completed_avg_history = dict()
|
||||||
|
|
|
@ -0,0 +1,165 @@
|
||||||
|
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||||
|
#
|
||||||
|
# MIT License
|
||||||
|
#
|
||||||
|
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
|
||||||
|
# associated documentation files (the "Software"), to deal in the Software without restriction,
|
||||||
|
# including without limitation the rights to use, copy, modify, merge, publish, distribute,
|
||||||
|
# sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
|
||||||
|
# furnished to do so, subject to the following conditions:
|
||||||
|
#
|
||||||
|
# The above copyright notice and this permission notice shall be included in all copies or
|
||||||
|
# substantial portions of the Software.
|
||||||
|
#
|
||||||
|
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
|
||||||
|
# NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
|
||||||
|
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
|
||||||
|
# OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
# ==================================================================================================
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from collections import defaultdict
|
||||||
|
import json_tricks
|
||||||
|
|
||||||
|
from .protocol import CommandType, send
|
||||||
|
from .msg_dispatcher_base import MsgDispatcherBase
|
||||||
|
from .assessor import AssessResult
|
||||||
|
|
||||||
|
_logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Assessor global variables
|
||||||
|
_trial_history = defaultdict(dict)
|
||||||
|
'''key: trial job ID; value: intermediate results, mapping from sequence number to data'''
|
||||||
|
|
||||||
|
_ended_trials = set()
|
||||||
|
'''trial_job_id of all ended trials.
|
||||||
|
We need this because NNI manager may send metrics after reporting a trial ended.
|
||||||
|
TODO: move this logic to NNI manager
|
||||||
|
'''
|
||||||
|
|
||||||
|
def _sort_history(history):
|
||||||
|
ret = [ ]
|
||||||
|
for i, _ in enumerate(history):
|
||||||
|
if i in history:
|
||||||
|
ret.append(history[i])
|
||||||
|
else:
|
||||||
|
break
|
||||||
|
return ret
|
||||||
|
|
||||||
|
# Tuner global variables
|
||||||
|
_next_parameter_id = 0
|
||||||
|
_trial_params = {}
|
||||||
|
'''key: trial job ID; value: parameters'''
|
||||||
|
_customized_parameter_ids = set()
|
||||||
|
|
||||||
|
def _create_parameter_id():
|
||||||
|
global _next_parameter_id # pylint: disable=global-statement
|
||||||
|
_next_parameter_id += 1
|
||||||
|
return _next_parameter_id - 1
|
||||||
|
|
||||||
|
def _pack_parameter(parameter_id, params, customized=False):
|
||||||
|
_trial_params[parameter_id] = params
|
||||||
|
ret = {
|
||||||
|
'parameter_id': parameter_id,
|
||||||
|
'parameter_source': 'customized' if customized else 'algorithm',
|
||||||
|
'parameters': params
|
||||||
|
}
|
||||||
|
return json_tricks.dumps(ret)
|
||||||
|
|
||||||
|
class MsgDispatcher(MsgDispatcherBase):
|
||||||
|
def __init__(self, tuner, assessor=None):
|
||||||
|
super()
|
||||||
|
self.tuner = tuner
|
||||||
|
self.assessor = assessor
|
||||||
|
if assessor is None:
|
||||||
|
_logger.debug('Assessor is not configured')
|
||||||
|
|
||||||
|
def load_checkpoint(self):
|
||||||
|
self.tuner.load_checkpoint()
|
||||||
|
if self.assessor is not None:
|
||||||
|
self.assessor.load_checkpoint()
|
||||||
|
|
||||||
|
def save_checkpoint(self):
|
||||||
|
self.tuner.save_checkpoint()
|
||||||
|
if self.assessor is not None:
|
||||||
|
self.assessor.save_checkpoint()
|
||||||
|
|
||||||
|
def handle_request_trial_jobs(self, data):
|
||||||
|
# data: number or trial jobs
|
||||||
|
ids = [_create_parameter_id() for _ in range(data)]
|
||||||
|
params_list = self.tuner.generate_multiple_parameters(ids)
|
||||||
|
assert len(ids) == len(params_list)
|
||||||
|
for i, _ in enumerate(ids):
|
||||||
|
send(CommandType.NewTrialJob, _pack_parameter(ids[i], params_list[i]))
|
||||||
|
return True
|
||||||
|
|
||||||
|
def handle_update_search_space(self, data):
|
||||||
|
self.tuner.update_search_space(data)
|
||||||
|
return True
|
||||||
|
|
||||||
|
def handle_add_customized_trial(self, data):
|
||||||
|
# data: parameters
|
||||||
|
id_ = _create_parameter_id()
|
||||||
|
_customized_parameter_ids.add(id_)
|
||||||
|
send(CommandType.NewTrialJob, _pack_parameter(id_, data, customized=True))
|
||||||
|
return True
|
||||||
|
|
||||||
|
def handle_report_metric_data(self, data):
|
||||||
|
if data['type'] == 'FINAL':
|
||||||
|
id_ = data['parameter_id']
|
||||||
|
if id_ in _customized_parameter_ids:
|
||||||
|
self.tuner.receive_customized_trial_result(id_, _trial_params[id_], data['value'])
|
||||||
|
else:
|
||||||
|
self.tuner.receive_trial_result(id_, _trial_params[id_], data['value'])
|
||||||
|
elif data['type'] == 'PERIODICAL':
|
||||||
|
if self.assessor is not None:
|
||||||
|
self._handle_intermediate_metric_data(data)
|
||||||
|
else:
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
raise ValueError('Data type not supported: {}'.format(data['type']))
|
||||||
|
|
||||||
|
return True
|
||||||
|
|
||||||
|
def handle_trial_end(self, data):
|
||||||
|
trial_job_id = data['trial_job_id']
|
||||||
|
_ended_trials.add(trial_job_id)
|
||||||
|
if trial_job_id in _trial_history:
|
||||||
|
_trial_history.pop(trial_job_id)
|
||||||
|
if self.assessor is not None:
|
||||||
|
self.assessor.trial_end(trial_job_id, data['event'] == 'SUCCEEDED')
|
||||||
|
return True
|
||||||
|
|
||||||
|
def _handle_intermediate_metric_data(self, data):
|
||||||
|
if data['type'] != 'PERIODICAL':
|
||||||
|
return True
|
||||||
|
if self.assessor is None:
|
||||||
|
return True
|
||||||
|
|
||||||
|
trial_job_id = data['trial_job_id']
|
||||||
|
if trial_job_id in _ended_trials:
|
||||||
|
return True
|
||||||
|
|
||||||
|
history = _trial_history[trial_job_id]
|
||||||
|
history[data['sequence']] = data['value']
|
||||||
|
ordered_history = _sort_history(history)
|
||||||
|
if len(ordered_history) < data['sequence']: # no user-visible update since last time
|
||||||
|
return True
|
||||||
|
|
||||||
|
try:
|
||||||
|
result = self.assessor.assess_trial(trial_job_id, ordered_history)
|
||||||
|
except Exception as e:
|
||||||
|
_logger.exception('Assessor error')
|
||||||
|
|
||||||
|
if isinstance(result, bool):
|
||||||
|
result = AssessResult.Good if result else AssessResult.Bad
|
||||||
|
elif not isinstance(result, AssessResult):
|
||||||
|
msg = 'Result of Assessor.assess_trial must be an object of AssessResult, not %s'
|
||||||
|
raise RuntimeError(msg % type(result))
|
||||||
|
|
||||||
|
if result is AssessResult.Bad:
|
||||||
|
_logger.debug('BAD, kill %s', trial_job_id)
|
||||||
|
send(CommandType.KillTrialJob, json_tricks.dumps(trial_job_id))
|
||||||
|
else:
|
||||||
|
_logger.debug('GOOD')
|
|
@ -0,0 +1,90 @@
|
||||||
|
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||||
|
#
|
||||||
|
# MIT License
|
||||||
|
#
|
||||||
|
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
|
||||||
|
# associated documentation files (the "Software"), to deal in the Software without restriction,
|
||||||
|
# including without limitation the rights to use, copy, modify, merge, publish, distribute,
|
||||||
|
# sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
|
||||||
|
# furnished to do so, subject to the following conditions:
|
||||||
|
#
|
||||||
|
# The above copyright notice and this permission notice shall be included in all copies or
|
||||||
|
# substantial portions of the Software.
|
||||||
|
#
|
||||||
|
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
|
||||||
|
# NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
|
||||||
|
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
|
||||||
|
# OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
# ==================================================================================================
|
||||||
|
|
||||||
|
#import json_tricks
|
||||||
|
import os
|
||||||
|
import logging
|
||||||
|
import json_tricks
|
||||||
|
|
||||||
|
from .common import init_logger
|
||||||
|
from .recoverable import Recoverable
|
||||||
|
from .protocol import CommandType, receive
|
||||||
|
|
||||||
|
init_logger('dispatcher.log')
|
||||||
|
_logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
class MsgDispatcherBase(Recoverable):
|
||||||
|
def run(self):
|
||||||
|
"""Run the tuner.
|
||||||
|
This function will never return unless raise.
|
||||||
|
"""
|
||||||
|
mode = os.getenv('NNI_MODE')
|
||||||
|
if mode == 'resume':
|
||||||
|
self.load_checkpoint()
|
||||||
|
|
||||||
|
while self.handle_request():
|
||||||
|
pass
|
||||||
|
|
||||||
|
_logger.info('Terminated by NNI manager')
|
||||||
|
|
||||||
|
def handle_request(self):
|
||||||
|
_logger.debug('waiting receive_message')
|
||||||
|
|
||||||
|
command, data = receive()
|
||||||
|
if command is None:
|
||||||
|
return False
|
||||||
|
|
||||||
|
_logger.debug('handle request: command: [{}], data: [{}]'.format(command, data))
|
||||||
|
|
||||||
|
if command is CommandType.Terminate:
|
||||||
|
return False
|
||||||
|
|
||||||
|
data = json_tricks.loads(data)
|
||||||
|
|
||||||
|
command_handlers = {
|
||||||
|
# Tunner commands:
|
||||||
|
CommandType.RequestTrialJobs: self.handle_request_trial_jobs,
|
||||||
|
CommandType.UpdateSearchSpace: self.handle_update_search_space,
|
||||||
|
CommandType.AddCustomizedTrialJob: self.handle_add_customized_trial,
|
||||||
|
|
||||||
|
# Tunner/Assessor commands:
|
||||||
|
CommandType.ReportMetricData: self.handle_report_metric_data,
|
||||||
|
|
||||||
|
CommandType.TrialEnd: self.handle_trial_end,
|
||||||
|
}
|
||||||
|
if command not in command_handlers:
|
||||||
|
raise AssertionError('Unsupported command: {}'.format(command))
|
||||||
|
|
||||||
|
return command_handlers[command](data)
|
||||||
|
|
||||||
|
def handle_request_trial_jobs(self, data):
|
||||||
|
raise NotImplementedError('handle_request_trial_jobs not implemented')
|
||||||
|
|
||||||
|
def handle_update_search_space(self, data):
|
||||||
|
raise NotImplementedError('handle_update_search_space not implemented')
|
||||||
|
|
||||||
|
def handle_add_customized_trial(self, data):
|
||||||
|
raise NotImplementedError('handle_add_customized_trial not implemented')
|
||||||
|
|
||||||
|
def handle_report_metric_data(self, data):
|
||||||
|
raise NotImplementedError('handle_report_metric_data not implemented')
|
||||||
|
|
||||||
|
def handle_trial_end(self, data):
|
||||||
|
raise NotImplementedError('handle_trial_end not implemented')
|
|
@ -28,7 +28,7 @@ from ..common import init_logger
|
||||||
_dir = os.environ['NNI_SYS_DIR']
|
_dir = os.environ['NNI_SYS_DIR']
|
||||||
_metric_file = open(os.path.join(_dir, '.nni', 'metrics'), 'wb')
|
_metric_file = open(os.path.join(_dir, '.nni', 'metrics'), 'wb')
|
||||||
|
|
||||||
_log_file_path = os.path.join(_dir, '.nni', 'trial.log')
|
_log_file_path = os.path.join(_dir, 'trial.log')
|
||||||
init_logger(_log_file_path)
|
init_logger(_log_file_path)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -55,6 +55,7 @@ def send(command, data):
|
||||||
data = data.encode('utf8')
|
data = data.encode('utf8')
|
||||||
assert len(data) < 1000000, 'Command too long'
|
assert len(data) < 1000000, 'Command too long'
|
||||||
msg = b'%b%06d%b' % (command.value, len(data), data)
|
msg = b'%b%06d%b' % (command.value, len(data), data)
|
||||||
|
logging.getLogger(__name__).debug('Sending command, data: [%s]' % data)
|
||||||
_out_file.write(msg)
|
_out_file.write(msg)
|
||||||
_out_file.flush()
|
_out_file.flush()
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,34 @@
|
||||||
|
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||||
|
#
|
||||||
|
# MIT License
|
||||||
|
#
|
||||||
|
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
|
||||||
|
# associated documentation files (the "Software"), to deal in the Software without restriction,
|
||||||
|
# including without limitation the rights to use, copy, modify, merge, publish, distribute,
|
||||||
|
# sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
|
||||||
|
# furnished to do so, subject to the following conditions:
|
||||||
|
#
|
||||||
|
# The above copyright notice and this permission notice shall be included in all copies or
|
||||||
|
# substantial portions of the Software.
|
||||||
|
#
|
||||||
|
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
|
||||||
|
# NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
|
||||||
|
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
|
||||||
|
# OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
# ==================================================================================================
|
||||||
|
|
||||||
|
import os
|
||||||
|
|
||||||
|
class Recoverable:
|
||||||
|
def load_checkpoint(self):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def save_checkpont(self):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def get_checkpoint_path(self):
|
||||||
|
ckp_path = os.getenv('NNI_CHECKPOINT_DIRECTORY')
|
||||||
|
if ckp_path is not None and os.path.isdir(ckp_path):
|
||||||
|
return ckp_path
|
||||||
|
return None
|
|
@ -20,19 +20,13 @@
|
||||||
|
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import os
|
|
||||||
|
|
||||||
import json_tricks
|
from .recoverable import Recoverable
|
||||||
|
|
||||||
from .common import init_logger
|
|
||||||
from .protocol import CommandType, send, receive
|
|
||||||
|
|
||||||
|
|
||||||
init_logger('tuner.log')
|
|
||||||
_logger = logging.getLogger(__name__)
|
_logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class Tuner:
|
class Tuner(Recoverable):
|
||||||
# pylint: disable=no-self-use,unused-argument
|
# pylint: disable=no-self-use,unused-argument
|
||||||
|
|
||||||
def generate_parameters(self, parameter_id):
|
def generate_parameters(self, parameter_id):
|
||||||
|
@ -72,100 +66,22 @@ class Tuner:
|
||||||
"""
|
"""
|
||||||
raise NotImplementedError('Tuner: update_search_space not implemented')
|
raise NotImplementedError('Tuner: update_search_space not implemented')
|
||||||
|
|
||||||
def load_checkpoint(self, path):
|
def load_checkpoint(self):
|
||||||
"""Load the checkpoint of tuner.
|
"""Load the checkpoint of tuner.
|
||||||
path: checkpoint directory for tuner
|
path: checkpoint directory for tuner
|
||||||
"""
|
"""
|
||||||
_logger.info('Load checkpoint ignored by tuner')
|
checkpoin_path = self.get_checkpoint_path()
|
||||||
|
_logger.info('Load checkpoint ignored by tuner, checkpoint path: %s' % checkpoin_path)
|
||||||
|
|
||||||
def save_checkpoint(self, path):
|
def save_checkpoint(self):
|
||||||
"""Save the checkpoint of tuner.
|
"""Save the checkpoint of tuner.
|
||||||
path: checkpoint directory for tuner
|
path: checkpoint directory for tuner
|
||||||
"""
|
"""
|
||||||
_logger.info('Save checkpoint ignored by tuner')
|
checkpoin_path = self.get_checkpoint_path()
|
||||||
|
_logger.info('Save checkpoint ignored by tuner, checkpoint path: %s' % checkpoin_path)
|
||||||
|
|
||||||
def request_save_checkpoint(self):
|
def _on_exit(self):
|
||||||
"""Request to save the checkpoint of tuner
|
pass
|
||||||
"""
|
|
||||||
self.save_checkpoint(os.getenv('NNI_CHECKPOINT_DIRECTORY'))
|
|
||||||
|
|
||||||
def run(self):
|
def _on_error(self):
|
||||||
"""Run the tuner.
|
pass
|
||||||
This function will never return unless raise.
|
|
||||||
"""
|
|
||||||
mode = os.getenv('NNI_MODE')
|
|
||||||
if mode == 'resume':
|
|
||||||
self.load_checkpoint(os.getenv('NNI_CHECKPOINT_DIRECTORY'))
|
|
||||||
while _handle_request(self):
|
|
||||||
pass
|
|
||||||
_logger.info('Terminated by NNI manager')
|
|
||||||
|
|
||||||
|
|
||||||
_next_parameter_id = 0
|
|
||||||
_trial_params = {}
|
|
||||||
'''key: trial job ID; value: parameters'''
|
|
||||||
_customized_parameter_ids = set()
|
|
||||||
|
|
||||||
|
|
||||||
def _create_parameter_id():
|
|
||||||
global _next_parameter_id # pylint: disable=global-statement
|
|
||||||
_next_parameter_id += 1
|
|
||||||
return _next_parameter_id - 1
|
|
||||||
|
|
||||||
|
|
||||||
def _pack_parameter(parameter_id, params, customized=False):
|
|
||||||
_trial_params[parameter_id] = params
|
|
||||||
ret = {
|
|
||||||
'parameter_id': parameter_id,
|
|
||||||
'parameter_source': 'customized' if customized else 'algorithm',
|
|
||||||
'parameters': params
|
|
||||||
}
|
|
||||||
return json_tricks.dumps(ret)
|
|
||||||
|
|
||||||
|
|
||||||
def _handle_request(tuner):
|
|
||||||
_logger.debug('waiting receive_message')
|
|
||||||
|
|
||||||
command, data = receive()
|
|
||||||
if command is None:
|
|
||||||
return False
|
|
||||||
|
|
||||||
_logger.debug(command)
|
|
||||||
_logger.debug(data)
|
|
||||||
|
|
||||||
if command is CommandType.Terminate:
|
|
||||||
return False
|
|
||||||
|
|
||||||
data = json_tricks.loads(data)
|
|
||||||
|
|
||||||
if command is CommandType.RequestTrialJobs:
|
|
||||||
# data: number or trial jobs
|
|
||||||
ids = [_create_parameter_id() for _ in range(data)]
|
|
||||||
params_list = list(tuner.generate_multiple_parameters(ids))
|
|
||||||
assert len(ids) == len(params_list)
|
|
||||||
for i, _ in enumerate(ids):
|
|
||||||
send(CommandType.NewTrialJob, _pack_parameter(ids[i], params_list[i]))
|
|
||||||
|
|
||||||
elif command is CommandType.ReportMetricData:
|
|
||||||
# data: { 'type': 'FINAL', 'parameter_id': ..., 'value': ... }
|
|
||||||
if data['type'] == 'FINAL':
|
|
||||||
id_ = data['parameter_id']
|
|
||||||
if id_ in _customized_parameter_ids:
|
|
||||||
tuner.receive_customized_trial_result(id_, _trial_params[id_], data['value'])
|
|
||||||
else:
|
|
||||||
tuner.receive_trial_result(id_, _trial_params[id_], data['value'])
|
|
||||||
|
|
||||||
elif command is CommandType.UpdateSearchSpace:
|
|
||||||
# data: search space
|
|
||||||
tuner.update_search_space(data)
|
|
||||||
|
|
||||||
elif command is CommandType.AddCustomizedTrialJob:
|
|
||||||
# data: parameters
|
|
||||||
id_ = _create_parameter_id()
|
|
||||||
_customized_parameter_ids.add(id_)
|
|
||||||
send(CommandType.NewTrialJob, _pack_parameter(id_, data, customized=True))
|
|
||||||
|
|
||||||
else:
|
|
||||||
raise AssertionError('Unsupported command: %s' % command)
|
|
||||||
|
|
||||||
return True
|
|
||||||
|
|
|
@ -261,17 +261,12 @@ class Control extends React.Component<{}, ControlState> {
|
||||||
} else {
|
} else {
|
||||||
this.addButtonLoad();
|
this.addButtonLoad();
|
||||||
// new experiment obj
|
// new experiment obj
|
||||||
const parameter = [];
|
|
||||||
parameter.push({
|
|
||||||
parameters: addTrial
|
|
||||||
});
|
|
||||||
const sendPara = JSON.stringify(parameter[0]);
|
|
||||||
axios(`${MANAGER_IP}/trial-jobs`, {
|
axios(`${MANAGER_IP}/trial-jobs`, {
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
headers: {
|
headers: {
|
||||||
'Content-Type': 'application/json'
|
'Content-Type': 'application/json'
|
||||||
},
|
},
|
||||||
data: sendPara
|
data: addTrial
|
||||||
}).then(res => {
|
}).then(res => {
|
||||||
if (res.status === 200) {
|
if (res.status === 200) {
|
||||||
message.success('Submit successfully');
|
message.success('Submit successfully');
|
||||||
|
|
|
@ -71,7 +71,7 @@ class Para extends React.Component<{}, ParaState> {
|
||||||
paraNodata: '',
|
paraNodata: '',
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
hyperParaPic = () => {
|
hyperParaPic = () => {
|
||||||
axios
|
axios
|
||||||
.all([
|
.all([
|
||||||
|
@ -238,9 +238,6 @@ class Para extends React.Component<{}, ParaState> {
|
||||||
type: 'continuous',
|
type: 'continuous',
|
||||||
min: 0,
|
min: 0,
|
||||||
max: 1,
|
max: 1,
|
||||||
realtime: false,
|
|
||||||
calculable: true,
|
|
||||||
precision: 1,
|
|
||||||
// gradient color
|
// gradient color
|
||||||
color: ['#fb7c7c', 'yellow', 'lightblue']
|
color: ['#fb7c7c', 'yellow', 'lightblue']
|
||||||
},
|
},
|
||||||
|
@ -357,6 +354,7 @@ class Para extends React.Component<{}, ParaState> {
|
||||||
this._isMounted = false;
|
this._isMounted = false;
|
||||||
window.clearInterval(this.intervalIDPara);
|
window.clearInterval(this.intervalIDPara);
|
||||||
}
|
}
|
||||||
|
|
||||||
render() {
|
render() {
|
||||||
const { option, paraNodata, dimName } = this.state;
|
const { option, paraNodata, dimName } = this.state;
|
||||||
return (
|
return (
|
||||||
|
@ -365,6 +363,7 @@ class Para extends React.Component<{}, ParaState> {
|
||||||
<div className="paraTitle">
|
<div className="paraTitle">
|
||||||
<div className="paraLeft">Hyper Parameter</div>
|
<div className="paraLeft">Hyper Parameter</div>
|
||||||
<div className="paraRight">
|
<div className="paraRight">
|
||||||
|
{/* <span>top</span> */}
|
||||||
<Select
|
<Select
|
||||||
className="parapercent"
|
className="parapercent"
|
||||||
style={{ width: '20%' }}
|
style={{ width: '20%' }}
|
||||||
|
@ -372,10 +371,10 @@ class Para extends React.Component<{}, ParaState> {
|
||||||
optionFilterProp="children"
|
optionFilterProp="children"
|
||||||
onSelect={this.percentNum}
|
onSelect={this.percentNum}
|
||||||
>
|
>
|
||||||
<Option value="0.2">0.2</Option>
|
<Option value="0.2">20%</Option>
|
||||||
<Option value="0.5">0.5</Option>
|
<Option value="0.5">50%</Option>
|
||||||
<Option value="0.8">0.8</Option>
|
<Option value="0.8">80%</Option>
|
||||||
<Option value="1">1</Option>
|
<Option value="1">100%</Option>
|
||||||
</Select>
|
</Select>
|
||||||
<Select
|
<Select
|
||||||
style={{ width: '60%' }}
|
style={{ width: '60%' }}
|
||||||
|
|
|
@ -39,12 +39,6 @@ class SlideBar extends React.Component<{}, {}> {
|
||||||
<Icon className="floicon" type="right" />
|
<Icon className="floicon" type="right" />
|
||||||
</Link>
|
</Link>
|
||||||
</li>
|
</li>
|
||||||
<li>
|
|
||||||
<Link to={'/tensor'} activeClassName="high">
|
|
||||||
<Icon className="icon" type="link" />Tensorboard
|
|
||||||
<Icon className="floicon" type="right" />
|
|
||||||
</Link>
|
|
||||||
</li>
|
|
||||||
</ul>
|
</ul>
|
||||||
</div>
|
</div>
|
||||||
);
|
);
|
||||||
|
|
|
@ -234,25 +234,21 @@ class TrialStatus extends React.Component<{}, TabState> {
|
||||||
// kill job
|
// kill job
|
||||||
killJob = (key: number, id: string, status: string) => {
|
killJob = (key: number, id: string, status: string) => {
|
||||||
|
|
||||||
if (status === 'RUNNING') {
|
axios(`${MANAGER_IP}/trial-jobs/${id}`, {
|
||||||
axios(`${MANAGER_IP}/trial-jobs/${id}`, {
|
method: 'DELETE',
|
||||||
method: 'DELETE',
|
headers: {
|
||||||
headers: {
|
'Content-Type': 'application/json;charset=utf-8'
|
||||||
'Content-Type': 'application/json;charset=utf-8'
|
}
|
||||||
|
})
|
||||||
|
.then(res => {
|
||||||
|
if (res.status === 200) {
|
||||||
|
message.success('Cancel the job successfully');
|
||||||
|
// render the table
|
||||||
|
this.drawTable();
|
||||||
|
} else {
|
||||||
|
message.error('fail to cancel the job');
|
||||||
}
|
}
|
||||||
})
|
});
|
||||||
.then(res => {
|
|
||||||
if (res.status === 200) {
|
|
||||||
message.success('Cancel the job successfully');
|
|
||||||
// render the table
|
|
||||||
this.drawTable();
|
|
||||||
} else {
|
|
||||||
message.error('fail to cancel the job');
|
|
||||||
}
|
|
||||||
});
|
|
||||||
} else {
|
|
||||||
message.error('you just can kill the job that status is Running');
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// get tensorflow address
|
// get tensorflow address
|
||||||
|
@ -347,13 +343,34 @@ class TrialStatus extends React.Component<{}, TabState> {
|
||||||
key: 'operation',
|
key: 'operation',
|
||||||
width: '10%',
|
width: '10%',
|
||||||
render: (text: string, record: TableObj) => {
|
render: (text: string, record: TableObj) => {
|
||||||
|
let trialStatus = record.status;
|
||||||
|
let flagKill = false;
|
||||||
|
if (trialStatus === 'RUNNING') {
|
||||||
|
flagKill = true;
|
||||||
|
} else {
|
||||||
|
flagKill = false;
|
||||||
|
}
|
||||||
return (
|
return (
|
||||||
<Popconfirm
|
flagKill
|
||||||
title="Are you sure to delete this trial?"
|
?
|
||||||
onConfirm={this.killJob.bind(this, record.key, record.id, record.status)}
|
(
|
||||||
>
|
<Popconfirm
|
||||||
<Button type="primary" className="tableButton">Kill</Button>
|
title="Are you sure to delete this trial?"
|
||||||
</Popconfirm>
|
onConfirm={this.killJob.bind(this, record.key, record.id, record.status)}
|
||||||
|
>
|
||||||
|
<Button type="primary" className="tableButton">Kill</Button>
|
||||||
|
</Popconfirm>
|
||||||
|
)
|
||||||
|
:
|
||||||
|
(
|
||||||
|
<Button
|
||||||
|
type="primary"
|
||||||
|
className="tableButton"
|
||||||
|
disabled={true}
|
||||||
|
>
|
||||||
|
Kill
|
||||||
|
</Button>
|
||||||
|
)
|
||||||
);
|
);
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
|
|
|
@ -24,6 +24,11 @@
|
||||||
float: right;
|
float: right;
|
||||||
width: 60%;
|
width: 60%;
|
||||||
}
|
}
|
||||||
|
.paraRight>span{
|
||||||
|
font-size: 14px;
|
||||||
|
color: #333;
|
||||||
|
margin-right: 5px;
|
||||||
|
}
|
||||||
.paraRight .parapercent{
|
.paraRight .parapercent{
|
||||||
margin-right: 10px;
|
margin-right: 10px;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,28 +1,22 @@
|
||||||
authorName: nni
|
authorName: nni
|
||||||
experimentName: naive
|
experimentName: naive
|
||||||
trialConcurrency: 3
|
trialConcurrency: 3
|
||||||
maxExecDuration: 1h
|
maxExecDuration: 1h
|
||||||
maxTrialNum: 10
|
maxTrialNum: 10
|
||||||
#choice: local, remote
|
#choice: local, remote
|
||||||
trainingServicePlatform: local
|
trainingServicePlatform: local
|
||||||
searchSpacePath: search_space.json
|
searchSpacePath: search_space.json
|
||||||
#choice: true, false
|
#choice: true, false
|
||||||
useAnnotation: false
|
useAnnotation: false
|
||||||
tuner:
|
tuner:
|
||||||
tunerCommand: /home/travis/virtualenv/python3.6.3/bin/python3 naive_tuner.py
|
codeDir: .
|
||||||
tunerCwd: .
|
classFileName: naive_tuner.py
|
||||||
tunerGpuNum: 0
|
className: NaiveTuner
|
||||||
assessor:
|
assessor:
|
||||||
assessorCommand: /home/travis/virtualenv/python3.6.3/bin/python3 naive_assessor.py
|
codeDir: .
|
||||||
assessorCwd: .
|
classFileName: naive_assessor.py
|
||||||
assessorGpuNum: 0
|
className: NaiveAssessor
|
||||||
trial:
|
trial:
|
||||||
trialCommand: /home/travis/virtualenv/python3.6.3/bin/python3 naive_trial.py
|
command: python3 naive_trial.py
|
||||||
trialCodeDir: .
|
codeDir: .
|
||||||
trialGpuNum: 0
|
gpuNum: 0
|
||||||
#machineList can be empty if the platform is local
|
|
||||||
machineList:
|
|
||||||
- ip:
|
|
||||||
port:
|
|
||||||
username:
|
|
||||||
passwd:
|
|
||||||
|
|
|
@ -4,7 +4,7 @@ from nni.assessor import Assessor, AssessResult
|
||||||
|
|
||||||
_logger = logging.getLogger('NaiveAssessor')
|
_logger = logging.getLogger('NaiveAssessor')
|
||||||
_logger.info('start')
|
_logger.info('start')
|
||||||
_result = open('assessor_result.txt', 'w')
|
_result = open('/tmp/nni_assessor_result.txt', 'w')
|
||||||
|
|
||||||
class NaiveAssessor(Assessor):
|
class NaiveAssessor(Assessor):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
|
@ -29,10 +29,10 @@ class NaiveAssessor(Assessor):
|
||||||
|
|
||||||
return AssessResult.Good
|
return AssessResult.Good
|
||||||
|
|
||||||
try:
|
def _on_exit(self):
|
||||||
NaiveAssessor().run()
|
_result.write('DONE\n')
|
||||||
_result.write('DONE\n')
|
_result.close()
|
||||||
except Exception as e:
|
|
||||||
_logger.exception(e)
|
def _on_error(self):
|
||||||
_result.write('ERROR\n')
|
_result.write('ERROR\n')
|
||||||
_result.close()
|
_result.close()
|
||||||
|
|
|
@ -5,7 +5,7 @@ from nni.tuner import Tuner
|
||||||
|
|
||||||
_logger = logging.getLogger('NaiveTuner')
|
_logger = logging.getLogger('NaiveTuner')
|
||||||
_logger.info('start')
|
_logger.info('start')
|
||||||
_result = open('tuner_result.txt', 'w')
|
_result = open('/tmp/nni_tuner_result.txt', 'w')
|
||||||
|
|
||||||
class NaiveTuner(Tuner):
|
class NaiveTuner(Tuner):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
|
@ -24,13 +24,13 @@ class NaiveTuner(Tuner):
|
||||||
|
|
||||||
def update_search_space(self, search_space):
|
def update_search_space(self, search_space):
|
||||||
_logger.info('update_search_space: %s' % search_space)
|
_logger.info('update_search_space: %s' % search_space)
|
||||||
with open('tuner_search_space.json', 'w') as file_:
|
with open('/tmp/nni_tuner_search_space.json', 'w') as file_:
|
||||||
json.dump(search_space, file_)
|
json.dump(search_space, file_)
|
||||||
|
|
||||||
try:
|
def _on_exit(self):
|
||||||
NaiveTuner().run()
|
_result.write('DONE\n')
|
||||||
_result.write('DONE\n')
|
_result.close()
|
||||||
except Exception as e:
|
|
||||||
_logger.exception(e)
|
def _on_error(self):
|
||||||
_result.write('ERROR\n')
|
_result.write('ERROR\n')
|
||||||
_result.close()
|
_result.close()
|
||||||
|
|
|
@ -1,2 +1,2 @@
|
||||||
#!/bin/sh
|
#!/bin/sh
|
||||||
python3 -m nnicmd.nnictl $@
|
WEB_UI_FOLDER=${PWD}/../../src/webui python3 -m nnicmd.nnictl $@
|
||||||
|
|
|
@ -5,6 +5,7 @@ import json
|
||||||
import os
|
import os
|
||||||
import subprocess
|
import subprocess
|
||||||
import time
|
import time
|
||||||
|
import traceback
|
||||||
|
|
||||||
GREEN = '\33[32m'
|
GREEN = '\33[32m'
|
||||||
RED = '\33[31m'
|
RED = '\33[31m'
|
||||||
|
@ -25,7 +26,7 @@ def run():
|
||||||
with contextlib.suppress(FileNotFoundError):
|
with contextlib.suppress(FileNotFoundError):
|
||||||
os.remove('tuner_result.txt')
|
os.remove('tuner_result.txt')
|
||||||
with contextlib.suppress(FileNotFoundError):
|
with contextlib.suppress(FileNotFoundError):
|
||||||
os.remove('assessor_result.txt')
|
os.remove('/tmp/nni_assessor_result.txt')
|
||||||
|
|
||||||
proc = subprocess.run(['nnictl', 'create', '--config', 'local.yml'])
|
proc = subprocess.run(['nnictl', 'create', '--config', 'local.yml'])
|
||||||
assert proc.returncode == 0, '`nnictl create` failed with code %d' % proc.returncode
|
assert proc.returncode == 0, '`nnictl create` failed with code %d' % proc.returncode
|
||||||
|
@ -36,8 +37,8 @@ def run():
|
||||||
for _ in range(60):
|
for _ in range(60):
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
|
|
||||||
tuner_status = read_last_line('tuner_result.txt')
|
tuner_status = read_last_line('/tmp/nni_tuner_result.txt')
|
||||||
assessor_status = read_last_line('assessor_result.txt')
|
assessor_status = read_last_line('/tmp/nni_assessor_result.txt')
|
||||||
|
|
||||||
assert tuner_status != 'ERROR', 'Tuner exited with error'
|
assert tuner_status != 'ERROR', 'Tuner exited with error'
|
||||||
assert assessor_status != 'ERROR', 'Assessor exited with error'
|
assert assessor_status != 'ERROR', 'Assessor exited with error'
|
||||||
|
@ -46,7 +47,7 @@ def run():
|
||||||
break
|
break
|
||||||
|
|
||||||
if tuner_status is not None:
|
if tuner_status is not None:
|
||||||
for line in open('tuner_result.txt'):
|
for line in open('/tmp/nni_tuner_result.txt'):
|
||||||
if line.strip() in ('DONE', 'ERROR'):
|
if line.strip() in ('DONE', 'ERROR'):
|
||||||
break
|
break
|
||||||
trial = int(line.split(' ')[0])
|
trial = int(line.split(' ')[0])
|
||||||
|
@ -57,16 +58,16 @@ def run():
|
||||||
assert tuner_status == 'DONE' and assessor_status == 'DONE', 'Failed to finish in 1 min'
|
assert tuner_status == 'DONE' and assessor_status == 'DONE', 'Failed to finish in 1 min'
|
||||||
|
|
||||||
ss1 = json.load(open('search_space.json'))
|
ss1 = json.load(open('search_space.json'))
|
||||||
ss2 = json.load(open('tuner_search_space.json'))
|
ss2 = json.load(open('/tmp/nni_tuner_search_space.json'))
|
||||||
assert ss1 == ss2, 'Tuner got wrong search space'
|
assert ss1 == ss2, 'Tuner got wrong search space'
|
||||||
|
|
||||||
tuner_result = set(open('tuner_result.txt'))
|
tuner_result = set(open('/tmp/nni_tuner_result.txt'))
|
||||||
expected = set(open('expected_tuner_result.txt'))
|
expected = set(open('expected_tuner_result.txt'))
|
||||||
# Trials may complete before NNI gets assessor's result,
|
# Trials may complete before NNI gets assessor's result,
|
||||||
# so it is possible to have more final result than expected
|
# so it is possible to have more final result than expected
|
||||||
assert tuner_result.issuperset(expected), 'Bad tuner result'
|
assert tuner_result.issuperset(expected), 'Bad tuner result'
|
||||||
|
|
||||||
assessor_result = set(open('assessor_result.txt'))
|
assessor_result = set(open('/tmp/nni_assessor_result.txt'))
|
||||||
expected = set(open('expected_assessor_result.txt'))
|
expected = set(open('expected_assessor_result.txt'))
|
||||||
assert assessor_result == expected, 'Bad assessor result'
|
assert assessor_result == expected, 'Bad assessor result'
|
||||||
|
|
||||||
|
@ -78,5 +79,6 @@ if __name__ == '__main__':
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(RED + 'FAIL' + CLEAR)
|
print(RED + 'FAIL' + CLEAR)
|
||||||
print('%r' % e)
|
print('%r' % e)
|
||||||
|
traceback.print_exc()
|
||||||
|
|
||||||
subprocess.run(['nnictl', 'stop'])
|
subprocess.run(['nnictl', 'stop'])
|
||||||
|
|
|
@ -0,0 +1,55 @@
|
||||||
|
# Introduction
|
||||||
|
|
||||||
|
For good user experience and reduce user effort, we need to design a good annotation grammar.
|
||||||
|
|
||||||
|
If users use NNI system, they only need to:
|
||||||
|
|
||||||
|
1. Annotation variable in code as:
|
||||||
|
|
||||||
|
'''@nni.variable(nni.choice(2,3,5,7),name=self.conv_size)'''
|
||||||
|
|
||||||
|
2. Annotation intermediate in code as:
|
||||||
|
|
||||||
|
'''@nni.report_intermediate_result(test_acc)'''
|
||||||
|
|
||||||
|
3. Annotation output in code as:
|
||||||
|
|
||||||
|
'''@nni.report_final_result(test_acc)'''
|
||||||
|
|
||||||
|
4. Annotation `function_choice` in code as:
|
||||||
|
|
||||||
|
'''@nni.function_choice(max_pool(h_conv1, self.pool_size),avg_pool(h_conv1, self.pool_size),name=max_pool)'''
|
||||||
|
|
||||||
|
In this way, they can easily realize automatic tuning on NNI.
|
||||||
|
|
||||||
|
For `@nni.variable`, `nni.choice` is the type of search space and there are 10 types to express your search space as follows:
|
||||||
|
|
||||||
|
1. `@nni.variable(nni.choice(option1,option2,...,optionN),name=variable)`
|
||||||
|
Which means the variable value is one of the options, which should be a list The elements of options can themselves be stochastic expressions
|
||||||
|
|
||||||
|
2. `@nni.variable(nni.randint(upper),name=variable)`
|
||||||
|
Which means the variable value is a random integer in the range [0, upper).
|
||||||
|
|
||||||
|
3. `@nni.variable(nni.uniform(low, high),name=variable)`
|
||||||
|
Which means the variable value is a value uniformly between low and high.
|
||||||
|
|
||||||
|
4. `@nni.variable(nni.quniform(low, high, q),name=variable)`
|
||||||
|
Which means the variable value is a value like round(uniform(low, high) / q) * q
|
||||||
|
|
||||||
|
5. `@nni.variable(nni.loguniform(low, high),name=variable)`
|
||||||
|
Which means the variable value is a value drawn according to exp(uniform(low, high)) so that the logarithm of the return value is uniformly distributed.
|
||||||
|
|
||||||
|
6. `@nni.variable(nni.qloguniform(low, high, q),name=variable)`
|
||||||
|
Which means the variable value is a value like round(exp(uniform(low, high)) / q) * q
|
||||||
|
|
||||||
|
7. `@nni.variable(nni.normal(label, mu, sigma),name=variable)`
|
||||||
|
Which means the variable value is a real value that's normally-distributed with mean mu and standard deviation sigma.
|
||||||
|
|
||||||
|
8. `@nni.variable(nni.qnormal(label, mu, sigma, q),name=variable)`
|
||||||
|
Which means the variable value is a value like round(normal(mu, sigma) / q) * q
|
||||||
|
|
||||||
|
9. `@nni.variable(nni.lognormal(label, mu, sigma),name=variable)`
|
||||||
|
Which means the variable value is a value drawn according to exp(normal(mu, sigma))
|
||||||
|
|
||||||
|
10. `@nni.variable(nni.qlognormal(label, mu, sigma, q),name=variable)`
|
||||||
|
Which means the variable value is a value like round(exp(normal(mu, sigma)) / q) * q
|
|
@ -0,0 +1,104 @@
|
||||||
|
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||||
|
#
|
||||||
|
# MIT License
|
||||||
|
#
|
||||||
|
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
|
||||||
|
# associated documentation files (the "Software"), to deal in the Software without restriction,
|
||||||
|
# including without limitation the rights to use, copy, modify, merge, publish, distribute,
|
||||||
|
# sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
|
||||||
|
# furnished to do so, subject to the following conditions:
|
||||||
|
#
|
||||||
|
# The above copyright notice and this permission notice shall be included in all copies or
|
||||||
|
# substantial portions of the Software.
|
||||||
|
#
|
||||||
|
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
|
||||||
|
# NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
|
||||||
|
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
|
||||||
|
# OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
# ==================================================================================================
|
||||||
|
|
||||||
|
|
||||||
|
import os
|
||||||
|
import shutil
|
||||||
|
|
||||||
|
from . import code_generator
|
||||||
|
from . import search_space_generator
|
||||||
|
|
||||||
|
|
||||||
|
__all__ = ['generate_search_space', 'expand_annotations']
|
||||||
|
|
||||||
|
|
||||||
|
def generate_search_space(code_dir):
|
||||||
|
"""Generate search space from Python source code.
|
||||||
|
Return a serializable search space object.
|
||||||
|
code_dir: directory path of source files (str)
|
||||||
|
"""
|
||||||
|
search_space = {}
|
||||||
|
|
||||||
|
if code_dir.endswith('/'):
|
||||||
|
code_dir = code_dir[:-1]
|
||||||
|
|
||||||
|
for subdir, _, files in os.walk(code_dir):
|
||||||
|
# generate module name from path
|
||||||
|
if subdir == code_dir:
|
||||||
|
package = ''
|
||||||
|
else:
|
||||||
|
assert subdir.startswith(code_dir + '/'), subdir
|
||||||
|
prefix_len = len(code_dir) + 1
|
||||||
|
package = subdir[prefix_len:].replace('/', '.') + '.'
|
||||||
|
|
||||||
|
for file_name in files:
|
||||||
|
if file_name.endswith('.py'):
|
||||||
|
path = os.path.join(subdir, file_name)
|
||||||
|
module = package + file_name[:-3]
|
||||||
|
search_space.update(_generate_file_search_space(path, module))
|
||||||
|
|
||||||
|
return search_space
|
||||||
|
|
||||||
|
def _generate_file_search_space(path, module):
|
||||||
|
with open(path) as src:
|
||||||
|
try:
|
||||||
|
return search_space_generator.generate(module, src.read())
|
||||||
|
except Exception as exc: # pylint: disable=broad-except
|
||||||
|
if exc.args:
|
||||||
|
raise RuntimeError(path + ' ' + '\n'.join(exc.args))
|
||||||
|
else:
|
||||||
|
raise RuntimeError('Failed to generate search space for %s: %r' % (path, exc))
|
||||||
|
|
||||||
|
|
||||||
|
def expand_annotations(src_dir, dst_dir):
|
||||||
|
"""Expand annotations in user code.
|
||||||
|
src_dir: directory path of user code (str)
|
||||||
|
dst_dir: directory to place generated files (str)
|
||||||
|
"""
|
||||||
|
if src_dir[-1] == '/':
|
||||||
|
src_dir = src_dir[:-1]
|
||||||
|
if dst_dir[-1] == '/':
|
||||||
|
dst_dir = dst_dir[:-1]
|
||||||
|
|
||||||
|
for src_subdir, dirs, files in os.walk(src_dir):
|
||||||
|
assert src_subdir.startswith(src_dir)
|
||||||
|
dst_subdir = src_subdir.replace(src_dir, dst_dir, 1)
|
||||||
|
os.makedirs(dst_subdir, exist_ok=True)
|
||||||
|
|
||||||
|
for file_name in files:
|
||||||
|
src_path = os.path.join(src_subdir, file_name)
|
||||||
|
dst_path = os.path.join(dst_subdir, file_name)
|
||||||
|
if file_name.endswith('.py'):
|
||||||
|
_expand_file_annotations(src_path, dst_path)
|
||||||
|
else:
|
||||||
|
shutil.copyfile(src_path, dst_path)
|
||||||
|
|
||||||
|
for dir_name in dirs:
|
||||||
|
os.makedirs(os.path.join(dst_subdir, dir_name), exist_ok=True)
|
||||||
|
|
||||||
|
def _expand_file_annotations(src_path, dst_path):
|
||||||
|
with open(src_path) as src, open(dst_path, 'w') as dst:
|
||||||
|
try:
|
||||||
|
dst.write(code_generator.parse(src.read()))
|
||||||
|
except Exception as exc: # pylint: disable=broad-except
|
||||||
|
if exc.args:
|
||||||
|
raise RuntimeError(src_path + ' ' + '\n'.join(exc.args))
|
||||||
|
else:
|
||||||
|
raise RuntimeError('Failed to expand annotations for %s: %r' % (src_path, exc))
|
|
@ -0,0 +1,240 @@
|
||||||
|
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||||
|
#
|
||||||
|
# MIT License
|
||||||
|
#
|
||||||
|
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
|
||||||
|
# associated documentation files (the "Software"), to deal in the Software without restriction,
|
||||||
|
# including without limitation the rights to use, copy, modify, merge, publish, distribute,
|
||||||
|
# sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
|
||||||
|
# furnished to do so, subject to the following conditions:
|
||||||
|
#
|
||||||
|
# The above copyright notice and this permission notice shall be included in all copies or
|
||||||
|
# substantial portions of the Software.
|
||||||
|
#
|
||||||
|
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
|
||||||
|
# NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
|
||||||
|
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
|
||||||
|
# OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
# ==================================================================================================
|
||||||
|
|
||||||
|
|
||||||
|
import ast
|
||||||
|
|
||||||
|
import astor
|
||||||
|
|
||||||
|
# pylint: disable=unidiomatic-typecheck
|
||||||
|
|
||||||
|
|
||||||
|
def parse_annotation(code):
|
||||||
|
"""Parse an annotation string.
|
||||||
|
Return an AST Expr node.
|
||||||
|
code: annotation string (excluding '@')
|
||||||
|
"""
|
||||||
|
module = ast.parse(code)
|
||||||
|
assert type(module) is ast.Module, 'internal error #1'
|
||||||
|
assert len(module.body) == 1, 'Annotation contains more than one expression'
|
||||||
|
assert type(module.body[0]) is ast.Expr, 'Annotation is not expression'
|
||||||
|
return module.body[0]
|
||||||
|
|
||||||
|
|
||||||
|
def parse_annotation_function(code, func_name):
|
||||||
|
"""Parse an annotation function.
|
||||||
|
Return the value of `name` keyword argument and the AST Call node.
|
||||||
|
func_name: expected function name
|
||||||
|
"""
|
||||||
|
expr = parse_annotation(code)
|
||||||
|
call = expr.value
|
||||||
|
assert type(call) is ast.Call, 'Annotation is not a function call'
|
||||||
|
|
||||||
|
assert type(call.func) is ast.Attribute, 'Unexpected annotation function'
|
||||||
|
assert type(call.func.value) is ast.Name, 'Invalid annotation function name'
|
||||||
|
assert call.func.value.id == 'nni', 'Annotation is not a NNI function'
|
||||||
|
assert call.func.attr == func_name, 'internal error #2'
|
||||||
|
|
||||||
|
assert len(call.keywords) == 1, 'Annotation function contains more than one keyword argument'
|
||||||
|
assert call.keywords[0].arg == 'name', 'Annotation keyword argument is not "name"'
|
||||||
|
name = call.keywords[0].value
|
||||||
|
|
||||||
|
return name, call
|
||||||
|
|
||||||
|
|
||||||
|
def parse_nni_variable(code):
|
||||||
|
"""Parse `nni.variable` expression.
|
||||||
|
Return the name argument and AST node of annotated expression.
|
||||||
|
code: annotation string
|
||||||
|
"""
|
||||||
|
name, call = parse_annotation_function(code, 'variable')
|
||||||
|
|
||||||
|
assert len(call.args) == 1, 'nni.variable contains more than one arguments'
|
||||||
|
arg = call.args[0]
|
||||||
|
assert type(arg) is ast.Call, 'Value of nni.variable is not a function call'
|
||||||
|
assert type(arg.func) is ast.Attribute, 'nni.variable value is not a NNI function'
|
||||||
|
assert type(arg.func.value) is ast.Name, 'nni.variable value is not a NNI function'
|
||||||
|
assert arg.func.value.id == 'nni', 'nni.variable value is not a NNI function'
|
||||||
|
|
||||||
|
name_str = astor.to_source(name).strip()
|
||||||
|
keyword_arg = ast.keyword(arg='name', value=ast.Str(s=name_str))
|
||||||
|
arg.keywords.append(keyword_arg)
|
||||||
|
|
||||||
|
return name, arg
|
||||||
|
|
||||||
|
|
||||||
|
def parse_nni_function(code):
|
||||||
|
"""Parse `nni.function_choice` expression.
|
||||||
|
Return the AST node of annotated expression and a list of dumped function call expressions.
|
||||||
|
code: annotation string
|
||||||
|
"""
|
||||||
|
name, call = parse_annotation_function(code, 'function_choice')
|
||||||
|
funcs = [ast.dump(func, False) for func in call.args]
|
||||||
|
call.args = [make_lambda(arg) for arg in call.args]
|
||||||
|
|
||||||
|
name_str = astor.to_source(name).strip()
|
||||||
|
call.keywords[0].value = ast.Str(s=name_str)
|
||||||
|
|
||||||
|
return call, funcs
|
||||||
|
|
||||||
|
|
||||||
|
def make_lambda(call):
|
||||||
|
"""Wrap an AST Call node to lambda expression node.
|
||||||
|
call: ast.Call node
|
||||||
|
"""
|
||||||
|
assert type(call) is ast.Call, 'Argument of nni.function_choice is not function call'
|
||||||
|
empty_args = ast.arguments(args=[], vararg=None, kwarg=None, defaults=[])
|
||||||
|
return ast.Lambda(args=empty_args, body=call)
|
||||||
|
|
||||||
|
|
||||||
|
def test_variable_equal(var1, var2):
|
||||||
|
"""Test whether two variables are the same."""
|
||||||
|
if type(var1) is not type(var2):
|
||||||
|
return False
|
||||||
|
if type(var1) is ast.Name:
|
||||||
|
return var1.id == var2.id
|
||||||
|
if type(var1) is ast.Attribute:
|
||||||
|
return var1.attr == var2.attr and test_variable_equal(var1.value, var2.value)
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def replace_variable_node(node, annotation):
    """Replace a node annotated by `nni.variable`.

    node: the AST node to replace
    annotation: annotation string
    """
    # An `nni.variable` annotation must sit directly above a single-target
    # assignment whose target matches the annotation's `name` argument.
    assert type(node) is ast.Assign, 'nni.variable is not annotating assignment expression'
    assert len(node.targets) == 1, 'Annotated assignment has more than one left-hand value'
    target_name, value_expr = parse_nni_variable(annotation)
    assert test_variable_equal(node.targets[0], target_name), 'Annotated variable has wrong name'
    # Rewrite the right-hand side in place with the nni.* sampling call.
    node.value = value_expr
    return node
|
||||||
|
|
||||||
|
|
||||||
|
def replace_function_node(node, annotation):
    """Replace a node annotated by `nni.function_choice`.

    node: the AST node to replace
    annotation: annotation string
    """
    target, funcs = parse_nni_function(annotation)
    # Swap every matching candidate call inside `node` for the choice expression.
    FuncReplacer(funcs, target).visit(node)
    return node
|
||||||
|
|
||||||
|
|
||||||
|
class FuncReplacer(ast.NodeTransformer):
    """To replace target function call expressions in a node annotated by `nni.function_choice`"""

    def __init__(self, funcs, target):
        """Constructor.

        funcs: list of dumped function call expressions to replace
        target: use this AST node to replace matching expressions
        """
        # Membership tests run once per Call node, so keep the dumps in a set.
        self.funcs = set(funcs)
        self.target = target

    def visit_Call(self, node):  # pylint: disable=invalid-name
        """Replace a Call whose dump matches one of the candidates."""
        dumped = ast.dump(node, False)
        return self.target if dumped in self.funcs else node
|
||||||
|
|
||||||
|
|
||||||
|
class Transformer(ast.NodeTransformer):
    """Transform original code to annotated code"""

    def __init__(self):
        # Stack of pending annotations, one slot per AST nesting level.
        # A slot holds the annotation string waiting for its target
        # expression, or None when nothing is pending.
        self.stack = []
        # Line number of the most recently visited expression/statement,
        # used for error reporting by the caller.
        self.last_line = 0

    def visit(self, node):
        """Dispatch a node: expand annotation strings, rewrite their targets."""
        if isinstance(node, (ast.expr, ast.stmt)):
            self.last_line = node.lineno

        # The root has no enclosing level; just recurse into children.
        if not self.stack:
            return self._visit_children(node)

        pending = self.stack[-1]

        # A standalone string statement may be an annotation.
        if type(node) is ast.Expr and type(node.value) is ast.Str:
            # must not annotate an annotation string
            assert pending is None, 'Annotating an annotation'
            return self._visit_string(node)

        if pending is not None:
            # This node is the target of the preceding annotation;
            # clear the slot so the next sibling is left untouched.
            self.stack[-1] = None
            if pending.startswith('nni.variable'):
                return replace_variable_node(node, pending)
            if pending.startswith('nni.function_choice'):
                return replace_function_node(node, pending)

        return self._visit_children(node)

    def _visit_string(self, node):
        """Handle a standalone string statement that may be an NNI annotation."""
        text = node.value.s

        if not text.startswith('@nni.'):
            return node  # ordinary string, keep it as-is

        if text.startswith('@nni.report_intermediate_result(') or \
                text.startswith('@nni.report_final_result('):
            # Expand the annotation string directly into code.
            return parse_annotation(text[1:])

        if text.startswith('@nni.variable(') or \
                text.startswith('@nni.function_choice('):
            # Mark that the next expression at this level is annotated.
            self.stack[-1] = text[1:]
            return None

        raise AssertionError('Unexpected annotation function')

    def _visit_children(self, node):
        """Recurse into children with a fresh pending-annotation slot."""
        self.stack.append(None)
        self.generic_visit(node)
        pending = self.stack.pop()
        # A pending annotation at this point never found its target.
        assert pending is None, 'Annotation has no target'
        return node
|
||||||
|
|
||||||
|
|
||||||
|
def parse(code):
    """Annotate user code.

    Return annotated code (str).

    Raises RuntimeError if the code cannot be parsed or an annotation is
    malformed.

    code: original user code (str)
    """
    try:
        ast_tree = ast.parse(code)
    except Exception:
        raise RuntimeError('Bad Python code')

    # Keep a reference to the transformer: it records the line number of the
    # last node it visited in `last_line`.  The previous code read
    # `ast_tree.last_line`, but ast.Module has no such attribute, so any
    # annotation error was masked by an AttributeError instead of producing
    # the intended "line: message" RuntimeError.
    transformer = Transformer()
    try:
        transformer.visit(ast_tree)
    except AssertionError as exc:
        raise RuntimeError('%d: %s' % (transformer.last_line, exc.args[0]))

    # Insert `import nni` after the last `from __future__ import ...`
    # statement (future-imports must remain first in the module).
    last_future_import = -1
    import_nni = ast.Import(names=[ast.alias(name='nni', asname=None)])
    nodes = ast_tree.body
    for i, _ in enumerate(nodes):
        if type(nodes[i]) is ast.ImportFrom and nodes[i].module == '__future__':
            last_future_import = i
    nodes.insert(last_future_import + 1, import_nni)

    return astor.to_source(ast_tree)
|
|
@ -0,0 +1,185 @@
|
||||||
|
import nni
|
||||||
|
"""A deep MNIST classifier using convolutional layers."""
|
||||||
|
import logging
|
||||||
|
import math
|
||||||
|
import tempfile
|
||||||
|
import tensorflow as tf
|
||||||
|
from tensorflow.examples.tutorials.mnist import input_data
|
||||||
|
FLAGS = None
|
||||||
|
logger = logging.getLogger('mnist_AutoML')
|
||||||
|
|
||||||
|
|
||||||
|
class MnistNetwork(object):
    """
    MnistNetwork is for initlizing and building basic network for mnist.
    """

    def __init__(self, channel_1_num, channel_2_num, conv_size, hidden_size,
                 pool_size, learning_rate, x_dim=784, y_dim=10):
        self.channel_1_num = channel_1_num
        self.channel_2_num = channel_2_num
        # Hyper-parameters sampled through NNI (annotated build output).
        self.conv_size = nni.choice(2, 3, 5, 7, name='self.conv_size')
        self.hidden_size = nni.choice(124, 512, 1024, name='self.hidden_size')
        self.pool_size = pool_size
        self.learning_rate = nni.uniform(0.0001, 0.1, name='self.learning_rate')
        self.x_dim = x_dim
        self.y_dim = y_dim

        # Graph inputs.
        self.images = tf.placeholder(tf.float32, [None, self.x_dim], name='input_x')
        self.labels = tf.placeholder(tf.float32, [None, self.y_dim], name='input_y')
        self.keep_prob = tf.placeholder(tf.float32, name='keep_prob')

        # Populated by build_network().
        self.train_step = None
        self.accuracy = None

    def build_network(self):
        """
        Building network for mnist
        """
        # Reshape flat input to a square grayscale image.
        with tf.name_scope('reshape'):
            try:
                input_dim = int(math.sqrt(self.x_dim))
            except:
                print('input dim cannot be sqrt and reshape. input dim: ' + str(self.x_dim))
                logger.debug('input dim cannot be sqrt and reshape. input dim: %s', str(self.x_dim))
                raise
            x_image = tf.reshape(self.images, [-1, input_dim, input_dim, 1])

        # First convolutional layer with an NNI-chosen activation.
        with tf.name_scope('conv1'):
            w_conv1 = weight_variable([self.conv_size, self.conv_size, 1, self.channel_1_num])
            b_conv1 = bias_variable([self.channel_1_num])
            h_conv1 = nni.function_choice(
                lambda: tf.nn.relu(conv2d(x_image, w_conv1) + b_conv1),
                lambda: tf.nn.sigmoid(conv2d(x_image, w_conv1) + b_conv1),
                lambda: tf.nn.tanh(conv2d(x_image, w_conv1) + b_conv1),
                name='tf.nn.relu')

        # Pooling layer with an NNI-chosen pooling function.
        with tf.name_scope('pool1'):
            h_pool1 = nni.function_choice(
                lambda: max_pool(h_conv1, self.pool_size),
                lambda: avg_pool(h_conv1, self.pool_size),
                name='max_pool')

        # Second convolutional layer.
        with tf.name_scope('conv2'):
            w_conv2 = weight_variable([self.conv_size, self.conv_size,
                                       self.channel_1_num, self.channel_2_num])
            b_conv2 = bias_variable([self.channel_2_num])
            h_conv2 = tf.nn.relu(conv2d(h_pool1, w_conv2) + b_conv2)

        # Second pooling layer.
        with tf.name_scope('pool2'):
            h_pool2 = max_pool(h_conv2, self.pool_size)

        # Fully connected layer after two rounds of downsampling.
        last_dim = int(input_dim / (self.pool_size * self.pool_size))
        with tf.name_scope('fc1'):
            w_fc1 = weight_variable([last_dim * last_dim * self.channel_2_num, self.hidden_size])
            b_fc1 = bias_variable([self.hidden_size])
            h_pool2_flat = tf.reshape(h_pool2, [-1, last_dim * last_dim * self.channel_2_num])
            h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, w_fc1) + b_fc1)

        # Dropout to control model complexity.
        with tf.name_scope('dropout'):
            h_fc1_drop = tf.nn.dropout(h_fc1, self.keep_prob)

        # Output layer mapping hidden features to class logits.
        with tf.name_scope('fc2'):
            w_fc2 = weight_variable([self.hidden_size, self.y_dim])
            b_fc2 = bias_variable([self.y_dim])
            y_conv = tf.matmul(h_fc1_drop, w_fc2) + b_fc2

        with tf.name_scope('loss'):
            cross_entropy = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(labels=self.labels, logits=y_conv))
        with tf.name_scope('adam_optimizer'):
            self.train_step = tf.train.AdamOptimizer(self.learning_rate).minimize(cross_entropy)
        with tf.name_scope('accuracy'):
            correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(self.labels, 1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
|
||||||
|
|
||||||
|
|
||||||
|
def conv2d(x_input, w_matrix):
    """conv2d returns a 2d convolution layer with full stride."""
    return tf.nn.conv2d(x_input, w_matrix,
                        strides=[1, 1, 1, 1], padding='SAME')
|
||||||
|
|
||||||
|
|
||||||
|
def max_pool(x_input, pool_size):
    """max_pool downsamples a feature map by 2X."""
    window = [1, pool_size, pool_size, 1]
    return tf.nn.max_pool(x_input, ksize=window, strides=window, padding='SAME')
|
||||||
|
|
||||||
|
|
||||||
|
def avg_pool(x_input, pool_size):
    """avg_pool downsamples a feature map by average pooling."""
    window = [1, pool_size, pool_size, 1]
    return tf.nn.avg_pool(x_input, ksize=window, strides=window, padding='SAME')
|
||||||
|
|
||||||
|
|
||||||
|
def weight_variable(shape):
    """weight_variable generates a weight variable of a given shape."""
    # Truncated normal init keeps weights small and avoids dead neurons.
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))
|
||||||
|
|
||||||
|
|
||||||
|
def bias_variable(shape):
    """bias_variable generates a bias variable of a given shape."""
    # Small positive bias avoids dead ReLU units at initialization.
    return tf.Variable(tf.constant(0.1, shape=shape))
|
||||||
|
|
||||||
|
|
||||||
|
def main(params):
    """
    Main function, build mnist network, run and send result to NNI.
    """
    # Download / load data.
    mnist = input_data.read_data_sets(params['data_dir'], one_hot=True)
    print('Mnist download data down.')
    logger.debug('Mnist download data down.')

    # Build the graph for the deep net.
    mnist_network = MnistNetwork(channel_1_num=params['channel_1_num'],
                                 channel_2_num=params['channel_2_num'],
                                 conv_size=params['conv_size'],
                                 hidden_size=params['hidden_size'],
                                 pool_size=params['pool_size'],
                                 learning_rate=params['learning_rate'])
    mnist_network.build_network()
    logger.debug('Mnist build network done.')

    # Write the graph for inspection.
    graph_location = tempfile.mkdtemp()
    logger.debug('Saving graph to: %s', graph_location)
    train_writer = tf.summary.FileWriter(graph_location)
    train_writer.add_graph(tf.get_default_graph())

    test_acc = 0.0
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # Hyper-parameters sampled through NNI (annotated build output).
        batch_num = nni.choice(50, 250, 500, name='batch_num')
        for i in range(batch_num):
            batch = mnist.train.next_batch(batch_num)
            dropout_rate = nni.choice(1, 5, name='dropout_rate')
            mnist_network.train_step.run(feed_dict={
                mnist_network.images: batch[0],
                mnist_network.labels: batch[1],
                mnist_network.keep_prob: dropout_rate})

            if i % 100 == 0:
                test_acc = mnist_network.accuracy.eval(feed_dict={
                    mnist_network.images: mnist.test.images,
                    mnist_network.labels: mnist.test.labels,
                    mnist_network.keep_prob: 1.0})
                nni.report_intermediate_result(test_acc)
                logger.debug('test accuracy %g', test_acc)
                logger.debug('Pipe send intermediate result done.')

        test_acc = mnist_network.accuracy.eval(feed_dict={
            mnist_network.images: mnist.test.images,
            mnist_network.labels: mnist.test.labels,
            mnist_network.keep_prob: 1.0})
        nni.report_final_result(test_acc)
        logger.debug('Final result is %g', test_acc)
        logger.debug('Send final result done.')
|
||||||
|
|
||||||
|
|
||||||
|
def generate_defualt_params():
    """
    Generate default parameters for mnist network.
    """
    return {
        'data_dir': '/tmp/tensorflow/mnist/input_data',
        'dropout_rate': 0.5,
        'channel_1_num': 32,
        'channel_2_num': 64,
        'conv_size': 5,
        'pool_size': 2,
        'hidden_size': 1024,
        'learning_rate': 0.0001,
        'batch_num': 200,
    }
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    try:
        main(generate_defualt_params())
    except Exception as exception:
        # Record the full stack trace before propagating the failure.
        logger.exception(exception)
        raise
|
|
@ -0,0 +1,56 @@
|
||||||
|
{
|
||||||
|
"mnist_with_annotation/batch_num/choice": {
|
||||||
|
"_type": "choice",
|
||||||
|
"_value": [
|
||||||
|
0,
|
||||||
|
1,
|
||||||
|
2
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"mnist_with_annotation/dropout_rate/choice": {
|
||||||
|
"_type": "choice",
|
||||||
|
"_value": [
|
||||||
|
0,
|
||||||
|
1
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"mnist_with_annotation/max_pool/function_choice": {
|
||||||
|
"_type": "choice",
|
||||||
|
"_value": [
|
||||||
|
0,
|
||||||
|
1
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"mnist_with_annotation/self.conv_size/choice": {
|
||||||
|
"_type": "choice",
|
||||||
|
"_value": [
|
||||||
|
0,
|
||||||
|
1,
|
||||||
|
2,
|
||||||
|
3
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"mnist_with_annotation/self.hidden_size/choice": {
|
||||||
|
"_type": "choice",
|
||||||
|
"_value": [
|
||||||
|
0,
|
||||||
|
1,
|
||||||
|
2
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"mnist_with_annotation/self.learning_rate/uniform": {
|
||||||
|
"_type": "uniform",
|
||||||
|
"_value": [
|
||||||
|
0.0001,
|
||||||
|
0.1
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"mnist_with_annotation/tf.nn.relu/function_choice": {
|
||||||
|
"_type": "choice",
|
||||||
|
"_value": [
|
||||||
|
0,
|
||||||
|
1,
|
||||||
|
2
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,254 @@
|
||||||
|
#!/usr/bin/python
|
||||||
|
# Copyright (c) Microsoft Corporation
|
||||||
|
# All rights reserved.
|
||||||
|
#
|
||||||
|
# MIT License
|
||||||
|
#
|
||||||
|
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
|
||||||
|
# documentation files (the "Software"), to deal in the Software without restriction, including without limitation
|
||||||
|
# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
|
||||||
|
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
||||||
|
# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
||||||
|
#
|
||||||
|
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
|
||||||
|
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
|
||||||
|
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
|
||||||
|
"""A deep MNIST classifier using convolutional layers."""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import math
|
||||||
|
import tempfile
|
||||||
|
import tensorflow as tf
|
||||||
|
|
||||||
|
from tensorflow.examples.tutorials.mnist import input_data
|
||||||
|
|
||||||
|
FLAGS = None
|
||||||
|
|
||||||
|
logger = logging.getLogger('mnist_AutoML')
|
||||||
|
|
||||||
|
|
||||||
|
class MnistNetwork(object):
    """
    MnistNetwork is for initlizing and building basic network for mnist.
    """

    def __init__(self, channel_1_num, channel_2_num, conv_size, hidden_size,
                 pool_size, learning_rate, x_dim=784, y_dim=10):
        self.channel_1_num = channel_1_num
        self.channel_2_num = channel_2_num
        # NOTE: the triple-quoted strings below are NNI annotations parsed
        # by the annotation tool; they must precede their target assignment.
        """@nni.variable(nni.choice(2, 3, 5, 7),name=self.conv_size)"""
        self.conv_size = conv_size
        """@nni.variable(nni.choice(124, 512, 1024), name=self.hidden_size)"""
        self.hidden_size = hidden_size
        self.pool_size = pool_size
        """@nni.variable(nni.uniform(0.0001, 0.1), name=self.learning_rate)"""
        self.learning_rate = learning_rate
        self.x_dim = x_dim
        self.y_dim = y_dim

        # Graph inputs.
        self.images = tf.placeholder(tf.float32, [None, self.x_dim], name='input_x')
        self.labels = tf.placeholder(tf.float32, [None, self.y_dim], name='input_y')
        self.keep_prob = tf.placeholder(tf.float32, name='keep_prob')

        # Populated by build_network().
        self.train_step = None
        self.accuracy = None

    def build_network(self):
        """
        Building network for mnist
        """
        # Reshape flat input to a square grayscale image.
        with tf.name_scope('reshape'):
            try:
                input_dim = int(math.sqrt(self.x_dim))
            except:
                print('input dim cannot be sqrt and reshape. input dim: ' + str(self.x_dim))
                logger.debug('input dim cannot be sqrt and reshape. input dim: %s', str(self.x_dim))
                raise
            x_image = tf.reshape(self.images, [-1, input_dim, input_dim, 1])

        # First convolutional layer - maps one grayscale image to 32 feature maps.
        with tf.name_scope('conv1'):
            w_conv1 = weight_variable([self.conv_size, self.conv_size, 1, self.channel_1_num])
            b_conv1 = bias_variable([self.channel_1_num])
            """@nni.function_choice(tf.nn.relu(conv2d(x_image, w_conv1) + b_conv1), tf.nn.sigmoid(conv2d(x_image, w_conv1) + b_conv1), tf.nn.tanh(conv2d(x_image, w_conv1) + b_conv1), name=tf.nn.relu)"""
            h_conv1 = tf.nn.relu(conv2d(x_image, w_conv1) + b_conv1)

        # Pooling layer - downsamples by 2X.
        with tf.name_scope('pool1'):
            """@nni.function_choice(max_pool(h_conv1, self.pool_size), avg_pool(h_conv1, self.pool_size), name=max_pool)"""
            h_pool1 = max_pool(h_conv1, self.pool_size)

        # Second convolutional layer -- maps 32 feature maps to 64.
        with tf.name_scope('conv2'):
            w_conv2 = weight_variable([self.conv_size, self.conv_size,
                                       self.channel_1_num, self.channel_2_num])
            b_conv2 = bias_variable([self.channel_2_num])
            h_conv2 = tf.nn.relu(conv2d(h_pool1, w_conv2) + b_conv2)

        # Second pooling layer.
        with tf.name_scope('pool2'):
            h_pool2 = max_pool(h_conv2, self.pool_size)

        # Fully connected layer after two rounds of downsampling.
        last_dim = int(input_dim / (self.pool_size * self.pool_size))
        with tf.name_scope('fc1'):
            w_fc1 = weight_variable([last_dim * last_dim * self.channel_2_num, self.hidden_size])
            b_fc1 = bias_variable([self.hidden_size])
            h_pool2_flat = tf.reshape(h_pool2, [-1, last_dim * last_dim * self.channel_2_num])
            h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, w_fc1) + b_fc1)

        # Dropout - prevents co-adaptation of features.
        with tf.name_scope('dropout'):
            h_fc1_drop = tf.nn.dropout(h_fc1, self.keep_prob)

        # Map the hidden features to one logit per class.
        with tf.name_scope('fc2'):
            w_fc2 = weight_variable([self.hidden_size, self.y_dim])
            b_fc2 = bias_variable([self.y_dim])
            y_conv = tf.matmul(h_fc1_drop, w_fc2) + b_fc2

        with tf.name_scope('loss'):
            cross_entropy = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(labels=self.labels, logits=y_conv))
        with tf.name_scope('adam_optimizer'):
            self.train_step = tf.train.AdamOptimizer(self.learning_rate).minimize(cross_entropy)
        with tf.name_scope('accuracy'):
            correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(self.labels, 1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
|
||||||
|
|
||||||
|
|
||||||
|
def conv2d(x_input, w_matrix):
    """conv2d returns a 2d convolution layer with full stride."""
    return tf.nn.conv2d(x_input, w_matrix,
                        strides=[1, 1, 1, 1], padding='SAME')
|
||||||
|
|
||||||
|
|
||||||
|
def max_pool(x_input, pool_size):
    """max_pool downsamples a feature map by 2X."""
    window = [1, pool_size, pool_size, 1]
    return tf.nn.max_pool(x_input, ksize=window, strides=window, padding='SAME')
|
||||||
|
|
||||||
|
|
||||||
|
def avg_pool(x_input, pool_size):
    """avg_pool downsamples a feature map by average pooling."""
    window = [1, pool_size, pool_size, 1]
    return tf.nn.avg_pool(x_input, ksize=window, strides=window, padding='SAME')
|
||||||
|
|
||||||
|
|
||||||
|
def weight_variable(shape):
    """weight_variable generates a weight variable of a given shape."""
    # Truncated normal init keeps weights small and avoids dead neurons.
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))
|
||||||
|
|
||||||
|
|
||||||
|
def bias_variable(shape):
    """bias_variable generates a bias variable of a given shape."""
    # Small positive bias avoids dead ReLU units at initialization.
    return tf.Variable(tf.constant(0.1, shape=shape))
|
||||||
|
|
||||||
|
|
||||||
|
def main(params):
    """
    Main function, build mnist network, run and send result to NNI.
    """
    # Import data.
    mnist = input_data.read_data_sets(params['data_dir'], one_hot=True)
    print('Mnist download data down.')
    logger.debug('Mnist download data down.')

    # Create the model and build the graph for the deep net.
    mnist_network = MnistNetwork(channel_1_num=params['channel_1_num'],
                                 channel_2_num=params['channel_2_num'],
                                 conv_size=params['conv_size'],
                                 hidden_size=params['hidden_size'],
                                 pool_size=params['pool_size'],
                                 learning_rate=params['learning_rate'])
    mnist_network.build_network()
    logger.debug('Mnist build network done.')

    # Write the graph for inspection.
    graph_location = tempfile.mkdtemp()
    logger.debug('Saving graph to: %s', graph_location)
    train_writer = tf.summary.FileWriter(graph_location)
    train_writer.add_graph(tf.get_default_graph())

    test_acc = 0.0
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # NOTE: the triple-quoted strings below are NNI annotations parsed
        # by the annotation tool; they must precede their target statement.
        """@nni.variable(nni.choice(50, 250, 500), name=batch_num)"""
        batch_num = params['batch_num']
        for i in range(batch_num):
            batch = mnist.train.next_batch(batch_num)
            """@nni.variable(nni.choice(1, 5), name=dropout_rate)"""
            dropout_rate = params['dropout_rate']
            mnist_network.train_step.run(feed_dict={
                mnist_network.images: batch[0],
                mnist_network.labels: batch[1],
                mnist_network.keep_prob: dropout_rate})

            if i % 100 == 0:
                test_acc = mnist_network.accuracy.eval(feed_dict={
                    mnist_network.images: mnist.test.images,
                    mnist_network.labels: mnist.test.labels,
                    mnist_network.keep_prob: 1.0})

                """@nni.report_intermediate_result(test_acc)"""
                logger.debug('test accuracy %g', test_acc)
                logger.debug('Pipe send intermediate result done.')

        test_acc = mnist_network.accuracy.eval(feed_dict={
            mnist_network.images: mnist.test.images,
            mnist_network.labels: mnist.test.labels,
            mnist_network.keep_prob: 1.0})

        """@nni.report_final_result(test_acc)"""
        logger.debug('Final result is %g', test_acc)
        logger.debug('Send final result done.')
|
||||||
|
|
||||||
|
|
||||||
|
def generate_defualt_params():
    """
    Generate default parameters for mnist network.
    """
    return {
        'data_dir': '/tmp/tensorflow/mnist/input_data',
        'dropout_rate': 0.5,
        'channel_1_num': 32,
        'channel_2_num': 64,
        'conv_size': 5,
        'pool_size': 2,
        'hidden_size': 1024,
        'learning_rate': 1e-4,
        'batch_num': 200,
    }
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    try:
        main(generate_defualt_params())
    except Exception as exception:
        # Record the full stack trace before propagating the failure.
        logger.exception(exception)
        raise
|
|
@ -0,0 +1,56 @@
|
||||||
|
{
|
||||||
|
"mnist_without_annotation/#31/choice": {
|
||||||
|
"_type": "choice",
|
||||||
|
"_value": [
|
||||||
|
0,
|
||||||
|
1,
|
||||||
|
2
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"mnist_without_annotation/#68/function_choice": {
|
||||||
|
"_type": "choice",
|
||||||
|
"_value": [
|
||||||
|
0,
|
||||||
|
1,
|
||||||
|
2
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"mnist_without_annotation/batch_num/choice": {
|
||||||
|
"_type": "choice",
|
||||||
|
"_value": [
|
||||||
|
0,
|
||||||
|
1,
|
||||||
|
2
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"mnist_without_annotation/conv-size/choice": {
|
||||||
|
"_type": "choice",
|
||||||
|
"_value": [
|
||||||
|
0,
|
||||||
|
1,
|
||||||
|
2,
|
||||||
|
3
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"mnist_without_annotation/dropout_rate/choice": {
|
||||||
|
"_type": "choice",
|
||||||
|
"_value": [
|
||||||
|
0,
|
||||||
|
1
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"mnist_without_annotation/h_pool1/function_choice": {
|
||||||
|
"_type": "choice",
|
||||||
|
"_value": [
|
||||||
|
0,
|
||||||
|
1
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"mnist_without_annotation/learning_rate/uniform": {
|
||||||
|
"_type": "uniform",
|
||||||
|
"_value": [
|
||||||
|
0.0001,
|
||||||
|
0.1
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,248 @@
|
||||||
|
#!/usr/bin/python
|
||||||
|
# Copyright (c) Microsoft Corporation
|
||||||
|
# All rights reserved.
|
||||||
|
#
|
||||||
|
# MIT License
|
||||||
|
#
|
||||||
|
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
|
||||||
|
# documentation files (the "Software"), to deal in the Software without restriction, including without limitation
|
||||||
|
# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
|
||||||
|
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
||||||
|
# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
||||||
|
#
|
||||||
|
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
|
||||||
|
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
|
||||||
|
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
|
||||||
|
"""A deep MNIST classifier using convolutional layers."""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import math
|
||||||
|
import tempfile
|
||||||
|
import tensorflow as tf
|
||||||
|
|
||||||
|
from tensorflow.examples.tutorials.mnist import input_data
|
||||||
|
|
||||||
|
import nni
|
||||||
|
|
||||||
|
FLAGS = None
|
||||||
|
|
||||||
|
logger = logging.getLogger('mnist_AutoML')
|
||||||
|
|
||||||
|
|
||||||
|
class MnistNetwork(object):
    '''
    MnistNetwork is for initializing and building basic network for mnist.

    Hyper-parameters tagged with nni.* calls are "smart parameters": NNI
    replaces them with tuner-chosen values at run time.
    '''

    def __init__(self,
                 channel_1_num,
                 channel_2_num,
                 pool_size,
                 learning_rate=0.0001,
                 x_dim=784,
                 y_dim=10):
        '''
        channel_1_num: feature maps produced by the first conv layer
        channel_2_num: feature maps produced by the second conv layer
        pool_size: pooling window/stride for both pooling layers
        learning_rate: now has a default because `main` constructs this class
            without passing it (which raised TypeError before); the value is
            immediately replaced by the nni.uniform smart parameter below.
        x_dim: flattened input size (784 = 28x28 grayscale)
        y_dim: number of output classes
        '''
        self.channel_1_num = channel_1_num
        self.channel_2_num = channel_2_num
        self.conv_size = nni.choice(2, 3, 5, 7, name='conv-size')
        self.hidden_size = nni.choice(124, 512, 1024)  # example: without name
        self.pool_size = pool_size
        self.learning_rate = nni.uniform(0.0001, 0.1, name='learning_rate')
        self.x_dim = x_dim
        self.y_dim = y_dim

        # Placeholders fed at train/eval time.
        self.images = tf.placeholder(tf.float32, [None, self.x_dim], name='input_x')
        self.labels = tf.placeholder(tf.float32, [None, self.y_dim], name='input_y')
        self.keep_prob = tf.placeholder(tf.float32, name='keep_prob')

        # Populated by build_network().
        self.train_step = None
        self.accuracy = None

    def build_network(self):
        '''
        Building network for mnist
        '''
        # Reshape to use within a convolutional neural net.
        # Last dimension is for "features" - there is only one here, since images are
        # grayscale -- it would be 3 for an RGB image, 4 for RGBA, etc.
        with tf.name_scope('reshape'):
            try:
                input_dim = int(math.sqrt(self.x_dim))
            except:
                print(
                    'input dim cannot be sqrt and reshape. input dim: ' + str(self.x_dim))
                logger.debug(
                    'input dim cannot be sqrt and reshape. input dim: %s', str(self.x_dim))
                raise
            x_image = tf.reshape(self.images, [-1, input_dim, input_dim, 1])

        # First convolutional layer - maps one grayscale image to 32 feature maps.
        with tf.name_scope('conv1'):
            w_conv1 = weight_variable(
                [self.conv_size, self.conv_size, 1, self.channel_1_num])
            b_conv1 = bias_variable([self.channel_1_num])
            # Smart parameter: tuner picks the activation function.
            h_conv1 = nni.function_choice(
                lambda: tf.nn.relu(conv2d(x_image, w_conv1) + b_conv1),
                lambda: tf.nn.sigmoid(conv2d(x_image, w_conv1) + b_conv1),
                lambda: tf.nn.tanh(conv2d(x_image, w_conv1) + b_conv1)
            )  # example: without name

        # Pooling layer - downsamples by 2X.
        with tf.name_scope('pool1'):
            # NOTE(review): this first assignment is immediately overwritten
            # by the function_choice below -- looks like a dead store; kept
            # to preserve the example's graph exactly.
            h_pool1 = max_pool(h_conv1, self.pool_size)
            h_pool1 = nni.function_choice(
                lambda: max_pool(h_conv1, self.pool_size),
                lambda: avg_pool(h_conv1, self.pool_size),
                name='h_pool1')

        # Second convolutional layer -- maps 32 feature maps to 64.
        with tf.name_scope('conv2'):
            w_conv2 = weight_variable([self.conv_size, self.conv_size,
                                       self.channel_1_num, self.channel_2_num])
            b_conv2 = bias_variable([self.channel_2_num])
            h_conv2 = tf.nn.relu(conv2d(h_pool1, w_conv2) + b_conv2)

        # Second pooling layer.
        with tf.name_scope('pool2'):  # example: another style
            h_pool2 = max_pool(h_conv2, self.pool_size)

        # Fully connected layer 1 -- after 2 round of downsampling, our 28x28 image
        # is down to 7x7x64 feature maps -- maps this to 1024 features.
        last_dim = int(input_dim / (self.pool_size * self.pool_size))
        with tf.name_scope('fc1'):
            w_fc1 = weight_variable(
                [last_dim * last_dim * self.channel_2_num, self.hidden_size])
            b_fc1 = bias_variable([self.hidden_size])

            h_pool2_flat = tf.reshape(
                h_pool2, [-1, last_dim * last_dim * self.channel_2_num])
            h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, w_fc1) + b_fc1)

        # Dropout - controls the complexity of the model, prevents co-adaptation of features.
        with tf.name_scope('dropout'):
            h_fc1_drop = tf.nn.dropout(h_fc1, self.keep_prob)

        # Map the 1024 features to 10 classes, one for each digit
        with tf.name_scope('fc2'):
            w_fc2 = weight_variable([self.hidden_size, self.y_dim])
            b_fc2 = bias_variable([self.y_dim])
            y_conv = tf.matmul(h_fc1_drop, w_fc2) + b_fc2

        with tf.name_scope('loss'):
            cross_entropy = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(labels=self.labels, logits=y_conv))
        with tf.name_scope('adam_optimizer'):
            self.train_step = tf.train.AdamOptimizer(
                self.learning_rate).minimize(cross_entropy)

        with tf.name_scope('accuracy'):
            correct_prediction = tf.equal(
                tf.argmax(y_conv, 1), tf.argmax(self.labels, 1))
            self.accuracy = tf.reduce_mean(
                tf.cast(correct_prediction, tf.float32))
|
||||||
|
|
||||||
|
|
||||||
|
def conv2d(x_input, w_matrix):
    """Apply a 2-D convolution with unit stride and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x_input, w_matrix, strides=unit_strides, padding='SAME')
|
||||||
|
|
||||||
|
|
||||||
|
def max_pool(x_input, pool_size):
    """Downsample a feature map by max-pooling over pool_size x pool_size windows."""
    window = [1, pool_size, pool_size, 1]
    return tf.nn.max_pool(x_input, ksize=window, strides=window, padding='SAME')
|
||||||
|
|
||||||
|
|
||||||
|
def avg_pool(x_input, pool_size):
    """Downsample a feature map by average-pooling over pool_size x pool_size windows."""
    window = [1, pool_size, pool_size, 1]
    return tf.nn.avg_pool(x_input, ksize=window, strides=window, padding='SAME')
|
||||||
|
|
||||||
|
|
||||||
|
def weight_variable(shape):
    """weight_variable generates a weight variable of a given shape."""
    # Truncated-normal init (stddev 0.1) avoids dead/saturated units at start.
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))
|
||||||
|
|
||||||
|
|
||||||
|
def bias_variable(shape):
    """bias_variable generates a bias variable of a given shape."""
    # Small positive constant bias, matching the classic MNIST tutorial.
    return tf.Variable(tf.constant(0.1, shape=shape))
|
||||||
|
|
||||||
|
|
||||||
|
def main(params):
    '''
    Main function, build mnist network, run and send result to NNI.

    params: dict with keys 'data_dir', 'channel_1_num', 'channel_2_num'
        and 'pool_size' (see generate_defualt_params).
    '''
    # Import data
    mnist = input_data.read_data_sets(params['data_dir'], one_hot=True)
    print('Mnist download data down.')
    logger.debug('Mnist download data down.')

    # Create the model
    # Build the graph for the deep net
    # NOTE(review): MnistNetwork.__init__ declares a required `learning_rate`
    # parameter that is not passed here -- confirm whether it should have a
    # default or be supplied from params.
    mnist_network = MnistNetwork(channel_1_num=params['channel_1_num'],
                                 channel_2_num=params['channel_2_num'],
                                 pool_size=params['pool_size'])
    mnist_network.build_network()
    logger.debug('Mnist build network done.')

    # Write log
    graph_location = tempfile.mkdtemp()
    logger.debug('Saving graph to: %s', graph_location)
    train_writer = tf.summary.FileWriter(graph_location)
    train_writer.add_graph(tf.get_default_graph())

    test_acc = 0.0
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # Smart parameter: tuner picks the number of training iterations.
        batch_num = nni.choice(50, 250, 500, name='batch_num')
        for i in range(batch_num):
            # NOTE(review): batch_num is reused here as the per-step batch
            # size as well as the iteration count -- presumably intentional
            # for this example, but verify.
            batch = mnist.train.next_batch(batch_num)
            # Smart parameter fed into the keep_prob placeholder.
            # NOTE(review): candidate values 1 and 5 look out of range for a
            # keep probability (TF expects a value in (0, 1]) -- confirm.
            dropout_rate = nni.choice(1, 5, name='dropout_rate')
            mnist_network.train_step.run(feed_dict={mnist_network.images: batch[0],
                                                    mnist_network.labels: batch[1],
                                                    mnist_network.keep_prob: dropout_rate}
                                         )

            if i % 100 == 0:
                # Periodically evaluate on the full test set (dropout off)
                # and report intermediate accuracy to NNI.
                test_acc = mnist_network.accuracy.eval(
                    feed_dict={mnist_network.images: mnist.test.images,
                               mnist_network.labels: mnist.test.labels,
                               mnist_network.keep_prob: 1.0})

                nni.report_intermediate_result(test_acc)
                logger.debug('test accuracy %g', test_acc)
                logger.debug('Pipe send intermediate result done.')

        # Final evaluation after training; this value drives the tuner.
        test_acc = mnist_network.accuracy.eval(
            feed_dict={mnist_network.images: mnist.test.images,
                       mnist_network.labels: mnist.test.labels,
                       mnist_network.keep_prob: 1.0})

        nni.report_final_result(test_acc)
        logger.debug('Final result is %g', test_acc)
        logger.debug('Send final result done.')
|
||||||
|
|
||||||
|
|
||||||
|
def generate_defualt_params():
    '''
    Generate default parameters for mnist network.

    Note: the function keeps its historical spelling ("defualt") because
    callers refer to it by this name.
    '''
    return {
        'data_dir': '/tmp/tensorflow/mnist/input_data',
        'channel_1_num': 32,
        'channel_2_num': 64,
        'pool_size': 2,
    }
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    try:
        # Run a trial with the built-in defaults; NNI's smart parameters
        # inside main() supply the tuned values.
        main(generate_defualt_params())
    except Exception as exception:
        # Log the full traceback before re-raising so the failure is
        # visible in the trial log.
        logger.exception(exception)
        raise
|
|
@ -0,0 +1,123 @@
|
||||||
|
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||||
|
#
|
||||||
|
# MIT License
|
||||||
|
#
|
||||||
|
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
|
||||||
|
# associated documentation files (the "Software"), to deal in the Software without restriction,
|
||||||
|
# including without limitation the rights to use, copy, modify, merge, publish, distribute,
|
||||||
|
# sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
|
||||||
|
# furnished to do so, subject to the following conditions:
|
||||||
|
#
|
||||||
|
# The above copyright notice and this permission notice shall be included in all copies or
|
||||||
|
# substantial portions of the Software.
|
||||||
|
#
|
||||||
|
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
|
||||||
|
# NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
|
||||||
|
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
|
||||||
|
# OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
# ==================================================================================================
|
||||||
|
|
||||||
|
|
||||||
|
import ast
|
||||||
|
|
||||||
|
# pylint: disable=unidiomatic-typecheck
|
||||||
|
|
||||||
|
|
||||||
|
# list of functions related to search space generating
# (the nni.* API names that declare a tunable dimension; every other
# nni.* call, e.g. report_final_result, is ignored by the visitor below)
_ss_funcs = [
    'choice',
    'randint',
    'uniform',
    'quniform',
    'loguniform',
    'qloguniform',
    'normal',
    'qnormal',
    'lognormal',
    'qlognormal',
    'function_choice'
]
|
||||||
|
|
||||||
|
|
||||||
|
class SearchSpaceGenerator(ast.NodeVisitor):
    """Generate search space from smart parameter APIs.

    Walks a parsed module and records every ``nni.<func>(...)`` call whose
    ``<func>`` declares a search-space dimension, accumulating a
    JSON-serializable dict in ``search_space`` keyed by
    ``<module_name>/<name>/<func>``.
    """

    # nni APIs that declare a search-space dimension.  Mirrors the
    # module-level ``_ss_funcs`` list; a frozenset gives O(1) membership
    # tests and makes the class usable on its own.
    _SS_FUNCS = frozenset((
        'choice', 'randint', 'uniform', 'quniform', 'loguniform',
        'qloguniform', 'normal', 'qnormal', 'lognormal', 'qlognormal',
        'function_choice',
    ))

    # Sentinel distinguishing "not a literal node" from literal values.
    _NOT_LITERAL = object()

    def __init__(self, module_name):
        self.module_name = module_name  # prefix of every generated key
        self.search_space = {}          # accumulated search space
        self.last_line = 0  # last parsed line, useful for error reporting

    @classmethod
    def _literal_value(cls, node):
        """Return the Python value of a literal AST node, or _NOT_LITERAL.

        The original code compared node types against ast.Str / ast.Num,
        which ast.parse stopped producing in Python 3.8 (both classes were
        removed in 3.12), breaking named smart parameters on modern Python.
        This handles ast.Constant as well as the legacy nodes.
        """
        if isinstance(node, ast.Constant):  # Python >= 3.8
            return node.value
        # Legacy nodes on Python < 3.8: ast.Str has .s, ast.Num has .n.
        value = getattr(node, 's', cls._NOT_LITERAL)
        if value is cls._NOT_LITERAL:
            value = getattr(node, 'n', cls._NOT_LITERAL)
        return value

    def visit_Call(self, node):  # pylint: disable=invalid-name
        # Visit children first so nested smart parameters are also recorded.
        self.generic_visit(node)

        # ignore if the function is not 'nni.*'
        if not isinstance(node.func, ast.Attribute):
            return
        if not isinstance(node.func.value, ast.Name):
            return
        if node.func.value.id != 'nni':
            return

        # ignore if it's not a search space function (e.g. `report_final_result`)
        func = node.func.attr
        if func not in self._SS_FUNCS:
            return

        self.last_line = node.lineno

        if node.keywords:
            # there is a `name` argument
            assert len(node.keywords) == 1, 'Smart parameter has keyword argument other than "name"'
            assert node.keywords[0].arg == 'name', 'Smart paramater\'s keyword argument is not "name"'
            name = self._literal_value(node.keywords[0].value)
            assert isinstance(name, str), 'Smart parameter\'s name must be string literal'
            specified_name = True
        else:
            # generate the missing name automatically from the line number
            # of the last argument
            assert len(node.args) > 0, 'Smart parameter expression has no argument'
            name = '#' + str(node.args[-1].lineno)
            specified_name = False

        if func in ('choice', 'function_choice'):
            # arguments of `choice` may contain complex expression,
            # so use indices instead of arguments
            args = list(range(len(node.args)))
        else:
            # arguments of other functions must be number literals
            # (bool is excluded: it was never an ast.Num)
            values = [self._literal_value(arg) for arg in node.args]
            assert all(
                isinstance(v, (int, float)) and not isinstance(v, bool)
                for v in values
            ), 'Smart parameter\'s arguments must be number literals'
            args = values

        key = self.module_name + '/' + name + '/' + func
        if func == 'function_choice':
            # function_choice is expressed as an ordinary `choice` over indices
            func = 'choice'
        value = {'_type': func, '_value': args}

        if specified_name:
            # multiple functions with same name must have identical arguments
            old = self.search_space.get(key)
            assert old is None or old == value, 'Different smart parameters have same name'
        else:
            # generated name must not duplicate
            assert key not in self.search_space, 'Only one smart parameter is allowed in a line'

        self.search_space[key] = value
|
||||||
|
|
||||||
|
|
||||||
|
def generate(module_name, code):
    """Generate search space.

    Return a serializable search space object.
    module_name: name of the module (str)
    code: user code (str)
    """
    # Parse the user code; any parse failure is reported uniformly.
    try:
        parsed = ast.parse(code)
    except Exception:
        raise RuntimeError('Bad Python code')

    # Collect smart-parameter calls; assertion failures inside the visitor
    # are turned into RuntimeErrors annotated with the offending line.
    collector = SearchSpaceGenerator(module_name)
    try:
        collector.visit(parsed)
    except AssertionError as exc:
        raise RuntimeError('%d: %s' % (collector.last_line, exc.args[0]))

    return collector.search_space
|
|
@ -0,0 +1,60 @@
|
||||||
|
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||||
|
#
|
||||||
|
# MIT License
|
||||||
|
#
|
||||||
|
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
|
||||||
|
# associated documentation files (the "Software"), to deal in the Software without restriction,
|
||||||
|
# including without limitation the rights to use, copy, modify, merge, publish, distribute,
|
||||||
|
# sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
|
||||||
|
# furnished to do so, subject to the following conditions:
|
||||||
|
#
|
||||||
|
# The above copyright notice and this permission notice shall be included in all copies or
|
||||||
|
# substantial portions of the Software.
|
||||||
|
#
|
||||||
|
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
|
||||||
|
# NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
|
||||||
|
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
|
||||||
|
# OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
# ==================================================================================================
|
||||||
|
|
||||||
|
|
||||||
|
# pylint: skip-file
|
||||||
|
|
||||||
|
from .__init__ import *
|
||||||
|
|
||||||
|
import ast
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import shutil
|
||||||
|
from unittest import TestCase, main
|
||||||
|
|
||||||
|
|
||||||
|
class AnnotationTestCase(TestCase):
    """Tests for the nni_annotation package: search-space extraction and
    annotated-code expansion are checked against checked-in fixture files."""

    @classmethod
    def setUpClass(cls):
        # Run from the package directory so the 'testcase/...' fixture
        # paths resolve, and start from a clean '_generated' output dir.
        os.chdir('nni_annotation')
        if os.path.isdir('_generated'):
            shutil.rmtree('_generated')

    def test_search_space_generator(self):
        # The search space extracted from the annotated sources must equal
        # the expected JSON fixture.
        search_space = generate_search_space('testcase/annotated')
        with open('testcase/searchspace.json') as f:
            self.assertEqual(search_space, json.load(f))

    def test_code_generator(self):
        # Expanding the annotated user code must produce sources that are
        # AST-equivalent to the expected files; non-Python files must be
        # copied through byte-for-byte.
        expand_annotations('testcase/usercode', '_generated')
        self._assert_source_equal('testcase/annotated/mnist.py', '_generated/mnist.py')
        self._assert_source_equal('testcase/annotated/dir/simple.py', '_generated/dir/simple.py')
        with open('testcase/usercode/nonpy.txt') as src, open('_generated/nonpy.txt') as dst:
            assert src.read() == dst.read()

    def _assert_source_equal(self, src1, src2):
        # Compare two Python files by their dumped ASTs, which ignores
        # comments and formatting differences.
        with open(src1) as f1, open(src2) as f2:
            ast1 = ast.dump(ast.parse(f1.read()))
            ast2 = ast.dump(ast.parse(f2.read()))
        self.assertEqual(ast1, ast2)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # unittest.main (imported above): discover and run the test case(s).
    main()
|
|
@ -0,0 +1,14 @@
|
||||||
|
import nni
|
||||||
|
|
||||||
|
def max_pool(k):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
h_conv1 = 1
|
||||||
|
conv_size = nni.choice(2, 3, 5, 7, name='conv_size')
|
||||||
|
h_pool1 = nni.function_choice(lambda : max_pool(h_conv1), lambda : avg_pool
|
||||||
|
(h_conv2, h_conv3), name='max_pool')
|
||||||
|
test_acc = 1
|
||||||
|
nni.report_intermediate_result(test_acc)
|
||||||
|
test_acc = 2
|
||||||
|
nni.report_final_result(test_acc)
|
|
@ -0,0 +1,13 @@
|
||||||
|
h_conv1 = 1
|
||||||
|
conv_size = nni.choice(2, 3, 5, 7, name='conv_size')
|
||||||
|
h_pool1 = nni.function_choice(lambda : max_pool(h_conv1),
|
||||||
|
lambda : h_conv1,
|
||||||
|
lambda : avg_pool
|
||||||
|
(h_conv2, h_conv3)
|
||||||
|
)
|
||||||
|
tmp = nni.qlognormal(1.2, 3, 4.5)
|
||||||
|
test_acc = 1
|
||||||
|
nni.report_intermediate_result(test_acc)
|
||||||
|
test_acc = 2
|
||||||
|
nni.report_final_result(test_acc)
|
||||||
|
nni.choice(foo, bar)(1)
|
|
@ -0,0 +1,171 @@
|
||||||
|
"""A deep MNIST classifier using convolutional layers.
|
||||||
|
"""
|
||||||
|
from __future__ import absolute_import
|
||||||
|
from __future__ import division
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
|
import nni
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import math
|
||||||
|
import tempfile
|
||||||
|
import tensorflow as tf
|
||||||
|
from tensorflow.examples.tutorials.mnist import input_data
|
||||||
|
|
||||||
|
logger = logging.getLogger('mnist')
|
||||||
|
FLAGS = None
|
||||||
|
|
||||||
|
|
||||||
|
class MnistNetwork(object):
|
||||||
|
|
||||||
|
def __init__(self, channel_1_num=32, channel_2_num=64, conv_size=5,
|
||||||
|
hidden_size=1024, pool_size=2, learning_rate=0.0001, x_dim=784,
|
||||||
|
y_dim=10):
|
||||||
|
self.channel_1_num = channel_1_num
|
||||||
|
self.channel_2_num = channel_2_num
|
||||||
|
self.conv_size = nni.choice(2, 3, 5, 7, name='self.conv_size')
|
||||||
|
self.hidden_size = nni.choice(124, 512, 1024, name='self.hidden_size')
|
||||||
|
self.pool_size = pool_size
|
||||||
|
self.learning_rate = nni.randint(2, 3, 5, name='self.learning_rate')
|
||||||
|
self.x_dim = x_dim
|
||||||
|
self.y_dim = y_dim
|
||||||
|
|
||||||
|
def build_network(self):
|
||||||
|
self.x = tf.placeholder(tf.float32, [None, self.x_dim], name='input_x')
|
||||||
|
self.y = tf.placeholder(tf.float32, [None, self.y_dim], name='input_y')
|
||||||
|
self.keep_prob = tf.placeholder(tf.float32, name='keep_prob')
|
||||||
|
with tf.name_scope('reshape'):
|
||||||
|
try:
|
||||||
|
input_dim = int(math.sqrt(self.x_dim))
|
||||||
|
except:
|
||||||
|
logger.debug(
|
||||||
|
'input dim cannot be sqrt and reshape. input dim: ' +
|
||||||
|
str(self.x_dim))
|
||||||
|
raise
|
||||||
|
x_image = tf.reshape(self.x, [-1, input_dim, input_dim, 1])
|
||||||
|
with tf.name_scope('conv1'):
|
||||||
|
W_conv1 = weight_variable([self.conv_size, self.conv_size, 1,
|
||||||
|
self.channel_1_num])
|
||||||
|
b_conv1 = bias_variable([self.channel_1_num])
|
||||||
|
h_conv1 = nni.function_choice(lambda : tf.nn.relu(conv2d(
|
||||||
|
x_image, W_conv1) + b_conv1), lambda : tf.nn.sigmoid(conv2d
|
||||||
|
(x_image, W_conv1) + b_conv1), lambda : tf.nn.tanh(conv2d(
|
||||||
|
x_image, W_conv1) + b_conv1), name='tf.nn.relu')
|
||||||
|
with tf.name_scope('pool1'):
|
||||||
|
h_pool1 = nni.function_choice(lambda : max_pool(h_conv1, self.
|
||||||
|
pool_size), lambda : avg_pool(h_conv1, self.pool_size),
|
||||||
|
name='max_pool')
|
||||||
|
with tf.name_scope('conv2'):
|
||||||
|
W_conv2 = weight_variable([self.conv_size, self.conv_size, self
|
||||||
|
.channel_1_num, self.channel_2_num])
|
||||||
|
b_conv2 = bias_variable([self.channel_2_num])
|
||||||
|
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
|
||||||
|
with tf.name_scope('pool2'):
|
||||||
|
h_pool2 = max_pool(h_conv2, self.pool_size)
|
||||||
|
last_dim = int(input_dim / (self.pool_size * self.pool_size))
|
||||||
|
with tf.name_scope('fc1'):
|
||||||
|
W_fc1 = weight_variable([last_dim * last_dim * self.
|
||||||
|
channel_2_num, self.hidden_size])
|
||||||
|
b_fc1 = bias_variable([self.hidden_size])
|
||||||
|
h_pool2_flat = tf.reshape(h_pool2, [-1, last_dim * last_dim * self.
|
||||||
|
channel_2_num])
|
||||||
|
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
|
||||||
|
with tf.name_scope('dropout'):
|
||||||
|
h_fc1_drop = tf.nn.dropout(h_fc1, self.keep_prob)
|
||||||
|
with tf.name_scope('fc2'):
|
||||||
|
W_fc2 = weight_variable([self.hidden_size, self.y_dim])
|
||||||
|
b_fc2 = bias_variable([self.y_dim])
|
||||||
|
y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
|
||||||
|
with tf.name_scope('loss'):
|
||||||
|
cross_entropy = tf.reduce_mean(tf.nn.
|
||||||
|
softmax_cross_entropy_with_logits(labels=self.y, logits=y_conv)
|
||||||
|
)
|
||||||
|
with tf.name_scope('adam_optimizer'):
|
||||||
|
self.train_step = tf.train.AdamOptimizer(self.learning_rate
|
||||||
|
).minimize(cross_entropy)
|
||||||
|
with tf.name_scope('accuracy'):
|
||||||
|
correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(
|
||||||
|
self.y, 1))
|
||||||
|
self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.
|
||||||
|
float32))
|
||||||
|
return
|
||||||
|
|
||||||
|
|
||||||
|
def conv2d(x, W):
|
||||||
|
"""conv2d returns a 2d convolution layer with full stride."""
|
||||||
|
return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
|
||||||
|
|
||||||
|
|
||||||
|
def max_pool(x, pool_size):
|
||||||
|
"""max_pool downsamples a feature map by 2X."""
|
||||||
|
return tf.nn.max_pool(x, ksize=[1, pool_size, pool_size, 1], strides=[1,
|
||||||
|
pool_size, pool_size, 1], padding='SAME')
|
||||||
|
|
||||||
|
|
||||||
|
def avg_pool(x, pool_size):
|
||||||
|
return tf.nn.avg_pool(x, ksize=[1, pool_size, pool_size, 1], strides=[1,
|
||||||
|
pool_size, pool_size, 1], padding='SAME')
|
||||||
|
|
||||||
|
|
||||||
|
def weight_variable(shape):
|
||||||
|
"""weight_variable generates a weight variable of a given shape."""
|
||||||
|
initial = tf.truncated_normal(shape, stddev=0.1)
|
||||||
|
return tf.Variable(initial)
|
||||||
|
|
||||||
|
|
||||||
|
def bias_variable(shape):
|
||||||
|
"""bias_variable generates a bias variable of a given shape."""
|
||||||
|
initial = tf.constant(0.1, shape=shape)
|
||||||
|
return tf.Variable(initial)
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
data_dir = '/tmp/tensorflow/mnist/input_data'
|
||||||
|
mnist = input_data.read_data_sets(data_dir, one_hot=True)
|
||||||
|
logger.debug('Mnist download data down.')
|
||||||
|
mnist_network = MnistNetwork()
|
||||||
|
mnist_network.build_network()
|
||||||
|
logger.debug('Mnist build network done.')
|
||||||
|
graph_location = tempfile.mkdtemp()
|
||||||
|
logger.debug('Saving graph to: %s' % graph_location)
|
||||||
|
train_writer = tf.summary.FileWriter(graph_location)
|
||||||
|
train_writer.add_graph(tf.get_default_graph())
|
||||||
|
test_acc = 0.0
|
||||||
|
with tf.Session() as sess:
|
||||||
|
sess.run(tf.global_variables_initializer())
|
||||||
|
batch_num = 200
|
||||||
|
for i in range(batch_num):
|
||||||
|
batch_size = nni.choice(50, 250, 500, name='batch_size')
|
||||||
|
batch = mnist.train.next_batch(batch_size)
|
||||||
|
dropout_rate = nni.choice(1, 5, name='dropout_rate')
|
||||||
|
mnist_network.train_step.run(feed_dict={mnist_network.x: batch[
|
||||||
|
0], mnist_network.y: batch[1], mnist_network.keep_prob:
|
||||||
|
dropout_rate})
|
||||||
|
if i % 100 == 0:
|
||||||
|
test_acc = mnist_network.accuracy.eval(feed_dict={
|
||||||
|
mnist_network.x: mnist.test.images, mnist_network.y:
|
||||||
|
mnist.test.labels, mnist_network.keep_prob: 1.0})
|
||||||
|
nni.report_intermediate_result(test_acc)
|
||||||
|
test_acc = mnist_network.accuracy.eval(feed_dict={mnist_network.x:
|
||||||
|
mnist.test.images, mnist_network.y: mnist.test.labels,
|
||||||
|
mnist_network.keep_prob: 1.0})
|
||||||
|
nni.report_final_result(test_acc)
|
||||||
|
|
||||||
|
|
||||||
|
def generate_default_params():
|
||||||
|
params = {'data_dir': '/tmp/tensorflow/mnist/input_data',
|
||||||
|
'dropout_rate': 0.5, 'channel_1_num': 32, 'channel_2_num': 64,
|
||||||
|
'conv_size': 5, 'pool_size': 2, 'hidden_size': 1024, 'batch_size':
|
||||||
|
50, 'batch_num': 200, 'learning_rate': 0.0001}
|
||||||
|
return params
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
try:
|
||||||
|
params = generate_default_params()
|
||||||
|
logger.debug('params')
|
||||||
|
logger.debug('params update')
|
||||||
|
main()
|
||||||
|
except:
|
||||||
|
logger.exception('Got some exception in while loop in mnist.py')
|
||||||
|
raise
|
|
@ -0,0 +1,54 @@
|
||||||
|
{
|
||||||
|
"handwrite/conv_size/choice": {
|
||||||
|
"_type": "choice",
|
||||||
|
"_value": [ 0, 1, 2, 3 ]
|
||||||
|
},
|
||||||
|
"handwrite/#5/function_choice": {
|
||||||
|
"_type": "choice",
|
||||||
|
"_value": [ 0, 1, 2 ]
|
||||||
|
},
|
||||||
|
"handwrite/#8/qlognormal": {
|
||||||
|
"_type": "qlognormal",
|
||||||
|
"_value": [ 1.2, 3, 4.5 ]
|
||||||
|
},
|
||||||
|
"handwrite/#13/choice": {
|
||||||
|
"_type": "choice",
|
||||||
|
"_value": [ 0, 1 ]
|
||||||
|
},
|
||||||
|
"mnist/self.conv_size/choice": {
|
||||||
|
"_type": "choice",
|
||||||
|
"_value": [ 0, 1, 2, 3 ]
|
||||||
|
},
|
||||||
|
"mnist/self.hidden_size/choice": {
|
||||||
|
"_type": "choice",
|
||||||
|
"_value": [ 0, 1, 2 ]
|
||||||
|
},
|
||||||
|
"mnist/self.learning_rate/randint": {
|
||||||
|
"_type": "randint",
|
||||||
|
"_value": [ 2, 3, 5 ]
|
||||||
|
},
|
||||||
|
"mnist/tf.nn.relu/function_choice": {
|
||||||
|
"_type": "choice",
|
||||||
|
"_value": [ 0, 1, 2 ]
|
||||||
|
},
|
||||||
|
"mnist/max_pool/function_choice": {
|
||||||
|
"_type": "choice",
|
||||||
|
"_value": [ 0, 1 ]
|
||||||
|
},
|
||||||
|
"mnist/batch_size/choice": {
|
||||||
|
"_type": "choice",
|
||||||
|
"_value": [ 0, 1, 2 ]
|
||||||
|
},
|
||||||
|
"mnist/dropout_rate/choice": {
|
||||||
|
"_type": "choice",
|
||||||
|
"_value": [ 0, 1 ]
|
||||||
|
},
|
||||||
|
"dir.simple/conv_size/choice": {
|
||||||
|
"_type": "choice",
|
||||||
|
"_value": [ 0, 1, 2, 3 ]
|
||||||
|
},
|
||||||
|
"dir.simple/max_pool/function_choice": {
|
||||||
|
"_type": "choice",
|
||||||
|
"_value": [ 0, 1 ]
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,11 @@
|
||||||
|
def max_pool(k):
|
||||||
|
pass
|
||||||
|
h_conv1=1
|
||||||
|
"""@nni.variable(nni.choice(2,3,5,7),name=conv_size)"""
|
||||||
|
conv_size = 5
|
||||||
|
"""@nni.function_choice(max_pool(h_conv1),avg_pool(h_conv2,h_conv3),name=max_pool)"""
|
||||||
|
h_pool1 = max_pool(h_conv1)
|
||||||
|
test_acc=1
|
||||||
|
'''@nni.report_intermediate_result(test_acc)'''
|
||||||
|
test_acc=2
|
||||||
|
'''@nni.report_final_result(test_acc)'''
|
|
@ -0,0 +1,208 @@
|
||||||
|
# -*- encoding:utf8 -*-
|
||||||
|
|
||||||
|
"""A deep MNIST classifier using convolutional layers.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import absolute_import
|
||||||
|
from __future__ import division
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import math
|
||||||
|
import tempfile
|
||||||
|
import tensorflow as tf
|
||||||
|
|
||||||
|
from tensorflow.examples.tutorials.mnist import input_data
|
||||||
|
|
||||||
|
logger = logging.getLogger('mnist')
|
||||||
|
|
||||||
|
FLAGS = None
|
||||||
|
|
||||||
|
class MnistNetwork(object):
|
||||||
|
def __init__(self,
|
||||||
|
channel_1_num = 32,
|
||||||
|
channel_2_num = 64,
|
||||||
|
conv_size = 5,
|
||||||
|
hidden_size = 1024,
|
||||||
|
pool_size = 2,
|
||||||
|
learning_rate = 0.0001,
|
||||||
|
x_dim = 784,
|
||||||
|
y_dim = 10):
|
||||||
|
self.channel_1_num = channel_1_num
|
||||||
|
self.channel_2_num = channel_2_num
|
||||||
|
'''@nni.variable(nni.choice(2,3,5,7),name=self.conv_size)'''
|
||||||
|
self.conv_size = conv_size
|
||||||
|
'''@nni.variable(nni.choice(124,512,1024),name=self.hidden_size)'''
|
||||||
|
self.hidden_size = hidden_size
|
||||||
|
self.pool_size = pool_size
|
||||||
|
'''@nni.variable(nni.randint(2,3,5),name=self.learning_rate)'''
|
||||||
|
self.learning_rate = learning_rate
|
||||||
|
self.x_dim = x_dim
|
||||||
|
self.y_dim = y_dim
|
||||||
|
|
||||||
|
def build_network(self):
|
||||||
|
self.x = tf.placeholder(tf.float32, [None, self.x_dim], name = 'input_x')
|
||||||
|
self.y = tf.placeholder(tf.float32, [None, self.y_dim], name = 'input_y')
|
||||||
|
self.keep_prob = tf.placeholder(tf.float32, name = 'keep_prob')
|
||||||
|
|
||||||
|
# Reshape to use within a convolutional neural net.
|
||||||
|
# Last dimension is for "features" - there is only one here, since images are
|
||||||
|
# grayscale -- it would be 3 for an RGB image, 4 for RGBA, etc.
|
||||||
|
with tf.name_scope('reshape'):
|
||||||
|
try:
|
||||||
|
input_dim = int(math.sqrt(self.x_dim))
|
||||||
|
except:
|
||||||
|
#print('input dim cannot be sqrt and reshape. input dim: ' + str(self.x_dim))
|
||||||
|
logger.debug('input dim cannot be sqrt and reshape. input dim: ' + str(self.x_dim))
|
||||||
|
raise
|
||||||
|
x_image = tf.reshape(self.x, [-1, input_dim, input_dim, 1])
|
||||||
|
|
||||||
|
# First convolutional layer - maps one grayscale image to 32 feature maps.
|
||||||
|
with tf.name_scope('conv1'):
|
||||||
|
W_conv1 = weight_variable([self.conv_size, self.conv_size, 1, self.channel_1_num])
|
||||||
|
b_conv1 = bias_variable([self.channel_1_num])
|
||||||
|
"""@nni.function_choice(tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1),tf.nn.sigmoid(conv2d(x_image, W_conv1) + b_conv1),tf.nn.tanh(conv2d(x_image, W_conv1) + b_conv1),name=tf.nn.relu)"""
|
||||||
|
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
|
||||||
|
|
||||||
|
# Pooling layer - downsamples by 2X.
|
||||||
|
with tf.name_scope('pool1'):
|
||||||
|
"""@nni.function_choice(max_pool(h_conv1, self.pool_size),avg_pool(h_conv1, self.pool_size),name=max_pool)"""
|
||||||
|
h_pool1 = max_pool(h_conv1, self.pool_size)
|
||||||
|
|
||||||
|
# Second convolutional layer -- maps 32 feature maps to 64.
|
||||||
|
with tf.name_scope('conv2'):
|
||||||
|
W_conv2 = weight_variable([self.conv_size, self.conv_size, self.channel_1_num, self.channel_2_num])
|
||||||
|
b_conv2 = bias_variable([self.channel_2_num])
|
||||||
|
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
|
||||||
|
|
||||||
|
# Second pooling layer.
|
||||||
|
with tf.name_scope('pool2'):
|
||||||
|
#"""@nni.dynamic(input={cnn_block:1, concat:2},function_choice={"cnn_block":(x,nni.choice([3,4])),"cnn_block":(x),"concat":(x,y)},limit={"cnn_block.input":[concat,input],"concat.input":[this.depth-1,this.depth-3,this.depth-5],"graph.width":[1]})"""
|
||||||
|
h_pool2 = max_pool(h_conv2, self.pool_size)
|
||||||
|
|
||||||
|
# Fully connected layer 1 -- after 2 round of downsampling, our 28x28 image
|
||||||
|
# is down to 7x7x64 feature maps -- maps this to 1024 features.
|
||||||
|
last_dim = int(input_dim / (self.pool_size * self.pool_size))
|
||||||
|
with tf.name_scope('fc1'):
|
||||||
|
W_fc1 = weight_variable([last_dim * last_dim * self.channel_2_num, self.hidden_size])
|
||||||
|
b_fc1 = bias_variable([self.hidden_size])
|
||||||
|
|
||||||
|
h_pool2_flat = tf.reshape(h_pool2, [-1, last_dim * last_dim * self.channel_2_num])
|
||||||
|
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
|
||||||
|
|
||||||
|
# Dropout - controls the complexity of the model, prevents co-adaptation of features.
|
||||||
|
with tf.name_scope('dropout'):
|
||||||
|
h_fc1_drop = tf.nn.dropout(h_fc1, self.keep_prob)
|
||||||
|
|
||||||
|
# Map the 1024 features to 10 classes, one for each digit
|
||||||
|
with tf.name_scope('fc2'):
|
||||||
|
W_fc2 = weight_variable([self.hidden_size, self.y_dim])
|
||||||
|
b_fc2 = bias_variable([self.y_dim])
|
||||||
|
y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
|
||||||
|
|
||||||
|
with tf.name_scope('loss'):
|
||||||
|
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels = self.y, logits = y_conv))
|
||||||
|
with tf.name_scope('adam_optimizer'):
|
||||||
|
self.train_step = tf.train.AdamOptimizer(self.learning_rate).minimize(cross_entropy)
|
||||||
|
|
||||||
|
with tf.name_scope('accuracy'):
|
||||||
|
correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(self.y, 1))
|
||||||
|
self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
|
||||||
|
|
||||||
|
return
|
||||||
|
|
||||||
|
def conv2d(x, W):
|
||||||
|
"""conv2d returns a 2d convolution layer with full stride."""
|
||||||
|
return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
|
||||||
|
|
||||||
|
def max_pool(x, pool_size):
|
||||||
|
"""max_pool downsamples a feature map by 2X."""
|
||||||
|
return tf.nn.max_pool(x, ksize=[1, pool_size, pool_size, 1],
|
||||||
|
strides=[1, pool_size, pool_size, 1], padding='SAME')
|
||||||
|
def avg_pool(x,pool_size):
|
||||||
|
return tf.nn.avg_pool(x, ksize=[1, pool_size, pool_size, 1],
|
||||||
|
strides=[1, pool_size, pool_size, 1], padding='SAME')
|
||||||
|
|
||||||
|
def weight_variable(shape):
|
||||||
|
"""weight_variable generates a weight variable of a given shape."""
|
||||||
|
initial = tf.truncated_normal(shape, stddev=0.1)
|
||||||
|
return tf.Variable(initial)
|
||||||
|
|
||||||
|
def bias_variable(shape):
|
||||||
|
"""bias_variable generates a bias variable of a given shape."""
|
||||||
|
initial = tf.constant(0.1, shape=shape)
|
||||||
|
return tf.Variable(initial)
|
||||||
|
|
||||||
|
def main():
|
||||||
|
# Import data
|
||||||
|
data_dir= '/tmp/tensorflow/mnist/input_data'
|
||||||
|
mnist = input_data.read_data_sets(data_dir, one_hot=True)
|
||||||
|
logger.debug('Mnist download data down.')
|
||||||
|
|
||||||
|
# Create the model
|
||||||
|
# Build the graph for the deep net
|
||||||
|
mnist_network = MnistNetwork()
|
||||||
|
mnist_network.build_network()
|
||||||
|
logger.debug('Mnist build network done.')
|
||||||
|
|
||||||
|
# Write log
|
||||||
|
graph_location = tempfile.mkdtemp()
|
||||||
|
logger.debug('Saving graph to: %s' % graph_location)
|
||||||
|
# print('Saving graph to: %s' % graph_location)
|
||||||
|
train_writer = tf.summary.FileWriter(graph_location)
|
||||||
|
train_writer.add_graph(tf.get_default_graph())
|
||||||
|
|
||||||
|
test_acc = 0.0
|
||||||
|
with tf.Session() as sess:
|
||||||
|
sess.run(tf.global_variables_initializer())
|
||||||
|
batch_num=200
|
||||||
|
for i in range(batch_num):
|
||||||
|
'''@nni.variable(nni.choice(50,250,500),name=batch_size)'''
|
||||||
|
batch_size=50
|
||||||
|
batch = mnist.train.next_batch(batch_size)
|
||||||
|
'''@nni.variable(nni.choice(1,5),name=dropout_rate)'''
|
||||||
|
dropout_rate=0.5
|
||||||
|
mnist_network.train_step.run(feed_dict={mnist_network.x: batch[0], mnist_network.y: batch[1], mnist_network.keep_prob: dropout_rate})
|
||||||
|
|
||||||
|
if i % 100 == 0:
|
||||||
|
#train_accuracy = mnist_network.accuracy.eval(feed_dict={
|
||||||
|
# mnist_network.x: batch[0], mnist_network.y: batch[1], mnist_network.keep_prob: params['dropout_rate']})
|
||||||
|
#print('step %d, training accuracy %g' % (i, train_accuracy))
|
||||||
|
|
||||||
|
test_acc = mnist_network.accuracy.eval(feed_dict={
|
||||||
|
mnist_network.x: mnist.test.images, mnist_network.y: mnist.test.labels, mnist_network.keep_prob: 1.0})
|
||||||
|
'''@nni.report_intermediate_result(test_acc)'''
|
||||||
|
|
||||||
|
test_acc = mnist_network.accuracy.eval(feed_dict={
|
||||||
|
mnist_network.x: mnist.test.images, mnist_network.y: mnist.test.labels, mnist_network.keep_prob: 1.0})
|
||||||
|
'''@nni.report_final_result(test_acc)'''
|
||||||
|
|
||||||
|
|
||||||
|
def generate_default_params():
|
||||||
|
params = {'data_dir': '/tmp/tensorflow/mnist/input_data',
|
||||||
|
'dropout_rate': 0.5,
|
||||||
|
'channel_1_num': 32,
|
||||||
|
'channel_2_num': 64,
|
||||||
|
'conv_size': 5,
|
||||||
|
'pool_size': 2,
|
||||||
|
'hidden_size': 1024,
|
||||||
|
'batch_size': 50,
|
||||||
|
'batch_num': 200,
|
||||||
|
'learning_rate': 1e-4}
|
||||||
|
return params
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
# run command: python mnist.py --init_file_path ./init.json
|
||||||
|
|
||||||
|
#FLAGS, unparsed = parse_command()
|
||||||
|
#original_params = parse_init_json(FLAGS.init_file_path, {})
|
||||||
|
|
||||||
|
#pipe_interface.set_params_to_env()
|
||||||
|
try:
|
||||||
|
params = generate_default_params()
|
||||||
|
logger.debug('params')
|
||||||
|
logger.debug('params update')
|
||||||
|
main()
|
||||||
|
except:
|
||||||
|
logger.exception('Got some exception in while loop in mnist.py')
|
||||||
|
raise
|
|
@ -0,0 +1 @@
|
||||||
|
hello
|
|
@ -33,7 +33,7 @@ class Config:
|
||||||
|
|
||||||
def get_all_config(self):
|
def get_all_config(self):
|
||||||
'''get all of config values'''
|
'''get all of config values'''
|
||||||
return json.dumps(self.config)
|
return json.dumps(self.config, indent=4, sort_keys=True, separators=(',', ':'))
|
||||||
|
|
||||||
def set_config(self, key, value):
|
def set_config(self, key, value):
|
||||||
'''set {key:value} paris to self.config'''
|
'''set {key:value} paris to self.config'''
|
||||||
|
|
|
@ -22,9 +22,9 @@
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
from subprocess import Popen, PIPE
|
from subprocess import Popen, PIPE, call
|
||||||
import tempfile
|
import tempfile
|
||||||
from annotation import *
|
from nni_annotation import *
|
||||||
from .launcher_utils import validate_all_content
|
from .launcher_utils import validate_all_content
|
||||||
from .rest_utils import rest_put, rest_post, check_rest_server, check_rest_server_quick
|
from .rest_utils import rest_put, rest_post, check_rest_server, check_rest_server_quick
|
||||||
from .url_utils import cluster_metadata_url, experiment_url
|
from .url_utils import cluster_metadata_url, experiment_url
|
||||||
|
@ -33,7 +33,7 @@ from .common_utils import get_yml_content, get_json_content, print_error, print_
|
||||||
from .constants import EXPERIMENT_SUCCESS_INFO, STDOUT_FULL_PATH, STDERR_FULL_PATH, LOG_DIR, REST_PORT, ERROR_INFO, NORMAL_INFO
|
from .constants import EXPERIMENT_SUCCESS_INFO, STDOUT_FULL_PATH, STDERR_FULL_PATH, LOG_DIR, REST_PORT, ERROR_INFO, NORMAL_INFO
|
||||||
from .webui_utils import start_web_ui, check_web_ui
|
from .webui_utils import start_web_ui, check_web_ui
|
||||||
|
|
||||||
def start_rest_server(manager, port, platform, mode, experiment_id=None):
|
def start_rest_server(port, platform, mode, experiment_id=None):
|
||||||
'''Run nni manager process'''
|
'''Run nni manager process'''
|
||||||
print_normal('Checking experiment...')
|
print_normal('Checking experiment...')
|
||||||
nni_config = Config()
|
nni_config = Config()
|
||||||
|
@ -44,6 +44,7 @@ def start_rest_server(manager, port, platform, mode, experiment_id=None):
|
||||||
exit(0)
|
exit(0)
|
||||||
|
|
||||||
print_normal('Starting restful server...')
|
print_normal('Starting restful server...')
|
||||||
|
manager = os.environ.get('NNI_MANAGER', 'nnimanager')
|
||||||
cmds = [manager, '--port', str(port), '--mode', platform, '--start_mode', mode]
|
cmds = [manager, '--port', str(port), '--mode', platform, '--start_mode', mode]
|
||||||
if mode == 'resume':
|
if mode == 'resume':
|
||||||
cmds += ['--experiment_id', experiment_id]
|
cmds += ['--experiment_id', experiment_id]
|
||||||
|
@ -58,9 +59,9 @@ def set_trial_config(experiment_config, port):
|
||||||
'''set trial configuration'''
|
'''set trial configuration'''
|
||||||
request_data = dict()
|
request_data = dict()
|
||||||
value_dict = dict()
|
value_dict = dict()
|
||||||
value_dict['command'] = experiment_config['trial']['trialCommand']
|
value_dict['command'] = experiment_config['trial']['command']
|
||||||
value_dict['codeDir'] = experiment_config['trial']['trialCodeDir']
|
value_dict['codeDir'] = experiment_config['trial']['codeDir']
|
||||||
value_dict['gpuNum'] = experiment_config['trial']['trialGpuNum']
|
value_dict['gpuNum'] = experiment_config['trial']['gpuNum']
|
||||||
request_data['trial_config'] = value_dict
|
request_data['trial_config'] = value_dict
|
||||||
response = rest_put(cluster_metadata_url(port), json.dumps(request_data), 20)
|
response = rest_put(cluster_metadata_url(port), json.dumps(request_data), 20)
|
||||||
return True if response.status_code == 200 else False
|
return True if response.status_code == 200 else False
|
||||||
|
@ -75,11 +76,14 @@ def set_remote_config(experiment_config, port):
|
||||||
request_data = dict()
|
request_data = dict()
|
||||||
request_data['machine_list'] = experiment_config['machineList']
|
request_data['machine_list'] = experiment_config['machineList']
|
||||||
response = rest_put(cluster_metadata_url(port), json.dumps(request_data), 20)
|
response = rest_put(cluster_metadata_url(port), json.dumps(request_data), 20)
|
||||||
|
err_message = ''
|
||||||
if not response or not response.status_code == 200:
|
if not response or not response.status_code == 200:
|
||||||
return False
|
if response is not None:
|
||||||
|
err_message = response.text
|
||||||
|
return False, err_message
|
||||||
|
|
||||||
#set trial_config
|
#set trial_config
|
||||||
return set_trial_config(experiment_config, port)
|
return set_trial_config(experiment_config, port), err_message
|
||||||
|
|
||||||
def set_experiment(experiment_config, mode, port):
|
def set_experiment(experiment_config, mode, port):
|
||||||
'''Call startExperiment (rest POST /experiment) with yaml file content'''
|
'''Call startExperiment (rest POST /experiment) with yaml file content'''
|
||||||
|
@ -89,7 +93,7 @@ def set_experiment(experiment_config, mode, port):
|
||||||
request_data['trialConcurrency'] = experiment_config['trialConcurrency']
|
request_data['trialConcurrency'] = experiment_config['trialConcurrency']
|
||||||
request_data['maxExecDuration'] = experiment_config['maxExecDuration']
|
request_data['maxExecDuration'] = experiment_config['maxExecDuration']
|
||||||
request_data['maxTrialNum'] = experiment_config['maxTrialNum']
|
request_data['maxTrialNum'] = experiment_config['maxTrialNum']
|
||||||
request_data['searchSpace'] = experiment_config['searchSpace']
|
request_data['searchSpace'] = experiment_config.get('searchSpace')
|
||||||
request_data['tuner'] = experiment_config['tuner']
|
request_data['tuner'] = experiment_config['tuner']
|
||||||
if 'assessor' in experiment_config:
|
if 'assessor' in experiment_config:
|
||||||
request_data['assessor'] = experiment_config['assessor']
|
request_data['assessor'] = experiment_config['assessor']
|
||||||
|
@ -97,16 +101,16 @@ def set_experiment(experiment_config, mode, port):
|
||||||
request_data['clusterMetaData'] = []
|
request_data['clusterMetaData'] = []
|
||||||
if experiment_config['trainingServicePlatform'] == 'local':
|
if experiment_config['trainingServicePlatform'] == 'local':
|
||||||
request_data['clusterMetaData'].append(
|
request_data['clusterMetaData'].append(
|
||||||
{'key':'codeDir', 'value':experiment_config['trial']['trialCodeDir']})
|
{'key':'codeDir', 'value':experiment_config['trial']['codeDir']})
|
||||||
request_data['clusterMetaData'].append(
|
request_data['clusterMetaData'].append(
|
||||||
{'key': 'command', 'value': experiment_config['trial']['trialCommand']})
|
{'key': 'command', 'value': experiment_config['trial']['command']})
|
||||||
else:
|
else:
|
||||||
request_data['clusterMetaData'].append(
|
request_data['clusterMetaData'].append(
|
||||||
{'key': 'machine_list', 'value': experiment_config['machineList']})
|
{'key': 'machine_list', 'value': experiment_config['machineList']})
|
||||||
value_dict = dict()
|
value_dict = dict()
|
||||||
value_dict['command'] = experiment_config['trial']['trialCommand']
|
value_dict['command'] = experiment_config['trial']['command']
|
||||||
value_dict['codeDir'] = experiment_config['trial']['trialCodeDir']
|
value_dict['codeDir'] = experiment_config['trial']['codeDir']
|
||||||
value_dict['gpuNum'] = experiment_config['trial']['trialGpuNum']
|
value_dict['gpuNum'] = experiment_config['trial']['gpuNum']
|
||||||
request_data['clusterMetaData'].append(
|
request_data['clusterMetaData'].append(
|
||||||
{'key': 'trial_config', 'value': value_dict})
|
{'key': 'trial_config', 'value': value_dict})
|
||||||
|
|
||||||
|
@ -117,23 +121,24 @@ def launch_experiment(args, experiment_config, mode, webuiport, experiment_id=No
|
||||||
'''follow steps to start rest server and start experiment'''
|
'''follow steps to start rest server and start experiment'''
|
||||||
nni_config = Config()
|
nni_config = Config()
|
||||||
# start rest server
|
# start rest server
|
||||||
rest_process = start_rest_server(args.manager, REST_PORT, experiment_config['trainingServicePlatform'], mode, experiment_id)
|
rest_process = start_rest_server(REST_PORT, experiment_config['trainingServicePlatform'], mode, experiment_id)
|
||||||
nni_config.set_config('restServerPid', rest_process.pid)
|
nni_config.set_config('restServerPid', rest_process.pid)
|
||||||
|
|
||||||
# Deal with annotation
|
# Deal with annotation
|
||||||
if experiment_config.get('useAnnotation'):
|
if experiment_config.get('useAnnotation'):
|
||||||
path = os.path.join(tempfile.gettempdir(), 'nni', 'annotation')
|
path = os.path.join(tempfile.gettempdir(), 'nni', 'annotation')
|
||||||
if os.path.isdir(path):
|
if os.path.isdir(path):
|
||||||
shutil.rmtree(path)
|
shutil.rmtree(path)
|
||||||
os.makedirs(path)
|
os.makedirs(path)
|
||||||
expand_annotations(experiment_config['trial']['trialCodeDir'], path)
|
expand_annotations(experiment_config['trial']['codeDir'], path)
|
||||||
experiment_config['trial']['trialCodeDir'] = path
|
experiment_config['trial']['codeDir'] = path
|
||||||
search_space = generate_search_space(experiment_config['trial']['trialCodeDir'])
|
search_space = generate_search_space(experiment_config['trial']['codeDir'])
|
||||||
|
experiment_config['searchSpace'] = json.dumps(search_space)
|
||||||
assert search_space, ERROR_INFO % 'Generated search space is empty'
|
assert search_space, ERROR_INFO % 'Generated search space is empty'
|
||||||
|
elif experiment_config.get('searchSpacePath'):
|
||||||
|
search_space = get_json_content(experiment_config.get('searchSpacePath'))
|
||||||
|
experiment_config['searchSpace'] = json.dumps(search_space)
|
||||||
else:
|
else:
|
||||||
search_space = get_json_content(experiment_config['searchSpacePath'])
|
experiment_config['searchSpace'] = json.dumps('')
|
||||||
|
|
||||||
experiment_config['searchSpace'] = json.dumps(search_space)
|
|
||||||
|
|
||||||
# check rest server
|
# check rest server
|
||||||
print_normal('Checking restful server...')
|
print_normal('Checking restful server...')
|
||||||
|
@ -142,7 +147,8 @@ def launch_experiment(args, experiment_config, mode, webuiport, experiment_id=No
|
||||||
else:
|
else:
|
||||||
print_error('Restful server start failed!')
|
print_error('Restful server start failed!')
|
||||||
try:
|
try:
|
||||||
rest_process.kill()
|
cmds = ['pkill', '-P', str(rest_process.pid)]
|
||||||
|
call(cmds)
|
||||||
except Exception:
|
except Exception:
|
||||||
raise Exception(ERROR_INFO % 'Rest server stopped!')
|
raise Exception(ERROR_INFO % 'Rest server stopped!')
|
||||||
exit(0)
|
exit(0)
|
||||||
|
@ -150,12 +156,14 @@ def launch_experiment(args, experiment_config, mode, webuiport, experiment_id=No
|
||||||
# set remote config
|
# set remote config
|
||||||
if experiment_config['trainingServicePlatform'] == 'remote':
|
if experiment_config['trainingServicePlatform'] == 'remote':
|
||||||
print_normal('Setting remote config...')
|
print_normal('Setting remote config...')
|
||||||
if set_remote_config(experiment_config, REST_PORT):
|
config_result, err_msg = set_remote_config(experiment_config, REST_PORT)
|
||||||
|
if config_result:
|
||||||
print_normal('Success!')
|
print_normal('Success!')
|
||||||
else:
|
else:
|
||||||
print_error('Failed!')
|
print_error('Failed! Error is: {}'.format(err_msg))
|
||||||
try:
|
try:
|
||||||
rest_process.kill()
|
cmds = ['pkill', '-P', str(rest_process.pid)]
|
||||||
|
call(cmds)
|
||||||
except Exception:
|
except Exception:
|
||||||
raise Exception(ERROR_INFO % 'Rest server stopped!')
|
raise Exception(ERROR_INFO % 'Rest server stopped!')
|
||||||
exit(0)
|
exit(0)
|
||||||
|
@ -168,7 +176,8 @@ def launch_experiment(args, experiment_config, mode, webuiport, experiment_id=No
|
||||||
else:
|
else:
|
||||||
print_error('Failed!')
|
print_error('Failed!')
|
||||||
try:
|
try:
|
||||||
rest_process.kill()
|
cmds = ['pkill', '-P', str(rest_process.pid)]
|
||||||
|
call(cmds)
|
||||||
except Exception:
|
except Exception:
|
||||||
raise Exception(ERROR_INFO % 'Rest server stopped!')
|
raise Exception(ERROR_INFO % 'Rest server stopped!')
|
||||||
exit(0)
|
exit(0)
|
||||||
|
@ -183,7 +192,8 @@ def launch_experiment(args, experiment_config, mode, webuiport, experiment_id=No
|
||||||
else:
|
else:
|
||||||
print_error('Failed!')
|
print_error('Failed!')
|
||||||
try:
|
try:
|
||||||
rest_process.kill()
|
cmds = ['pkill', '-P', str(rest_process.pid)]
|
||||||
|
call(cmds)
|
||||||
except Exception:
|
except Exception:
|
||||||
raise Exception(ERROR_INFO % 'Rest server stopped!')
|
raise Exception(ERROR_INFO % 'Rest server stopped!')
|
||||||
exit(0)
|
exit(0)
|
||||||
|
@ -213,9 +223,9 @@ def resume_experiment(args):
|
||||||
def create_experiment(args):
|
def create_experiment(args):
|
||||||
'''start a new experiment'''
|
'''start a new experiment'''
|
||||||
nni_config = Config()
|
nni_config = Config()
|
||||||
|
config_path = os.path.abspath(args.config)
|
||||||
experiment_config = get_yml_content(args.config)
|
experiment_config = get_yml_content(config_path)
|
||||||
validate_all_content(experiment_config)
|
validate_all_content(experiment_config, config_path)
|
||||||
|
|
||||||
nni_config.set_config('experimentConfig', experiment_config)
|
nni_config.set_config('experimentConfig', experiment_config)
|
||||||
launch_experiment(args, experiment_config, 'new', args.webuiport)
|
launch_experiment(args, experiment_config, 'new', args.webuiport)
|
||||||
|
|
Некоторые файлы не были показаны из-за слишком большого количества измененных файлов Показать больше
Загрузка…
Ссылка в новой задаче