Merge from dogfood branch to master

This commit is contained in:
Deshui Yu 2018-09-07 18:21:46 +08:00 коммит произвёл fishyds
Родитель 98530fd247
Коммит 8314d6eec5
103 изменённых файлов: 5253 добавлений и 1030 удалений

128
Makefile
Просмотреть файл

@ -1,10 +1,10 @@
BIN_PATH ?= /usr/bin BIN_PATH ?= ${HOME}/.local/bin
NODE_PATH ?= /usr/share INSTALL_PREFIX ?= ${HOME}/.local
EXAMPLE_PATH ?= /usr/share/nni/examples PIP_MODE ?= --user
EXAMPLES_PATH ?= ${HOME}/nni/examples
SRC_DIR := ${PWD} WHOAMI := $(shell whoami)
.PHONY: build install uninstall dev-install
.PHONY: build install uninstall YARN := $(INSTALL_PREFIX)/yarn/bin/yarn
build: build:
### Building NNI Manager ### ### Building NNI Manager ###
@ -21,50 +21,124 @@ build:
install: install:
mkdir -p $(NODE_PATH)/nni ifneq ('$(HOME)', '/root')
mkdir -p $(EXAMPLE_PATH) ifeq (${WHOAMI}, root)
### Sorry, sudo make install is not supported ###
exit 1
endif
endif
mkdir -p $(BIN_PATH)
mkdir -p $(INSTALL_PREFIX)/nni
mkdir -p $(EXAMPLES_PATH)
### Installing NNI Manager ### ### Installing NNI Manager ###
cp -rT src/nni_manager/dist $(NODE_PATH)/nni/nni_manager cp -rT src/nni_manager/dist $(INSTALL_PREFIX)/nni/nni_manager
cp -rT src/nni_manager/node_modules $(NODE_PATH)/nni/nni_manager/node_modules cp -rT src/nni_manager/node_modules $(INSTALL_PREFIX)/nni/nni_manager/node_modules
### Installing Web UI ### ### Installing Web UI ###
cp -rT src/webui/build $(NODE_PATH)/nni/webui cp -rT src/webui/build $(INSTALL_PREFIX)/nni/webui
ln -sf $(NODE_PATH)/nni/nni_manager/node_modules/serve/bin/serve.js $(BIN_PATH)/serve ln -sf $(INSTALL_PREFIX)/nni/nni_manager/node_modules/serve/bin/serve.js $(BIN_PATH)/serve
### Installing Python SDK dependencies ### ### Installing Python SDK dependencies ###
pip3 install -r src/sdk/pynni/requirements.txt pip3 install $(PIP_MODE) -r src/sdk/pynni/requirements.txt
### Installing Python SDK ### ### Installing Python SDK ###
cd src/sdk/pynni && python3 setup.py install cd src/sdk/pynni && python3 setup.py install $(PIP_MODE)
### Installing nnictl ### ### Installing nnictl ###
cd tools && python3 setup.py install cd tools && python3 setup.py install $(PIP_MODE)
echo '#!/bin/sh' > $(BIN_PATH)/nnimanager echo '#!/bin/sh' > $(BIN_PATH)/nnimanager
echo 'cd $(NODE_PATH)/nni/nni_manager && node main.js $$@' >> $(BIN_PATH)/nnimanager echo 'cd $(INSTALL_PREFIX)/nni/nni_manager && node main.js $$@' >> $(BIN_PATH)/nnimanager
chmod +x $(BIN_PATH)/nnimanager chmod +x $(BIN_PATH)/nnimanager
install -m 755 tools/nnictl $(BIN_PATH)/nnictl echo '#!/bin/sh' > $(BIN_PATH)/nnictl
echo 'NNI_MANAGER=$(BIN_PATH)/nnimanager WEB_UI_FOLDER=$(INSTALL_PREFIX)/nni/webui python3 -m nnicmd.nnictl $$@' >> $(BIN_PATH)/nnictl
chmod +x $(BIN_PATH)/nnictl
### Installing examples ### ### Installing examples ###
cp -rT examples $(EXAMPLE_PATH) cp -rT examples $(EXAMPLES_PATH)
pip-install:
ifneq ('$(HOME)', '/root')
ifeq (${WHOAMI}, root)
### Sorry, sudo make install is not supported ###
exit 1
endif
endif
### Prepare Node.js ###
wget https://nodejs.org/dist/v10.9.0/node-v10.9.0-linux-x64.tar.xz
tar xf node-v10.9.0-linux-x64.tar.xz
cp -rT node-v10.9.0-linux-x64 $(INSTALL_PREFIX)/node
### Prepare Yarn 1.9.4 ###
wget https://github.com/yarnpkg/yarn/releases/download/v1.9.4/yarn-v1.9.4.tar.gz
tar xf yarn-v1.9.4.tar.gz
cp -rT yarn-v1.9.4 $(INSTALL_PREFIX)/yarn
### Building NNI Manager ###
cd src/nni_manager && $(YARN) && $(YARN) build
### Building Web UI ###
cd src/webui && $(YARN) && $(YARN) build
mkdir -p $(BIN_PATH)
mkdir -p $(INSTALL_PREFIX)/nni
### Installing NNI Manager ###
cp -rT src/nni_manager/dist $(INSTALL_PREFIX)/nni/nni_manager
cp -rT src/nni_manager/node_modules $(INSTALL_PREFIX)/nni/nni_manager/node_modules
echo '#!/bin/sh' > $(BIN_PATH)/nnimanager
echo 'cd $(INSTALL_PREFIX)/nni/nni_manager && node main.js $$@' >> $(BIN_PATH)/nnimanager
chmod +x $(BIN_PATH)/nnimanager
### Installing Web UI ###
cp -rT src/webui/build $(INSTALL_PREFIX)/nni/webui
ln -sf $(INSTALL_PREFIX)/nni/nni_manager/node_modules/serve/bin/serve.js $(BIN_PATH)/serve
### Installing examples ###
cp -rT examples $(EXAMPLES_PATH)
dev-install: dev-install:
mkdir -p $(BIN_PATH)
mkdir -p $(INSTALL_PREFIX)/nni
### Installing NNI Manager ###
ln -sf $(INSTALL_PREFIX)/nni/nni_manager $(PWD)/src/nni_manager/dist
ln -sf $(INSTALL_PREFIX)/nni/nni_manager/node_modules $(PWD)/src/nni_manager/node_modules
### Installing Web UI ###
ln -sf $(INSTALL_PREFIX)/nni/webui $(PWD)/src/webui
ln -sf $(INSTALL_PREFIX)/nni/nni_manager/node_modules/serve/bin/serve.js $(BIN_PATH)/serve
### Installing Python SDK dependencies ### ### Installing Python SDK dependencies ###
pip3 install --user -r src/sdk/pynni/requirements.txt pip3 install $(PIP_MODE) -r src/sdk/pynni/requirements.txt
### Installing Python SDK ### ### Installing Python SDK ###
cd src/sdk/pynni && pip3 install --user -e . cd src/sdk/pynni && pip3 install $(PIP_MODE) -e .
### Installing nnictl ### ### Installing nnictl ###
cd tools && pip3 install --user -e . cd tools && pip3 install $(PIP_MODE) -e .
echo '#!/bin/sh' > $(BIN_PATH)/nnimanager
echo 'cd $(INSTALL_PREFIX)/nni/nni_manager && node main.js $$@' >> $(BIN_PATH)/nnimanager
chmod +x $(BIN_PATH)/nnimanager
echo '#!/bin/sh' > $(BIN_PATH)/nnictl
echo 'NNI_MANAGER=$(BIN_PATH)/nnimanager python3 -m nnicmd.nnictl $$@' >> $(BIN_PATH)/nnictl
chmod +x $(BIN_PATH)/nnictl
### Installing examples ###
ln -sf $(EXAMPLES_PATH) $(PWD)/examples
uninstall: uninstall:
-rm -r $(EXAMPLE_PATH)
-rm -r $(NODE_PATH)/nni
-pip3 uninstall -y nnictl
-pip3 uninstall -y nni -pip3 uninstall -y nni
-rm $(BIN_PATH)/nnictl -pip3 uninstall -y nnictl
-rm $(BIN_PATH)/nnimanager -rm -r $(INSTALL_PREFIX)/nni
-rm -r $(EXAMPLES_PATH)
-rm $(BIN_PATH)/serve -rm $(BIN_PATH)/serve
-rm $(BIN_PATH)/nnimanager
-rm $(BIN_PATH)/nnictl

Просмотреть файл

@ -1,21 +1,53 @@
# Introduction # Introduction
Neural Network Intelligence(NNI) is a light package for supporting hyper-parameter tuning or neural architecture search.
It could easily run in different environments, such as: local/remote machine/cloud.
And it offers a new annotation language for user to conveniently design search space.
Also user could write code using any language or any machine learning framework.
# Getting Started NNI (Neural Network Intelligence) is a toolkit to help users running automated machine learning experiments.
TODO: Guide users through getting your code up and running on their own system. In this section you can talk about: The tool dispatches and runs trial jobs that generated by tuning algorithms to search the best neural architecture and/or hyper-parameters at different environments (e.g. local, remote servers, Cloud).
1. Installation process
2. Software dependencies ```
3. Latest releases AutoML experiment Training Services
4. API references ┌────────┐ ┌────────────────────────┐ ┌────────────────┐
│ nnictl │ ─────> │ nni_manager │ │ Local Machine │
└────────┘ │ sdk/tuner │ └────────────────┘
│ hyperopt_tuner │
│   evolution_tuner     │  trial jobs  ┌────────────────┐
│ ... │ ────────> │ Remote Servers │
├────────────────────────┤ └────────────────┘
│ trial job source code │
│ sdk/annotation │ ┌────────────────┐
├────────────────────────┤ │ Yarn,K8s, │
│ nni_board │ │ ... │
└────────────────────────┘ └────────────────┘
```
## **Who should consider using NNI**
* You want to try different AutoML algorithms for your training code (model) at local
* You want to run AutoML trial jobs in different environments to speed up search (e.g. remote servers, Cloud)
* As a researcher and data scientist, you want to implement your own AutoML algorithms and compare with other algorithms
* As a ML platform owner, you want to support AutoML in your platform
# Getting Started with NNI
## **Installation**
Install through python pip
* requirements: python >= 3.5
```
pip3 install -v --user git+https://github.com/Microsoft/NeuralNetworkIntelligence.git
source ~/.bashrc
```
## **Quick start: run an experiment at local**
Requirements:
* with NNI installed on your machine.
Run the following command to create an experiment for [mnist]
```bash
nnictl create --config ~/nni/examples/trials/mnist-annotation/config.yaml
```
This command will start the experiment and WebUI. The WebUI endpoint will be shown in the output of this command (for example, `http://localhost:8080`). Open this URL using your browsers. You can analyze your experiment through WebUI, or open trials' tensorboard. Please refer to [here](docs/GetStarted.md) for the GetStarted tutorial.
# Build and Test
TODO: Describe and show how to build your code and run the tests.
# Contribute # Contribute
TODO: Explain how other users and developers can contribute to make your code better. NNI is designed as an automatic searching framework with high extensibility. NNI has a very clear modular design. Contributing more tuner/assessor algorithms, training services, SDKs are really welcome. Please refer to [here](docs/ToContribute.md) for how to contribute.
# Privacy Statement # Privacy Statement
The [Microsoft Enterprise and Developer Privacy Statement](https://privacy.microsoft.com/en-us/privacystatement) describes the privacy statement of this software. The [Microsoft Enterprise and Developer Privacy Statement](https://privacy.microsoft.com/en-us/privacystatement) describes the privacy statement of this software.

88
docs/CustomizedTuner.md Normal file
Просмотреть файл

@ -0,0 +1,88 @@
# Customized Tuner for Experts
*Tuner receives results from Trial as a metric to evaluate the performance of a specific parameter/architecture configuration. And tuner sends the next hyper-parameter or architecture configuration to Trial.*
So, if a user wants to implement a customized Tuner, she/he only needs to:
1) Inherit a tuner of a base Tuner class
2) Implement receive_trial_result and generate_parameter function
3) Write a script to run Tuner
Here is an example:
**1) Inherit a tuner of a base Tuner class**
```python
from nni.tuner import Tuner
class CustomizedTuner(Tuner):
def __init__(self, ...):
...
```
**2) Implement receive_trial_result and generate_parameter function**
```python
from nni.tuner import Tuner
class CustomizedTuner(Tuner):
def __init__(self, ...):
...
def receive_trial_result(self, parameter_id, parameters, reward):
'''
Record an observation of the objective function and Train
parameter_id: int
parameters: object created by 'generate_parameters()'
reward: object reported by trial
'''
# your code implements here.
...
def generate_parameters(self, parameter_id):
'''
Returns a set of trial (hyper-)parameters, as a serializable object
parameter_id: int
'''
# your code implements here.
return your_parameters
...
```
```receive_trial_result``` will receive ```the parameter_id, parameters, reward``` as parameters input. Also, the ```reward``` object that Tuner receives is exactly the same reward that Trial sends.
The ```your_parameters``` returned from the ```generate_parameters``` function will be packaged as a json object by NNI SDK. NNI SDK will unpack the json object so the Trial will receive the exact same ```your_parameters``` from Tuner.
For example:
If you implement ```generate_parameters``` like this:
```python
def generate_parameters(self, parameter_id):
'''
Returns a set of trial (hyper-)parameters, as a serializable object
parameter_id: int
'''
# your code implements here.
return {"dropout": 0.3, "learning_rate": 0.4}
```
This means your Tuner will always generate parameters ```{"dropout": 0.3, "learning_rate": 0.4}```. Then Trial will receive ```{"dropout": 0.3, "learning_rate": 0.4}``` by using the ```nni.get_parameters()``` API from NNI SDK. After the training of Trial finishes, it will send the result to Tuner by calling ```nni.report_final_result(0.93)```. Then the ```receive_trial_result``` function will receive these parameters like
```
parameter_id = 82347
parameters = {"dropout": 0.3, "learning_rate": 0.4}
reward = 0.93
```
**3) Configure your customized tuner in experiment yaml config file**
NNI needs to locate your customized tuner class and instantiate the class, so you need to specify the location of the customized tuner class and pass literal values as parameters to the \_\_init__ constructor.
```yaml
tuner:
codeDir: /home/abc/mytuner
classFileName: my_customized_tuner.py
className: CustomizedTuner
# Any parameter need to pass to your tuner class __init__ constructor
# can be specified in this optional classArgs field, for example
classArgs:
arg1: value1
```
For more detailed examples, you could see:
> * [evolution-tuner](../src/sdk/pynni/nni/evolution_tuner)
> * [hyperopt-tuner](../src/sdk/pynni/nni/hyperopt_tuner)
> * [evolution-based-customized-tuner](../examples/tuners/ga_customer_tuner)

Просмотреть файл

@ -18,17 +18,19 @@ trainingServicePlatform: local
# choice: true, false # choice: true, false
useAnnotation: true useAnnotation: true
tuner: tuner:
tunerName: TPE builtinTunerName: TPE
optimizationMode: Maximize classArgs:
optimize_mode: maximize
assessor: assessor:
assessorName: Medianstop builtinAssessorName: Medianstop
optimizationMode: Maximize classArgs:
optimize_mode: maximize
trial: trial:
trialCommand: python mnist.py command: python mnist.py
trialCodeDir: /usr/share/nni/examples/trials/mnist-annotation codeDir: /usr/share/nni/examples/trials/mnist-annotation
trialGpuNum: 0 gpuNum: 0
``` ```
For our built-in assessors, you need to fill two fields: `assessorName` which chooses NNI provided assessors (refer to [here]() for built-in assessors), `optimizationMode` which includes Maximize and Minimize (you want to maximize or minimize your trial result). For our built-in assessors, you need to fill two fields: `builtinAssessorName` which chooses NNI provided assessors (refer to [here]() for built-in assessors), `optimize_mode` which includes maximize and minimize (you want to maximize or minimize your trial result).
## Using user customized Assessor ## Using user customized Assessor
You can also write your own assessor following the guidance [here](). For example, you wrote an assessor for `examples/trials/mnist-annotation`. You should prepare the yaml configure below: You can also write your own assessor following the guidance [here](). For example, you wrote an assessor for `examples/trials/mnist-annotation`. You should prepare the yaml configure below:
@ -46,15 +48,25 @@ trainingServicePlatform: local
# choice: true, false # choice: true, false
useAnnotation: true useAnnotation: true
tuner: tuner:
tunerName: TPE # Possible values: TPE, Random, Anneal, Evolution
optimizationMode: Maximize builtinTunerName: TPE
classArgs:
optimize_mode: maximize
assessor: assessor:
assessorCommand: your_command # Your assessor code directory
assessorCodeDir: /path/of/your/asessor codeDir:
assessorGpuNum: 0 # Name of the file which contains your assessor class
classFileName:
# Your assessor class name, must be a subclass of nni.Assessor
className:
# Parameter names and literal values you want to pass to
# the __init__ constructor of your assessor class
classArgs:
arg1: value1
gpuNum: 0
trial: trial:
trialCommand: python mnist.py command: python mnist.py
trialCodeDir: /usr/share/nni/examples/trials/mnist-annotation codeDir: /usr/share/nni/examples/trials/mnist-annotation
trialGpuNum: 0 gpuNum: 0
``` ```
You only need to fill three field: `assessorCommand`, `assessorCodeDir` and `assessorGpuNum`. You need to fill: `codeDir`, `classFileName`, `className`, and pass parameters to \_\_init__ constructor through `classArgs` field if the \_\_init__ constructor of your assessor class has required parameters.

Просмотреть файл

@ -19,14 +19,15 @@ searchSpacePath:
useAnnotation: useAnnotation:
tuner: tuner:
#choice: TPE, Random, Anneal, Evolution #choice: TPE, Random, Anneal, Evolution
tunerName: builtinTunerName:
#choice: Maximize, Minimize classArgs:
optimizationMode: #choice: maximize, minimize
tunerGpuNum: optimize_mode:
gpuNum:
trial: trial:
trialCommand: command:
trialCodeDir: codeDir:
trialGpuNum: gpuNum:
#machineList can be empty if the platform is local #machineList can be empty if the platform is local
machineList: machineList:
- ip: - ip:
@ -48,20 +49,22 @@ searchSpacePath:
useAnnotation: useAnnotation:
tuner: tuner:
#choice: TPE, Random, Anneal, Evolution #choice: TPE, Random, Anneal, Evolution
tunerName: builtinTunerName:
#choice: Maximize, Minimize classArgs:
optimizationMode: #choice: maximize, minimize
tunerGpuNum: optimize_mode:
gpuNum:
assessor: assessor:
#choice: Medianstop #choice: Medianstop
assessorName: builtinAssessorName:
#choice: Maximize, Minimize classArgs:
optimizationMode: #choice: maximize, minimize
assessorGpuNum: optimize_mode:
gpuNum:
trial: trial:
trialCommand: command:
trialCodeDir: codeDir:
trialGpuNum: gpuNum:
#machineList can be empty if the platform is local #machineList can be empty if the platform is local
machineList: machineList:
- ip: - ip:
@ -82,20 +85,22 @@ trainingServicePlatform:
useAnnotation: useAnnotation:
tuner: tuner:
#choice: TPE, Random, Anneal, Evolution #choice: TPE, Random, Anneal, Evolution
tunerName: builtinTunerName:
#choice: Maximize, Minimize classArgs:
optimizationMode: #choice: maximize, minimize
tunerGpuNum: optimize_mode:
gpuNum:
assessor: assessor:
#choice: Medianstop #choice: Medianstop
assessorName: builtinAssessorName:
#choice: Maximize, Minimize classArgs:
optimizationMode: #choice: maximize, minimize
assessorGpuNum: optimize_mode:
gpuNum:
trial: trial:
trialCommand: command:
trialCodeDir: codeDir:
trialGpuNum: gpuNum:
#machineList can be empty if the platform is local #machineList can be empty if the platform is local
machineList: machineList:
- ip: - ip:
@ -108,11 +113,13 @@ machineList:
* Description * Description
__authorName__ is the name of the author who create the experiment. __authorName__ is the name of the author who create the experiment.
TBD: add default value
* __experimentName__ * __experimentName__
* Description * Description
__experimentName__ is the name of the experiment you created. __experimentName__ is the name of the experiment you created.
TBD: add default value
* __trialConcurrency__ * __trialConcurrency__
* Description * Description
@ -155,24 +162,30 @@ machineList:
* __tuner__ * __tuner__
* Description * Description
__tuner__ specifies the tuner algorithm you use to run an experiment, there are two kinds of ways to set tuner. One way is to use tuner provided by nni sdk, you just need to set __tunerName__ and __optimizationMode__. Another way is to use your own tuner file, and you need to set __tunerCommand__, __tunerCwd__. __tuner__ specifies the tuner algorithm you use to run an experiment, there are two kinds of ways to set tuner. One way is to use tuner provided by nni sdk, you just need to set __builtinTunerName__ and __classArgs__. Another way is to use your own tuner file, and you need to set __codeDirectory__, __classFileName__, __className__ and __classArgs__.
* __tunerName__ and __optimizationMode__ * __builtinTunerName__ and __classArgs__
* __tunerName__ * __builtinTunerName__
__tunerName__ specifies the name of system tuner you want to use, nni sdk provides four kinds of tuner, including {__TPE__, __Random__, __Anneal__, __Evolution__} __builtinTunerName__ specifies the name of system tuner you want to use, nni sdk provides four kinds of tuner, including {__TPE__, __Random__, __Anneal__, __Evolution__}
* __optimizationMode__ * __classArgs__
__optimizationMode__ specifies the optimization mode of tuner algorithm, including {__Maximize__, __Minimize__} __classArgs__ specifies the arguments of tuner algorithm
* __tunerCommand__ and __tunerCwd__ * __codeDir__, __classFileName__, __className__ and __classArgs__
* __tunerCommand__ * __codeDir__
__tunerCommand__ specifies the command you want to use to run your own tuner file, for example {__python3 mytuner.py__} __codeDir__ specifies the directory of tuner code.
* __tunerCwd__ * __classFileName__
__tunerCwd__ specifies the working directory of your own tuner file, which is the path of your own tuner file. __classFileName__ specifies the name of tuner file.
* __tunerGpuNum__ * __className__
__tunerGPUNum__ specifies the gpu number you want to use to run the tuner process. The value of this field should be a positive number. __className__ specifies the name of tuner class.
* __classArgs__
__classArgs__ specifies the arguments of tuner algorithm.
* __gpuNum__
__gpuNum__ specifies the gpu number you want to use to run the tuner process. The value of this field should be a positive number.
Note: you could only specify one way to set tuner, for example, you could set {tunerName, optimizationMode} or {tunerCommand, tunerCwd}, and you could not set them both. Note: you could only specify one way to set tuner, for example, you could set {tunerName, optimizationMode} or {tunerCommand, tunerCwd}, and you could not set them both.
@ -180,36 +193,42 @@ machineList:
* Description * Description
__assessor__ specifies the assessor algorithm you use to run experiment, there are two kinds of ways to set assessor. One way is to use assessor provided by nni sdk, you just need to set __assessorName__ and __optimizationMode__. Another way is to use your own assessor file, and you need to set __assessorCommand__, __assessorCwd__. __assessor__ specifies the assessor algorithm you use to run an experiment, there are two kinds of ways to set assessor. One way is to use assessor provided by nni sdk, you just need to set __builtinAssessorName__ and __classArgs__. Another way is to use your own tuner file, and you need to set __codeDirectory__, __classFileName__, __className__ and __classArgs__.
* __assessorName__ and __optimizationMode__ * __builtinAssessorName__ and __classArgs__
* __assessorName__ * __builtinAssessorName__
__assessorName__ specifies the name of system assessor you want to use, nni sdk provides one kind of assessor, which is {__Medianstop__}. __builtinAssessorName__ specifies the name of system assessor you want to use, nni sdk provides four kinds of tuner, including {__TPE__, __Random__, __Anneal__, __Evolution__}
* __optimizationMode__ * __classArgs__
__optimizationMode__ specifies the optimization mode of tuner algorithm, including {__Maximize__, __Minimize__} __classArgs__ specifies the arguments of tuner algorithm
* __assessorCommand__ and __assessorCwd__ * __codeDir__, __classFileName__, __className__ and __classArgs__
* __assessorCommand__ * __codeDir__
__assessorCommand__ specifies the command you want to use to run your own assessor file, for example {__python3 myassessor.py__} __codeDir__ specifies the directory of tuner code.
* __assessorCwd__ * __classFileName__
__assessorCwd__ specifies the working directory of your own assessor file, which is the path of your own assessor file. __classFileName__ specifies the name of tuner file.
* __assessorGpuNum__ * __className__
__assessorGPUNum__ specifies the gpu number you want to use to run the assessor process. The value of this field should be a positive number. __className__ specifies the name of tuner class.
* __classArgs__
Note: you could only specify one way to set assessor, for example, you could set {assessorName, optimizationMode} or {assessorCommand, assessorCwd}, and you could not set them both.If you do not want to use assessor, you just need to leave assessor empty or remove assessor in your config file. __classArgs__ specifies the arguments of tuner algorithm.
* __gpuNum__
__gpuNum__ specifies the gpu number you want to use to run the assessor process. The value of this field should be a positive number.
Note: you could only specify one way to set assessor, for example, you could set {assessorName, optimizationMode} or {assessorCommand, assessorCwd}, and you could not set them both.If you do not want to use assessor, you just need to leave assessor empty or remove assessor in your config file. Default value is 0.
* __trial__ * __trial__
* __trialCommand__ * __command__
__trialCommand__ specifies the command to run trial process. __command__ specifies the command to run trial process.
* __trialCodeDir__ * __codeDir__
__trialCodeDir__ specifies the directory of your own trial file. __codeDir__ specifies the directory of your own trial file.
* __trialGpuNum__ * __gpuNum__
__trialGpuNum__ specifies the num of gpu you want to use to run your trial process. __gpuNum__ specifies the num of gpu you want to use to run your trial process. Default value is 0.
* __machineList__ * __machineList__
__machineList__ should be set if you set __trainingServicePlatform__=remote, or it could be empty. __machineList__ should be set if you set __trainingServicePlatform__=remote, or it could be empty.
@ -228,6 +247,17 @@ machineList:
__passwd__ specifies the password of your account. __passwd__ specifies the password of your account.
* __sshKeyPath__
If you want to use ssh key to login remote machine, you could set __sshKeyPath__ in config file. __sshKeyPath__ is the path of ssh key file, which should be valid.
Note: if you set passwd and sshKeyPath simultaneously, nni will try passwd.
* __passphrase__
__passphrase__ is used to protect ssh key, which could be empty if you don't have passphrase.
## Examples ## Examples
* __local mode__ * __local mode__
@ -244,14 +274,15 @@ trainingServicePlatform: local
useAnnotation: true useAnnotation: true
tuner: tuner:
#choice: TPE, Random, Anneal, Evolution #choice: TPE, Random, Anneal, Evolution
tunerName: TPE builtinTunerName: TPE
#choice: Maximize, Minimize classArgs:
optimizationMode: Maximize #choice: maximize, minimize
tunerGpuNum: 0 optimize_mode: maximize
gpuNum: 0
trial: trial:
trialCommand: python3 mnist.py command: python3 mnist.py
trialCodeDir: /nni/mnist codeDir: /nni/mnist
trialGpuNum: 0 gpuNum: 0
``` ```
If you want to use assessor, you could add assessor configuration in your file. If you want to use assessor, you could add assessor configuration in your file.
@ -268,20 +299,22 @@ searchSpacePath: /nni/search_space.json
useAnnotation: false useAnnotation: false
tuner: tuner:
#choice: TPE, Random, Anneal, Evolution #choice: TPE, Random, Anneal, Evolution
tunerName: TPE builtinTunerName: TPE
#choice: Maximize, Minimize classArgs:
optimizationMode: Maximize #choice: maximize, minimize
tunerGpuNum: 0 optimize_mode: maximize
gpuNum: 0
assessor: assessor:
#choice: Medianstop #choice: Medianstop
assessorName: Medianstop builtinAssessorName: Medianstop
#choice: Maximize, Minimize classArgs:
optimizationMode: Maximize #choice: maximize, minimize
assessorGpuNum: 0 optimize_mode: maximize
gpuNum: 0
trial: trial:
trialCommand: python3 mnist.py command: python3 mnist.py
trialCodeDir: /nni/mnist codeDir: /nni/mnist
trialGpuNum: 0 gpuNum: 0
``` ```
Or you could specify your own tuner and assessor file as following: Or you could specify your own tuner and assessor file as following:
@ -297,17 +330,25 @@ searchSpacePath: /nni/search_space.json
#choice: true, false #choice: true, false
useAnnotation: false useAnnotation: false
tuner: tuner:
tunerCommand: python3 mytuner.py codeDir: /nni/tuner
tunerCwd: /nni/tuner classFileName: mytuner.py
tunerGpuNum: 0 className: MyTuner
classArgs:
#choice: maximize, minimize
optimize_mode: maximize
gpuNum: 0
assessor: assessor:
assessorCommand: python3 myassessor.py codeDir: /nni/assessor
assessorCwd: /nni/assessor classFileName: myassessor.py
assessorGpuNum: 0 className: MyAssessor
classArgs:
#choice: maximize, minimize
optimize_mode: maximize
gpuNum: 0
trial: trial:
trialCommand: python3 mnist.py command: python3 mnist.py
trialCodeDir: /nni/mnist codeDir: /nni/mnist
trialGpuNum: 0 gpuNum: 0
``` ```
* __remote mode__ * __remote mode__
@ -326,14 +367,15 @@ searchSpacePath: /nni/search_space.json
useAnnotation: false useAnnotation: false
tuner: tuner:
#choice: TPE, Random, Anneal, Evolution #choice: TPE, Random, Anneal, Evolution
tunerName: TPE builtinTunerName: TPE
#choice: Maximize, Minimize classArgs:
optimizationMode: Maximize #choice: maximize, minimize
tunerGpuNum: 0 optimize_mode: maximize
gpuNum: 0
trial: trial:
trialCommand: python3 mnist.py command: python3 mnist.py
trialCodeDir: /nni/mnist codeDir: /nni/mnist
trialGpuNum: 0 gpuNum: 0
#machineList can be empty if the platform is local #machineList can be empty if the platform is local
machineList: machineList:
- ip: 10.10.10.10 - ip: 10.10.10.10
@ -347,5 +389,6 @@ machineList:
- ip: 10.10.10.12 - ip: 10.10.10.12
port: 22 port: 22
username: test username: test
passwd: test sshKeyPath: /nni/sshkey
passphrase: qwert
``` ```

Просмотреть файл

@ -1,60 +1,26 @@
**Getting Started with NNI** **Getting Started with NNI**
=== ===
NNI (Nerual Network Intelligance) is a toolkit to help users running automated machine learning experiment.
The tool dispatchs and runs trail jobs that generated by tunning algorithms to search the best neural architecture and/or hyper-parameters at different enviroments (e.g. local, remote servers, Cloud).
``` ## **Installation**
AutoML experiment Training Services
┌────────┐ ┌────────────────────────┐ ┌────────────────┐
│ nnictl │ ─────> │ nni_manager │ │ Local Machine │
└────────┘ │ sdk/tuner │ └────────────────┘
│ hyperopt_tuner │
│ evlution_tuner │ trail jobs ┌────────────────┐
│ ... │ ────────> │ Remote Servers │
├────────────────────────┤ └────────────────┘
│ trail job source code │
│ sdk/annotation │ ┌────────────────┐
├────────────────────────┤ │ Yarn,K8s, │
│ nni_board │ │ ... │
└────────────────────────┘ └────────────────┘
```
## **Who should consider using NNI**
* You want to try different AutoML algorithms for your training code (model) at local
* You want to run AutoML trail jobs in different enviroments to speed up search (e.g. remote servers, Cloud)
* As a reseacher and data scientist, you want to implement your own AutoML algorithms and compare with other algorithms
* As a ML platform owner, you want to support AutoML in your platform
## **Setup**
* __Dependencies__ * __Dependencies__
nni requires:
```
python >= 3.5
node >= 10.9.0
yarn >= 1.9.4
```
Before install nni, please make sure you have installed python environment correctly.
* __User installation__
* clone nni repository python >= 3.5
python pip should also be correctly installed. You could use "which pip" or "pip -V" to check in Linux.
TBD: For now, we don't support virtual environments.
* __Install NNI through pip__
pip3 install -v --user git+https://github.com/Microsoft/NeuralNetworkIntelligence.git
source ~/.bashrc
* __Install NNI through source code__
git clone https://github.com/Microsoft/NeuralNetworkIntelligence git clone https://github.com/Microsoft/NeuralNetworkIntelligence
* run install.sh
cd NeuralNetworkIntelligence cd NeuralNetworkIntelligence
sh ./install.sh chmod +x install.sh
source install.sh
For more details about installation, please refer to [Installation instructions](Installation.md).
## **Quick start: run an experiment at local**
Requirements:
* local enviroment setup [TODO]
Run the following command to create an experiemnt for [mnist]
```bash
nnictl create --config /usr/share/nni/examples/trials/mnist-annotation/config.yml
```
This command will start the experiment and WebUI. The WebUI endpoint will be shown in the output of this command (for example, `http://localhost:8080`). Open this URL using your browsers. You can analyze your experiment through WebUI, or open trials' tensorboard.
## **Quick start: run a customized experiment** ## **Quick start: run a customized experiment**
An experiment is to run multiple trial jobs, each trial job tries a configuration which includes a specific neural architecture (or model) and hyper-parameter values. To run an experiment through NNI, you should: An experiment is to run multiple trial jobs, each trial job tries a configuration which includes a specific neural architecture (or model) and hyper-parameter values. To run an experiment through NNI, you should:
@ -64,48 +30,54 @@ An experiment is to run multiple trial jobs, each trial job tries a configuratio
* Provide a yaml experiment configure file * Provide a yaml experiment configure file
* (optional) Provide or choose an assessor * (optional) Provide or choose an assessor
**Prepare trial**: Let's use a simple trial example, e.g. mnist, provided by NNI. After you installed NNI, NNI examples have been put in /usr/share/nni/examples, run `ls /usr/share/nni/examples/trials` to see all the trial examples. You can simply execute the following command to run the NNI mnist example: **Prepare trial**: Let's use a simple trial example, e.g. mnist, provided by NNI. After you installed NNI, NNI examples have been put in ~/nni/examples, run `ls ~/nni/examples/trials` to see all the trial examples. You can simply execute the following command to run the NNI mnist example:
python /usr/share/nni/examples/trials/mnist-annotation/mnist.py python ~/nni/examples/trials/mnist-annotation/mnist.py
This command will be filled in the yaml configure file below. Please refer to [here]() for how to write your own trial. This command will be filled in the yaml configure file below. Please refer to [here]() for how to write your own trial.
**Prepare tuner**: NNI supports several popular automl algorithms, including Random Search, Tree of Parzen Estimators (TPE), Bayesian Optimization etc. Users can write their own tuner (refer to [here]()), but for simplicity, here we can choose a tuner provided by NNI as below: **Prepare tuner**: NNI supports several popular automl algorithms, including Random Search, Tree of Parzen Estimators (TPE), Evolution algorithm etc. Users can write their own tuner (refer to [here]()), but for simplicity, here we choose a tuner provided by NNI as below:
tunerName: TPE tunerName: TPE
optimizationMode: maximize optimizationMode: maximize
*tunerName* is used to specify a tuner in NNI, *optimizationMode* is to indicate whether you want to maximize or minimize your trial's result. *tunerName* is used to specify a tuner in NNI, *optimizationMode* is to indicate whether you want to maximize or minimize your trial's result.
**Prepare configure file**: Since you have already known which trial code you are going to run and which tuner you are going to use, it is time to prepare the yaml configure file. NNI provides a demo configure file for each trial example, `cat /usr/share/nni/examples/trials/mnist-annotation/config.yml` to see it. Its content is basically shown below: **Prepare configure file**: Since you have already known which trial code you are going to run and which tuner you are going to use, it is time to prepare the yaml configure file. NNI provides a demo configure file for each trial example, `cat ~/nni/examples/trials/mnist-annotation/config.yml` to see it. Its content is basically shown below:
``` ```
authorName: your_name authorName: your_name
experimentName: auto_mnist experimentName: auto_mnist
# how many trials could be concurrently running # how many trials could be concurrently running
trialConcurrency: 2 trialConcurrency: 2
# maximum experiment running duration # maximum experiment running duration
maxExecDuration: 3h maxExecDuration: 3h
# empty means never stop # empty means never stop
maxTrialNum: 100 maxTrialNum: 100
# choice: local, remote # choice: local, remote
trainingServicePlatform: local trainingServicePlatform: local
# choice: true, false # choice: true, false
useAnnotation: true useAnnotation: true
tuner: tuner:
tunerName: TPE builtinTunerName: TPE
optimizationMode: Maximize classArgs:
optimize_mode: maximize
trial: trial:
trialCommand: python mnist.py command: python mnist.py
trialCodeDir: /usr/share/nni/examples/trials/mnist-annotation codeDir: ~/nni/examples/trials/mnist-annotation
trialGpuNum: 0 gpuNum: 0
``` ```
Here *useAnnotation* is true because this trial example uses our python annotation (refer to [here]() for details). For trial, we should provide *trialCommand* which is the command to run the trial, provide *trialCodeDir* where the trial code is. The command will be executed in this directory. We should also provide how many GPUs a trial requires. Here *useAnnotation* is true because this trial example uses our python annotation (refer to [here]() for details). For trial, we should provide *trialCommand* which is the command to run the trial, provide *trialCodeDir* where the trial code is. The command will be executed in this directory. We should also provide how many GPUs a trial requires.
With all these steps done, we can run the experiment with the following command: With all these steps done, we can run the experiment with the following command:
nnictl create --config /usr/share/nni/examples/trials/mnist-annotation/config.yml nnictl create --config ~/nni/examples/trials/mnist-annotation/config.yml
You can refer to [here](NNICTLDOC.md) for more usage guide of *nnictl* command line tool. You can refer to [here](NNICTLDOC.md) for more usage guide of *nnictl* command line tool.
@ -118,11 +90,8 @@ The experiment has been running now, NNI provides WebUI for you to view experime
* [Tuners supported by NNI.](../src/sdk/pynni/nni/README.md) * [Tuners supported by NNI.](../src/sdk/pynni/nni/README.md)
* [How to enable early stop (i.e. assessor) in an experiment?](EnableAssessor.md) * [How to enable early stop (i.e. assessor) in an experiment?](EnableAssessor.md)
* [How to run an experiment on multiple machines?](RemoteMachineMode.md) * [How to run an experiment on multiple machines?](RemoteMachineMode.md)
* [How to write a customized tuner?](../examples/tuners/README.md) * [How to write a customized tuner?](CustomizedTuner.md)
* [How to write a customized assessor?](../examples/assessors/README.md) * [How to write a customized assessor?](../examples/assessors/README.md)
* [How to resume an experiment?]() * [How to resume an experiment?](NNICTLDOC.md)
* [Tutorial of the command tool *nnictl*.](NNICTLDOC.md) * [Tutorial of the command tool *nnictl*.](NNICTLDOC.md)
* [How to use *nnictl* to control multiple experiments?]() * [How to use *nnictl* to control multiple experiments?]()
## How to contribute
TBD

Просмотреть файл

@ -8,12 +8,10 @@ nnictl support commands:
``` ```
nnictl create nnictl create
nnictl stop nnictl stop
nnictl create
nnictl update nnictl update
nnictl resume nnictl resume
nnictl trial nnictl trial
nnictl webui nnictl webui
nnictl rest
nnictl experiment nnictl experiment
nnictl config nnictl config
nnictl log nnictl log
@ -72,7 +70,7 @@ nnictl log
* __nnictl update searchspace__ * __nnictl update searchspace__
* Description * Description
You can use this command to update an experiment's searchspace. You can use this command to update an experiment's search space.
* Usage * Usage
@ -201,14 +199,6 @@ nnictl log
nnictl config show nnictl config show
### Manage restful server
* __nnictl rest check__
* Description
Check the status of restful server
* Usage
nnictl rest check
### Manage log ### Manage log
* __nnictl log stdout__ * __nnictl log stdout__

25
docs/RELEASE.md Normal file
Просмотреть файл

@ -0,0 +1,25 @@
# Release 0.1.0 - 9/15/2018
Initial release of Neural Network Intelligence (NNI).
## Major Features
* Installation and Deployment
* Support pip install and source codes install
* Support training services on local mode (including Multi-GPU mode) as well as multi-machine mode
* Tuners, Assessors and Trial
* Support AutoML algorithms including: hyperopt_tpe, hyperopt_annealing, hyperopt_random, and evolution_tuner
* Support assessor (early stop) algorithms including: medianstop algorithm
* Provide Python API for user defined tuners and assessors
* Provide Python API for user to wrap trial code as NNI deployable codes
* Experiments
* Provide a command line toolkit 'nnictl' for experiments management
* Provide a web UI for viewing experiments details and managing experiments
* Continuous Integration
* Support CI by providing out-of-box integration with [travis-ci](https://github.com/travis-ci) on ubuntu
* Others
* Support simple GPU job scheduling

Просмотреть файл

@ -10,6 +10,18 @@ NNI supports running an experiment on multiple machines, called remote machine m
## Setup environment ## Setup environment
Install NNI on each of your machines following the install guide [here](GetStarted.md). Install NNI on each of your machines following the install guide [here](GetStarted.md).
For remote machines that are used only to run trials but not the nnictl, you can just install python SDK:
* __Install python SDK through pip__
pip3 install --user git+https://github.com/Microsoft/NeuralNetworkIntelligence.git#subdirectory=src/sdk/pynni
* __Install python SDK through source code__
git clone https://github.com/Microsoft/NeuralNetworkIntelligence
cd src/sdk/pynni
python3 setup.py install
## Run an experiment ## Run an experiment
Still using `examples/trials/mnist-annotation` as an example here. The yaml file you need is shown below: Still using `examples/trials/mnist-annotation` as an example here. The yaml file you need is shown below:
``` ```
@ -26,12 +38,13 @@ trainingServicePlatform: local
# choice: true, false # choice: true, false
useAnnotation: true useAnnotation: true
tuner: tuner:
tunerName: TPE builtinTunerName: TPE
optimizationMode: Maximize classArgs:
optimize_mode: maximize
trial: trial:
trialCommand: python mnist.py command: python mnist.py
trialCodeDir: /usr/share/nni/examples/trials/mnist-annotation codeDir: /usr/share/nni/examples/trials/mnist-annotation
trialGpuNum: 0 gpuNum: 0
#machineList can be empty if the platform is local #machineList can be empty if the platform is local
machineList: machineList:
- ip: 10.1.1.1 - ip: 10.1.1.1

59
docs/SearchSpaceSpec.md Normal file
Просмотреть файл

@ -0,0 +1,59 @@
## How to define search space?
### Hyper-parameter Search Space
* A search space configure example as follow:
```python
{
"dropout_rate":{"_type":"uniform","_value":[0.1,0.5]},
"conv_size":{"_type":"choice","_value":[2,3,5,7]},
"hidden_size":{"_type":"choice","_value":[124, 512, 1024]},
"batch_size":{"_type":"choice","_value":[50, 250, 500]},
"learning_rate":{"_type":"uniform","_value":[0.0001, 0.1]}
}
```
The example defines ```dropout_rate``` as a variable whose prior distribution is a uniform distribution, with values ranging from ```0.1``` to ```0.5```.
The tuner will sample parameters/architecture by understanding the search space first.
User should define the name of variable, type and candidate value of variable.
The candidate type and value for variable is here:
* {"_type":"choice","_value":options}
* Which means the variable value is one of the options, which should be a list. The elements of options can themselves be [nested] stochastic expressions. In this case, the stochastic choices that only appear in some of the options become conditional parameters.
<br/>
* {"_type":"randint","_value":[upper]}
* Which means the variable value is a random integer in the range [0, upper). The semantics of this distribution is that there is no more correlation in the loss function between nearby integer values, as compared with more distant integer values. This is an appropriate distribution for describing random seeds for example. If the loss function is probably more correlated for nearby integer values, then you should probably use one of the "quantized" continuous distributions, such as either quniform, qloguniform, qnormal or qlognormal.
<br/>
* {"_type":"uniform","_value":[low, high]}
* Which means the variable value is a value uniformly between low and high.
* When optimizing, this variable is constrained to a two-sided interval.
<br/>
* {"_type":"quniform","_value":[low, high, q]}
* Which means the variable value is a value like round(uniform(low, high) / q) * q
* Suitable for a discrete value with respect to which the objective is still somewhat "smooth", but which should be bounded both above and below.
<br/>
* {"_type":"loguniform","_value":[low, high]}
* Which means the variable value is a value drawn according to exp(uniform(low, high)) so that the logarithm of the return value is uniformly distributed.
* When optimizing, this variable is constrained to the interval [exp(low), exp(high)].
<br/>
* {"_type":"qloguniform","_value":[low, high, q]}
* Which means the variable value is a value like round(exp(uniform(low, high)) / q) * q
* Suitable for a discrete variable with respect to which the objective is "smooth" and gets smoother with the size of the value, but which should be bounded both above and below.
<br/>
* {"_type":"normal","_value":[label, mu, sigma]}
* Which means the variable value is a real value that's normally-distributed with mean mu and standard deviation sigma. When optimizing, this is an unconstrained variable.
<br/>
* {"_type":"qnormal","_value":[label, mu, sigma, q]}
* Which means the variable value is a value like round(normal(mu, sigma) / q) * q
* Suitable for a discrete variable that probably takes a value around mu, but is fundamentally unbounded.
<br/>
* {"_type":"lognormal","_value":[label, mu, sigma]}
* Which means the variable value is a value drawn according to exp(normal(mu, sigma)) so that the logarithm of the return value is normally distributed. When optimizing, this variable is constrained to be positive.
<br/>
* {"_type":"qlognormal","_value":[label, mu, sigma, q]}
* Which means the variable value is a value like round(exp(normal(mu, sigma)) / q) * q
* Suitable for a discrete variable with respect to which the objective is smooth and gets smoother with the size of the variable, which is bounded from one side.
<br/>

3
docs/ToContribute.md Normal file
Просмотреть файл

@ -0,0 +1,3 @@
## How to contribute
TBD

Просмотреть файл

@ -2,63 +2,7 @@
=== ===
There would be only a few changes on your existing trial(model) code to make the code runnable on NNI. We provide two approaches for you to modify your code: `Python annotation` and `NNI APIs for trial` There would be only a few changes on your existing trial(model) code to make the code runnable on NNI. We provide two approaches for you to modify your code: `Python annotation` and `NNI APIs for trial`
## Python annotation ## NNI APIs
We designed a new syntax for users to annotation which variable they want to tune and in what range they want to tune the variable. Also, they can annotate which variable they want to report as intermediate result to `assessor`, and which variable to report as the final result (e.g. model accuracy) to `tuner`. A really appealing feature of our python annotation is that it exists as comments in your code, which means you can run your code as before without NNI. Let's look at an example, below is a piece of tensorflow code:
```
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
batch_size = 128
for i in range(10000):
batch = mnist.train.next_batch(batch_size)
dropout_rate = 0.5
mnist_network.train_step.run(feed_dict={mnist_network.images: batch[0],
mnist_network.labels: batch[1],
mnist_network.keep_prob: dropout_rate})
if i % 100 == 0:
test_acc = mnist_network.accuracy.eval(
feed_dict={mnist_network.images: mnist.test.images,
mnist_network.labels: mnist.test.labels,
mnist_network.keep_prob: 1.0})
test_acc = mnist_network.accuracy.eval(
feed_dict={mnist_network.images: mnist.test.images,
mnist_network.labels: mnist.test.labels,
mnist_network.keep_prob: 1.0})
```
Let's say you want to tune batch\_size and dropout\_rate, and report test\_acc every 100 steps, at last report test\_acc as final result. With our python annotation, your code would look like below:
```
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
"""@nni.variable(nni.choice(50, 250, 500), name=batch_size)"""
batch_size = 128
for i in range(10000):
batch = mnist.train.next_batch(batch_size)
"""@nni.variable(nni.choice(1, 5), name=dropout_rate)"""
dropout_rate = 0.5
mnist_network.train_step.run(feed_dict={mnist_network.images: batch[0],
mnist_network.labels: batch[1],
mnist_network.keep_prob: dropout_rate})
if i % 100 == 0:
test_acc = mnist_network.accuracy.eval(
feed_dict={mnist_network.images: mnist.test.images,
mnist_network.labels: mnist.test.labels,
mnist_network.keep_prob: 1.0})
"""@nni.report_intermediate_result(test_acc)"""
test_acc = mnist_network.accuracy.eval(
feed_dict={mnist_network.images: mnist.test.images,
mnist_network.labels: mnist.test.labels,
mnist_network.keep_prob: 1.0})
"""@nni.report_final_result(test_acc)"""
```
Simply adding four lines would make your code runnable on NNI. You can still run your code independently. `@nni.variable` works on its next line assignment, and `@nni.report_intermediate_result`/`@nni.report_final_result` would send the data to assessor/tuner at that line. Please refer to [here](../tools/annotation/README.md) for more annotation syntax and more powerful usage. In the yaml configure file, you need one line to enable Python annotation:
```
useAnnotation: true
```
## NNI APIs for trial
We also support NNI APIs for trial code. By using this approach, you should first prepare a search space file. An example is shown below: We also support NNI APIs for trial code. By using this approach, you should first prepare a search space file. An example is shown below:
``` ```
{ {
@ -68,18 +12,21 @@ We also support NNI APIs for trial code. By using this approach, you should firs
"learning_rate":{"_type":"uniform","_value":[0.0001, 0.1]} "learning_rate":{"_type":"uniform","_value":[0.0001, 0.1]}
} }
``` ```
You can refer to [here]() for the tutorial of search space. You can refer to [here](SearchSpaceSpec.md) for the tutorial of search space.
Then, include `import nni` in your trial code to use APIs. Using the line: Then, include `import nni` in your trial code to use NNI APIs. Using the line:
``` ```
RECEIVED_PARAMS = nni.get_parameters() RECEIVED_PARAMS = nni.get_parameters()
``` ```
to get hyper-parameters' values assigned by tuner. `RECEIVED_PARAMS` is a json object, for example: to get hyper-parameters' values assigned by tuner. `RECEIVED_PARAMS` is an object, for example:
``` ```
{'conv_size': 2, 'hidden_size': 124, 'learning_rate': 0.0307, 'dropout_rate': 0.2029} {"conv_size": 2, "hidden_size": 124, "learning_rate": 0.0307, "dropout_rate": 0.2029}
``` ```
On the other hand, you can use the API: `nni.report_intermediate_result(accuracy)` to send `accuracy` to assessor. And use `nni.report_final_result(accuracy)` to send `accuracy` to tuner. Here `accuracy` could be any python data type, but **NOTE that if you use built-in tuner/assessor, `accuracy` should be a number (e.g. float, int)**. On the other hand, you can use the API: `nni.report_intermediate_result(accuracy)` to send `accuracy` to assessor. And use `nni.report_final_result(accuracy)` to send `accuracy` to tuner. Here `accuracy` could be any python data type, but **NOTE that if you use built-in tuner/assessor, `accuracy` should be a numerical variable(e.g. float, int)**.
The assessor will decide which trial should early stop based on the history performance of trial(intermediate result of one trial).
The tuner will generate next parameters/architecture based on the explore history(final result of all trials).
In the yaml configure file, you need two lines to enable NNI APIs: In the yaml configure file, you need two lines to enable NNI APIs:
``` ```
@ -88,3 +35,43 @@ searchSpacePath: /path/to/your/search_space.json
``` ```
You can refer to [here](../examples/trials/README.md) for more information about how to write trial code using NNI APIs. You can refer to [here](../examples/trials/README.md) for more information about how to write trial code using NNI APIs.
## NNI Annotation
We designed a new syntax for users to annotate the variables they want to tune and in what range they want to tune the variables. Also, they can annotate which variable they want to report as intermediate result to `assessor`, and which variable to report as the final result (e.g. model accuracy) to `tuner`. A really appealing feature of our NNI annotation is that it exists as comments in your code, which means you can run your code as before without NNI. Let's look at an example, below is a piece of tensorflow code:
```diff
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
+ """@nni.variable(nni.choice(50, 250, 500), name=batch_size)"""
batch_size = 128
for i in range(10000):
batch = mnist.train.next_batch(batch_size)
+ """@nni.variable(nni.choice(1, 5), name=dropout_rate)"""
dropout_rate = 0.5
mnist_network.train_step.run(feed_dict={mnist_network.images: batch[0],
mnist_network.labels: batch[1],
mnist_network.keep_prob: dropout_rate})
if i % 100 == 0:
test_acc = mnist_network.accuracy.eval(
feed_dict={mnist_network.images: mnist.test.images,
mnist_network.labels: mnist.test.labels,
mnist_network.keep_prob: 1.0})
+ """@nni.report_intermediate_result(test_acc)"""
test_acc = mnist_network.accuracy.eval(
feed_dict={mnist_network.images: mnist.test.images,
mnist_network.labels: mnist.test.labels,
mnist_network.keep_prob: 1.0})
+ """@nni.report_final_result(test_acc)"""
```
Let's say you want to tune batch\_size and dropout\_rate, and report test\_acc every 100 steps, at last report test\_acc as final result. With our NNI annotation, your code would look like below:
Simply adding four lines would make your code runnable on NNI. You can still run your code independently. `@nni.variable` works on its next line assignment, and `@nni.report_intermediate_result`/`@nni.report_final_result` would send the data to assessor/tuner at that line. Please refer to [here](../tools/annotation/README.md) for more annotation syntax and more powerful usage. In the yaml configure file, you need one line to enable NNI annotation:
```
useAnnotation: true
```
For users to correctly leverage NNI annotation, we briefly introduce how NNI annotation works here: NNI precompiles users' trial code to find all the annotations each of which is one line with `"""@nni` at the head of the line. Then NNI replaces each annotation with a corresponding NNI API at the location where the annotation is.
**Note that: in your trial code, you can use either one of NNI APIs and NNI annotation, but not both of them simultaneously.**

Просмотреть файл

@ -9,17 +9,21 @@ searchSpacePath:
#choice: true, false #choice: true, false
useAnnotation: useAnnotation:
tuner: tuner:
tunerCommand: #choice: TPE, Random, Anneal, Evolution
tunerCwd: builtinTunerName:
tunerGpuNum: classArgs:
#choice: maximize, minimize
optimize_mode:
assessor: assessor:
assessorCommand: #choice: Medianstop
assessorCwd: builtinAssessorName:
assessorGpuNum: classArgs:
#choice: maximize, minimize
optimize_mode:
trial: trial:
trialCommand: command:
trialCodeDir: codeDir:
trialGpuNum: gpuNum:
#machineList can be empty if the platform is local #machineList can be empty if the platform is local
machineList: machineList:
- ip: - ip:

Просмотреть файл

@ -8,9 +8,12 @@ trainingServicePlatform: local
#choice: true, false #choice: true, false
useAnnotation: false useAnnotation: false
tuner: tuner:
tunerCommand: python3 __main__.py codeDir: ~/nni/examples/tuners/ga_customer_tuner
tunerCwd: /usr/share/nni/examples/tuners/ga_customer_tuner classFileName: customer_tuner.py
className: CustomerTuner
classArgs:
optimize_mode: maximize
trial: trial:
trialCommand: python3 trial.py command: python3 trial.py
trialCodeDir: /usr/share/nni/examples/trials/ga_squad codeDir: ~/nni/examples/trials/ga_squad
trialGpuNum: 0 gpuNum: 0

Просмотреть файл

@ -9,10 +9,11 @@ trainingServicePlatform: local
useAnnotation: true useAnnotation: true
tuner: tuner:
#choice: TPE, Random, Anneal, Evolution #choice: TPE, Random, Anneal, Evolution
tunerName: TPE builtinTunerName: TPE
#choice: Maximize, Minimize classArgs:
optimizationMode: Maximize #choice: maximize, minimize
optimize_mode: maximize
trial: trial:
trialCommand: python3 mnist.py command: python3 mnist.py
trialCodeDir: /usr/share/nni/examples/trials/mnist-annotation codeDir: ~/nni/examples/trials/mnist-annotation
trialGpuNum: 0 gpuNum: 0

Просмотреть файл

@ -5,15 +5,16 @@ maxExecDuration: 1h
maxTrialNum: 1 maxTrialNum: 1
#choice: local, remote #choice: local, remote
trainingServicePlatform: local trainingServicePlatform: local
searchSpacePath: /usr/share/nni/examples/trials/mnist-keras/search_space.json searchSpacePath: ~/nni/examples/trials/mnist-keras/search_space.json
#choice: true, false #choice: true, false
useAnnotation: false useAnnotation: false
tuner: tuner:
#choice: TPE, Random, Anneal, Evolution #choice: TPE, Random, Anneal, Evolution
tunerName: TPE builtinTunerName: TPE
#choice: Maximize, Minimize classArgs:
optimizationMode: Maximize #choice: maximize, minimize
optimize_mode: maximize
trial: trial:
trialCommand: python3 mnist-keras.py command: python3 mnist-keras.py
trialCodeDir: /usr/share/nni/examples/trials/mnist-keras codeDir: ~/nni/examples/trials/mnist-keras
trialGpuNum: 0 gpuNum: 0

Просмотреть файл

@ -84,7 +84,7 @@ class SendMetrics(keras.callbacks.Callback):
Run on end of each epoch Run on end of each epoch
''' '''
LOG.debug(logs) LOG.debug(logs)
nni.report_intermediate_result(logs) nni.report_intermediate_result(logs['acc'])
def train(args, params): def train(args, params):
''' '''

Просмотреть файл

@ -9,10 +9,11 @@ trainingServicePlatform: local
useAnnotation: true useAnnotation: true
tuner: tuner:
#choice: TPE, Random, Anneal, Evolution #choice: TPE, Random, Anneal, Evolution
tunerName: TPE builtinTunerName: TPE
#choice: Maximize, Minimize classArgs:
optimizationMode: Maximize #choice: maximize, minimize
optimize_mode: maximize
trial: trial:
trialCommand: python3 mnist.py command: python3 mnist.py
trialCodeDir: /usr/share/nni/examples/trials/mnist-smartparam codeDir: ~/nni/examples/trials/mnist-smartparam
trialGpuNum: 0 gpuNum: 0

Просмотреть файл

@ -5,15 +5,16 @@ maxExecDuration: 1h
maxTrialNum: 1 maxTrialNum: 1
#choice: local, remote #choice: local, remote
trainingServicePlatform: local trainingServicePlatform: local
searchSpacePath: /usr/share/nni/examples/trials/mnist/search_space.json searchSpacePath: ~/nni/examples/trials/mnist/search_space.json
#choice: true, false #choice: true, false
useAnnotation: false useAnnotation: false
tuner: tuner:
#choice: TPE, Random, Anneal, Evolution #choice: TPE, Random, Anneal, Evolution
tunerName: TPE builtinTunerName: TPE
#choice: Maximize, Minimize classArgs:
optimizationMode: Maximize #choice: maximize, minimize
optimize_mode: maximize
trial: trial:
trialCommand: python3 mnist.py command: python3 mnist.py
trialCodeDir: /usr/share/nni/examples/trials/mnist codeDir: ~/nni/examples/trials/mnist
trialGpuNum: 0 gpuNum: 0

Просмотреть файл

@ -5,20 +5,22 @@ maxExecDuration: 1h
maxTrialNum: 1 maxTrialNum: 1
#choice: local, remote #choice: local, remote
trainingServicePlatform: local trainingServicePlatform: local
searchSpacePath: /usr/share/nni/examples/trials/mnist/search_space.json searchSpacePath: ~/nni/examples/trials/mnist/search_space.json
#choice: true, false #choice: true, false
useAnnotation: false useAnnotation: false
tuner: tuner:
#choice: TPE, Random, Anneal, Evolution #choice: TPE, Random, Anneal, Evolution
tunerName: TPE builtinTunerName: TPE
#choice: Maximize, Minimize classArgs:
optimizationMode: Maximize #choice: maximize, minimize
optimize_mode: maximize
assessor: assessor:
#choice: Medianstop #choice: Medianstop
assessorName: Medianstop builtinAssessorName: Medianstop
#choice: Maximize, Minimize classArgs:
optimizationMode: Maximize #choice: maximize, minimize
optimize_mode: maximize
trial: trial:
trialCommand: python3 mnist.py command: python3 mnist.py
trialCodeDir: /usr/share/nni/examples/trials/mnist codeDir: ~/nni/examples/trials/mnist
trialGpuNum: 0 gpuNum: 0

Просмотреть файл

@ -0,0 +1,20 @@
authorName: default
experimentName: example_pytorch_cifar10
trialConcurrency: 1
maxExecDuration: 100h
maxTrialNum: 1
#choice: local, remote
trainingServicePlatform: local
searchSpacePath: ~/nni/examples/trials/pytorch_cifar10/search_space.json
#choice: true, false
useAnnotation: false
tuner:
#choice: TPE, Random, Anneal, Evolution
builtinTunerName: TPE
classArgs:
#choice: maximize, minimize
optimize_mode: maximize
trial:
command: python3 main.py
codeDir: ~/nni/examples/trials/pytorch_cifar10
gpuNum: 1

Просмотреть файл

@ -0,0 +1,193 @@
'''Train CIFAR10 with PyTorch.'''
from __future__ import print_function
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.backends.cudnn as cudnn
import torchvision
import torchvision.transforms as transforms
import os
import argparse
import logging
from models import *
from utils import progress_bar
import nni
_logger = logging.getLogger("cifar10_pytorch_automl")

# Module-level training state: populated by prepare() and then read/updated
# by train() and test() through `global` declarations.
trainloader = None
testloader = None
net = None
criterion = None
optimizer = None
# Use the GPU when one is available, otherwise fall back to CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
best_acc = 0.0  # best test accuracy
start_epoch = 0  # start from epoch 0 or last checkpoint epoch
def prepare(args):
    '''
    Build the data loaders, model, loss and optimizer for one trial.

    args: dict of hyper-parameters with keys 'model' (str), 'optimizer' (str)
          and 'lr' (float), typically produced by nni.get_parameters().
    Side effects: assigns the module-level globals trainloader, testloader,
    net, criterion and optimizer. Downloads CIFAR10 to ./data if missing.
    '''
    global trainloader
    global testloader
    global net
    global criterion
    global optimizer

    # Data
    print('==> Preparing data..')
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        # Per-channel CIFAR10 mean / std.
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])

    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])

    trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True, num_workers=2)

    testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)
    testloader = torch.utils.data.DataLoader(testset, batch_size=100, shuffle=False, num_workers=2)

    # Model: pick the architecture named by the tuner.
    print('==> Building model..')
    if args['model'] == 'vgg':
        net = VGG('VGG19')
    if args['model'] == 'resnet18':
        net = ResNet18()
    if args['model'] == 'googlenet':
        net = GoogLeNet()
    if args['model'] == 'densenet121':
        net = DenseNet121()
    if args['model'] == 'mobilenet':
        net = MobileNet()
    if args['model'] == 'dpn92':
        net = DPN92()
    if args['model'] == 'shufflenetg2':
        net = ShuffleNetG2()
    if args['model'] == 'senet18':
        net = SENet18()
    net = net.to(device)

    if device == 'cuda':
        net = torch.nn.DataParallel(net)
        cudnn.benchmark = True

    criterion = nn.CrossEntropyLoss()

    # Optimizer: pick the algorithm named by the tuner.
    if args['optimizer'] == 'SGD':
        optimizer = optim.SGD(net.parameters(), lr=args['lr'], momentum=0.9, weight_decay=5e-4)
    if args['optimizer'] == 'Adadelta':
        optimizer = optim.Adadelta(net.parameters(), lr=args['lr'])
    if args['optimizer'] == 'Adagrad':
        optimizer = optim.Adagrad(net.parameters(), lr=args['lr'])
    if args['optimizer'] == 'Adam':
        optimizer = optim.Adam(net.parameters(), lr=args['lr'])
    if args['optimizer'] == 'Adamax':
        # Bug fix: this branch previously constructed optim.Adam, so the
        # 'Adamax' choice silently duplicated the 'Adam' choice.
        optimizer = optim.Adamax(net.parameters(), lr=args['lr'])
# Training
def train(epoch):
    '''
    Train the global net for one epoch over trainloader.

    epoch: int, current epoch index (used only for the progress printout).
    Side effects: updates model weights via the global optimizer and prints
    running loss/accuracy for each batch through progress_bar.
    '''
    global trainloader
    global testloader
    global net
    global criterion
    global optimizer
    print('\nEpoch: %d' % epoch)
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        # Predicted class = argmax over the class dimension.
        _, predicted = outputs.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()

        # Note: the original recomputed an unused local `acc` here every
        # batch; removed since progress_bar derives the accuracy itself.
        progress_bar(batch_idx, len(trainloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
            % (train_loss/(batch_idx+1), 100.*correct/total, correct, total))
def test(epoch):
    """Evaluate `net` on the test set; checkpoint when the accuracy improves.

    Returns (epoch accuracy, best accuracy seen so far). Mutates the
    module-level `best_acc` and writes ./checkpoint/ckpt.t7 on improvement.
    """
    global best_acc
    global trainloader
    global testloader
    global net
    global criterion
    global optimizer
    net.eval()
    test_loss = 0
    correct = 0
    total = 0
    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(testloader):
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = net(inputs)
            loss = criterion(outputs, targets)
            test_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()
            acc = 100.*correct/total
            progress_bar(batch_idx, len(testloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
                % (test_loss/(batch_idx+1), 100.*correct/total, correct, total))
    # Save checkpoint.
    acc = 100.*correct/total
    if acc > best_acc:
        print('Saving..')
        state = {
            'net': net.state_dict(),
            'acc': acc,
            'epoch': epoch,
        }
        if not os.path.isdir('checkpoint'):
            os.mkdir('checkpoint')
        torch.save(state, './checkpoint/ckpt.t7')
        best_acc = acc
    return acc, best_acc
if __name__ == '__main__':
    try:
        # Hyperparameters chosen by the NNI tuner for this trial.
        RCV_CONFIG = nni.get_parameters()
        #RCV_CONFIG = {'lr': 0.1, 'optimizer': 'Adam', 'model':'senet18'}
        _logger.debug(RCV_CONFIG)
        prepare(RCV_CONFIG)
        acc = 0.0
        best_acc = 0.0
        # Fixed 200-epoch budget; per-epoch accuracy is reported so NNI can
        # early-stop unpromising trials, and the best accuracy at the end.
        for epoch in range(start_epoch, start_epoch+200):
            train(epoch)
            acc, best_acc = test(epoch)
            nni.report_intermediate_result(acc)
        nni.report_final_result(best_acc)
    except Exception as exception:
        # Log the full traceback for the NNI trial log, then re-raise so the
        # trial is marked as failed.
        _logger.exception(exception)
        raise

Просмотреть файл

@ -0,0 +1,11 @@
from .vgg import *
from .densenet import *
from .dpn import *
from .googlenet import *
from .lenet import *
from .mobilenet import *
from .pnasnet import *
from .resnet import *
from .senet import *
from .shufflenet import *

Просмотреть файл

@ -0,0 +1,107 @@
'''DenseNet in PyTorch.'''
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
class Bottleneck(nn.Module):
    """DenseNet bottleneck layer: BN-ReLU-Conv1x1 -> BN-ReLU-Conv3x3.

    Produces `growth_rate` new feature maps and concatenates them in front
    of the input, so the output has in_planes + growth_rate channels.
    """
    def __init__(self, in_planes, growth_rate):
        super(Bottleneck, self).__init__()
        inner = 4 * growth_rate  # 1x1 bottleneck width (4x growth, as in the paper)
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.conv1 = nn.Conv2d(in_planes, inner, kernel_size=1, bias=False)
        self.bn2 = nn.BatchNorm2d(inner)
        self.conv2 = nn.Conv2d(inner, growth_rate, kernel_size=3, padding=1, bias=False)
    def forward(self, x):
        new_features = self.conv1(F.relu(self.bn1(x)))
        new_features = self.conv2(F.relu(self.bn2(new_features)))
        # Dense connectivity: the new features are stacked in front of the input.
        return torch.cat([new_features, x], 1)
class Transition(nn.Module):
    """Between dense blocks: BN-ReLU-Conv1x1 channel reduction, then 2x2 avg-pool."""
    def __init__(self, in_planes, out_planes):
        super(Transition, self).__init__()
        self.bn = nn.BatchNorm2d(in_planes)
        self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=1, bias=False)
    def forward(self, x):
        compressed = self.conv(F.relu(self.bn(x)))
        # Halve the spatial resolution.
        return F.avg_pool2d(compressed, 2)
class DenseNet(nn.Module):
    # DenseNet (Huang et al., "Densely Connected Convolutional Networks"):
    # four dense blocks, each followed (except the last) by a compressing
    # Transition layer; every `block` adds `growth_rate` feature maps.
    def __init__(self, block, nblocks, growth_rate=12, reduction=0.5, num_classes=10):
        # block: layer class (e.g. Bottleneck); nblocks: layers per dense block;
        # reduction: channel compression factor applied by each Transition.
        super(DenseNet, self).__init__()
        self.growth_rate = growth_rate
        # `num_planes` tracks the running channel count through the network.
        num_planes = 2*growth_rate
        self.conv1 = nn.Conv2d(3, num_planes, kernel_size=3, padding=1, bias=False)
        self.dense1 = self._make_dense_layers(block, num_planes, nblocks[0])
        num_planes += nblocks[0]*growth_rate
        out_planes = int(math.floor(num_planes*reduction))
        self.trans1 = Transition(num_planes, out_planes)
        num_planes = out_planes
        self.dense2 = self._make_dense_layers(block, num_planes, nblocks[1])
        num_planes += nblocks[1]*growth_rate
        out_planes = int(math.floor(num_planes*reduction))
        self.trans2 = Transition(num_planes, out_planes)
        num_planes = out_planes
        self.dense3 = self._make_dense_layers(block, num_planes, nblocks[2])
        num_planes += nblocks[2]*growth_rate
        out_planes = int(math.floor(num_planes*reduction))
        self.trans3 = Transition(num_planes, out_planes)
        num_planes = out_planes
        # Final dense block has no transition; BN + linear classifier follow.
        self.dense4 = self._make_dense_layers(block, num_planes, nblocks[3])
        num_planes += nblocks[3]*growth_rate
        self.bn = nn.BatchNorm2d(num_planes)
        self.linear = nn.Linear(num_planes, num_classes)
    def _make_dense_layers(self, block, in_planes, nblock):
        # Stack `nblock` layers; each consumes the concatenated output of all
        # previous ones, so the input width grows by growth_rate per layer.
        layers = []
        for i in range(nblock):
            layers.append(block(in_planes, self.growth_rate))
            in_planes += self.growth_rate
        return nn.Sequential(*layers)
    def forward(self, x):
        out = self.conv1(x)
        out = self.trans1(self.dense1(out))
        out = self.trans2(self.dense2(out))
        out = self.trans3(self.dense3(out))
        out = self.dense4(out)
        # 4x4 average pool: the 32x32 input has been downsampled 3 times by 2.
        out = F.avg_pool2d(F.relu(self.bn(out)), 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out
# Standard configurations (layer counts per dense block).
def DenseNet121():
    return DenseNet(Bottleneck, [6,12,24,16], growth_rate=32)
def DenseNet169():
    return DenseNet(Bottleneck, [6,12,32,32], growth_rate=32)
def DenseNet201():
    return DenseNet(Bottleneck, [6,12,48,32], growth_rate=32)
def DenseNet161():
    return DenseNet(Bottleneck, [6,12,36,24], growth_rate=48)
# Small configuration for CIFAR-scale experiments.
def densenet_cifar():
    return DenseNet(Bottleneck, [6,12,24,16], growth_rate=12)
def test():
    # Smoke test: forward one random CIFAR-sized image.
    net = densenet_cifar()
    x = torch.randn(1,3,32,32)
    y = net(x)
    print(y)
# test()

Просмотреть файл

@ -0,0 +1,98 @@
'''Dual Path Networks in PyTorch.'''
import torch
import torch.nn as nn
import torch.nn.functional as F
class Bottleneck(nn.Module):
    """Dual Path Network bottleneck.

    The first `out_planes` output channels form the residual path (summed
    with the shortcut); the remaining `dense_depth` channels form the dense
    path (concatenated), so the output width is input width + dense_depth
    (or out_planes + 2*dense_depth on a stage's first layer).
    """
    def __init__(self, last_planes, in_planes, out_planes, dense_depth, stride, first_layer):
        super(Bottleneck, self).__init__()
        self.out_planes = out_planes
        self.dense_depth = dense_depth
        # 1x1 reduce -> 3x3 grouped (cardinality 32) -> 1x1 expand.
        self.conv1 = nn.Conv2d(last_planes, in_planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.conv2 = nn.Conv2d(in_planes, in_planes, kernel_size=3, stride=stride, padding=1, groups=32, bias=False)
        self.bn2 = nn.BatchNorm2d(in_planes)
        self.conv3 = nn.Conv2d(in_planes, out_planes+dense_depth, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes+dense_depth)
        # A stage's first layer projects the input to the new width/stride;
        # later layers use the identity shortcut.
        if first_layer:
            self.shortcut = nn.Sequential(
                nn.Conv2d(last_planes, out_planes+dense_depth, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes+dense_depth),
            )
        else:
            self.shortcut = nn.Sequential()
    def forward(self, x):
        y = F.relu(self.bn1(self.conv1(x)))
        y = F.relu(self.bn2(self.conv2(y)))
        y = self.bn3(self.conv3(y))
        identity = self.shortcut(x)
        d = self.out_planes
        # Residual channels are added; dense channels of both tensors are kept.
        residual = identity[:, :d, :, :] + y[:, :d, :, :]
        merged = torch.cat([residual, identity[:, d:, :, :], y[:, d:, :, :]], 1)
        return F.relu(merged)
class DPN(nn.Module):
    # Dual Path Network: each stage mixes a residual path (fixed width) with
    # a densely-growing path (`dense_depth` channels appended per block).
    def __init__(self, cfg):
        # cfg: dict of per-stage tuples in_planes/out_planes/num_blocks/dense_depth.
        super(DPN, self).__init__()
        in_planes, out_planes = cfg['in_planes'], cfg['out_planes']
        num_blocks, dense_depth = cfg['num_blocks'], cfg['dense_depth']
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.last_planes = 64
        self.layer1 = self._make_layer(in_planes[0], out_planes[0], num_blocks[0], dense_depth[0], stride=1)
        self.layer2 = self._make_layer(in_planes[1], out_planes[1], num_blocks[1], dense_depth[1], stride=2)
        self.layer3 = self._make_layer(in_planes[2], out_planes[2], num_blocks[2], dense_depth[2], stride=2)
        self.layer4 = self._make_layer(in_planes[3], out_planes[3], num_blocks[3], dense_depth[3], stride=2)
        # Final width = residual width + dense channels accumulated in stage 4.
        self.linear = nn.Linear(out_planes[3]+(num_blocks[3]+1)*dense_depth[3], 10)
    def _make_layer(self, in_planes, out_planes, num_blocks, dense_depth, stride):
        # Only the first block of a stage downsamples (i==0 flags it so the
        # shortcut projects); `self.last_planes` tracks the growing input width.
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for i,stride in enumerate(strides):
            layers.append(Bottleneck(self.last_planes, in_planes, out_planes, dense_depth, stride, i==0))
            self.last_planes = out_planes + (i+2) * dense_depth
        return nn.Sequential(*layers)
    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out
# Shallow 26-layer variant.
def DPN26():
    cfg = {
        'in_planes': (96,192,384,768),
        'out_planes': (256,512,1024,2048),
        'num_blocks': (2,2,2,2),
        'dense_depth': (16,32,24,128)
    }
    return DPN(cfg)
# Standard DPN-92 (same widths, deeper stages).
def DPN92():
    cfg = {
        'in_planes': (96,192,384,768),
        'out_planes': (256,512,1024,2048),
        'num_blocks': (3,4,20,3),
        'dense_depth': (16,32,24,128)
    }
    return DPN(cfg)
def test():
    # Smoke test: forward one random CIFAR-sized image.
    net = DPN92()
    x = torch.randn(1,3,32,32)
    y = net(x)
    print(y)
# test()

Просмотреть файл

@ -0,0 +1,107 @@
'''GoogLeNet with PyTorch.'''
import torch
import torch.nn as nn
import torch.nn.functional as F
class Inception(nn.Module):
    """GoogLeNet inception module: four parallel branches, concatenated on channels.

    Output channel count = n1x1 + n3x3 + n5x5 + pool_planes; spatial size is
    preserved by every branch.
    """
    def __init__(self, in_planes, n1x1, n3x3red, n3x3, n5x5red, n5x5, pool_planes):
        super(Inception, self).__init__()
        # Branch 1: plain 1x1 conv.
        self.b1 = nn.Sequential(
            nn.Conv2d(in_planes, n1x1, kernel_size=1),
            nn.BatchNorm2d(n1x1),
            nn.ReLU(True),
        )
        # Branch 2: 1x1 reduction followed by 3x3 conv.
        self.b2 = nn.Sequential(
            nn.Conv2d(in_planes, n3x3red, kernel_size=1),
            nn.BatchNorm2d(n3x3red),
            nn.ReLU(True),
            nn.Conv2d(n3x3red, n3x3, kernel_size=3, padding=1),
            nn.BatchNorm2d(n3x3),
            nn.ReLU(True),
        )
        # Branch 3: 1x1 reduction followed by two stacked 3x3 convs
        # (the 5x5 receptive field factored into 3x3s).
        self.b3 = nn.Sequential(
            nn.Conv2d(in_planes, n5x5red, kernel_size=1),
            nn.BatchNorm2d(n5x5red),
            nn.ReLU(True),
            nn.Conv2d(n5x5red, n5x5, kernel_size=3, padding=1),
            nn.BatchNorm2d(n5x5),
            nn.ReLU(True),
            nn.Conv2d(n5x5, n5x5, kernel_size=3, padding=1),
            nn.BatchNorm2d(n5x5),
            nn.ReLU(True),
        )
        # Branch 4: 3x3 max-pool followed by a 1x1 conv.
        self.b4 = nn.Sequential(
            nn.MaxPool2d(3, stride=1, padding=1),
            nn.Conv2d(in_planes, pool_planes, kernel_size=1),
            nn.BatchNorm2d(pool_planes),
            nn.ReLU(True),
        )
    def forward(self, x):
        # Run every branch on the same input and stack the results channel-wise.
        branches = (self.b1, self.b2, self.b3, self.b4)
        return torch.cat([branch(x) for branch in branches], 1)
class GoogLeNet(nn.Module):
    # GoogLeNet / Inception-v1 adapted to CIFAR: 3x3 stem instead of the
    # 7x7/stride-2 ImageNet stem, 10-way classifier. Attribute names a3..b5
    # follow the stage naming of the original paper.
    def __init__(self):
        super(GoogLeNet, self).__init__()
        self.pre_layers = nn.Sequential(
            nn.Conv2d(3, 192, kernel_size=3, padding=1),
            nn.BatchNorm2d(192),
            nn.ReLU(True),
        )
        # Inception(in, n1x1, n3x3red, n3x3, n5x5red, n5x5, pool_planes)
        self.a3 = Inception(192, 64, 96, 128, 16, 32, 32)
        self.b3 = Inception(256, 128, 128, 192, 32, 96, 64)
        self.maxpool = nn.MaxPool2d(3, stride=2, padding=1)
        self.a4 = Inception(480, 192, 96, 208, 16, 48, 64)
        self.b4 = Inception(512, 160, 112, 224, 24, 64, 64)
        self.c4 = Inception(512, 128, 128, 256, 24, 64, 64)
        self.d4 = Inception(512, 112, 144, 288, 32, 64, 64)
        self.e4 = Inception(528, 256, 160, 320, 32, 128, 128)
        self.a5 = Inception(832, 256, 160, 320, 32, 128, 128)
        self.b5 = Inception(832, 384, 192, 384, 48, 128, 128)
        self.avgpool = nn.AvgPool2d(8, stride=1)
        self.linear = nn.Linear(1024, 10)
    def forward(self, x):
        out = self.pre_layers(x)
        # Stage 3.
        out = self.a3(out)
        out = self.b3(out)
        out = self.maxpool(out)
        # Stage 4.
        out = self.a4(out)
        out = self.b4(out)
        out = self.c4(out)
        out = self.d4(out)
        out = self.e4(out)
        out = self.maxpool(out)
        # Stage 5, then global average pool and classifier.
        out = self.a5(out)
        out = self.b5(out)
        out = self.avgpool(out)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out
def test():
    # Smoke test: forward one random CIFAR-sized image.
    net = GoogLeNet()
    x = torch.randn(1,3,32,32)
    y = net(x)
    print(y.size())
# test()

Просмотреть файл

@ -0,0 +1,23 @@
'''LeNet in PyTorch.'''
import torch.nn as nn
import torch.nn.functional as F
class LeNet(nn.Module):
    """Classic LeNet-5-style CNN for 3-channel 32x32 input, 10 classes."""
    def __init__(self):
        super(LeNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # After two conv(5) + pool(2) stages on 32x32 input, 16 maps of 5x5 remain.
        self.fc1 = nn.Linear(16*5*5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
    def forward(self, x):
        feat = F.max_pool2d(F.relu(self.conv1(x)), 2)
        feat = F.max_pool2d(F.relu(self.conv2(feat)), 2)
        flat = feat.view(feat.size(0), -1)  # flatten per sample
        hidden = F.relu(self.fc2(F.relu(self.fc1(flat))))
        return self.fc3(hidden)

Просмотреть файл

@ -0,0 +1,61 @@
'''MobileNet in PyTorch.
See the paper "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications"
for more details.
'''
import torch
import torch.nn as nn
import torch.nn.functional as F
class Block(nn.Module):
    '''Depthwise conv + Pointwise conv'''
    def __init__(self, in_planes, out_planes, stride=1):
        super(Block, self).__init__()
        # Depthwise: one 3x3 filter per input channel (groups=in_planes).
        self.conv1 = nn.Conv2d(in_planes, in_planes, kernel_size=3, stride=stride, padding=1, groups=in_planes, bias=False)
        self.bn1 = nn.BatchNorm2d(in_planes)
        # Pointwise: 1x1 conv mixes channels and sets the output width.
        self.conv2 = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn2 = nn.BatchNorm2d(out_planes)
    def forward(self, x):
        depthwise = F.relu(self.bn1(self.conv1(x)))
        pointwise = F.relu(self.bn2(self.conv2(depthwise)))
        return pointwise
class MobileNet(nn.Module):
    # MobileNet v1: a stack of depthwise-separable Blocks described by `cfg`.
    # (128,2) means conv planes=128, conv stride=2, by default conv stride=1
    cfg = [64, (128,2), 128, (256,2), 256, (512,2), 512, 512, 512, 512, 512, (1024,2), 1024]
    def __init__(self, num_classes=10):
        super(MobileNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(32)
        self.layers = self._make_layers(in_planes=32)
        self.linear = nn.Linear(1024, num_classes)
    def _make_layers(self, in_planes):
        # Each cfg entry is either `planes` (stride 1) or a `(planes, stride)` pair.
        layers = []
        for x in self.cfg:
            out_planes = x if isinstance(x, int) else x[0]
            stride = 1 if isinstance(x, int) else x[1]
            layers.append(Block(in_planes, out_planes, stride))
            in_planes = out_planes
        return nn.Sequential(*layers)
    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layers(out)
        out = F.avg_pool2d(out, 2)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out
def test():
    # Smoke test: forward one random CIFAR-sized image.
    net = MobileNet()
    x = torch.randn(1,3,32,32)
    y = net(x)
    print(y.size())
# test()

Просмотреть файл

@ -0,0 +1,86 @@
'''MobileNetV2 in PyTorch.
See the paper "Inverted Residuals and Linear Bottlenecks:
Mobile Networks for Classification, Detection and Segmentation" for more details.
'''
import torch
import torch.nn as nn
import torch.nn.functional as F
class Block(nn.Module):
    '''expand + depthwise + pointwise'''
    def __init__(self, in_planes, out_planes, expansion, stride):
        super(Block, self).__init__()
        self.stride = stride
        hidden = expansion * in_planes  # inverted-residual expanded width
        self.conv1 = nn.Conv2d(in_planes, hidden, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn1 = nn.BatchNorm2d(hidden)
        self.conv2 = nn.Conv2d(hidden, hidden, kernel_size=3, stride=stride, padding=1, groups=hidden, bias=False)
        self.bn2 = nn.BatchNorm2d(hidden)
        # Linear (no ReLU) projection back down to out_planes.
        self.conv3 = nn.Conv2d(hidden, out_planes, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)
        if stride == 1 and in_planes != out_planes:
            # 1x1 projection so the skip connection matches the output width.
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False),
                nn.BatchNorm2d(out_planes),
            )
        else:
            self.shortcut = nn.Sequential()
    def forward(self, x):
        y = F.relu(self.bn1(self.conv1(x)))
        y = F.relu(self.bn2(self.conv2(y)))
        y = self.bn3(self.conv3(y))
        if self.stride == 1:
            # Residual connection only when the spatial size is preserved.
            y = y + self.shortcut(x)
        return y
class MobileNetV2(nn.Module):
    # MobileNetV2: inverted-residual Blocks configured by `cfg`.
    # (expansion, out_planes, num_blocks, stride)
    cfg = [(1, 16, 1, 1),
           (6, 24, 2, 1), # NOTE: change stride 2 -> 1 for CIFAR10
           (6, 32, 3, 2),
           (6, 64, 4, 2),
           (6, 96, 3, 1),
           (6, 160, 3, 2),
           (6, 320, 1, 1)]
    def __init__(self, num_classes=10):
        super(MobileNetV2, self).__init__()
        # NOTE: change conv1 stride 2 -> 1 for CIFAR10
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(32)
        self.layers = self._make_layers(in_planes=32)
        # Final 1x1 expansion to 1280 channels before the classifier.
        self.conv2 = nn.Conv2d(320, 1280, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn2 = nn.BatchNorm2d(1280)
        self.linear = nn.Linear(1280, num_classes)
    def _make_layers(self, in_planes):
        # Only the first block of each cfg entry uses that entry's stride.
        layers = []
        for expansion, out_planes, num_blocks, stride in self.cfg:
            strides = [stride] + [1]*(num_blocks-1)
            for stride in strides:
                layers.append(Block(in_planes, out_planes, expansion, stride))
                in_planes = out_planes
        return nn.Sequential(*layers)
    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layers(out)
        out = F.relu(self.bn2(self.conv2(out)))
        # NOTE: change pooling kernel_size 7 -> 4 for CIFAR10
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out
def test():
    # Smoke test: forward two random CIFAR-sized images.
    net = MobileNetV2()
    x = torch.randn(2,3,32,32)
    y = net(x)
    print(y.size())
# test()

Просмотреть файл

@ -0,0 +1,125 @@
'''PNASNet in PyTorch.
Paper: Progressive Neural Architecture Search
'''
import torch
import torch.nn as nn
import torch.nn.functional as F
class SepConv(nn.Module):
    '''Separable Convolution.'''
    # Grouped (depthwise-style) conv followed by batch norm; no activation here.
    def __init__(self, in_planes, out_planes, kernel_size, stride):
        super(SepConv, self).__init__()
        same_pad = (kernel_size - 1) // 2  # 'same' padding for odd kernels
        self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size, stride,
                               padding=same_pad, bias=False, groups=in_planes)
        self.bn1 = nn.BatchNorm2d(out_planes)
    def forward(self, x):
        return self.bn1(self.conv1(x))
class CellA(nn.Module):
    # PNASNet cell A: 7x7 separable-conv branch + max-pool branch, summed.
    def __init__(self, in_planes, out_planes, stride=1):
        super(CellA, self).__init__()
        self.stride = stride
        self.sep_conv1 = SepConv(in_planes, out_planes, kernel_size=7, stride=stride)
        if stride==2:
            # When downsampling, the pool branch needs a 1x1 conv to reach out_planes.
            self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False)
            self.bn1 = nn.BatchNorm2d(out_planes)
    def forward(self, x):
        y1 = self.sep_conv1(x)
        y2 = F.max_pool2d(x, kernel_size=3, stride=self.stride, padding=1)
        if self.stride==2:
            y2 = self.bn1(self.conv1(y2))
        return F.relu(y1+y2)
class CellB(nn.Module):
    # PNASNet cell B: two summed branch pairs, concatenated and then reduced
    # from 2x to 1x out_planes with a 1x1 conv.
    def __init__(self, in_planes, out_planes, stride=1):
        super(CellB, self).__init__()
        self.stride = stride
        # Left branch
        self.sep_conv1 = SepConv(in_planes, out_planes, kernel_size=7, stride=stride)
        self.sep_conv2 = SepConv(in_planes, out_planes, kernel_size=3, stride=stride)
        # Right branch
        self.sep_conv3 = SepConv(in_planes, out_planes, kernel_size=5, stride=stride)
        if stride==2:
            # Pool sub-branch needs a 1x1 conv to reach out_planes when downsampling.
            self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False)
            self.bn1 = nn.BatchNorm2d(out_planes)
        # Reduce channels
        self.conv2 = nn.Conv2d(2*out_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn2 = nn.BatchNorm2d(out_planes)
    def forward(self, x):
        # Left branch
        y1 = self.sep_conv1(x)
        y2 = self.sep_conv2(x)
        # Right branch
        y3 = F.max_pool2d(x, kernel_size=3, stride=self.stride, padding=1)
        if self.stride==2:
            y3 = self.bn1(self.conv1(y3))
        y4 = self.sep_conv3(x)
        # Concat & reduce channels
        b1 = F.relu(y1+y2)
        b2 = F.relu(y3+y4)
        y = torch.cat([b1,b2], 1)
        return F.relu(self.bn2(self.conv2(y)))
class PNASNet(nn.Module):
    # Stack of `cell_type` cells: three stages of stride-1 cells with a
    # stride-2 downsampling cell doubling the width between stages.
    def __init__(self, cell_type, num_cells, num_planes):
        super(PNASNet, self).__init__()
        self.in_planes = num_planes
        self.cell_type = cell_type
        self.conv1 = nn.Conv2d(3, num_planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(num_planes)
        self.layer1 = self._make_layer(num_planes, num_cells=6)
        self.layer2 = self._downsample(num_planes*2)
        self.layer3 = self._make_layer(num_planes*2, num_cells=6)
        self.layer4 = self._downsample(num_planes*4)
        self.layer5 = self._make_layer(num_planes*4, num_cells=6)
        self.linear = nn.Linear(num_planes*4, 10)
    def _make_layer(self, planes, num_cells):
        # Stride-1 cells; `self.in_planes` tracks the current width.
        layers = []
        for _ in range(num_cells):
            layers.append(self.cell_type(self.in_planes, planes, stride=1))
            self.in_planes = planes
        return nn.Sequential(*layers)
    def _downsample(self, planes):
        # Single stride-2 cell that also changes the channel count.
        layer = self.cell_type(self.in_planes, planes, stride=2)
        self.in_planes = planes
        return layer
    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.layer5(out)
        out = F.avg_pool2d(out, 8)
        out = self.linear(out.view(out.size(0), -1))
        return out
def PNASNetA():
    return PNASNet(CellA, num_cells=6, num_planes=44)
def PNASNetB():
    return PNASNet(CellB, num_cells=6, num_planes=32)
def test():
    # Smoke test: forward one random CIFAR-sized image.
    net = PNASNetB()
    x = torch.randn(1,3,32,32)
    y = net(x)
    print(y)
# test()

Просмотреть файл

@ -0,0 +1,118 @@
'''Pre-activation ResNet in PyTorch.
Reference:
[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
Identity Mappings in Deep Residual Networks. arXiv:1603.05027
'''
import torch
import torch.nn as nn
import torch.nn.functional as F
class PreActBlock(nn.Module):
    '''Pre-activation version of the BasicBlock.'''
    expansion = 1
    def __init__(self, in_planes, planes, stride=1):
        super(PreActBlock, self).__init__()
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        # The shortcut attribute only exists when the shape changes; forward
        # detects its absence with hasattr() and falls back to identity.
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False)
            )
    def forward(self, x):
        out = F.relu(self.bn1(x))
        # Shortcut taps the pre-activated tensor, per the pre-act design.
        shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x
        out = self.conv1(out)
        out = self.conv2(F.relu(self.bn2(out)))
        out += shortcut
        return out
class PreActBottleneck(nn.Module):
    '''Pre-activation version of the original Bottleneck module.'''
    expansion = 4
    def __init__(self, in_planes, planes, stride=1):
        super(PreActBottleneck, self).__init__()
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False)
        # Optional projection shortcut, same hasattr() convention as PreActBlock.
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False)
            )
    def forward(self, x):
        out = F.relu(self.bn1(x))
        shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x
        out = self.conv1(out)
        out = self.conv2(F.relu(self.bn2(out)))
        out = self.conv3(F.relu(self.bn3(out)))
        out += shortcut
        return out
class PreActResNet(nn.Module):
    # Pre-activation ResNet: no BN/ReLU after conv1; each block applies
    # BN-ReLU before its convolutions instead (He et al., arXiv:1603.05027).
    def __init__(self, block, num_blocks, num_classes=10):
        super(PreActResNet, self).__init__()
        self.in_planes = 64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512*block.expansion, num_classes)
    def _make_layer(self, block, planes, num_blocks, stride):
        # First block of each stage may downsample; the rest use stride 1.
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)
    def forward(self, x):
        out = self.conv1(x)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out
# Depth variants mirroring the classic ResNet family.
def PreActResNet18():
    return PreActResNet(PreActBlock, [2,2,2,2])
def PreActResNet34():
    return PreActResNet(PreActBlock, [3,4,6,3])
def PreActResNet50():
    return PreActResNet(PreActBottleneck, [3,4,6,3])
def PreActResNet101():
    return PreActResNet(PreActBottleneck, [3,4,23,3])
def PreActResNet152():
    return PreActResNet(PreActBottleneck, [3,8,36,3])
def test():
    # Smoke test: forward one random CIFAR-sized image.
    net = PreActResNet18()
    y = net((torch.randn(1,3,32,32)))
    print(y.size())
# test()

Просмотреть файл

@ -0,0 +1,121 @@
'''ResNet in PyTorch.
For Pre-activation ResNet, see 'preact_resnet.py'.
Reference:
[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
Deep Residual Learning for Image Recognition. arXiv:1512.03385
'''
import torch
import torch.nn as nn
import torch.nn.functional as F
class BasicBlock(nn.Module):
    """Standard two-conv residual block (ResNet-18/34)."""
    expansion = 1
    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        # Projection shortcut whenever the shape changes, identity otherwise.
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes),
            )
        else:
            self.shortcut = nn.Sequential()
    def forward(self, x):
        y = F.relu(self.bn1(self.conv1(x)))
        y = self.bn2(self.conv2(y))
        # Post-activation residual: add the shortcut, then ReLU.
        y = y + self.shortcut(x)
        return F.relu(y)
class Bottleneck(nn.Module):
    # Three-conv residual block (1x1 reduce, 3x3, 1x1 expand) for ResNet-50+.
    expansion = 4
    def __init__(self, in_planes, planes, stride=1):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(self.expansion*planes)
        # Projection shortcut when the spatial size or width changes.
        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes)
            )
    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out
class ResNet(nn.Module):
    # Four-stage CIFAR ResNet; stage widths 64/128/256/512 x block.expansion.
    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet, self).__init__()
        self.in_planes = 64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512*block.expansion, num_classes)
    def _make_layer(self, block, planes, num_blocks, stride):
        # First block of each stage may downsample; the rest use stride 1.
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)
    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out
# Depth variants from the original paper.
def ResNet18():
    return ResNet(BasicBlock, [2,2,2,2])
def ResNet34():
    return ResNet(BasicBlock, [3,4,6,3])
def ResNet50():
    return ResNet(Bottleneck, [3,4,6,3])
def ResNet101():
    return ResNet(Bottleneck, [3,4,23,3])
def ResNet152():
    return ResNet(Bottleneck, [3,8,36,3])
def test():
    # Smoke test: forward one random CIFAR-sized image.
    net = ResNet18()
    y = net(torch.randn(1,3,32,32))
    print(y.size())
# test()

Просмотреть файл

@ -0,0 +1,95 @@
'''ResNeXt in PyTorch.
See the paper "Aggregated Residual Transformations for Deep Neural Networks" for more details.
'''
import torch
import torch.nn as nn
import torch.nn.functional as F
class Block(nn.Module):
    '''Grouped convolution block.'''
    expansion = 2
    def __init__(self, in_planes, cardinality=32, bottleneck_width=4, stride=1):
        # cardinality: number of conv groups; bottleneck_width: channels per group.
        super(Block, self).__init__()
        group_width = cardinality * bottleneck_width
        self.conv1 = nn.Conv2d(in_planes, group_width, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(group_width)
        self.conv2 = nn.Conv2d(group_width, group_width, kernel_size=3, stride=stride, padding=1, groups=cardinality, bias=False)
        self.bn2 = nn.BatchNorm2d(group_width)
        self.conv3 = nn.Conv2d(group_width, self.expansion*group_width, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(self.expansion*group_width)
        # Projection shortcut when the shape changes; identity otherwise.
        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*group_width:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*group_width, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*group_width)
            )
    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out
class ResNeXt(nn.Module):
    # Three-stage CIFAR ResNeXt (29-layer family); stage 4 is disabled below.
    def __init__(self, num_blocks, cardinality, bottleneck_width, num_classes=10):
        super(ResNeXt, self).__init__()
        self.cardinality = cardinality
        self.bottleneck_width = bottleneck_width
        self.in_planes = 64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(num_blocks[0], 1)
        self.layer2 = self._make_layer(num_blocks[1], 2)
        self.layer3 = self._make_layer(num_blocks[2], 2)
        # self.layer4 = self._make_layer(num_blocks[3], 2)
        # Final width: Block.expansion * cardinality * bottleneck_width * 2^2 = c*w*8.
        self.linear = nn.Linear(cardinality*bottleneck_width*8, num_classes)
    def _make_layer(self, num_blocks, stride):
        # First block of a stage may downsample; `bottleneck_width` doubles
        # after the stage so channel count grows stage by stage.
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(Block(self.in_planes, self.cardinality, self.bottleneck_width, stride))
            self.in_planes = Block.expansion * self.cardinality * self.bottleneck_width
        # Double bottleneck_width after each stage.
        self.bottleneck_width *= 2
        return nn.Sequential(*layers)
    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        # out = self.layer4(out)
        out = F.avg_pool2d(out, 8)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out
# 29-layer variants; <cardinality>x<bottleneck_width>d naming from the paper.
def ResNeXt29_2x64d():
    return ResNeXt(num_blocks=[3,3,3], cardinality=2, bottleneck_width=64)
def ResNeXt29_4x64d():
    return ResNeXt(num_blocks=[3,3,3], cardinality=4, bottleneck_width=64)
def ResNeXt29_8x64d():
    return ResNeXt(num_blocks=[3,3,3], cardinality=8, bottleneck_width=64)
def ResNeXt29_32x4d():
    return ResNeXt(num_blocks=[3,3,3], cardinality=32, bottleneck_width=4)
def test_resnext():
    # Smoke test: forward one random CIFAR-sized image.
    net = ResNeXt29_2x64d()
    x = torch.randn(1,3,32,32)
    y = net(x)
    print(y.size())
# test_resnext()

Просмотреть файл

@ -0,0 +1,121 @@
'''SENet in PyTorch.
SENet is the winner of ImageNet-2017. The paper is not released yet.
'''
import torch
import torch.nn as nn
import torch.nn.functional as F
class BasicBlock(nn.Module):
def __init__(self, in_planes, planes, stride=1):
super(BasicBlock, self).__init__()
self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(planes)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(planes)
self.shortcut = nn.Sequential()
if stride != 1 or in_planes != planes:
self.shortcut = nn.Sequential(
nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(planes)
)
# SE layers
self.fc1 = nn.Conv2d(planes, planes//16, kernel_size=1) # Use nn.Conv2d instead of nn.Linear
self.fc2 = nn.Conv2d(planes//16, planes, kernel_size=1)
def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = self.bn2(self.conv2(out))
# Squeeze
w = F.avg_pool2d(out, out.size(2))
w = F.relu(self.fc1(w))
w = F.sigmoid(self.fc2(w))
# Excitation
out = out * w # New broadcasting feature from v0.2!
out += self.shortcut(x)
out = F.relu(out)
return out
class PreActBlock(nn.Module):
def __init__(self, in_planes, planes, stride=1):
super(PreActBlock, self).__init__()
self.bn1 = nn.BatchNorm2d(in_planes)
self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(planes)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
if stride != 1 or in_planes != planes:
self.shortcut = nn.Sequential(
nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride, bias=False)
)
# SE layers
self.fc1 = nn.Conv2d(planes, planes//16, kernel_size=1)
self.fc2 = nn.Conv2d(planes//16, planes, kernel_size=1)
def forward(self, x):
out = F.relu(self.bn1(x))
shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x
out = self.conv1(out)
out = self.conv2(F.relu(self.bn2(out)))
# Squeeze
w = F.avg_pool2d(out, out.size(2))
w = F.relu(self.fc1(w))
w = F.sigmoid(self.fc2(w))
# Excitation
out = out * w
out += shortcut
return out
class SENet(nn.Module):
def __init__(self, block, num_blocks, num_classes=10):
super(SENet, self).__init__()
self.in_planes = 64
self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(64)
self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
self.linear = nn.Linear(512, num_classes)
def _make_layer(self, block, planes, num_blocks, stride):
strides = [stride] + [1]*(num_blocks-1)
layers = []
for stride in strides:
layers.append(block(self.in_planes, planes, stride))
self.in_planes = planes
return nn.Sequential(*layers)
def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = self.layer1(out)
out = self.layer2(out)
out = self.layer3(out)
out = self.layer4(out)
out = F.avg_pool2d(out, 4)
out = out.view(out.size(0), -1)
out = self.linear(out)
return out
def SENet18():
return SENet(PreActBlock, [2,2,2,2])
def test():
    # Smoke test: push one random CIFAR-sized batch through SENet-18 and
    # print the output shape (expected: 1 x 10).
    net = SENet18()
    y = net(torch.randn(1,3,32,32))
    print(y.size())
# test()

Просмотреть файл

@ -0,0 +1,109 @@
'''ShuffleNet in PyTorch.
See the paper "ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices" for more details.
'''
import torch
import torch.nn as nn
import torch.nn.functional as F
class ShuffleBlock(nn.Module):
    """Channel-shuffle operation from ShuffleNet.

    Interleaves channels across the `groups` groups so information can flow
    between groups of successive grouped convolutions.
    """
    def __init__(self, groups):
        super(ShuffleBlock, self).__init__()
        self.groups = groups

    def forward(self, x):
        '''Channel shuffle: [N,C,H,W] -> [N,g,C/g,H,W] -> [N,C/g,g,H,W] -> [N,C,H,W]'''
        N, C, H, W = x.size()
        g = self.groups
        # Must use integer division: under Python 3, C / g is a float and
        # torch.Tensor.view rejects non-integer sizes.
        return x.view(N, g, C // g, H, W).permute(0, 2, 1, 3, 4).contiguous().view(N, C, H, W)
class Bottleneck(nn.Module):
    """ShuffleNet bottleneck: 1x1 grouped conv -> channel shuffle -> 3x3
    depthwise conv -> 1x1 grouped conv; the average-pooled shortcut is
    concatenated when stride == 2 and added otherwise."""

    def __init__(self, in_planes, out_planes, stride, groups):
        super(Bottleneck, self).__init__()
        self.stride = stride
        # Integer division: under Python 3, out_planes / 4 is a float and
        # nn.Conv2d rejects non-integer channel counts.
        mid_planes = out_planes // 4
        # The stem stage (24 input channels) uses no grouping for conv1.
        g = 1 if in_planes == 24 else groups
        self.conv1 = nn.Conv2d(in_planes, mid_planes, kernel_size=1, groups=g, bias=False)
        self.bn1 = nn.BatchNorm2d(mid_planes)
        self.shuffle1 = ShuffleBlock(groups=g)
        # Depthwise 3x3 conv (groups == channels).
        self.conv2 = nn.Conv2d(mid_planes, mid_planes, kernel_size=3, stride=stride, padding=1, groups=mid_planes, bias=False)
        self.bn2 = nn.BatchNorm2d(mid_planes)
        self.conv3 = nn.Conv2d(mid_planes, out_planes, kernel_size=1, groups=groups, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)
        self.shortcut = nn.Sequential()
        if stride == 2:
            self.shortcut = nn.Sequential(nn.AvgPool2d(3, stride=2, padding=1))

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.shuffle1(out)
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        res = self.shortcut(x)
        # Downsampling blocks concatenate (channel counts add up); others sum.
        out = F.relu(torch.cat([out, res], 1)) if self.stride == 2 else F.relu(out + res)
        return out
class ShuffleNet(nn.Module):
    """ShuffleNet for 32x32 inputs, parameterized by a cfg dict holding
    'out_planes', 'num_blocks' and 'groups' (see ShuffleNetG2/ShuffleNetG3)."""

    def __init__(self, cfg):
        super(ShuffleNet, self).__init__()
        out_planes = cfg['out_planes']
        num_blocks = cfg['num_blocks']
        groups = cfg['groups']
        self.conv1 = nn.Conv2d(3, 24, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(24)
        self.in_planes = 24
        self.layer1 = self._make_layer(out_planes[0], num_blocks[0], groups)
        self.layer2 = self._make_layer(out_planes[1], num_blocks[1], groups)
        self.layer3 = self._make_layer(out_planes[2], num_blocks[2], groups)
        self.linear = nn.Linear(out_planes[2], 10)

    def _make_layer(self, out_planes, num_blocks, groups):
        """One stage: a stride-2 block followed by stride-1 blocks. The first
        block emits fewer channels because its shortcut is concatenated."""
        blocks = []
        for index in range(num_blocks):
            first = index == 0
            blocks.append(Bottleneck(self.in_planes,
                                     out_planes - (self.in_planes if first else 0),
                                     stride=2 if first else 1,
                                     groups=groups))
            self.in_planes = out_planes
        return nn.Sequential(*blocks)

    def forward(self, x):
        h = F.relu(self.bn1(self.conv1(x)))
        h = self.layer3(self.layer2(self.layer1(h)))
        h = F.avg_pool2d(h, 4)
        return self.linear(h.view(h.size(0), -1))
def ShuffleNetG2():
    # ShuffleNet configuration with groups=2.
    cfg = {
        'out_planes': [200,400,800],
        'num_blocks': [4,8,4],
        'groups': 2
    }
    return ShuffleNet(cfg)
def ShuffleNetG3():
    # ShuffleNet configuration with groups=3.
    cfg = {
        'out_planes': [240,480,960],
        'num_blocks': [4,8,4],
        'groups': 3
    }
    return ShuffleNet(cfg)
def test():
    # Smoke test: one random CIFAR-sized input through ShuffleNetG2.
    net = ShuffleNetG2()
    x = torch.randn(1,3,32,32)
    y = net(x)
    print(y)
# test()

Просмотреть файл

@ -0,0 +1,47 @@
'''VGG11/13/16/19 in Pytorch.'''
import torch
import torch.nn as nn
# Per-variant layer configurations: integers are conv output channel counts,
# 'M' inserts a 2x2 max-pooling layer (see VGG._make_layers).
cfg = {
    'VGG11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'VGG19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}
class VGG(nn.Module):
    """VGG for CIFAR-10: conv features built from the module-level `cfg`
    table, followed by a single 512 -> 10 linear classifier."""

    def __init__(self, vgg_name):
        super(VGG, self).__init__()
        self.features = self._make_layers(cfg[vgg_name])
        self.classifier = nn.Linear(512, 10)

    def forward(self, x):
        feats = self.features(x)
        return self.classifier(feats.view(feats.size(0), -1))

    def _make_layers(self, cfg):
        """Translate a config list into a Sequential of conv/BN/ReLU and pools."""
        layers = []
        in_channels = 3
        for spec in cfg:
            if spec == 'M':
                layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
            else:
                layers.extend([
                    nn.Conv2d(in_channels, spec, kernel_size=3, padding=1),
                    nn.BatchNorm2d(spec),
                    nn.ReLU(inplace=True),
                ])
                in_channels = spec
        # 1x1 average pool with stride 1 is effectively an identity layer.
        layers.append(nn.AvgPool2d(kernel_size=1, stride=1))
        return nn.Sequential(*layers)
def test():
    # Smoke test: batch of two random CIFAR-sized inputs through VGG11,
    # prints the output shape (expected: 2 x 10).
    net = VGG('VGG11')
    x = torch.randn(2,3,32,32)
    y = net(x)
    print(y.size())
# test()

Просмотреть файл

@ -0,0 +1,6 @@
This example requires pytorch.
The pytorch install package should be chosen based on your python and cuda versions.
For example, with python==3.5 and cuda==8.0, use the following commands to install pytorch:
pip3 install http://download.pytorch.org/whl/cu80/torch-0.4.1-cp35-cp35m-linux_x86_64.whl
pip3 install torchvision

Просмотреть файл

@ -0,0 +1,5 @@
{
"lr":{"_type":"choice", "_value":[0.1, 0.01, 0.001, 0.0001]},
"optimizer":{"_type":"choice", "_value":["SGD", "Adadelta", "Adagrad", "Adam", "Adamax"]},
"model":{"_type":"choice", "_value":["vgg", "resnet18", "googlenet", "densenet121", "mobilenet", "dpn92", "senet18"]}
}

Просмотреть файл

@ -0,0 +1,127 @@
'''Some helper functions for PyTorch, including:
- get_mean_and_std: calculate the mean and std value of dataset.
- msr_init: net parameter initialization.
- progress_bar: progress bar mimic xlua.progress.
'''
import math
import os
import sys
import time

import torch
import torch.nn as nn
import torch.nn.init as init
def get_mean_and_std(dataset):
    '''Compute the per-channel mean and std value of a dataset.

    Args:
        dataset: a torch-style dataset yielding (image, target) pairs where
            image is a 3-channel tensor.

    Returns:
        (mean, std): two 3-element tensors, averaged over all samples.
    '''
    # NOTE: this file previously used `torch.` without importing torch, which
    # raised NameError at call time; `import torch` is now at the top of the
    # file. shuffle=False and num_workers=0 because a one-pass statistic
    # needs neither shuffling nor worker processes, and a fixed iteration
    # order keeps the floating-point accumulation deterministic.
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False, num_workers=0)
    mean = torch.zeros(3)
    std = torch.zeros(3)
    print('==> Computing mean and std..')
    for inputs, targets in dataloader:
        for i in range(3):
            mean[i] += inputs[:,i,:,:].mean()
            std[i] += inputs[:,i,:,:].std()
    mean.div_(len(dataset))
    std.div_(len(dataset))
    return mean, std
def init_params(net):
    '''Init layer parameters: Kaiming/MSR init for conv weights, constants
    for batch-norm, and a small normal distribution for linear weights.

    Args:
        net: an nn.Module whose submodules are initialized in place.
    '''
    for m in net.modules():
        if isinstance(m, nn.Conv2d):
            # Underscore variants are the in-place initializers; the
            # non-underscore names are deprecated aliases.
            init.kaiming_normal_(m.weight, mode='fan_out')
            # `if m.bias:` raises RuntimeError on multi-element tensors
            # (ambiguous truth value); test for presence instead.
            if m.bias is not None:
                init.constant_(m.bias, 0)
        elif isinstance(m, nn.BatchNorm2d):
            init.constant_(m.weight, 1)
            init.constant_(m.bias, 0)
        elif isinstance(m, nn.Linear):
            init.normal_(m.weight, std=1e-3)
            if m.bias is not None:
                init.constant_(m.bias, 0)
# Terminal width used to pad and position the progress bar. `stty size`
# fails when stdout is not a tty (pipes, CI), so fall back to 200 columns.
term_width = 0
try:
    _, term_width = os.popen('stty size', 'r').read().split()
except Exception as exception:
    term_width = 200
term_width = int(term_width)
# Number of characters used for the bar itself.
TOTAL_BAR_LENGTH = 65.
# Timestamps shared across progress_bar() calls for per-step / total timing.
last_time = time.time()
begin_time = last_time
def progress_bar(current, total, msg=None):
    '''Render a single-line text progress bar on stdout (mimics xlua.progress).

    Args:
        current: zero-based index of the step that just finished.
        total: total number of steps.
        msg: optional extra status text appended after the timing info.
    '''
    global last_time, begin_time
    if current == 0:
        begin_time = time.time()  # Reset for new bar.
    # Bar geometry: '=' for the completed portion, '>' head, '.' for the rest.
    cur_len = int(TOTAL_BAR_LENGTH*current/total)
    rest_len = int(TOTAL_BAR_LENGTH - cur_len) - 1
    sys.stdout.write(' [')
    for i in range(cur_len):
        sys.stdout.write('=')
    sys.stdout.write('>')
    for i in range(rest_len):
        sys.stdout.write('.')
    sys.stdout.write(']')
    # Timing: per-step delta and total elapsed since the bar started.
    cur_time = time.time()
    step_time = cur_time - last_time
    last_time = cur_time
    tot_time = cur_time - begin_time
    L = []
    L.append(' Step: %s' % format_time(step_time))
    L.append(' | Tot: %s' % format_time(tot_time))
    if msg:
        L.append(' | ' + msg)
    msg = ''.join(L)
    sys.stdout.write(msg)
    # Pad with spaces so leftovers from a longer previous line are erased.
    for i in range(term_width-int(TOTAL_BAR_LENGTH)-len(msg)-3):
        sys.stdout.write(' ')
    # Go back to the center of the bar.
    for i in range(term_width-int(TOTAL_BAR_LENGTH/2)+2):
        sys.stdout.write('\b')
    sys.stdout.write(' %d/%d ' % (current+1, total))
    # Carriage return keeps rewriting the same line until the final step.
    if current < total-1:
        sys.stdout.write('\r')
    else:
        sys.stdout.write('\n')
    sys.stdout.flush()
def format_time(seconds):
    '''Format a duration in seconds as a compact string, e.g. '1h1m'.

    At most the two most significant non-zero units (D, h, m, s, ms) are
    shown; zero durations render as '0ms'.
    '''
    # Break the duration into whole units, largest first.
    units = []
    for unit_seconds, suffix in ((3600 * 24, 'D'), (3600, 'h'), (60, 'm'), (1, 's')):
        count = int(seconds / unit_seconds)
        seconds = seconds - count * unit_seconds
        units.append((count, suffix))
    units.append((int(seconds * 1000), 'ms'))
    # Keep only the first two non-zero components, in order of significance.
    parts = []
    for value, suffix in units:
        if value > 0:
            parts.append(str(value) + suffix)
        if len(parts) == 2:
            break
    return ''.join(parts) if parts else '0ms'

Просмотреть файл

@ -0,0 +1,15 @@
# How to use ga_customer_tuner?
This tuner is a customized tuner which is only suitable for trials whose code path is "~/nni/examples/trials/ga_squad";
run `cd ~/nni/examples/trials/ga_squad` and check readme.md for more information about the ga_squad trial.
# config
If you want to use ga_customer_tuner in your experiment, you could set config file as following format:
```
tuner:
codeDir: ~/nni/examples/tuners/ga_customer_tuner
classFileName: customer_tuner.py
className: CustomerTuner
classArgs:
optimize_mode: maximize
```

Просмотреть файл

@ -1,12 +1,19 @@
#!/bin/bash #!/bin/bash
INSTALL_PREFIX=${HOME}/.local
mkdir -p ${INSTALL_PREFIX}
wget -4 -nc https://nodejs.org/dist/v10.9.0/node-v10.9.0-linux-x64.tar.xz --header "Referer: nodejs.org" wget -4 -nc https://nodejs.org/dist/v10.9.0/node-v10.9.0-linux-x64.tar.xz --header "Referer: nodejs.org"
tar -xf 'node-v10.9.0-linux-x64.tar.xz' tar -xf 'node-v10.9.0-linux-x64.tar.xz'
sudo cp -rf node-v10.9.0-linux-x64/* /usr/local/node/ cp -rT node-v10.9.0-linux-x64 ${INSTALL_PREFIX}/node
rm -rf node-v10.9.0-linux-x64* rm -rf node-v10.9.0-linux-x64*
wget -4 -nc https://github.com/yarnpkg/yarn/releases/download/v1.9.4/yarn-v1.9.4.tar.gz wget -4 -nc https://github.com/yarnpkg/yarn/releases/download/v1.9.4/yarn-v1.9.4.tar.gz
tar -xf 'yarn-v1.9.4.tar.gz' tar -xf 'yarn-v1.9.4.tar.gz'
sudo cp -rf yarn-v1.9.4/* /usr/local/yarn/ cp -rT yarn-v1.9.4 ${INSTALL_PREFIX}/yarn
rm -rf yarn-v1.9.4* rm -rf yarn-v1.9.4*
export PATH=/usr/local/node/bin:/usr/local/yarn/bin:$PATH NODE_BIN=${INSTALL_PREFIX}/node/bin
YARN_BIN=${INSTALL_PREFIX}/yarn/bin
export PATH=${INSTALL_PREFIX}/node/bin:${INSTALL_PREFIX}/yarn/bin:$PATH
echo $PATH|grep -q ${NODE_BIN} || echo "export PATH=${NODE_BIN}:\${PATH}" >> ${HOME}/.bashrc
echo $PATH|grep -q ${YARN_BIN} || echo "export PATH=${YARN_BIN}:\${PATH}" >> ${HOME}/.bashrc
source ${HOME}/.bashrc
make make
sudo make install make install

96
setup.py Normal file
Просмотреть файл

@ -0,0 +1,96 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
# associated documentation files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge, publish, distribute,
# sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or
# substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
# NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
# OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# ==================================================================================================
import os
from setuptools import setup, find_packages
from setuptools.command.install import install
from subprocess import Popen
def read(fname):
    """Return the contents of `fname`, resolved relative to this file's directory.

    Uses a context manager so the file handle is closed promptly; the
    original left it open until garbage collection.
    """
    with open(os.path.join(os.path.dirname(__file__), fname)) as handle:
        return handle.read()
class CustomInstallCommand(install):
    '''Customized setuptools install command: after the normal install it
    runs the repo Makefile and registers node/yarn bin paths in ~/.bashrc.'''
    def makeInstall(self):
        '''Execute the `make pip-install` command; exit on failure.'''
        # Delegates the actual build/copy work to the repository Makefile.
        cmds = ['make', 'pip-install']
        process = Popen(cmds)
        if process.wait() != 0:
            print('Error: Make Install Failed')
            exit(-1)
    def writeEnvironmentVariables(self, variable_name):
        '''Append ~/.local/<variable_name>/bin to PATH in ~/.bashrc if it is
        not already on the current PATH.'''
        paths = os.getenv("PATH").split(':')
        bin_path = os.path.join(os.getenv('HOME'),'.local/'+variable_name+'/bin')
        if bin_path not in paths:
            bashrc_path = os.path.join(os.getenv('HOME'), '.bashrc')
            # The backslash keeps `$PATH` literal in .bashrc so it expands at
            # shell startup rather than here.
            process = Popen('echo export PATH=' + bin_path + ':\$PATH >> ' + bashrc_path, shell=True)
            if process.wait() != 0:
                print('Error: Write Environment Variables Failed')
                exit(-1)
    def run(self):
        # Standard install first, then the NNI-specific post-install steps.
        install.run(self)
        self.makeInstall()
        self.writeEnvironmentVariables('node')
        self.writeEnvironmentVariables('yarn')
# Package metadata and installation configuration for NNI.
setup(
    name = 'NNI',
    version = '0.0.1',
    author = 'Microsoft NNI Team',
    author_email = 'nni@microsoft.com',
    description = 'Neural Network Intelligence project',
    # Reuse the nnictl documentation as the long description.
    long_description = read('docs/NNICTLDOC.md'),
    license = 'MIT',
    url = 'https://msrasrg.visualstudio.com/NeuralNetworkIntelligence',
    # Ship the Python SDK packages plus the command-line tools.
    packages = find_packages('src/sdk/pynni', exclude=['tests']) + find_packages('tools'),
    package_dir = {
        'annotation': 'tools/annotation',
        'nni': 'src/sdk/pynni/nni',
        'nnicmd': 'tools/nnicmd'
    },
    python_requires = '>=3.5',
    install_requires = [
        'astor',
        'json_tricks',
        'numpy',
        'psutil',
        'pymc3',
        'pyyaml',
        'requests',
        'scipy'
    ],
    dependency_links = [
        'git+https://github.com/hyperopt/hyperopt.git',
    ],
    # Run CustomInstallCommand (Makefile build + PATH setup) on `pip install`.
    cmdclass={
        'install': CustomInstallCommand
    },
    # Installs the `nnictl` console entry point.
    entry_points={
        'console_scripts': ['nnictl = nnicmd.nnictl:parse_args']
    }
)

Просмотреть файл

@ -32,16 +32,22 @@ interface ExperimentParams {
maxTrialNum: number; maxTrialNum: number;
searchSpace: string; searchSpace: string;
tuner: { tuner: {
tunerCommand: string; className: string;
tunerCwd: string; builtinTunerName?: string;
tunerCheckpointDirectory: string; codeDir?: string;
tunerGpuNum?: number; classArgs?: any;
classFileName?: string;
checkpointDir: string;
gpuNum?: number;
}; };
assessor?: { assessor?: {
assessorCommand: string; className: string;
assessorCwd: string; builtinAssessorName?: string;
assessorCheckpointDirectory: string; codeDir?: string;
assessorGpuNum?: number; classArgs?: any;
classFileName?: string;
checkpointDir: string;
gpuNum?: number;
}; };
clusterMetaData?: { clusterMetaData?: {
key: string; key: string;

Просмотреть файл

@ -105,6 +105,8 @@ abstract class TrainingService {
public abstract addTrialJobMetricListener(listener: (metric: TrialJobMetric) => void): void; public abstract addTrialJobMetricListener(listener: (metric: TrialJobMetric) => void): void;
public abstract removeTrialJobMetricListener(listener: (metric: TrialJobMetric) => void): void; public abstract removeTrialJobMetricListener(listener: (metric: TrialJobMetric) => void): void;
public abstract submitTrialJob(form: JobApplicationForm): Promise<TrialJobDetail>; public abstract submitTrialJob(form: JobApplicationForm): Promise<TrialJobDetail>;
public abstract updateTrialJob(trialJobId: string, form: JobApplicationForm): Promise<TrialJobDetail>;
public abstract get isMultiPhaseJobSupported(): boolean;
public abstract cancelTrialJob(trialJobId: string): Promise<void>; public abstract cancelTrialJob(trialJobId: string): Promise<void>;
public abstract setClusterMetadata(key: string, value: string): Promise<void>; public abstract setClusterMetadata(key: string, value: string): Promise<void>;
public abstract getClusterMetadata(key: string): Promise<string>; public abstract getClusterMetadata(key: string): Promise<string>;

Просмотреть файл

@ -28,7 +28,7 @@ import { Container } from 'typescript-ioc';
import * as util from 'util'; import * as util from 'util';
import { Database, DataStore } from './datastore'; import { Database, DataStore } from './datastore';
import { ExperimentStartupInfo, setExperimentStartupInfo, getExperimentId } from './experimentStartupInfo'; import { ExperimentStartupInfo, getExperimentId, setExperimentStartupInfo } from './experimentStartupInfo';
import { Manager } from './manager'; import { Manager } from './manager';
import { TrainingService } from './trainingService'; import { TrainingService } from './trainingService';
@ -127,6 +127,63 @@ function parseArg(names: string[]): string {
return ''; return '';
} }
/**
* Generate command line to start advisor process which runs tuner and assessor
* @param tuner : For builtin tuner:
* {
* className: 'EvolutionTuner'
* classArgs: {
* optimize_mode: 'maximize',
* population_size: 3
* }
* }
* customized:
* {
* codeDir: '/tmp/mytuner'
* classFile: 'best_tuner.py'
* className: 'BestTuner'
* classArgs: {
* optimize_mode: 'maximize',
* population_size: 3
* }
* }
*
 * @param assessor: similar to tuner
*
*/
function getMsgDispatcherCommand(tuner: any, assessor: any): string {
    // The tuner class name is mandatory; all other flags are appended only when configured.
    let command: string = `python3 -m nni --tuner_class_name ${tuner.className}`;
    if (tuner.classArgs !== undefined) {
        // Stringify twice so the JSON survives as a single quoted argument
        // when the command is run through a shell.
        command += ` --tuner_args ${JSON.stringify(JSON.stringify(tuner.classArgs))}`;
    }
    if (tuner.codeDir !== undefined && tuner.codeDir.length > 1) {
        command += ` --tuner_directory ${tuner.codeDir}`;
    }
    if (tuner.classFileName !== undefined && tuner.classFileName.length > 1) {
        command += ` --tuner_class_filename ${tuner.classFileName}`;
    }
    // The assessor is optional; when present, mirror the tuner flags.
    if (assessor !== undefined && assessor.className !== undefined) {
        command += ` --assessor_class_name ${assessor.className}`;
        if (assessor.classArgs !== undefined) {
            command += ` --assessor_args ${JSON.stringify(JSON.stringify(assessor.classArgs))}`;
        }
        if (assessor.codeDir !== undefined && assessor.codeDir.length > 1) {
            command += ` --assessor_directory ${assessor.codeDir}`;
        }
        if (assessor.classFileName !== undefined && assessor.classFileName.length > 1) {
            command += ` --assessor_class_filename ${assessor.classFileName}`;
        }
    }
    return command;
}
/** /**
* Initialize a pseudo experiment environment for unit test. * Initialize a pseudo experiment environment for unit test.
* Must be paired with `cleanupUnitTest()`. * Must be paired with `cleanupUnitTest()`.
@ -161,5 +218,5 @@ function cleanupUnitTest(): void {
Container.restore(ExperimentStartupInfo); Container.restore(ExperimentStartupInfo);
} }
export { getLogDir, getExperimentRootDir, getDefaultDatabaseDir, mkDirP, delay, prepareUnitTest, export { getMsgDispatcherCommand, getLogDir, getExperimentRootDir, getDefaultDatabaseDir, mkDirP, delay, prepareUnitTest,
parseArg, cleanupUnitTest, uniqueString }; parseArg, cleanupUnitTest, uniqueString };

Просмотреть файл

@ -135,16 +135,8 @@ class IpcInterface {
* Create IPC proxy for tuner process * Create IPC proxy for tuner process
* @param process_ the tuner process * @param process_ the tuner process
*/ */
function createTunerInterface(process: ChildProcess): IpcInterface { function createDispatcherInterface(process: ChildProcess): IpcInterface {
return new IpcInterface(process, CommandType.TUNER_COMMANDS); return new IpcInterface(process, new Set([...CommandType.TUNER_COMMANDS, ...CommandType.ASSESSOR_COMMANDS]));
} }
/** export { IpcInterface, createDispatcherInterface };
* Create IPC proxy for assessor process
* @param process_ the assessor process
*/
function createAssessorInterface(process: ChildProcess): IpcInterface {
return new IpcInterface(process, CommandType.ASSESSOR_COMMANDS);
}
export { IpcInterface, createTunerInterface, createAssessorInterface };

Просмотреть файл

@ -185,6 +185,9 @@ class NNIDataStore implements DataStore {
// assume data is stored by time ASC order // assume data is stored by time ASC order
for (const record of trialJobEvents) { for (const record of trialJobEvents) {
let jobInfo: TrialJobInfo | undefined; let jobInfo: TrialJobInfo | undefined;
if (record.trialJobId === undefined || record.trialJobId.length < 1) {
continue;
}
if (map.has(record.trialJobId)) { if (map.has(record.trialJobId)) {
jobInfo = map.get(record.trialJobId); jobInfo = map.get(record.trialJobId);
} else { } else {

Просмотреть файл

@ -34,12 +34,12 @@ import {
import { import {
TrainingService, TrialJobApplicationForm, TrialJobDetail, TrialJobMetric, TrialJobStatus TrainingService, TrialJobApplicationForm, TrialJobDetail, TrialJobMetric, TrialJobStatus
} from '../common/trainingService'; } from '../common/trainingService';
import { delay , getLogDir} from '../common/utils'; import { delay , getLogDir, getMsgDispatcherCommand} from '../common/utils';
import { import {
ADD_CUSTOMIZED_TRIAL_JOB, KILL_TRIAL_JOB, NEW_TRIAL_JOB, NO_MORE_TRIAL_JOBS, REPORT_METRIC_DATA, ADD_CUSTOMIZED_TRIAL_JOB, KILL_TRIAL_JOB, NEW_TRIAL_JOB, NO_MORE_TRIAL_JOBS, REPORT_METRIC_DATA,
REQUEST_TRIAL_JOBS, TERMINATE, TRIAL_END, UPDATE_SEARCH_SPACE REQUEST_TRIAL_JOBS, TERMINATE, TRIAL_END, UPDATE_SEARCH_SPACE
} from './commands'; } from './commands';
import { createAssessorInterface, createTunerInterface, IpcInterface } from './ipcInterface'; import { createDispatcherInterface, IpcInterface } from './ipcInterface';
import { TrialJobMaintainerEvent, TrialJobs } from './trialJobs'; import { TrialJobMaintainerEvent, TrialJobs } from './trialJobs';
/** /**
@ -47,8 +47,7 @@ import { TrialJobMaintainerEvent, TrialJobs } from './trialJobs';
*/ */
class NNIManager implements Manager { class NNIManager implements Manager {
private trainingService: TrainingService; private trainingService: TrainingService;
private tuner: IpcInterface | undefined; private dispatcher: IpcInterface | undefined;
private assessor: IpcInterface | undefined;
private trialJobsMaintainer: TrialJobs | undefined; private trialJobsMaintainer: TrialJobs | undefined;
private currSubmittedTrialNum: number; // need to be recovered private currSubmittedTrialNum: number; // need to be recovered
private trialConcurrencyReduction: number; private trialConcurrencyReduction: number;
@ -56,9 +55,7 @@ class NNIManager implements Manager {
private log: Logger; private log: Logger;
private dataStore: DataStore; private dataStore: DataStore;
private experimentProfile: ExperimentProfile; private experimentProfile: ExperimentProfile;
// TO DO: could use struct here private dispatcherPid: number;
private tunerPid: number;
private assessorPid: number;
constructor() { constructor() {
this.currSubmittedTrialNum = 0; this.currSubmittedTrialNum = 0;
@ -67,8 +64,7 @@ class NNIManager implements Manager {
const experimentId: string = getExperimentId(); const experimentId: string = getExperimentId();
this.trainingService = component.get(TrainingService); this.trainingService = component.get(TrainingService);
assert(this.trainingService); assert(this.trainingService);
this.tunerPid = 0; this.dispatcherPid = 0;
this.assessorPid = 0;
this.log = getLogger(); this.log = getLogger();
this.dataStore = component.get(DataStore); this.dataStore = component.get(DataStore);
@ -84,9 +80,9 @@ class NNIManager implements Manager {
maxTrialNum: 0, // maxTrialNum includes all the submitted trial jobs maxTrialNum: 0, // maxTrialNum includes all the submitted trial jobs
searchSpace: '', searchSpace: '',
tuner: { tuner: {
tunerCommand: '', className: '',
tunerCwd: '', classArgs: {},
tunerCheckpointDirectory: '' checkpointDir: ''
} }
} }
}; };
@ -134,21 +130,15 @@ class NNIManager implements Manager {
this.experimentProfile.params = expParams; this.experimentProfile.params = expParams;
await this.storeExperimentProfile(); await this.storeExperimentProfile();
this.log.debug('Setup tuner...'); this.log.debug('Setup tuner...');
this.setupTuner(
expParams.tuner.tunerCommand,
expParams.tuner.tunerCwd,
'start',
expParams.tuner.tunerCheckpointDirectory);
if (expParams.assessor !== undefined) { const dispatcherCommand: string = getMsgDispatcherCommand(expParams.tuner, expParams.assessor);
this.log.debug('Setup assessor...'); console.log(`dispatcher command: ${dispatcherCommand}`);
this.setupAssessor( this.setupTuner(
expParams.assessor.assessorCommand, //expParams.tuner.tunerCommand,
expParams.assessor.assessorCwd, dispatcherCommand,
undefined,
'start', 'start',
expParams.assessor.assessorCheckpointDirectory expParams.tuner.checkpointDir);
);
}
this.experimentProfile.startTime = new Date(); this.experimentProfile.startTime = new Date();
await this.storeExperimentProfile(); await this.storeExperimentProfile();
@ -164,20 +154,13 @@ class NNIManager implements Manager {
this.experimentProfile = await this.dataStore.getExperimentProfile(experimentId); this.experimentProfile = await this.dataStore.getExperimentProfile(experimentId);
const expParams: ExperimentParams = this.experimentProfile.params; const expParams: ExperimentParams = this.experimentProfile.params;
const dispatcherCommand: string = getMsgDispatcherCommand(expParams.tuner, expParams.assessor);
console.log(`dispatcher command: ${dispatcherCommand}`);
this.setupTuner( this.setupTuner(
expParams.tuner.tunerCommand, dispatcherCommand,
expParams.tuner.tunerCwd, undefined,
'resume', 'resume',
expParams.tuner.tunerCheckpointDirectory); expParams.tuner.checkpointDir);
if (expParams.assessor !== undefined) {
this.setupAssessor(
expParams.assessor.assessorCommand,
expParams.assessor.assessorCwd,
'resume',
expParams.assessor.assessorCheckpointDirectory
);
}
const allTrialJobs: TrialJobInfo[] = await this.dataStore.listTrialJobs(); const allTrialJobs: TrialJobInfo[] = await this.dataStore.listTrialJobs();
@ -204,7 +187,7 @@ class NNIManager implements Manager {
// TO DO: move timeout value to constants file // TO DO: move timeout value to constants file
const delay1: Promise<{}> = new Promise((resolve: Function, reject: Function): void => { const delay1: Promise<{}> = new Promise((resolve: Function, reject: Function): void => {
timeoutId = setTimeout( timeoutId = setTimeout(
() => { reject(new Error('TrainingService setClusterMetadata timeout.')); }, () => { reject(new Error('TrainingService setClusterMetadata timeout. Please check your config file.')); },
10000); 10000);
}); });
await Promise.race([delay1, this.trainingService.setClusterMetadata(key, value)]).finally(() => { await Promise.race([delay1, this.trainingService.setClusterMetadata(key, value)]).finally(() => {
@ -248,8 +231,8 @@ class NNIManager implements Manager {
return this.dataStore.listTrialJobs(status); return this.dataStore.listTrialJobs(status);
} }
private setupTuner(command: string, cwd: string, mode: 'start' | 'resume', dataDirectory: string): void { private setupTuner(command: string, cwd: string | undefined, mode: 'start' | 'resume', dataDirectory: string): void {
if (this.tuner !== undefined) { if (this.dispatcher !== undefined) {
return; return;
} }
const stdio: (string | NodeJS.WriteStream)[] = ['ignore', process.stdout, process.stderr, 'pipe', 'pipe']; const stdio: (string | NodeJS.WriteStream)[] = ['ignore', process.stdout, process.stderr, 'pipe', 'pipe'];
@ -270,36 +253,8 @@ class NNIManager implements Manager {
}, },
shell: true shell: true
}); });
this.tunerPid = tunerProc.pid; this.dispatcherPid = tunerProc.pid;
this.tuner = createTunerInterface(tunerProc); this.dispatcher = createDispatcherInterface(tunerProc);
return;
}
private setupAssessor(command: string, cwd: string, mode: 'start' | 'resume', dataDirectory: string): void {
if (this.assessor !== undefined) {
return;
}
const stdio: (string | NodeJS.WriteStream)[] = ['ignore', process.stdout, process.stderr, 'pipe', 'pipe'];
let newCwd: string;
if (cwd === undefined || cwd === '') {
newCwd = getLogDir();
} else {
newCwd = cwd;
}
// TO DO: add CUDA_VISIBLE_DEVICES
const assessorProc: ChildProcess = spawn(command, [], {
stdio,
cwd: newCwd,
env: {
NNI_MODE: mode,
NNI_CHECKPOINT_DIRECTORY: dataDirectory,
NNI_LOG_DIRECTORY: getLogDir()
},
shell: true
});
this.assessorPid = assessorProc.pid;
this.assessor = createAssessorInterface(assessorProc);
return; return;
} }
@ -307,10 +262,10 @@ class NNIManager implements Manager {
private updateTrialConcurrency(trialConcurrency: number): void { private updateTrialConcurrency(trialConcurrency: number): void {
// TO DO: this method can only be called after startExperiment/resumeExperiment // TO DO: this method can only be called after startExperiment/resumeExperiment
if (trialConcurrency > this.experimentProfile.params.trialConcurrency) { if (trialConcurrency > this.experimentProfile.params.trialConcurrency) {
if (this.tuner === undefined) { if (this.dispatcher === undefined) {
throw new Error('Error: tuner has to be initialized'); throw new Error('Error: tuner has to be initialized');
} }
this.tuner.sendCommand( this.dispatcher.sendCommand(
REQUEST_TRIAL_JOBS, REQUEST_TRIAL_JOBS,
String(trialConcurrency - this.experimentProfile.params.trialConcurrency) String(trialConcurrency - this.experimentProfile.params.trialConcurrency)
); );
@ -333,45 +288,31 @@ class NNIManager implements Manager {
} }
private updateSearchSpace(searchSpace: string): void { private updateSearchSpace(searchSpace: string): void {
if (this.tuner === undefined) { if (this.dispatcher === undefined) {
throw new Error('Error: tuner has not been setup'); throw new Error('Error: tuner has not been setup');
} }
this.tuner.sendCommand(UPDATE_SEARCH_SPACE, searchSpace); this.dispatcher.sendCommand(UPDATE_SEARCH_SPACE, searchSpace);
this.experimentProfile.params.searchSpace = searchSpace; this.experimentProfile.params.searchSpace = searchSpace;
return; return;
} }
private async experimentDoneCleanUp(): Promise<void> { private async experimentDoneCleanUp(): Promise<void> {
if (this.tuner === undefined) { if (this.dispatcher === undefined) {
throw new Error('Error: tuner has not been setup'); throw new Error('Error: tuner has not been setup');
} }
this.tuner.sendCommand(TERMINATE); this.dispatcher.sendCommand(TERMINATE);
if (this.assessor !== undefined) {
this.assessor.sendCommand(TERMINATE);
}
let tunerAlive: boolean = true; let tunerAlive: boolean = true;
let assessorAlive: boolean = true;
// gracefully terminate tuner and assessor here, wait at most 30 seconds. // gracefully terminate tuner and assessor here, wait at most 30 seconds.
for (let i: number = 0; i < 30; i++) { for (let i: number = 0; i < 30; i++) {
if (!tunerAlive && !assessorAlive) { break; } if (!tunerAlive) { break; }
try { try {
await cpp.exec(`kill -0 ${this.tunerPid}`); await cpp.exec(`kill -0 ${this.dispatcherPid}`);
} catch (error) { tunerAlive = false; } } catch (error) { tunerAlive = false; }
if (this.assessor !== undefined) {
try {
await cpp.exec(`kill -0 ${this.assessorPid}`);
} catch (error) { assessorAlive = false; }
} else {
assessorAlive = false;
}
await delay(1000); await delay(1000);
} }
try { try {
await cpp.exec(`kill ${this.tunerPid}`); await cpp.exec(`kill ${this.dispatcherPid}`);
if (this.assessorPid !== undefined) {
await cpp.exec(`kill ${this.assessorPid}`);
}
} catch (error) { } catch (error) {
// this.tunerPid does not exist, do nothing here // this.tunerPid does not exist, do nothing here
} }
@ -408,25 +349,18 @@ class NNIManager implements Manager {
return this.dataStore.storeExperimentProfile(this.experimentProfile); return this.dataStore.storeExperimentProfile(this.experimentProfile);
} }
// tslint:disable-next-line:max-func-body-length
private runInternal(): Promise<void> { private runInternal(): Promise<void> {
// TO DO: cannot run this method more than once in one NNIManager instance // TO DO: cannot run this method more than once in one NNIManager instance
if (this.tuner === undefined) { if (this.dispatcher === undefined) {
throw new Error('Error: tuner has not been setup'); throw new Error('Error: tuner has not been setup');
} }
this.trainingService.addTrialJobMetricListener(async (metric: TrialJobMetric) => { this.trainingService.addTrialJobMetricListener(async (metric: TrialJobMetric) => {
await this.dataStore.storeMetricData(metric.id, metric.data); await this.dataStore.storeMetricData(metric.id, metric.data);
if (this.tuner === undefined) { if (this.dispatcher === undefined) {
throw new Error('Error: tuner has not been setup'); throw new Error('Error: tuner has not been setup');
} }
this.tuner.sendCommand(REPORT_METRIC_DATA, metric.data); this.dispatcher.sendCommand(REPORT_METRIC_DATA, metric.data);
if (this.assessor !== undefined) {
try {
this.assessor.sendCommand(REPORT_METRIC_DATA, metric.data);
} catch (error) {
this.log.critical(`ASSESSOR ERROR: ${error.message}`);
this.log.critical(`ASSESSOR ERROR: ${error.stack}`);
}
}
}); });
this.trialJobsMaintainer = new TrialJobs( this.trialJobsMaintainer = new TrialJobs(
@ -439,7 +373,7 @@ class NNIManager implements Manager {
} else { } else {
this.log.debug(`Job event: ${event}`); this.log.debug(`Job event: ${event}`);
} }
if (this.tuner === undefined) { if (this.dispatcher === undefined) {
throw new Error('Error: tuner has not been setup'); throw new Error('Error: tuner has not been setup');
} }
switch (event) { switch (event) {
@ -453,15 +387,13 @@ class NNIManager implements Manager {
if (this.currSubmittedTrialNum < this.experimentProfile.params.maxTrialNum) { if (this.currSubmittedTrialNum < this.experimentProfile.params.maxTrialNum) {
if (this.customizedTrials.length > 0) { if (this.customizedTrials.length > 0) {
const hyperParams: string | undefined = this.customizedTrials.shift(); const hyperParams: string | undefined = this.customizedTrials.shift();
this.tuner.sendCommand(ADD_CUSTOMIZED_TRIAL_JOB, hyperParams); this.dispatcher.sendCommand(ADD_CUSTOMIZED_TRIAL_JOB, hyperParams);
} else { } else {
this.tuner.sendCommand(REQUEST_TRIAL_JOBS, '1'); this.dispatcher.sendCommand(REQUEST_TRIAL_JOBS, '1');
} }
} }
} }
if (this.assessor !== undefined) { this.dispatcher.sendCommand(TRIAL_END, JSON.stringify({trial_job_id: trialJobDetail.id, event: event}));
this.assessor.sendCommand(TRIAL_END, JSON.stringify({trial_job_id: trialJobDetail.id, event: event}));
}
await this.dataStore.storeTrialJobEvent(event, trialJobDetail.id, undefined, trialJobDetail.url); await this.dataStore.storeTrialJobEvent(event, trialJobDetail.id, undefined, trialJobDetail.url);
break; break;
case 'RUNNING': case 'RUNNING':
@ -478,15 +410,14 @@ class NNIManager implements Manager {
}); });
// TO DO: we should send INITIALIZE command to tuner if user's tuner needs to run init method in tuner // TO DO: we should send INITIALIZE command to tuner if user's tuner needs to run init method in tuner
// TO DO: we should send INITIALIZE command to assessor if user's tuner needs to run init method in tuner this.log.debug(`Send tuner command: update search space: ${this.experimentProfile.params.searchSpace}`);
this.log.debug(`Send tuner command: update search space: ${this.experimentProfile.params.searchSpace}`) this.dispatcher.sendCommand(UPDATE_SEARCH_SPACE, this.experimentProfile.params.searchSpace);
this.tuner.sendCommand(UPDATE_SEARCH_SPACE, this.experimentProfile.params.searchSpace);
if (this.trialConcurrencyReduction !== 0) { if (this.trialConcurrencyReduction !== 0) {
return Promise.reject(new Error('Error: cannot modify trialConcurrency before startExperiment')); return Promise.reject(new Error('Error: cannot modify trialConcurrency before startExperiment'));
} }
this.log.debug(`Send tuner command: ${this.experimentProfile.params.trialConcurrency}`) this.log.debug(`Send tuner command: ${this.experimentProfile.params.trialConcurrency}`)
this.tuner.sendCommand(REQUEST_TRIAL_JOBS, String(this.experimentProfile.params.trialConcurrency)); this.dispatcher.sendCommand(REQUEST_TRIAL_JOBS, String(this.experimentProfile.params.trialConcurrency));
this.tuner.onCommand(async (commandType: string, content: string) => { this.dispatcher.onCommand(async (commandType: string, content: string) => {
this.log.info(`Command from tuner: ${commandType}, ${content}`); this.log.info(`Command from tuner: ${commandType}, ${content}`);
if (this.trialJobsMaintainer === undefined) { if (this.trialJobsMaintainer === undefined) {
throw new Error('Error: trialJobsMaintainer not initialized'); throw new Error('Error: trialJobsMaintainer not initialized');
@ -501,8 +432,7 @@ class NNIManager implements Manager {
}; };
const trialJobDetail: TrialJobDetail = await this.trainingService.submitTrialJob(trialJobAppForm); const trialJobDetail: TrialJobDetail = await this.trainingService.submitTrialJob(trialJobAppForm);
this.trialJobsMaintainer.setTrialJob(trialJobDetail.id, Object.assign({}, trialJobDetail)); this.trialJobsMaintainer.setTrialJob(trialJobDetail.id, Object.assign({}, trialJobDetail));
// TO DO: to uncomment assert(trialJobDetail.status === 'WAITING');
//assert(trialJobDetail.status === 'WAITING');
await this.dataStore.storeTrialJobEvent(trialJobDetail.status, trialJobDetail.id, content, trialJobDetail.url); await this.dataStore.storeTrialJobEvent(trialJobDetail.status, trialJobDetail.id, content, trialJobDetail.url);
if (this.currSubmittedTrialNum === this.experimentProfile.params.maxTrialNum) { if (this.currSubmittedTrialNum === this.experimentProfile.params.maxTrialNum) {
this.trialJobsMaintainer.setNoMoreTrials(); this.trialJobsMaintainer.setNoMoreTrials();
@ -512,19 +442,13 @@ class NNIManager implements Manager {
case NO_MORE_TRIAL_JOBS: case NO_MORE_TRIAL_JOBS:
this.trialJobsMaintainer.setNoMoreTrials(); this.trialJobsMaintainer.setNoMoreTrials();
break; break;
default: case KILL_TRIAL_JOB:
throw new Error('Error: unsupported command type from tuner');
}
});
if (this.assessor !== undefined) {
this.assessor.onCommand(async (commandType: string, content: string) => {
if (commandType === KILL_TRIAL_JOB) {
await this.trainingService.cancelTrialJob(JSON.parse(content)); await this.trainingService.cancelTrialJob(JSON.parse(content));
} else { break;
throw new Error('Error: unsupported command type from assessor'); default:
throw new Error(`Error: unsupported command type: [${commandType}]`);
} }
}); });
}
return this.trialJobsMaintainer.run(); return this.trialJobsMaintainer.run();
} }

Просмотреть файл

@ -69,10 +69,9 @@ describe('Unit test for dataStore', () => {
} }
}`, }`,
tuner: { tuner: {
tunerCommand: 'python3 tunner.py', className: 'testTuner',
tunerCwd: '/tmp', checkpointDir: '/tmp/cp',
tunerCheckpointDirectory: '/tmp/cp', gpuNum: 0
tunerGpuNum: 0
} }
}, },
id: 'exp123', id: 'exp123',

Просмотреть файл

@ -21,5 +21,3 @@ from nni.assessor import Assessor, AssessResult
class DummyAssessor(Assessor): class DummyAssessor(Assessor):
def assess_trial(self, trial_job_id, trial_history): def assess_trial(self, trial_job_id, trial_history):
return AssessResult.Good return AssessResult.Good
DummyAssessor().run()

Просмотреть файл

@ -0,0 +1,35 @@
# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
from nni.tuner import Tuner
class DummyTuner(Tuner):
def generate_parameters(self, parameter_id):
return 'unit-test-parm'
def generate_multiple_parameters(self, parameter_id_list):
return ['unit-test-param1', 'unit-test-param2']
def receive_trial_result(self, parameter_id, parameters, reward):
pass
def receive_customized_trial_result(self, parameter_id, parameters, reward):
pass
def update_search_space(self, search_space):
pass

Просмотреть файл

@ -24,7 +24,7 @@ import { ChildProcess, spawn } from 'child_process';
import { Deferred } from 'ts-deferred'; import { Deferred } from 'ts-deferred';
import { cleanupUnitTest, prepareUnitTest } from '../../common/utils'; import { cleanupUnitTest, prepareUnitTest } from '../../common/utils';
import * as CommandType from '../commands'; import * as CommandType from '../commands';
import { createAssessorInterface, createTunerInterface, IpcInterface } from '../ipcInterface'; import { createDispatcherInterface, IpcInterface } from '../ipcInterface';
let sentCommands: {[key: string]: string}[] = []; let sentCommands: {[key: string]: string}[] = [];
const receivedCommands: {[key: string]: string}[] = []; const receivedCommands: {[key: string]: string}[] = [];
@ -52,27 +52,27 @@ function runProcess(): Promise<Error | null> {
}); });
// create IPC interface // create IPC interface
const assessor: IpcInterface = createAssessorInterface(proc); const dispatcher: IpcInterface = createDispatcherInterface(proc);
assessor.onCommand((commandType: string, content: string): void => { dispatcher.onCommand((commandType: string, content: string): void => {
receivedCommands.push({ commandType, content }); receivedCommands.push({ commandType, content });
}); });
// Command #1: ok // Command #1: ok
assessor.sendCommand('IN'); dispatcher.sendCommand('IN');
// Command #2: ok // Command #2: ok
assessor.sendCommand('ME', '123'); dispatcher.sendCommand('ME', '123');
// Command #3: too long // Command #3: too long
try { try {
assessor.sendCommand('ME', 'x'.repeat(1_000_000)); dispatcher.sendCommand('ME', 'x'.repeat(1_000_000));
} catch (error) { } catch (error) {
commandTooLong = error; commandTooLong = error;
} }
// Command #4: not assessor command // Command #4: FE is not tuner/assessor command, test the exception type of send non-valid command
try { try {
assessor.sendCommand('GE', '1'); dispatcher.sendCommand('FE', '1');
} catch (error) { } catch (error) {
rejectCommandType = error; rejectCommandType = error;
} }

Просмотреть файл

@ -22,18 +22,34 @@
import * as assert from 'assert'; import * as assert from 'assert';
import { ChildProcess, spawn } from 'child_process'; import { ChildProcess, spawn } from 'child_process';
import { Deferred } from 'ts-deferred'; import { Deferred } from 'ts-deferred';
import { cleanupUnitTest, prepareUnitTest } from '../../common/utils'; import { cleanupUnitTest, prepareUnitTest, getMsgDispatcherCommand } from '../../common/utils';
import * as CommandType from '../commands'; import * as CommandType from '../commands';
import { createAssessorInterface, IpcInterface } from '../ipcInterface'; import { createDispatcherInterface, IpcInterface } from '../ipcInterface';
let assessor: IpcInterface | undefined; let dispatcher: IpcInterface | undefined;
let procExit: boolean = false; let procExit: boolean = false;
let procError: boolean = false; let procError: boolean = false;
function startProcess(): void { function startProcess(): void {
// create fake assessor process // create fake assessor process
const stdio: {}[] = ['ignore', 'pipe', process.stderr, 'pipe', 'pipe']; const stdio: {}[] = ['ignore', 'pipe', process.stderr, 'pipe', 'pipe'];
const proc: ChildProcess = spawn('python3 dummy_assessor.py', [], { stdio, cwd: 'core/test', shell: true });
const dispatcherCmd : string = getMsgDispatcherCommand(
// Mock tuner config
{
className: 'DummyTuner',
codeDir: './',
classFileName: 'dummy_tuner.py'
},
// Mock assessor config
{
className: 'DummyAssessor',
codeDir: './',
classFileName: 'dummy_assessor.py'
}
);
const proc: ChildProcess = spawn(dispatcherCmd, [], { stdio, cwd: 'core/test', shell: true });
proc.on('error', (error: Error): void => { proc.on('error', (error: Error): void => {
procExit = true; procExit = true;
@ -45,8 +61,8 @@ function startProcess(): void {
}); });
// create IPC interface // create IPC interface
assessor = createAssessorInterface(proc); dispatcher = createDispatcherInterface(proc);
(<IpcInterface>assessor).onCommand((commandType: string, content: string): void => { (<IpcInterface>dispatcher).onCommand((commandType: string, content: string): void => {
console.log(commandType, content); // tslint:disable-line:no-console console.log(commandType, content); // tslint:disable-line:no-console
}); });
} }
@ -62,9 +78,9 @@ describe('core/ipcInterface.terminate', (): void => {
}); });
it('normal', () => { it('normal', () => {
(<IpcInterface>assessor).sendCommand( (<IpcInterface>dispatcher).sendCommand(
CommandType.REPORT_METRIC_DATA, CommandType.REPORT_METRIC_DATA,
'{"trial_job_id":"A","type":"periodical","value":1}'); '{"trial_job_id":"A","type":"PERIODICAL","value":1,"sequence":123}');
const deferred: Deferred<void> = new Deferred<void>(); const deferred: Deferred<void> = new Deferred<void>();
setTimeout( setTimeout(
@ -79,7 +95,7 @@ describe('core/ipcInterface.terminate', (): void => {
}); });
it('terminate', () => { it('terminate', () => {
(<IpcInterface>assessor).sendCommand(CommandType.TERMINATE); (<IpcInterface>dispatcher).sendCommand(CommandType.TERMINATE);
const deferred: Deferred<void> = new Deferred<void>(); const deferred: Deferred<void> = new Deferred<void>();
setTimeout( setTimeout(
@ -88,7 +104,7 @@ describe('core/ipcInterface.terminate', (): void => {
assert.ok(!procError); assert.ok(!procError);
deferred.resolve(); deferred.resolve();
}, },
1000); 2000);
return deferred.promise; return deferred.promise;
}); });

Просмотреть файл

@ -30,7 +30,6 @@ const testTrainingServiceProvider: Provider = {
}; };
class MockedTrainingService extends TrainingService { class MockedTrainingService extends TrainingService {
public mockedMetaDataValue: string = "default"; public mockedMetaDataValue: string = "default";
public jobDetail1: TrialJobDetail = { public jobDetail1: TrialJobDetail = {
id: '1234', id: '1234',
@ -93,6 +92,14 @@ class MockedTrainingService extends TrainingService {
return deferred.promise; return deferred.promise;
} }
public updateTrialJob(trialJobId: string, form: TrialJobApplicationForm): Promise<TrialJobDetail> {
throw new MethodNotImplementedError();
}
public get isMultiPhaseJobSupported(): boolean {
return false;
}
public cancelTrialJob(trialJobId: string): Promise<void> { public cancelTrialJob(trialJobId: string): Promise<void> {
const deferred = new Deferred<void>(); const deferred = new Deferred<void>();
if(trialJobId === '1234' || trialJobId === '3456'){ if(trialJobId === '1234' || trialJobId === '3456'){
@ -125,7 +132,7 @@ class MockedTrainingService extends TrainingService {
} }
public cleanUp(): Promise<void> { public cleanUp(): Promise<void> {
throw new MethodNotImplementedError(); return Promise.resolve();
} }
} }

Просмотреть файл

@ -56,16 +56,17 @@ describe('Unit test for nnimanager', function () {
maxTrialNum: 2, maxTrialNum: 2,
searchSpace: '{"x":1}', searchSpace: '{"x":1}',
tuner: { tuner: {
tunerCommand: 'python3 hyperopt.py', className: 'EvolutionTuner',
tunerCwd: 'core/test', classArgs: {
tunerCheckpointDirectory: '', optimize_mode: 'maximize'
tunerGpuNum: 1 },
checkpointDir: '',
gpuNum: 1
}, },
assessor: { assessor: {
assessorCommand: 'python3 dummy_assessor.py', className: 'MedianstopAssessor',
assessorCwd: 'core/test', checkpointDir: '',
assessorCheckpointDirectory: '', gpuNum: 1
assessorGpuNum: 1
} }
} }

Просмотреть файл

@ -38,10 +38,9 @@ const expParams1: ExperimentParams = {
maxTrialNum: 5, maxTrialNum: 5,
searchSpace: 'SS', searchSpace: 'SS',
tuner: { tuner: {
tunerCommand: './tuner.sh', className: 'testTuner',
tunerCwd: '.', checkpointDir: '/tmp',
tunerCheckpointDirectory: '/tmp', gpuNum: 0
tunerGpuNum: 0
} }
}; };
@ -53,14 +52,12 @@ const expParams2: ExperimentParams = {
maxTrialNum: 5, maxTrialNum: 5,
searchSpace: '', searchSpace: '',
tuner: { tuner: {
tunerCommand: 'python tuner.py', className: 'testTuner',
tunerCwd: '/tmp', checkpointDir: '/tmp'
tunerCheckpointDirectory: '/tmp'
}, },
assessor: { assessor: {
assessorCommand: 'python assessor.py', className: 'testAssessor',
assessorCwd: '/tmp', checkpointDir: '/tmp'
assessorCheckpointDirectory: '/tmp'
} }
}; };

Просмотреть файл

@ -37,7 +37,7 @@ export const testManagerProvider: Provider = {
}; };
export class MockedNNIManager extends Manager { export class MockedNNIManager extends Manager {
public updateExperimentProfile(experimentProfile: ExperimentProfile, updateType: ProfileUpdateType ): Promise<void> { public updateExperimentProfile(experimentProfile: ExperimentProfile, updateType: ProfileUpdateType): Promise<void> {
return Promise.resolve(); return Promise.resolve();
} }
public getTrialJobStatistics(): Promise<TrialJobStatistics[]> { public getTrialJobStatistics(): Promise<TrialJobStatistics[]> {
@ -103,23 +103,15 @@ export class MockedNNIManager extends Manager {
return deferred.promise; return deferred.promise;
} }
public getTrialJob(trialJobId: string): Promise<TrialJobDetail> { public getTrialJob(trialJobId: string): Promise<TrialJobInfo> {
const deferred: Deferred<TrialJobDetail> = new Deferred<TrialJobDetail>(); const deferred: Deferred<TrialJobInfo> = new Deferred<TrialJobInfo>();
const jobDetail: TrialJobDetail = { const jobInfo: TrialJobInfo = {
id: '1234', id: '1234',
status: 'SUCCEEDED', status: 'SUCCEEDED',
submitTime: new Date(),
startTime: new Date(), startTime: new Date(),
endTime: new Date(), endTime: new Date()
tags: ['test'],
// tslint:disable-next-line:no-http-string
url: 'http://test',
workingDirectory: '/tmp/mocked',
form: {
jobType: 'TRIAL'
}
}; };
deferred.resolve(jobDetail); deferred.resolve(jobInfo);
return deferred.promise; return deferred.promise;
} }
@ -139,9 +131,8 @@ export class MockedNNIManager extends Manager {
maxTrialNum: 3, maxTrialNum: 3,
searchSpace: '{lr: 0.01}', searchSpace: '{lr: 0.01}',
tuner: { tuner: {
tunerCommand: 'python3 tuner.py', className: 'testTuner',
tunerCwd: '/tmp/tunner', checkpointDir: ''
tunerCheckpointDirectory: ''
} }
}, },
id: '2345', id: '2345',

Просмотреть файл

@ -116,7 +116,7 @@ describe('Unit test for rest server', () => {
} }
const req: request.Options = { const req: request.Options = {
uri: `${ROOT_URL}/experiment`, uri: `${ROOT_URL}/experiment?update_type=TRIAL_CONCURRENCY`,
method: 'PUT', method: 'PUT',
json: true, json: true,
body: profile body: profile
@ -141,7 +141,7 @@ describe('Unit test for rest server', () => {
body: { body: {
exception_test_key: 'test' exception_test_key: 'test'
} }
} };
request(req, (err: Error, res: request.Response) => { request(req, (err: Error, res: request.Response) => {
if (err) { if (err) {
assert.fail(err.message); assert.fail(err.message);
@ -158,7 +158,7 @@ describe('Unit test for rest server', () => {
method: 'PUT', method: 'PUT',
json: true, json: true,
body: { body: {
MACHINE_LIST: [{ machine_list: [{
ip: '10.10.10.101', ip: '10.10.10.101',
port: 22, port: 22,
username: 'test', username: 'test',
@ -170,7 +170,7 @@ describe('Unit test for rest server', () => {
passwd: '1234' passwd: '1234'
}] }]
} }
} };
request(req, (err: Error, res: request.Response) => { request(req, (err: Error, res: request.Response) => {
if (err) { if (err) {
assert.fail(err.message); assert.fail(err.message);
@ -180,29 +180,4 @@ describe('Unit test for rest server', () => {
done(); done();
}); });
}); });
it('Test POST experiment', (done: Mocha.Done) => {
const req: request.Options = {
uri: `${ROOT_URL}/experiment`,
method: 'POST',
json: true,
body: {
author: 'test',
trial: {
entrypoint: 'python',
args: 'mnist.py'
}
}
};
// tslint:disable-next-line:no-any
request(req, (err: Error, res: request.Response, body: any) => {
if (err) {
assert.fail(err.message);
} else {
expect(res.statusCode).to.equal(200);
expect(body.experiment_id).to.equal('id-1234');
}
done();
});
});
}); });

Просмотреть файл

@ -25,7 +25,7 @@ import { EventEmitter } from 'events';
import * as fs from 'fs'; import * as fs from 'fs';
import * as path from 'path'; import * as path from 'path';
import * as ts from 'tail-stream'; import * as ts from 'tail-stream';
import { NNIError, NNIErrorNames } from '../../common/errors'; import { MethodNotImplementedError, NNIError, NNIErrorNames } from '../../common/errors';
import { getLogger, Logger } from '../../common/log'; import { getLogger, Logger } from '../../common/log';
import { TrialConfig } from '../common/trialConfig'; import { TrialConfig } from '../common/trialConfig';
import { TrialConfigMetadataKey } from '../common/trialConfigMetadataKey'; import { TrialConfigMetadataKey } from '../common/trialConfigMetadataKey';
@ -205,6 +205,22 @@ class LocalTrainingService implements TrainingService {
} }
} }
/**
* Update trial job for multi-phase
* @param trialJobId trial job id
* @param form job application form
*/
public updateTrialJob(trialJobId: string, form: JobApplicationForm): Promise<TrialJobDetail> {
throw new MethodNotImplementedError();
}
/**
* Is multiphase job supported in current training service
*/
public get isMultiPhaseJobSupported(): boolean {
return false;
}
public async cancelTrialJob(trialJobId: string): Promise<void> { public async cancelTrialJob(trialJobId: string): Promise<void> {
this.log.info(`cancelTrialJob: ${trialJobId}`); this.log.info(`cancelTrialJob: ${trialJobId}`);
const trialJob: LocalTrialJobDetail | undefined = this.jobMap.get(trialJobId); const trialJob: LocalTrialJobDetail | undefined = this.jobMap.get(trialJobId);
@ -309,7 +325,7 @@ class LocalTrainingService implements TrainingService {
runScriptLines.push(`export ${variable.key}=${variable.value}`); runScriptLines.push(`export ${variable.key}=${variable.value}`);
} }
runScriptLines.push( runScriptLines.push(
`eval ${this.localTrailConfig.command} 2>${path.join(trialJobDetail.workingDirectory, '.nni', 'stderr')}`, `eval ${this.localTrailConfig.command} 2>${path.join(trialJobDetail.workingDirectory, 'stderr')}`,
`echo $? \`date +%s%3N\` >${path.join(trialJobDetail.workingDirectory, '.nni', 'state')}`); `echo $? \`date +%s%3N\` >${path.join(trialJobDetail.workingDirectory, '.nni', 'state')}`);
await cpp.exec(`mkdir -p ${trialJobDetail.workingDirectory}`); await cpp.exec(`mkdir -p ${trialJobDetail.workingDirectory}`);

Просмотреть файл

@ -82,7 +82,12 @@ export class MetricsCollector {
private getTrialJobIdsGroupByRmMeta(status: TrialJobStatus[]): Map<RemoteMachineMeta, string[]> { private getTrialJobIdsGroupByRmMeta(status: TrialJobStatus[]): Map<RemoteMachineMeta, string[]> {
const map: Map<RemoteMachineMeta, string[]> = new Map<RemoteMachineMeta, string[]>(); const map: Map<RemoteMachineMeta, string[]> = new Map<RemoteMachineMeta, string[]>();
this.trialJobsMap.forEach((trialJob, id) => { this.trialJobsMap.forEach((trialJob, id) => {
if (status.includes(trialJob.status)) { let reservedTrialJobIds : string[] = [];
if(trialJob.rmMeta !== undefined
&& trialJob.rmMeta.gpuReservation !== undefined) {
reservedTrialJobIds = Array.from(trialJob.rmMeta.gpuReservation.values());
}
if (reservedTrialJobIds.includes(id) || status.includes(trialJob.status)) {
if (map.has(trialJob.rmMeta)) { if (map.has(trialJob.rmMeta)) {
const ids = map.get(trialJob.rmMeta); const ids = map.get(trialJob.rmMeta);
if (ids !== undefined && !ids.includes(id)) { if (ids !== undefined && !ids.includes(id)) {
@ -93,7 +98,7 @@ export class MetricsCollector {
// If the remote machine has jobs reserve GPU, also put that jobs into list to get metrics data // If the remote machine has jobs reserve GPU, also put that jobs into list to get metrics data
if(trialJob.rmMeta.gpuReservation !== undefined) { if(trialJob.rmMeta.gpuReservation !== undefined) {
const concatJobIds : string[] = initJobIds.concat(Array.from(trialJob.rmMeta.gpuReservation.values())); const concatJobIds : string[] = initJobIds.concat(reservedTrialJobIds);
initJobIds = concatJobIds.filter((item, pos) => concatJobIds.indexOf(item) === pos); initJobIds = concatJobIds.filter((item, pos) => concatJobIds.indexOf(item) === pos);
} }

Просмотреть файл

@ -31,16 +31,21 @@ export class RemoteMachineMeta {
public readonly ip : string; public readonly ip : string;
public readonly port : number; public readonly port : number;
public readonly username : string; public readonly username : string;
public readonly passwd: string; public readonly passwd?: string;
public readonly sshKeyPath?: string;
public readonly passphrase?: string;
public gpuSummary : GPUSummary | undefined; public gpuSummary : GPUSummary | undefined;
/* GPU Reservation info, the key is GPU index, the value is the job id which reserves this GPU*/ /* GPU Reservation info, the key is GPU index, the value is the job id which reserves this GPU*/
public gpuReservation : Map<number, string>; public gpuReservation : Map<number, string>;
constructor(ip : string, port : number, username : string, passwd : string) { constructor(ip : string, port : number, username : string, passwd : string,
sshKeyPath : string, passphrase : string) {
this.ip = ip; this.ip = ip;
this.port = port; this.port = port;
this.username = username; this.username = username;
this.passwd = passwd; this.passwd = passwd;
this.sshKeyPath = sshKeyPath;
this.passphrase = passphrase;
this.gpuReservation = new Map<number, string>(); this.gpuReservation = new Map<number, string>();
} }
} }

Просмотреть файл

@ -24,11 +24,11 @@ import { EventEmitter } from 'events';
import * as fs from 'fs'; import * as fs from 'fs';
import * as os from 'os'; import * as os from 'os';
import * as path from 'path'; import * as path from 'path';
import { Client } from 'ssh2'; import { Client, ConnectConfig } from 'ssh2';
import { Deferred } from 'ts-deferred'; import { Deferred } from 'ts-deferred';
import { String } from 'typescript-string-operations'; import { String } from 'typescript-string-operations';
import * as component from '../../common/component'; import * as component from '../../common/component';
import { NNIError, NNIErrorNames } from '../../common/errors'; import { MethodNotImplementedError, NNIError, NNIErrorNames } from '../../common/errors';
import { getExperimentId } from '../../common/experimentStartupInfo'; import { getExperimentId } from '../../common/experimentStartupInfo';
import { getLogger, Logger } from '../../common/log'; import { getLogger, Logger } from '../../common/log';
import { ObservableTimer } from '../../common/observableTimer'; import { ObservableTimer } from '../../common/observableTimer';
@ -195,6 +195,22 @@ class RemoteMachineTrainingService implements TrainingService {
} }
} }
/**
* Update trial job for multi-phase
* @param trialJobId trial job id
* @param form job application form
*/
public updateTrialJob(trialJobId: string, form: JobApplicationForm): Promise<TrialJobDetail> {
throw new MethodNotImplementedError();
}
/**
* Is multiphase job supported in current training service
*/
public get isMultiPhaseJobSupported(): boolean {
return false;
}
/** /**
* Cancel trial job * Cancel trial job
* @param trialJobId ID of trial job * @param trialJobId ID of trial job
@ -290,6 +306,24 @@ class RemoteMachineTrainingService implements TrainingService {
let connectedRMNum: number = 0; let connectedRMNum: number = 0;
rmMetaList.forEach((rmMeta: RemoteMachineMeta) => { rmMetaList.forEach((rmMeta: RemoteMachineMeta) => {
const conn: Client = new Client(); const conn: Client = new Client();
let connectConfig: ConnectConfig = {
host: rmMeta.ip,
port: rmMeta.port,
username: rmMeta.username };
if (rmMeta.passwd) {
connectConfig.password = rmMeta.passwd;
} else if(rmMeta.sshKeyPath) {
if(!fs.existsSync(rmMeta.sshKeyPath)) {
//SSh key path is not a valid file, reject
deferred.reject(new Error(`${rmMeta.sshKeyPath} does not exist.`));
}
const privateKey: string = fs.readFileSync(rmMeta.sshKeyPath, 'utf8');
connectConfig.privateKey = privateKey;
connectConfig.passphrase = rmMeta.passphrase;
} else {
deferred.reject(new Error(`No valid passwd or sshKeyPath is configed.`));
}
this.machineSSHClientMap.set(rmMeta, conn); this.machineSSHClientMap.set(rmMeta, conn);
conn.on('ready', async () => { conn.on('ready', async () => {
await this.initRemoteMachineOnConnected(rmMeta, conn); await this.initRemoteMachineOnConnected(rmMeta, conn);
@ -299,12 +333,7 @@ class RemoteMachineTrainingService implements TrainingService {
}).on('error', (err: Error) => { }).on('error', (err: Error) => {
// SSH connection error, reject with error message // SSH connection error, reject with error message
deferred.reject(new Error(err.message)); deferred.reject(new Error(err.message));
}).connect({ }).connect(connectConfig);
host: rmMeta.ip,
port: rmMeta.port,
username: rmMeta.username,
password: rmMeta.passwd
});
}); });
return deferred.promise; return deferred.promise;
@ -402,7 +431,7 @@ class RemoteMachineTrainingService implements TrainingService {
(typeof cuda_visible_device === 'string' && cuda_visible_device.length > 0) ? (typeof cuda_visible_device === 'string' && cuda_visible_device.length > 0) ?
`CUDA_VISIBLE_DEVICES=${cuda_visible_device} ` : `CUDA_VISIBLE_DEVICES=" " `, `CUDA_VISIBLE_DEVICES=${cuda_visible_device} ` : `CUDA_VISIBLE_DEVICES=" " `,
this.trialConfig.command, this.trialConfig.command,
path.join(trialWorkingFolder, '.nni', 'stderr'), path.join(trialWorkingFolder, 'stderr'),
path.join(trialWorkingFolder, '.nni', 'code')); path.join(trialWorkingFolder, '.nni', 'code'));
//create tmp trial working folder locally. //create tmp trial working folder locally.

Просмотреть файл

@ -0,0 +1,131 @@
# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge,
# to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
'''
__main__.py
'''
import os
import sys
import argparse
import logging
import json
import importlib
from nni.msg_dispatcher import MsgDispatcher
from nni.hyperopt_tuner.hyperopt_tuner import HyperoptTuner
from nni.evolution_tuner.evolution_tuner import EvolutionTuner
from nni.medianstop_assessor.medianstop_assessor import MedianstopAssessor
logger = logging.getLogger('nni.main')
logger.debug('START')
BUILT_IN_CLASS_NAMES = ['HyperoptTuner', 'EvolutionTuner', 'MedianstopAssessor']
def create_builtin_class_instance(classname, jsonstr_args):
if jsonstr_args:
class_args = json.loads(jsonstr_args)
instance = eval(classname)(**class_args)
else:
instance = eval(classname)()
return instance
def create_customized_class_instance(class_dir, class_filename, classname, jsonstr_args):
if not os.path.isfile(os.path.join(class_dir, class_filename)):
raise ValueError('Class file not found: {}'.format(os.path.join(class_dir, class_filename)))
sys.path.append(class_dir)
module_name = class_filename.split('.')[0]
class_module = importlib.import_module(module_name)
class_constructor = getattr(class_module, classname)
if jsonstr_args:
class_args = json.loads(jsonstr_args)
instance = class_constructor(**class_args)
else:
instance = class_constructor()
return instance
def parse_args():
parser = argparse.ArgumentParser(description='parse command line parameters.')
parser.add_argument('--tuner_class_name', type=str, required=True,
help='Tuner class name, the class must be a subclass of nni.Tuner')
parser.add_argument('--tuner_args', type=str, required=False,
help='Parameters pass to tuner __init__ constructor')
parser.add_argument('--tuner_directory', type=str, required=False,
help='Tuner directory')
parser.add_argument('--tuner_class_filename', type=str, required=False,
help='Tuner class file path')
parser.add_argument('--assessor_class_name', type=str, required=False,
help='Assessor class name, the class must be a subclass of nni.Assessor')
parser.add_argument('--assessor_args', type=str, required=False,
help='Parameters pass to assessor __init__ constructor')
parser.add_argument('--assessor_directory', type=str, required=False,
help='Assessor directory')
parser.add_argument('--assessor_class_filename', type=str, required=False,
help='Assessor class file path')
flags, _ = parser.parse_known_args()
return flags
def main():
    """Entry point: build the tuner (and optional assessor) from command
    line arguments, then run the message dispatcher until terminated."""
    args = parse_args()
    if args.tuner_class_name is None:
        raise ValueError('Tuner must be specified')

    # Built-in names are instantiated directly; any other name is loaded
    # from the user-supplied directory / class file.
    if args.tuner_class_name in BUILT_IN_CLASS_NAMES:
        tuner = create_builtin_class_instance(args.tuner_class_name, args.tuner_args)
    else:
        tuner = create_customized_class_instance(
            args.tuner_directory, args.tuner_class_filename,
            args.tuner_class_name, args.tuner_args)
    if tuner is None:
        raise AssertionError('Failed to create Tuner instance')

    assessor = None
    if args.assessor_class_name:
        if args.assessor_class_name in BUILT_IN_CLASS_NAMES:
            assessor = create_builtin_class_instance(args.assessor_class_name, args.assessor_args)
        else:
            assessor = create_customized_class_instance(
                args.assessor_directory, args.assessor_class_filename,
                args.assessor_class_name, args.assessor_args)

    dispatcher = MsgDispatcher(tuner, assessor)
    try:
        dispatcher.run()
        tuner._on_exit()
        if assessor is not None:
            assessor._on_exit()
    except Exception as exception:
        # Log, give tuner/assessor a chance to clean up, then re-raise so
        # the process exits non-zero.
        logger.exception(exception)
        tuner._on_error()
        if assessor is not None:
            assessor._on_error()
        raise
# Process entry point: make sure any fatal error is written to the log
# before the process dies with a non-zero exit code.
if __name__ == '__main__':
    try:
        main()
    except Exception as exception:
        logger.exception(exception)
        raise

Просмотреть файл

@ -19,27 +19,18 @@
# ================================================================================================== # ==================================================================================================
from collections import defaultdict
from enum import Enum
import logging import logging
import os from enum import Enum
import json_tricks from .recoverable import Recoverable
from .common import init_logger
from .protocol import CommandType, send, receive
init_logger('assessor.log')
_logger = logging.getLogger(__name__) _logger = logging.getLogger(__name__)
class AssessResult(Enum): class AssessResult(Enum):
Good = True Good = True
Bad = False Bad = False
class Assessor(Recoverable):
class Assessor:
# pylint: disable=no-self-use,unused-argument # pylint: disable=no-self-use,unused-argument
def assess_trial(self, trial_job_id, trial_history): def assess_trial(self, trial_job_id, trial_history):
@ -57,101 +48,22 @@ class Assessor:
""" """
pass pass
def load_checkpoint(self, path): def load_checkpoint(self):
"""Load the checkpoint of assessor. """Load the checkpoint of assessr.
path: checkpoint directory of assessor path: checkpoint directory for assessor
""" """
_logger.info('Load checkpoint ignored by assessor') checkpoin_path = self.get_checkpoint_path()
_logger.info('Load checkpoint ignored by assessor, checkpoint path: %s' % checkpoin_path)
def save_checkpoint(self, path): def save_checkpoint(self):
"""Save the checkpoint of assessor. """Save the checkpoint of assessor.
path: checkpoint directory of assessor path: checkpoint directory for assessor
""" """
_logger.info('Save checkpoint ignored by assessor') checkpoin_path = self.get_checkpoint_path()
_logger.info('Save checkpoint ignored by assessor, checkpoint path: %s' % checkpoin_path)
def request_save_checkpoint(self): def _on_exit(self):
"""Request to save the checkpoint of assessor
"""
self.save_checkpoint(os.getenv('NNI_CHECKPOINT_DIRECTORY'))
def run(self):
"""Run the assessor.
This function will never return unless raise.
"""
mode = os.getenv('NNI_MODE')
if mode == 'resume':
self.load_checkpoint(os.getenv('NNI_CHECKPOINT_DIRECTORY'))
while _handle_request(self):
pass pass
_logger.info('Terminated by NNI manager')
def _on_error(self):
_trial_history = defaultdict(dict) pass
'''key: trial job ID; value: intermediate results, mapping from sequence number to data'''
_ended_trials = set()
'''trial_job_id of all ended trials.
We need this because NNI manager may send metrics after reporting a trial ended.
TODO: move this logic to NNI manager
'''
def _sort_history(history):
ret = [ ]
for i, _ in enumerate(history):
if i in history:
ret.append(history[i])
else:
break
return ret
def _handle_request(assessor):
_logger.debug('waiting receive_message')
command, data = receive()
_logger.debug(command)
_logger.debug(data)
if command is CommandType.Terminate:
return False
data = json_tricks.loads(data)
if command is CommandType.ReportMetricData:
if data['type'] != 'PERIODICAL':
return True
trial_job_id = data['trial_job_id']
if trial_job_id in _ended_trials:
return True
history = _trial_history[trial_job_id]
history[data['sequence']] = data['value']
ordered_history = _sort_history(history)
if len(ordered_history) < data['sequence']: # no user-visible update since last time
return True
result = assessor.assess_trial(trial_job_id, ordered_history)
if isinstance(result, bool):
result = AssessResult.Good if result else AssessResult.Bad
elif not isinstance(result, AssessResult):
msg = 'Result of Assessor.assess_trial must be an object of AssessResult, not %s'
raise RuntimeError(msg % type(result))
if result is AssessResult.Bad:
_logger.debug('BAD, kill %s', trial_job_id)
send(CommandType.KillTrialJob, json_tricks.dumps(trial_job_id))
else:
_logger.debug('GOOD')
elif command is CommandType.TrialEnd:
trial_job_id = data['trial_job_id']
_ended_trials.add(trial_job_id)
if trial_job_id in _trial_history:
_trial_history.pop(trial_job_id)
assessor.trial_end(trial_job_id, data['event'] == 'SUCCEEDED')
else:
raise AssertionError('Unsupported command: %s' % command)
return True

Просмотреть файл

@ -26,7 +26,7 @@ class MedianstopAssessor(Assessor):
if the trials best objective value by step S is strictly worse than the median value if the trials best objective value by step S is strictly worse than the median value
of the running averages of all completed trials objectives reported up to step S of the running averages of all completed trials objectives reported up to step S
''' '''
def __init__(self, start_step, optimize_mode): def __init__(self, optimize_mode='maximize', start_step=0):
self.start_step = start_step self.start_step = start_step
self.running_history = dict() self.running_history = dict()
self.completed_avg_history = dict() self.completed_avg_history = dict()

Просмотреть файл

@ -0,0 +1,165 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
# associated documentation files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge, publish, distribute,
# sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or
# substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
# NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
# OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# ==================================================================================================
import logging
from collections import defaultdict
import json_tricks
from .protocol import CommandType, send
from .msg_dispatcher_base import MsgDispatcherBase
from .assessor import AssessResult
_logger = logging.getLogger(__name__)
# Assessor global variables
_trial_history = defaultdict(dict)
'''key: trial job ID; value: intermediate results, mapping from sequence number to data'''
_ended_trials = set()
'''trial_job_id of all ended trials.
We need this because NNI manager may send metrics after reporting a trial ended.
TODO: move this logic to NNI manager
'''
def _sort_history(history):
ret = [ ]
for i, _ in enumerate(history):
if i in history:
ret.append(history[i])
else:
break
return ret
# Tuner global variables
_next_parameter_id = 0
_trial_params = {}
'''key: trial job ID; value: parameters'''
_customized_parameter_ids = set()
def _create_parameter_id():
global _next_parameter_id # pylint: disable=global-statement
_next_parameter_id += 1
return _next_parameter_id - 1
def _pack_parameter(parameter_id, params, customized=False):
_trial_params[parameter_id] = params
ret = {
'parameter_id': parameter_id,
'parameter_source': 'customized' if customized else 'algorithm',
'parameters': params
}
return json_tricks.dumps(ret)
class MsgDispatcher(MsgDispatcherBase):
    """Routes commands from NNI manager to a tuner and an optional assessor."""

    def __init__(self, tuner, assessor=None):
        # Bug fix: was bare `super()`, which builds the proxy object but
        # never invokes the base class initializer.
        super().__init__()
        self.tuner = tuner
        self.assessor = assessor
        if assessor is None:
            _logger.debug('Assessor is not configured')

    def load_checkpoint(self):
        """Restore tuner (and assessor, if configured) state."""
        self.tuner.load_checkpoint()
        if self.assessor is not None:
            self.assessor.load_checkpoint()

    def save_checkpoint(self):
        """Persist tuner (and assessor, if configured) state."""
        self.tuner.save_checkpoint()
        if self.assessor is not None:
            self.assessor.save_checkpoint()

    def handle_request_trial_jobs(self, data):
        # data: number of trial jobs requested by NNI manager
        ids = [_create_parameter_id() for _ in range(data)]
        params_list = self.tuner.generate_multiple_parameters(ids)
        assert len(ids) == len(params_list)
        for i, _ in enumerate(ids):
            send(CommandType.NewTrialJob, _pack_parameter(ids[i], params_list[i]))
        return True

    def handle_update_search_space(self, data):
        # data: the new search space (already JSON-decoded)
        self.tuner.update_search_space(data)
        return True

    def handle_add_customized_trial(self, data):
        # data: user-provided parameters for a manually injected trial
        id_ = _create_parameter_id()
        _customized_parameter_ids.add(id_)
        send(CommandType.NewTrialJob, _pack_parameter(id_, data, customized=True))
        return True

    def handle_report_metric_data(self, data):
        """Dispatch a metric report: FINAL results go to the tuner,
        PERIODICAL (intermediate) results go to the assessor if configured.

        Raises ValueError for unknown metric types.
        """
        if data['type'] == 'FINAL':
            id_ = data['parameter_id']
            if id_ in _customized_parameter_ids:
                self.tuner.receive_customized_trial_result(id_, _trial_params[id_], data['value'])
            else:
                self.tuner.receive_trial_result(id_, _trial_params[id_], data['value'])
        elif data['type'] == 'PERIODICAL':
            # Intermediate metrics are only meaningful with an assessor.
            if self.assessor is not None:
                self._handle_intermediate_metric_data(data)
        else:
            raise ValueError('Data type not supported: {}'.format(data['type']))
        return True

    def handle_trial_end(self, data):
        """Mark the trial as ended, drop its history and notify the assessor."""
        trial_job_id = data['trial_job_id']
        _ended_trials.add(trial_job_id)
        if trial_job_id in _trial_history:
            _trial_history.pop(trial_job_id)
        if self.assessor is not None:
            self.assessor.trial_end(trial_job_id, data['event'] == 'SUCCEEDED')
        return True

    def _handle_intermediate_metric_data(self, data):
        """Feed an intermediate metric to the assessor; kill the trial if judged Bad."""
        if data['type'] != 'PERIODICAL':
            return True
        if self.assessor is None:
            return True
        trial_job_id = data['trial_job_id']
        if trial_job_id in _ended_trials:
            return True
        history = _trial_history[trial_job_id]
        history[data['sequence']] = data['value']
        ordered_history = _sort_history(history)
        if len(ordered_history) < data['sequence']:  # no user-visible update since last time
            return True
        try:
            result = self.assessor.assess_trial(trial_job_id, ordered_history)
        except Exception:
            # Bug fix: the original logged the exception and then fell
            # through to use `result`, which was unbound -> NameError.
            # Treat an assessor failure as "no verdict" and keep running.
            _logger.exception('Assessor error')
            return True
        if isinstance(result, bool):
            result = AssessResult.Good if result else AssessResult.Bad
        elif not isinstance(result, AssessResult):
            msg = 'Result of Assessor.assess_trial must be an object of AssessResult, not %s'
            raise RuntimeError(msg % type(result))
        if result is AssessResult.Bad:
            _logger.debug('BAD, kill %s', trial_job_id)
            send(CommandType.KillTrialJob, json_tricks.dumps(trial_job_id))
        else:
            _logger.debug('GOOD')

Просмотреть файл

@ -0,0 +1,90 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
# associated documentation files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge, publish, distribute,
# sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or
# substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
# NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
# OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# ==================================================================================================
#import json_tricks
import os
import logging
import json_tricks
from .common import init_logger
from .recoverable import Recoverable
from .protocol import CommandType, receive
init_logger('dispatcher.log')
_logger = logging.getLogger(__name__)
class MsgDispatcherBase(Recoverable):
    """Skeleton dispatcher: receives commands from NNI manager and routes
    each one to a matching handle_* method, which subclasses implement."""

    def run(self):
        """Run the tuner.
        This function will never return unless raise.
        """
        # Resume from checkpoint when NNI manager restarted us.
        if os.getenv('NNI_MODE') == 'resume':
            self.load_checkpoint()
        keep_going = True
        while keep_going:
            keep_going = self.handle_request()
        _logger.info('Terminated by NNI manager')

    def handle_request(self):
        """Receive one command and dispatch it; returns False to stop the loop."""
        _logger.debug('waiting receive_message')
        command, data = receive()
        if command is None:
            return False
        _logger.debug('handle request: command: [{}], data: [{}]'.format(command, data))
        if command is CommandType.Terminate:
            return False
        payload = json_tricks.loads(data)
        dispatch_table = {
            # Tuner commands:
            CommandType.RequestTrialJobs: self.handle_request_trial_jobs,
            CommandType.UpdateSearchSpace: self.handle_update_search_space,
            CommandType.AddCustomizedTrialJob: self.handle_add_customized_trial,
            # Tuner/Assessor commands:
            CommandType.ReportMetricData: self.handle_report_metric_data,
            CommandType.TrialEnd: self.handle_trial_end,
        }
        handler = dispatch_table.get(command)
        if handler is None:
            raise AssertionError('Unsupported command: {}'.format(command))
        return handler(payload)

    # The handlers below are abstract; subclasses must override them.
    def handle_request_trial_jobs(self, data):
        raise NotImplementedError('handle_request_trial_jobs not implemented')

    def handle_update_search_space(self, data):
        raise NotImplementedError('handle_update_search_space not implemented')

    def handle_add_customized_trial(self, data):
        raise NotImplementedError('handle_add_customized_trial not implemented')

    def handle_report_metric_data(self, data):
        raise NotImplementedError('handle_report_metric_data not implemented')

    def handle_trial_end(self, data):
        raise NotImplementedError('handle_trial_end not implemented')

Просмотреть файл

@ -28,7 +28,7 @@ from ..common import init_logger
_dir = os.environ['NNI_SYS_DIR'] _dir = os.environ['NNI_SYS_DIR']
_metric_file = open(os.path.join(_dir, '.nni', 'metrics'), 'wb') _metric_file = open(os.path.join(_dir, '.nni', 'metrics'), 'wb')
_log_file_path = os.path.join(_dir, '.nni', 'trial.log') _log_file_path = os.path.join(_dir, 'trial.log')
init_logger(_log_file_path) init_logger(_log_file_path)

Просмотреть файл

@ -55,6 +55,7 @@ def send(command, data):
data = data.encode('utf8') data = data.encode('utf8')
assert len(data) < 1000000, 'Command too long' assert len(data) < 1000000, 'Command too long'
msg = b'%b%06d%b' % (command.value, len(data), data) msg = b'%b%06d%b' % (command.value, len(data), data)
logging.getLogger(__name__).debug('Sending command, data: [%s]' % data)
_out_file.write(msg) _out_file.write(msg)
_out_file.flush() _out_file.flush()

Просмотреть файл

@ -0,0 +1,34 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
# associated documentation files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge, publish, distribute,
# sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or
# substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
# NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
# OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# ==================================================================================================
import os
class Recoverable:
    """Mixin providing checkpoint hooks driven by the NNI_CHECKPOINT_DIRECTORY
    environment variable. Subclasses override load/save to persist state."""

    def load_checkpoint(self):
        """Restore state from a checkpoint; default is a no-op."""
        pass

    def save_checkpoint(self):
        """Persist state to a checkpoint; default is a no-op.

        Bug fix: this hook was originally misspelled `save_checkpont`, so
        subclasses overriding `save_checkpoint` never had a base hook with
        the right name. The old misspelling is kept below as an alias for
        backward compatibility.
        """
        pass

    def save_checkpont(self):
        # Deprecated misspelling, kept so any existing caller still works.
        self.save_checkpoint()

    def get_checkpoint_path(self):
        """Return NNI_CHECKPOINT_DIRECTORY if it names an existing directory,
        otherwise None."""
        ckp_path = os.getenv('NNI_CHECKPOINT_DIRECTORY')
        if ckp_path is not None and os.path.isdir(ckp_path):
            return ckp_path
        return None

Просмотреть файл

@ -20,19 +20,13 @@
import logging import logging
import os
import json_tricks from .recoverable import Recoverable
from .common import init_logger
from .protocol import CommandType, send, receive
init_logger('tuner.log')
_logger = logging.getLogger(__name__) _logger = logging.getLogger(__name__)
class Tuner: class Tuner(Recoverable):
# pylint: disable=no-self-use,unused-argument # pylint: disable=no-self-use,unused-argument
def generate_parameters(self, parameter_id): def generate_parameters(self, parameter_id):
@ -72,100 +66,22 @@ class Tuner:
""" """
raise NotImplementedError('Tuner: update_search_space not implemented') raise NotImplementedError('Tuner: update_search_space not implemented')
def load_checkpoint(self, path): def load_checkpoint(self):
"""Load the checkpoint of tuner. """Load the checkpoint of tuner.
path: checkpoint directory for tuner path: checkpoint directory for tuner
""" """
_logger.info('Load checkpoint ignored by tuner') checkpoin_path = self.get_checkpoint_path()
_logger.info('Load checkpoint ignored by tuner, checkpoint path: %s' % checkpoin_path)
def save_checkpoint(self, path): def save_checkpoint(self):
"""Save the checkpoint of tuner. """Save the checkpoint of tuner.
path: checkpoint directory for tuner path: checkpoint directory for tuner
""" """
_logger.info('Save checkpoint ignored by tuner') checkpoin_path = self.get_checkpoint_path()
_logger.info('Save checkpoint ignored by tuner, checkpoint path: %s' % checkpoin_path)
def request_save_checkpoint(self): def _on_exit(self):
"""Request to save the checkpoint of tuner
"""
self.save_checkpoint(os.getenv('NNI_CHECKPOINT_DIRECTORY'))
def run(self):
"""Run the tuner.
This function will never return unless raise.
"""
mode = os.getenv('NNI_MODE')
if mode == 'resume':
self.load_checkpoint(os.getenv('NNI_CHECKPOINT_DIRECTORY'))
while _handle_request(self):
pass pass
_logger.info('Terminated by NNI manager')
def _on_error(self):
_next_parameter_id = 0 pass
_trial_params = {}
'''key: trial job ID; value: parameters'''
_customized_parameter_ids = set()
def _create_parameter_id():
global _next_parameter_id # pylint: disable=global-statement
_next_parameter_id += 1
return _next_parameter_id - 1
def _pack_parameter(parameter_id, params, customized=False):
_trial_params[parameter_id] = params
ret = {
'parameter_id': parameter_id,
'parameter_source': 'customized' if customized else 'algorithm',
'parameters': params
}
return json_tricks.dumps(ret)
def _handle_request(tuner):
_logger.debug('waiting receive_message')
command, data = receive()
if command is None:
return False
_logger.debug(command)
_logger.debug(data)
if command is CommandType.Terminate:
return False
data = json_tricks.loads(data)
if command is CommandType.RequestTrialJobs:
# data: number or trial jobs
ids = [_create_parameter_id() for _ in range(data)]
params_list = list(tuner.generate_multiple_parameters(ids))
assert len(ids) == len(params_list)
for i, _ in enumerate(ids):
send(CommandType.NewTrialJob, _pack_parameter(ids[i], params_list[i]))
elif command is CommandType.ReportMetricData:
# data: { 'type': 'FINAL', 'parameter_id': ..., 'value': ... }
if data['type'] == 'FINAL':
id_ = data['parameter_id']
if id_ in _customized_parameter_ids:
tuner.receive_customized_trial_result(id_, _trial_params[id_], data['value'])
else:
tuner.receive_trial_result(id_, _trial_params[id_], data['value'])
elif command is CommandType.UpdateSearchSpace:
# data: search space
tuner.update_search_space(data)
elif command is CommandType.AddCustomizedTrialJob:
# data: parameters
id_ = _create_parameter_id()
_customized_parameter_ids.add(id_)
send(CommandType.NewTrialJob, _pack_parameter(id_, data, customized=True))
else:
raise AssertionError('Unsupported command: %s' % command)
return True

Просмотреть файл

@ -261,17 +261,12 @@ class Control extends React.Component<{}, ControlState> {
} else { } else {
this.addButtonLoad(); this.addButtonLoad();
// new experiment obj // new experiment obj
const parameter = [];
parameter.push({
parameters: addTrial
});
const sendPara = JSON.stringify(parameter[0]);
axios(`${MANAGER_IP}/trial-jobs`, { axios(`${MANAGER_IP}/trial-jobs`, {
method: 'POST', method: 'POST',
headers: { headers: {
'Content-Type': 'application/json' 'Content-Type': 'application/json'
}, },
data: sendPara data: addTrial
}).then(res => { }).then(res => {
if (res.status === 200) { if (res.status === 200) {
message.success('Submit successfully'); message.success('Submit successfully');

Просмотреть файл

@ -238,9 +238,6 @@ class Para extends React.Component<{}, ParaState> {
type: 'continuous', type: 'continuous',
min: 0, min: 0,
max: 1, max: 1,
realtime: false,
calculable: true,
precision: 1,
// gradient color // gradient color
color: ['#fb7c7c', 'yellow', 'lightblue'] color: ['#fb7c7c', 'yellow', 'lightblue']
}, },
@ -357,6 +354,7 @@ class Para extends React.Component<{}, ParaState> {
this._isMounted = false; this._isMounted = false;
window.clearInterval(this.intervalIDPara); window.clearInterval(this.intervalIDPara);
} }
render() { render() {
const { option, paraNodata, dimName } = this.state; const { option, paraNodata, dimName } = this.state;
return ( return (
@ -365,6 +363,7 @@ class Para extends React.Component<{}, ParaState> {
<div className="paraTitle"> <div className="paraTitle">
<div className="paraLeft">Hyper Parameter</div> <div className="paraLeft">Hyper Parameter</div>
<div className="paraRight"> <div className="paraRight">
{/* <span>top</span> */}
<Select <Select
className="parapercent" className="parapercent"
style={{ width: '20%' }} style={{ width: '20%' }}
@ -372,10 +371,10 @@ class Para extends React.Component<{}, ParaState> {
optionFilterProp="children" optionFilterProp="children"
onSelect={this.percentNum} onSelect={this.percentNum}
> >
<Option value="0.2">0.2</Option> <Option value="0.2">20%</Option>
<Option value="0.5">0.5</Option> <Option value="0.5">50%</Option>
<Option value="0.8">0.8</Option> <Option value="0.8">80%</Option>
<Option value="1">1</Option> <Option value="1">100%</Option>
</Select> </Select>
<Select <Select
style={{ width: '60%' }} style={{ width: '60%' }}

Просмотреть файл

@ -39,12 +39,6 @@ class SlideBar extends React.Component<{}, {}> {
<Icon className="floicon" type="right" /> <Icon className="floicon" type="right" />
</Link> </Link>
</li> </li>
<li>
<Link to={'/tensor'} activeClassName="high">
<Icon className="icon" type="link" />Tensorboard
<Icon className="floicon" type="right" />
</Link>
</li>
</ul> </ul>
</div> </div>
); );

Просмотреть файл

@ -234,7 +234,6 @@ class TrialStatus extends React.Component<{}, TabState> {
// kill job // kill job
killJob = (key: number, id: string, status: string) => { killJob = (key: number, id: string, status: string) => {
if (status === 'RUNNING') {
axios(`${MANAGER_IP}/trial-jobs/${id}`, { axios(`${MANAGER_IP}/trial-jobs/${id}`, {
method: 'DELETE', method: 'DELETE',
headers: { headers: {
@ -250,9 +249,6 @@ class TrialStatus extends React.Component<{}, TabState> {
message.error('fail to cancel the job'); message.error('fail to cancel the job');
} }
}); });
} else {
message.error('you just can kill the job that status is Running');
}
} }
// get tensorflow address // get tensorflow address
@ -347,13 +343,34 @@ class TrialStatus extends React.Component<{}, TabState> {
key: 'operation', key: 'operation',
width: '10%', width: '10%',
render: (text: string, record: TableObj) => { render: (text: string, record: TableObj) => {
let trialStatus = record.status;
let flagKill = false;
if (trialStatus === 'RUNNING') {
flagKill = true;
} else {
flagKill = false;
}
return ( return (
flagKill
?
(
<Popconfirm <Popconfirm
title="Are you sure to delete this trial?" title="Are you sure to delete this trial?"
onConfirm={this.killJob.bind(this, record.key, record.id, record.status)} onConfirm={this.killJob.bind(this, record.key, record.id, record.status)}
> >
<Button type="primary" className="tableButton">Kill</Button> <Button type="primary" className="tableButton">Kill</Button>
</Popconfirm> </Popconfirm>
)
:
(
<Button
type="primary"
className="tableButton"
disabled={true}
>
Kill
</Button>
)
); );
}, },
}, { }, {

Просмотреть файл

@ -24,6 +24,11 @@
float: right; float: right;
width: 60%; width: 60%;
} }
.paraRight>span{
font-size: 14px;
color: #333;
margin-right: 5px;
}
.paraRight .parapercent{ .paraRight .parapercent{
margin-right: 10px; margin-right: 10px;
} }

Просмотреть файл

@ -9,20 +9,14 @@ searchSpacePath: search_space.json
#choice: true, false #choice: true, false
useAnnotation: false useAnnotation: false
tuner: tuner:
tunerCommand: /home/travis/virtualenv/python3.6.3/bin/python3 naive_tuner.py codeDir: .
tunerCwd: . classFileName: naive_tuner.py
tunerGpuNum: 0 className: NaiveTuner
assessor: assessor:
assessorCommand: /home/travis/virtualenv/python3.6.3/bin/python3 naive_assessor.py codeDir: .
assessorCwd: . classFileName: naive_assessor.py
assessorGpuNum: 0 className: NaiveAssessor
trial: trial:
trialCommand: /home/travis/virtualenv/python3.6.3/bin/python3 naive_trial.py command: python3 naive_trial.py
trialCodeDir: . codeDir: .
trialGpuNum: 0 gpuNum: 0
#machineList can be empty if the platform is local
machineList:
- ip:
port:
username:
passwd:

Просмотреть файл

@ -4,7 +4,7 @@ from nni.assessor import Assessor, AssessResult
_logger = logging.getLogger('NaiveAssessor') _logger = logging.getLogger('NaiveAssessor')
_logger.info('start') _logger.info('start')
_result = open('assessor_result.txt', 'w') _result = open('/tmp/nni_assessor_result.txt', 'w')
class NaiveAssessor(Assessor): class NaiveAssessor(Assessor):
def __init__(self): def __init__(self):
@ -29,10 +29,10 @@ class NaiveAssessor(Assessor):
return AssessResult.Good return AssessResult.Good
try: def _on_exit(self):
NaiveAssessor().run()
_result.write('DONE\n') _result.write('DONE\n')
except Exception as e: _result.close()
_logger.exception(e)
def _on_error(self):
_result.write('ERROR\n') _result.write('ERROR\n')
_result.close() _result.close()

Просмотреть файл

@ -5,7 +5,7 @@ from nni.tuner import Tuner
_logger = logging.getLogger('NaiveTuner') _logger = logging.getLogger('NaiveTuner')
_logger.info('start') _logger.info('start')
_result = open('tuner_result.txt', 'w') _result = open('/tmp/nni_tuner_result.txt', 'w')
class NaiveTuner(Tuner): class NaiveTuner(Tuner):
def __init__(self): def __init__(self):
@ -24,13 +24,13 @@ class NaiveTuner(Tuner):
def update_search_space(self, search_space): def update_search_space(self, search_space):
_logger.info('update_search_space: %s' % search_space) _logger.info('update_search_space: %s' % search_space)
with open('tuner_search_space.json', 'w') as file_: with open('/tmp/nni_tuner_search_space.json', 'w') as file_:
json.dump(search_space, file_) json.dump(search_space, file_)
try: def _on_exit(self):
NaiveTuner().run()
_result.write('DONE\n') _result.write('DONE\n')
except Exception as e: _result.close()
_logger.exception(e)
def _on_error(self):
_result.write('ERROR\n') _result.write('ERROR\n')
_result.close() _result.close()

Просмотреть файл

@ -1,2 +1,2 @@
#!/bin/sh #!/bin/sh
python3 -m nnicmd.nnictl $@ WEB_UI_FOLDER=${PWD}/../../src/webui python3 -m nnicmd.nnictl $@

Просмотреть файл

@ -5,6 +5,7 @@ import json
import os import os
import subprocess import subprocess
import time import time
import traceback
GREEN = '\33[32m' GREEN = '\33[32m'
RED = '\33[31m' RED = '\33[31m'
@ -25,7 +26,7 @@ def run():
with contextlib.suppress(FileNotFoundError): with contextlib.suppress(FileNotFoundError):
os.remove('tuner_result.txt') os.remove('tuner_result.txt')
with contextlib.suppress(FileNotFoundError): with contextlib.suppress(FileNotFoundError):
os.remove('assessor_result.txt') os.remove('/tmp/nni_assessor_result.txt')
proc = subprocess.run(['nnictl', 'create', '--config', 'local.yml']) proc = subprocess.run(['nnictl', 'create', '--config', 'local.yml'])
assert proc.returncode == 0, '`nnictl create` failed with code %d' % proc.returncode assert proc.returncode == 0, '`nnictl create` failed with code %d' % proc.returncode
@ -36,8 +37,8 @@ def run():
for _ in range(60): for _ in range(60):
time.sleep(1) time.sleep(1)
tuner_status = read_last_line('tuner_result.txt') tuner_status = read_last_line('/tmp/nni_tuner_result.txt')
assessor_status = read_last_line('assessor_result.txt') assessor_status = read_last_line('/tmp/nni_assessor_result.txt')
assert tuner_status != 'ERROR', 'Tuner exited with error' assert tuner_status != 'ERROR', 'Tuner exited with error'
assert assessor_status != 'ERROR', 'Assessor exited with error' assert assessor_status != 'ERROR', 'Assessor exited with error'
@ -46,7 +47,7 @@ def run():
break break
if tuner_status is not None: if tuner_status is not None:
for line in open('tuner_result.txt'): for line in open('/tmp/nni_tuner_result.txt'):
if line.strip() in ('DONE', 'ERROR'): if line.strip() in ('DONE', 'ERROR'):
break break
trial = int(line.split(' ')[0]) trial = int(line.split(' ')[0])
@ -57,16 +58,16 @@ def run():
assert tuner_status == 'DONE' and assessor_status == 'DONE', 'Failed to finish in 1 min' assert tuner_status == 'DONE' and assessor_status == 'DONE', 'Failed to finish in 1 min'
ss1 = json.load(open('search_space.json')) ss1 = json.load(open('search_space.json'))
ss2 = json.load(open('tuner_search_space.json')) ss2 = json.load(open('/tmp/nni_tuner_search_space.json'))
assert ss1 == ss2, 'Tuner got wrong search space' assert ss1 == ss2, 'Tuner got wrong search space'
tuner_result = set(open('tuner_result.txt')) tuner_result = set(open('/tmp/nni_tuner_result.txt'))
expected = set(open('expected_tuner_result.txt')) expected = set(open('expected_tuner_result.txt'))
# Trials may complete before NNI gets assessor's result, # Trials may complete before NNI gets assessor's result,
# so it is possible to have more final result than expected # so it is possible to have more final result than expected
assert tuner_result.issuperset(expected), 'Bad tuner result' assert tuner_result.issuperset(expected), 'Bad tuner result'
assessor_result = set(open('assessor_result.txt')) assessor_result = set(open('/tmp/nni_assessor_result.txt'))
expected = set(open('expected_assessor_result.txt')) expected = set(open('expected_assessor_result.txt'))
assert assessor_result == expected, 'Bad assessor result' assert assessor_result == expected, 'Bad assessor result'
@ -78,5 +79,6 @@ if __name__ == '__main__':
except Exception as e: except Exception as e:
print(RED + 'FAIL' + CLEAR) print(RED + 'FAIL' + CLEAR)
print('%r' % e) print('%r' % e)
traceback.print_exc()
subprocess.run(['nnictl', 'stop']) subprocess.run(['nnictl', 'stop'])

Просмотреть файл

@ -0,0 +1,55 @@
# Introduction
To provide a good user experience and reduce user effort, we need to design a good annotation grammar.
With the NNI system, users only need to:
1. Annotate a variable in code as:
'''@nni.variable(nni.choice(2,3,5,7),name=self.conv_size)'''
2. Annotate an intermediate result in code as:
'''@nni.report_intermediate_result(test_acc)'''
3. Annotate the output in code as:
'''@nni.report_final_result(test_acc)'''
4. Annotate a `function_choice` in code as:
'''@nni.function_choice(max_pool(h_conv1, self.pool_size),avg_pool(h_conv1, self.pool_size),name=max_pool)'''
In this way, they can easily realize automatic tuning on NNI.
For `@nni.variable`, `nni.choice` is the type of search space and there are 10 types to express your search space as follows:
1. `@nni.variable(nni.choice(option1,option2,...,optionN),name=variable)`
Which means the variable value is one of the options, which should be a list. The elements of the options can themselves be stochastic expressions.
2. `@nni.variable(nni.randint(upper),name=variable)`
Which means the variable value is a random integer in the range [0, upper).
3. `@nni.variable(nni.uniform(low, high),name=variable)`
Which means the variable value is a value uniformly between low and high.
4. `@nni.variable(nni.quniform(low, high, q),name=variable)`
Which means the variable value is a value like round(uniform(low, high) / q) * q
5. `@nni.variable(nni.loguniform(low, high),name=variable)`
Which means the variable value is a value drawn according to exp(uniform(low, high)) so that the logarithm of the return value is uniformly distributed.
6. `@nni.variable(nni.qloguniform(low, high, q),name=variable)`
Which means the variable value is a value like round(exp(uniform(low, high)) / q) * q
7. `@nni.variable(nni.normal(label, mu, sigma),name=variable)`
Which means the variable value is a real value that's normally-distributed with mean mu and standard deviation sigma.
8. `@nni.variable(nni.qnormal(label, mu, sigma, q),name=variable)`
Which means the variable value is a value like round(normal(mu, sigma) / q) * q
9. `@nni.variable(nni.lognormal(label, mu, sigma),name=variable)`
Which means the variable value is a value drawn according to exp(normal(mu, sigma))
10. `@nni.variable(nni.qlognormal(label, mu, sigma, q),name=variable)`
Which means the variable value is a value like round(exp(normal(mu, sigma)) / q) * q

Просмотреть файл

@ -0,0 +1,104 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
# associated documentation files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge, publish, distribute,
# sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or
# substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
# NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
# OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# ==================================================================================================
import os
import shutil
from . import code_generator
from . import search_space_generator
__all__ = ['generate_search_space', 'expand_annotations']
def generate_search_space(code_dir):
    """Generate search space from Python source code.
    Return a serializable search space object.
    code_dir: directory path of source files (str)
    """
    # normalize away a single trailing slash so prefix math below works
    code_dir = code_dir[:-1] if code_dir.endswith('/') else code_dir
    space = {}
    for dir_path, _, file_names in os.walk(code_dir):
        # derive the dotted module prefix from the path relative to code_dir
        if dir_path == code_dir:
            module_prefix = ''
        else:
            assert dir_path.startswith(code_dir + '/'), dir_path
            module_prefix = dir_path[len(code_dir) + 1:].replace('/', '.') + '.'
        for name in file_names:
            if not name.endswith('.py'):
                continue
            file_path = os.path.join(dir_path, name)
            # strip the '.py' suffix to form the module name
            space.update(_generate_file_search_space(file_path, module_prefix + name[:-3]))
    return space
def _generate_file_search_space(path, module):
    """Return the search-space fragment contributed by one source file.
    path: file system path of the source file (str)
    module: dotted module name used to namespace the entries (str)
    """
    with open(path) as src:
        try:
            return search_space_generator.generate(module, src.read())
        except Exception as exc:  # pylint: disable=broad-except
            # re-raise with the file path so the user can locate the problem
            if exc.args:
                message = path + ' ' + '\n'.join(exc.args)
            else:
                message = 'Failed to generate search space for %s: %r' % (path, exc)
            raise RuntimeError(message)
def expand_annotations(src_dir, dst_dir):
    """Expand annotations in user code.
    src_dir: directory path of user code (str)
    dst_dir: directory to place generated files (str)
    """
    # Use endswith() instead of indexing so an empty string does not raise
    # IndexError; this also matches the check style in generate_search_space.
    if src_dir.endswith('/'):
        src_dir = src_dir[:-1]
    if dst_dir.endswith('/'):
        dst_dir = dst_dir[:-1]
    for src_subdir, dirs, files in os.walk(src_dir):
        assert src_subdir.startswith(src_dir)
        # mirror the source tree layout under dst_dir
        dst_subdir = src_subdir.replace(src_dir, dst_dir, 1)
        os.makedirs(dst_subdir, exist_ok=True)
        for file_name in files:
            src_path = os.path.join(src_subdir, file_name)
            dst_path = os.path.join(dst_subdir, file_name)
            if file_name.endswith('.py'):
                # Python sources get their annotation strings expanded
                _expand_file_annotations(src_path, dst_path)
            else:
                # all other files are copied verbatim
                shutil.copyfile(src_path, dst_path)
        for dir_name in dirs:
            # pre-create subdirectories so empty directories are preserved
            os.makedirs(os.path.join(dst_subdir, dir_name), exist_ok=True)
def _expand_file_annotations(src_path, dst_path):
    """Expand annotations in a single Python file and write the result.
    src_path: path of the original source file (str)
    dst_path: path of the generated file (str)
    """
    with open(src_path) as src, open(dst_path, 'w') as dst:
        try:
            expanded = code_generator.parse(src.read())
            dst.write(expanded)
        except Exception as exc:  # pylint: disable=broad-except
            # attach the file path so errors are attributable to a source file
            if exc.args:
                raise RuntimeError(src_path + ' ' + '\n'.join(exc.args))
            raise RuntimeError('Failed to expand annotations for %s: %r' % (src_path, exc))

Просмотреть файл

@ -0,0 +1,240 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
# associated documentation files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge, publish, distribute,
# sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or
# substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
# NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
# OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# ==================================================================================================
import ast
import astor
# pylint: disable=unidiomatic-typecheck
def parse_annotation(code):
    """Parse an annotation string.
    Return an AST Expr node.
    code: annotation string (excluding '@')
    """
    tree = ast.parse(code)
    assert type(tree) is ast.Module, 'internal error #1'
    assert len(tree.body) == 1, 'Annotation contains more than one expression'
    expr = tree.body[0]
    assert type(expr) is ast.Expr, 'Annotation is not expression'
    return expr
def parse_annotation_function(code, func_name):
    """Parse an annotation function.
    Return the value of `name` keyword argument and the AST Call node.
    func_name: expected function name
    """
    expr = parse_annotation(code)
    call = expr.value
    assert type(call) is ast.Call, 'Annotation is not a function call'
    # the annotation must have the exact shape `nni.<func_name>(...)`
    func = call.func
    assert type(func) is ast.Attribute, 'Unexpected annotation function'
    assert type(func.value) is ast.Name, 'Invalid annotation function name'
    assert func.value.id == 'nni', 'Annotation is not a NNI function'
    assert func.attr == func_name, 'internal error #2'
    # exactly one keyword argument, and it must be `name=...`
    keywords = call.keywords
    assert len(keywords) == 1, 'Annotation function contains more than one keyword argument'
    assert keywords[0].arg == 'name', 'Annotation keyword argument is not "name"'
    return keywords[0].value, call
def parse_nni_variable(code):
    """Parse `nni.variable` expression.
    Return the name argument and AST node of annotated expression.
    code: annotation string
    """
    name, call = parse_annotation_function(code, 'variable')
    # the single positional argument must itself be a `nni.<something>(...)`
    # call (e.g. `nni.choice(...)`); it becomes the new right-hand side
    assert len(call.args) == 1, 'nni.variable contains more than one arguments'
    arg = call.args[0]
    assert type(arg) is ast.Call, 'Value of nni.variable is not a function call'
    assert type(arg.func) is ast.Attribute, 'nni.variable value is not a NNI function'
    assert type(arg.func.value) is ast.Name, 'nni.variable value is not a NNI function'
    assert arg.func.value.id == 'nni', 'nni.variable value is not a NNI function'
    # inject `name='<source text of the annotated variable>'` into the inner
    # call so the SDK can identify this variable at run time
    name_str = astor.to_source(name).strip()
    keyword_arg = ast.keyword(arg='name', value=ast.Str(s=name_str))
    arg.keywords.append(keyword_arg)
    return name, arg
def parse_nni_function(code):
    """Parse `nni.function_choice` expression.
    Return the AST node of annotated expression and a list of dumped function call expressions.
    code: annotation string
    """
    name, call = parse_annotation_function(code, 'function_choice')
    # dump each candidate call so FuncReplacer can match it structurally later
    funcs = [ast.dump(func, False) for func in call.args]
    # wrap each candidate in a zero-argument lambda so only the chosen branch
    # is evaluated at run time
    call.args = [make_lambda(arg) for arg in call.args]
    # replace the `name=` keyword value with its source text as a string literal
    name_str = astor.to_source(name).strip()
    call.keywords[0].value = ast.Str(s=name_str)
    return call, funcs
def make_lambda(call):
    """Wrap an AST Call node into a zero-argument lambda expression node.
    call: ast.Call node
    """
    assert type(call) is ast.Call, 'Argument of nni.function_choice is not function call'
    # a lambda with an empty parameter list defers evaluation of the call
    no_args = ast.arguments(args=[], vararg=None, kwarg=None, defaults=[])
    return ast.Lambda(args=no_args, body=call)
def test_variable_equal(var1, var2):
"""Test whether two variables are the same."""
if type(var1) is not type(var2):
return False
if type(var1) is ast.Name:
return var1.id == var2.id
if type(var1) is ast.Attribute:
return var1.attr == var2.attr and test_variable_equal(var1.value, var2.value)
return False
def replace_variable_node(node, annotation):
    """Replace a node annotated by `nni.variable`.
    node: the AST node to replace (must be an assignment statement)
    annotation: annotation string
    """
    assert type(node) is ast.Assign, 'nni.variable is not annotating assignment expression'
    assert len(node.targets) == 1, 'Annotated assignment has more than one left-hand value'
    name, expr = parse_nni_variable(annotation)
    # the annotation's `name=` argument must match the assignment target,
    # otherwise the annotation points at the wrong statement
    assert test_variable_equal(node.targets[0], name), 'Annotated variable has wrong name'
    # keep the original target; swap only the right-hand side for the nni call
    node.value = expr
    return node
def replace_function_node(node, annotation):
    """Replace a node annotated by `nni.function_choice`.
    node: the AST node to replace
    annotation: annotation string
    """
    target, funcs = parse_nni_function(annotation)
    # walk `node` and swap every call matching one of `funcs` for the
    # `nni.function_choice(...)` chooser expression
    FuncReplacer(funcs, target).visit(node)
    return node
class FuncReplacer(ast.NodeTransformer):
    """Replace target function call expressions inside a node annotated by
    `nni.function_choice`."""
    def __init__(self, funcs, target):
        """Constructor.
        funcs: list of dumped function call expressions to replace
        target: AST node substituted for every matching expression
        """
        self.funcs = set(funcs)
        self.target = target
    def visit_Call(self, node):  # pylint: disable=invalid-name
        # a call whose structural dump matches a recorded candidate is
        # swapped for the single chooser expression
        dumped = ast.dump(node, False)
        return self.target if dumped in self.funcs else node
class Transformer(ast.NodeTransformer):
    """Transform original code to annotated code.
    Maintains a stack with one slot per nesting level; a slot holds the
    pending annotation string (without the leading '@') that applies to the
    NEXT statement at that level, or None.
    """
    def __init__(self):
        # stack[-1] is the pending annotation for the current nesting level
        self.stack = []
        # line number of the last expression/statement visited; used by
        # callers for error reporting
        self.last_line = 0
    def visit(self, node):
        """Visit one node, applying any annotation recorded for it."""
        if isinstance(node, (ast.expr, ast.stmt)):
            self.last_line = node.lineno
        # do nothing for root
        if not self.stack:
            return self._visit_children(node)
        annotation = self.stack[-1]
        # this is a standalone string, may be an annotation
        if type(node) is ast.Expr and type(node.value) is ast.Str:
            # must not annotate an annotation string
            assert annotation is None, 'Annotating an annotation'
            return self._visit_string(node)
        if annotation is not None:  # this expression is annotated
            self.stack[-1] = None  # so next expression is not
            if annotation.startswith('nni.variable'):
                return replace_variable_node(node, annotation)
            if annotation.startswith('nni.function_choice'):
                return replace_function_node(node, annotation)
        return self._visit_children(node)
    def _visit_string(self, node):
        """Handle a bare string statement; expand or record NNI annotations."""
        string = node.value.s
        if not string.startswith('@nni.'):
            return node  # not an annotation, ignore it
        if string.startswith('@nni.report_intermediate_result(') \
                or string.startswith('@nni.report_final_result('):
            return parse_annotation(string[1:])  # expand annotation string to code
        if string.startswith('@nni.variable(') \
                or string.startswith('@nni.function_choice('):
            self.stack[-1] = string[1:]  # mark that the next expression is annotated
            return None
        raise AssertionError('Unexpected annotation function')
    def _visit_children(self, node):
        """Visit children at one deeper nesting level; the pushed slot must be
        consumed (back to None) by the time the level is popped."""
        self.stack.append(None)
        self.generic_visit(node)
        annotation = self.stack.pop()
        assert annotation is None, 'Annotation has no target'
        return node
def parse(code):
    """Annotate user code.
    Return annotated code (str).
    code: original user code (str)
    Raises RuntimeError on unparseable code or invalid annotations.
    """
    try:
        ast_tree = ast.parse(code)
    except Exception:
        raise RuntimeError('Bad Python code')
    transformer = Transformer()
    try:
        transformer.visit(ast_tree)
    except AssertionError as exc:
        # BUG FIX: `last_line` lives on the Transformer instance, not on the
        # AST root. The original read `ast_tree.last_line`, which raised
        # AttributeError here and masked the real error message.
        raise RuntimeError('%d: %s' % (transformer.last_line, exc.args[0]))
    # insert `import nni` after the last `from __future__ import ...`,
    # since future-imports must stay first per Python syntax rules
    last_future_import = -1
    import_nni = ast.Import(names=[ast.alias(name='nni', asname=None)])
    nodes = ast_tree.body
    for i, node in enumerate(nodes):
        if type(node) is ast.ImportFrom and node.module == '__future__':
            last_future_import = i
    nodes.insert(last_future_import + 1, import_nni)
    return astor.to_source(ast_tree)

Просмотреть файл

@ -0,0 +1,185 @@
import nni
"""A deep MNIST classifier using convolutional layers."""
import logging
import math
import tempfile
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
FLAGS = None
logger = logging.getLogger('mnist_AutoML')
class MnistNetwork(object):
    """
    MnistNetwork is for initializing and building a basic network for mnist.
    """
    # NOTE(review): this file appears to be machine-generated output of the
    # NNI annotation tool (astor formatting); the odd line wrapping is
    # preserved verbatim.
    def __init__(self, channel_1_num, channel_2_num, conv_size, hidden_size,
        pool_size, learning_rate, x_dim=784, y_dim=10):
        self.channel_1_num = channel_1_num
        self.channel_2_num = channel_2_num
        # conv_size/hidden_size/learning_rate constructor arguments are
        # overridden by nni.* calls inserted from the annotations
        self.conv_size = nni.choice(2, 3, 5, 7, name='self.conv_size')
        self.hidden_size = nni.choice(124, 512, 1024, name='self.hidden_size')
        self.pool_size = pool_size
        self.learning_rate = nni.uniform(0.0001, 0.1, name='self.learning_rate'
            )
        self.x_dim = x_dim
        self.y_dim = y_dim
        self.images = tf.placeholder(tf.float32, [None, self.x_dim], name=
            'input_x')
        self.labels = tf.placeholder(tf.float32, [None, self.y_dim], name=
            'input_y')
        self.keep_prob = tf.placeholder(tf.float32, name='keep_prob')
        # populated by build_network()
        self.train_step = None
        self.accuracy = None
    def build_network(self):
        """
        Building network for mnist
        """
        with tf.name_scope('reshape'):
            try:
                input_dim = int(math.sqrt(self.x_dim))
            except:
                print('input dim cannot be sqrt and reshape. input dim: ' +
                    str(self.x_dim))
                logger.debug(
                    'input dim cannot be sqrt and reshape. input dim: %s',
                    str(self.x_dim))
                raise
            x_image = tf.reshape(self.images, [-1, input_dim, input_dim, 1])
        with tf.name_scope('conv1'):
            w_conv1 = weight_variable([self.conv_size, self.conv_size, 1,
                self.channel_1_num])
            b_conv1 = bias_variable([self.channel_1_num])
            # activation function chosen at run time via nni.function_choice;
            # candidates are wrapped in lambdas so only one is evaluated
            h_conv1 = nni.function_choice(lambda : tf.nn.relu(conv2d(
                x_image, w_conv1) + b_conv1), lambda : tf.nn.sigmoid(conv2d
                (x_image, w_conv1) + b_conv1), lambda : tf.nn.tanh(conv2d(
                x_image, w_conv1) + b_conv1), name='tf.nn.relu')
        with tf.name_scope('pool1'):
            h_pool1 = nni.function_choice(lambda : max_pool(h_conv1, self.
                pool_size), lambda : avg_pool(h_conv1, self.pool_size),
                name='max_pool')
        with tf.name_scope('conv2'):
            w_conv2 = weight_variable([self.conv_size, self.conv_size, self
                .channel_1_num, self.channel_2_num])
            b_conv2 = bias_variable([self.channel_2_num])
            h_conv2 = tf.nn.relu(conv2d(h_pool1, w_conv2) + b_conv2)
        with tf.name_scope('pool2'):
            h_pool2 = max_pool(h_conv2, self.pool_size)
        last_dim = int(input_dim / (self.pool_size * self.pool_size))
        with tf.name_scope('fc1'):
            w_fc1 = weight_variable([last_dim * last_dim * self.
                channel_2_num, self.hidden_size])
            b_fc1 = bias_variable([self.hidden_size])
            h_pool2_flat = tf.reshape(h_pool2, [-1, last_dim * last_dim *
                self.channel_2_num])
            h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, w_fc1) + b_fc1)
        with tf.name_scope('dropout'):
            h_fc1_drop = tf.nn.dropout(h_fc1, self.keep_prob)
        with tf.name_scope('fc2'):
            w_fc2 = weight_variable([self.hidden_size, self.y_dim])
            b_fc2 = bias_variable([self.y_dim])
            y_conv = tf.matmul(h_fc1_drop, w_fc2) + b_fc2
        with tf.name_scope('loss'):
            cross_entropy = tf.reduce_mean(tf.nn.
                softmax_cross_entropy_with_logits(labels=self.labels,
                logits=y_conv))
        with tf.name_scope('adam_optimizer'):
            self.train_step = tf.train.AdamOptimizer(self.learning_rate
                ).minimize(cross_entropy)
        with tf.name_scope('accuracy'):
            correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(
                self.labels, 1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.
                float32))
def conv2d(x_input, w_matrix):
    """conv2d returns a 2d convolution layer with stride 1 and SAME padding."""
    return tf.nn.conv2d(x_input, w_matrix, strides=[1, 1, 1, 1], padding='SAME'
        )
def max_pool(x_input, pool_size):
    """max_pool downsamples a feature map by pool_size in each spatial dim."""
    return tf.nn.max_pool(x_input, ksize=[1, pool_size, pool_size, 1],
        strides=[1, pool_size, pool_size, 1], padding='SAME')
def avg_pool(x_input, pool_size):
    """avg_pool downsamples a feature map by average pooling (same geometry as max_pool)."""
    return tf.nn.avg_pool(x_input, ksize=[1, pool_size, pool_size, 1],
        strides=[1, pool_size, pool_size, 1], padding='SAME')
def weight_variable(shape):
    """weight_variable generates a weight variable of a given shape,
    initialized from a truncated normal (stddev 0.1)."""
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)
def bias_variable(shape):
    """bias_variable generates a bias variable of a given shape,
    initialized to the constant 0.1."""
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)
def main(params):
    """
    Main function, build mnist network, run and send result to NNI.
    params: dict of hyper-parameters (see generate_defualt_params)
    """
    mnist = input_data.read_data_sets(params['data_dir'], one_hot=True)
    print('Mnist download data down.')
    logger.debug('Mnist download data down.')
    mnist_network = MnistNetwork(channel_1_num=params['channel_1_num'],
        channel_2_num=params['channel_2_num'], conv_size=params['conv_size'
        ], hidden_size=params['hidden_size'], pool_size=params['pool_size'],
        learning_rate=params['learning_rate'])
    mnist_network.build_network()
    logger.debug('Mnist build network done.')
    graph_location = tempfile.mkdtemp()
    logger.debug('Saving graph to: %s', graph_location)
    train_writer = tf.summary.FileWriter(graph_location)
    train_writer.add_graph(tf.get_default_graph())
    test_acc = 0.0
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # NOTE(review): batch_num is used both as the number of iterations
        # and as the batch size below — mirrors the annotated source example;
        # verify this is intended
        batch_num = nni.choice(50, 250, 500, name='batch_num')
        for i in range(batch_num):
            batch = mnist.train.next_batch(batch_num)
            # NOTE(review): dropout_rate is fed as keep_prob; a value of 5
            # is outside [0, 1] — confirm against the original example
            dropout_rate = nni.choice(1, 5, name='dropout_rate')
            mnist_network.train_step.run(feed_dict={mnist_network.images:
                batch[0], mnist_network.labels: batch[1], mnist_network.
                keep_prob: dropout_rate})
            if i % 100 == 0:
                test_acc = mnist_network.accuracy.eval(feed_dict={
                    mnist_network.images: mnist.test.images, mnist_network.
                    labels: mnist.test.labels, mnist_network.keep_prob: 1.0})
                nni.report_intermediate_result(test_acc)
                logger.debug('test accuracy %g', test_acc)
                logger.debug('Pipe send intermediate result done.')
        test_acc = mnist_network.accuracy.eval(feed_dict={mnist_network.
            images: mnist.test.images, mnist_network.labels: mnist.test.
            labels, mnist_network.keep_prob: 1.0})
        nni.report_final_result(test_acc)
        logger.debug('Final result is %g', test_acc)
        logger.debug('Send final result done.')
def generate_defualt_params():
    """
    Generate default parameters for mnist network.
    """
    # NOTE: the function name keeps its historical spelling ("defualt")
    # because callers refer to it by this name.
    return {
        'data_dir': '/tmp/tensorflow/mnist/input_data',
        'dropout_rate': 0.5,
        'channel_1_num': 32,
        'channel_2_num': 64,
        'conv_size': 5,
        'pool_size': 2,
        'hidden_size': 1024,
        'learning_rate': 0.0001,
        'batch_num': 200,
    }
if __name__ == '__main__':
    try:
        main(generate_defualt_params())
    except Exception as exception:
        # log the full stack trace before re-raising so the trial fails loudly
        logger.exception(exception)
        raise

Просмотреть файл

@ -0,0 +1,56 @@
{
"mnist_with_annotation/batch_num/choice": {
"_type": "choice",
"_value": [
0,
1,
2
]
},
"mnist_with_annotation/dropout_rate/choice": {
"_type": "choice",
"_value": [
0,
1
]
},
"mnist_with_annotation/max_pool/function_choice": {
"_type": "choice",
"_value": [
0,
1
]
},
"mnist_with_annotation/self.conv_size/choice": {
"_type": "choice",
"_value": [
0,
1,
2,
3
]
},
"mnist_with_annotation/self.hidden_size/choice": {
"_type": "choice",
"_value": [
0,
1,
2
]
},
"mnist_with_annotation/self.learning_rate/uniform": {
"_type": "uniform",
"_value": [
0.0001,
0.1
]
},
"mnist_with_annotation/tf.nn.relu/function_choice": {
"_type": "choice",
"_value": [
0,
1,
2
]
}
}

Просмотреть файл

@ -0,0 +1,254 @@
#!/usr/bin/python
# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
"""A deep MNIST classifier using convolutional layers."""
import logging
import math
import tempfile
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
FLAGS = None
logger = logging.getLogger('mnist_AutoML')
class MnistNetwork(object):
    '''
    MnistNetwork is for initializing and building a basic network for mnist.
    NOTE(review): the bare triple-quoted "@nni.*" strings below are parsed by
    the NNI annotation tool and must stay immediately before the statement
    they annotate — do not move or edit them.
    '''
    def __init__(self,
                 channel_1_num,
                 channel_2_num,
                 conv_size,
                 hidden_size,
                 pool_size,
                 learning_rate,
                 x_dim=784,
                 y_dim=10):
        self.channel_1_num = channel_1_num
        self.channel_2_num = channel_2_num
        """@nni.variable(nni.choice(2, 3, 5, 7),name=self.conv_size)"""
        self.conv_size = conv_size
        """@nni.variable(nni.choice(124, 512, 1024), name=self.hidden_size)"""
        self.hidden_size = hidden_size
        self.pool_size = pool_size
        """@nni.variable(nni.uniform(0.0001, 0.1), name=self.learning_rate)"""
        self.learning_rate = learning_rate
        self.x_dim = x_dim
        self.y_dim = y_dim
        self.images = tf.placeholder(tf.float32, [None, self.x_dim], name='input_x')
        self.labels = tf.placeholder(tf.float32, [None, self.y_dim], name='input_y')
        self.keep_prob = tf.placeholder(tf.float32, name='keep_prob')
        # populated by build_network()
        self.train_step = None
        self.accuracy = None
    def build_network(self):
        '''
        Building network for mnist
        '''
        # Reshape to use within a convolutional neural net.
        # Last dimension is for "features" - there is only one here, since images are
        # grayscale -- it would be 3 for an RGB image, 4 for RGBA, etc.
        with tf.name_scope('reshape'):
            try:
                input_dim = int(math.sqrt(self.x_dim))
            except:
                print(
                    'input dim cannot be sqrt and reshape. input dim: ' + str(self.x_dim))
                logger.debug(
                    'input dim cannot be sqrt and reshape. input dim: %s', str(self.x_dim))
                raise
            x_image = tf.reshape(self.images, [-1, input_dim, input_dim, 1])
        # First convolutional layer - maps one grayscale image to 32 feature maps.
        with tf.name_scope('conv1'):
            w_conv1 = weight_variable(
                [self.conv_size, self.conv_size, 1, self.channel_1_num])
            b_conv1 = bias_variable([self.channel_1_num])
            """@nni.function_choice(tf.nn.relu(conv2d(x_image, w_conv1) + b_conv1), tf.nn.sigmoid(conv2d(x_image, w_conv1) + b_conv1), tf.nn.tanh(conv2d(x_image, w_conv1) + b_conv1), name=tf.nn.relu)"""
            h_conv1 = tf.nn.relu(conv2d(x_image, w_conv1) + b_conv1)
        # Pooling layer - downsamples by 2X.
        with tf.name_scope('pool1'):
            """@nni.function_choice(max_pool(h_conv1, self.pool_size), avg_pool(h_conv1, self.pool_size), name=max_pool)"""
            h_pool1 = max_pool(h_conv1, self.pool_size)
        # Second convolutional layer -- maps 32 feature maps to 64.
        with tf.name_scope('conv2'):
            w_conv2 = weight_variable([self.conv_size, self.conv_size,
                                       self.channel_1_num, self.channel_2_num])
            b_conv2 = bias_variable([self.channel_2_num])
            h_conv2 = tf.nn.relu(conv2d(h_pool1, w_conv2) + b_conv2)
        # Second pooling layer.
        with tf.name_scope('pool2'):
            h_pool2 = max_pool(h_conv2, self.pool_size)
        # Fully connected layer 1 -- after 2 round of downsampling, our 28x28 image
        # is down to 7x7x64 feature maps -- maps this to 1024 features.
        last_dim = int(input_dim / (self.pool_size * self.pool_size))
        with tf.name_scope('fc1'):
            w_fc1 = weight_variable(
                [last_dim * last_dim * self.channel_2_num, self.hidden_size])
            b_fc1 = bias_variable([self.hidden_size])
            h_pool2_flat = tf.reshape(
                h_pool2, [-1, last_dim * last_dim * self.channel_2_num])
            h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, w_fc1) + b_fc1)
        # Dropout - controls the complexity of the model, prevents co-adaptation of features.
        with tf.name_scope('dropout'):
            h_fc1_drop = tf.nn.dropout(h_fc1, self.keep_prob)
        # Map the 1024 features to 10 classes, one for each digit
        with tf.name_scope('fc2'):
            w_fc2 = weight_variable([self.hidden_size, self.y_dim])
            b_fc2 = bias_variable([self.y_dim])
            y_conv = tf.matmul(h_fc1_drop, w_fc2) + b_fc2
        with tf.name_scope('loss'):
            cross_entropy = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(labels=self.labels, logits=y_conv))
        with tf.name_scope('adam_optimizer'):
            self.train_step = tf.train.AdamOptimizer(
                self.learning_rate).minimize(cross_entropy)
        with tf.name_scope('accuracy'):
            correct_prediction = tf.equal(
                tf.argmax(y_conv, 1), tf.argmax(self.labels, 1))
            self.accuracy = tf.reduce_mean(
                tf.cast(correct_prediction, tf.float32))
def conv2d(x_input, w_matrix):
    """conv2d returns a 2d convolution layer with stride 1 and SAME padding."""
    return tf.nn.conv2d(x_input, w_matrix, strides=[1, 1, 1, 1], padding='SAME')
def max_pool(x_input, pool_size):
    """max_pool downsamples a feature map by pool_size in each spatial dim."""
    return tf.nn.max_pool(x_input, ksize=[1, pool_size, pool_size, 1],
                          strides=[1, pool_size, pool_size, 1], padding='SAME')
def avg_pool(x_input, pool_size):
    """avg_pool downsamples a feature map by average pooling (same geometry as max_pool)."""
    return tf.nn.avg_pool(x_input, ksize=[1, pool_size, pool_size, 1],
                          strides=[1, pool_size, pool_size, 1], padding='SAME')
def weight_variable(shape):
    """weight_variable generates a weight variable of a given shape,
    initialized from a truncated normal (stddev 0.1)."""
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)
def bias_variable(shape):
    """bias_variable generates a bias variable of a given shape,
    initialized to the constant 0.1."""
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)
def main(params):
    '''
    Main function, build mnist network, run and send result to NNI.
    params: dict of hyper-parameters (see generate_defualt_params)
    NOTE(review): the bare "@nni.*" strings are annotations parsed by the NNI
    tool; they must stay immediately before the statement they annotate.
    '''
    # Import data
    mnist = input_data.read_data_sets(params['data_dir'], one_hot=True)
    print('Mnist download data down.')
    logger.debug('Mnist download data down.')
    # Create the model
    # Build the graph for the deep net
    mnist_network = MnistNetwork(channel_1_num=params['channel_1_num'],
                                 channel_2_num=params['channel_2_num'],
                                 conv_size=params['conv_size'],
                                 hidden_size=params['hidden_size'],
                                 pool_size=params['pool_size'],
                                 learning_rate=params['learning_rate'])
    mnist_network.build_network()
    logger.debug('Mnist build network done.')
    # Write log
    graph_location = tempfile.mkdtemp()
    logger.debug('Saving graph to: %s', graph_location)
    train_writer = tf.summary.FileWriter(graph_location)
    train_writer.add_graph(tf.get_default_graph())
    test_acc = 0.0
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        """@nni.variable(nni.choice(50, 250, 500), name=batch_num)"""
        # NOTE(review): batch_num is used both as the iteration count and as
        # the batch size below — verify this is intended
        batch_num = params['batch_num']
        for i in range(batch_num):
            batch = mnist.train.next_batch(batch_num)
            """@nni.variable(nni.choice(1, 5), name=dropout_rate)"""
            # NOTE(review): fed as keep_prob; a tuned value of 5 is outside
            # [0, 1] — confirm the choice values
            dropout_rate = params['dropout_rate']
            mnist_network.train_step.run(feed_dict={mnist_network.images: batch[0],
                                                    mnist_network.labels: batch[1],
                                                    mnist_network.keep_prob: dropout_rate}
                                         )
            if i % 100 == 0:
                test_acc = mnist_network.accuracy.eval(
                    feed_dict={mnist_network.images: mnist.test.images,
                               mnist_network.labels: mnist.test.labels,
                               mnist_network.keep_prob: 1.0})
                """@nni.report_intermediate_result(test_acc)"""
                logger.debug('test accuracy %g', test_acc)
                logger.debug('Pipe send intermediate result done.')
        test_acc = mnist_network.accuracy.eval(
            feed_dict={mnist_network.images: mnist.test.images,
                       mnist_network.labels: mnist.test.labels,
                       mnist_network.keep_prob: 1.0})
        """@nni.report_final_result(test_acc)"""
        logger.debug('Final result is %g', test_acc)
        logger.debug('Send final result done.')
def generate_defualt_params():
    '''
    Generate default parameters for mnist network.
    '''
    # NOTE: the function name keeps its historical spelling ("defualt")
    # because callers refer to it by this name.
    return {
        'data_dir': '/tmp/tensorflow/mnist/input_data',
        'dropout_rate': 0.5,
        'channel_1_num': 32,
        'channel_2_num': 64,
        'conv_size': 5,
        'pool_size': 2,
        'hidden_size': 1024,
        'learning_rate': 1e-4,
        'batch_num': 200,
    }
if __name__ == '__main__':
    try:
        main(generate_defualt_params())
    except Exception as exception:
        # log the full stack trace before re-raising so the trial fails loudly
        logger.exception(exception)
        raise

Просмотреть файл

@ -0,0 +1,56 @@
{
"mnist_without_annotation/#31/choice": {
"_type": "choice",
"_value": [
0,
1,
2
]
},
"mnist_without_annotation/#68/function_choice": {
"_type": "choice",
"_value": [
0,
1,
2
]
},
"mnist_without_annotation/batch_num/choice": {
"_type": "choice",
"_value": [
0,
1,
2
]
},
"mnist_without_annotation/conv-size/choice": {
"_type": "choice",
"_value": [
0,
1,
2,
3
]
},
"mnist_without_annotation/dropout_rate/choice": {
"_type": "choice",
"_value": [
0,
1
]
},
"mnist_without_annotation/h_pool1/function_choice": {
"_type": "choice",
"_value": [
0,
1
]
},
"mnist_without_annotation/learning_rate/uniform": {
"_type": "uniform",
"_value": [
0.0001,
0.1
]
}
}

Просмотреть файл

@ -0,0 +1,248 @@
#!/usr/bin/python
# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
"""A deep MNIST classifier using convolutional layers."""
import logging
import math
import tempfile
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import nni
FLAGS = None
logger = logging.getLogger('mnist_AutoML')
class MnistNetwork(object):
    '''
    MnistNetwork builds the basic conv network for MNIST with NNI smart parameters.
    '''
    def __init__(self,
                 channel_1_num,
                 channel_2_num,
                 pool_size,
                 learning_rate=0.0001,  # fix: default added — main() never passes it (overridden by nni.uniform below)
                 x_dim=784,
                 y_dim=10):
        self.channel_1_num = channel_1_num
        self.channel_2_num = channel_2_num
        self.conv_size = nni.choice(2, 3, 5, 7, name='conv-size')  # tuned kernel size
        self.hidden_size = nni.choice(124, 512, 1024)  # example: without name
        self.pool_size = pool_size
        self.learning_rate = nni.uniform(0.0001, 0.1, name='learning_rate')  # tuned; replaces the argument
        self.x_dim = x_dim
        self.y_dim = y_dim
        self.images = tf.placeholder(tf.float32, [None, self.x_dim], name='input_x')
        self.labels = tf.placeholder(tf.float32, [None, self.y_dim], name='input_y')
        self.keep_prob = tf.placeholder(tf.float32, name='keep_prob')
        self.train_step = None  # populated by build_network()
        self.accuracy = None  # populated by build_network()
    def build_network(self):
        '''
        Build the graph: reshape -> conv1 -> pool1 -> conv2 -> pool2 -> fc1 -> dropout -> fc2.
        '''
        # Reshape to use within a convolutional neural net.
        # Last dimension is for "features" - there is only one here, since images are
        # grayscale -- it would be 3 for an RGB image, 4 for RGBA, etc.
        with tf.name_scope('reshape'):
            try:
                input_dim = int(math.sqrt(self.x_dim))
            except:
                print(
                    'input dim cannot be sqrt and reshape. input dim: ' + str(self.x_dim))
                logger.debug(
                    'input dim cannot be sqrt and reshape. input dim: %s', str(self.x_dim))
                raise
            x_image = tf.reshape(self.images, [-1, input_dim, input_dim, 1])
        # First convolutional layer - maps one grayscale image to 32 feature maps.
        with tf.name_scope('conv1'):
            w_conv1 = weight_variable(
                [self.conv_size, self.conv_size, 1, self.channel_1_num])
            b_conv1 = bias_variable([self.channel_1_num])
            h_conv1 = nni.function_choice(
                lambda: tf.nn.relu(conv2d(x_image, w_conv1) + b_conv1),
                lambda: tf.nn.sigmoid(conv2d(x_image, w_conv1) + b_conv1),
                lambda: tf.nn.tanh(conv2d(x_image, w_conv1) + b_conv1)
            )  # example: without name
        # Pooling layer - downsamples by 2X.
        with tf.name_scope('pool1'):
            # fix: removed redundant `h_pool1 = max_pool(...)` — it was immediately overwritten below
            h_pool1 = nni.function_choice(
                lambda: max_pool(h_conv1, self.pool_size),
                lambda: avg_pool(h_conv1, self.pool_size),
                name='h_pool1')
        # Second convolutional layer -- maps 32 feature maps to 64.
        with tf.name_scope('conv2'):
            w_conv2 = weight_variable([self.conv_size, self.conv_size,
                                       self.channel_1_num, self.channel_2_num])
            b_conv2 = bias_variable([self.channel_2_num])
            h_conv2 = tf.nn.relu(conv2d(h_pool1, w_conv2) + b_conv2)
        # Second pooling layer.
        with tf.name_scope('pool2'):  # example: another style
            h_pool2 = max_pool(h_conv2, self.pool_size)
        # Fully connected layer 1 -- after 2 round of downsampling, our 28x28 image
        # is down to 7x7x64 feature maps -- maps this to 1024 features.
        last_dim = int(input_dim / (self.pool_size * self.pool_size))
        with tf.name_scope('fc1'):
            w_fc1 = weight_variable(
                [last_dim * last_dim * self.channel_2_num, self.hidden_size])
            b_fc1 = bias_variable([self.hidden_size])
            h_pool2_flat = tf.reshape(
                h_pool2, [-1, last_dim * last_dim * self.channel_2_num])
            h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, w_fc1) + b_fc1)
        # Dropout - controls the complexity of the model, prevents co-adaptation of features.
        with tf.name_scope('dropout'):
            h_fc1_drop = tf.nn.dropout(h_fc1, self.keep_prob)
        # Map the 1024 features to 10 classes, one for each digit
        with tf.name_scope('fc2'):
            w_fc2 = weight_variable([self.hidden_size, self.y_dim])
            b_fc2 = bias_variable([self.y_dim])
            y_conv = tf.matmul(h_fc1_drop, w_fc2) + b_fc2
        with tf.name_scope('loss'):
            cross_entropy = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(labels=self.labels, logits=y_conv))
        with tf.name_scope('adam_optimizer'):
            self.train_step = tf.train.AdamOptimizer(
                self.learning_rate).minimize(cross_entropy)
        with tf.name_scope('accuracy'):
            correct_prediction = tf.equal(
                tf.argmax(y_conv, 1), tf.argmax(self.labels, 1))
            self.accuracy = tf.reduce_mean(
                tf.cast(correct_prediction, tf.float32))
def conv2d(x_input, w_matrix):
    """Apply a 2-D convolution with unit stride and SAME padding."""
    stride = [1, 1, 1, 1]
    return tf.nn.conv2d(x_input, w_matrix, strides=stride, padding='SAME')
def max_pool(x_input, pool_size):
    """Downsample a feature map by `pool_size` in each dimension via max pooling."""
    window = [1, pool_size, pool_size, 1]
    return tf.nn.max_pool(x_input, ksize=window, strides=window, padding='SAME')
def avg_pool(x_input, pool_size):
    """Downsample a feature map by `pool_size` in each dimension via average pooling."""
    window = [1, pool_size, pool_size, 1]
    return tf.nn.avg_pool(x_input, ksize=window, strides=window, padding='SAME')
def weight_variable(shape):
    """Create a weight Variable of `shape`, initialized from a truncated normal (stddev 0.1)."""
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))
def bias_variable(shape):
    """Create a bias Variable of `shape`, initialized to the constant 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))
def main(params):
    '''
    Main function, build mnist network, run and send result to NNI.

    params: dict with keys 'data_dir', 'channel_1_num', 'channel_2_num', 'pool_size'.
    '''
    # Import data
    mnist = input_data.read_data_sets(params['data_dir'], one_hot=True)
    print('Mnist download data down.')
    logger.debug('Mnist download data down.')
    # Create the model
    # Build the graph for the deep net
    # NOTE(review): MnistNetwork.__init__ also declares a `learning_rate`
    # parameter without a default — confirm this call does not raise TypeError.
    mnist_network = MnistNetwork(channel_1_num=params['channel_1_num'],
                                 channel_2_num=params['channel_2_num'],
                                 pool_size=params['pool_size'])
    mnist_network.build_network()
    logger.debug('Mnist build network done.')
    # Write log
    graph_location = tempfile.mkdtemp()
    logger.debug('Saving graph to: %s', graph_location)
    train_writer = tf.summary.FileWriter(graph_location)
    train_writer.add_graph(tf.get_default_graph())
    test_acc = 0.0
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # NOTE(review): batch_num is used both as the iteration count and as the
        # batch size below; the annotated counterpart uses a separate batch_size
        # smart parameter — confirm this conflation is intended.
        batch_num = nni.choice(50, 250, 500, name='batch_num')
        for i in range(batch_num):
            batch = mnist.train.next_batch(batch_num)
            # NOTE(review): this value is fed to keep_prob, a probability;
            # candidate value 5 looks out of range — confirm intended values.
            dropout_rate = nni.choice(1, 5, name='dropout_rate')
            mnist_network.train_step.run(feed_dict={mnist_network.images: batch[0],
                                                    mnist_network.labels: batch[1],
                                                    mnist_network.keep_prob: dropout_rate}
                                         )
            # Report intermediate accuracy on the test set every 100 steps.
            if i % 100 == 0:
                test_acc = mnist_network.accuracy.eval(
                    feed_dict={mnist_network.images: mnist.test.images,
                               mnist_network.labels: mnist.test.labels,
                               mnist_network.keep_prob: 1.0})
                nni.report_intermediate_result(test_acc)
                logger.debug('test accuracy %g', test_acc)
                logger.debug('Pipe send intermediate result done.')
        # Final evaluation with dropout disabled (keep_prob=1.0).
        test_acc = mnist_network.accuracy.eval(
            feed_dict={mnist_network.images: mnist.test.images,
                       mnist_network.labels: mnist.test.labels,
                       mnist_network.keep_prob: 1.0})
        nni.report_final_result(test_acc)
        logger.debug('Final result is %g', test_acc)
        logger.debug('Send final result done.')
def generate_defualt_params():
    '''
    Return the default (non-tuned) parameters for the mnist network.
    Note: the misspelled function name is the public interface used by the
    __main__ block, so it is kept as-is.
    '''
    return {
        'data_dir': '/tmp/tensorflow/mnist/input_data',
        'channel_1_num': 32,
        'channel_2_num': 64,
        'pool_size': 2,
    }
if __name__ == '__main__':
    try:
        main(generate_defualt_params())
    except Exception as exception:
        # Log the full traceback before re-raising so the trial failure is recorded.
        logger.exception(exception)
        raise

Просмотреть файл

@ -0,0 +1,123 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
# associated documentation files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge, publish, distribute,
# sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or
# substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
# NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
# OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# ==================================================================================================
import ast
# pylint: disable=unidiomatic-typecheck
# list of functions related to search space generating
_ss_funcs = [
    'choice',
    'randint',
    'uniform',
    'quniform',
    'loguniform',
    'qloguniform',
    'normal',
    'qnormal',
    'lognormal',
    'qlognormal',
    'function_choice'  # recorded with _type 'choice' in the generated search space
]
class SearchSpaceGenerator(ast.NodeVisitor):
    """AST visitor that collects `nni.*` smart parameter calls into a search space dict."""

    def __init__(self, module_name):
        self.module_name = module_name
        self.search_space = {}
        self.last_line = 0  # last parsed line, useful for error reporting

    def visit_Call(self, node):  # pylint: disable=invalid-name
        self.generic_visit(node)

        # Only calls of the shape `nni.<func>(...)` are of interest.
        attr = node.func
        if type(attr) is not ast.Attribute:
            return
        if type(attr.value) is not ast.Name or attr.value.id != 'nni':
            return
        func = attr.attr
        # Skip non-search-space helpers such as `report_final_result`.
        if func not in _ss_funcs:
            return

        self.last_line = node.lineno

        if node.keywords:
            # There is a `name` keyword argument; it must be the only one and a string literal.
            assert len(node.keywords) == 1, 'Smart parameter has keyword argument other than "name"'
            assert node.keywords[0].arg == 'name', 'Smart paramater\'s keyword argument is not "name"'
            assert type(node.keywords[0].value) is ast.Str, 'Smart parameter\'s name must be string literal'
            name = node.keywords[0].value.s
            specified_name = True
        else:
            # No name given: derive one from the line number of the last argument.
            assert len(node.args) > 0, 'Smart parameter expression has no argument'
            name = '#' + str(node.args[-1].lineno)
            specified_name = False

        if func in ('choice', 'function_choice'):
            # Choice arguments may be arbitrary expressions, so record indices instead.
            args = list(range(len(node.args)))
        else:
            # All other functions take literal numbers only.
            assert all(type(arg) is ast.Num for arg in node.args), 'Smart parameter\'s arguments must be number literals'
            args = [arg.n for arg in node.args]

        # The key keeps the original function name; the value maps function_choice to choice.
        key = self.module_name + '/' + name + '/' + func
        value = {'_type': ('choice' if func == 'function_choice' else func), '_value': args}
        if specified_name:
            # multiple functions with same name must have identical arguments
            old = self.search_space.get(key)
            assert old is None or old == value, 'Different smart parameters have same name'
        else:
            # generated name must not duplicate
            assert key not in self.search_space, 'Only one smart parameter is allowed in a line'
        self.search_space[key] = value
def generate(module_name, code):
    """Generate search space.

    Return a serializable search space object.

    module_name: name of the module (str)
    code: user code (str)

    Raises RuntimeError when the code cannot be parsed or a smart parameter
    API is used incorrectly.
    """
    try:
        ast_tree = ast.parse(code)
    except Exception as exc:
        # fix: keep the parser's message so users can locate the syntax error
        # (the old bare 'Bad Python code' discarded all detail).
        raise RuntimeError('Bad Python code: ' + str(exc))
    visitor = SearchSpaceGenerator(module_name)
    try:
        visitor.visit(ast_tree)
    except AssertionError as exc:
        # Prefix with the last successfully parsed line for error reporting.
        raise RuntimeError('%d: %s' % (visitor.last_line, exc.args[0]))
    return visitor.search_space

Просмотреть файл

@ -0,0 +1,60 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
# associated documentation files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge, publish, distribute,
# sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or
# substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
# NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
# OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# ==================================================================================================
# pylint: skip-file
from .__init__ import *
import ast
import json
import os
import shutil
from unittest import TestCase, main
class AnnotationTestCase(TestCase):
    """Tests for the NNI annotation tools: search space generation and code expansion."""

    @classmethod
    def setUpClass(cls):
        # Run from inside the package directory so the relative testcase/ paths resolve.
        os.chdir('nni_annotation')
        if os.path.isdir('_generated'):
            # Start from a clean output directory for test_code_generator.
            shutil.rmtree('_generated')

    def test_search_space_generator(self):
        # The search space built from the annotated testcases must match the
        # checked-in expected JSON exactly.
        search_space = generate_search_space('testcase/annotated')
        with open('testcase/searchspace.json') as f:
            self.assertEqual(search_space, json.load(f))

    def test_code_generator(self):
        # Expanded usercode must be AST-equivalent to the checked-in annotated
        # files; non-Python files must be copied through byte-for-byte.
        expand_annotations('testcase/usercode', '_generated')
        self._assert_source_equal('testcase/annotated/mnist.py', '_generated/mnist.py')
        self._assert_source_equal('testcase/annotated/dir/simple.py', '_generated/dir/simple.py')
        with open('testcase/usercode/nonpy.txt') as src, open('_generated/nonpy.txt') as dst:
            assert src.read() == dst.read()

    def _assert_source_equal(self, src1, src2):
        # Compare two source files by AST dump, ignoring formatting and comments.
        with open(src1) as f1, open(src2) as f2:
            ast1 = ast.dump(ast.parse(f1.read()))
            ast2 = ast.dump(ast.parse(f2.read()))
            self.assertEqual(ast1, ast2)
if __name__ == '__main__':
    # Allow running this test module directly via `python test_annotation.py`.
    main()

Просмотреть файл

@ -0,0 +1,14 @@
# Test fixture (annotated form): compared by AST dump against the expander's
# output, so only '#' comments are added here — no docstrings, no code changes.
import nni


def max_pool(k):
    # Stub; this file is parsed by the annotation tests, never executed.
    pass


h_conv1 = 1
conv_size = nni.choice(2, 3, 5, 7, name='conv_size')
# NOTE(review): avg_pool, h_conv2 and h_conv3 are intentionally undefined —
# parse-only fixture; confirm it is never imported at runtime.
h_pool1 = nni.function_choice(lambda : max_pool(h_conv1), lambda : avg_pool
    (h_conv2, h_conv3), name='max_pool')
test_acc = 1
nni.report_intermediate_result(test_acc)
test_acc = 2
nni.report_final_result(test_acc)

Просмотреть файл

@ -0,0 +1,13 @@
h_conv1 = 1  # parse-only fixture: unnamed params below get '#<line>' keys, so line numbers must not shift
conv_size = nni.choice(2, 3, 5, 7, name='conv_size')  # named smart parameter
h_pool1 = nni.function_choice(lambda : max_pool(h_conv1),  # unnamed: key derives from the last argument's line (5)
                              lambda : h_conv1,
                              lambda : avg_pool
                              (h_conv2, h_conv3)
                              )
tmp = nni.qlognormal(1.2, 3, 4.5)  # unnamed numeric parameter -> key '#8'
test_acc = 1
nni.report_intermediate_result(test_acc)  # not a search-space function; ignored by the generator
test_acc = 2
nni.report_final_result(test_acc)
nni.choice(foo, bar)(1)  # foo/bar intentionally undefined; inner call -> key '#13'

Просмотреть файл

@ -0,0 +1,171 @@
"""A deep MNIST classifier using convolutional layers.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import nni
import logging
import math
import tempfile
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
logger = logging.getLogger('mnist')
FLAGS = None
# Generated test fixture (testcase/annotated/mnist.py): compared by AST dump
# against expander output — only '#' comments may be added (no docstrings).
class MnistNetwork(object):

    def __init__(self, channel_1_num=32, channel_2_num=64, conv_size=5,
                 hidden_size=1024, pool_size=2, learning_rate=0.0001, x_dim=784,
                 y_dim=10):
        self.channel_1_num = channel_1_num
        self.channel_2_num = channel_2_num
        self.conv_size = nni.choice(2, 3, 5, 7, name='self.conv_size')
        self.hidden_size = nni.choice(124, 512, 1024, name='self.hidden_size')
        self.pool_size = pool_size
        self.learning_rate = nni.randint(2, 3, 5, name='self.learning_rate')
        self.x_dim = x_dim
        self.y_dim = y_dim

    def build_network(self):
        self.x = tf.placeholder(tf.float32, [None, self.x_dim], name='input_x')
        self.y = tf.placeholder(tf.float32, [None, self.y_dim], name='input_y')
        self.keep_prob = tf.placeholder(tf.float32, name='keep_prob')
        with tf.name_scope('reshape'):
            try:
                input_dim = int(math.sqrt(self.x_dim))
            except:
                logger.debug(
                    'input dim cannot be sqrt and reshape. input dim: ' +
                    str(self.x_dim))
                raise
            x_image = tf.reshape(self.x, [-1, input_dim, input_dim, 1])
        with tf.name_scope('conv1'):
            W_conv1 = weight_variable([self.conv_size, self.conv_size, 1,
                self.channel_1_num])
            b_conv1 = bias_variable([self.channel_1_num])
            h_conv1 = nni.function_choice(lambda : tf.nn.relu(conv2d(
                x_image, W_conv1) + b_conv1), lambda : tf.nn.sigmoid(conv2d
                (x_image, W_conv1) + b_conv1), lambda : tf.nn.tanh(conv2d(
                x_image, W_conv1) + b_conv1), name='tf.nn.relu')
        with tf.name_scope('pool1'):
            h_pool1 = nni.function_choice(lambda : max_pool(h_conv1, self.
                pool_size), lambda : avg_pool(h_conv1, self.pool_size),
                name='max_pool')
        with tf.name_scope('conv2'):
            W_conv2 = weight_variable([self.conv_size, self.conv_size, self
                .channel_1_num, self.channel_2_num])
            b_conv2 = bias_variable([self.channel_2_num])
            h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
        with tf.name_scope('pool2'):
            h_pool2 = max_pool(h_conv2, self.pool_size)
        last_dim = int(input_dim / (self.pool_size * self.pool_size))
        with tf.name_scope('fc1'):
            W_fc1 = weight_variable([last_dim * last_dim * self.
                channel_2_num, self.hidden_size])
            b_fc1 = bias_variable([self.hidden_size])
            h_pool2_flat = tf.reshape(h_pool2, [-1, last_dim * last_dim * self.
                channel_2_num])
            h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
        with tf.name_scope('dropout'):
            h_fc1_drop = tf.nn.dropout(h_fc1, self.keep_prob)
        with tf.name_scope('fc2'):
            W_fc2 = weight_variable([self.hidden_size, self.y_dim])
            b_fc2 = bias_variable([self.y_dim])
            y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
        with tf.name_scope('loss'):
            cross_entropy = tf.reduce_mean(tf.nn.
                softmax_cross_entropy_with_logits(labels=self.y, logits=y_conv)
                )
        with tf.name_scope('adam_optimizer'):
            self.train_step = tf.train.AdamOptimizer(self.learning_rate
                ).minimize(cross_entropy)
        with tf.name_scope('accuracy'):
            correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(
                self.y, 1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.
                float32))
        return
# Generated fixture helpers — AST-compared; keep code and docstrings byte-identical.
def conv2d(x, W):
    """conv2d returns a 2d convolution layer with full stride."""
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')


def max_pool(x, pool_size):
    """max_pool downsamples a feature map by 2X."""
    return tf.nn.max_pool(x, ksize=[1, pool_size, pool_size, 1], strides=[1,
        pool_size, pool_size, 1], padding='SAME')


# Average-pooling variant of max_pool (no docstring: adding one would change the AST).
def avg_pool(x, pool_size):
    return tf.nn.avg_pool(x, ksize=[1, pool_size, pool_size, 1], strides=[1,
        pool_size, pool_size, 1], padding='SAME')


def weight_variable(shape):
    """weight_variable generates a weight variable of a given shape."""
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)


def bias_variable(shape):
    """bias_variable generates a bias variable of a given shape."""
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)
# Generated fixture main() — AST-compared; '#' comments only.
def main():
    data_dir = '/tmp/tensorflow/mnist/input_data'
    mnist = input_data.read_data_sets(data_dir, one_hot=True)
    logger.debug('Mnist download data down.')
    mnist_network = MnistNetwork()
    mnist_network.build_network()
    logger.debug('Mnist build network done.')
    graph_location = tempfile.mkdtemp()
    logger.debug('Saving graph to: %s' % graph_location)
    train_writer = tf.summary.FileWriter(graph_location)
    train_writer.add_graph(tf.get_default_graph())
    test_acc = 0.0
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        batch_num = 200
        for i in range(batch_num):
            # batch_size and dropout_rate are expanded smart parameters.
            batch_size = nni.choice(50, 250, 500, name='batch_size')
            batch = mnist.train.next_batch(batch_size)
            dropout_rate = nni.choice(1, 5, name='dropout_rate')
            mnist_network.train_step.run(feed_dict={mnist_network.x: batch[
                0], mnist_network.y: batch[1], mnist_network.keep_prob:
                dropout_rate})
            if i % 100 == 0:
                test_acc = mnist_network.accuracy.eval(feed_dict={
                    mnist_network.x: mnist.test.images, mnist_network.y:
                    mnist.test.labels, mnist_network.keep_prob: 1.0})
                nni.report_intermediate_result(test_acc)
        test_acc = mnist_network.accuracy.eval(feed_dict={mnist_network.x:
            mnist.test.images, mnist_network.y: mnist.test.labels,
            mnist_network.keep_prob: 1.0})
        nni.report_final_result(test_acc)
# Generated fixture: default hyper-parameters (not used by the smart-parameter flow above).
def generate_default_params():
    params = {'data_dir': '/tmp/tensorflow/mnist/input_data',
        'dropout_rate': 0.5, 'channel_1_num': 32, 'channel_2_num': 64,
        'conv_size': 5, 'pool_size': 2, 'hidden_size': 1024, 'batch_size':
        50, 'batch_num': 200, 'learning_rate': 0.0001}
    return params
if __name__ == '__main__':
    try:
        params = generate_default_params()
        logger.debug('params')
        logger.debug('params update')
        main()
    # Bare except kept as-is: this file must stay AST-identical to expander output.
    except:
        logger.exception('Got some exception in while loop in mnist.py')
        raise

Просмотреть файл

@ -0,0 +1,54 @@
{
"handwrite/conv_size/choice": {
"_type": "choice",
"_value": [ 0, 1, 2, 3 ]
},
"handwrite/#5/function_choice": {
"_type": "choice",
"_value": [ 0, 1, 2 ]
},
"handwrite/#8/qlognormal": {
"_type": "qlognormal",
"_value": [ 1.2, 3, 4.5 ]
},
"handwrite/#13/choice": {
"_type": "choice",
"_value": [ 0, 1 ]
},
"mnist/self.conv_size/choice": {
"_type": "choice",
"_value": [ 0, 1, 2, 3 ]
},
"mnist/self.hidden_size/choice": {
"_type": "choice",
"_value": [ 0, 1, 2 ]
},
"mnist/self.learning_rate/randint": {
"_type": "randint",
"_value": [ 2, 3, 5 ]
},
"mnist/tf.nn.relu/function_choice": {
"_type": "choice",
"_value": [ 0, 1, 2 ]
},
"mnist/max_pool/function_choice": {
"_type": "choice",
"_value": [ 0, 1 ]
},
"mnist/batch_size/choice": {
"_type": "choice",
"_value": [ 0, 1, 2 ]
},
"mnist/dropout_rate/choice": {
"_type": "choice",
"_value": [ 0, 1 ]
},
"dir.simple/conv_size/choice": {
"_type": "choice",
"_value": [ 0, 1, 2, 3 ]
},
"dir.simple/max_pool/function_choice": {
"_type": "choice",
"_value": [ 0, 1 ]
}
}

Просмотреть файл

@ -0,0 +1,11 @@
# Annotation-tool input fixture: the triple-quoted '@nni.*' string statements
# are consumed by the expander and must stay byte-identical.
def max_pool(k):
    # Stub; this fixture is parsed and expanded, never executed.
    pass


h_conv1=1
"""@nni.variable(nni.choice(2,3,5,7),name=conv_size)"""
conv_size = 5
"""@nni.function_choice(max_pool(h_conv1),avg_pool(h_conv2,h_conv3),name=max_pool)"""
h_pool1 = max_pool(h_conv1)
test_acc=1
'''@nni.report_intermediate_result(test_acc)'''
test_acc=2
'''@nni.report_final_result(test_acc)'''

Просмотреть файл

@ -0,0 +1,208 @@
# -*- encoding:utf8 -*-
"""A deep MNIST classifier using convolutional layers.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import logging
import math
import tempfile
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
logger = logging.getLogger('mnist')
FLAGS = None
# Annotation-tool input fixture: '@nni.*' string annotations attach to the
# statement that follows them and must stay byte-identical; only '#' comments added.
class MnistNetwork(object):
    def __init__(self,
                 channel_1_num = 32,
                 channel_2_num = 64,
                 conv_size = 5,
                 hidden_size = 1024,
                 pool_size = 2,
                 learning_rate = 0.0001,
                 x_dim = 784,
                 y_dim = 10):
        self.channel_1_num = channel_1_num
        self.channel_2_num = channel_2_num
        '''@nni.variable(nni.choice(2,3,5,7),name=self.conv_size)'''
        self.conv_size = conv_size
        '''@nni.variable(nni.choice(124,512,1024),name=self.hidden_size)'''
        self.hidden_size = hidden_size
        self.pool_size = pool_size
        '''@nni.variable(nni.randint(2,3,5),name=self.learning_rate)'''
        self.learning_rate = learning_rate
        self.x_dim = x_dim
        self.y_dim = y_dim

    def build_network(self):
        self.x = tf.placeholder(tf.float32, [None, self.x_dim], name = 'input_x')
        self.y = tf.placeholder(tf.float32, [None, self.y_dim], name = 'input_y')
        self.keep_prob = tf.placeholder(tf.float32, name = 'keep_prob')
        # Reshape to use within a convolutional neural net.
        # Last dimension is for "features" - there is only one here, since images are
        # grayscale -- it would be 3 for an RGB image, 4 for RGBA, etc.
        with tf.name_scope('reshape'):
            try:
                input_dim = int(math.sqrt(self.x_dim))
            except:
                #print('input dim cannot be sqrt and reshape. input dim: ' + str(self.x_dim))
                logger.debug('input dim cannot be sqrt and reshape. input dim: ' + str(self.x_dim))
                raise
            x_image = tf.reshape(self.x, [-1, input_dim, input_dim, 1])
        # First convolutional layer - maps one grayscale image to 32 feature maps.
        with tf.name_scope('conv1'):
            W_conv1 = weight_variable([self.conv_size, self.conv_size, 1, self.channel_1_num])
            b_conv1 = bias_variable([self.channel_1_num])
            """@nni.function_choice(tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1),tf.nn.sigmoid(conv2d(x_image, W_conv1) + b_conv1),tf.nn.tanh(conv2d(x_image, W_conv1) + b_conv1),name=tf.nn.relu)"""
            h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
        # Pooling layer - downsamples by 2X.
        with tf.name_scope('pool1'):
            """@nni.function_choice(max_pool(h_conv1, self.pool_size),avg_pool(h_conv1, self.pool_size),name=max_pool)"""
            h_pool1 = max_pool(h_conv1, self.pool_size)
        # Second convolutional layer -- maps 32 feature maps to 64.
        with tf.name_scope('conv2'):
            W_conv2 = weight_variable([self.conv_size, self.conv_size, self.channel_1_num, self.channel_2_num])
            b_conv2 = bias_variable([self.channel_2_num])
            h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
        # Second pooling layer.
        with tf.name_scope('pool2'):
            #"""@nni.dynamic(input={cnn_block:1, concat:2},function_choice={"cnn_block":(x,nni.choice([3,4])),"cnn_block":(x),"concat":(x,y)},limit={"cnn_block.input":[concat,input],"concat.input":[this.depth-1,this.depth-3,this.depth-5],"graph.width":[1]})"""
            h_pool2 = max_pool(h_conv2, self.pool_size)
        # Fully connected layer 1 -- after 2 round of downsampling, our 28x28 image
        # is down to 7x7x64 feature maps -- maps this to 1024 features.
        last_dim = int(input_dim / (self.pool_size * self.pool_size))
        with tf.name_scope('fc1'):
            W_fc1 = weight_variable([last_dim * last_dim * self.channel_2_num, self.hidden_size])
            b_fc1 = bias_variable([self.hidden_size])
            h_pool2_flat = tf.reshape(h_pool2, [-1, last_dim * last_dim * self.channel_2_num])
            h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
        # Dropout - controls the complexity of the model, prevents co-adaptation of features.
        with tf.name_scope('dropout'):
            h_fc1_drop = tf.nn.dropout(h_fc1, self.keep_prob)
        # Map the 1024 features to 10 classes, one for each digit
        with tf.name_scope('fc2'):
            W_fc2 = weight_variable([self.hidden_size, self.y_dim])
            b_fc2 = bias_variable([self.y_dim])
            y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
        with tf.name_scope('loss'):
            cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels = self.y, logits = y_conv))
        with tf.name_scope('adam_optimizer'):
            self.train_step = tf.train.AdamOptimizer(self.learning_rate).minimize(cross_entropy)
        with tf.name_scope('accuracy'):
            correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(self.y, 1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        return
# Fixture helpers for the usercode mnist example; code kept byte-identical.
def conv2d(x, W):
    """conv2d returns a 2d convolution layer with full stride."""
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')


def max_pool(x, pool_size):
    """max_pool downsamples a feature map by 2X."""
    return tf.nn.max_pool(x, ksize=[1, pool_size, pool_size, 1],
                          strides=[1, pool_size, pool_size, 1], padding='SAME')


# Average-pooling counterpart of max_pool.
def avg_pool(x,pool_size):
    return tf.nn.avg_pool(x, ksize=[1, pool_size, pool_size, 1],
                          strides=[1, pool_size, pool_size, 1], padding='SAME')


def weight_variable(shape):
    """weight_variable generates a weight variable of a given shape."""
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)


def bias_variable(shape):
    """bias_variable generates a bias variable of a given shape."""
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)
# Annotation-tool input fixture main(); '@nni.*' strings kept byte-identical.
def main():
    # Import data
    data_dir= '/tmp/tensorflow/mnist/input_data'
    mnist = input_data.read_data_sets(data_dir, one_hot=True)
    logger.debug('Mnist download data down.')
    # Create the model
    # Build the graph for the deep net
    mnist_network = MnistNetwork()
    mnist_network.build_network()
    logger.debug('Mnist build network done.')
    # Write log
    graph_location = tempfile.mkdtemp()
    logger.debug('Saving graph to: %s' % graph_location)
    # print('Saving graph to: %s' % graph_location)
    train_writer = tf.summary.FileWriter(graph_location)
    train_writer.add_graph(tf.get_default_graph())
    test_acc = 0.0
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        batch_num=200
        for i in range(batch_num):
            '''@nni.variable(nni.choice(50,250,500),name=batch_size)'''
            batch_size=50
            batch = mnist.train.next_batch(batch_size)
            '''@nni.variable(nni.choice(1,5),name=dropout_rate)'''
            dropout_rate=0.5
            mnist_network.train_step.run(feed_dict={mnist_network.x: batch[0], mnist_network.y: batch[1], mnist_network.keep_prob: dropout_rate})
            if i % 100 == 0:
                #train_accuracy = mnist_network.accuracy.eval(feed_dict={
                #    mnist_network.x: batch[0], mnist_network.y: batch[1], mnist_network.keep_prob: params['dropout_rate']})
                #print('step %d, training accuracy %g' % (i, train_accuracy))
                test_acc = mnist_network.accuracy.eval(feed_dict={
                    mnist_network.x: mnist.test.images, mnist_network.y: mnist.test.labels, mnist_network.keep_prob: 1.0})
                '''@nni.report_intermediate_result(test_acc)'''
        test_acc = mnist_network.accuracy.eval(feed_dict={
            mnist_network.x: mnist.test.images, mnist_network.y: mnist.test.labels, mnist_network.keep_prob: 1.0})
        '''@nni.report_final_result(test_acc)'''
# Fixture: default hyper-parameters for a non-tuned run.
def generate_default_params():
    params = {'data_dir': '/tmp/tensorflow/mnist/input_data',
              'dropout_rate': 0.5,
              'channel_1_num': 32,
              'channel_2_num': 64,
              'conv_size': 5,
              'pool_size': 2,
              'hidden_size': 1024,
              'batch_size': 50,
              'batch_num': 200,
              'learning_rate': 1e-4}
    return params
if __name__ == '__main__':
    # run command: python mnist.py --init_file_path ./init.json
    #FLAGS, unparsed = parse_command()
    #original_params = parse_init_json(FLAGS.init_file_path, {})
    #pipe_interface.set_params_to_env()
    try:
        params = generate_default_params()
        logger.debug('params')
        logger.debug('params update')
        main()
    # Bare except kept as-is: fixture content must match the annotated counterpart's AST.
    except:
        logger.exception('Got some exception in while loop in mnist.py')
        raise

Просмотреть файл

@ -0,0 +1 @@
hello

Просмотреть файл

@ -33,7 +33,7 @@ class Config:
def get_all_config(self): def get_all_config(self):
'''get all of config values''' '''get all of config values'''
return json.dumps(self.config) return json.dumps(self.config, indent=4, sort_keys=True, separators=(',', ':'))
def set_config(self, key, value): def set_config(self, key, value):
'''set {key:value} pairs to self.config''' '''set {key:value} pairs to self.config'''

Просмотреть файл

@ -22,9 +22,9 @@
import json import json
import os import os
import shutil import shutil
from subprocess import Popen, PIPE from subprocess import Popen, PIPE, call
import tempfile import tempfile
from annotation import * from nni_annotation import *
from .launcher_utils import validate_all_content from .launcher_utils import validate_all_content
from .rest_utils import rest_put, rest_post, check_rest_server, check_rest_server_quick from .rest_utils import rest_put, rest_post, check_rest_server, check_rest_server_quick
from .url_utils import cluster_metadata_url, experiment_url from .url_utils import cluster_metadata_url, experiment_url
@ -33,7 +33,7 @@ from .common_utils import get_yml_content, get_json_content, print_error, print_
from .constants import EXPERIMENT_SUCCESS_INFO, STDOUT_FULL_PATH, STDERR_FULL_PATH, LOG_DIR, REST_PORT, ERROR_INFO, NORMAL_INFO from .constants import EXPERIMENT_SUCCESS_INFO, STDOUT_FULL_PATH, STDERR_FULL_PATH, LOG_DIR, REST_PORT, ERROR_INFO, NORMAL_INFO
from .webui_utils import start_web_ui, check_web_ui from .webui_utils import start_web_ui, check_web_ui
def start_rest_server(manager, port, platform, mode, experiment_id=None): def start_rest_server(port, platform, mode, experiment_id=None):
'''Run nni manager process''' '''Run nni manager process'''
print_normal('Checking experiment...') print_normal('Checking experiment...')
nni_config = Config() nni_config = Config()
@ -44,6 +44,7 @@ def start_rest_server(manager, port, platform, mode, experiment_id=None):
exit(0) exit(0)
print_normal('Starting restful server...') print_normal('Starting restful server...')
manager = os.environ.get('NNI_MANAGER', 'nnimanager')
cmds = [manager, '--port', str(port), '--mode', platform, '--start_mode', mode] cmds = [manager, '--port', str(port), '--mode', platform, '--start_mode', mode]
if mode == 'resume': if mode == 'resume':
cmds += ['--experiment_id', experiment_id] cmds += ['--experiment_id', experiment_id]
@ -58,9 +59,9 @@ def set_trial_config(experiment_config, port):
'''set trial configuration''' '''set trial configuration'''
request_data = dict() request_data = dict()
value_dict = dict() value_dict = dict()
value_dict['command'] = experiment_config['trial']['trialCommand'] value_dict['command'] = experiment_config['trial']['command']
value_dict['codeDir'] = experiment_config['trial']['trialCodeDir'] value_dict['codeDir'] = experiment_config['trial']['codeDir']
value_dict['gpuNum'] = experiment_config['trial']['trialGpuNum'] value_dict['gpuNum'] = experiment_config['trial']['gpuNum']
request_data['trial_config'] = value_dict request_data['trial_config'] = value_dict
response = rest_put(cluster_metadata_url(port), json.dumps(request_data), 20) response = rest_put(cluster_metadata_url(port), json.dumps(request_data), 20)
return True if response.status_code == 200 else False return True if response.status_code == 200 else False
@ -75,11 +76,14 @@ def set_remote_config(experiment_config, port):
request_data = dict() request_data = dict()
request_data['machine_list'] = experiment_config['machineList'] request_data['machine_list'] = experiment_config['machineList']
response = rest_put(cluster_metadata_url(port), json.dumps(request_data), 20) response = rest_put(cluster_metadata_url(port), json.dumps(request_data), 20)
err_message = ''
if not response or not response.status_code == 200: if not response or not response.status_code == 200:
return False if response is not None:
err_message = response.text
return False, err_message
#set trial_config #set trial_config
return set_trial_config(experiment_config, port) return set_trial_config(experiment_config, port), err_message
def set_experiment(experiment_config, mode, port): def set_experiment(experiment_config, mode, port):
'''Call startExperiment (rest POST /experiment) with yaml file content''' '''Call startExperiment (rest POST /experiment) with yaml file content'''
@ -89,7 +93,7 @@ def set_experiment(experiment_config, mode, port):
request_data['trialConcurrency'] = experiment_config['trialConcurrency'] request_data['trialConcurrency'] = experiment_config['trialConcurrency']
request_data['maxExecDuration'] = experiment_config['maxExecDuration'] request_data['maxExecDuration'] = experiment_config['maxExecDuration']
request_data['maxTrialNum'] = experiment_config['maxTrialNum'] request_data['maxTrialNum'] = experiment_config['maxTrialNum']
request_data['searchSpace'] = experiment_config['searchSpace'] request_data['searchSpace'] = experiment_config.get('searchSpace')
request_data['tuner'] = experiment_config['tuner'] request_data['tuner'] = experiment_config['tuner']
if 'assessor' in experiment_config: if 'assessor' in experiment_config:
request_data['assessor'] = experiment_config['assessor'] request_data['assessor'] = experiment_config['assessor']
@ -97,16 +101,16 @@ def set_experiment(experiment_config, mode, port):
request_data['clusterMetaData'] = [] request_data['clusterMetaData'] = []
if experiment_config['trainingServicePlatform'] == 'local': if experiment_config['trainingServicePlatform'] == 'local':
request_data['clusterMetaData'].append( request_data['clusterMetaData'].append(
{'key':'codeDir', 'value':experiment_config['trial']['trialCodeDir']}) {'key':'codeDir', 'value':experiment_config['trial']['codeDir']})
request_data['clusterMetaData'].append( request_data['clusterMetaData'].append(
{'key': 'command', 'value': experiment_config['trial']['trialCommand']}) {'key': 'command', 'value': experiment_config['trial']['command']})
else: else:
request_data['clusterMetaData'].append( request_data['clusterMetaData'].append(
{'key': 'machine_list', 'value': experiment_config['machineList']}) {'key': 'machine_list', 'value': experiment_config['machineList']})
value_dict = dict() value_dict = dict()
value_dict['command'] = experiment_config['trial']['trialCommand'] value_dict['command'] = experiment_config['trial']['command']
value_dict['codeDir'] = experiment_config['trial']['trialCodeDir'] value_dict['codeDir'] = experiment_config['trial']['codeDir']
value_dict['gpuNum'] = experiment_config['trial']['trialGpuNum'] value_dict['gpuNum'] = experiment_config['trial']['gpuNum']
request_data['clusterMetaData'].append( request_data['clusterMetaData'].append(
{'key': 'trial_config', 'value': value_dict}) {'key': 'trial_config', 'value': value_dict})
@ -117,23 +121,24 @@ def launch_experiment(args, experiment_config, mode, webuiport, experiment_id=No
'''follow steps to start rest server and start experiment''' '''follow steps to start rest server and start experiment'''
nni_config = Config() nni_config = Config()
# start rest server # start rest server
rest_process = start_rest_server(args.manager, REST_PORT, experiment_config['trainingServicePlatform'], mode, experiment_id) rest_process = start_rest_server(REST_PORT, experiment_config['trainingServicePlatform'], mode, experiment_id)
nni_config.set_config('restServerPid', rest_process.pid) nni_config.set_config('restServerPid', rest_process.pid)
# Deal with annotation # Deal with annotation
if experiment_config.get('useAnnotation'): if experiment_config.get('useAnnotation'):
path = os.path.join(tempfile.gettempdir(), 'nni', 'annotation') path = os.path.join(tempfile.gettempdir(), 'nni', 'annotation')
if os.path.isdir(path): if os.path.isdir(path):
shutil.rmtree(path) shutil.rmtree(path)
os.makedirs(path) os.makedirs(path)
expand_annotations(experiment_config['trial']['trialCodeDir'], path) expand_annotations(experiment_config['trial']['codeDir'], path)
experiment_config['trial']['trialCodeDir'] = path experiment_config['trial']['codeDir'] = path
search_space = generate_search_space(experiment_config['trial']['trialCodeDir']) search_space = generate_search_space(experiment_config['trial']['codeDir'])
assert search_space, ERROR_INFO % 'Generated search space is empty'
else:
search_space = get_json_content(experiment_config['searchSpacePath'])
experiment_config['searchSpace'] = json.dumps(search_space) experiment_config['searchSpace'] = json.dumps(search_space)
assert search_space, ERROR_INFO % 'Generated search space is empty'
elif experiment_config.get('searchSpacePath'):
search_space = get_json_content(experiment_config.get('searchSpacePath'))
experiment_config['searchSpace'] = json.dumps(search_space)
else:
experiment_config['searchSpace'] = json.dumps('')
# check rest server # check rest server
print_normal('Checking restful server...') print_normal('Checking restful server...')
@ -142,7 +147,8 @@ def launch_experiment(args, experiment_config, mode, webuiport, experiment_id=No
else: else:
print_error('Restful server start failed!') print_error('Restful server start failed!')
try: try:
rest_process.kill() cmds = ['pkill', '-P', str(rest_process.pid)]
call(cmds)
except Exception: except Exception:
raise Exception(ERROR_INFO % 'Rest server stopped!') raise Exception(ERROR_INFO % 'Rest server stopped!')
exit(0) exit(0)
@ -150,12 +156,14 @@ def launch_experiment(args, experiment_config, mode, webuiport, experiment_id=No
# set remote config # set remote config
if experiment_config['trainingServicePlatform'] == 'remote': if experiment_config['trainingServicePlatform'] == 'remote':
print_normal('Setting remote config...') print_normal('Setting remote config...')
if set_remote_config(experiment_config, REST_PORT): config_result, err_msg = set_remote_config(experiment_config, REST_PORT)
if config_result:
print_normal('Success!') print_normal('Success!')
else: else:
print_error('Failed!') print_error('Failed! Error is: {}'.format(err_msg))
try: try:
rest_process.kill() cmds = ['pkill', '-P', str(rest_process.pid)]
call(cmds)
except Exception: except Exception:
raise Exception(ERROR_INFO % 'Rest server stopped!') raise Exception(ERROR_INFO % 'Rest server stopped!')
exit(0) exit(0)
@ -168,7 +176,8 @@ def launch_experiment(args, experiment_config, mode, webuiport, experiment_id=No
else: else:
print_error('Failed!') print_error('Failed!')
try: try:
rest_process.kill() cmds = ['pkill', '-P', str(rest_process.pid)]
call(cmds)
except Exception: except Exception:
raise Exception(ERROR_INFO % 'Rest server stopped!') raise Exception(ERROR_INFO % 'Rest server stopped!')
exit(0) exit(0)
@ -183,7 +192,8 @@ def launch_experiment(args, experiment_config, mode, webuiport, experiment_id=No
else: else:
print_error('Failed!') print_error('Failed!')
try: try:
rest_process.kill() cmds = ['pkill', '-P', str(rest_process.pid)]
call(cmds)
except Exception: except Exception:
raise Exception(ERROR_INFO % 'Rest server stopped!') raise Exception(ERROR_INFO % 'Rest server stopped!')
exit(0) exit(0)
@ -213,9 +223,9 @@ def resume_experiment(args):
def create_experiment(args): def create_experiment(args):
'''start a new experiment''' '''start a new experiment'''
nni_config = Config() nni_config = Config()
config_path = os.path.abspath(args.config)
experiment_config = get_yml_content(args.config) experiment_config = get_yml_content(config_path)
validate_all_content(experiment_config) validate_all_content(experiment_config, config_path)
nni_config.set_config('experimentConfig', experiment_config) nni_config.set_config('experimentConfig', experiment_config)
launch_experiment(args, experiment_config, 'new', args.webuiport) launch_experiment(args, experiment_config, 'new', args.webuiport)

Некоторые файлы не были показаны из-за слишком большого количества измененных файлов Показать больше