diff --git a/dependencies/required.txt b/dependencies/required.txt index 2ce30c9cd..e765025af 100644 --- a/dependencies/required.txt +++ b/dependencies/required.txt @@ -6,6 +6,7 @@ hyperopt == 0.1.2 json_tricks >= 3.15.5 numpy < 1.22 ; python_version < "3.8" numpy ; python_version >= "3.8" +packaging pandas prettytable psutil diff --git a/docs/source/Tutorial/HowToDebug.rst b/docs/_removed/HowToDebug.rst similarity index 100% rename from docs/source/Tutorial/HowToDebug.rst rename to docs/_removed/HowToDebug.rst diff --git a/docs/source/Overview.rst b/docs/_removed/Overview.rst similarity index 100% rename from docs/source/Overview.rst rename to docs/_removed/Overview.rst diff --git a/docs/source/TrainingService/AMLMode.rst b/docs/_removed/TrainingService/AMLMode.rst similarity index 100% rename from docs/source/TrainingService/AMLMode.rst rename to docs/_removed/TrainingService/AMLMode.rst diff --git a/docs/source/TrainingService/FrameworkControllerMode.rst b/docs/_removed/TrainingService/FrameworkControllerMode.rst similarity index 100% rename from docs/source/TrainingService/FrameworkControllerMode.rst rename to docs/_removed/TrainingService/FrameworkControllerMode.rst diff --git a/docs/source/TrainingService/HybridMode.rst b/docs/_removed/TrainingService/HybridMode.rst similarity index 100% rename from docs/source/TrainingService/HybridMode.rst rename to docs/_removed/TrainingService/HybridMode.rst diff --git a/docs/source/TrainingService/KubeflowMode.rst b/docs/_removed/TrainingService/KubeflowMode.rst similarity index 100% rename from docs/source/TrainingService/KubeflowMode.rst rename to docs/_removed/TrainingService/KubeflowMode.rst diff --git a/docs/source/TrainingService/Overview.rst b/docs/_removed/TrainingService/Overview.rst similarity index 100% rename from docs/source/TrainingService/Overview.rst rename to docs/_removed/TrainingService/Overview.rst diff --git a/docs/source/TrainingService/PaiMode.rst b/docs/_removed/TrainingService/PaiMode.rst 
similarity index 100% rename from docs/source/TrainingService/PaiMode.rst rename to docs/_removed/TrainingService/PaiMode.rst diff --git a/docs/source/TrainingService/RemoteMachineMode.rst b/docs/_removed/TrainingService/RemoteMachineMode.rst similarity index 100% rename from docs/source/TrainingService/RemoteMachineMode.rst rename to docs/_removed/TrainingService/RemoteMachineMode.rst diff --git a/docs/source/TrialExample/Cifar10Examples.rst b/docs/_removed/TrialExample/Cifar10Examples.rst similarity index 100% rename from docs/source/TrialExample/Cifar10Examples.rst rename to docs/_removed/TrialExample/Cifar10Examples.rst diff --git a/docs/source/TrialExample/GbdtExample.rst b/docs/_removed/TrialExample/GbdtExample.rst similarity index 100% rename from docs/source/TrialExample/GbdtExample.rst rename to docs/_removed/TrialExample/GbdtExample.rst diff --git a/docs/source/TrialExample/MnistExamples.rst b/docs/_removed/TrialExample/MnistExamples.rst similarity index 100% rename from docs/source/TrialExample/MnistExamples.rst rename to docs/_removed/TrialExample/MnistExamples.rst diff --git a/docs/source/TrialExample/Pix2pixExample.rst b/docs/_removed/TrialExample/Pix2pixExample.rst similarity index 100% rename from docs/source/TrialExample/Pix2pixExample.rst rename to docs/_removed/TrialExample/Pix2pixExample.rst diff --git a/docs/source/TrialExample/SklearnExamples.rst b/docs/_removed/TrialExample/SklearnExamples.rst similarity index 100% rename from docs/source/TrialExample/SklearnExamples.rst rename to docs/_removed/TrialExample/SklearnExamples.rst diff --git a/docs/source/TrialExample/Trials.rst b/docs/_removed/Trials.rst similarity index 100% rename from docs/source/TrialExample/Trials.rst rename to docs/_removed/Trials.rst diff --git a/docs/source/Tuner/AnnealTuner.rst b/docs/_removed/Tuner/AnnealTuner.rst similarity index 100% rename from docs/source/Tuner/AnnealTuner.rst rename to docs/_removed/Tuner/AnnealTuner.rst diff --git 
a/docs/source/Tuner/BatchTuner.rst b/docs/_removed/Tuner/BatchTuner.rst similarity index 100% rename from docs/source/Tuner/BatchTuner.rst rename to docs/_removed/Tuner/BatchTuner.rst diff --git a/docs/source/Tuner/BohbAdvisor.rst b/docs/_removed/Tuner/BohbAdvisor.rst similarity index 100% rename from docs/source/Tuner/BohbAdvisor.rst rename to docs/_removed/Tuner/BohbAdvisor.rst diff --git a/docs/source/Tuner/CustomizeAdvisor.rst b/docs/_removed/Tuner/CustomizeAdvisor.rst similarity index 100% rename from docs/source/Tuner/CustomizeAdvisor.rst rename to docs/_removed/Tuner/CustomizeAdvisor.rst diff --git a/docs/source/Tuner/DngoTuner.rst b/docs/_removed/Tuner/DngoTuner.rst similarity index 100% rename from docs/source/Tuner/DngoTuner.rst rename to docs/_removed/Tuner/DngoTuner.rst diff --git a/docs/source/Tuner/EvolutionTuner.rst b/docs/_removed/Tuner/EvolutionTuner.rst similarity index 100% rename from docs/source/Tuner/EvolutionTuner.rst rename to docs/_removed/Tuner/EvolutionTuner.rst diff --git a/docs/source/Tuner/GPTuner.rst b/docs/_removed/Tuner/GPTuner.rst similarity index 100% rename from docs/source/Tuner/GPTuner.rst rename to docs/_removed/Tuner/GPTuner.rst diff --git a/docs/source/Tuner/GridsearchTuner.rst b/docs/_removed/Tuner/GridsearchTuner.rst similarity index 100% rename from docs/source/Tuner/GridsearchTuner.rst rename to docs/_removed/Tuner/GridsearchTuner.rst diff --git a/docs/source/Tuner/HyperbandAdvisor.rst b/docs/_removed/Tuner/HyperbandAdvisor.rst similarity index 100% rename from docs/source/Tuner/HyperbandAdvisor.rst rename to docs/_removed/Tuner/HyperbandAdvisor.rst diff --git a/docs/source/Tuner/MetisTuner.rst b/docs/_removed/Tuner/MetisTuner.rst similarity index 100% rename from docs/source/Tuner/MetisTuner.rst rename to docs/_removed/Tuner/MetisTuner.rst diff --git a/docs/source/Tuner/NetworkmorphismTuner.rst b/docs/_removed/Tuner/NetworkmorphismTuner.rst similarity index 100% rename from docs/source/Tuner/NetworkmorphismTuner.rst 
rename to docs/_removed/Tuner/NetworkmorphismTuner.rst diff --git a/docs/source/Tuner/PBTTuner.rst b/docs/_removed/Tuner/PBTTuner.rst similarity index 100% rename from docs/source/Tuner/PBTTuner.rst rename to docs/_removed/Tuner/PBTTuner.rst diff --git a/docs/source/Tuner/SmacTuner.rst b/docs/_removed/Tuner/SmacTuner.rst similarity index 100% rename from docs/source/Tuner/SmacTuner.rst rename to docs/_removed/Tuner/SmacTuner.rst diff --git a/docs/source/Tutorial/WebUI.rst b/docs/_removed/WebUI.rst similarity index 100% rename from docs/source/Tutorial/WebUI.rst rename to docs/_removed/WebUI.rst diff --git a/docs/source/examples.rst b/docs/_removed/examples.rst similarity index 100% rename from docs/source/examples.rst rename to docs/_removed/examples.rst diff --git a/docs/source/training_services.rst b/docs/_removed/training_services.rst similarity index 100% rename from docs/source/training_services.rst rename to docs/_removed/training_services.rst diff --git a/docs/source/Assessor/CustomizeAssessor.rst b/docs/source/Assessor/CustomizeAssessor.rst deleted file mode 100644 index 34cef8ebf..000000000 --- a/docs/source/Assessor/CustomizeAssessor.rst +++ /dev/null @@ -1,65 +0,0 @@ -Customize Assessor -================== - -NNI supports to build an assessor by yourself for tuning demand. - -If you want to implement a customized Assessor, there are three things to do: - - -#. Inherit the base Assessor class -#. Implement assess_trial function -#. Configure your customized Assessor in experiment YAML config file - -**1. Inherit the base Assessor class** - -.. code-block:: python - - from nni.assessor import Assessor - - class CustomizedAssessor(Assessor): - def __init__(self, ...): - ... - -**2. Implement assess trial function** - -.. code-block:: python - - from nni.assessor import Assessor, AssessResult - - class CustomizedAssessor(Assessor): - def __init__(self, ...): - ... - - def assess_trial(self, trial_history): - """ - Determines whether a trial should be killed. 
Must override. - trial_history: a list of intermediate result objects. - Returns AssessResult.Good or AssessResult.Bad. - """ - # you code implement here. - ... - -**3. Configure your customized Assessor in experiment YAML config file** - -NNI needs to locate your customized Assessor class and instantiate the class, so you need to specify the location of the customized Assessor class and pass literal values as parameters to the __init__ constructor. - -.. code-block:: yaml - - assessor: - codeDir: /home/abc/myassessor - classFileName: my_customized_assessor.py - className: CustomizedAssessor - # Any parameter need to pass to your Assessor class __init__ constructor - # can be specified in this optional classArgs field, for example - classArgs: - arg1: value1 - -Please noted in **2**. The object ``trial_history`` are exact the object that Trial send to Assessor by using SDK ``report_intermediate_result`` function. - -The working directory of your assessor is ``/nni-experiments//log``\ , which can be retrieved with environment variable ``NNI_LOG_DIRECTORY``\ , - -More detail example you could see: - -* :githublink:`medianstop-assessor ` -* :githublink:`curvefitting-assessor ` - diff --git a/docs/source/Overview_zh.rst b/docs/source/Overview_zh.rst deleted file mode 100644 index be61cc35c..000000000 --- a/docs/source/Overview_zh.rst +++ /dev/null @@ -1,125 +0,0 @@ -.. 6e45ee0ddd5d0315e5c946149d4f9c31 - -概述 -======== - -NNI (Neural Network Intelligence) 是一个工具包,可有效的帮助用户设计并调优机器学习模型的神经网络架构,复杂系统的参数(如超参)等。 NNI 的特性包括:易于使用,可扩展,灵活,高效。 - - -* **易于使用**:NNI 可通过 pip 安装。 只需要在代码中添加几行,就可以利用 NNI 来调优参数。 可使用命令行工具或 Web 界面来查看 Experiment。 -* **可扩展**:调优超参或网络结构通常需要大量的计算资源。NNI 在设计时就支持了多种不同的计算资源,如远程服务器组,训练平台(如:OpenPAI,Kubernetes),等等。 根据您配置的培训平台的能力,可以并行运行数百个 Trial 。 -* **灵活**:除了内置的算法,NNI 中还可以轻松集成自定义的超参调优算法,神经网络架构搜索算法,提前终止算法等等。 还可以将 NNI 连接到更多的训练平台上,如云中的虚拟机集群,Kubernetes 服务等等。 此外,NNI 还可以连接到外部环境中的特殊应用和模型上。 -* **高效**:NNI 在系统及算法级别上不断地进行优化。 例如:通过早期的反馈来加速调优过程。 - -下图显示了 NNI 的体系结构。 - - -.. raw:: html - -

- drawing -

- - -主要概念 ------------- - - -* - *Experiment(实验)*: 表示一次任务,例如,寻找模型的最佳超参组合,或最好的神经网络架构等。 它由 Trial 和自动机器学习算法所组成。 - -* - *搜索空间*:是模型调优的范围。 例如,超参的取值范围。 - -* - *Configuration(配置)*:配置是来自搜索空间的实例,每个超参都会有特定的值。 - -* - *Trial*:是一次独立的尝试,它会使用某组配置(例如,一组超参值,或者特定的神经网络架构)。 Trial 会基于提供的配置来运行。 - -* - *Tuner(调优器)*:一种自动机器学习算法,会为下一个 Trial 生成新的配置。 新的 Trial 会使用这组配置来运行。 - -* - *Assessor(评估器)*:分析 Trial 的中间结果(例如,定期评估数据集上的精度),来确定 Trial 是否应该被提前终止。 - -* - *训练平台*:是 Trial 的执行环境。 根据 Experiment 的配置,可以是本机,远程服务器组,或其它大规模训练平台(如,OpenPAI,Kubernetes)。 - -Experiment 的运行过程为:Tuner 接收搜索空间并生成配置。 这些配置将被提交到训练平台,如本机,远程服务器组或训练集群。 执行的性能结果会被返回给 Tuner。 然后,再生成并提交新的配置。 - -每次 Experiment 执行时,用户只需要定义搜索空间,改动几行代码,就能利用 NNI 内置的 Tuner/Assessor 和训练平台来搜索最好的超参组合以及神经网络结构。 基本上分为三步: - -.. - - 步骤一:`定义搜索空间 `__ - - 步骤二:`改动模型代码 `__ - - 步骤三:`定义实验配置 `__ - - - -.. raw:: html - -

- drawing -

- - -可查看 `快速入门 `__ 来调优你的模型或系统。 - -核心功能 -------------- - -NNI 提供了并行运行多个实例以查找最佳参数组合的能力。 此功能可用于各种领域,例如,为深度学习模型查找最佳超参数,或查找具有真实数据的数据库和其他复杂系统的最佳配置。 - -NNI 还希望提供用于机器学习和深度学习的算法工具包,尤其是神经体系结构搜索(NAS)算法,模型压缩算法和特征工程算法。 - -超参调优 -^^^^^^^^^^^^^^^^^^^^^ - -这是 NNI 最核心、基本的功能,其中提供了许多流行的 `自动调优算法 `__ (即 Tuner) 以及 `提前终止算法 `__ (即 Assessor)。 可查看 `快速入门 `__ 来调优你的模型或系统。 基本上通过以上三步,就能开始 NNI Experiment。 - -通用 NAS 框架 -^^^^^^^^^^^^^^^^^^^^^ - -此 NAS 框架可供用户轻松指定候选的神经体系结构,例如,可以为单个层指定多个候选操作(例如,可分离的 conv、扩张 conv),并指定可能的跳过连接。 NNI 将自动找到最佳候选。 另一方面,NAS 框架为其他类型的用户(如,NAS 算法研究人员)提供了简单的接口,以实现新的 NAS 算法。 NAS 详情及用法参考 `这里 `__。 - -NNI 通过 Trial SDK 支持多种 one-shot(一次性) NAS 算法,如:ENAS、DARTS。 使用这些算法时,不需启动 NNI Experiment。 在 Trial 代码中加入算法,直接运行即可。 如果要调整算法中的超参数,或运行多个实例,可以使用 Tuner 并启动 NNI Experiment。 - -除了 one-shot NAS 外,NAS 还能以 NNI 模式运行,其中每个候选的网络结构都作为独立 Trial 任务运行。 在此模式下,与超参调优类似,必须启动 NNI Experiment 并为 NAS 选择 Tuner。 - -模型压缩 -^^^^^^^^^^^^^^^^^ - -NNI 提供了一个易于使用的模型压缩框架来压缩深度神经网络,压缩后的网络通常具有更小的模型尺寸和更快的推理速度, -模型性能也不会有明显的下降。 NNI 上的模型压缩包括剪枝和量化算法。 这些算法通过 NNI Trial SDK 提供 -。 可以直接在 Trial 代码中使用,并在不启动 NNI Experiment 的情况下运行 Trial 代码。 用户还可以使用 NNI 模型压缩框架集成自定义的剪枝和量化算法。 - -模型压缩的详细说明和算法可在 `这里 `__ 找到。 - -自动特征工程 -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -自动特征工程,可以为下游任务找到最有效的特征。 自动特征工程及其用法的详细说明可在 `这里 `__ 找到。 通过 NNI Trial SDK 支持,不必创建 NNI Experiment, 只需在 Trial 代码中加入内置的自动特征工程算法,然后直接运行 Trial 代码。 - -自动特征工程算法通常有一些超参。 如果要自动调整这些超参,可以利用 NNI 的超参数调优,即选择调优算法(即 Tuner)并启动 NNI Experiment。 - -了解更多信息 --------------------- - - -* `入门 `__ -* `如何为 NNI 调整代码? `__ -* `NNI 支持哪些 Tuner? `__ -* `如何自定义 Tuner? `__ -* `NNI 支持哪些 Assessor? `__ -* `如何自定义 Assessor? `__ -* `如何在本机上运行 Experiment? `__ -* `如何在多机上运行 Experiment? `__ -* `如何在 OpenPAI 上运行 Experiment? `__ -* `示例 `__ -* `NNI 上的神经网络架构搜索 `__ -* `NNI 上的自动模型压缩 `__ -* `NNI 上的自动特征工程 `__ diff --git a/docs/source/TrialExample/Trials_zh.rst b/docs/source/TrialExample/Trials_zh.rst deleted file mode 100644 index f3340de3d..000000000 --- a/docs/source/TrialExample/Trials_zh.rst +++ /dev/null @@ -1,216 +0,0 @@ -.. 
263c2dcfaee0c2fd06c19b5e90b96374 - -.. role:: raw-html(raw) - :format: html - - -实现 NNI 的 Trial(试验)代码 -================================= - -**Trial(试验)** 是将一组参数组合(例如,超参)在模型上独立的一次尝试。 - -定义 NNI 的 Trial,需要首先定义参数组(例如,搜索空间),并更新模型代码。 有两种方法来定义一个 Trial:`NNI Python API <#nni-api>`__ 和 `NNI Python annotation <#nni-annotation>`__。 参考 `这里 <#more-examples>`__ 更多 Trial 示例。 - -:raw-html:`` - -NNI Trial API -------------- - -第一步:准备搜索空间参数文件。 -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -样例如下: - -.. code-block:: json - - { - "dropout_rate":{"_type":"uniform","_value":[0.1,0.5]}, - "conv_size":{"_type":"choice","_value":[2,3,5,7]}, - "hidden_size":{"_type":"choice","_value":[124, 512, 1024]}, - "learning_rate":{"_type":"uniform","_value":[0.0001, 0.1]} - } - -参考 `SearchSpaceSpec.rst <../Tutorial/SearchSpaceSpec.rst>`__ 进一步了解搜索空间。 Tuner 会根据搜索空间来生成配置,即从每个超参的范围中选一个值。 - -第二步:更新模型代码 -^^^^^^^^^^^^^^^^^^^^^^^^^^ - - -* Import NNI - -在 Trial 代码中加上 ``import nni`` 。 - -* 从 Tuner 获得参数值 - -.. code-block:: python - - RECEIVED_PARAMS = nni.get_next_parameter() - -``RECEIVED_PARAMS`` 是一个对象,如: - -``{"conv_size": 2, "hidden_size": 124, "learning_rate": 0.0307, "dropout_rate": 0.2029}`` - - -* 定期返回指标数据(可选) - -.. code-block:: python - - nni.report_intermediate_result(metrics) - -``指标`` 可以是任意的 Python 对象。 如果使用了 NNI 内置的 Tuner/Assessor,``指标`` 只可以是两种类型:1) 数值类型,如 float、int, 2) dict 对象,其中必须有键名为 ``default`` ,值为数值的项目。 ``指标`` 会发送给 `assessor <../Assessor/BuiltinAssessor.rst>`__。 通常,``指标`` 包含了定期评估的损失值或精度。 - - -* 返回配置的最终性能 - -.. code-block:: python - - nni.report_final_result(metrics) - -``指标`` 可以是任意的 Python 对象。 如果使用了内置的 Tuner/Assessor,``指标`` 格式和 ``report_intermediate_result`` 中一样,这个数值表示模型的性能,如精度、损失值等。 ``指标`` 会发送给 `tuner <../Tuner/BuiltinTuner.rst>`__。 - -第三步:启动 NNI Experiment (实验) -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -启动 NNI 实验,提供搜索空间文件的路径,即第一步中定义的文件: - -.. 
code-block:: yaml - - searchSpacePath: /path/to/your/search_space.json - -参考 `这里 <../Tutorial/ExperimentConfig.rst>`__ 进一步了解如何配置 Experiment。 - -参考 `这里 <../sdk_reference.rst>`__ ,了解更多 NNI Trial API (例如:``nni.get_sequence_id()``)。 - -:raw-html:`` - -NNI Annotation ---------------------- - -另一种实现 Trial 的方法是使用 Python 注释来标记 NNI。 NNI Annotation 很简单,类似于注释。 不必对现有代码进行结构更改。 只需要添加一些 NNI Annotation,就能够: - - -* 标记需要调整的参数变量 -* 指定要在其中调整的变量的范围 -* 标记哪个变量需要作为中间结果范围给 ``assessor`` -* 标记哪个变量需要作为最终结果(例如:模型精度) 返回给 ``tuner`` - -同样以 MNIST 为例,只需要两步就能用 NNI Annotation 来实现 Trial 代码。 - -第一步:在代码中加入 Annotation -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -下面是加入了 Annotation 的 TensorFlow 代码片段,高亮的 4 行 Annotation 用于: - - -#. 调优 batch_size 和 dropout_rate -#. 每执行 100 步返回 test_acc -#. 最后返回 test_acc 作为最终结果。 - -值得注意的是,新添加的代码都是注释,不会影响以前的执行逻辑。因此这些代码仍然能在没有安装 NNI 的环境中运行。 - -.. code-block:: diff - - with tf.Session() as sess: - sess.run(tf.global_variables_initializer()) - + """@nni.variable(nni.choice(50, 250, 500), name=batch_size)""" - batch_size = 128 - for i in range(10000): - batch = mnist.train.next_batch(batch_size) - + """@nni.variable(nni.choice(0.1, 0.5), name=dropout_rate)""" - dropout_rate = 0.5 - mnist_network.train_step.run(feed_dict={mnist_network.images: batch[0], - mnist_network.labels: batch[1], - mnist_network.keep_prob: dropout_rate}) - if i % 100 == 0: - test_acc = mnist_network.accuracy.eval( - feed_dict={mnist_network.images: mnist.test.images, - mnist_network.labels: mnist.test.labels, - mnist_network.keep_prob: 1.0}) - + """@nni.report_intermediate_result(test_acc)""" - - test_acc = mnist_network.accuracy.eval( - feed_dict={mnist_network.images: mnist.test.images, - mnist_network.labels: mnist.test.labels, - mnist_network.keep_prob: 1.0}) - + """@nni.report_final_result(test_acc)""" - -**注意**: - - -* ``@nni.variable`` 会对它的下面一行进行修改,左边被赋值变量必须与 ``@nni.variable`` 的关键字 ``name`` 相同。 -* ``@nni.report_intermediate_result``\ /\ ``@nni.report_final_result`` 会将数据发送给 assessor/tuner。 - -Annotation 
的语法和用法等,参考 `Annotation <../Tutorial/AnnotationSpec.rst>`__。 - -第二步:启用 Annotation -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -在 YAML 配置文件中设置 *useAnnotation* 为 true 来启用 Annotation: - -.. code-block:: bash - - useAnnotation: true - -用于调试的独立模式 ------------------------------ - -NNI 支持独立模式,使 Trial 代码无需启动 NNI 实验即可运行。 这样能更容易的找出 Trial 代码中的 Bug。 NNI Annotation 天然支持独立模式,因为添加的 NNI 相关的行都是注释的形式。 NNI Trial API 在独立模式下的行为有所变化,某些 API 返回虚拟值,而某些 API 不报告值。 有关这些 API 的完整列表,请参阅下表。 - -.. code-block:: python - - # 注意:请为 Trial 代码中的超参分配默认值 - nni.get_next_parameter # 返回 {} - nni.report_final_result # 已在 stdout 上打印日志,但不报告 - nni.report_intermediate_result # 已在 stdout 上打印日志,但不报告 - nni.get_experiment_id # 返回 "STANDALONE" - nni.get_trial_id # 返回 "STANDALONE" - nni.get_sequence_id # 返回 0 - -可使用 :githublink:`mnist 示例 ` 来尝试独立模式。 只需在代码目录下运行 ``python3 mnist.py``。 Trial 代码会使用默认超参成功运行。 - -更多调试的信息,可参考 `How to Debug <../Tutorial/HowToDebug.rst>`__。 - -Trial 存放在什么地方? ----------------------------------------- - -本机模式 -^^^^^^^^^^ - -每个 Trial 都有单独的目录来输出自己的数据。 在每次 Trial 运行后,环境变量 ``NNI_OUTPUT_DIR`` 定义的目录都会被导出。 在这个目录中可以看到 Trial 的代码、数据和日志。 此外,Trial 的日志(包括 stdout)还会被重定向到此目录中的 ``trial.log`` 文件。 - -如果使用了 Annotation 方法,转换后的 Trial 代码会存放在另一个临时目录中。 可以在 ``run.sh`` 文件中的 ``NNI_OUTPUT_DIR`` 变量找到此目录。 文件中的第二行(即:``cd``)会切换到代码所在的实际路径。 ``run.sh`` 文件示例: - -.. code-block:: bash - - #!/bin/bash - cd /tmp/user_name/nni/annotation/tmpzj0h72x6 #This is the actual directory - export NNI_PLATFORM=local - export NNI_SYS_DIR=/home/user_name/nni-experiments/$experiment_id$/trials/$trial_id$ - export NNI_TRIAL_JOB_ID=nrbb2 - export NNI_OUTPUT_DIR=/home/user_name/nni-experiments/$eperiment_id$/trials/$trial_id$ - export NNI_TRIAL_SEQ_ID=1 - export MULTI_PHASE=false - export CUDA_VISIBLE_DEVICES= - eval python3 mnist.py 2>/home/user_name/nni-experiments/$experiment_id$/trials/$trial_id$/stderr - echo $? 
`date +%s%3N` >/home/user_name/nni-experiments/$experiment_id$/trials/$trial_id$/.nni/state - -其它模式 -^^^^^^^^^^^ - -当 Trial 运行在 OpenPAI 这样的远程服务器上时,``NNI_OUTPUT_DIR`` 仅会指向 Trial 的输出目录,而 ``run.sh`` 不会在此目录中。 ``trial.log`` 文件会被复制回本机的 Trial 目录中。目录的默认位置在 ``~/nni-experiments/$experiment_id$/trials/$trial_id$/``。 - -更多调试的信息,可参考 `How to Debug <../Tutorial/HowToDebug.rst>`__。 - -:raw-html:`` - -更多 Trial 的示例 -------------------- - - -* `将日志写入 TensorBoard 的 Trial 输出目录 <../Tutorial/Tensorboard.rst>`__ -* `MNIST 示例 `__ -* `为 CIFAR 10 分类找到最佳的 optimizer `__ -* `如何在 NNI 调优 SciKit-learn 的参数 `__ -* `在阅读理解上使用自动模型架构搜索。 `__ -* `如何在 NNI 上调优 GBDT `__ -* `在 NNI 上调优 RocksDB `__ diff --git a/docs/source/Tuner/RandomTuner.rst b/docs/source/Tuner/RandomTuner.rst deleted file mode 100644 index 1b1022a5a..000000000 --- a/docs/source/Tuner/RandomTuner.rst +++ /dev/null @@ -1,17 +0,0 @@ -Random Tuner -============ - -In `Random Search for Hyper-Parameter Optimization `__ we show that Random Search might be surprisingly effective despite its simplicity. -We suggest using Random Search as a baseline when no knowledge about the prior distribution of hyper-parameters is available. - -Usage ------ - -Example Configuration - -.. code-block:: yaml - - tuner: - name: Random - classArgs: - seed: 100 # optional diff --git a/docs/source/Tuner/TpeTuner.rst b/docs/source/Tuner/TpeTuner.rst deleted file mode 100644 index d255b0f69..000000000 --- a/docs/source/Tuner/TpeTuner.rst +++ /dev/null @@ -1,114 +0,0 @@ -TPE Tuner -========= - -The Tree-structured Parzen Estimator (TPE) is a sequential model-based optimization (SMBO) approach. -SMBO methods sequentially construct models to approximate the performance of hyperparameters based on historical measurements, -and then subsequently choose new hyperparameters to test based on this model. - -The TPE approach models P(x|y) and P(y) where x represents hyperparameters and y the associated evaluation matric. 
-P(x|y) is modeled by transforming the generative process of hyperparameters, -replacing the distributions of the configuration prior with non-parametric densities. - -This optimization approach is described in detail in `Algorithms for Hyper-Parameter Optimization `__. - -Parallel TPE optimization -^^^^^^^^^^^^^^^^^^^^^^^^^ - -TPE approaches were actually run asynchronously in order to make use of multiple compute nodes and to avoid wasting time waiting for trial evaluations to complete. -The original algorithm design was optimized for sequential computation. -If we were to use TPE with much concurrency, its performance will be bad. -We have optimized this case using the Constant Liar algorithm. -For these principles of optimization, please refer to our `research blog <../CommunitySharings/ParallelizingTpeSearch.rst>`__. - -Usage ------ - - To use TPE, you should add the following spec in your experiment's YAML config file: - -.. code-block:: yaml - - ## minimal config ## - tuner: - name: TPE - classArgs: - optimize_mode: minimize - -.. code-block:: yaml - - ## advanced config ## - tuner: - name: TPE - classArgs: - optimize_mode: maximize - seed: 12345 - tpe_args: - constant_liar_type: 'mean' - n_startup_jobs: 10 - n_ei_candidates: 20 - linear_forgetting: 100 - prior_weight: 0 - gamma: 0.5 - -classArgs -^^^^^^^^^ - -.. list-table:: - :widths: 10 20 10 60 - :header-rows: 1 - - * - Field - - Type - - Default - - Description - - * - ``optimize_mode`` - - ``'minimize' | 'maximize'`` - - ``'minimize'`` - - Whether to minimize or maximize trial metrics. - - * - ``seed`` - - ``int | null`` - - ``null`` - - The random seed. - - * - ``tpe_args.constant_liar_type`` - - ``'best' | 'worst' | 'mean' | null`` - - ``'best'`` - - TPE algorithm itself does not support parallel tuning. This parameter specifies how to optimize for trial_concurrency > 1. How each liar works is explained in paper's section 6.1. 
- - In general ``best`` suit for small trial number and ``worst`` suit for large trial number. - - * - ``tpe_args.n_startup_jobs`` - - ``int`` - - ``20`` - - The first N hyper-parameters are generated fully randomly for warming up. - - If the search space is large, you can increase this value. Or if max_trial_number is small, you may want to decrease it. - - * - ``tpe_args.n_ei_candidates`` - - ``int`` - - ``24`` - - For each iteration TPE samples EI for N sets of parameters and choose the best one. (loosely speaking) - - * - ``tpe_args.linear_forgetting`` - - ``int`` - - ``25`` - - TPE will lower the weights of old trials. This controls how many iterations it takes for a trial to start decay. - - * - ``tpe_args.prior_weight`` - - ``float`` - - ``1.0`` - - TPE treats user provided search space as prior. - When generating new trials, it also incorporates the prior in trial history by transforming the search space to - one trial configuration (i.e., each parameter of this configuration chooses the mean of its candidate range). - Here, prior_weight determines the weight of this trial configuration in the history trial configurations. - - With prior weight 1.0, the search space is treated as one good trial. - For example, "normal(0, 1)" effectly equals to a trial with x = 0 which has yielded good result. - - * - ``tpe_args.gamma`` - - ``float`` - - ``0.25`` - - Controls how many trials are considered "good". - - The number is calculated as "min(gamma * sqrt(N), linear_forgetting)". diff --git a/docs/source/Tutorial/QuickStart.rst b/docs/source/Tutorial/QuickStart.rst index 8fb2ac4df..034e19db3 100644 --- a/docs/source/Tutorial/QuickStart.rst +++ b/docs/source/Tutorial/QuickStart.rst @@ -168,7 +168,7 @@ Experiment config reference could be found `here <../reference/experiment_config .. _nniignore: -.. 
Note:: If you are planning to use remote machines or clusters as your :doc:`training service <../TrainingService/Overview>`, to avoid too much pressure on network, NNI limits the number of files to 2000 and total size to 300MB. If your codeDir contains too many files, you can choose which files and subfolders should be excluded by adding a ``.nniignore`` file that works like a ``.gitignore`` file. For more details on how to write this file, see the `git documentation `__. +.. Note:: If you are planning to use remote machines or clusters as your training service, to avoid too much pressure on network, NNI limits the number of files to 2000 and total size to 300MB. If your codeDir contains too many files, you can choose which files and subfolders should be excluded by adding a ``.nniignore`` file that works like a ``.gitignore`` file. For more details on how to write this file, see the `git documentation `__. *Example:* :githublink:`config_detailed.yml ` and :githublink:`.nniignore ` diff --git a/docs/source/Tutorial/QuickStart_zh.rst b/docs/source/Tutorial/QuickStart_zh.rst index db4ff6f11..38f1325fc 100644 --- a/docs/source/Tutorial/QuickStart_zh.rst +++ b/docs/source/Tutorial/QuickStart_zh.rst @@ -1,4 +1,4 @@ -.. 90b7c298df11d68ba419a1feaf453cfc +.. 8eaeffaa088fa022c601eb18bebd0736 快速入门 ========== @@ -170,7 +170,7 @@ Experiment 的配置文件可以参考 `文档 <../reference/experiment_config.r .. _nniignore: -.. Note:: 如果要使用远程服务器或集群作为 :doc:`训练平台 <../TrainingService/Overview>`,为了避免产生过大的网络压力,NNI 限制了文件的最大数量为 2000,大小为 300 MB。 如果代码目录中包含了过多的文件,可添加 ``.nniignore`` 文件来排除部分,与 ``.gitignore`` 文件用法类似。 参考 `git documentation `__ ,了解更多如何编写此文件的详细信息。 +.. 
Note:: 如果要使用远程服务器或集群作为训练平台,为了避免产生过大的网络压力,NNI 限制了文件的最大数量为 2000,大小为 300 MB。 如果代码目录中包含了过多的文件,可添加 ``.nniignore`` 文件来排除部分,与 ``.gitignore`` 文件用法类似。 参考 `git documentation `__ ,了解更多如何编写此文件的详细信息。 *示例:* :githublink:`config.yml ` 和 :githublink:`.nniignore ` diff --git a/docs/source/Tutorial/WebUI_zh.rst b/docs/source/Tutorial/WebUI_zh.rst deleted file mode 100644 index 216189d58..000000000 --- a/docs/source/Tutorial/WebUI_zh.rst +++ /dev/null @@ -1,328 +0,0 @@ -.. bb68c969dbc2b3a2ec79d323cbd31401 - -Web 界面 -================== - -Experiments 管理 ------------------------ - -点击导航栏上的 ``All experiments`` 标签。 - -.. image:: ../../img/webui-img/managerExperimentList/experimentListNav.png - :target: ../../img/webui-img/managerExperimentList/experimentListNav.png - :alt: ExperimentList nav - - - -* 在 ``All experiments`` 页面,可以看到机器上的所有 Experiment。 - -.. image:: ../../img/webui-img/managerExperimentList/expList.png - :target: ../../img/webui-img/managerExperimentList/expList.png - :alt: Experiments list - - - -* 查看 Experiment 更多详细信息时,可以单击 trial ID 跳转至该 Experiment 详情页,如下所示: - -.. image:: ../../img/webui-img/managerExperimentList/toAnotherExp.png - :target: ../../img/webui-img/managerExperimentList/toAnotherExp.png - :alt: See this experiment detail - - - -* 如果表格里有很多 Experiment,可以使用 ``filter`` 按钮。 - -.. image:: ../../img/webui-img/managerExperimentList/expFilter.png - :target: ../../img/webui-img/managerExperimentList/expFilter.png - :alt: filter button - - - -查看概要页面 ------------------ - -点击 ``Overview`` 标签。 - - -* 在 Overview 标签上,可看到 Experiment trial 的概况、搜索空间以及 ``top trials`` 的结果。 - - -.. image:: ../../img/webui-img/full-oview.png - :target: ../../img/webui-img/full-oview.png - :alt: overview - - - -如果想查看 Experiment 配置和搜索空间,点击右边的 ``Search space`` 和 ``Config`` 按钮。 - - 1. 搜索空间文件: - - - .. image:: ../../img/webui-img/searchSpace.png - :target: ../../img/webui-img/searchSpace.png - :alt: searchSpace - - - - 2. 配置文件: - - - .. 
image:: ../../img/webui-img/config.png - :target: ../../img/webui-img/config.png - :alt: config - - - -* 你可以在这里查看和下载 ``nni-manager/dispatcher 日志文件``。 - - -.. image:: ../../img/webui-img/review-log.png - :target: ../../img/webui-img/review-log.png - :alt: logfile - - - -* 如果 Experiment 包含了较多 Trial,可改变刷新间隔。 - - -.. image:: ../../img/webui-img/refresh-interval.png - :target: ../../img/webui-img/refresh-interval.png - :alt: refresh - - - - -* 单击按钮 ``Experiment summary`` ,可以查看和下载 Experiment 结果(``Experiment 配置``,``trial 信息`` 和 ``中间结果`` )。 - - -.. image:: ../../img/webui-img/summary.png - :target: ../../img/webui-img/summary.png - :alt: summary - - - -* 在这里修改 Experiment 配置(例如 ``maxExecDuration``, ``maxTrialNum`` 和 ``trial concurrency``)。 - - -.. image:: ../../img/webui-img/edit-experiment-param.png - :target: ../../img/webui-img/edit-experiment-param.png - :alt: editExperimentParams - - - -* 通过单击 ``Learn about`` ,可以查看错误消息和 ``nni-manager/dispatcher 日志文件`` - - -.. image:: ../../img/webui-img/experimentError.png - :target: ../../img/webui-img/experimentError.png - :alt: experimentError - - - - -* ``About`` 菜单内含有版本信息以及问题反馈渠道。 - -查看 trial 最终结果 ----------------------------------------------- - - -* ``Default metric`` 是所有 trial 的最终结果图。 在每一个结果上悬停鼠标可以看到 trial 信息,比如 trial id、No.、超参等。 - - -.. image:: ../../img/webui-img/default-metric.png - :target: ../../img/webui-img/default-metric.png - :alt: defaultMetricGraph - - - -* 打开 ``Optimization curve`` 来查看 Experiment 的优化曲线。 - - -.. image:: ../../img/webui-img/best-curve.png - :target: ../../img/webui-img/best-curve.png - :alt: bestCurveGraph - - -查看超参 --------------------- - -单击 ``Hyper-parameter`` 标签查看平行坐标系图。 - - -* 可以点击 ``添加/删除`` 按钮来添加或删减纵坐标轴。 -* 直接在图上拖动轴线来交换轴线位置。 -* 通过调节百分比来查看 top trial。 - - -.. image:: ../../img/webui-img/hyperPara.png - :target: ../../img/webui-img/hyperPara.png - :alt: hyperParameterGraph - - - -查看 Trial 运行时间 -------------------- - -点击 ``Trial Duration`` 标签来查看柱状图。 - - -.. 
image:: ../../img/webui-img/trial_duration.png - :target: ../../img/webui-img/trial_duration.png - :alt: trialDurationGraph - - - -查看 Trial 中间结果 ------------------------------------- - -单击 ``Intermediate Result`` 标签查看折线图。 - - -.. image:: ../../img/webui-img/trials_intermeidate.png - :target: ../../img/webui-img/trials_intermeidate.png - :alt: trialIntermediateGraph - - - -Trial 在训练过程中可能有大量中间结果。 为了更清楚的理解一些 Trial 的趋势,可以为中间结果图设置过滤功能。 - -这样可以发现 Trial 在某个中间结果上会变得更好或更差。 这表明它是一个重要的并相关的中间结果。 如果要仔细查看这个点,可以在 #Intermediate 中输入其 X 坐标。 并输入这个中间结果的指标范围。 在下图中,选择了第四个中间结果并将指标范围设置为了 0.8 -1。 - - -.. image:: ../../img/webui-img/filter-intermediate.png - :target: ../../img/webui-img/filter-intermediate.png - :alt: filterIntermediateGraph - - - -查看 Trial 状态 ------------------- - -点击 ``Trials Detail`` 标签查看所有 Trial 的状态。具体如下: - - -* Trial 详情:Trial id,持续时间,开始时间,结束时间,状态,精度和 search space 文件。 - - -.. image:: ../../img/webui-img/detail-local.png - :target: ../../img/webui-img/detail-local.png - :alt: detailLocalImage - - - -* 支持通过 id,状态,Trial 编号以及参数来搜索。 - -1. Trial id: - -.. image:: ../../img/webui-img/detail/searchId.png - :target: ../../img/webui-img/detail/searchId.png - :alt: searchTrialId - - -2. Trial No.: - -.. image:: ../../img/webui-img/detail/searchNo.png - :target: ../../img/webui-img/detail/searchNo.png - :alt: searchTrialNo. - - -3. Trial 状态: - -.. image:: ../../img/webui-img/detail/searchStatus.png - :target: ../../img/webui-img/detail/searchStatus.png - :alt: searchStatus - -4. Trial 参数: - -(1) 类型为 choice 的参数: - -.. image:: ../../img/webui-img/detail/searchParameterChoice.png - :target: ../../img/webui-img/detail/searchParameterChoice.png - :alt: searchParameterChoice - -(2) 类型不是 choice 的参数: - -.. image:: ../../img/webui-img/detail/searchParameterRange.png - :target: ../../img/webui-img/detail/searchParameterRange.png - :alt: searchParameterRange - - -* ``Add column`` 按钮可选择在表格中显示的列。 如果 Experiment 的最终结果是 dict,则可以在表格中查看其它键。可选择 ``Intermediate count`` 列来查看 Trial 进度。 - - -.. 
image:: ../../img/webui-img/addColumn.png - :target: ../../img/webui-img/addColumn.png - :alt: addColumnGraph - - - -* 如果要比较某些 Trial,可选择并点击 ``Compare`` 来查看结果。 - - -.. image:: ../../img/webui-img/select-trial.png - :target: ../../img/webui-img/select-trial.png - :alt: selectTrialGraph - - -.. image:: ../../img/webui-img/compare.png - :target: ../../img/webui-img/compare.png - :alt: compareTrialsGraph - - -* ``Tensorboard`` 请参考 `此文档 `__。 - - -* 可使用 ``Copy as python`` 按钮来拷贝 Trial 的参数。 - - -.. image:: ../../img/webui-img/copyParameter.png - :target: ../../img/webui-img/copyParameter.png - :alt: copyTrialParameters - - - -* 您可以在 ``Log`` 选项卡上看到 Trial 日志。 在本地模式下有 ``View trial log``, ``View trial error`` 和 ``View trial stdout`` 三个按钮。 * 如果在 OpenPAI 或 Kubeflow 平台上运行,还可以看到 hdfsLog。 - -1. 本机模式 - -.. image:: ../../img/webui-img/detail/log-local.png - :target: ../../img/webui-img/detail/log-local.png - :alt: logOnLocal - - -2. OpenPAI、Kubeflow 等模式: - -.. image:: ../../img/webui-img/detail-pai.png - :target: ../../img/webui-img/detail-pai.png - :alt: detailPai - - -* 中间结果图:可在此图中通过点击 intermediate 按钮来查看默认指标。 - - -.. image:: ../../img/webui-img/intermediate.png - :target: ../../img/webui-img/intermediate.png - :alt: intermeidateGraph - - - -* Kill: 可终止正在运行的任务。 - - -.. image:: ../../img/webui-img/kill-running.png - :target: ../../img/webui-img/kill-running.png - :alt: killTrial - - - -* 自定义 Trial:您可以更改此 Trial 参数,然后将其提交给 Experiment。如果您想重新运行失败的 Trial ,您可以向 Experiment 提交相同的参数。 - -.. image:: ../../img/webui-img/detail/customizedTrialButton.png - :target: ../../img/webui-img/detail/customizedTrialButton.png - :alt: customizedTrialButton - - - -.. image:: ../../img/webui-img/detail/customizedTrial.png - :target: ../../img/webui-img/detail/customizedTrial.png - :alt: customizedTrial diff --git a/docs/source/autotune_ref.rst b/docs/source/autotune_ref.rst index a3e3261ff..fc1bd9706 100644 --- a/docs/source/autotune_ref.rst +++ b/docs/source/autotune_ref.rst @@ -23,6 +23,8 @@ Tuner .. 
autoclass:: nni.algorithms.hpo.tpe_tuner.TpeTuner :members: +.. autoclass:: nni.algorithms.hpo.tpe_tuner.TpeArguments + .. autoclass:: nni.algorithms.hpo.random_tuner.RandomTuner :members: diff --git a/docs/source/builtin_assessor_zh.rst b/docs/source/builtin_assessor_zh.rst deleted file mode 100644 index e745109d0..000000000 --- a/docs/source/builtin_assessor_zh.rst +++ /dev/null @@ -1,21 +0,0 @@ -.. d5351e951811dcaeeda7f270427187fd - -内置 Assessor -================= - -为了节省计算资源,NNI 支持提前终止策略,并且通过叫做 **Assessor** 的接口来执行此操作。 - -Assessor 从 Trial 中接收中间结果,并通过指定的算法决定此 Trial 是否应该终止。 一旦 Trial 满足了提前终止策略(这表示 Assessor 认为最终结果不会太好),Assessor 会终止此 Trial,并将其状态标志为 `EARLY_STOPPED`。 - -这是 MNIST 在 "最大化" 模式下使用 "曲线拟合" Assessor 的实验结果。 可以看到 Assessor 成功的 **提前终止** 了许多结果不好超参组合的 Trial。 使用 Assessor,能在相同的计算资源下,得到更好的结果。 - -实验代码: :githublink:`config_assessor.yml ` - -.. image:: ../img/Assessor.png - -.. toctree:: - :maxdepth: 1 - - 概述<./Assessor/BuiltinAssessor> - Medianstop<./Assessor/MedianstopAssessor> - Curvefitting(曲线拟合)<./Assessor/CurvefittingAssessor> diff --git a/docs/source/builtin_tuner_zh.rst b/docs/source/builtin_tuner_zh.rst deleted file mode 100644 index 813be5007..000000000 --- a/docs/source/builtin_tuner_zh.rst +++ /dev/null @@ -1,76 +0,0 @@ -.. 10b9097fcfec13f98bb6914b40bd0337 - -内置 Tuner -========== - -为了让机器学习和深度学习模型适应不同的任务和问题,我们需要进行超参数调优,而自动化调优依赖于优秀的调优算法。NNI 内置了先进的调优算法,并且提供了易于使用的 API。 - -在 NNI 中,调优算法被称为“tuner”。Tuner 向 trial 发送超参数,接收运行结果从而评估这组超参的性能,然后将下一组超参发送给新的 trial。 - -下表简要介绍了 NNI 内置的调优算法。点击 tuner 的名称可以查看其安装需求、推荐使用场景、示例配置文件等详细信息。`这篇文章 <../CommunitySharings/HpoComparison.rst>`__ 对比了各个 tuner 在不同场景下的性能。 - -.. 
list-table:: - :header-rows: 1 - :widths: auto - - * - Tuner - - 算法简介 - - * - `TPE <./TpeTuner.rst>`__ - - Tree-structured Parzen Estimator (TPE) 是一种基于序列模型的优化方法 (sequential model-based optimization, SMBO)。SMBO方法根据历史数据来顺序地构造模型,从而预估超参性能,并基于此模型来选择新的超参。`参考论文 `__ - - * - `Random Search (随机搜索) <./RandomTuner.rst>`__ - - 随机搜索在超算优化中表现出了令人意外的性能。如果没有对超参分布的先验知识,我们推荐使用随机搜索作为基线方法。`参考论文 `__ - - * - `Anneal (退火) <./AnnealTuner.rst>`__ - - 朴素退火算法首先基于先验进行采样,然后逐渐逼近实际性能较好的采样点。该算法是随即搜索的变体,利用了反应曲面的平滑性。该实现中退火率不是自适应的。 - - * - `Naive Evolution(朴素进化) <./EvolutionTuner.rst>`__ - - 朴素进化算法来自于 Large-Scale Evolution of Image Classifiers。它基于搜索空间随机生成一个种群,在每一代中选择较好的结果,并对其下一代进行变异。朴素进化算法需要很多 Trial 才能取得最优效果,但它也非常简单,易于扩展。`参考论文 `__ - - * - `SMAC <./SmacTuner.rst>`__ - - SMAC 是基于序列模型的优化方法 (SMBO)。它利用使用过的最突出的模型(高斯随机过程模型),并将随机森林引入到SMBO中,来处理分类参数。NNI 的 SMAC tuner 封装了 GitHub 上的 `SMAC3 `__。`参考论文 `__ - - 注意:SMAC 算法需要使用 ``pip install nni[SMAC]`` 安装依赖,暂不支持 Windows 操作系统。 - - * - `Batch(批处理) <./BatchTuner.rst>`__ - - 批处理允许用户直接提供若干组配置,为每种配置运行一个 trial。 - - * - `Grid Search(网格遍历) <./GridsearchTuner.rst>`__ - - 网格遍历会穷举搜索空间中的所有超参组合。 - - * - `Hyperband <./HyperbandAdvisor.rst>`__ - - Hyperband 试图用有限的资源探索尽可能多的超参组合。该算法的思路是,首先生成大量超参配置,将每组超参运行较短的一段时间,随后抛弃其中效果较差的一半,让较好的超参继续运行,如此重复多轮。`参考论文 `__ - - * - `Metis <./MetisTuner.rst>`__ - - 大多数调参工具仅仅预测最优配置,而 Metis 的优势在于它有两个输出:(a) 最优配置的当前预测结果, 以及 (b) 下一次 trial 的建议。大多数工具假设训练集没有噪声数据,但 Metis 会知道是否需要对某个超参重新采样。`参考论文 `__ - - * - `BOHB <./BohbAdvisor.rst>`__ - - BOHB 是 Hyperband 算法的后续工作。 Hyperband 在生成新的配置时,没有利用已有的 trial 结果,而本算法利用了 trial 结果。BOHB 中,HB 表示 Hyperband,BO 表示贝叶斯优化(Byesian Optimization)。 BOHB 会建立多个 TPE 模型,从而利用已完成的 Trial 生成新的配置。`参考论文 `__ - - * - `GP (高斯过程) <./GPTuner.rst>`__ - - GP Tuner 是基于序列模型的优化方法 (SMBO),使用高斯过程进行 surrogate。`参考论文 `__ - - * - `PBT <./PBTTuner.rst>`__ - - PBT Tuner 是一种简单的异步优化算法,在固定的计算资源下,它能有效的联合优化一组模型及其超参来最优化性能。`参考论文 `__ - - * - `DNGO <./DngoTuner.rst>`__ - - DNGO 是基于序列模型的优化方法 (SMBO),该算法使用神经网络(而不是高斯过程)去建模贝叶斯优化中所需要的函数分布。 - -.. 
toctree:: - :maxdepth: 1 - - TPE - Random Search(随机搜索) - Anneal(退火) - Naïve Evolution(朴素进化) - SMAC - Metis Tuner - Batch Tuner(批处理) - Grid Search(网格遍历) - GP Tuner - Network Morphism - Hyperband - BOHB - PBT Tuner - DNGO Tuner diff --git a/docs/source/conf.py b/docs/source/conf.py index 84cf1bc7d..7eb17b343 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -79,6 +79,10 @@ autosummary_mock_imports = [ 'nni.tools.jupyter_extension.management', ] + autodoc_mock_imports +autodoc_typehints = 'description' +autodoc_typehints_description_target = 'documented' +autodoc_inherit_docstrings = False + # Bibliography files bibtex_bibfiles = ['refs.bib'] diff --git a/docs/source/examples_zh.rst b/docs/source/examples_zh.rst deleted file mode 100644 index c51da740a..000000000 --- a/docs/source/examples_zh.rst +++ /dev/null @@ -1,14 +0,0 @@ -.. d19a00598b8eca71c825d80c0a7106f2 - -###################### -示例 -###################### - -.. toctree:: - :maxdepth: 2 - - MNIST<./TrialExample/MnistExamples> - Cifar10<./TrialExample/Cifar10Examples> - Scikit-learn<./TrialExample/SklearnExamples> - GBDT<./TrialExample/GbdtExample> - Pix2pix<./TrialExample/Pix2pixExample> \ No newline at end of file diff --git a/docs/source/hpo/advanced_toctree.rst b/docs/source/hpo/advanced_toctree.rst new file mode 100644 index 000000000..0f2f31cf4 --- /dev/null +++ b/docs/source/hpo/advanced_toctree.rst @@ -0,0 +1,11 @@ +########################### +Hyperparameter Optimization +########################### + +.. 
toctree:: + :maxdepth: 2 + + TensorBoard Integration + Implement Custom Tuners and Assessors + Install Custom or 3rd-party Tuners and Assessors + Tuner Benchmark diff --git a/docs/source/builtin_assessor.rst b/docs/source/hpo/assessors.rst similarity index 80% rename from docs/source/builtin_assessor.rst rename to docs/source/hpo/assessors.rst index 9b059f60f..6361af0fb 100644 --- a/docs/source/builtin_assessor.rst +++ b/docs/source/hpo/assessors.rst @@ -1,5 +1,5 @@ -Builtin-Assessors -================= +Assessor: Early Stopping +======================== In order to save on computing resources, NNI supports an early stopping policy and has an interface called **Assessor** to do this job. @@ -9,11 +9,11 @@ Here is an experimental result of MNIST after using the 'Curvefitting' Assessor Implemented code directory: :githublink:`config_assessor.yml ` -.. image:: ../img/Assessor.png +.. image:: ../../img/Assessor.png .. toctree:: :maxdepth: 1 - Overview<./Assessor/BuiltinAssessor> - Medianstop<./Assessor/MedianstopAssessor> - Curvefitting<./Assessor/CurvefittingAssessor> + Overview<../Assessor/BuiltinAssessor> + Medianstop<../Assessor/MedianstopAssessor> + Curvefitting<../Assessor/CurvefittingAssessor> diff --git a/docs/source/Tuner/CustomizeTuner.rst b/docs/source/hpo/custom_algorithm.rst similarity index 71% rename from docs/source/Tuner/CustomizeTuner.rst rename to docs/source/hpo/custom_algorithm.rst index b79563662..c62b72c99 100644 --- a/docs/source/Tuner/CustomizeTuner.rst +++ b/docs/source/hpo/custom_algorithm.rst @@ -1,4 +1,4 @@ -Customize-Tuner +Customize Tuner =============== NNI provides state-of-the-art tuning algorithm in builtin-tuners. NNI supports to build a tuner by yourself for tuning demand. @@ -123,3 +123,68 @@ Write a more advanced automl algorithm ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ The methods above are usually enough to write a general tuner. 
However, users may also want more methods, for example, intermediate results, trials' state (e.g., the methods in assessor), in order to have a more powerful automl algorithm. Therefore, we have another concept called ``advisor`` which directly inherits from ``MsgDispatcherBase`` in :githublink:`msg_dispatcher_base.py `. Please refer to `here `__ for how to write a customized advisor. + +Customize Assessor +================== + +NNI supports to build an assessor by yourself for tuning demand. + +If you want to implement a customized Assessor, there are three things to do: + + +#. Inherit the base Assessor class +#. Implement assess_trial function +#. Configure your customized Assessor in experiment YAML config file + +**1. Inherit the base Assessor class** + +.. code-block:: python + + from nni.assessor import Assessor + + class CustomizedAssessor(Assessor): + def __init__(self, ...): + ... + +**2. Implement assess trial function** + +.. code-block:: python + + from nni.assessor import Assessor, AssessResult + + class CustomizedAssessor(Assessor): + def __init__(self, ...): + ... + + def assess_trial(self, trial_history): + """ + Determines whether a trial should be killed. Must override. + trial_history: a list of intermediate result objects. + Returns AssessResult.Good or AssessResult.Bad. + """ + # you code implement here. + ... + +**3. Configure your customized Assessor in experiment YAML config file** + +NNI needs to locate your customized Assessor class and instantiate the class, so you need to specify the location of the customized Assessor class and pass literal values as parameters to the __init__ constructor. + +.. code-block:: yaml + + assessor: + codeDir: /home/abc/myassessor + classFileName: my_customized_assessor.py + className: CustomizedAssessor + # Any parameter need to pass to your Assessor class __init__ constructor + # can be specified in this optional classArgs field, for example + classArgs: + arg1: value1 + +Please noted in **2**. 
The object ``trial_history`` is exactly the object that the Trial sends to the Assessor via the SDK ``report_intermediate_result`` function.
The following three tables show @@ -154,52 +154,52 @@ To view the same data in another way, for each tuner, we present the average ran Besides these reports, our script also generates two graphs for each fold of each task: one graph presents the best score received by each tuner until trial x, and another graph shows the score that each tuner receives in trial x. These two graphs can give some information regarding how the tuners are "converging" to their final solution. We found that for "nnismall", tuners on the random forest model with search space defined in ``/examples/trials/benchmarking/automlbenchmark/nni/extensions/NNI/architectures/run_random_forest.py`` generally converge to the final solution after 40 to 60 trials. As there are too much graphs to incldue in a single report (96 graphs in total), we only present 10 graphs here. -.. image:: ../img/hpo_benchmark/car_fold1_1.jpg - :target: ../img/hpo_benchmark/car_fold1_1.jpg +.. image:: ../../img/hpo_benchmark/car_fold1_1.jpg + :target: ../../img/hpo_benchmark/car_fold1_1.jpg :alt: -.. image:: ../img/hpo_benchmark/car_fold1_2.jpg - :target: ../img/hpo_benchmark/car_fold1_2.jpg +.. image:: ../../img/hpo_benchmark/car_fold1_2.jpg + :target: ../../img/hpo_benchmark/car_fold1_2.jpg :alt: The previous two graphs are generated for fold 1 of the task "car". In the first graph, we observe that most tuners find a relatively good solution within 40 trials. In this experiment, among all tuners, the DNGOTuner converges fastest to the best solution (within 10 trials). Its best score improved for three times in the entire experiment. In the second graph, we observe that most tuners have their score flucturate between 0.8 and 1 throughout the experiment. However, it seems that the Anneal tuner (green line) is more unstable (having more fluctuations) while the GPTuner has a more stable pattern. 
This may be interpreted as the Anneal tuner explores more aggressively than the GPTuner and thus its scores for different trials vary a lot. Regardless, although this pattern can to some extent hint a tuner's position on the explore-exploit tradeoff, it is not a comprehensive evaluation of a tuner's effectiveness. -.. image:: ../img/hpo_benchmark/christine_fold0_1.jpg - :target: ../img/hpo_benchmark/christine_fold0_1.jpg +.. image:: ../../img/hpo_benchmark/christine_fold0_1.jpg + :target: ../../img/hpo_benchmark/christine_fold0_1.jpg :alt: -.. image:: ../img/hpo_benchmark/christine_fold0_2.jpg - :target: ../img/hpo_benchmark/christine_fold0_2.jpg +.. image:: ../../img/hpo_benchmark/christine_fold0_2.jpg + :target: ../../img/hpo_benchmark/christine_fold0_2.jpg :alt: -.. image:: ../img/hpo_benchmark/cnae-9_fold0_1.jpg - :target: ../img/hpo_benchmark/cnae-9_fold0_1.jpg +.. image:: ../../img/hpo_benchmark/cnae-9_fold0_1.jpg + :target: ../../img/hpo_benchmark/cnae-9_fold0_1.jpg :alt: -.. image:: ../img/hpo_benchmark/cnae-9_fold0_2.jpg - :target: ../img/hpo_benchmark/cnae-9_fold0_2.jpg +.. image:: ../../img/hpo_benchmark/cnae-9_fold0_2.jpg + :target: ../../img/hpo_benchmark/cnae-9_fold0_2.jpg :alt: -.. image:: ../img/hpo_benchmark/credit-g_fold1_1.jpg - :target: ../img/hpo_benchmark/credit-g_fold1_1.jpg +.. image:: ../../img/hpo_benchmark/credit-g_fold1_1.jpg + :target: ../../img/hpo_benchmark/credit-g_fold1_1.jpg :alt: -.. image:: ../img/hpo_benchmark/credit-g_fold1_2.jpg - :target: ../img/hpo_benchmark/credit-g_fold1_2.jpg +.. image:: ../../img/hpo_benchmark/credit-g_fold1_2.jpg + :target: ../../img/hpo_benchmark/credit-g_fold1_2.jpg :alt: -.. image:: ../img/hpo_benchmark/titanic_2_fold1_1.jpg - :target: ../img/hpo_benchmark/titanic_2_fold1_1.jpg +.. image:: ../../img/hpo_benchmark/titanic_2_fold1_1.jpg + :target: ../../img/hpo_benchmark/titanic_2_fold1_1.jpg :alt: -.. 
image:: ../img/hpo_benchmark/titanic_2_fold1_2.jpg - :target: ../img/hpo_benchmark/titanic_2_fold1_2.jpg +.. image:: ../../img/hpo_benchmark/titanic_2_fold1_2.jpg + :target: ../../img/hpo_benchmark/titanic_2_fold1_2.jpg :alt: diff --git a/docs/source/hpo/index.rst b/docs/source/hpo/index.rst new file mode 100644 index 000000000..8a8d9adfd --- /dev/null +++ b/docs/source/hpo/index.rst @@ -0,0 +1,21 @@ +########################### +Hyperparameter Optimization +########################### + +.. raw:: html + + + +.. toctree:: + :maxdepth: 2 + + Overview + Search Space + Tuners + Assessors + Advanced Usage diff --git a/docs/source/Tutorial/AnnotationSpec.rst b/docs/source/hpo/nni_annotation.rst similarity index 99% rename from docs/source/Tutorial/AnnotationSpec.rst rename to docs/source/hpo/nni_annotation.rst index ed3a2918a..029abc304 100644 --- a/docs/source/Tutorial/AnnotationSpec.rst +++ b/docs/source/hpo/nni_annotation.rst @@ -1,3 +1,5 @@ +:orphan: + NNI Annotation ============== diff --git a/docs/source/hpo/overview.rst b/docs/source/hpo/overview.rst new file mode 100644 index 000000000..87ce1eb65 --- /dev/null +++ b/docs/source/hpo/overview.rst @@ -0,0 +1,105 @@ +Hyperparameter Optimization Overview +==================================== + +Auto hyperparameter optimization (HPO), or auto tuning, is one of the key features of NNI. + +Introduction to HPO +------------------- + +In machine learning, a hyperparameter is a parameter whose value is used to control learning process [1]_, +and HPO is the problem of choosing a set of optimal hyperparameters for a learning algorithm [2]_. + +.. [1] https://en.wikipedia.org/wiki/Hyperparameter_(machine_learning) +.. [2] https://en.wikipedia.org/wiki/Hyperparameter_optimization + +Following code snippet demonstrates a naive HPO process: + +.. 
code-block:: python + + best_hyperparameters = None + best_accuracy = 0 + + for learning_rate in [0.1, 0.01, 0.001, 0.0001]: + for momentum in [i / 10 for i in range(10)]: + for activation_type in ['relu', 'tanh', 'sigmoid']: + model = build_model(activation_type) + train_model(model, learning_rate, momentum) + accuracy = evaluate_model(model) + + if accuracy > best_accuracy: + best_accuracy = accuracy + best_hyperparameters = (learning_rate, momentum, activation_type) + + print('Best hyperparameters:', best_hyperparameters) + +You may have noticed, the example will train 4×10×3=120 models in total. +Since it consumes so much computing resources, you may want to: + + 1. Find the best set of hyperparameters with less iterations. + 2. Train the models on distributed platforms. + 3. Have a portal to monitor and control the process. + +And NNI will do them for you. + +Key Features of NNI HPO +----------------------- + +Tuning Algorithms +^^^^^^^^^^^^^^^^^ + +NNI provides *tuners* to speed up the process of finding best hyperparameter set. + +A tuner, or a tuning algorithm, decides the order in which hyperparameter sets are evaluated. +Based on the results of historical hyperparameter sets, an efficient tuner can predict where the best hyperparameters locates around, +and finds them in much fewer attempts. + +The naive example above evaluates all possible hyperparameter sets in constant order, ignoring the historical results. +This is the brute-force tuning algorithm called *grid search*. + +NNI has out-of-the-box support for a variety of popular tuners. +It includes naive algorithms like random search and grid search, Bayesian-based algorithms like TPE and SMAC, +RL based algorithms like PPO, and much more. + +Main article: :doc:`tuners` + +Training Platforms +^^^^^^^^^^^^^^^^^^ + +If you are not interested in distributed platforms, you can simply run NNI HPO with current computer, +just like any ordinary Python library. 
+ +And when you want to leverage more computing resources, NNI provides built-in integration for training platforms +from simple on-premise servers to scalable commercial clouds. + +With NNI you can write one piece of model code, and concurrently evaluate hyperparameter sets on local machine, SSH servers, +Kubernetes-based clusters, AzureML service, and much more. + +Main article: (FIXME: link to training_services) + +Web UI +^^^^^^ + +NNI provides a web portal to monitor training progress, to visualize hyperparameter performance, +to manually customize hyperparameters, and to manage multiple HPO experiments. + +(FIXME: image and link) + +Tutorials +--------- + +To start using NNI HPO, choose the tutorial of your favorite framework: + + * PyTorch MNIST tutorial + * :doc:`TensorFlow MNIST tutorial ` + +Extra Features +-------------- + +After you are familiar with basic usage, you can explore more HPO features: + + * :doc:`Assessor: Early stop non-optimal models ` + * :doc:`nnictl: Use command line tool to create and manage experiments ` + * :doc:`Custom tuner: Implement your own tuner ` + * :doc:`Tensorboard support ` + * :doc:`Tuner benchmark ` + * :doc:`NNI Annotation (legacy) ` diff --git a/docs/source/Tutorial/SearchSpaceSpec.rst b/docs/source/hpo/search_space.rst similarity index 100% rename from docs/source/Tutorial/SearchSpaceSpec.rst rename to docs/source/hpo/search_space.rst diff --git a/docs/source/Tutorial/Tensorboard.rst b/docs/source/hpo/tensorboard.rst similarity index 100% rename from docs/source/Tutorial/Tensorboard.rst rename to docs/source/hpo/tensorboard.rst diff --git a/docs/source/builtin_tuner.rst b/docs/source/hpo/tuners.rst similarity index 58% rename from docs/source/builtin_tuner.rst rename to docs/source/hpo/tuners.rst index b8867e825..92f52f50a 100644 --- a/docs/source/builtin_tuner.rst +++ b/docs/source/hpo/tuners.rst @@ -1,11 +1,46 @@ -Builtin-Tuners -============== +Tuner: Tuning Algorithms +======================== -NNI provides an 
easy way to adopt an approach to set up parameter tuning algorithms, we call them **Tuner**. +The tuner decides which hyperparameter sets will be evaluated. It is a most important part of NNI HPO. -Tuner receives metrics from `Trial` to evaluate the performance of a specific parameters/architecture configuration. Tuner sends the next hyper-parameter or architecture configuration to Trial. +A tuner works in following steps: -The following table briefly describes the built-in tuners provided by NNI. Click the **Tuner's name** to get the Tuner's installation requirements, suggested scenario, and an example configuration. A link for a detailed description of each algorithm is located at the end of the suggested scenario for each tuner. Here is an `article <../CommunitySharings/HpoComparison.rst>`__ comparing different Tuners on several problems. + 1. Initialize with a search space. + 2. Generate hyperparameter sets from the search space. + 3. Send hyperparameters to trials. + 4. Receive evaluation results. + 5. Update internal states according to the results. + 6. Go to step 2, until experiment end. + +NNI has out-of-the-box support for many popular tuning algorithms. +They should be sufficient to cover most typical machine learning scenarios. + +However, if you have a very specific demand, or if you have designed an algorithm yourself, +you can also implement your own tuner: :doc:`custom_algorithm` + +Common Usage +------------ + +All built-in tuners have similar usage. + +To use a built-in tuner, you need to specify its name and arguments in experiment config, +and provides a standard :doc:`search_space`. +Some tuners, like SMAC and DNGO, have extra dependencies that need to be installed separately. + +Please check each tuner's reference page for what arguments it supports and whether it needs extra dependencies. + +For a general example, random tuner can be configured as follow: + +.. 
code-block:: python + + config.search_space = { + 'x': {'_type': 'uniform', '_value': [0, 1]} + } + config.tuner.name = 'Random' + config.tuner.class_args = {'seed': 0} + +Full List +--------- .. list-table:: :header-rows: 1 @@ -14,61 +49,52 @@ The following table briefly describes the built-in tuners provided by NNI. Click * - Tuner - Brief Introduction of Algorithm - * - `TPE <./TpeTuner.rst>`__ + * - `TPE <../autotune_ref.html#nni.algorithms.hpo.tpe_tuner.TpeTuner>`_ - The Tree-structured Parzen Estimator (TPE) is a sequential model-based optimization (SMBO) approach. SMBO methods sequentially construct models to approximate the performance of hyperparameters based on historical measurements, and then subsequently choose new hyperparameters to test based on this model. `Reference Paper `__ - * - `Random Search <./RandomTuner.rst>`__ + * - `Random Search <../autotune_ref.html#nni.algorithms.hpo.random_tuner.RandomTuner>`_ - In Random Search for Hyper-Parameter Optimization show that Random Search might be surprisingly simple and effective. We suggest that we could use Random Search as the baseline when we have no knowledge about the prior distribution of hyper-parameters. `Reference Paper `__ - * - `Anneal <./AnnealTuner.rst>`__ + * - `Anneal <../autotune_ref.html#nni.algorithms.hpo.hyperopt_tuner.HyperoptTuner>`_ - This simple annealing algorithm begins by sampling from the prior, but tends over time to sample from points closer and closer to the best ones observed. This algorithm is a simple variation on the random search that leverages smoothness in the response surface. The annealing rate is not adaptive. - * - `Naïve Evolution <./EvolutionTuner.rst>`__ - - Naïve Evolution comes from Large-Scale Evolution of Image Classifiers. It randomly initializes a population-based on search space. For each generation, it chooses better ones and does some mutation (e.g., change a hyperparameter, add/remove one layer) on them to get the next generation. 
Naïve Evolution requires many trials to work, but it's very simple and easy to expand new features. `Reference paper `__ + * - `Naive Evolution <../autotune_ref.html#nni.algorithms.hpo.evolution_tuner.EvolutionTuner>`_ + - Naive Evolution comes from Large-Scale Evolution of Image Classifiers. It randomly initializes a population-based on search space. For each generation, it chooses better ones and does some mutation (e.g., change a hyperparameter, add/remove one layer) on them to get the next generation. Naïve Evolution requires many trials to work, but it's very simple and easy to expand new features. `Reference paper `__ - * - `SMAC <./SmacTuner.rst>`__ + * - `SMAC <../autotune_ref.html#nni.algorithms.hpo.smac_tuner.SMACTuner>`_ - SMAC is based on Sequential Model-Based Optimization (SMBO). It adapts the most prominent previously used model class (Gaussian stochastic process models) and introduces the model class of random forests to SMBO, in order to handle categorical parameters. The SMAC supported by NNI is a wrapper on the SMAC3 GitHub repo. Notice, SMAC needs to be installed by ``pip install nni[SMAC]`` command. `Reference Paper, `__ `GitHub Repo `__ - * - `Batch tuner <./BatchTuner.rst>`__ + * - `Batch <../autotune_ref.html#nni.algorithms.hpo.batch_tuner.BatchTuner>`_ - Batch tuner allows users to simply provide several configurations (i.e., choices of hyper-parameters) for their trial code. After finishing all the configurations, the experiment is done. Batch tuner only supports the type choice in search space spec. - * - `Grid Search <./GridsearchTuner.rst>`__ + * - `Grid Search <../autotune_ref.html#nni.algorithms.hpo.gridsearch_tuner.GridSearchTuner>`_ - Grid Search performs an exhaustive searching through the search space. 
- * - `Hyperband <./HyperbandAdvisor.rst>`__ + * - `Hyperband <../autotune_ref.html#nni.algorithms.hpo.hyperband_advisor.Hyperband>`_ - Hyperband tries to use limited resources to explore as many configurations as possible and returns the most promising ones as a final result. The basic idea is to generate many configurations and run them for a small number of trials. The half least-promising configurations are thrown out, the remaining are further trained along with a selection of new configurations. The size of these populations is sensitive to resource constraints (e.g. allotted search time). `Reference Paper `__ - * - `Metis Tuner <./MetisTuner.rst>`__ + * - `Metis <../autotune_ref.html#nni.algorithms.hpo.metis_tuner.MetisTuner>`_ - Metis offers the following benefits when it comes to tuning parameters: While most tools only predict the optimal configuration, Metis gives you two outputs: (a) current prediction of optimal configuration, and (b) suggestion for the next trial. No more guesswork. While most tools assume training datasets do not have noisy data, Metis actually tells you if you need to re-sample a particular hyper-parameter. `Reference Paper `__ - * - `BOHB <./BohbAdvisor.rst>`__ + * - `BOHB <../autotune_ref.html#nni.algorithms.hpo.bohb_advisor.BOHB>`_ - BOHB is a follow-up work to Hyperband. It targets the weakness of Hyperband that new configurations are generated randomly without leveraging finished trials. For the name BOHB, HB means Hyperband, BO means Bayesian Optimization. BOHB leverages finished trials by building multiple TPE models, a proportion of new configurations are generated through these models. `Reference Paper `__ - * - `GP Tuner <./GPTuner.rst>`__ + * - `GP <../autotune_ref.html#nni.algorithms.hpo.gp_tuner.GPTuner>`_ - Gaussian Process Tuner is a sequential model-based optimization (SMBO) approach with Gaussian Process as the surrogate. 
`Reference Paper `__, `Github Repo `__ - * - `PBT Tuner <./PBTTuner.rst>`__ + * - `PBT <../autotune_ref.html>`_ - PBT Tuner is a simple asynchronous optimization algorithm which effectively utilizes a fixed computational budget to jointly optimize a population of models and their hyperparameters to maximize performance. `Reference Paper `__ - * - `DNGO Tuner <./DngoTuner.rst>`__ + * - `DNGO <../autotune_ref.html>`_ - Use of neural networks as an alternative to GPs to model distributions over functions in bayesian optimization. -.. toctree:: - :maxdepth: 1 +Comparison +---------- - TPE - Random Search - Anneal - Naive Evolution - SMAC - Metis Tuner - Batch Tuner - Grid Search - GP Tuner - Network Morphism - Hyperband - BOHB - PBT Tuner - DNGO Tuner +These articles have compared built-in tuners' performance on some different tasks: + +:doc:`hpo_benchmark_stats` + +:doc:`/CommunitySharings/HpoComparison` diff --git a/docs/source/hpo_advanced.rst b/docs/source/hpo_advanced.rst deleted file mode 100644 index 013d0499e..000000000 --- a/docs/source/hpo_advanced.rst +++ /dev/null @@ -1,10 +0,0 @@ -Advanced Features -================= - -.. toctree:: - :maxdepth: 2 - - Write a New Tuner - Write a New Assessor - Write a New Advisor - Install Customized Algorithms as Builtin Tuners/Assessors/Advisors diff --git a/docs/source/hpo_advanced_zh.rst b/docs/source/hpo_advanced_zh.rst deleted file mode 100644 index 0c4c104dd..000000000 --- a/docs/source/hpo_advanced_zh.rst +++ /dev/null @@ -1,12 +0,0 @@ -.. aa9e6234ae4a578e6e74efcdc521f119 - -高级功能 -================= - -.. 
toctree:: - :maxdepth: 2 - - 编写新的 Tuner - 编写新的 Assessor - 编写新的 Advisor - 安装自定义的 Tuners/Assessors/Advisors diff --git a/docs/source/hyperparameter_tune.rst b/docs/source/hyperparameter_tune.rst deleted file mode 100644 index e0f613677..000000000 --- a/docs/source/hyperparameter_tune.rst +++ /dev/null @@ -1,28 +0,0 @@ -############################# -Auto (Hyper-parameter) Tuning -############################# - -Auto tuning is one of the key features provided by NNI; a main application scenario being -hyper-parameter tuning. Tuning specifically applies to trial code. We provide a lot of popular -auto tuning algorithms (called Tuner), and some early stop algorithms (called Assessor). -NNI supports running trials on various training platforms, for example, on a local machine, -on several servers in a distributed manner, or on platforms such as OpenPAI, Kubernetes, etc. - -Other key features of NNI, such as model compression, feature engineering, can also be further -enhanced by auto tuning, which we'll described when introducing those features. - -NNI has high extensibility, advanced users can customize their own Tuner, Assessor, and Training Service -according to their needs. - -.. toctree:: - :maxdepth: 2 - - Write Trial - Tuners - Assessors - Training Platform - Examples - WebUI - How to Debug - Advanced - HPO Benchmarks diff --git a/docs/source/hyperparameter_tune_zh.rst b/docs/source/hyperparameter_tune_zh.rst deleted file mode 100644 index c8e379ab8..000000000 --- a/docs/source/hyperparameter_tune_zh.rst +++ /dev/null @@ -1,30 +0,0 @@ -.. 6ed30d3a87dbc4c1c4650cf56f074045 - -############## -自动超参数调优 -############## - -自动调优是 NNI 的主要功能之一。它的工作模式是 -反复运行 trial 代码,每次向其提供不同的超参组合,从而对 trial 的运行结果进行调优。 -NNI 提供了很多流行的自动调优算法(称为 Tuner)和一些提前终止算法(称为 Assessor)。 -NNI 支持在多种训练平台上运行 trial,包括本机、 -远程服务器、Azure Machine Learning、基于 Kubernetes 的集群(如 OpenPAI、Kubeflow)等等。 - -其他的功能,例如模型压缩、特征工程,也可以 -使用自动调优。这些我们在介绍相应功能的时候会具体介绍。 - -NNI 具有高扩展性, -用户可以根据需求实现自己的 Tuner 算法和训练平台。 - -.. 
toctree:: - :maxdepth: 2 - - 实现 Trial <./TrialExample/Trials> - Tuners - Assessors - 训练平台 - 示例 - Web 界面 - 如何调试 - 高级功能 - Tuner 基准测试 diff --git a/docs/source/index.rst b/docs/source/index.rst index ffee49713..96f35e399 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -20,8 +20,7 @@ Neural Network Intelligence :caption: Advanced Materials :hidden: - Overview - Auto (Hyper-parameter) Tuning + Hyperparameter Optimization Neural Architecture Search Model Compression Feature Engineering @@ -35,7 +34,6 @@ Neural Network Intelligence nnictl Commands Experiment Configuration Experiment Configuration (legacy) - Search Space Python API .. toctree:: diff --git a/docs/source/index_zh.rst b/docs/source/index_zh.rst index f2b29dc6a..066912fd0 100644 --- a/docs/source/index_zh.rst +++ b/docs/source/index_zh.rst @@ -1,4 +1,4 @@ -.. 84633d9c4ebf3421e7618c56117045c2 +.. 16313ff0f7a4b190c06f8a388509a199 ########################### Neural Network Intelligence @@ -10,11 +10,10 @@ Neural Network Intelligence :titlesonly: :hidden: - 概述 安装 入门 教程 - 自动(超参数)调优 + 自动(超参数)调优 神经网络架构搜索 模型压缩 特征工程 @@ -481,4 +480,4 @@ Neural Network Intelligence

许可协议

代码库遵循 MIT 许可协议

- \ No newline at end of file + diff --git a/docs/source/reference.rst b/docs/source/reference.rst index 67020ff80..3705db77a 100644 --- a/docs/source/reference.rst +++ b/docs/source/reference.rst @@ -11,9 +11,6 @@ References nnictl Commands Experiment Configuration Experiment Configuration (legacy) - Search Space - NNI Annotation SDK API References Supported Framework Library Launch from Python - Tensorboard diff --git a/docs/source/reference_zh.rst b/docs/source/reference_zh.rst index af1320fd7..dc0668616 100644 --- a/docs/source/reference_zh.rst +++ b/docs/source/reference_zh.rst @@ -1,4 +1,4 @@ -.. e8dca0b3551823aef1648bcef1745028 +.. ebdb4f520eb0601c779312975a205bdc :orphan: @@ -11,9 +11,6 @@ nnictl 命令 Experiment 配置 Experiment 配置(遗产) - 搜索空间 - NNI Annotation SDK API 参考 支持的框架和库 从 Python 发起实验 - Tensorboard diff --git a/docs/source/training_services_zh.rst b/docs/source/training_services_zh.rst deleted file mode 100644 index e5a5a450b..000000000 --- a/docs/source/training_services_zh.rst +++ /dev/null @@ -1,13 +0,0 @@ -.. 0da0df7e3bb27a30cdec9d6357ea1f9b - -NNI 支持的训练平台介绍 -===================================== - -.. toctree:: - Overview <./TrainingService/Overview> - 远程<./TrainingService/RemoteMachineMode> - OpenPAI<./TrainingService/PaiMode> - Kubeflow<./TrainingService/KubeflowMode> - FrameworkController<./TrainingService/FrameworkControllerMode> - AML<./TrainingService/AMLMode> - 混合模式 <./TrainingService/HybridMode> diff --git a/docs/source/tutorials.rst b/docs/source/tutorials.rst index b10dbdbde..a3b2c3ca7 100644 --- a/docs/source/tutorials.rst +++ b/docs/source/tutorials.rst @@ -24,6 +24,13 @@ Tutorials :image: ../img/thumbnails/overview-31.png :tags: Experiment/HPO +.. cardlinkitem:: + :header: HPO Quickstart with TensorFlow + :description: Use HPO to tune a TensorFlow MNIST model + :link: tutorials/hpo_quickstart_tensorflow/main.html + :image: ../img/thumbnails/overview-33.png + :tags: HPO + .. cardlinkitem:: :header: Hello, NAS! 
:description: Beginners' NAS tutorial on how to search for neural architectures for MNIST dataset. diff --git a/docs/source/tutorials/hpo_quickstart_tensorflow/images/thumb/sphx_glr_main_thumb.png b/docs/source/tutorials/hpo_quickstart_tensorflow/images/thumb/sphx_glr_main_thumb.png new file mode 100644 index 000000000..b06c4e6a1 Binary files /dev/null and b/docs/source/tutorials/hpo_quickstart_tensorflow/images/thumb/sphx_glr_main_thumb.png differ diff --git a/docs/source/tutorials/hpo_quickstart_tensorflow/images/thumb/sphx_glr_model_thumb.png b/docs/source/tutorials/hpo_quickstart_tensorflow/images/thumb/sphx_glr_model_thumb.png new file mode 100644 index 000000000..025bdaa7e Binary files /dev/null and b/docs/source/tutorials/hpo_quickstart_tensorflow/images/thumb/sphx_glr_model_thumb.png differ diff --git a/docs/source/tutorials/hpo_quickstart_tensorflow/main.ipynb b/docs/source/tutorials/hpo_quickstart_tensorflow/main.ipynb new file mode 100644 index 000000000..e9821210a --- /dev/null +++ b/docs/source/tutorials/hpo_quickstart_tensorflow/main.ipynb @@ -0,0 +1,176 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "%matplotlib inline" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n# NNI HPO Quickstart with TensorFlow\nThis tutorial optimizes the model in `official TensorFlow quickstart`_ with auto-tuning.\n\nThe tutorial consists of 4 steps: \n\n 1. Modify the model for auto-tuning.\n 2. Define hyperparameters' search space.\n 3. Configure the experiment.\n 4. 
Run the experiment.\n\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 1: Prepare the model\nIn first step, you need to prepare the model to be tuned.\n\nThe model should be put in a separate script.\nIt will be evaluated many times concurrently,\nand possibly will be trained on distributed platforms.\n\nIn this tutorial, the model is defined in :doc:`model.py `.\n\nPlease understand the model code before continue to next step.\n\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 2: Define search space\nIn model code, we have prepared 4 hyperparameters to be tuned:\n*dense_units*, *activation_type*, *dropout_rate*, and *learning_rate*.\n\nHere we need to define their *search space* so the tuning algorithm can sample them in desired range.\n\nAssuming we have following prior knowledge for these hyperparameters:\n\n 1. *dense_units* should be one of 64, 128, 256.\n 2. *activation_type* should be one of 'relu', 'tanh', 'swish', or None.\n 3. *dropout_rate* should be a float between 0.5 and 0.9.\n 4. *learning_rate* should be a float between 0.0001 and 0.1, and it follows exponential distribution.\n\nIn NNI, the space of *dense_units* and *activation_type* is called ``choice``;\nthe space of *dropout_rate* is called ``uniform``;\nand the space of *learning_rate* is called ``loguniform``.\nYou may have noticed, these names are derived from ``numpy.random``.\n\nFor full specification of search space, check :doc:`the reference
`.\n\nNow we can define the search space as follow:\n\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "search_space = {\n 'dense_units': {'_type': 'choice', '_value': [64, 128, 256]},\n 'activation_type': {'_type': 'choice', '_value': ['relu', 'tanh', 'swish', None]},\n 'dropout_rate': {'_type': 'uniform', '_value': [0.5, 0.9]},\n 'learning_rate': {'_type': 'loguniform', '_value': [0.0001, 0.1]},\n}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 3: Configure the experiment\nNNI uses an *experiment* to manage the HPO process.\nThe *experiment config* defines how to train the models and how to explore the search space.\n\nIn this tutorial we use a *local* mode experiment,\nwhich means models will be trained on local machine, without using any special training platform.\n\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from nni.experiment import Experiment\nexperiment = Experiment('local')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we start to configure the experiment.\n\nFirstly, specify the model code.\nIn NNI evaluation of each hyperparameter set is called a *trial*.\nSo the model script is called *trial code*.\n\nIf you are using Linux system without Conda, you many need to change ``python`` to ``python3``.\n\nWhen ``trial_code_directory`` is a relative path, it relates to current working directory.\nTo run ``main.py`` from a different path, you can set trial code directory to ``Path(__file__).parent``.\n\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "experiment.config.trial_command = 'python model.py'\nexperiment.config.trial_code_directory = '.'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + 
"source": [ + "Then specify the search space we defined above:\n\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "experiment.config.search_space = search_space" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Choose a tuning algorithm.\nHere we use :doc:`TPE tuner
`.\n\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "experiment.config.tuner.name = 'TPE'\nexperiment.config.tuner.class_args['optimize_mode'] = 'maximize'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Specify how many trials to run.\nHere we evaluate 10 sets of hyperparameters in total, and concurrently evaluate 4 sets at a time.\n\nPlease note that ``max_trial_number`` here is merely for a quick example.\nWith default config TPE tuner requires 20 trials to warm up.\nIn real world max trial number is commonly set to 100+.\n\nYou can also set ``max_experiment_duration = '1h'`` to limit running time.\n\nAnd alternatively, you can skip this part and set no limit at all.\nThe experiment will run forever until you press Ctrl-C.\n\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "experiment.config.max_trial_number = 10\nexperiment.config.trial_concurrency = 4" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 4: Run the experiment\nNow the experiment is ready. 
Choose a port and launch it.\n\nYou can use the web portal to view experiment status: http://localhost:8080.\n\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "experiment.run(8080)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.2" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/docs/source/tutorials/hpo_quickstart_tensorflow/main.py b/docs/source/tutorials/hpo_quickstart_tensorflow/main.py new file mode 100644 index 000000000..01ebd2c0b --- /dev/null +++ b/docs/source/tutorials/hpo_quickstart_tensorflow/main.py @@ -0,0 +1,116 @@ +""" +NNI HPO Quickstart with TensorFlow +================================== +This tutorial optimizes the model in `official TensorFlow quickstart`_ with auto-tuning. + +The tutorial consists of 4 steps: + + 1. Modify the model for auto-tuning. + 2. Define hyperparameters' search space. + 3. Configure the experiment. + 4. Run the experiment. + +.. _official TensorFlow quickstart: https://www.tensorflow.org/tutorials/quickstart/beginner +""" + +# %% +# Step 1: Prepare the model +# ------------------------- +# In first step, you need to prepare the model to be tuned. +# +# The model should be put in a separate script. +# It will be evaluated many times concurrently, +# and possibly will be trained on distributed platforms. +# +# In this tutorial, the model is defined in :doc:`model.py `. +# +# Please understand the model code before continue to next step. 
+ +# %% +# Step 2: Define search space +# --------------------------- +# In model code, we have prepared 4 hyperparameters to be tuned: +# *dense_units*, *activation_type*, *dropout_rate*, and *learning_rate*. +# +# Here we need to define their *search space* so the tuning algorithm can sample them in desired range. +# +# Assuming we have the following prior knowledge for these hyperparameters: +# +# 1. *dense_units* should be one of 64, 128, 256. +# 2. *activation_type* should be one of 'relu', 'tanh', 'swish', or None. +# 3. *dropout_rate* should be a float between 0.5 and 0.9. +# 4. *learning_rate* should be a float between 0.0001 and 0.1, and it follows exponential distribution. +# +# In NNI, the space of *dense_units* and *activation_type* is called ``choice``; +# the space of *dropout_rate* is called ``uniform``; +# and the space of *learning_rate* is called ``loguniform``. +# You may have noticed, these names are derived from ``numpy.random``. +# +# For full specification of search space, check :doc:`the reference `. +# +# Now we can define the search space as follows: + +search_space = { + 'dense_units': {'_type': 'choice', '_value': [64, 128, 256]}, + 'activation_type': {'_type': 'choice', '_value': ['relu', 'tanh', 'swish', None]}, + 'dropout_rate': {'_type': 'uniform', '_value': [0.5, 0.9]}, + 'learning_rate': {'_type': 'loguniform', '_value': [0.0001, 0.1]}, +} + +# %% +# Step 3: Configure the experiment +# -------------------------------- +# NNI uses an *experiment* to manage the HPO process. +# The *experiment config* defines how to train the models and how to explore the search space. +# +# In this tutorial we use a *local* mode experiment, +# which means models will be trained on local machine, without using any special training platform. +from nni.experiment import Experiment +experiment = Experiment('local') + +# %% +# Now we start to configure the experiment. +# +# Firstly, specify the model code. 
+# In NNI evaluation of each hyperparameter set is called a *trial*. +# So the model script is called *trial code*. +# +# If you are using Linux system without Conda, you may need to change ``python`` to ``python3``. +# +# When ``trial_code_directory`` is a relative path, it relates to current working directory. +# To run ``main.py`` from a different path, you can set trial code directory to ``Path(__file__).parent``. +experiment.config.trial_command = 'python model.py' +experiment.config.trial_code_directory = '.' + +# %% +# Then specify the search space we defined above: +experiment.config.search_space = search_space + +# %% +# Choose a tuning algorithm. +# Here we use :doc:`TPE tuner `. +experiment.config.tuner.name = 'TPE' +experiment.config.tuner.class_args['optimize_mode'] = 'maximize' + +# %% +# Specify how many trials to run. +# Here we evaluate 10 sets of hyperparameters in total, and concurrently evaluate 4 sets at a time. +# +# Please note that ``max_trial_number`` here is merely for a quick example. +# With default config TPE tuner requires 20 trials to warm up. +# In real world max trial number is commonly set to 100+. +# +# You can also set ``max_experiment_duration = '1h'`` to limit running time. +# +# And alternatively, you can skip this part and set no limit at all. +# The experiment will run forever until you press Ctrl-C. +experiment.config.max_trial_number = 10 +experiment.config.trial_concurrency = 4 + +# %% +# Step 4: Run the experiment +# -------------------------- +# Now the experiment is ready. Choose a port and launch it. +# +# You can use the web portal to view experiment status: http://localhost:8080. 
+experiment.run(8080) diff --git a/docs/source/tutorials/hpo_quickstart_tensorflow/main.py.md5 b/docs/source/tutorials/hpo_quickstart_tensorflow/main.py.md5 new file mode 100644 index 000000000..3ecaf103f --- /dev/null +++ b/docs/source/tutorials/hpo_quickstart_tensorflow/main.py.md5 @@ -0,0 +1 @@ +911c32a84d08c02c02821ba2badc056c \ No newline at end of file diff --git a/docs/source/tutorials/hpo_quickstart_tensorflow/main.rst b/docs/source/tutorials/hpo_quickstart_tensorflow/main.rst new file mode 100644 index 000000000..c38d06fbd --- /dev/null +++ b/docs/source/tutorials/hpo_quickstart_tensorflow/main.rst @@ -0,0 +1,273 @@ +:orphan: + +.. DO NOT EDIT. +.. THIS FILE WAS AUTOMATICALLY GENERATED BY SPHINX-GALLERY. +.. TO MAKE CHANGES, EDIT THE SOURCE PYTHON FILE: +.. "tutorials/hpo_quickstart_tensorflow/main.py" +.. LINE NUMBERS ARE GIVEN BELOW. + +.. only:: html + + .. note:: + :class: sphx-glr-download-link-note + + Click :ref:`here ` + to download the full example code + +.. rst-class:: sphx-glr-example-title + +.. _sphx_glr_tutorials_hpo_quickstart_tensorflow_main.py: + + +NNI HPO Quickstart with TensorFlow +================================== +This tutorial optimizes the model in `official TensorFlow quickstart`_ with auto-tuning. + +The tutorial consists of 4 steps: + + 1. Modify the model for auto-tuning. + 2. Define hyperparameters' search space. + 3. Configure the experiment. + 4. Run the experiment. + +.. _official TensorFlow quickstart: https://www.tensorflow.org/tutorials/quickstart/beginner + +.. GENERATED FROM PYTHON SOURCE LINES 17-28 + +Step 1: Prepare the model +------------------------- +In first step, you need to prepare the model to be tuned. + +The model should be put in a separate script. +It will be evaluated many times concurrently, +and possibly will be trained on distributed platforms. + +In this tutorial, the model is defined in :doc:`model.py `. + +Please understand the model code before continue to next step. + +.. 
GENERATED FROM PYTHON SOURCE LINES 30-52 + +Step 2: Define search space +--------------------------- +In model code, we have prepared 4 hyperparameters to be tuned: +*dense_units*, *activation_type*, *dropout_rate*, and *learning_rate*. + +Here we need to define their *search space* so the tuning algorithm can sample them in desired range. + +Assuming we have following prior knowledge for these hyperparameters: + + 1. *dense_units* should be one of 64, 128, 256. + 2. *activation_type* should be one of 'relu', 'tanh', 'swish', or None. + 3. *dropout_rate* should be a float between 0.5 and 0.9. + 4. *learning_rate* should be a float between 0.0001 and 0.1, and it follows exponential distribution. + +In NNI, the space of *dense_units* and *activation_type* is called ``choice``; +the space of *dropout_rate* is called ``uniform``; +and the space of *learning_rate* is called ``loguniform``. +You may have noticed, these names are derived from ``numpy.random``. + +For full specification of search space, check :doc:`the reference `. + +Now we can define the search space as follow: + +.. GENERATED FROM PYTHON SOURCE LINES 52-60 + +.. code-block:: default + + + search_space = { + 'dense_units': {'_type': 'choice', '_value': [64, 128, 256]}, + 'activation_type': {'_type': 'choice', '_value': ['relu', 'tanh', 'swish', None]}, + 'dropout_rate': {'_type': 'uniform', '_value': [0.5, 0.9]}, + 'learning_rate': {'_type': 'loguniform', '_value': [0.0001, 0.1]}, + } + + + + + + + + +.. GENERATED FROM PYTHON SOURCE LINES 61-68 + +Step 3: Configure the experiment +-------------------------------- +NNI uses an *experiment* to manage the HPO process. +The *experiment config* defines how to train the models and how to explore the search space. + +In this tutorial we use a *local* mode experiment, +which means models will be trained on local machine, without using any special training platform. + +.. GENERATED FROM PYTHON SOURCE LINES 68-71 + +.. 
code-block:: default + + from nni.experiment import Experiment + experiment = Experiment('local') + + + + + + + + +.. GENERATED FROM PYTHON SOURCE LINES 72-82 + +Now we start to configure the experiment. + +Firstly, specify the model code. +In NNI evaluation of each hyperparameter set is called a *trial*. +So the model script is called *trial code*. + +If you are using Linux system without Conda, you many need to change ``python`` to ``python3``. + +When ``trial_code_directory`` is a relative path, it relates to current working directory. +To run ``main.py`` from a different path, you can set trial code directory to ``Path(__file__).parent``. + +.. GENERATED FROM PYTHON SOURCE LINES 82-85 + +.. code-block:: default + + experiment.config.trial_command = 'python model.py' + experiment.config.trial_code_directory = '.' + + + + + + + + +.. GENERATED FROM PYTHON SOURCE LINES 86-87 + +Then specify the search space we defined above: + +.. GENERATED FROM PYTHON SOURCE LINES 87-89 + +.. code-block:: default + + experiment.config.search_space = search_space + + + + + + + + +.. GENERATED FROM PYTHON SOURCE LINES 90-92 + +Choose a tuning algorithm. +Here we use :doc:`TPE tuner `. + +.. GENERATED FROM PYTHON SOURCE LINES 92-95 + +.. code-block:: default + + experiment.config.tuner.name = 'TPE' + experiment.config.tuner.class_args['optimize_mode'] = 'maximize' + + + + + + + + +.. GENERATED FROM PYTHON SOURCE LINES 96-107 + +Specify how many trials to run. +Here we evaluate 10 sets of hyperparameters in total, and concurrently evaluate 4 sets at a time. + +Please note that ``max_trial_number`` here is merely for a quick example. +With default config TPE tuner requires 20 trials to warm up. +In real world max trial number is commonly set to 100+. + +You can also set ``max_experiment_duration = '1h'`` to limit running time. + +And alternatively, you can skip this part and set no limit at all. +The experiment will run forever until you press Ctrl-C. + +.. 
GENERATED FROM PYTHON SOURCE LINES 107-110 + +.. code-block:: default + + experiment.config.max_trial_number = 10 + experiment.config.trial_concurrency = 4 + + + + + + + + +.. GENERATED FROM PYTHON SOURCE LINES 111-116 + +Step 4: Run the experiment +-------------------------- +Now the experiment is ready. Choose a port and launch it. + +You can use the web portal to view experiment status: http://localhost:8080. + +.. GENERATED FROM PYTHON SOURCE LINES 116-117 + +.. code-block:: default + + experiment.run(8080) + + + + +.. rst-class:: sphx-glr-script-out + + Out: + + .. code-block:: none + + [2022-03-07 03:24:07] Creating experiment, Experiment ID: f4q1xjki + [2022-03-07 03:24:07] Starting web server... + [2022-03-07 03:24:08] Setting up... + [2022-03-07 03:24:08] Web UI URLs: http://127.0.0.1:8080 http://192.168.100.103:8080 + [2022-03-07 03:36:50] Stopping experiment, please wait... + [2022-03-07 03:36:53] Experiment stopped + + True + + + + +.. rst-class:: sphx-glr-timing + + **Total running time of the script:** ( 12 minutes 45.612 seconds) + + +.. _sphx_glr_download_tutorials_hpo_quickstart_tensorflow_main.py: + + +.. only :: html + + .. container:: sphx-glr-footer + :class: sphx-glr-footer-example + + + + .. container:: sphx-glr-download sphx-glr-download-python + + :download:`Download Python source code: main.py ` + + + + .. container:: sphx-glr-download sphx-glr-download-jupyter + + :download:`Download Jupyter notebook: main.ipynb ` + + +.. only:: html + + .. 
rst-class:: sphx-glr-signature + + `Gallery generated by Sphinx-Gallery `_ diff --git a/docs/source/tutorials/hpo_quickstart_tensorflow/main_codeobj.pickle b/docs/source/tutorials/hpo_quickstart_tensorflow/main_codeobj.pickle new file mode 100644 index 000000000..5db122a6c Binary files /dev/null and b/docs/source/tutorials/hpo_quickstart_tensorflow/main_codeobj.pickle differ diff --git a/docs/source/tutorials/hpo_quickstart_tensorflow/model.ipynb b/docs/source/tutorials/hpo_quickstart_tensorflow/model.ipynb new file mode 100644 index 000000000..43bf75de7 --- /dev/null +++ b/docs/source/tutorials/hpo_quickstart_tensorflow/model.ipynb @@ -0,0 +1,180 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "%matplotlib inline" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n# Port TensorFlow Quickstart to NNI\nThis is a modified version of `TensorFlow quickstart`_.\n\nIt can be run directly and will have the exact same result as original version.\n\nFurthermore, it enables the ability of auto-tuning with an NNI *experiment*, which will be discussed later.\n\nFor now, we recommend to run this script directly to verify the environment.\n\nThere are only 3 key differences from the original version:\n\n 1. In `Get optimized hyperparameters`_ part, it receives auto-generated hyperparameters.\n 2. In `(Optional) Report intermediate results`_ part, it reports per-epoch accuracy for visualization.\n 3. 
In `Report final result`_ part, it reports final accuracy for tuner to generate next hyperparameter set.\n\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "import nni\nimport tensorflow as tf" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Hyperparameters to be tuned\n\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "params = {\n 'dense_units': 128,\n 'activation_type': 'relu',\n 'dropout_rate': 0.2,\n 'learning_rate': 0.001,\n}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get optimized hyperparameters\nIf run directly, ``nni.get_next_parameters()`` is a no-op and returns an empty dict.\nBut with an NNI *experiment*, it will receive optimized hyperparameters from tuning algorithm.\n\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "optimized_params = nni.get_next_parameter()\nparams.update(optimized_params)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load dataset\n\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "mnist = tf.keras.datasets.mnist\n\n(x_train, y_train), (x_test, y_test) = mnist.load_data()\nx_train, x_test = x_train / 255.0, x_test / 255.0" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Build model with hyperparameters\n\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "model = tf.keras.models.Sequential([\n tf.keras.layers.Flatten(input_shape=(28, 28)),\n tf.keras.layers.Dense(params['dense_units'], activation=params['activation_type']),\n 
tf.keras.layers.Dropout(params['dropout_rate']),\n tf.keras.layers.Dense(10)\n])\n\nadam = tf.keras.optimizers.Adam(learning_rate=params['learning_rate'])\n\nloss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)\n\nmodel.compile(optimizer=adam, loss=loss_fn, metrics=['accuracy'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## (Optional) Report intermediate results\nThe callback reports per-epoch accuracy to show learning curve in NNI web portal.\nAnd in :doc:`/hpo/assessors`, you will see how to leverage the metrics for early stopping.\n\nYou can safely skip this and the experiment will work fine.\n\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "callback = tf.keras.callbacks.LambdaCallback(\n on_epoch_end = lambda epoch, logs: nni.report_intermediate_result(logs['accuracy'])\n)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Train and evluate the model\n\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "model.fit(x_train, y_train, epochs=5, verbose=2, callbacks=[callback])\nloss, accuracy = model.evaluate(x_test, y_test, verbose=2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Report final result\nReport final accuracy to NNI so the tuning algorithm can predict best hyperparameters.\n\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "nni.report_final_result(accuracy)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": 
"python", + "pygments_lexer": "ipython3", + "version": "3.10.2" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/docs/source/tutorials/hpo_quickstart_tensorflow/model.py b/docs/source/tutorials/hpo_quickstart_tensorflow/model.py new file mode 100644 index 000000000..a8215f72c --- /dev/null +++ b/docs/source/tutorials/hpo_quickstart_tensorflow/model.py @@ -0,0 +1,88 @@ +""" +Port TensorFlow Quickstart to NNI +================================= +This is a modified version of `TensorFlow quickstart`_. + +It can be run directly and will have the exact same result as original version. + +Furthermore, it enables the ability of auto-tuning with an NNI *experiment*, which will be discussed later. + +For now, we recommend to run this script directly to verify the environment. + +There are only 3 key differences from the original version: + + 1. In `Get optimized hyperparameters`_ part, it receives auto-generated hyperparameters. + 2. In `(Optional) Report intermediate results`_ part, it reports per-epoch accuracy for visualization. + 3. In `Report final result`_ part, it reports final accuracy for tuner to generate next hyperparameter set. + +.. _TensorFlow quickstart: https://www.tensorflow.org/tutorials/quickstart/beginner +""" + +# %% +import nni +import tensorflow as tf + +# %% +# Hyperparameters to be tuned +# --------------------------- +params = { + 'dense_units': 128, + 'activation_type': 'relu', + 'dropout_rate': 0.2, + 'learning_rate': 0.001, +} + +# %% +# Get optimized hyperparameters +# ----------------------------- +# If run directly, ``nni.get_next_parameters()`` is a no-op and returns an empty dict. +# But with an NNI *experiment*, it will receive optimized hyperparameters from tuning algorithm. 
+optimized_params = nni.get_next_parameter() +params.update(optimized_params) + +# %% +# Load dataset +# ------------ +mnist = tf.keras.datasets.mnist + +(x_train, y_train), (x_test, y_test) = mnist.load_data() +x_train, x_test = x_train / 255.0, x_test / 255.0 + +# %% +# Build model with hyperparameters +# -------------------------------- +model = tf.keras.models.Sequential([ + tf.keras.layers.Flatten(input_shape=(28, 28)), + tf.keras.layers.Dense(params['dense_units'], activation=params['activation_type']), + tf.keras.layers.Dropout(params['dropout_rate']), + tf.keras.layers.Dense(10) +]) + +adam = tf.keras.optimizers.Adam(learning_rate=params['learning_rate']) + +loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True) + +model.compile(optimizer=adam, loss=loss_fn, metrics=['accuracy']) + +# %% +# (Optional) Report intermediate results +# -------------------------------------- +# The callback reports per-epoch accuracy to show learning curve in NNI web portal. +# And in :doc:`/hpo/assessors`, you will see how to leverage the metrics for early stopping. +# +# You can safely skip this and the experiment will work fine. +callback = tf.keras.callbacks.LambdaCallback( + on_epoch_end = lambda epoch, logs: nni.report_intermediate_result(logs['accuracy']) +) + +# %% +# Train and evaluate the model +# ---------------------------- +model.fit(x_train, y_train, epochs=5, verbose=2, callbacks=[callback]) +loss, accuracy = model.evaluate(x_test, y_test, verbose=2) + +# %% +# Report final result +# ------------------- +# Report final accuracy to NNI so the tuning algorithm can predict best hyperparameters. 
+nni.report_final_result(accuracy) diff --git a/docs/source/tutorials/hpo_quickstart_tensorflow/model.py.md5 b/docs/source/tutorials/hpo_quickstart_tensorflow/model.py.md5 new file mode 100644 index 000000000..13c679075 --- /dev/null +++ b/docs/source/tutorials/hpo_quickstart_tensorflow/model.py.md5 @@ -0,0 +1 @@ +1d29b3ef885b5725c4a3a2c8121ee8df \ No newline at end of file diff --git a/docs/source/tutorials/hpo_quickstart_tensorflow/model.rst b/docs/source/tutorials/hpo_quickstart_tensorflow/model.rst new file mode 100644 index 000000000..23447872a --- /dev/null +++ b/docs/source/tutorials/hpo_quickstart_tensorflow/model.rst @@ -0,0 +1,280 @@ +:orphan: + +.. DO NOT EDIT. +.. THIS FILE WAS AUTOMATICALLY GENERATED BY SPHINX-GALLERY. +.. TO MAKE CHANGES, EDIT THE SOURCE PYTHON FILE: +.. "tutorials/hpo_quickstart_tensorflow/model.py" +.. LINE NUMBERS ARE GIVEN BELOW. + +.. only:: html + + .. note:: + :class: sphx-glr-download-link-note + + Click :ref:`here ` + to download the full example code + +.. rst-class:: sphx-glr-example-title + +.. _sphx_glr_tutorials_hpo_quickstart_tensorflow_model.py: + + +Port TensorFlow Quickstart to NNI +================================= +This is a modified version of `TensorFlow quickstart`_. + +It can be run directly and will have the exact same result as original version. + +Furthermore, it enables the ability of auto-tuning with an NNI *experiment*, which will be discussed later. + +For now, we recommend to run this script directly to verify the environment. + +There are only 3 key differences from the original version: + + 1. In `Get optimized hyperparameters`_ part, it receives auto-generated hyperparameters. + 2. In `(Optional) Report intermediate results`_ part, it reports per-epoch accuracy for visualization. + 3. In `Report final result`_ part, it reports final accuracy for tuner to generate next hyperparameter set. + +.. _TensorFlow quickstart: https://www.tensorflow.org/tutorials/quickstart/beginner + +.. 
GENERATED FROM PYTHON SOURCE LINES 22-25 + +.. code-block:: default + + import nni + import tensorflow as tf + + + + + + + + +.. GENERATED FROM PYTHON SOURCE LINES 26-28 + +Hyperparameters to be tuned +--------------------------- + +.. GENERATED FROM PYTHON SOURCE LINES 28-35 + +.. code-block:: default + + params = { + 'dense_units': 128, + 'activation_type': 'relu', + 'dropout_rate': 0.2, + 'learning_rate': 0.001, + } + + + + + + + + +.. GENERATED FROM PYTHON SOURCE LINES 36-40 + +Get optimized hyperparameters +----------------------------- +If run directly, ``nni.get_next_parameters()`` is a no-op and returns an empty dict. +But with an NNI *experiment*, it will receive optimized hyperparameters from tuning algorithm. + +.. GENERATED FROM PYTHON SOURCE LINES 40-43 + +.. code-block:: default + + optimized_params = nni.get_next_parameter() + params.update(optimized_params) + + + + + +.. rst-class:: sphx-glr-script-out + + Out: + + .. code-block:: none + + /home/lz/code/nnisrc/nni/runtime/platform/standalone.py:32: RuntimeWarning: Running NNI code without runtime. Check the following tutorial if you are new to NNI: https://nni.readthedocs.io/en/stable/Tutorial/QuickStart.html#id1 + warnings.warn(warning_message, RuntimeWarning) + + + + +.. GENERATED FROM PYTHON SOURCE LINES 44-46 + +Load dataset +------------ + +.. GENERATED FROM PYTHON SOURCE LINES 46-51 + +.. code-block:: default + + mnist = tf.keras.datasets.mnist + + (x_train, y_train), (x_test, y_test) = mnist.load_data() + x_train, x_test = x_train / 255.0, x_test / 255.0 + + + + + + + + +.. GENERATED FROM PYTHON SOURCE LINES 52-54 + +Build model with hyperparameters +-------------------------------- + +.. GENERATED FROM PYTHON SOURCE LINES 54-67 + +.. 
code-block:: default + + model = tf.keras.models.Sequential([ + tf.keras.layers.Flatten(input_shape=(28, 28)), + tf.keras.layers.Dense(params['dense_units'], activation=params['activation_type']), + tf.keras.layers.Dropout(params['dropout_rate']), + tf.keras.layers.Dense(10) + ]) + + adam = tf.keras.optimizers.Adam(learning_rate=params['learning_rate']) + + loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True) + + model.compile(optimizer=adam, loss=loss_fn, metrics=['accuracy']) + + + + + + + + +.. GENERATED FROM PYTHON SOURCE LINES 68-74 + +(Optional) Report intermediate results +-------------------------------------- +The callback reports per-epoch accuracy to show learning curve in NNI web portal. +And in :doc:`/hpo/assessors`, you will see how to leverage the metrics for early stopping. + +You can safely skip this and the experiment will work fine. + +.. GENERATED FROM PYTHON SOURCE LINES 74-78 + +.. code-block:: default + + callback = tf.keras.callbacks.LambdaCallback( + on_epoch_end = lambda epoch, logs: nni.report_intermediate_result(logs['accuracy']) + ) + + + + + + + + +.. GENERATED FROM PYTHON SOURCE LINES 79-81 + +Train and evluate the model +--------------------------- + +.. GENERATED FROM PYTHON SOURCE LINES 81-84 + +.. code-block:: default + + model.fit(x_train, y_train, epochs=5, verbose=2, callbacks=[callback]) + loss, accuracy = model.evaluate(x_test, y_test, verbose=2) + + + + + +.. rst-class:: sphx-glr-script-out + + Out: + + .. 
code-block:: none + + Epoch 1/5 + [2022-03-07 02:37:35] INFO (nni/MainThread) Intermediate result: 0.9145833253860474 (Index 0) + 1875/1875 - 12s - loss: 0.2940 - accuracy: 0.9146 - 12s/epoch - 6ms/step + Epoch 2/5 + [2022-03-07 02:37:41] INFO (nni/MainThread) Intermediate result: 0.9573833346366882 (Index 1) + 1875/1875 - 5s - loss: 0.1422 - accuracy: 0.9574 - 5s/epoch - 3ms/step + Epoch 3/5 + [2022-03-07 02:37:49] INFO (nni/MainThread) Intermediate result: 0.967283308506012 (Index 2) + 1875/1875 - 8s - loss: 0.1075 - accuracy: 0.9673 - 8s/epoch - 4ms/step + Epoch 4/5 + [2022-03-07 02:37:57] INFO (nni/MainThread) Intermediate result: 0.9723333120346069 (Index 3) + 1875/1875 - 8s - loss: 0.0885 - accuracy: 0.9723 - 8s/epoch - 4ms/step + Epoch 5/5 + [2022-03-07 02:38:06] INFO (nni/MainThread) Intermediate result: 0.9762333035469055 (Index 4) + 1875/1875 - 9s - loss: 0.0747 - accuracy: 0.9762 - 9s/epoch - 5ms/step + 313/313 - 1s - loss: 0.0766 - accuracy: 0.9772 - 647ms/epoch - 2ms/step + + + + +.. GENERATED FROM PYTHON SOURCE LINES 85-88 + +Report final result +------------------- +Report final accuracy to NNI so the tuning algorithm can predict best hyperparameters. + +.. GENERATED FROM PYTHON SOURCE LINES 88-89 + +.. code-block:: default + + nni.report_final_result(accuracy) + + + + +.. rst-class:: sphx-glr-script-out + + Out: + + .. code-block:: none + + [2022-03-07 02:38:06] INFO (nni/MainThread) Final result: 0.9771999716758728 + + + + + +.. rst-class:: sphx-glr-timing + + **Total running time of the script:** ( 0 minutes 44.370 seconds) + + +.. _sphx_glr_download_tutorials_hpo_quickstart_tensorflow_model.py: + + +.. only :: html + + .. container:: sphx-glr-footer + :class: sphx-glr-footer-example + + + + .. container:: sphx-glr-download sphx-glr-download-python + + :download:`Download Python source code: model.py ` + + + + .. container:: sphx-glr-download sphx-glr-download-jupyter + + :download:`Download Jupyter notebook: model.ipynb ` + + +.. 
only:: html + + .. rst-class:: sphx-glr-signature + + `Gallery generated by Sphinx-Gallery `_ diff --git a/docs/source/tutorials/hpo_quickstart_tensorflow/model_codeobj.pickle b/docs/source/tutorials/hpo_quickstart_tensorflow/model_codeobj.pickle new file mode 100644 index 000000000..d811be641 Binary files /dev/null and b/docs/source/tutorials/hpo_quickstart_tensorflow/model_codeobj.pickle differ diff --git a/docs/source/tutorials/hpo_quickstart_tensorflow/sg_execution_times.rst b/docs/source/tutorials/hpo_quickstart_tensorflow/sg_execution_times.rst new file mode 100644 index 000000000..16a30695c --- /dev/null +++ b/docs/source/tutorials/hpo_quickstart_tensorflow/sg_execution_times.rst @@ -0,0 +1,14 @@ + +:orphan: + +.. _sphx_glr_tutorials_hpo_quickstart_tensorflow_sg_execution_times: + +Computation times +================= +**12:45.612** total execution time for **tutorials_hpo_quickstart_tensorflow** files: + ++-----------------------------------------------------------------------------+-----------+--------+ +| :ref:`sphx_glr_tutorials_hpo_quickstart_tensorflow_main.py` (``main.py``) | 12:45.612 | 0.0 MB | ++-----------------------------------------------------------------------------+-----------+--------+ +| :ref:`sphx_glr_tutorials_hpo_quickstart_tensorflow_model.py` (``model.py``) | 00:00.000 | 0.0 MB | ++-----------------------------------------------------------------------------+-----------+--------+ diff --git a/docs/source/tutorials/index.rst b/docs/source/tutorials/index.rst index 01109a003..8f6f2ac76 100644 --- a/docs/source/tutorials/index.rst +++ b/docs/source/tutorials/index.rst @@ -161,6 +161,58 @@ Tutorials +.. _sphx_glr_tutorials_hpo_quickstart_tensorflow: + + + + +.. raw:: html + +
+ +.. only:: html + + .. figure:: /tutorials/hpo_quickstart_tensorflow/images/thumb/sphx_glr_main_thumb.png + :alt: NNI HPO Quickstart with TensorFlow + + :ref:`sphx_glr_tutorials_hpo_quickstart_tensorflow_main.py` + +.. raw:: html + +
+ + +.. toctree:: + :hidden: + + /tutorials/hpo_quickstart_tensorflow/main + +.. raw:: html + +
+ +.. only:: html + + .. figure:: /tutorials/hpo_quickstart_tensorflow/images/thumb/sphx_glr_model_thumb.png + :alt: Port TensorFlow Quickstart to NNI + + :ref:`sphx_glr_tutorials_hpo_quickstart_tensorflow_model.py` + +.. raw:: html + +
+ + +.. toctree:: + :hidden: + + /tutorials/hpo_quickstart_tensorflow/model +.. raw:: html + +
+ + + .. only:: html .. rst-class:: sphx-glr-signature diff --git a/examples/tutorials/hpo_quickstart_tensorflow/README.rst b/examples/tutorials/hpo_quickstart_tensorflow/README.rst new file mode 100644 index 000000000..e69de29bb diff --git a/examples/tutorials/hpo_quickstart_tensorflow/main.py b/examples/tutorials/hpo_quickstart_tensorflow/main.py new file mode 100644 index 000000000..01ebd2c0b --- /dev/null +++ b/examples/tutorials/hpo_quickstart_tensorflow/main.py @@ -0,0 +1,116 @@ +""" +NNI HPO Quickstart with TensorFlow +================================== +This tutorial optimizes the model in `official TensorFlow quickstart`_ with auto-tuning. + +The tutorial consists of 4 steps: + + 1. Modify the model for auto-tuning. + 2. Define hyperparameters' search space. + 3. Configure the experiment. + 4. Run the experiment. + +.. _official TensorFlow quickstart: https://www.tensorflow.org/tutorials/quickstart/beginner +""" + +# %% +# Step 1: Prepare the model +# ------------------------- +# In first step, you need to prepare the model to be tuned. +# +# The model should be put in a separate script. +# It will be evaluated many times concurrently, +# and possibly will be trained on distributed platforms. +# +# In this tutorial, the model is defined in :doc:`model.py `. +# +# Please understand the model code before continue to next step. + +# %% +# Step 2: Define search space +# --------------------------- +# In model code, we have prepared 4 hyperparameters to be tuned: +# *dense_units*, *activation_type*, *dropout_rate*, and *learning_rate*. +# +# Here we need to define their *search space* so the tuning algorithm can sample them in desired range. +# +# Assuming we have following prior knowledge for these hyperparameters: +# +# 1. *dense_units* should be one of 64, 128, 256. +# 2. *activation_type* should be one of 'relu', 'tanh', 'swish', or None. +# 3. *dropout_rate* should be a float between 0.5 and 0.9. +# 4. 
*learning_rate* should be a float between 0.0001 and 0.1, and it follows exponential distribution. +# +# In NNI, the space of *dense_units* and *activation_type* is called ``choice``; +# the space of *dropout_rate* is called ``uniform``; +# and the space of *learning_rate* is called ``loguniform``. +# You may have noticed, these names are derived from ``numpy.random``. +# +# For full specification of search space, check :doc:`the reference `. +# +# Now we can define the search space as follow: + +search_space = { + 'dense_units': {'_type': 'choice', '_value': [64, 128, 256]}, + 'activation_type': {'_type': 'choice', '_value': ['relu', 'tanh', 'swish', None]}, + 'dropout_rate': {'_type': 'uniform', '_value': [0.5, 0.9]}, + 'learning_rate': {'_type': 'loguniform', '_value': [0.0001, 0.1]}, +} + +# %% +# Step 3: Configure the experiment +# -------------------------------- +# NNI uses an *experiment* to manage the HPO process. +# The *experiment config* defines how to train the models and how to explore the search space. +# +# In this tutorial we use a *local* mode experiment, +# which means models will be trained on local machine, without using any special training platform. +from nni.experiment import Experiment +experiment = Experiment('local') + +# %% +# Now we start to configure the experiment. +# +# Firstly, specify the model code. +# In NNI evaluation of each hyperparameter set is called a *trial*. +# So the model script is called *trial code*. +# +# If you are using Linux system without Conda, you many need to change ``python`` to ``python3``. +# +# When ``trial_code_directory`` is a relative path, it relates to current working directory. +# To run ``main.py`` from a different path, you can set trial code directory to ``Path(__file__).parent``. +experiment.config.trial_command = 'python model.py' +experiment.config.trial_code_directory = '.' 
+ +# %% +# Then specify the search space we defined above: +experiment.config.search_space = search_space + +# %% +# Choose a tuning algorithm. +# Here we use :doc:`TPE tuner `. +experiment.config.tuner.name = 'TPE' +experiment.config.tuner.class_args['optimize_mode'] = 'maximize' + +# %% +# Specify how many trials to run. +# Here we evaluate 10 sets of hyperparameters in total, and concurrently evaluate 4 sets at a time. +# +# Please note that ``max_trial_number`` here is merely for a quick example. +# With default config TPE tuner requires 20 trials to warm up. +# In real world max trial number is commonly set to 100+. +# +# You can also set ``max_experiment_duration = '1h'`` to limit running time. +# +# And alternatively, you can skip this part and set no limit at all. +# The experiment will run forever until you press Ctrl-C. +experiment.config.max_trial_number = 10 +experiment.config.trial_concurrency = 4 + +# %% +# Step 4: Run the experiment +# -------------------------- +# Now the experiment is ready. Choose a port and launch it. +# +# You can use the web portal to view experiment status: http://localhost:8080. +experiment.run(8080) diff --git a/examples/tutorials/hpo_quickstart_tensorflow/model.py b/examples/tutorials/hpo_quickstart_tensorflow/model.py new file mode 100644 index 000000000..a8215f72c --- /dev/null +++ b/examples/tutorials/hpo_quickstart_tensorflow/model.py @@ -0,0 +1,88 @@ +""" +Port TensorFlow Quickstart to NNI +================================= +This is a modified version of `TensorFlow quickstart`_. + +It can be run directly and will have the exact same result as original version. + +Furthermore, it enables the ability of auto-tuning with an NNI *experiment*, which will be discussed later. + +For now, we recommend to run this script directly to verify the environment. + +There are only 3 key differences from the original version: + + 1. In `Get optimized hyperparameters`_ part, it receives auto-generated hyperparameters. + 2. 
"""
Port TensorFlow Quickstart to NNI
=================================
This is a modified version of `TensorFlow quickstart`_.

It can be run directly and will have the exact same result as original version.

Furthermore, it enables the ability of auto-tuning with an NNI *experiment*, which will be discussed later.

For now, we recommend to run this script directly to verify the environment.

There are only 3 key differences from the original version:

 1. In `Get optimized hyperparameters`_ part, it receives auto-generated hyperparameters.
 2. In `(Optional) Report intermediate results`_ part, it reports per-epoch accuracy for visualization.
 3. In `Report final result`_ part, it reports final accuracy for tuner to generate next hyperparameter set.

.. _TensorFlow quickstart: https://www.tensorflow.org/tutorials/quickstart/beginner
"""

# %%
import nni
import tensorflow as tf

# %%
# Hyperparameters to be tuned
# ---------------------------
params = {
    'dense_units': 128,
    'activation_type': 'relu',
    'dropout_rate': 0.2,
    'learning_rate': 0.001,
}

# %%
# Get optimized hyperparameters
# -----------------------------
# If run directly, ``nni.get_next_parameter()`` is a no-op and returns an empty dict.
# But with an NNI *experiment*, it will receive optimized hyperparameters from tuning algorithm.
optimized_params = nni.get_next_parameter()
params.update(optimized_params)

# %%
# Load dataset
# ------------
mnist = tf.keras.datasets.mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0

# %%
# Build model with hyperparameters
# --------------------------------
model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(params['dense_units'], activation=params['activation_type']),
    tf.keras.layers.Dropout(params['dropout_rate']),
    tf.keras.layers.Dense(10)
])

adam = tf.keras.optimizers.Adam(learning_rate=params['learning_rate'])

loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

model.compile(optimizer=adam, loss=loss_fn, metrics=['accuracy'])

# %%
# (Optional) Report intermediate results
# --------------------------------------
# The callback reports per-epoch accuracy to show learning curve in NNI web portal.
# And in :doc:`/hpo/assessors`, you will see how to leverage the metrics for early stopping.
#
# You can safely skip this and the experiment will work fine.
callback = tf.keras.callbacks.LambdaCallback(
    on_epoch_end = lambda epoch, logs: nni.report_intermediate_result(logs['accuracy'])
)

# %%
# Train and evaluate the model
# ----------------------------
model.fit(x_train, y_train, epochs=5, verbose=2, callbacks=[callback])
loss, accuracy = model.evaluate(x_test, y_test, verbose=2)

# %%
# Report final result
# -------------------
# Report final accuracy to NNI so the tuning algorithm can predict best hyperparameters.
nni.report_final_result(accuracy)
""" +from __future__ import annotations + __all__ = ['TpeTuner', 'TpeArguments', 'suggest', 'suggest_parameter'] from collections import defaultdict import logging import math -from typing import NamedTuple, Optional, Union +from typing import Any, NamedTuple import numpy as np from scipy.special import erf # pylint: disable=no-name-in-module -from nni.tuner import Tuner from nni.common.hpo_utils import OptimizeMode, format_search_space, deformat_parameters, format_parameters +from nni.tuner import Tuner +from nni.typehint import Literal from nni.utils import extract_scalar_reward from . import random_tuner @@ -32,11 +35,11 @@ _logger = logging.getLogger('nni.tuner.tpe') class TpeArguments(NamedTuple): """ These are the hyper-parameters of TPE algorithm itself. - To avoid confusing with trials' hyper-parameters, they are called "arguments" in this code. + To avoid confusing with trials' hyper-parameters, they are called "arguments" in TPE source code. Parameters - ========== - constant_liar_type: 'best' | 'worst' | 'mean' | None (default: 'best') + ---------- + constant_liar_type TPE algorithm itself does not support parallel tuning. This parameter specifies how to optimize for trial_concurrency > 1. @@ -44,20 +47,21 @@ class TpeArguments(NamedTuple): How each liar works is explained in paper's section 6.1. In general "best" suit for small trial number and "worst" suit for large trial number. + (:doc:`experiment result `) - n_startup_jobs: int (default: 20) + n_startup_jobs The first N hyper-parameters are generated fully randomly for warming up. If the search space is large, you can increase this value. Or if max_trial_number is small, you may want to decrease it. - n_ei_candidates: int (default: 24) + n_ei_candidates For each iteration TPE samples EI for N sets of parameters and choose the best one. (loosely speaking) - linear_forgetting: int (default: 25) + linear_forgetting TPE will lower the weights of old trials. 
This controls how many iterations it takes for a trial to start decay. - prior_weight: float (default: 1.0) + prior_weight TPE treats user provided search space as prior. When generating new trials, it also incorporates the prior in trial history by transforming the search space to one trial configuration (i.e., each parameter of this configuration chooses the mean of its candidate range). @@ -66,11 +70,11 @@ class TpeArguments(NamedTuple): With prior weight 1.0, the search space is treated as one good trial. For example, "normal(0, 1)" effectly equals to a trial with x = 0 which has yielded good result. - gamma: float (default: 0.25) + gamma Controls how many trials are considered "good". The number is calculated as "min(gamma * sqrt(N), linear_forgetting)". """ - constant_liar_type: Optional[str] = 'best' + constant_liar_type: Literal['best', 'worst', 'mean'] | None = 'best' n_startup_jobs: int = 20 n_ei_candidates: int = 24 linear_forgetting: int = 25 @@ -79,18 +83,61 @@ class TpeArguments(NamedTuple): class TpeTuner(Tuner): """ + Tree-structured Parzen Estimator (TPE) is an SMBO tuner. + + TPE models P(x|y) and P(y) where x represents hyperparameters and y the associated evaluation metric. + P(x|y) is modeled by transforming the generative process of hyperparameters, + replacing the distributions of the configuration prior with non-parametric densities. + + TPE is described in detail in *Algorithms for Hyper-Parameter Optimization*. (`paper`_) + + .. _paper: https://proceedings.neurips.cc/paper/2011/file/86e8f7ab32cfd12577bc2619bc635690-Paper.pdf + + Examples + -------- + + .. code-block:: + + ## minimal config ## + + config.tuner.name = 'TPE' + config.tuner.class_args = { + 'optimize_mode': 'minimize' + } + + .. 
code-block::
+
+        ## advanced config ##
+
+        config.tuner.name = 'TPE'
+        config.tuner.class_args = {
+            'optimize_mode': 'maximize',
+            'seed': 12345,
+            'tpe_args': {
+                'constant_liar_type': 'mean',
+                'n_startup_jobs': 10,
+                'n_ei_candidates': 20,
+                'linear_forgetting': 100,
+                'prior_weight': 0,
+                'gamma': 0.5
+            }
+        }
+
     Parameters
-    ==========
-    optimze_mode: 'minimize' | 'maximize' (default: 'minimize')
+    ----------
+    optimize_mode
         Whether optimize to minimize or maximize trial result.
-    seed: int | None
+    seed
         The random seed.
-    tpe_args: dict[string, Any] | None
+    tpe_args
         Advanced users can use this to customize TPE tuner.
         See `TpeArguments` for details.
     """

-    def __init__(self, optimize_mode='minimize', seed=None, tpe_args=None):
+    def __init__(self,
+                 optimize_mode: Literal['minimize', 'maximize'] = 'minimize',
+                 seed: int | None = None,
+                 tpe_args: dict[str, Any] | None = None):
         self.optimize_mode = OptimizeMode(optimize_mode)
         self.args = TpeArguments(**(tpe_args or {}))
         self.space = None
@@ -183,7 +230,7 @@ def suggest_parameter(args, rng, spec, parameter_history):
 ## Utilities part ##

 class Record(NamedTuple):
-    param: Union[int, float]
+    param: int | float
     loss: float

 class BestLiar:  # assume running parameters have best result, it accelerates "converging"
@@ -305,7 +352,7 @@ def adaptive_parzen_normal(args, history_mus, prior_mu, prior_sigma):
     this function is used for everything other than "choice" and "randint".

     Parameters
-    ==========
+    ----------
     args: TpeArguments
         Algorithm arguments.
     history_mus: 1-d array of float
@@ -317,7 +364,7 @@
         σ value of normal search space.

     Returns
-    =======
+    -------
     Tuple of three 1-d float arrays: (weight, µ, σ).
The tuple represents N+1 "vicinity of observations" and each one's weight, diff --git a/nni/retiarii/nn/pytorch/hypermodule.py b/nni/retiarii/nn/pytorch/hypermodule.py index 771d84162..7b67f23a4 100644 --- a/nni/retiarii/nn/pytorch/hypermodule.py +++ b/nni/retiarii/nn/pytorch/hypermodule.py @@ -1,6 +1,7 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT license. +from packaging.version import Version import torch import torch.nn as nn @@ -8,7 +9,6 @@ from nni.retiarii.serializer import basic_unit from .api import LayerChoice from .utils import generate_new_label -from ...utils import version_larger_equal __all__ = ['AutoActivation'] @@ -99,7 +99,7 @@ class UnaryTanh(nn.Module): def forward(self, x): return torch.tanh(x) -if not version_larger_equal(torch.__version__, TorchVersion): +if not Version(torch.__version__) >= Version(TorchVersion): @basic_unit class UnaryAsinh(nn.Module): def forward(self, x): @@ -110,7 +110,7 @@ class UnaryAtan(nn.Module): def forward(self, x): return torch.atan(x) -if not version_larger_equal(torch.__version__, TorchVersion): +if not Version(torch.__version__) >= Version(TorchVersion): @basic_unit class UnarySinc(nn.Module): def forward(self, x): @@ -151,7 +151,7 @@ unary_modules = ['UnaryIdentity', 'UnaryNegative', 'UnaryAbs', 'UnarySquare', 'U 'UnarySinh', 'UnaryCosh', 'UnaryTanh', 'UnaryAtan', 'UnaryMax', 'UnaryMin', 'UnarySigmoid', 'UnaryLogExp', 'UnaryExpSquare', 'UnaryErf'] -if not version_larger_equal(torch.__version__, TorchVersion): +if not Version(torch.__version__) >= Version(TorchVersion): unary_modules.append('UnaryAsinh') unary_modules.append('UnarySinc') diff --git a/nni/retiarii/nn/pytorch/nn.py b/nni/retiarii/nn/pytorch/nn.py index 6b79af636..92663e65c 100644 --- a/nni/retiarii/nn/pytorch/nn.py +++ b/nni/retiarii/nn/pytorch/nn.py @@ -1,11 +1,11 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT license. 
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

"""Typing aliases shared across NNI modules."""

import sys
import typing

# ``typing.Literal`` only exists on Python >= 3.8.  Static type checkers always
# get the real ``Literal``; at runtime on Python 3.7 we fall back to
# ``typing.Any`` so that modules importing this alias remain importable.
# NOTE(review): the fallback assumes consumers use ``Literal[...]`` only in
# lazy annotations (``from __future__ import annotations``, as tpe_tuner.py
# does) -- subscripting ``Any`` at runtime would fail; confirm for new call sites.
if typing.TYPE_CHECKING or sys.version_info >= (3, 8):
    Literal = typing.Literal
else:
    Literal = typing.Any