Squashed commit of the following:
commit 6ab312be0c8477f42f6cb7abe55f0c4e415d1058 Author: KeDengMS <kedeng@microsoft.com> Date: Thu Nov 17 13:35:16 2016 -0800 Update 1BitSGD after merging eldak/emptyMinibatches
commit e7bd91e7ed69eb16355691e75af05872d64a679f Merge: 44a1dcd 4813bde Author: KeDengMS <kedeng@microsoft.com> Date: Thu Nov 17 13:31:57 2016 -0800 Merge branch 'eldak/emptyMinibatches' into kedeng/warmStart
commit 4813bdec0e620b575e6daeffeab57ce3f6188370 Author: Eldar Akchurin <eldak@microsoft.com> Date: Thu Nov 17 21:17:05 2016 +0100 Fixing block momentum
commit 44a1dcdf4563f1ae07039fb03927231564fdee76 Author: KeDengMS <kedeng@microsoft.com> Date: Thu Nov 17 11:14:19 2016 -0800 Eldar's fixes for empty minibatch
commit 0aea2972b6561d925033781f476aee14a2dfdcd6 Author: Eldar Akchurin <eldak@microsoft.com> Date: Thu Nov 17 17:26:48 2016 +0100 Fixing empty minibatches
commit 8aeb600b5e32304ed81b20833461a8ce7308bff6 Author: KeDengMS <kedeng@microsoft.com> Date: Wed Nov 16 21:50:10 2016 -0800 Squashed commit of the following:
commit 3afde1a503 Merge: 9722676 aa64648 Author: Project Philly <svcphil@microsoft.com> Date: Wed Nov 16 17:28:31 2016 -0800 Integrate ruizhao/max__utt_num_HTKReader into master
commit 9722676a44 Merge: ffa0b58 9a57aeb Author: Project Philly <svcphil@microsoft.com> Date: Wed Nov 16 13:24:08 2016 -0800 Integrate mahilleb/1116Changes into master
commit ffa0b58de9 Author: Sasa Galic <sagalic@microsoft.com> Date: Mon Nov 14 15:19:18 2016 +0100 FCN training fixes. Two fixes needed for FCN training are introduced in this change: * BinaryElementWiseNodes support transform interface * Crop node crops just first two dimensions of input
commit 9a57aebfca Author: Mark Hillebrand <mahilleb@microsoft.com> Date: Wed Nov 16 11:19:46 2016 +0100 Tools/make_binary_drop_linux: adapt wrt new Linux images
commit cc23356af2
Author: Mark Hillebrand <mahilleb@microsoft.com> Date: Wed Nov 16 10:32:24 2016 +0100 Remove legacy CNTK Python support, contrib\Python plus associated test
commit 3b1448c449 Author: Mark Hillebrand <mahilleb@microsoft.com> Date: Mon Nov 14 11:11:35 2016 +0100 bindings/python/setup.py: link with specific .lib, not everything
commit 8e79232c43 Author: Philipp Kranen <pkranen@microsoft.com> Date: Wed Nov 16 09:41:58 2016 +0100 fixed dimension inference for times node
commit 6210f62307 Author: Philipp Kranen <pkranen@microsoft.com> Date: Tue Nov 15 17:32:43 2016 +0100 towards Fast R-CNN eval only
commit cbc27a94ca Author: Philipp Kranen <pkranen@microsoft.com> Date: Tue Nov 15 14:45:00 2016 +0100 fixed 202 and 203 tutorials
commit 5d4d855407 Merge: 8095cea 9f39b27 Author: Qiwei Ye <chivee.ye@gmail.com> Date: Wed Nov 16 16:14:59 2016 +0800 Merge pull request #1043 from Microsoft/qiwye/asgd-dev Add windows baseline for ASGD e2e test
commit 9f39b27d39 Author: Qiwei Ye <qiwye@microsoft.com> Date: Wed Nov 16 15:54:37 2016 +0800 Add windows baseline for ASGD e2e test
commit aa64648458 Author: RuiZhao <ruzhao@microsoft.com> Date: Mon Oct 3 14:39:04 2016 -0700 support max utt length in HTKReader
commit 8095ceaa28 Author: Emad Barsoum <ebarsoum@microsoft.com> Date: Tue Nov 15 15:43:38 2016 -0800 Address feedback and fix multi-verso build.
commit 2fc89e67cc Author: Emad Barsoum <ebarsoum@microsoft.com> Date: Tue Nov 15 14:32:52 2016 -0800 Explain the range in dropout
commit 4b1a05cd52 Author: Emad Barsoum <ebarsoum@microsoft.com> Date: Tue Nov 15 14:21:40 2016 -0800 Add documentation for GlobalPooling and Dropout.
commit f775d96a2e Merge: 6157426 cb4b03b Author: Project Philly <svcphil@microsoft.com> Date: Tue Nov 15 10:19:00 2016 -0800 Integrate pkranen/scaleModePy into master
commit cb4b03b647 Author: Philipp Kranen <pkranen@microsoft.com> Date: Tue Nov 15 18:04:47 2016 +0100 addressed CR comments
commit b2e18c05a3 Author: Philipp Kranen <pkranen@microsoft.com> Date: Tue Nov 15 16:32:17 2016 +0100 bug fix in B2 script
commit 6157426a61 Merge: 9f0d79b 1b794ec Author: Wolfgang Manousek <wolfma@microsoft.com> Date: Tue Nov 15 16:00:28 2016 +0100 Merge branch 'wolfma/inst-move' of https://github.com/Microsoft/CNTK into wolfma/inst-move
commit 9f0d79bdd3 Author: Wolfgang Manousek <wolfma@microsoft.com> Date: Tue Nov 15 09:15:26 2016 +0100 move install files into new install subdirectory beneath the script directory, remove obsolete files, update readme in script directory
commit 12dc18830c Author: Philipp Kranen <pkranen@microsoft.com> Date: Tue Nov 15 15:53:21 2016 +0100 fixed model removal in A2 script
commit ec25403ac0 Author: Philipp Kranen <pkranen@microsoft.com> Date: Tue Nov 15 15:45:02 2016 +0100 Support scaleMode option for image deserializer in python API
commit 1898516b08 Merge: 2340b27 e96c1b0 Author: Project Philly <svcphil@microsoft.com> Date: Tue Nov 15 04:13:17 2016 -0800 Integrate eldak/htkLongSequences into master
commit 2340b27379 Merge: 20da1c7 99389b3 Author: Philipp Kranen <pkranen@microsoft.com> Date: Tue Nov 15 12:20:13 2016 +0100 Merge pull request #1039 from tuzzer/patch-2 Fixed formatting errors in CNTK_203_Reinforcement_Learning_Basics.ipynb
commit 20da1c7157 Merge: a3e2648 ceeedfe Author: Philipp Kranen <pkranen@microsoft.com> Date: Tue Nov 15 11:53:35 2016 +0100 Merge pull request #1038 from tuzzer/patch-1 Fixed broken link in the tutorial docs
commit e96c1b0165 Author: Eldar Akchurin <eldak@microsoft.com> Date: Mon Nov 14 12:34:10 2016 +0100 Fixing frame calculation
commit 1b794ec63b Author: Wolfgang Manousek <wolfma@microsoft.com> Date: Tue Nov 15 09:15:26 2016 +0100 move install files into new install subdirectory beneath the script directory, remove obsolete files, update readme in script directory
commit a3e264884d Author: Wolfgang Manousek <wolfma@microsoft.com> Date: Mon Nov 14 17:21:37 2016 +0100 if the target environment doesn't exist, it will be created with conda env create; an existing environment will now be updated through conda env update; addressed CR comments, merged master
commit 99389b3032 Author: Matthew Chan <matthew.tk.chan@gmail.com> Date: Mon Nov 14 22:23:40 2016 -0800 Update CNTK_203_Reinforcement_Learning_Basics.ipynb Fixed the "unknown character" problems. Fixed the problem where "Bellmann" was not part of the hyperlink.
commit ceeedfeee4 Author: Matthew Chan <matthew.tk.chan@gmail.com> Date: Mon Nov 14 20:59:27 2016 -0800 Update tutorials.rst The link to the reinforcement learning tutorial was broken. Linking it to the v2.0.beta3.0 version like the others as well.
commit c7c9ee6368 Author: Chris Basoglu <cbasoglu@microsoft.com> Date: Mon Nov 14 17:42:03 2016 -0800 Update README.md
commit 6be43ec454 Author: Junjie Qian <juqia@microsoft.com> Date: Mon Nov 7 11:40:00 2016 -0800 Buffer data for file writing to reduce requests
commit 7f5c78d4f9
Author: Philipp Kranen <pkranen@microsoft.com> Date: Mon Nov 14 21:19:20 2016 +0100 Update README.md
commit 4cd62698f5707d30c7f2b658ff589ae052fe141b Author: KeDengMS <kedeng@microsoft.com> Date: Wed Nov 16 21:41:13 2016 -0800 Update 1-bit SGD
commit 3e284cc0983a47a752b1ebe4fa911959b10f2ed5 Author: KeDengMS <kedeng@microsoft.com> Date: Wed Nov 16 18:02:07 2016 -0800 Address CR comments
commit b7428a741093a6fe3823f64e50cab9c83e4c84e5 Author: KeDengMS <kedeng@microsoft.com> Date: Wed Nov 16 16:35:34 2016 -0800 Address CR comments Add block momentum python api/example/tests
commit 36979bbaab41cabd97e71270bb532be983a19d69 Merge: 84602e1 c3a7d09 Author: Ke Deng <kedeng@microsoft.com> Date: Wed Nov 16 14:01:51 2016 -0800 Merge branch 'kedeng/warmStart' of https://github.com/microsoft/cntk into kedeng/warmStart
commit 84602e1d8fef48432f8c83c6ad221e87b6b9277e Author: Ke Deng <kedeng@microsoft.com> Date: Wed Nov 16 13:47:57 2016 -0800 Update distributed examples
commit c3a7d0935219ace0999f7d71ee9d7e8bad1e4229 Author: Eldar Akchurin <eldak@microsoft.com> Date: Wed Nov 16 10:57:23 2016 +0100 Fixing bit overflow in the minibatch source
commit c546cac30833486609975c9b1bdc95fd8168eb25 Author: KeDengMS <kedeng@microsoft.com> Date: Tue Nov 15 17:01:31 2016 -0800 Try Jenkins
commit eb78b308970ac6a5de42405509c041685c7afd1a Author: KeDengMS <kedeng@microsoft.com> Date: Tue Nov 15 11:30:08 2016 -0800 Fix MinibatchSourceTest for random distributed case, as sample count may not be evenly distributed to workers
commit f637230ebbfc34aeb095fbea38cf5600aeb97993 Merge: 0ebd2f0 9bc7cf7 Author: KeDengMS <kedeng@microsoft.com> Date: Tue Nov 15 10:09:33 2016 -0800 Merge branch 'kedeng/warmStart' of https://github.com/Microsoft/CNTK into kedeng/warmStart
commit 0ebd2f09301cafc05c401062934e9a9dbd6fba3b Author: KeDengMS <kedeng@microsoft.com> Date: Tue Nov 15 10:08:58 2016 -0800 Fix MinibatchSource to avoid double SetConfig when warmStart == 0
commit 9bc7cf7dbc11fce169edbf62cb30f1c27e6f29b5 Author: Eldar Akchurin <eldak@microsoft.com> Date: Tue Nov 15 10:21:03 2016 +0100 Fixing empty minibatch
commit d931c501c5c3eea5c83154aeab8742ffbbbf64f9 Author: KeDengMS <kedeng@microsoft.com> Date: Mon Nov 14 17:41:55 2016 -0800 ConvNet distributed example update
commit f68a0d83da0955b504ddfbcd3db6d071840080d3 Author: KeDengMS <kedeng@microsoft.com> Date: Mon Nov 14 17:16:11 2016 -0800 Add checkpoint test for distributed training; update CifarResNet_Distributed example; add ConvNet_CIFAR10_DataAug_Distributed example
commit 11a6c71efed3412585e3e1feba0a5ad3ac663b7b Author: KeDengMS <kedeng@microsoft.com> Date: Mon Nov 14 15:56:21 2016 -0800 Add test for randomized minibatch source
commit 4bf787be0f433ff98d1ff2c0d0d62b6227f4eb6f Author: KeDengMS <kedeng@microsoft.com> Date: Mon Nov 14 13:49:50 2016 -0800 Squashed commit of the following:
commit d61f1de3715aee8052a14a77bf39f150271778f2 Author: KeDengMS <kedeng@microsoft.com> Date: Mon Nov 14 13:44:34 2016 -0800 Squashed commit of the following:
commit babf078b49 Merge: 37bc988 4ef4a91 Author: Project Philly <svcphil@microsoft.com> Date: Mon Nov 14 10:59:42 2016 -0800 Integrate alrezni/v2_scratch into master
commit 37bc98867f Merge: 90ff04f 3e0d8ba Author: Project Philly <svcphil@microsoft.com> Date: Mon Nov 14 10:17:33 2016 -0800 Integrate t-ivrodr/kernel_tests_with_padding into master
commit 90ff04f254 Author: Philipp Kranen <pkranen@microsoft.com> Date: Mon Nov 14 14:52:39 2016 +0100 fixed tutorials and VS links
commit eb2dfcefc5 Author: Mark Hillebrand <mahilleb@microsoft.com> Date: Mon Nov 14 12:56:43 2016 +0100 Fix a link
commit 1b1edd8b1d Author: Mark Hillebrand <mahilleb@microsoft.com> Date: Mon Nov 14 08:20:21 2016 +0100 Start editing CNTK.sln and project files
commit 0d72140c75 Author: Philipp Kranen <pkranen@microsoft.com> Date: Mon Nov 14 12:13:09 2016 +0100 removed two py files
commit 3d1e93badf Author: Mark Hillebrand <mahilleb@microsoft.com> Date: Fri Nov 11 18:12:43 2016 +0100 Addressed CR comments
commit dbd416f5f4 Author: Mark Hillebrand <mahilleb@microsoft.com> Date: Fri Nov 11 15:42:11 2016 +0100 Update binary installers to not install Git and clone CNTK repo; Python examples included in Drop.
commit fba28bf0ed Author: Philipp Kranen <pkranen@microsoft.com> Date: Thu Nov 10 13:02:35 2016 +0100 Fixed python commons and eval clients
commit 9f3a510c26 Author: Alexey Orlov <alexeyo@microsoft.com> Date: Thu Nov 10 12:24:02 2016 +0100 Add root Tutorials folder to Binary drop. Linux
commit adbe62f157 Author: Alexey Orlov <alexeyo@microsoft.com> Date: Thu Nov 10 12:08:45 2016 +0100 Add root Tutorials folder to Binary drop. Windows
commit 9231ece8ce Author: Philipp Kranen <pkranen@microsoft.com> Date: Mon Nov 7 15:30:32 2016 +0100 Restructuring examples and tutorials
commit 110aac7f3c Merge: ac3866a 513182c Author: Project Philly <svcphil@microsoft.com> Date: Mon Nov 14 06:09:31 2016 -0800 Integrate alrezni/v2_loadmodel_drop_datatype into master
commit 3e0d8baaa6 Author: Ivan Rodriguez <t-ivrodr@microsoft.com> Date: Wed Nov 2 13:27:32 2016 +0100 Adding convolution and pooling tests with different strides and padding
commit ac3866a0e5 Merge: b89762f 0383a01 Author: Mark Hillebrand <mahilleb@microsoft.com> Date: Mon Nov 14 13:07:33 2016 +0000 Merge remote-tracking branch 'origin/master' into mahilleb/pr/1030
commit 513182c2d7 Author: Alexey Reznichenko <alrezni@microsoft.com> Date: Fri Nov 11 15:44:10 2016 +0100 Drop dataType parameter from LoadModel
commit 4ef4a91130 Author: Alexey Reznichenko <alrezni@microsoft.com> Date: Mon Nov 14 11:31:46 2016 +0100 Update hyper-parameter settings as agreed. For LR, the unit type is now a required parameter. All "non-distributed" examples use per-minibatch learning rate. For momentum, the only way to specify a per-sample schedule is to use a time constant. Per-minibatch values are scaled based on the actual MB sizes.
commit b89762fe96 Author: Mark Hillebrand <mahilleb@microsoft.com> Date: Mon Nov 14 10:16:02 2016 +0000 Tools/docker/CNTK-CPUOnly-Image/: init Multiverso submodule as well
commit 0383a0134f Author: Qiwei Ye <qiwye@microsoft.com> Date: Sun Nov 13 20:39:04 2016 +0800 Fix baseline
commit ad8f1fa393 Author: Qiwei Ye <qiwye@microsoft.com> Date: Sun Nov 13 17:37:07 2016 +0800 update linux gpu baseline for ASGD e2e test
commit b9a2f3fc95 Author: Qiwei Ye <qiwye@microsoft.com> Date: Sun Nov 13 16:52:59 2016 +0800 quick fix
commit b03cea93c3 Author: Takuma Yoshida <me@yoavlt.com> Date: Sun Nov 13 15:13:13 2016 +0900 Fix missing space and submodule
commit 325e480337 Author: Qiwei Ye <qiwye@microsoft.com> Date: Sun Nov 13 13:31:48 2016 +0800 TODO: quick fix for master fixing
commit 6d139b9ebc Merge: da8b338 e618b91 Author: Qiwei Ye <qiwye@microsoft.com> Date: Sun Nov 13 13:00:38 2016 +0800 Merge branch 'master' into qiwye/asgd-dev
commit da8b338865 Author: Qiwei Ye <qiwye@microsoft.com> Date: Sun Nov 13 13:00:00 2016 +0800 change lowercase to Uppercase for Multiverso project
commit e618b917fe Merge: 1f2558c acc6d4a Author: Project Philly <svcphil@microsoft.com> Date: Sat Nov 12 03:38:06 2016 -0800 Integrate qiwye/asgd-dev into master
commit 1f2558c650 Author: Vadim Mazalov <vadimma@microsoft.com> Date: Fri Nov 11 17:04:21 2016 -0800 Polish run-test and yml files for external cross-entropy speech tests
commit fa399c12a6 Author: Vadim Mazalov <vadimma@microsoft.com> Date: Fri Nov 11 16:54:45 2016 -0800 Add windows baseline for speech external CE tests
commit 7ecbdf5183 Author: Vadim Mazalov <vadimma@microsoft.com> Date: Wed Nov 9 10:00:09 2016 -0800 Refactor the speech E2E tests
commit 668f2efe94
Author: Vadim Mazalov <vadimma@microsoft.com> Date: Fri Nov 4 12:11:32 2016 -0700 E2E external speech tests
commit ce18ad8974 Merge: 6a51e64 acc6d4a Author: Qiwei Ye <qiwye@microsoft.com> Date: Sat Nov 12 13:24:19 2016 +0800 Merge branch 'qiwye/asgd-dev' of https://github.com/Microsoft/CNTK into qiwye/asgd-dev
commit 6a51e64680 Merge: af52a3c 82dc610 Author: Qiwei Ye <qiwye@microsoft.com> Date: Sat Nov 12 13:22:39 2016 +0800 Merge branch 'master' into qiwye/asgd-dev
commit af52a3cf57 Author: Qiwei Ye <qiwye@microsoft.com> Date: Sat Nov 12 13:20:31 2016 +0800 adding warning for Windows build
commit acc6d4a3c3 Author: Fei Gao <feiga@users.noreply.github.com> Date: Fri Nov 11 16:06:34 2016 +0800 Update testcases.yml
commit 4286c6e9bc Author: Qiwei Ye <qiwye@microsoft.com> Date: Fri Nov 11 12:59:14 2016 +0800 link multiverso library to EVAL_EXTENDED_CLIENT
commit 417513d8aa Merge: 82ebb6e 0014540 Author: Qiwei Ye <qiwye@microsoft.com> Date: Fri Nov 11 12:56:40 2016 +0800 Merge branch 'qiwye/asgd-dev' of https://github.com/Microsoft/CNTK into qiwye/asgd-dev
commit 82ebb6e900 Author: Qiwei Ye <qiwye@microsoft.com> Date: Fri Nov 11 12:55:23 2016 +0800 build with multiverso as default
commit 0014540d53 Author: Fei Gao <feiga@users.noreply.github.com> Date: Fri Nov 11 11:19:04 2016 +0800 Update testcases.yml
commit a62037dbc7 Merge: 8fad703 cbcea1b Author: unknown <qiwye@MSRAML-SRV08.fareast.corp.microsoft.com> Date: Fri Nov 11 11:01:05 2016 +0800 Merge branch 'master' into qiwye/asgd-dev
commit 8fad703f27 Author: unknown <qiwye@MSRAML-SRV08.fareast.corp.microsoft.com> Date: Fri Nov 11 11:00:32 2016 +0800 Avoid code duplication by refactoring WaitAll into a separate function.
commit 249989b95f Merge: 10a6535 ac1a946 Author: unknown <qiwye@MSRAML-SRV08.fareast.corp.microsoft.com> Date: Thu Nov 10 16:50:46 2016 +0800 Merge branch 'master' into qiwye/asgd-dev Conflicts: CNTK.sln, Makefile
commit 10a6535ef9 Author: unknown <qiwye@MSRAML-SRV08.fareast.corp.microsoft.com> Date: Thu Nov 10 16:42:01 2016 +0800 add WaitAll() for ASGD situation
commit 6264cc1c31 Author: Qiwei Ye <qiwye@microsoft.com> Date: Thu Nov 10 02:09:52 2016 +0800 adding logic to save checkpoint file
commit 7af92a3787 Merge: eea3716 9ff4832 Author: Qiwei Ye <qiwye@microsoft.com> Date: Thu Nov 10 02:06:18 2016 +0800 Merge branch 'qiwye/asgd-dev' of https://github.com/Microsoft/CNTK into qiwye/asgd-dev Conflicts: Source/SGDLib/ASGDHelper.cpp
commit eea37168e8 Author: Qiwei Ye <qiwye@microsoft.com> Date: Wed Nov 9 22:27:12 2016 +0800 adding examples for Multiple Nodes
commit 9ff4832f91 Author: Fei Gao <feiga@users.noreply.github.com> Date: Wed Nov 9 19:34:16 2016 +0800 Update ASGDHelper.cpp
commit 392fb6512a Author: Fei Gao <feiga@users.noreply.github.com> Date: Wed Nov 9 17:06:12 2016 +0800 Update testcases.yml
commit 6491888a47 Author: Qiwei Ye <chivee.ye@gmail.com> Date: Wed Nov 9 16:58:31 2016 +0800 fix windows build
commit b6e9aa600e Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue Nov 8 21:42:52 2016 +0800 removing baseline from Linux for Windows baseline
commit f1c5ac2c47 Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue Nov 8 21:29:28 2016 +0800 Addressed CR comments
commit c0f2fbf5da Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue Nov 8 20:54:50 2016 +0800 Addressed CR comments
commit 9b8904390f Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue Nov 8 20:11:06 2016 +0800 Addressed CR comments
commit 345f259190 Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue Nov 8 19:59:19 2016 +0800 Addressed CR comments
commit 53379bb248 Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue Nov 8 19:50:04 2016 +0800 Addressed CR comments
commit 0b93d7b0de Author: Qiwei Ye <qiwye@microsoft.com> Date: Mon Nov 7 19:36:30 2016 +0800 revert unexpected change
commit af999a1144 Author: Qiwei Ye <qiwye@microsoft.com> Date: Mon Nov 7 19:23:28 2016 +0800 revert makefile: libEval depends on SGDLib, which will depend on Multiverso
commit 551956c310 Author: Qiwei Ye <qiwye@microsoft.com> Date: Sun Nov 6 20:47:43 2016 -0800 altering check-git-head for multiverso
commit e54dfffd88 Author: Qiwei Ye <qiwye@microsoft.com> Date: Sun Nov 6 14:42:55 2016 +0800 add git attributes for Multiverso
commit 9ce8d5c63a Author: Qiwei Ye <qiwye@microsoft.com> Date: Sun Nov 6 14:30:16 2016 +0800 update submodule to pass the cntk build test
commit 52fb5ab283 Author: Qiwei Ye <qiwye@microsoft.com> Date: Sun Nov 6 13:31:15 2016 +0800 fix testcase for e2e test
commit a6d02860af
Author: Qiwei Ye <qiwye@microsoft.com> Date: Sun Nov 6 13:22:45 2016 +0800 CNTK.cpp: fix indentation
commit 71d68c8b5e Author: Qiwei Ye <qiwye@microsoft.com> Date: Sun Nov 6 13:11:20 2016 +0800 update makefile to latest
commit f1da36427f Merge: 6d00cdc dd496e5 Author: Qiwei Ye <qiwye@microsoft.com> Date: Sun Nov 6 12:49:20 2016 +0800 Merge branch 'master' into qiwye/asgd-dev
commit 6d00cdc6d3 Author: Qiwei Ye <qiwye@microsoft.com> Date: Fri Nov 4 16:49:44 2016 +0800 Revert MultiversoHelper initialize
commit ff03e400d2 Author: Qiwei Ye <qiwye@microsoft.com> Date: Thu Nov 3 17:31:28 2016 +0800 fix linux build
commit 105305e589 Author: Qiwei Ye <qiwye@microsoft.com> Date: Thu Nov 3 15:00:19 2016 +0800 fix linux build
commit cd445874c1 Author: Qiwei Ye <qiwye@microsoft.com> Date: Thu Nov 3 13:54:29 2016 +0800 Adding InvalidArgument check
commit 0af066ba13 Author: Qiwei Ye <qiwye@microsoft.com> Date: Thu Nov 3 13:36:02 2016 +0800 changing E2E ASGD
commit 6e93dfbd9c Author: Qiwei Ye <qiwye@microsoft.com> Date: Wed Nov 2 21:05:29 2016 +0800 update Multiverso to latest version
commit f00d63fb59 Author: Qiwei Ye <qiwye@microsoft.com> Date: Wed Nov 2 21:04:30 2016 +0800 code clean: cuda_call in ASGDHelper
commit 8fbe99a16f Author: Qiwei Ye <qiwye@microsoft.com> Date: Wed Nov 2 17:21:51 2016 +0800 code clean
commit 00f9c48291 Author: Qiwei Ye <qiwye@microsoft.com> Date: Wed Nov 2 16:07:46 2016 +0800 turn tracelevel to 1 for minibatch tracing
commit c7697f34d9 Author: Qiwei Ye <qiwye@microsoft.com> Date: Wed Nov 2 15:31:38 2016 +0800 update E2E test baseline for ASGD
commit f9f30d6f64 Author: Qiwei Ye <qiwye@microsoft.com> Date: Wed Nov 2 14:42:13 2016 +0800 updating e2e testcase
commit 1ee15d0d0a Author: Qiwei Ye <qiwye@microsoft.com> Date: Wed Nov 2 13:20:30 2016 +0800 making e2e test run on GPU only
commit 02ffd27c95 Merge: 63764b3 7cc84c8 Author: Qiwei Ye <qiwye@microsoft.com> Date: Wed Nov 2 11:16:38 2016 +0800 Merge branch 'qiwye/asgd-dev' of https://github.com/Microsoft/CNTK into qiwye/asgd-dev
commit 63764b3e5c Author: Qiwei Ye <qiwye@microsoft.com> Date: Wed Nov 2 11:15:35 2016 +0800 Using CUDA_CALL instead of CudaErrorCheck
commit 7cc84c8731 Author: Fei Gao <gf0109@gmail.com> Date: Tue Nov 1 20:13:58 2016 -0700 Fix Makefile separator issue
commit 70ca67c09b Author: Fei Gao <gf0109@gmail.com> Date: Tue Nov 1 18:52:25 2016 -0700 Fix makefile tab separator issue
commit 10b40dfffa Author: Fei Gao <gf0109@gmail.com> Date: Tue Nov 1 06:59:10 2016 -0700 Linux Build OK
commit 0061178020 Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue Nov 1 21:06:40 2016 +0800 removing unnecessary dependency; adding _CRT_SECURE_NO_WARNINGS for util class
commit feb6a907e9 Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue Nov 1 20:56:21 2016 +0800 fix build
commit 89197f5d80 Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue Nov 1 20:54:01 2016 +0800 Revert "fix indentation: align with CNTK standard" This reverts commit ac65e14ddf.
commit 321b20c1de Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue Nov 1 20:53:50 2016 +0800 removing ASGDCommon.h
commit ac65e14ddf Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue Nov 1 20:46:35 2016 +0800 fix indentation: align with CNTK standard
commit 5f8e9c6385 Merge: f8f6638 10428b1 Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue Nov 1 20:42:49 2016 +0800 Merge branch 'qiwye/asgd-dev' of https://github.com/Microsoft/CNTK into qiwye/asgd-dev
commit f8f66384c3 Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue Nov 1 20:42:00 2016 +0800 adding baseline for ASGD e2e test
commit 10428b12fb Author: feiga <gf0109@gmail.com> Date: Tue Nov 1 20:41:41 2016 +0800 Change Tab from 2 spaces to 4 spaces
commit 060e9f678d Author: feiga <gf0109@gmail.com> Date: Tue Nov 1 20:36:24 2016 +0800 Update Makefile for the newly added ASGDHelper.cpp file
commit ce87588a26 Author: feiga <gf0109@gmail.com> Date: Tue Nov 1 20:34:04 2016 +0800 Remove ASGDCommon, add ASGDHelper Interface, and move Multiverso-related implementation to ASGDHelper.cpp file
commit 65750afd50 Author: feiga <gf0109@gmail.com> Date: Tue Nov 1 17:29:04 2016 +0800 Fix compile error
commit c991c4b6f0 Merge: 42e5ea1 565c275 Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue Nov 1 16:31:31 2016 +0800 Merge branch 'qiwye/asgd-dev' of https://github.com/Microsoft/CNTK into qiwye/asgd-dev
commit 42e5ea1328 Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue Nov 1 16:29:18 2016 +0800 remove unnecessary dependency; adding debug version
commit 565c2751e4
Author: feiga <gf0109@gmail.com> Date: Tue Nov 1 15:54:08 2016 +0800 update submodule
commit 6b1e2cd50c Merge: 9776a03 b33e67a Author: feiga <gf0109@gmail.com> Date: Tue Nov 1 15:52:55 2016 +0800 Merge branch 'qiwye/asgd-dev' of https://github.com/Microsoft/CNTK into qiwye/asgd-dev
commit 9776a03147 Author: feiga <gf0109@gmail.com> Date: Tue Nov 1 15:52:47 2016 +0800 Add Multiverso to the project
commit b33e67ac75 Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue Nov 1 14:39:14 2016 +0800 fix license
commit 0dbbd066d2 Author: Qiwei Ye <qiwye@microsoft.com> Date: Mon Oct 31 20:40:38 2016 +0800 add comments for multiversowrapper
commit 5965e19f28 Author: Qiwei Ye <qiwye@microsoft.com> Date: Mon Oct 31 13:46:10 2016 +0800 using shared_ptr instead of naked pointer; fix some typos
commit 1b92176e43 Author: Qiwei Ye <qiwye@microsoft.com> Date: Mon Oct 31 01:05:58 2016 +0800 code review: clean code
commit 4be2822a65 Merge: 4cb911a 2afe819 Author: Qiwei Ye <qiwye@microsoft.com> Date: Thu Oct 27 16:31:48 2016 +0800 Merge branch 'master' into qiwye/asgd-dev
commit 4cb911acba Author: Qiwei Ye <qiwye@microsoft.com> Date: Wed Oct 26 16:28:32 2016 +0800 using GPU for ASGD e2e test
commit 29f1ff31bd Author: Qiwei Ye <qiwye@microsoft.com> Date: Wed Oct 26 16:09:10 2016 +0800 updating 1bit
commit d9274c31e2 Author: Qiwei Ye <qiwye@microsoft.com> Date: Wed Oct 26 15:50:43 2016 +0800 adding quick e2e test
commit 8f843dfb7f Merge: 0a9f9f2 496c672 Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue Oct 25 17:39:31 2016 +0800 Merge branch 'master' into qiwye/asgd-dev
commit 0a9f9f2bc8 Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue Oct 25 17:35:01 2016 +0800 change asgd to ASGD in configure
commit 9587fa0261 Merge: a1b7a5c 1613426 Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue Oct 25 13:47:37 2016 +0800 Merge branch 'master' into qiwye/asgd-dev
commit a1b7a5cec7 Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue Oct 25 11:50:58 2016 +0800 code review: removing unnecessary files/tabs
commit 19844ed9dd Merge: 4dd1625 2bc4332 Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue Oct 25 11:31:42 2016 +0800 Merge branch 'master' into qiwye/asgd-dev Conflicts: Makefile, Source/SGDLib/SGDLib.vcxproj.filters
commit 4dd1625797 Merge: 6143e5c 3b987c2 Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue Oct 18 21:29:23 2016 +0800 Merge branch 'master' into qiwye/asgd-dev Conflicts: Source/SGDLib/SGD.cpp, Source/SGDLib/SGDLib.vcxproj.filters
commit 6143e5c582 Merge: a5d7b7f cc40f92 Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue Oct 18 16:06:55 2016 +0800 Merge branch 'qiwye/asgd-dev' of https://github.com/Microsoft/CNTK-exp-private into qiwye/asgd-dev
commit cc40f924b8 Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue Oct 18 16:04:59 2016 +0800 modify e2e tests for asgd training
commit a5d7b7f369 Merge: eecce32 43c7fc8 Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue Oct 18 16:03:09 2016 +0800 Merge branch 'master' into qiwye/asgd-dev Conflicts: CNTK.sln
commit eecce32487 Merge: 291176a 3285e38 Author: Qiwei Ye <qiwye@microsoft.com> Date: Fri Oct 14 16:52:29 2016 +0800 Merge branch 'qiwye/asgd-dev' of https://github.com/Microsoft/CNTK into qiwye/asgd-dev
commit 291176a7b6 Author: Qiwei Ye <qiwye@microsoft.com> Date: Fri Oct 14 16:44:14 2016 +0800 adding end-to-end test for ASGD
commit 3285e38947
Author: feiga <gf0109@gmail.com> Date: Wed Oct 12 13:07:52 2016 +0800 update submodule
commit d065765a45 Merge: 9944ba4 c5bd4e9 Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue Oct 11 20:39:29 2016 +0800 Merge branch 'qiwye/asgd-dev' of https://github.com/Microsoft/CNTK into qiwye/asgd-dev
commit c5bd4e9867 Author: feiga <gf0109@gmail.com> Date: Tue Oct 11 20:58:17 2016 +0900 Update submodule
commit 590a1851b9 Author: feiga <gf0109@gmail.com> Date: Tue Oct 11 19:51:47 2016 +0800 Add MultiversoTests project to sln file
commit 700db4c52f Author: feiga <gf0109@gmail.com> Date: Tue Oct 11 19:12:35 2016 +0800 Update Multiverso submodule
commit 9944ba4ce6 Author: Qiwei Ye <qiwye@microsoft.com> Date: Mon Oct 10 20:57:44 2016 +0800 fix build issue
commit 018a812f34 Author: Qiwei Ye <qiwye@microsoft.com> Date: Mon Oct 10 20:50:54 2016 +0800 reverting simple evaluator for compatibility, initializing simple evaluator without m_mpi when using ASGD
commit 0e443fa823 Merge: 926144e 2cc0338 Author: Qiwei Ye <qiwye@microsoft.com> Date: Mon Oct 10 17:21:24 2016 +0800 Merge branch 'qiwye/asgd-dev' of https://github.com/Microsoft/CNTK into qiwye/asgd-dev
commit 926144ec80 Author: Qiwei Ye <qiwye@microsoft.com> Date: Mon Oct 10 17:14:42 2016 +0800 making cross validation not use distributed reader while using DataParallelASGD
commit 2cc03381b9 Author: feiga <gf0109@gmail.com> Date: Mon Oct 10 14:06:23 2016 +0800 SGDLib project configuration update
commit ebefc5ade5 Merge: 5e37b37 614a51e Author: feiga <gf0109@gmail.com> Date: Mon Oct 10 13:04:06 2016 +0800 Merge branch 'qiwye/asgd-dev' of https://github.com/Microsoft/CNTK into qiwye/asgd-dev
commit 5e37b37093 Author: feiga <gf0109@gmail.com> Date: Mon Oct 10 13:03:57 2016 +0800 Add Multiverso vs project file to CNTK solution
commit 614a51e7fb Merge: 2ed62be 5ab89ca Author: feiga <gf0109@gmail.com> Date: Mon Oct 10 12:18:26 2016 +0900 Merge branch 'qiwye/asgd-dev' of https://github.com/Microsoft/CNTK into qiwye/asgd-dev
commit 2ed62be847 Author: feiga <gf0109@gmail.com> Date: Mon Oct 10 12:18:17 2016 +0900 Update Makefile
commit 5ab89ca283 Author: Qiwei Ye <qiwye@microsoft.com> Date: Mon Oct 10 10:39:32 2016 +0800 Removing hard tabs
commit 2ab22c55d7 Merge: e745745 5c18f2f Author: Qiwei Ye <qiwye@microsoft.com> Date: Sun Oct 9 16:02:34 2016 +0800 Merge branch 'master' into qiwye/asgd-dev
commit e745745430 Author: Qiwei Ye <qiwye@microsoft.com> Date: Sun Oct 9 16:01:09 2016 +0800 ASGD: using ArrayTable instead of MatrixTable for less communication
commit acde6e1ab2 Author: Qiwei Ye <qiwye@microsoft.com> Date: Sun Oct 9 13:16:40 2016 +0800 updating submodule for linux build
commit a3c8e9bca6 Merge: e87e5cd 1b9a33e Author: feiga <gf0109@gmail.com> Date: Sat Oct 8 16:52:55 2016 +0800 Merge branch 'qiwye/asgd-dev' of https://github.com/Microsoft/CNTK into qiwye/asgd-dev Conflicts: Source/Common/Include/NoMultiversoWrapper.h
commit e87e5cd17b Author: feiga <gf0109@gmail.com> Date: Sat Oct 8 16:50:25 2016 +0800 Update interface of NoMultiversoWrapper
commit 1b9a33ef26 Author: Qiwei Ye <qiwye@microsoft.com> Date: Sat Oct 8 15:14:44 2016 +0800 fix 1bit build error
commit e421707b03 Author: feiga <gf0109@gmail.com> Date: Fri Sep 30 12:21:20 2016 +0900 Update Multiverso UnitTest baseline
commit 15625d01e0 Author: feiga <gf0109@gmail.com> Date: Fri Sep 30 00:58:21 2016 +0900 Update Makefile, build Multiverso unittest alongside
commit c3fbc6a55d Author: feiga <gf0109@gmail.com> Date: Thu Sep 29 17:31:49 2016 +0900 Remove tab
commit 4ec8f1c4e4 Author: feiga <gf0109@gmail.com> Date: Thu Sep 29 17:03:48 2016 +0900 Replace tab with 4 spaces
commit eed2832700 Author: feiga <gf0109@gmail.com> Date: Thu Sep 29 16:48:57 2016 +0900 Update Makefile
commit 6e86798fd1 Merge: fa7e92e 6d3d641 Author: Qiwei Ye <qiwye@microsoft.com> Date: Wed Sep 28 21:45:14 2016 -0700 Merge branch 'qiwye/asgd-dev' of https://github.com/Microsoft/CNTK-exp-private into qiwye/asgd-dev
commit fa7e92ee65 Author: Qiwei Ye <qiwye@microsoft.com> Date: Wed Sep 28 21:44:58 2016 -0700 fix building bug of 1BitSGD
commit 6d3d64190b Merge: b69477a bde5187 Author: Qiwei Ye <qiwye@microsoft.com> Date: Thu Sep 29 11:52:49 2016 +0800 Merge branch 'master' into qiwye/asgd-dev
commit b69477acec Author: Qiwei Ye <qiwye@microsoft.com> Date: Thu Sep 29 11:43:19 2016 +0800 updating submodule to the latest one
commit 3baa3850d2
Author: Qiwei Ye <qiwye@microsoft.com> Date: Wed Sep 28 20:38:53 2016 -0700 updating submodules: Multiverso, 1BitSGD
commit 7910ff69b1 Author: Qiwei Ye <qiwye@microsoft.com> Date: Thu Sep 29 11:35:23 2016 +0800 updating 1Bit submodule
commit 5059e931d1 Merge: 3d3b523 0c7443f Author: Qiwei Ye <qiwye@microsoft.com> Date: Thu Sep 29 11:05:11 2016 +0800 Merge branch 'qiwye/asgd-dev' into feiga/multiverso Conflicts: Source/SGDLib/SGD.cpp
commit 0c7443f65b Author: Qiwei Ye <qiwye@microsoft.com> Date: Thu Sep 29 10:56:29 2016 +0800 minor change for Multiverso
commit 3d3b523e5d Author: feiga <gf0109@gmail.com> Date: Wed Sep 28 18:27:40 2016 +0900 Fix the relative path to absolute path in Makefile
commit 414bf2e02a Merge: 8a2e67a 732e9d6 Author: Qiwei Ye <qiwye@microsoft.com> Date: Wed Sep 28 10:54:51 2016 +0800 Merge branch 'master' into qiwye/asgd-dev
commit d7463fe507 Author: feiga <gf0109@gmail.com> Date: Tue Sep 27 22:17:42 2016 +0900 Refine Makefile
commit 89e53a9226 Author: feiga <gf0109@gmail.com> Date: Tue Sep 27 21:58:30 2016 +0900 fix Makefile
commit dfc2c6ccca Author: Fei Gao <feiga@microsoft.com> Date: Tue Sep 27 20:07:32 2016 +0800 Update Makefile
commit 3ff5975e2c Author: feiga <gf0109@gmail.com> Date: Tue Sep 27 19:58:38 2016 +0900 Update Makefile
commit 6b315511e4 Author: feiga <gf0109@gmail.com> Date: Tue Sep 27 19:46:26 2016 +0900 Update Makefile
commit 8a2e67aba8 Merge: d904abe 6dc8f37 Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue Sep 27 17:04:03 2016 +0800 Merge branch 'master' into qiwye/asgd-dev Conflicts: Source/1BitSGD, Source/SGDLib/SGD.cpp
commit d904abef4a Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue Sep 27 17:02:03 2016 +0800 minor fix
commit 50827a564d Author: feiga <gf0109@gmail.com> Date: Tue Sep 27 18:00:02 2016 +0900 Remove multiverso header file dependence in SGD.h
commit 516682b936 Author: Fei Gao <feiga@microsoft.com> Date: Tue Sep 27 14:47:46 2016 +0800 Update Makefile
commit a46ae614b1 Author: feiga <gf0109@gmail.com> Date: Tue Sep 27 15:40:01 2016 +0900 Minor Makefile change
commit b4de776657 Author: feiga <gf0109@gmail.com> Date: Tue Sep 27 12:37:31 2016 +0900 Fix Makefile Multiverso link issue
commit 2ae40949ae Author: Fei Gao <gf0109@gmail.com> Date: Mon Sep 26 06:20:51 2016 -0700 Fix Makefile to solve the make failure in Jenkins
commit c073fb0b17 Author: Fei Gao <gf0109@gmail.com> Date: Mon Sep 26 04:54:53 2016 -0700 Solve the multiversotests loader library path issue
commit 804d79286d Author: Fei Gao <gf0109@gmail.com> Date: Tue Sep 20 21:45:14 2016 -0700 Enable ASGD make
commit f5671d86c1 Author: Fei Gao <gf0109@gmail.com> Date: Mon Sep 19 03:07:15 2016 -0700 Build with Multiverso and ASGD
commit 9c3c876cb2 Author: Fei Gao <gf0109@gmail.com> Date: Mon Sep 19 00:05:59 2016 -0700 Replace tab with spaces
commit b2530cfd04 Author: Fei Gao <gf0109@gmail.com> Date: Sun Sep 18 02:22:04 2016 -0700 Add Multiverso test script for TestDriver.py
commit 870e4a5fc5 Author: Fei Gao <gf0109@gmail.com> Date: Sun Sep 18 02:13:38 2016 -0700 init the multiverso testing branch
commit e983297f59 Author: Qiwei Ye <qiwye@microsoft.com> Date: Sun Sep 18 14:10:50 2016 +0800 updating submodule
commit a8c7d6c041 Author: Qiwei Ye <qiwye@microsoft.com> Date: Sat Sep 17 23:03:25 2016 -0700 merging master
commit fab018435c Merge: 4106b02 d91a01b Author: Qiwei Ye <qiwye@microsoft.com> Date: Sun Sep 18 13:34:42 2016 +0800 Merge branch 'qiwye/asgd-dev' into qiwye/asgd-exp Conflicts: Source/CNTK/CNTK.cpp
commit 4106b0290f Merge: 131ab44 990cca1 Author: Qiwei Ye <qiwye@microsoft.com> Date: Sun Sep 18 13:33:54 2016 +0800 Merge branch 'qiwye/asgd-exp' of https://github.com/Microsoft/CNTK-exp-private into qiwye/asgd-exp
commit d91a01b2e9 Merge: b938d9a 75eb386 Author: Qiwei Ye <qiwye@microsoft.com> Date: Sun Sep 18 13:31:45 2016 +0800 Merge branch 'master' into qiwye/asgd-dev Conflicts: Source/1BitSGD, Source/SGDLib/SGD.cpp
commit b938d9a603 Merge: 6299fd1 e33eeff Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue Sep 13 14:34:05 2016 +0800 Merge branch 'master' into qiwye/asgd-dev Conflicts: CNTK.sln
commit 6299fd1d9f
Author: Qiwei Ye <qiwye@microsoft.com> Date: Thu Sep 1 13:07:37 2016 +0800 merging from master
commit 0d077ff592 Merge: b965a8c 08081d2 Author: Qiwei Ye <qiwye@microsoft.com> Date: Thu Sep 1 12:55:44 2016 +0800 Merge branch 'master' into qiwye/asgd-dev Conflicts: Source/SGDLib/SGD.cpp
commit 131ab448a6 Author: Qiwei Ye <qiwye@microsoft.com> Date: Fri Aug 26 10:48:21 2016 +0800 fix bug: perf time
commit 990cca1c9b Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue Aug 23 19:53:24 2016 +0800 adding lib
commit 02ffa99c2c Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue Aug 23 19:44:43 2016 +0800 updating headers
commit e20626af7c Merge: e086965 b965a8c Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue Aug 23 19:42:36 2016 +0800 Merge branch 'qiwye/asgd-dev' into qiwye/asgd-exp Conflicts: Source/Multiverso
commit b965a8c7a2 Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue Aug 23 19:40:22 2016 +0800 updating submodule
commit c5ccb2a10d Merge: eff143e d4f40dd Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue Aug 23 19:35:31 2016 +0800 Merge branch 'master' into qiwye/asgd-dev
commit e086965017 Author: Qiwei Ye <qiwye@microsoft.com> Date: Mon Aug 8 15:53:24 2016 +0800 alter time perf
commit 5b86ad3709 Merge: 4bc342e eff143e Author: Qiwei Ye <qiwye@microsoft.com> Date: Mon Aug 8 15:43:27 2016 +0800 Merge branch 'qiwye/asgd-dev' into qiwye/asgd-exp
commit eff143e5ba Merge: 0cda441 c2f5a29 Author: Qiwei Ye <qiwye@microsoft.com> Date: Mon Aug 8 15:40:37 2016 +0800 Merge branch 'master' into qiwye/asgd-dev Conflicts: Source/CNTK/prebuild.bat, configure
commit 4bc342edcf Author: Qiwei Ye <qiwye@microsoft.com> Date: Wed Jul 27 15:23:18 2016 +0800 adding dl for link
commit 0d30fa6933 Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue Jul 26 15:57:46 2016 +0800 adding dl lib
commit 83e1c23c35 Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue Jul 26 15:31:47 2016 +0800 adding -fPIC
commit 0d0461c25a Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue Jul 26 14:16:19 2016 +0800 commit the latest lib
commit 38a424eb53 Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue Jul 26 14:03:05 2016 +0800 adding headers
commit 07b7993300 Merge: 27636dc 0cda441 Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue Jul 26 14:02:06 2016 +0800 Merge branch 'qiwye/asgd-dev' into qiwye/asgd-exp Conflicts: Source/Multiverso
commit 0cda44190d Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue Jul 26 13:53:19 2016 +0800 updating submodule
commit 3b7e25edb4 Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue Jul 26 11:11:19 2016 +0800 remove timer, as the main branch has the latest timer
commit 5a33a35eac Merge: fb9cded f3dec43 Author: Qiwei Ye <qiwye@microsoft.com> Date: Mon Jul 25 20:13:27 2016 +0800 Merge branch 'master' into qiwye/asgd-dev Conflicts: Source/1BitSGD, Source/SGDLib/SGD.cpp
commit fb9cded35b Author: Qiwei Ye <qiwye@microsoft.com> Date: Thu Jul 7 16:00:59 2016 +0800 updating submodule
commit 67998341be Merge: 2e164ab 02bf3bd Author: Qiwei Ye <qiwye@microsoft.com> Date: Thu Jul 7 15:59:07 2016 +0800 Merge branch 'master' into qiwye/asgd-dev Conflicts: Source/1BitSGD, Source/SGDLib/SGDLib.vcxproj
commit 27636dcc09 Author: Qiwei Ye <qiwye@microsoft.com> Date: Thu Jun 23 15:55:44 2016 +0800 fix build
commit e2ac247cea Author: Qiwei Ye <qiwye@microsoft.com> Date: Thu Jun 23 12:53:18 2016 +0800 updating lib
commit 145144dde9
Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue Jun 21 22:09:30 2016 +0800 updating new lab
commit 53a50b75ef Merge: 53428e8 2e164ab Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue Jun 21 22:07:49 2016 +0800 Merge branch 'qiwye/asgd-dev' into qiwye/asgd-exp Conflicts: Source/CNTK/CNTK.cpp, Source/Common/Include/MultiversoWrapper.h, Source/Multiverso, Source/SGDLib/SGD.cpp, Source/SGDLib/SGD.h
commit 2e164ab33b Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue Jun 21 11:25:02 2016 +0800 fix bug
commit 1bf9c31ac4 Author: Qiwei Ye <qiwye@microsoft.com> Date: Mon Jun 20 16:31:41 2016 +0800 merge from master
commit d17ff5d9eb Merge: 88883ae fee99ca Author: Qiwei Ye <qiwye@microsoft.com> Date: Wed Jun 15 20:22:12 2016 +0800 Merge branch 'master' into qiwye/asgd-dev Conflicts: Source/CNTK/CNTK.cpp, Source/SGDLib/SGD.cpp, Source/SGDLib/SGD.h
commit 88883aebfa Author: Qiwei Ye <qiwye@microsoft.com> Date: Wed Jun 15 20:12:42 2016 +0800 updating multiverso
commit d8b1a25d71 Author: Qiwei Ye <qiwye@microsoft.com> Date: Wed Jun 8 17:55:47 2016 +0800 change the perf stat of sim-ma
commit 5981654c10 Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue Jun 7 11:19:39 2016 +0800 fix bug: using col-first storage for cntk buffer
commit 591c679a32 Author: Qiwei Ye <qiwye@microsoft.com> Date: Mon Jun 6 17:07:46 2016 +0800 fix typo
commit 51f118a54a Author: Qiwei Ye <qiwye@microsoft.com> Date: Mon Jun 6 17:01:26 2016 +0800 fix bug: adding the right option to server
commit 53428e8ea3 Author: Qiwei Ye <qiwye@microsoft.com> Date: Mon Jun 6 14:19:00 2016 +0800 Revert "git revert:debug" This reverts commit 76be031254.
commit efa91bca2a Author: Qiwei Ye <qiwye@microsoft.com> Date: Mon Jun 6 14:18:51 2016 +0800 Revert "debug info" This reverts commit 6cff37f038.
commit f07ab860aa Author: Qiwei Ye <qiwye@microsoft.com> Date: Mon Jun 6 14:18:47 2016 +0800 Revert "debug info" This reverts commit ec71b41ccd.
commit 13f0321a40 Author: Qiwei Ye <qiwye@microsoft.com> Date: Mon Jun 6 14:18:42 2016 +0800 Revert "debug info" This reverts commit 699d60a842.
commit 692e4f59a6 Author: Qiwei Ye <qiwye@microsoft.com> Date: Mon Jun 6 14:18:38 2016 +0800 Revert "adding debug info" This reverts commit 7ad6f69f5e.
commit d6a1b0ef6d Author: Qiwei Ye <qiwye@microsoft.com> Date: Thu Jun 2 18:40:01 2016 +0800 updating multiverso
commit 229eaffc3f Author: Qiwei Ye <qiwye@microsoft.com> Date: Wed Jun 1 18:01:01 2016 +0800 updating multiverso to support sparse update
commit 7ad6f69f5e Author: Qiwei Ye <qiwye@microsoft.com> Date: Wed Jun 1 15:44:47 2016 +0800 adding debug info
commit 699d60a842 Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue May 31 22:52:47 2016 +0800 debug info
commit ec71b41ccd Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue May 31 10:51:04 2016 +0800 debug info
commit 6cff37f038 Author: Qiwei Ye <qiwye@microsoft.com> Date: Mon May 30 21:20:32 2016 +0800 debug info
commit 76be031254 Author: Qiwei Ye <qiwye@microsoft.com> Date: Mon May 30 20:40:20 2016 +0800 git revert:debug
commit bef15d98cf Author: Qiwei Ye <qiwye@microsoft.com> Date: Mon May 30 17:03:59 2016 +0800 Revert "adding debug info" This reverts commit ef30a62140.
commit ef30a62140 Author: Qiwei Ye <qiwye@microsoft.com> Date: Mon May 30 15:36:34 2016 +0800 adding debug info
commit dac1036ece Author: Qiwei Ye <qiwye@microsoft.com> Date: Mon May 30 11:05:08 2016 +0800 Revert "test" This reverts commit 8d157ad513.
commit 8d157ad513 Author: Qiwei Ye <qiwye@microsoft.com> Date: Mon May 30 10:54:46 2016 +0800 test
commit 2d90c6e2d9 Author: Qiwei Ye <qiwye@microsoft.com> Date: Mon May 30 10:45:29 2016 +0800 Revert "test" This reverts commit 36e652806a.
commit 36e652806a Author: Qiwei Ye <qiwye@microsoft.com> Date: Mon May 30 10:43:57 2016 +0800 test
commit f1af69bc48 Author: Qiwei Ye <qiwye@microsoft.com> Date: Mon May 30 10:43:48 2016 +0800 removing dumpoutput
commit 02598f9593 Author: Qiwei Ye <qiwye@microsoft.com> Date: Thu May 26 20:14:40 2016 +0800 adding debug info
commit f6523ab49c Author: Qiwei Ye <qiwye@microsoft.com> Date: Thu May 26 10:49:40 2016 +0800 debug: print the matrix info
commit 7601cb2508
Author: Qiwei Ye <qiwye@microsoft.com> Date: Wed May 25 13:45:32 2016 +0800 fix bug
commit bb61ca9e28 Author: Qiwei Ye <qiwye@microsoft.com> Date: Wed May 25 13:21:25 2016 +0800 adding a counter for sparse updater
commit 66995edf39 Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue May 24 20:51:32 2016 +0800 Revert "adding debug info" This reverts commit 3f88d05f9b.
commit 7cd4020c4a Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue May 24 20:51:08 2016 +0800 Revert "adding a debug line for libsvm reader, need revert" This reverts commit e64bb4c83d.
commit 4e4f494e16 Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue May 24 20:51:01 2016 +0800 Revert "no message" This reverts commit 62a5a95817.
commit 189deeea6f Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue May 24 20:50:53 2016 +0800 Revert "debug: need to revert" This reverts commit b229e0a640.
commit 2ce38c0f7c Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue May 24 20:50:47 2016 +0800 Revert "convert" This reverts commit c8a520294d.
commit c8a520294d Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue May 24 18:53:29 2016 +0800 convert
commit b229e0a640 Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue May 24 18:42:21 2016 +0800 debug: need to revert
commit 62a5a95817 Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue May 24 15:12:23 2016 +0800 no message
commit e64bb4c83d Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue May 24 14:18:58 2016 +0800 adding a debug line for libsvm reader, need revert
commit 3f88d05f9b Author: Qiwei Ye <qiwye@microsoft.com> Date: Mon May 23 15:29:44 2016 +0800 adding debug info
commit 218916d80b Author: Qiwei Ye <qiwye@microsoft.com> Date: Fri May 20 17:11:59 2016 +0800 change tracelevel
commit f2b63fc549 Author: Qiwei Ye <qiwye@microsoft.com> Date: Fri May 20 16:19:12 2016 +0800 adding debug info
commit c72ca0e4c1 Author: Qiwei Ye <qiwye@microsoft.com> Date: Fri May 20 13:45:45 2016 +0800 reverse commit
commit fdb82ac79f Author: Qiwei Ye <qiwye@microsoft.com> Date: Fri May 20 13:30:24 2016 +0800 Revert "debug info" This reverts commit 86da0495d3.
commit 86da0495d3 Author: Qiwei Ye <qiwye@microsoft.com> Date: Fri May 20 13:08:40 2016 +0800 debug info
commit bb413098a6 Author: Qiwei Ye <qiwye@microsoft.com> Date: Fri May 20 11:35:49 2016 +0800 fix size_t to int
commit 7c954885f6 Author: Qiwei Ye <qiwye@microsoft.com> Date: Fri May 20 11:27:30 2016 +0800 adding info for model average
commit 536c83e15c Merge: af32793 98784c4 Author: Qiwei Ye <qiwye@microsoft.com> Date: Thu May 19 18:01:11 2016 +0800 Merge branch 'qiwye/asgd-exp' of https://github.com/Microsoft/CNTK-exp-private into qiwye/asgd-exp
commit af32793280 Author: Qiwei Ye <qiwye@microsoft.com> Date: Thu May 19 18:00:37 2016 +0800 sparse row
commit 98784c4478 Merge: 55d9f31 2595a2f Author: Qiwei Ye <qiwye@microsoft.com> Date: Wed May 18 17:27:08 2016 +0900 Merge branch 'qiwye/asgd-exp' of github.com:Microsoft/CNTK-exp-private into qiwye/asgd-exp
commit 55d9f315e3 Author: Qiwei Ye <qiwye@microsoft.com> Date: Wed May 18 17:26:44 2016 +0900 fixing a bug in multiverso
commit 2595a2f686 Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue May 17 11:31:59 2016 +0800 adding debug info
commit 10049eaea3 Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue May 17 11:26:27 2016 +0800 adding debug info
commit c16efb8baf Author: Qiwei Ye <qiwye@microsoft.com> Date: Mon May 16 17:03:53 2016 +0800 updating 1bit
commit 36369b1eee Merge: 47867ae 6cc9b5b Author: Qiwei Ye <qiwye@microsoft.com> Date: Mon May 16 17:00:34 2016 +0800 Merge branch 'qiwye/asgd-exp' of https://github.com/Microsoft/CNTK-exp-private into qiwye/asgd-exp
commit 47867ae6c4 Author: Qiwei Ye <qiwye@microsoft.com> Date: Mon May 16 16:59:47 2016 +0800 sparse simulating
commit 6cc9b5b97d
Author: Qiwei Ye <qiwye@microsoft.com> Date: Sun May 15 21:14:06 2016 +0900 updating library
commit 05272367a7 Merge: 1d75799 f1d078f Author: Qiwei Ye <qiwye@microsoft.com> Date: Sat May 14 14:49:29 2016 +0800 Merge branch 'qiwye/asgd-dev' into qiwye/asgd-exp Conflicts: Source/Common/Include/MultiversoWrapper.h
commit 1d757991b4 Author: Qiwei Ye <qiwye@microsoft.com> Date: Sat May 14 14:47:44 2016 +0800 multiverso
commit f1d078f3c1 Merge: 74068b5 b09ba68 Author: Qiwei Ye <qiwye@microsoft.com> Date: Sat May 14 14:42:20 2016 +0800 Merge branch 'master' into qiwye/asgd-dev
commit 74068b5a2d Author: Qiwei Ye <qiwye@microsoft.com> Date: Thu May 12 16:43:14 2016 +0800 checking initialmodel
commit 7b31106d52 Author: Qiwei Ye <qiwye@microsoft.com> Date: Thu May 12 13:06:48 2016 +0800 Model averaging simulation
commit 68636e61fb Merge: cd633f7 1a7ecae Author: Qiwei Ye <qiwye@microsoft.com> Date: Wed May 11 16:03:06 2016 +0800 Merge branch 'master' into qiwye/asgd-dev
commit cd633f7f44 Author: Qiwei Ye <qiwye@microsoft.com> Date: Wed May 11 16:00:41 2016 +0800 model Averaging
commit 6827bf33f5 Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue May 10 20:34:17 2016 +0800 adding modelAggregation
commit 331c82594c Author: Qiwei Ye <qiwye@microsoft.com> Date: Mon May 9 13:10:46 2016 +0800 change the code to adapt to the master
commit 6b47558bfa Merge: 737d507 7705b08 Author: Qiwei Ye <qiwye@microsoft.com> Date: Mon May 9 12:15:34 2016 +0800 Merge branch 'master' into qiwye/asgd-dev Conflicts: Source/SGDLib/SGD.cpp, Source/SGDLib/SGD.h
commit 737d507969 Author: Qiwei Ye <qiwye@microsoft.com> Date: Mon May 9 11:29:20 2016 +0800 code clean
commit 22799c7283 Author: Qiwei Ye <qiwye@microsoft.com> Date: Fri May 6 11:56:33 2016 +0800 fix indent
commit df095f8eec Author: Qiwei Ye <qiwye@microsoft.com> Date: Wed May 4 16:51:53 2016 +0800 fix bug: traceLevel
commit 19cf00f941 Merge: 14b044a 4161cb6 Author: Qiwei Ye <qiwye@microsoft.com> Date: Wed May 4 17:35:50 2016 +0900 Merge branch 'qiwye/asgd-exp' of github.com:Microsoft/CNTK-exp-private into qiwye/asgd-exp
commit 14b044a837 Author: Qiwei Ye <qiwye@microsoft.com> Date: Wed May 4 17:35:27 2016 +0900 fixbug: PRIu64
commit 4161cb609f Author: Qiwei Ye <qiwye@microsoft.com> Date: Wed May 4 15:56:52 2016 +0800 adding timer for profiling
commit 0290c9a774 Author: Qiwei Ye <qiwye@microsoft.com> Date: Wed May 4 15:10:58 2016 +0900 updating library
commit 66023ffe67 Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue May 3 19:55:49 2016 +0800 no message
commit d192bc7690 Merge: ae42f3e facfb07 Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue May 3 19:55:06 2016 +0800 Merge branch 'qiwye/asgd-dev' into qiwye/asgd-exp Conflicts: .gitmodules, Source/Multiverso, Source/Readers/HTKMLFReader/utterancesourcemulti.h
commit facfb07322 Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue May 3 19:40:05 2016 +0800 updating the library
commit ac5a5d4595 Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue May 3 11:09:19 2016 +0800 adding multiverso
commit 02c722564f Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue May 3 11:04:00 2016 +0800 clean old version
commit 5a23853738 Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue May 3 11:03:18 2016 +0800 change multiverso to public version
commit 321d0c8b67 Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue May 3 10:58:57 2016 +0800 updating 1bit-module
commit 16e890ea75 Merge: bffa012 1a9d19c Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue May 3 10:58:07 2016 +0800 Merge branch 'master' into qiwye/asgd-dev Conflicts: Source/SGDLib/SGD.cpp, Source/SGDLib/SGD.h, configure
commit bffa012ece
Author: Qiwei Ye <qiwye@microsoft.com> Date: Thu Apr 28 14:31:21 2016 +0800 remove the unused ItemDefinition in CNTK and SGDLib
commit 679b55df50 Merge: 7ce8490 c34e358 Author: Qiwei Ye <qiwye@microsoft.com> Date: Thu Apr 28 13:56:19 2016 +0800 Merge branch 'master' into qiwye/asgd-dev Conflicts: Source/ActionsLib/ActionsLib.vcxproj, Source/Readers/HTKMLFReader/utterancesourcemulti.h, Source/SGDLib/SGDLib.vcxproj
commit 7ce849065a Merge: 9968ebd 9fea921 Author: Qiwei Ye <qiwye@microsoft.com> Date: Mon Apr 18 14:29:28 2016 +0800 Merge branch 'hotfix/htkmlreader-instrument' into qiwye/asgd-dev Conflicts: Source/Readers/HTKMLFReader/utterancesourcemulti.h
commit 9fea921f1c Author: Qiwei Ye <qiwye@microsoft.com> Date: Mon Apr 18 13:34:15 2016 +0800 fix bug: using right format specifier for size_t
commit ae42f3e09b Author: Qiwei Ye <qiwye@microsoft.com> Date: Mon Apr 18 13:32:49 2016 +0800 updating format specifier for size_t
commit d56db13d26 Author: Qiwei Ye <qiwye@microsoft.com> Date: Mon Apr 18 13:06:38 2016 +0800 refine code for LOG
commit a5973971f2 Author: Qiwei Ye <qiwye@microsoft.com> Date: Mon Apr 18 11:43:31 2016 +0800 fix tab indent with 4 spaces
commit 0d1604f315 Merge: 46a9955 a8b4809 Author: Qiwei Ye <qiwye@microsoft.com> Date: Sun Apr 17 20:13:07 2016 +0900 Merge branch 'qiwye/asgd-exp' of github.com:Microsoft/CNTK-exp-private into qiwye/asgd-exp
commit 46a9955304 Author: Qiwei Ye <qiwye@microsoft.com> Date: Sun Apr 17 20:12:40 2016 +0900 updating latest multiverso library for Jenkins
commit a8b4809df1 Merge: 0b654e5 2cb35de Author: Qiwei Ye <qiwye@microsoft.com> Date: Sun Apr 17 18:46:46 2016 +0800 Merge branch 'hotfix/htkmlreader-instrument' into qiwye/asgd-exp Conflicts: Source/Readers/HTKMLFReader/utterancesourcemulti.h
commit 2cb35de7f5 Author: Qiwei Ye <qiwye@microsoft.com> Date: Sun Apr 17 18:21:37 2016 +0800 updating chunking statistics
commit 0b654e580f Author: Qiwei Ye <qiwye@microsoft.com> Date: Sun Apr 17 17:28:13 2016 +0800 fix linux build
commit 1a0b88be0c Merge: 147d117 9968ebd Author: Qiwei Ye <qiwye@microsoft.com> Date: Sun Apr 17 17:09:34 2016 +0800 Merge branch 'qiwye/asgd-dev' into qiwye/asgd-exp
commit 9968ebd25f Author: Qiwei Ye <qiwye@microsoft.com> Date: Sun Apr 17 17:08:45 2016 +0800 updating 1bit submodule
commit 1020a214fa Author: Qiwei Ye <qiwye@microsoft.com> Date: Sun Apr 17 17:06:34 2016 +0800 fix some minor indent
commit f31bd6ee91 Author: Qiwei Ye <qiwye@microsoft.com> Date: Sun Apr 17 16:41:40 2016 +0800 Revert "adding information for image reader" This reverts commit 5dd3b9bcb0.
commit 81595c9017 Merge: 5dd3b9b 4c9f918 Author: Qiwei Ye <qiwye@microsoft.com> Date: Sun Apr 17 16:22:25 2016 +0800 Merge branch 'master' into hotfix/htkmlreader-instrument
commit edcf10c54c Author: Qiwei Ye <qiwye@microsoft.com> Date: Sun Apr 17 16:22:05 2016 +0800 minor indent fix
commit 8f4423d0b8 Author: Qiwei Ye <qiwye@microsoft.com> Date: Sun Apr 17 15:27:40 2016 +0800 printed timer info when tracelevel > 2
commit 3fda1d1885 Merge: 7323d7c 4c9f918 Author: Qiwei Ye <qiwye@microsoft.com> Date: Sun Apr 17 15:27:02 2016 +0800 Merge branch 'master' into qiwye/asgd-dev Conflicts: Source/SGDLib/SGD.cpp
commit 147d1178db Author: Qiwei Ye <qiwye@microsoft.com> Date: Sun Apr 17 12:51:37 2016 +0800 adding Log reference for Updaters
commit 24327a5d01 Merge: 9d10dd0 7323d7c Author: Qiwei Ye <qiwye@microsoft.com> Date: Sun Apr 17 12:46:47 2016 +0800 Merge branch 'qiwye/asgd-dev' into qiwye/asgd-exp Conflicts: Source/Multiverso
commit 7323d7c519
Author: Qiwei Ye <qiwye@microsoft.com> Date: Sat Apr 16 19:10:11 2016 +0800 updating multiverso helper for the new matrix interface
commit 9760990506 Merge: 2dab90c e87b4d6 Author: Qiwei Ye <qiwye@microsoft.com> Date: Sat Apr 16 10:41:47 2016 +0800 Merge branch 'master' into qiwye/asgd-dev Conflicts: Source/ActionsLib/ActionsLib.vcxproj, Source/CNTK/CNTK.cpp, Source/CNTK/CNTK.vcxproj, Source/SGDLib/SGD.cpp, Source/SGDLib/SGD.h, Source/SGDLib/SGDLib.vcxproj, Source/SGDLib/SGDLib.vcxproj.filters, Tools/generate_build_info, configure
commit 2dab90ca49 Author: Qiwei Ye <qiwye@microsoft.com> Date: Sat Apr 16 10:10:34 2016 +0800 update submodule
commit 44b21f8c49 Merge: 4aa2017 51dfd58 Author: Qiwei Ye <qiwye@microsoft.com> Date: Sat Apr 16 10:09:29 2016 +0800 Merge branch 'qiwye/sparse-update' into qiwye/asgd-dev Conflicts: Source/Common/Include/MultiversoWrapper.h
commit 51dfd58cad Author: Qiwei Ye <qiwye@microsoft.com> Date: Wed Apr 13 16:32:26 2016 +0800 sparse update support ready
commit 4aa20172d9 Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue Apr 12 16:13:20 2016 +0800 update multiverso to latest version.
commit c3fae1003a Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue Apr 12 14:48:37 2016 +0800 update the submodule
commit 5dd3b9bcb0 Author: Qiwei Ye <qiwye@microsoft.com> Date: Thu Apr 7 13:16:34 2016 +0800 adding information for image reader
commit faff4ee119 Author: Qiwei Ye <qiwye@microsoft.com> Date: Wed Mar 30 17:07:27 2016 +0800 change format to size_t
commit 49c38e3cf4 Author: Qiwei Ye <qiwye@microsoft.com> Date: Wed Mar 30 16:46:27 2016 +0800 add page in timer for htkmlreader
commit 9d10dd082d Author: Qiwei Ye <qiwye@microsoft.com> Date: Fri Mar 18 06:47:23 2016 +0900 updating multiverso to the latest version
commit e4723cbe7d Author: Qiwei Ye <qiwye@microsoft.com> Date: Fri Mar 18 05:36:00 2016 +0800 using matrixtable for the layer-wise update
commit 1fa2adc09d Merge: a080383 2ea352e Author: Qiwei Ye <qiwye@microsoft.com> Date: Fri Mar 18 05:32:09 2016 +0800 Merge branch 'qiwye/asgd-exp' of https://github.com/Microsoft/CNTK-exp-private into qiwye/asgd-exp
commit a080383f3d Merge: 0aefe86 8fc2d21 Author: Qiwei Ye <qiwye@microsoft.com> Date: Fri Mar 18 05:30:49 2016 +0800 Merge branch 'qiwye/sparse-update' into qiwye/asgd-exp Conflicts: Source/Common/Include/MultiversoWrapper.h, Source/Multiverso
commit 8fc2d2135d Author: Qiwei Ye <qiwye@microsoft.com> Date: Fri Mar 18 05:26:57 2016 +0800 adding timer for requiredata of HTKMLFReader
commit d24a383db2 Author: Qiwei Ye <qiwye@microsoft.com> Date: Fri Mar 18 05:26:21 2016 +0800 using matrixTable for layer-wise update
commit 6a9a288087 Author: Qiwei Ye <qiwye@microsoft.com> Date: Fri Mar 18 02:51:09 2016 +0800 using matrix table for the layer-wise update
commit 2ea352e4b1 Author: Qiwei Ye <qiwye@microsoft.com> Date: Wed Mar 16 04:03:17 2016 +0900 adding NO_SYNC in makefile
commit f7ace8d255 Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue Mar 15 07:42:03 2016 +0800 changing to matrix array (incomplete)
commit 2771cc61bd Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue Mar 15 06:23:21 2016 +0800 updating to latest multiverso module
commit 0aefe86925 Author: Qiwei Ye <qiwye@microsoft.com> Date: Thu Mar 10 03:20:01 2016 +0900 reverting DEBUG:TestMultiThreads
commit 8083e1e28d Author: Qiwei Ye <qiwye@microsoft.com> Date: Thu Mar 10 03:07:34 2016 +0900 BUG fix: adding additional copy for the RDMA copy in OpenMPI
commit 02e8121b68 Author: Qiwei Ye <qiwye@microsoft.com> Date: Fri Mar 4 00:04:49 2016 +0800 multiverso test: thread test
commit a261872b9a Merge: 80c9fa1 feda2e1 Author: Qiwei Ye <qiwye@microsoft.com> Date: Thu Mar 3 23:30:52 2016 +0800 Merge branch 'qiwye/asgd-exp' of https://github.com/Microsoft/CNTK-exp-private into qiwye/asgd-exp
commit 80c9fa1c55 Author: Qiwei Ye <qiwye@microsoft.com> Date: Thu Mar 3 23:29:56 2016 +0800 fix bug: parameters should be initialized to the server synchronously.
commit feda2e1978 Author: Qiwei Ye <qiwye@microsoft.com> Date: Thu Mar 3 21:46:19 2016 +0900 updating multiverso lib
commit 07f99ffb5c Author: Qiwei Ye <qiwye@microsoft.com> Date: Thu Mar 3 20:30:15 2016 +0800 adding debug information
commit cdd21a3136
Author: Qiwei Ye <qiwye@microsoft.com> Date: Wed Mar 2 21:31:55 2016 +0800 Fixbug: sending msg to each client commit2bb8f361a4
Author: Qiwei Ye <qiwye@microsoft.com> Date: Wed Mar 2 14:52:34 2016 +0800 DEBUG:TESTNET commitc855831dba
Merge:c536a5b
a335f2c
Author: Qiwei Ye <qiwye@microsoft.com> Date: Wed Mar 2 01:38:56 2016 +0900 Merge branch 'qiwye/asgd-exp' of github.com:Microsoft/CNTK-exp-private into qiwye/asgd-exp commitc536a5bf95
Author: Qiwei Ye <qiwye@microsoft.com> Date: Wed Mar 2 01:38:49 2016 +0900 updating multiverso commita335f2c4d1
Author: Qiwei Ye <qiwye@microsoft.com> Date: Wed Mar 2 00:37:59 2016 +0800 Debug:TestNet commit39f1d7628f
Author: Qiwei Ye <qiwye@microsoft.com> Date: Mon Feb 29 15:05:50 2016 +0800 DEBUG: Testnet commit4e6fb134a5
Author: Qiwei Ye <qiwye@microsoft.com> Date: Mon Feb 29 14:35:01 2016 +0800 DEBUG: TestNet commit7986810770
Author: Qiwei Ye <qiwye@microsoft.com> Date: Mon Feb 29 12:37:45 2016 +0900 update library for experiment commitf9d9830721
Merge:12b41c2
addc32d
Author: Qiwei Ye <qiwye@microsoft.com> Date: Mon Feb 29 11:29:17 2016 +0800 Merge branch 'qiwye/asgd-exp' of https://github.com/Microsoft/CNTK-exp-private into qiwye/asgd-exp commit12b41c20b5
Author: Qiwei Ye <qiwye@microsoft.com> Date: Mon Feb 29 11:28:19 2016 +0800 updating multiverso headers commit5230f2536d
Merge:bfeaebb
2ddf2a3
Author: Qiwei Ye <qiwye@microsoft.com> Date: Mon Feb 29 11:27:01 2016 +0800 Merge branch 'qiwye/asgd-dev' into qiwye/asgd-exp Conflicts: Source/Multiverso commit2ddf2a3dcb
Author: Qiwei Ye <qiwye@microsoft.com> Date: Mon Feb 29 11:25:52 2016 +0800 updating submodule commitaddc32d9f8
Author: Qiwei Ye <qiwye@microsoft.com> Date: Sun Feb 28 10:39:01 2016 +0900 updating library commitbfeaebb652
Merge:9750248
15ddebf
Author: Qiwei Ye <qiwye@microsoft.com> Date: Sat Feb 27 15:33:23 2016 +0800 Merge branch 'qiwye/asgd-dev' into qiwye/asgd-exp Conflicts: Source/Multiverso commit15ddebfb02
Author: Qiwei Ye <qiwye@microsoft.com> Date: Sat Feb 27 15:31:21 2016 +0800 update the submodule to latest commit9750248778
Author: Qiwei Ye <qiwye@microsoft.com> Date: Fri Feb 26 19:11:50 2016 +0900 experiment updating commitf5fd23470e
Author: Qiwei Ye <qiwye@microsoft.com> Date: Fri Feb 26 18:01:30 2016 +0800 updating submodule to latest one commit8ad04627d3
Author: Qiwei Ye <qiwye@microsoft.com> Date: Thu Feb 25 21:54:35 2016 +0900 fix bug: generate the right tag for asgd support in build info commit73b0ee90db
Author: Qiwei Ye <qiwye@microsoft.com> Date: Thu Feb 25 16:03:16 2016 +0900 updating multiverso library to latest one commit42b3b7a214
Merge:ef76b35
fb04c50
Author: Qiwei Ye <qiwye@microsoft.com> Date: Thu Feb 25 14:44:48 2016 +0800 Merge branch 'qiwye/asgd-dev' into qiwye/asgd-exp Conflicts: Source/Multiverso commitfb04c505d1
Author: Qiwei Ye <qiwye@microsoft.com> Date: Thu Feb 25 14:40:12 2016 +0800 1. updating the multiverso submodule to the latest one; 2. change the ASGD config to use an argvector to offer more flexibility; commit956e1bc112
Author: Qiwei Ye <qiwye@microsoft.com> Date: Sat Feb 20 11:44:06 2016 +0800 updating submodule; reverting MPI THREAD SUPPORT to MPI_THREAD_SERIALIZED commit08942320b7
Author: Qiwei Ye <qiwye@microsoft.com> Date: Thu Feb 18 15:02:06 2016 +0800 using a unified function for config parsing commitc2548d2544
Author: Qiwei Ye <qiwye@microsoft.com> Date: Wed Feb 17 17:20:58 2016 +0800 making CPUONLY compile succeed commitef76b359f2
Author: Qiwei Ye <qiwye@microsoft.com> Date: Wed Feb 17 17:55:49 2016 +0900 temporarily check in for Jenkins build commit232bf4218f
Author: Qiwei Ye <qiwye@microsoft.com> Date: Wed Feb 17 16:49:04 2016 +0800 temporarily check in the headers commit9969e9a0e7
Author: Qiwei Ye <qiwye@microsoft.com> Date: Wed Feb 17 16:46:55 2016 +0800 deleting submodule commit782f51b771
Author: Qiwei Ye <qiwye@microsoft.com> Date: Wed Feb 17 16:41:10 2016 +0800 delete submodule for Jenkins build commit545bccf32d
Author: Qiwei Ye <qiwye@microsoft.com> Date: Wed Feb 17 16:36:45 2016 +0800 updating submodule to latest version commit3a131dd865
Author: Qiwei Ye <qiwye@microsoft.com> Date: Wed Feb 17 16:06:20 2016 +0800 using cntk buffer to sync with the parameter server to save time commit1b9a83617f
Merge:58ec1a0
d39709f
Author: Qiwei Ye <qiwye@microsoft.com> Date: Wed Feb 17 13:37:16 2016 +0800 Merge branch 'master' into qiwye/asgd-dev commit58ec1a035e
Author: Qiwei Ye <qiwye@microsoft.com> Date: Wed Feb 17 13:24:39 2016 +0800 update submodule to latest version commit8a148d0f8a
Merge:27164a6
edc2dd8
Author: Qiwei Ye <qiwye@microsoft.com> Date: Wed Feb 17 14:15:17 2016 +0900 Merge branch 'qiwye/asgd-dev' of github.com:Microsoft/CNTK-exp-private into qiwye/asgd-dev commit27164a64ae
Author: Qiwei Ye <qiwye@microsoft.com> Date: Wed Feb 17 14:14:43 2016 +0900 change MPI init to support MPI_THREAD_MULTIPLE commitedc2dd81e8
Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue Feb 16 15:46:04 2016 +0800 adding predefine macro for linux with ASGD support commita3cafdc8a3
Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue Feb 16 15:29:36 2016 +0900 debug info in actor commitfed7415461
Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue Feb 16 13:28:40 2016 +0800 Revert "debug check. WARNING: should be reverted later" This reverts commitda13c84d9d
. commit8d6da277e1
Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue Feb 16 12:38:10 2016 +0800 update multiverso submodule to latest version commitda13c84d9d
Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue Feb 16 11:47:37 2016 +0800 debug check. WARNING: should be reverted later commit83a60750d7
Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue Feb 16 11:21:30 2016 +0800 Adding prebuild info for windows building commitb14d6576a2
Author: Qiwei Ye <qiwye@microsoft.com> Date: Mon Feb 15 21:01:04 2016 +0900 make gcc happy for the multiverso lib commitc2c0bea959
Merge:492e273
11f11b1
Author: Qiwei Ye <qiwye@microsoft.com> Date: Mon Feb 15 10:28:14 2016 +0800 Merge branch 'master' into qiwye/asgd-dev commit492e2735de
Merge:f3f543f
e1093b9
Author: Qiwei Ye <qiwye@microsoft.com> Date: Sun Feb 14 11:56:26 2016 +0800 Merge branch 'master' into qiwye/asgd-dev Conflicts: Source/ActionsLib/ActionsLib.vcxproj Source/CNTK/CNTK.cpp Source/CNTK/CNTK.vcxproj Source/SGDLib/SGD.cpp Source/SGDLib/SGDLib.vcxproj commitf3f543f2ea
Author: Qiwei Ye <qiwye@microsoft.com> Date: Sun Feb 14 11:49:21 2016 +0800 Pass the build with IMultiverso commit2f118375f3
Author: Qiwei Ye <qiwye@microsoft.com> Date: Wed Feb 3 19:44:54 2016 +0800 Adding submodule for multiverso commitf9eda75ec2
Author: Qiwei Ye <qiwye@microsoft.com> Date: Wed Feb 3 19:42:43 2016 +0800 remove old multiverso commit6fe2f8e464
Merge:af93aae
0e5ad89
Author: Qiwei Ye <qiwye@microsoft.com> Date: Wed Feb 3 19:31:38 2016 +0800 Merge branch 'master' into qiwye/multiverso commitaf93aaeeff
Author: Qiwei Ye <qiwye@microsoft.com> Date: Wed Feb 3 15:43:09 2016 +0800 rename the multiversoWrapper commitf71b47a7ae
Author: Qiwei Ye <qiwye@microsoft.com> Date: Wed Feb 3 10:13:19 2016 +0800 rename multiverso wrapper commit397614801c
Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue Feb 2 19:11:02 2016 +0800 Adding macros for the multiverso submodule commitf982d76c8d
Merge:c69c2d5
d562172
Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue Feb 2 12:06:30 2016 +0800 Merge branch 'qiwye/multiverso' of https://github.com/Microsoft/CNTK-exp-private into qiwye/multiverso commitc69c2d50f2
Merge:99cf8cb
69112d1
Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue Feb 2 12:04:35 2016 +0800 Merge branch 'master' into qiwye/multiverso commitd5621720d1
Author: Qiwei Ye <qiwye@microsoft.com> Date: Mon Feb 1 21:15:54 2016 +0900 change default value of enable_asgd to yes for the jenkins build commit7e616d97c5
Author: Qiwei Ye <qiwye@microsoft.com> Date: Mon Feb 1 17:59:32 2016 +0900 adding needed library commitb2341b3378
Author: Qiwei Ye <qiwye@microsoft.com> Date: Mon Feb 1 13:02:04 2016 +0900 multiverso library update for Git-private commit99cf8cbbbb
Merge:c7bfebe
931e72b
Author: Qiwei Ye <qiwye@microsoft.com> Date: Mon Feb 1 11:14:55 2016 +0800 Merge branch 'master' into qiwye/multiverso Conflicts: Source/SGDLib/SGD.cpp commitc7bfebe740
Merge:7b495bd
b8badf6
Author: Qiwei Ye <qiwye@microsoft.com> Date: Fri Jan 29 14:23:04 2016 +0800 Merge branch 'master' into qiwye/multiverso commit7b495bdf9c
Author: Qiwei Ye <qiwye@microsoft.com> Date: Fri Jan 29 14:20:21 2016 +0800 change makefile for Linux work with multiverso commit14ef928c3f
Author: Qiwei ye <qiwye@microsoft.com> Date: Thu Jan 28 23:30:00 2016 +0800 reorganized the folder structure of multiverso commitf8d0d4b182
Merge:659af22
3224f02
Author: Qiwei ye <qiwye@microsoft.com> Date: Thu Jan 28 22:41:07 2016 +0800 Merge branch 'master' into qiwye/multiverso Conflicts: Source/ActionsLib/ActionsLib.vcxproj Source/CNTK/CNTK.vcxproj Source/ComputationNetworkLib/ComputationNetworkLib.vcxproj Source/EvalDll/EvalDll.vcxproj Source/SGDLib/SGDLib.vcxproj commit659af22f61
Author: Qiwei ye <qiwye@microsoft.com> Date: Thu Jan 28 22:15:30 2016 +0800 reorganize source structure commitc7856a3c46
Author: Qiwei ye <qiwye@microsoft.com> Date: Thu Jan 28 20:51:48 2016 +0800 Adding asgd config for the linux makefile commit7ce7a51032
Merge:b805f17
2462647
Author: Qiwei ye <qiwye@microsoft.com> Date: Mon Jan 25 21:22:09 2016 +0800 Merge branch 'master' into qiwye/multiverso commitb805f179a9
Author: Qiwei ye <qiwye@microsoft.com> Date: Mon Jan 25 21:21:16 2016 +0800 code clean commitfcb9235aa8
Merge:6446a2f
cb0b09f
Author: Qiwei ye <qiwye@microsoft.com> Date: Mon Jan 25 21:17:28 2016 +0800 Merge branch 'master' into qiwye/multiverso commit6446a2fa79
Merge:ddf4066
26911b7
Author: Qiwei ye <qiwye@microsoft.com> Date: Mon Jan 25 16:56:12 2016 +0800 Merge branch 'master' into qiwye/multiverso Conflicts: .gitignore CNTK.sln Source/CNTK/CNTK.cpp Source/CNTK/CNTK.vcxproj Source/ComputationNetworkLib/ComputationNetworkLib.vcxproj Source/EvalDll/EvalDll.vcxproj Source/SGDLib/SGD.cpp Source/SGDLib/SGD.h Source/SGDLib/SGDLib.vcxproj Source/SGDLib/SGDLib.vcxproj.filters commitddf4066ee8
Author: Mark Hillebrand <Mark.Hillebrand@microsoft.com> Date: Mon Jan 18 09:37:09 2016 +0100 License change commit6769e37147
Author: Qiwei Ye <qiwye@microsoft.com> Date: Fri Jan 8 22:43:40 2016 +0900 fix close logic commitf67574520f
Author: Qiwei Ye <qiwye@microsoft.com> Date: Fri Jan 8 21:29:56 2016 +0800 fix close logic for ASGD commit7628026b05
Merge:6c2ee1a
c1c818c
Author: Qiwei Ye <qiwye@microsoft.com> Date: Fri Jan 8 21:03:14 2016 +0800 Merge branch 'master' into qiwye/multiverso commit6c2ee1aa51
Merge:641c75a
40ce1af
Author: Qiwei Ye <qiwye@microsoft.com> Date: Thu Jan 7 16:57:09 2016 +0800 Merge branch 'master' into qiwye/multiverso Conflicts: Source/ComputationNetworkLib/ComputationNetworkLib.vcxproj Source/SGDLib/SGDLib.vcxproj Source/SGDLib/SGDLib.vcxproj.filters commit641c75a751
Author: Qiwei Ye <qiwye@microsoft.com> Date: Thu Jan 7 16:52:04 2016 +0800 fix close logic for ASGD commit937816eff2
Author: Qiwei Ye <qiwye@microsoft.com> Date: Thu Dec 31 17:16:10 2015 +0800 fix close logic for multiverso wrapper commit16fea919ef
Author: Qiwei Ye <qiwye@microsoft.com> Date: Thu Dec 31 17:02:15 2015 +0800 fix close logic for multiverso wrapper commita95eef383f
Merge:c4d2e78
d7f0c07
Author: Qiwei Ye <qiwye@microsoft.com> Date: Wed Dec 30 15:30:34 2015 +0800 Merge branch 'master' into qiwye/multiverso commitc4d2e78e3a
Merge:bbebed2
cd87741
Author: Qiwei Ye <qiwye@microsoft.com> Date: Wed Dec 23 10:24:09 2015 +0800 Merge branch 'master' into qiwye/multiverso commitbbebed2f0c
Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue Dec 22 14:15:14 2015 +0800 clean dlls for debug runtime commit87a5da31cb
Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue Dec 22 14:13:56 2015 +0800 adding libzmq-gd for debug commite573c52c4e
Merge:c754dfe
1527b89
Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue Dec 22 13:43:27 2015 +0800 Merge branch 'master' into qiwye/multiverso commitc754dfebdc
Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue Dec 22 13:36:31 2015 +0800 Adding support for CPUONLY. commit9664daccb0
Merge:b99f3e2
ef80d86
Author: Qiwei Ye <qiwye@microsoft.com> Date: Sat Dec 19 12:43:46 2015 +0800 Merge branch 'master' into qiwye/multiverso commitb99f3e2f15
Author: Qiwei Ye <qiwye@microsoft.com> Date: Fri Dec 18 12:44:58 2015 +0800 ignoring the prebuild files commitaf45389a44
Merge:28e3482
ebaf988
Author: Qiwei Ye <qiwye@microsoft.com> Date: Fri Dec 18 12:41:57 2015 +0800 Merge branch 'shuxin/multiverso' into qiwye/multiverso commitebaf9885ea
Author: Qiwei Ye <qiwye@microsoft.com> Date: Fri Dec 18 12:40:38 2015 +0800 Revert "Revert "adding an MPI init test in case of that MPI was initialized repeatedly"" This reverts commit23ebe452a5
. commit315e53a16f
Author: Qiwei Ye <qiwye@microsoft.com> Date: Fri Dec 18 11:33:18 2015 +0800 Revert "modify output log order" This reverts commitf4ca6734a8
. commit2e99b681dd
Merge:f4ca673
23ebe45
Author: Shuxin Zheng (MSR Student-Person Consulting) <v-shuzhe@microsoft.com> Date: Fri Dec 18 10:49:54 2015 +0800 Merge branch 'master' into qiwye/multiverso commitf4ca6734a8
Author: Shuxin Zheng (MSR Student-Person Consulting) <v-shuzhe@microsoft.com> Date: Thu Dec 17 22:57:27 2015 +0800 modify output log order commitca729cbaf5
Author: Shuxin Zheng (MSR Student-Person Consulting) <v-shuzhe@microsoft.com> Date: Thu Dec 17 22:42:57 2015 +0800 fix bug - useASGD noMoreSamplesToProcess commit28e34826a7
Author: Qiwei Ye <qiwye@microsoft.com> Date: Thu Dec 17 15:57:45 2015 +0800 Adding include path for debug profile commit66f78615d4
Author: Qiwei Ye <qiwye@microsoft.com> Date: Thu Dec 17 11:54:22 2015 +0800 fix include path commit254b45e680
Merge:4c6c57e
583f10e
Author: Qiwei Ye <qiwye@microsoft.com> Date: Thu Dec 17 10:35:08 2015 +0800 Merge branch 'master' into qiwye/multiverso Conflicts: Source/CNTK/CNTK.vcxproj Source/ComputationNetworkLib/ComputationNetworkLib.vcxproj Source/EvalDll/EvalDll.vcxproj Source/SGDLib/SGDLib.vcxproj Source/SGDLib/SGDLib.vcxproj.filters commit4c6c57e1de
Author: Qiwei Ye <qiwye@microsoft.com> Date: Wed Dec 16 19:53:35 2015 +0800 updating multiverso runtime library commit4b734d9985
Author: Qiwei Ye <qiwye@microsoft.com> Date: Wed Dec 16 20:48:52 2015 +0900 updating multiverso lib commit7ea900e6d9
Author: Qiwei Ye <qiwye@microsoft.com> Date: Wed Dec 16 20:48:11 2015 +0900 using cstdint version multiverso commit442b7bb0ad
Author: Qiwei Ye <qiwye@microsoft.com> Date: Wed Dec 16 17:48:00 2015 +0800 make GCC happy commit5dbfedc609
Author: Qiwei Ye <qiwye@microsoft.com> Date: Wed Dec 16 15:51:16 2015 +0800 update multiverso lib commitca145de82d
Author: Qiwei Ye <qiwye@microsoft.com> Date: Wed Dec 16 15:50:52 2015 +0800 optimization for the ASGD speed. adding learningrate adjust logic commit5002203b6f
Merge:1d12689
4022de2
Author: Qiwei Ye <qiwye@microsoft.com> Date: Wed Dec 16 11:05:09 2015 +0800 Merge branch 'master' into qiwye/multiverso Conflicts: MachineLearning/CNTKComputationNetworkLib/CNTKComputationNetworkLib.vcxproj MachineLearning/CNTKSGDLib/CNTKSGDLib.vcxproj MachineLearning/CNTKSGDLib/CNTKSGDLib.vcxproj.filters commit1d12689b58
Merge:f289152
d273078
Author: Qiwei Ye <qiwye@microsoft.com> Date: Tue Dec 15 14:31:49 2015 +0800 Merge branch 'qiwye/dev' into qiwye/multiverso commitf2891522e6
Author: Qiwei Ye <qiwye@microsoft.com> Date: Mon Dec 14 21:49:20 2015 +0800 fix debug build include path commitfe3bb89162
Author: Qiwei Ye <qiwye@microsoft.com> Date: Mon Dec 14 21:18:41 2015 +0800 fix bug commit07e48cd667
Author: Qiwei Ye <qiwye@microsoft.com> Date: Sat Dec 12 18:27:55 2015 +0900 adding runtime libraries commit25e2d662b5
Author: Qiwei Ye <qiwye@microsoft.com> Date: Sat Dec 12 17:15:45 2015 +0800 adding multiverso library commite19a4511b5
Author: Qiwei Ye <qiwye@microsoft.com> Date: Sat Dec 12 17:12:53 2015 +0800 fix function name commit6f69c62081
Author: Qiwei Ye <qiwye@microsoft.com> Date: Sat Dec 12 16:54:22 2015 +0800 merging main branch into multiverso branch commit e9a0e67c0998cf295bac585a8475078c9b161783 Author: KeDengMS <kedeng@microsoft.com> Date: Mon Nov 14 13:43:02 2016 -0800 python examples relocated commit 8a0cc02b43b7a10b2ead7ef12f7afb1f8ffa1a50 Merge: b49f220 e8eb645 Author: KeDengMS <kedeng@microsoft.com> Date: Mon Nov 14 11:11:30 2016 -0800 Merge branch 'kedeng/beta' of https://github.com/Microsoft/CNTK into kedeng/beta commit b49f2209518f815d565131dce9a85eba2065ccf4 Author: KeDengMS <kedeng@microsoft.com> Date: Mon Nov 14 11:10:45 2016 -0800 Jenkins fix commit e8eb64549f9875e4a3844ba269129b83df88da6d Author: Ke Deng <kedeng@microsoft.com> Date: Sat Nov 12 02:00:02 2016 -0800 ReaderConfig perf tweak commit 127ec79b080a6c26f44efb1021e6870e313823f1 Author: KeDengMS <kedeng@microsoft.com> Date: Fri Nov 11 20:04:45 2016 -0800 MinibatchSource no skip commit 5cda422e93c23d27e3a8668a0a8ccc1e75f2796a Author: KeDengMS <kedeng@microsoft.com> Date: Fri Nov 11 18:53:55 2016 -0800 MinibatchSource: Save/restore sample location when SetConfig to avoid skipping in prefetch commit 2c7017ee6e8cd1a5a0fc68b7d32e744361c9f0d5 Author: KeDengMS <kedeng@microsoft.com> Date: Fri Nov 11 17:14:30 2016 -0800 Fix test again commit 92e8f743642d67b98c3f5bac0269d5d112c61a05 Author: KeDengMS <kedeng@microsoft.com> Date: Fri Nov 11 16:55:54 2016 -0800 Fix Test commit 28143b8eba5a8f03a872913b71dd72a71e8c356a Author: KeDengMS <kedeng@microsoft.com> Date: Fri Nov 11 15:52:59 2016 -0800 V2LibraryDistributionTests fix commit 4d749ec75a191e7772ab9b4f3d171590d3807dc4 Author: KeDengMS <kedeng@microsoft.com> Date: Fri Nov 11 14:47:30 2016 -0800 Add warmStart to distributed training This change allows user to specify the number of samples to run non-distributed. Squashed commit of the following: commit 71fe40b02e790a172e904c7e9082b08699efc90f Author: KeDengMS <kedeng@microsoft.com> Date: Fri Nov 11 14:23:59 2016 -0800 Squashed commit of the following: commit82dc610da7
Merge:67759d3
b6d69c5
Author: Project Philly <svcphil@microsoft.com> Date: Fri Nov 11 11:49:46 2016 -0800 Integrate wilrich/designLeftOvers into master commit67759d310a
Author: Chris Basoglu <cbasoglu@microsoft.com> Date: Fri Nov 11 07:56:05 2016 -0800 Update README.md commit15c0575e55
Merge:9d91e9f
e9fb291
Author: Project Philly <svcphil@microsoft.com> Date: Fri Nov 11 05:56:38 2016 -0800 Integrate alexeyo/ReadMe-News-November-11 into master commit9d91e9f90e
Merge:a262d5b
1be9e30
Author: Project Philly <svcphil@microsoft.com> Date: Fri Nov 11 05:51:06 2016 -0800 Integrate eldak/fixingBlockMomentum into master commite9fb291c60
Author: Alexey Orlov <alexeyo@microsoft.com> Date: Fri Nov 11 14:46:42 2016 +0100 Main ReadMe News, November 11, 2016 commit1be9e30e3a
Author: Eldar Akchurin <eldak@microsoft.com> Date: Mon Nov 7 13:56:34 2016 +0100 Fixing some synchronization of block momentum and distributed checkpointing commita262d5b033
Merge:1a44dac
f150248
Author: Project Philly <svcphil@microsoft.com> Date: Fri Nov 11 03:01:39 2016 -0800 Integrate mahilleb/pr/1023 into master commit1a44dac712
Author: Mark Hillebrand <mahilleb@microsoft.com> Date: Thu Nov 10 10:21:57 2016 +0100 Examples/Evaluation/CSEvalClient: bump NuGet package version commitf150248d22
Merge:37ffe9d
cbcea1b
Author: Mark Hillebrand <mahilleb@microsoft.com> Date: Fri Nov 11 09:55:22 2016 +0100 Merge remote-tracking branch 'origin/master' into mahilleb/pr/1023 commitb6d69c5866
Author: Willi Richert <wilrich@microsoft.com> Date: Fri Nov 11 09:17:50 2016 +0100 Addressing CR comments commit37ffe9dbb6
Author: Mark Hillebrand <mahilleb@microsoft.com> Date: Fri Nov 11 09:29:10 2016 +0100 Fix Windows build commitcbcea1b534
Merge:548ec6b
8f7eb6e
Author: Project Philly <svcphil@microsoft.com> Date: Thu Nov 10 16:35:28 2016 -0800 Integrate nikosk/pull997 into master commit8f7eb6e37e
Author: Nikos Karampatziakis <nikosk@microsoft.com> Date: Thu Nov 10 15:58:37 2016 -0800 removed tmp_ld_library_path from dockerfile commit548ec6b382
Merge:47d7c03
1c2421f
Author: Project Philly <svcphil@microsoft.com> Date: Thu Nov 10 14:56:05 2016 -0800 Integrate vadimma/EnsureTagCopy into master commitb1e9fcc17e
Author: Nathan Luehr <nluehr@nvidia.com> Date: Thu Nov 10 13:48:10 2016 -0800 Removed "using namespace std" from basetypes.h commit47d7c03e5c
Author: Emad Barsoum <ebarsoum@microsoft.com> Date: Thu Nov 10 11:06:41 2016 -0800 Don't override the shape after the expansion. commit1c2421ff70
Author: Vadim Mazalov <vadimma@microsoft.com> Date: Thu Nov 10 10:16:12 2016 -0800 Ensure tags are copied in ComputationNodeBase::CopyTo commit2cd3466b9c
Merge:fdcc61a
0fb5ec4
Author: Emad Barsoum <ebarsoum@microsoft.com> Date: Thu Nov 10 09:49:09 2016 -0800 Merge branch 'master' into ebarsoum/globalpooling For checkin and Jenkins. commitfdcc61a896
Author: Emad Barsoum <ebarsoum@microsoft.com> Date: Thu Nov 10 09:48:16 2016 -0800 Use NDShape.unknown instead of None commitf8b5217dfc
Author: Willi Richert <wilrich@microsoft.com> Date: Thu Nov 10 17:58:27 2016 +0100 Allow CloneMethod to be passed as string commitb326a9af94
Author: Willi Richert <wilrich@microsoft.com> Date: Thu Nov 10 17:01:06 2016 +0100 Beautifying doc/code changes commit0fb5ec40cc
Merge:3a5403d
6f7e43c
Author: Project Philly <svcphil@microsoft.com> Date: Thu Nov 10 06:10:54 2016 -0800 Integrate zhouwang/pr899 into master commit6f7e43c91e
Author: Zhou Wang <zhouwang@microsoft.com> Date: Thu Nov 10 13:35:14 2016 +0100 fix path for release_noopt configuration commit2cae5f6371
Author: Zhou Wang <zhouwang@microsoft.com> Date: Thu Nov 10 12:52:29 2016 +0100 add release_noopt configuration commit3a5403d6fc
Author: Alona Kharchenko <t-alkhar@microsoft.com> Date: Thu Nov 10 11:32:07 2016 +0100 fixed graph string test commitc9ecf0e2b1
Author: Alona Kharchenko <t-alkhar@microsoft.com> Date: Fri Nov 4 16:45:02 2016 +0100 added a test for network graph commit5a2c2bbf60
Author: Alona Kharchenko <t-alkhar@microsoft.com> Date: Fri Nov 4 12:15:48 2016 +0100 rebase commit5b4ff474b9
Author: Alona Kharchenko <t-alkhar@microsoft.com> Date: Thu Nov 3 12:04:31 2016 +0100 moved network graph to utils, added save as a DOT file commit5475b6995f
Author: Alona Kharchenko <t-alkhar@microsoft.com> Date: Thu Oct 27 13:54:24 2016 +0200 updated graph build and display commitc44c93fa23
Author: Alona Kharchenko <t-alkhar@microsoft.com> Date: Wed Oct 26 12:17:52 2016 +0200 bug fix commita9ac4e090a
Author: Alona Kharchenko <t-alkhar@microsoft.com> Date: Tue Oct 25 20:43:04 2016 +0200 added basic network plotting commite55cd0afa8
Author: Willi Richert <wilrich@microsoft.com> Date: Tue Oct 18 17:44:56 2016 +0200 Docs commit73013e78ca
Author: Alona Kharchenko <t-alkhar@microsoft.com> Date: Thu Nov 3 12:04:31 2016 +0100 moved network graph to utils, added save as a DOT file commit43e8581e03
Author: Alona Kharchenko <t-alkhar@microsoft.com> Date: Wed Nov 2 17:37:05 2016 +0100 merged png and string output into one function commitbf5e28b4f3
Author: Alona Kharchenko <t-alkhar@microsoft.com> Date: Mon Oct 31 20:13:34 2016 +0100 added a short how-to for network graph example commit95e0dc8d70
Author: Alona Kharchenko <t-alkhar@microsoft.com> Date: Thu Oct 27 13:54:24 2016 +0200 updated graph build and display commit0836b874d0
Author: Alona Kharchenko <t-alkhar@microsoft.com> Date: Wed Oct 26 12:17:52 2016 +0200 bug fix commitb6bd4cbd35
Author: Alona Kharchenko <t-alkhar@microsoft.com> Date: Tue Oct 25 20:43:04 2016 +0200 added basic network plotting commit6b8d5cb72c
Author: Willi Richert <wilrich@microsoft.com> Date: Tue Oct 18 17:44:56 2016 +0200 Docs commitac1a9469ef
Author: Mark Hillebrand <mahilleb@microsoft.com> Date: Thu Nov 10 09:13:44 2016 +0100 Bump release tags to v2.0.beta3.0, v2.0.beta3.0+ in CNTK.cpp commit74a7e0efd3
Merge:0110d72
2ca454b
Author: Emad Barsoum <ebarsoum@microsoft.com> Date: Wed Nov 9 19:51:26 2016 -0800 Merge branch 'master' into ebarsoum/globalpooling For checkin. commit0110d72f44
Author: Emad Barsoum <ebarsoum@microsoft.com> Date: Wed Nov 9 19:50:32 2016 -0800 Check pad for global pooling. commite081b6fab3
Author: Nikos Karampatziakis <nikosk@microsoft.com> Date: Wed Nov 9 19:16:09 2016 -0800 whitespace commit7bc01bf0da
Author: Nikos Karampatziakis <nikosk@microsoft.com> Date: Wed Nov 9 19:12:59 2016 -0800 nit nit commitc84e53557c
Author: Nikos Karampatziakis <nikosk@microsoft.com> Date: Wed Nov 9 19:12:07 2016 -0800 nit commitbacc9889af
Merge:89ee379
452bd0f
Author: Nikos Karampatziakis <nikosk@microsoft.com> Date: Wed Nov 9 19:10:05 2016 -0800 Merge branch 'nikosk/pull997' of github.com:Microsoft/CNTK into nikosk/pull997 commit452bd0f28a
Author: Nikos Karampatziakis <nikosk@microsoft.com> Date: Wed Nov 9 19:09:18 2016 -0800 Incorporated most CR feedback commite2cf02a609
Author: Amit Agarwal <amitaga@microsoft.com> Date: Tue Nov 8 14:33:07 2016 -0800 CNTK v2 library: Add binary CrossEntropy operator commit89ee379685
Author: Nikos Karampatziakis <nikosk@microsoft.com> Date: Wed Nov 9 15:17:09 2016 -0800 Removed accidental commit commitc406520b47
Author: Nikos Karampatziakis <nikosk@microsoft.com> Date: Wed Nov 9 13:17:08 2016 -0800 incorporated some cr feedback commit8e5635585c
Author: Emad Barsoum <ebarsoum@microsoft.com> Date: Tue Nov 8 18:12:49 2016 -0800 Fix merge issue. commit78c63a43f9
Merge:662bebe
7199b51
Author: Emad Barsoum <ebarsoum@microsoft.com> Date: Tue Nov 8 17:57:32 2016 -0800 Merge with master commit662bebe271
Author: Emad Barsoum <ebarsoum@microsoft.com> Date: Tue Nov 8 17:31:56 2016 -0800 Update pooling shape attribute before reaching pooling node. commita72e05b0ee
Merge:6922940
f3272f9
Author: Nikos Karampatziakis <nikosk@microsoft.com> Date: Tue Nov 8 15:30:04 2016 -0800 Merge branch 'fiskio-master' into fiskio2 commit6922940e1b
Merge:646dcde
6b22d38
Author: Nikos Karampatziakis <nikosk@microsoft.com> Date: Tue Nov 8 15:27:38 2016 -0800 Merge branch 'master' of https://github.com/fiskio/CNTK into fiskio2 commitf3272f9d37
Author: Nikos Karampatziakis <nikosk@microsoft.com> Date: Tue Nov 8 14:55:10 2016 -0800 Modified python paths commitad753521dc
Author: Nikos Karampatziakis <nikosk@microsoft.com> Date: Tue Nov 8 14:52:28 2016 -0800 Added readme on Dockerfiles commit2d74d6900c
Author: Nikos Karampatziakis <nikosk@microsoft.com> Date: Tue Nov 8 14:39:17 2016 -0800 Added build-arg to 1bit image commit01929c4915
Author: Emad Barsoum <ebarsoum@microsoft.com> Date: Mon Nov 7 23:55:43 2016 -0800 Fix kernel shape in pooling node final validation. commite680f35804
Author: Emad Barsoum <ebarsoum@microsoft.com> Date: Mon Nov 7 23:16:29 2016 -0800 Initial checkin of global pooling. commit6b22d38ea9
Author: Marco Fiscato <marco@swifkey.com> Date: Mon Nov 7 12:12:05 2016 +0000 Python build install packages from default file list commit2039300b61
Author: Marco Fiscato <marco@swifkey.com> Date: Sun Nov 6 23:51:51 2016 +0000 added pandas, seaborn commit7ad4f0f573
Author: Marco Fiscato <marco@swifkey.com> Date: Sun Nov 6 23:19:21 2016 +0000 added gym commit81056ac161
Author: Marco Fiscato <marco@swifkey.com> Date: Sat Nov 5 21:24:37 2016 +0000 Fix LD_LIBRARY_PATH commit62ab074d2f
Merge:624f6d8
abb5aee
Author: Nikos Karampatziakis <nikosk@microsoft.com> Date: Fri Nov 4 11:03:56 2016 -0700 Merge branch 'master' of https://github.com/fiskio/CNTK into fiskio-master commitabb5aeed9a
Author: Marco Fiscato <marco@swifkey.com> Date: Fri Nov 4 12:09:59 2016 +0000 fiskio -> Microsoft commitfb0994a1e1
Author: Marco Fiscato <marco@swifkey.com> Date: Fri Nov 4 11:59:50 2016 +0000 Added 1bit-SGD docker file commit4f81a1ed72
Author: Marco Fiscato <marco@swifkey.com> Date: Thu Nov 3 23:20:20 2016 +0000 Fix CPU docker file commitf835b4095b
Author: Marco Fiscato <marco@swifkey.com> Date: Thu Nov 3 15:52:49 2016 +0000 fix for python in docker commit 241d2e70537c2d38f4af9bb3e121c2fdf823ea8a Author: KeDengMS <kedeng@microsoft.com> Date: Fri Nov 11 13:54:22 2016 -0800 Fix merged Linux build commit 997a31e5ac7132cbb33d335dada24dc041a38463 Author: KeDengMS <kedeng@microsoft.com> Date: Fri Nov 11 13:44:01 2016 -0800 Post merge fixes commit aa93efdf90bf60849e6a0068abd3ea05da7503ab Author: KeDengMS <kedeng@microsoft.com> Date: Fri Nov 11 10:18:24 2016 -0800 Squashed commits from Master commit67759d310a
Author: Chris Basoglu <cbasoglu@microsoft.com> Date: Fri Nov 11 07:56:05 2016 -0800 Update README.md commit15c0575e55
Merge:9d91e9f
e9fb291
Author: Project Philly <svcphil@microsoft.com> Date: Fri Nov 11 05:56:38 2016 -0800 Integrate alexeyo/ReadMe-News-November-11 into master commit9d91e9f90e
Merge:a262d5b
1be9e30
Author: Project Philly <svcphil@microsoft.com> Date: Fri Nov 11 05:51:06 2016 -0800 Integrate eldak/fixingBlockMomentum into master commite9fb291c60
Author: Alexey Orlov <alexeyo@microsoft.com> Date: Fri Nov 11 14:46:42 2016 +0100 Main ReadMe News, November 11, 2016 commit1be9e30e3a
Author: Eldar Akchurin <eldak@microsoft.com> Date: Mon Nov 7 13:56:34 2016 +0100 Fixing some synchronization of block momentum and distributed checkpointing commita262d5b033
Merge:1a44dac
f150248
Author: Project Philly <svcphil@microsoft.com> Date: Fri Nov 11 03:01:39 2016 -0800 Integrate mahilleb/pr/1023 into master commit1a44dac712
Author: Mark Hillebrand <mahilleb@microsoft.com> Date: Thu Nov 10 10:21:57 2016 +0100 Examples/Evaluation/CSEvalClient: bump NuGet package version commitf150248d22
Merge:37ffe9d
cbcea1b
Author: Mark Hillebrand <mahilleb@microsoft.com> Date: Fri Nov 11 09:55:22 2016 +0100 Merge remote-tracking branch 'origin/master' into mahilleb/pr/1023 commit37ffe9dbb6
Author: Mark Hillebrand <mahilleb@microsoft.com> Date: Fri Nov 11 09:29:10 2016 +0100 Fix Windows build commitcbcea1b534
Merge:548ec6b
8f7eb6e
Author: Project Philly <svcphil@microsoft.com> Date: Thu Nov 10 16:35:28 2016 -0800 Integrate nikosk/pull997 into master commit8f7eb6e37e
Author: Nikos Karampatziakis <nikosk@microsoft.com> Date: Thu Nov 10 15:58:37 2016 -0800 removed tmp_ld_library_path from dockerfile commit548ec6b382
Merge:47d7c03
1c2421f
Author: Project Philly <svcphil@microsoft.com> Date: Thu Nov 10 14:56:05 2016 -0800 Integrate vadimma/EnsureTagCopy into master commitb1e9fcc17e
Author: Nathan Luehr <nluehr@nvidia.com> Date: Thu Nov 10 13:48:10 2016 -0800 Removed "using namespace std" from basetypes.h commit47d7c03e5c
Author: Emad Barsoum <ebarsoum@microsoft.com> Date: Thu Nov 10 11:06:41 2016 -0800 Don't override the shape after the expansion. commit1c2421ff70
Author: Vadim Mazalov <vadimma@microsoft.com> Date: Thu Nov 10 10:16:12 2016 -0800 Ensure tags are copied in ComputationNodeBase::CopyTo commit2cd3466b9c
Merge:fdcc61a
0fb5ec4
Author: Emad Barsoum <ebarsoum@microsoft.com> Date: Thu Nov 10 09:49:09 2016 -0800 Merge branch 'master' into ebarsoum/globalpooling For checkin and Jenkins. commitfdcc61a896
Author: Emad Barsoum <ebarsoum@microsoft.com> Date: Thu Nov 10 09:48:16 2016 -0800 Use NDShape.unknown instead of None commit0fb5ec40cc
Merge:3a5403d
6f7e43c
Author: Project Philly <svcphil@microsoft.com> Date: Thu Nov 10 06:10:54 2016 -0800 Integrate zhouwang/pr899 into master commit6f7e43c91e
Author: Zhou Wang <zhouwang@microsoft.com> Date: Thu Nov 10 13:35:14 2016 +0100 fix path for release_noopt configuration commit2cae5f6371
Author: Zhou Wang <zhouwang@microsoft.com> Date: Thu Nov 10 12:52:29 2016 +0100 add release_noopt configuration commit3a5403d6fc
Author: Alona Kharchenko <t-alkhar@microsoft.com> Date: Thu Nov 10 11:32:07 2016 +0100 fixed graph string test commitc9ecf0e2b1
Author: Alona Kharchenko <t-alkhar@microsoft.com> Date: Fri Nov 4 16:45:02 2016 +0100 added a test for network graph commit5a2c2bbf60
Author: Alona Kharchenko <t-alkhar@microsoft.com> Date: Fri Nov 4 12:15:48 2016 +0100 rebase commit5b4ff474b9
Author: Alona Kharchenko <t-alkhar@microsoft.com> Date: Thu Nov 3 12:04:31 2016 +0100 moved network graph to utils, added save as a DOT file commit5475b6995f
Author: Alona Kharchenko <t-alkhar@microsoft.com> Date: Thu Oct 27 13:54:24 2016 +0200 updated graph build and display commitc44c93fa23
Author: Alona Kharchenko <t-alkhar@microsoft.com> Date: Wed Oct 26 12:17:52 2016 +0200 bug fix commita9ac4e090a
Author: Alona Kharchenko <t-alkhar@microsoft.com> Date: Tue Oct 25 20:43:04 2016 +0200 added basic network plotting commite55cd0afa8
Author: Willi Richert <wilrich@microsoft.com> Date: Tue Oct 18 17:44:56 2016 +0200 Docs commit73013e78ca
Author: Alona Kharchenko <t-alkhar@microsoft.com> Date: Thu Nov 3 12:04:31 2016 +0100 moved network graph to utils, added save as a DOT file commit43e8581e03
Author: Alona Kharchenko <t-alkhar@microsoft.com> Date: Wed Nov 2 17:37:05 2016 +0100 merged png and string output into one function commitbf5e28b4f3
Author: Alona Kharchenko <t-alkhar@microsoft.com> Date: Mon Oct 31 20:13:34 2016 +0100 added a short how-to for network graph example commit95e0dc8d70
Author: Alona Kharchenko <t-alkhar@microsoft.com> Date: Thu Oct 27 13:54:24 2016 +0200 updated graph build and display commit0836b874d0
Author: Alona Kharchenko <t-alkhar@microsoft.com> Date: Wed Oct 26 12:17:52 2016 +0200 bug fix commitb6bd4cbd35
Author: Alona Kharchenko <t-alkhar@microsoft.com> Date: Tue Oct 25 20:43:04 2016 +0200 added basic network plotting commit6b8d5cb72c
Author: Willi Richert <wilrich@microsoft.com> Date: Tue Oct 18 17:44:56 2016 +0200 Docs commitac1a9469ef
Author: Mark Hillebrand <mahilleb@microsoft.com> Date: Thu Nov 10 09:13:44 2016 +0100 Bump release tags to v2.0.beta3.0, v2.0.beta3.0+ in CNTK.cpp commit74a7e0efd3
Merge:0110d72
2ca454b
Author: Emad Barsoum <ebarsoum@microsoft.com> Date: Wed Nov 9 19:51:26 2016 -0800 Merge branch 'master' into ebarsoum/globalpooling For checkin. commit0110d72f44
Author: Emad Barsoum <ebarsoum@microsoft.com> Date: Wed Nov 9 19:50:32 2016 -0800 Check pad for global pooling. commite081b6fab3
Author: Nikos Karampatziakis <nikosk@microsoft.com> Date: Wed Nov 9 19:16:09 2016 -0800 whitespace commit7bc01bf0da
Author: Nikos Karampatziakis <nikosk@microsoft.com> Date: Wed Nov 9 19:12:59 2016 -0800 nit nit commitc84e53557c
Author: Nikos Karampatziakis <nikosk@microsoft.com> Date: Wed Nov 9 19:12:07 2016 -0800 nit commitbacc9889af
Merge:89ee379
452bd0f
Author: Nikos Karampatziakis <nikosk@microsoft.com> Date: Wed Nov 9 19:10:05 2016 -0800 Merge branch 'nikosk/pull997' of github.com:Microsoft/CNTK into nikosk/pull997 commit452bd0f28a
Author: Nikos Karampatziakis <nikosk@microsoft.com> Date: Wed Nov 9 19:09:18 2016 -0800 Incorporated most CR feedback commite2cf02a609
Author: Amit Agarwal <amitaga@microsoft.com> Date: Tue Nov 8 14:33:07 2016 -0800 CNTK v2 library: Add binary CrossEntropy operator commit89ee379685
Author: Nikos Karampatziakis <nikosk@microsoft.com> Date: Wed Nov 9 15:17:09 2016 -0800 Removed accidental commit commitc406520b47
Author: Nikos Karampatziakis <nikosk@microsoft.com> Date: Wed Nov 9 13:17:08 2016 -0800 incorporated some cr feedback commit8e5635585c
Author: Emad Barsoum <ebarsoum@microsoft.com> Date: Tue Nov 8 18:12:49 2016 -0800 Fix merge issue. commit78c63a43f9
Merge:662bebe
7199b51
Author: Emad Barsoum <ebarsoum@microsoft.com> Date: Tue Nov 8 17:57:32 2016 -0800 Merge with master commit662bebe271
Author: Emad Barsoum <ebarsoum@microsoft.com> Date: Tue Nov 8 17:31:56 2016 -0800 Update pooling shape attribute before reaching pooling node. commita72e05b0ee
Merge:6922940
f3272f9
Author: Nikos Karampatziakis <nikosk@microsoft.com> Date: Tue Nov 8 15:30:04 2016 -0800 Merge branch 'fiskio-master' into fiskio2 commit6922940e1b
Merge:646dcde
6b22d38
Author: Nikos Karampatziakis <nikosk@microsoft.com> Date: Tue Nov 8 15:27:38 2016 -0800 Merge branch 'master' of https://github.com/fiskio/CNTK into fiskio2 commitf3272f9d37
Author: Nikos Karampatziakis <nikosk@microsoft.com> Date: Tue Nov 8 14:55:10 2016 -0800 Modified python paths commitad753521dc
Author: Nikos Karampatziakis <nikosk@microsoft.com> Date: Tue Nov 8 14:52:28 2016 -0800 Added readme on Dockerfiles commit2d74d6900c
Author: Nikos Karampatziakis <nikosk@microsoft.com> Date: Tue Nov 8 14:39:17 2016 -0800 Added build-arg to 1bit image commit01929c4915
Author: Emad Barsoum <ebarsoum@microsoft.com> Date: Mon Nov 7 23:55:43 2016 -0800 Fix kernel shape in pooling node final validation. commite680f35804
Author: Emad Barsoum <ebarsoum@microsoft.com> Date: Mon Nov 7 23:16:29 2016 -0800 Initial checkin of global pooling. commit6b22d38ea9
Author: Marco Fiscato <marco@swifkey.com> Date: Mon Nov 7 12:12:05 2016 +0000 Python build install packages from default file list commit2039300b61
Author: Marco Fiscato <marco@swifkey.com> Date: Sun Nov 6 23:51:51 2016 +0000 added pandas, seaborn commit7ad4f0f573
Author: Marco Fiscato <marco@swifkey.com> Date: Sun Nov 6 23:19:21 2016 +0000 added gym commit81056ac161
Author: Marco Fiscato <marco@swifkey.com> Date: Sat Nov 5 21:24:37 2016 +0000 Fix LD_LIBRARY_PATH commit62ab074d2f
Merge:624f6d8
abb5aee
Author: Nikos Karampatziakis <nikosk@microsoft.com> Date: Fri Nov 4 11:03:56 2016 -0700 Merge branch 'master' of https://github.com/fiskio/CNTK into fiskio-master commitabb5aeed9a
Author: Marco Fiscato <marco@swifkey.com> Date: Fri Nov 4 12:09:59 2016 +0000 fiskio -> Microsoft commitfb0994a1e1
Author: Marco Fiscato <marco@swifkey.com> Date: Fri Nov 4 11:59:50 2016 +0000 Added 1bit-SGD docker file commit4f81a1ed72
Author: Marco Fiscato <marco@swifkey.com> Date: Thu Nov 3 23:20:20 2016 +0000 Fix CPU docker file commitf835b4095b
Author: Marco Fiscato <marco@swifkey.com> Date: Thu Nov 3 15:52:49 2016 +0000 fix for python in docker commit d5f88e7690a44be7fa2ba7acd33e33edf68b84f6 Author: KeDengMS <kedeng@microsoft.com> Date: Thu Nov 10 15:08:52 2016 -0800 Minor sample/test updates commit d2660530680f3eb43d53b259f21fe5da96707ce5 Author: KeDengMS <kedeng@microsoft.com> Date: Thu Nov 10 11:32:32 2016 -0800 Rename test cpp commit 3f78e33143c649cd70b35712102810e3eb9e745b Author: KeDengMS <kedeng@microsoft.com> Date: Thu Nov 10 11:04:23 2016 -0800 Separate test into multiple files commit a43d7542ef21a7404c8bb1905f7d071c5dcc8ebf Author: Eldar Akchurin <eldak@microsoft.com> Date: Thu Nov 10 10:53:46 2016 +0100 Fixing sample position commit 18ae4d9b75f829672fa30022c988533a3f0cbbc6 Author: Eldar Akchurin <eldak@microsoft.com> Date: Thu Nov 10 10:48:17 2016 +0100 Fixing SetConfiguration and SetCurrentPosition commit 449ef5f2404db7d5b772698a897d80c8a7cce833 Author: KeDengMS <kedeng@microsoft.com> Date: Thu Nov 10 00:05:17 2016 -0800 Better fix to uninitialized ReaderShim ctor commit 5e04b8aa45e9acd4a290cf0e3422ae5e75167248 Author: KeDengMS <kedeng@microsoft.com> Date: Wed Nov 9 23:29:10 2016 -0800 Fix uninitialized variable in ReaderShim commit 1aca3bccdb62c91978d848fbf52003f04e16f5c2 Author: KeDengMS <kedeng@microsoft.com> Date: Wed Nov 9 21:21:20 2016 -0800 Linux Test fix #2 commit e59d02092bb3137c0e0a27c0185f57528fb0d69b Author: KeDengMS <kedeng@microsoft.com> Date: Wed Nov 9 20:44:41 2016 -0800 Fix Linux test break commit b8047cc5762087b3b2836555d2a37f60e167aa98 Merge: 3516b4d2ca454b
Author: KeDengMS <kedeng@microsoft.com> Date: Wed Nov 9 17:58:38 2016 -0800 Merge branch 'master' into kedeng/warmStart commit 3516b4d90a639ae4c48acd5a67cd6b51046fc120 Author: KeDengMS <kedeng@microsoft.com> Date: Wed Nov 9 17:51:15 2016 -0800 Add changes for real commit 8909e41bf8c67e0c2bf675bc2e16063e7058725e Author: KeDengMS <kedeng@microsoft.com> Date: Wed Nov 9 17:47:00 2016 -0800 Add Release_NoOpt to V2LibraryDistributionTests.vcxproj Fix tabs in progress_print.py Restore original CifarResNet_Distributed sample, and rename warmStart sample to CifarResNet_warmStart for now commit ccb1573e9eaff1dc5248c9fca0e3953f001b9a97 Author: KeDengMS <kedeng@microsoft.com> Date: Wed Nov 9 17:33:53 2016 -0800 Add test case for MinibatchSource with warm-starting And fix div-by-0 in progress_print commit 89f3059fd47ab37cd8a7b09b726bfa339a71a274 Author: KeDengMS <kedeng@microsoft.com> Date: Wed Nov 9 14:04:51 2016 -0800 Add test for GPU warm start commit f2387d5c6c130fe4c7158dc8c7408fb9ced94d2e Merge: 3e5137154a33a9
Author: KeDengMS <kedeng@microsoft.com> Date: Tue Nov 8 10:55:35 2016 -0800 Merge branch 'master' into kedeng/warmStart commit 3e5137181f179d4e3fe816d90f002b6886519bd1 Author: KeDengMS <kedeng@microsoft.com> Date: Tue Nov 8 10:53:10 2016 -0800 Add test to verify warm start CE matches single worker in 1-bit CPU device Fixes for bugs and CR comments commit 3d28386190ca672aebb00a507f4fb7455d252c70 Merge: b36ba66061be6e
Author: KeDengMS <kedeng@microsoft.com> Date: Mon Nov 7 14:49:15 2016 -0800 Merge branch 'master' into kedeng/warmStart commit b36ba6612184abc877a9459090786c55ebdbefd4 Author: KeDengMS <kedeng@microsoft.com> Date: Mon Nov 7 14:47:57 2016 -0800 Address CR feedbacks, default warmStart to infinite commit c15021abbd71e1e648e7de5032940813d24f021b Author: KeDengMS <kedeng@microsoft.com> Date: Sat Nov 5 00:34:59 2016 -0700 Fix test failure commit 62571ee54baaffedd5e01949e6fc4bf62c5cca88 Author: KeDengMS <kedeng@microsoft.com> Date: Sat Nov 5 00:16:20 2016 -0700 print progress with speed commit b5b7399efd47b1a6c0f7cc9caeaee15dadb455fd Author: KeDengMS <kedeng@microsoft.com> Date: Fri Nov 4 22:41:59 2016 -0700 Example fixes commit afede95f0ae7f8224a06bd20da5d7eeded43aeba Author: KeDengMS <kedeng@microsoft.com> Date: Fri Nov 4 18:22:14 2016 -0700 comment and docstring fixes commit ae46f39d0278b7e51462000903385532b4f1f7a8 Merge: 05dad62624f6d8
Author: KeDengMS <kedeng@microsoft.com> Date: Fri Nov 4 18:15:52 2016 -0700 Merge branch 'master' into kedeng/warmStart commit 05dad62862a869164b4eda4131d92e6f41974a5c Author: KeDengMS <kedeng@microsoft.com> Date: Fri Nov 4 18:12:42 2016 -0700 Use SWIG to handle size_t in python, api/test enhancements commit 13da037b7a99961774cca52f158507056ba9e136 Merge: cd08464de58a63
Author: KeDengMS <kedeng@microsoft.com> Date: Thu Nov 3 22:31:40 2016 -0700 Merge branch 'master' into kedeng/warmStart commit cd08464e115f120f5b75f018b3fc15b1a71a070b Author: KeDengMS <kedeng@microsoft.com> Date: Thu Nov 3 22:27:13 2016 -0700 Distributed python side refactor commit 623de80c64d6a1418a1aa8e19056cc13db7d44d3 Merge: 9af9115327f6f9
Author: KeDengMS <kedeng@microsoft.com> Date: Thu Nov 3 10:27:30 2016 -0700 Merge branch 'master' into kedeng/warmStart commit 9af91159da237b27995a5faf1f5c55a2c589f256 Author: KeDengMS <kedeng@microsoft.com> Date: Thu Nov 3 10:26:39 2016 -0700 Address CR comments commit a8a694e9de99d396863db8b8f12a78c4d3c3e9d0 Author: KeDengMS <kedeng@microsoft.com> Date: Thu Nov 3 00:59:18 2016 -0700 Add support for warm start in distributed training This simplifies user code when dealing with warm start
This commit is contained in:
Parent d321bdb60a
Commit 75b9766288

CNTK.sln (11 changes)
@@ -2520,7 +2520,8 @@ Global
 {F4CCAAB2-0DB2-4281-929A-2E68E30F0F6E}.Release_NoOpt|Any CPU.ActiveCfg = Release_CpuOnly|x64
 {F4CCAAB2-0DB2-4281-929A-2E68E30F0F6E}.Release_NoOpt|Mixed Platforms.ActiveCfg = Release_CpuOnly|x64
 {F4CCAAB2-0DB2-4281-929A-2E68E30F0F6E}.Release_NoOpt|Win32.ActiveCfg = Release_CpuOnly|x64
-{F4CCAAB2-0DB2-4281-929A-2E68E30F0F6E}.Release_NoOpt|x64.ActiveCfg = Release_CpuOnly|x64
+{F4CCAAB2-0DB2-4281-929A-2E68E30F0F6E}.Release_NoOpt|x64.ActiveCfg = Release_NoOpt|x64
+{F4CCAAB2-0DB2-4281-929A-2E68E30F0F6E}.Release_NoOpt|x64.Build.0 = Release_NoOpt|x64
 {F4CCAAB2-0DB2-4281-929A-2E68E30F0F6E}.Release|Any CPU.ActiveCfg = Release|x64
 {F4CCAAB2-0DB2-4281-929A-2E68E30F0F6E}.Release|Mixed Platforms.ActiveCfg = Release|x64
 {F4CCAAB2-0DB2-4281-929A-2E68E30F0F6E}.Release|Mixed Platforms.Build.0 = Release|x64
@@ -2549,8 +2550,8 @@ Global
 {16F14058-B116-49D9-8BA0-209F3AFFE849}.Release_NoOpt|Mixed Platforms.ActiveCfg = Release_CpuOnly|x64
 {16F14058-B116-49D9-8BA0-209F3AFFE849}.Release_NoOpt|Mixed Platforms.Build.0 = Release_CpuOnly|x64
 {16F14058-B116-49D9-8BA0-209F3AFFE849}.Release_NoOpt|Win32.ActiveCfg = release_zmq|x64
-{16F14058-B116-49D9-8BA0-209F3AFFE849}.Release_NoOpt|x64.ActiveCfg = Release_CpuOnly|x64
-{16F14058-B116-49D9-8BA0-209F3AFFE849}.Release_NoOpt|x64.Build.0 = Release_CpuOnly|x64
+{16F14058-B116-49D9-8BA0-209F3AFFE849}.Release_NoOpt|x64.ActiveCfg = Release_NoOpt|x64
+{16F14058-B116-49D9-8BA0-209F3AFFE849}.Release_NoOpt|x64.Build.0 = Release_NoOpt|x64
 {16F14058-B116-49D9-8BA0-209F3AFFE849}.Release|Any CPU.ActiveCfg = Release|x64
 {16F14058-B116-49D9-8BA0-209F3AFFE849}.Release|Mixed Platforms.ActiveCfg = Release|x64
 {16F14058-B116-49D9-8BA0-209F3AFFE849}.Release|Mixed Platforms.Build.0 = Release|x64
@@ -2579,8 +2580,8 @@ Global
 {EC7157E9-A51F-4702-A5FD-8DAF88C7029F}.Release_NoOpt|Mixed Platforms.ActiveCfg = Release_CpuOnly|x64
 {EC7157E9-A51F-4702-A5FD-8DAF88C7029F}.Release_NoOpt|Mixed Platforms.Build.0 = Release_CpuOnly|x64
 {EC7157E9-A51F-4702-A5FD-8DAF88C7029F}.Release_NoOpt|Win32.ActiveCfg = Release_CpuOnly|x64
-{EC7157E9-A51F-4702-A5FD-8DAF88C7029F}.Release_NoOpt|x64.ActiveCfg = Release_CpuOnly|x64
-{EC7157E9-A51F-4702-A5FD-8DAF88C7029F}.Release_NoOpt|x64.Build.0 = Release_CpuOnly|x64
+{EC7157E9-A51F-4702-A5FD-8DAF88C7029F}.Release_NoOpt|x64.ActiveCfg = Release_NoOpt|x64
+{EC7157E9-A51F-4702-A5FD-8DAF88C7029F}.Release_NoOpt|x64.Build.0 = Release_NoOpt|x64
 {EC7157E9-A51F-4702-A5FD-8DAF88C7029F}.Release|Any CPU.ActiveCfg = Release|x64
 {EC7157E9-A51F-4702-A5FD-8DAF88C7029F}.Release|Mixed Platforms.ActiveCfg = Release|x64
 {EC7157E9-A51F-4702-A5FD-8DAF88C7029F}.Release|Mixed Platforms.Build.0 = Release|x64

@@ -150,7 +150,7 @@ def train_and_evaluate(reader_train, reader_test, max_epochs):
             data = reader_train.next_minibatch(min(minibatch_size, epoch_size - sample_count), input_map=input_map) # fetch minibatch.
             trainer.train_minibatch(data) # update model with it

-            sample_count += data[label_var].num_samples # count samples processed so far
+            sample_count += trainer.previous_minibatch_sample_count # count samples processed so far
             progress_printer.update_with_trainer(trainer, with_metric=True) # log progress
         progress_printer.epoch_summary(with_metric=True)

@@ -28,7 +28,7 @@ num_channels = 3 # RGB
 num_classes = 10

 # Define the reader for both training and evaluation action.
-def create_reader(map_file, mean_file, train, distributed_communicator=None):
+def create_reader(map_file, mean_file, train):
     if not os.path.exists(map_file) or not os.path.exists(mean_file):
         raise RuntimeError("File '%s' or '%s' does not exist. Please run install_cifar10.py from DataSets/CIFAR-10 to fetch them" %
                            (map_file, mean_file))
@@ -46,9 +46,7 @@ def create_reader(map_file, mean_file, train, distributed_communicator=None):
     # deserializer
     return MinibatchSource(ImageDeserializer(map_file, StreamDefs(
         features = StreamDef(field='image', transforms=transforms), # first column in map file is referred to as 'image'
-        labels = StreamDef(field='label', shape=num_classes))), # and second as 'label'
-        distributed_communicator=distributed_communicator)
-
+        labels = StreamDef(field='label', shape=num_classes)))) # and second as 'label'

 # Train and evaluate the network.
 def convnet_cifar10_dataaug(reader_train, reader_test):
@@ -84,7 +82,7 @@ def convnet_cifar10_dataaug(reader_train, reader_test):

     # Set learning parameters
     lr_per_sample = [0.0015625]*20+[0.00046875]*20+[0.00015625]*20+[0.000046875]*10+[0.000015625]
-    lr_schedule = learning_rate_schedule(lr_per_sample, epoch_size=epoch_size)
+    lr_schedule = learning_rate_schedule(lr_per_sample, unit=UnitType.sample, epoch_size=epoch_size)
     momentum_time_constant = [0]*20+[600]*20+[1200]
     mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant, epoch_size=epoch_size)
     l2_reg_weight = 0.002
@@ -110,7 +108,7 @@ def convnet_cifar10_dataaug(reader_train, reader_test):
         while sample_count < epoch_size: # loop over minibatches in the epoch
             data = reader_train.next_minibatch(min(minibatch_size, epoch_size-sample_count), input_map=input_map) # fetch minibatch.
             trainer.train_minibatch(data) # update model with it
-            sample_count += data[label_var].num_samples # count samples processed so far
+            sample_count += trainer.previous_minibatch_sample_count # count samples processed so far
             progress_printer.update_with_trainer(trainer, with_metric=True) # log progress
         progress_printer.epoch_summary(with_metric=True)
         persist.save_model(z, os.path.join(model_path, "ConvNet_CIFAR10_DataAug_{}.dnn".format(epoch)))

@@ -0,0 +1,157 @@
# Copyright (c) Microsoft. All rights reserved.

# Licensed under the MIT license. See LICENSE.md file in the project root
# for full license information.
# ==============================================================================

import os
import math
import numpy as np
from cntk.utils import *
from cntk.layers import *
from cntk.models import Sequential, LayerStack
from cntk.ops import input_variable, cross_entropy_with_softmax, classification_error, relu, element_times, constant
from cntk.io import MinibatchSource, ImageDeserializer, StreamDef, StreamDefs, INFINITE_SAMPLES
from cntk import Trainer, persist, cntk_py, distributed
from cntk.learner import momentum_sgd, learning_rate_schedule, momentum_schedule, momentum_as_time_constant_schedule, UnitType
from _cntk_py import set_computation_network_trace_level

# Paths relative to current python file.
abs_path = os.path.dirname(os.path.abspath(__file__))
data_path = os.path.join(abs_path, "..", "..", "..", "Datasets", "CIFAR-10")
model_path = os.path.join(abs_path, "Models")

# model dimensions
image_height = 32
image_width = 32
num_channels = 3 # RGB
num_classes = 10

# Define the reader for both training and evaluation action.
def create_reader(map_file, mean_file, train, distributed_after=INFINITE_SAMPLES):
    if not os.path.exists(map_file) or not os.path.exists(mean_file):
        raise RuntimeError("File '%s' or '%s' does not exist. Please run install_cifar10.py from DataSets/CIFAR-10 to fetch them" %
                           (map_file, mean_file))

    # transformation pipeline for the features has jitter/crop only when training
    transforms = []
    if train:
        transforms += [
            ImageDeserializer.crop(crop_type='Random', ratio=0.8, jitter_type='uniRatio') # train uses jitter
        ]
    transforms += [
        ImageDeserializer.scale(width=image_width, height=image_height, channels=num_channels, interpolations='linear'),
        ImageDeserializer.mean(mean_file)
    ]
    # deserializer
    return MinibatchSource(ImageDeserializer(map_file, StreamDefs(
        features = StreamDef(field='image', transforms=transforms), # first column in map file is referred to as 'image'
        labels = StreamDef(field='label', shape=num_classes))), # and second as 'label'
        distributed_after = distributed_after)


# Train and evaluate the network.
def convnet_cifar10_dataaug(reader_train, reader_test, distributed_trainer):
    set_computation_network_trace_level(0)

    # Input variables denoting the features and label data
    input_var = input_variable((num_channels, image_height, image_width))
    label_var = input_variable((num_classes))

    # apply model to input
    scaled_input = element_times(constant(0.00390625), input_var)
    with default_options (activation=relu, pad=True):
        z = Sequential([
            LayerStack(2, lambda : [
                Convolution((3,3), 64),
                Convolution((3,3), 64),
                MaxPooling((3,3), (2,2))
            ]),
            LayerStack(2, lambda i: [
                Dense([256,128][i]),
                Dropout(0.5)
            ]),
            Dense(num_classes, activation=None)
        ])(scaled_input)

    # loss and metric
    ce = cross_entropy_with_softmax(z, label_var)
    pe = classification_error(z, label_var)

    # training config
    epoch_size = 50000 # for now we manually specify epoch size
    minibatch_size = 64

    # Set learning parameters
    lr_per_sample = [0.0015625]*20+[0.00046875]*20+[0.00015625]*20+[0.000046875]*10+[0.000015625]
    lr_schedule = learning_rate_schedule(lr_per_sample, unit=UnitType.sample, epoch_size=epoch_size)
    momentum_time_constant = [0]*20+[600]*20+[1200]
    mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant, epoch_size=epoch_size)
    l2_reg_weight = 0.002

    # trainer object
    learner = momentum_sgd(z.parameters, lr_schedule, mm_schedule,
                           l2_regularization_weight = l2_reg_weight)
    trainer = Trainer(z, ce, pe, learner, distributed_trainer)

    # define mapping from reader streams to network inputs
    input_map = {
        input_var: reader_train.streams.features,
        label_var: reader_train.streams.labels
    }

    log_number_of_parameters(z) ; print()
    progress_printer = ProgressPrinter(tag='Training')

    # perform model training
    max_epochs = 80
    for epoch in range(max_epochs): # loop over epochs
        sample_count = 0
        while sample_count < epoch_size: # loop over minibatches in the epoch
            data = reader_train.next_minibatch(min(minibatch_size, epoch_size-sample_count), input_map=input_map) # fetch minibatch.
            trainer.train_minibatch(data) # update model with it
            sample_count += trainer.previous_minibatch_sample_count # count samples processed so far
            progress_printer.update_with_trainer(trainer, with_metric=True) # log progress
        progress_printer.epoch_summary(with_metric=True)
        if distributed_trainer.communicator().current_worker().global_rank == 0:
            persist.save_model(z, os.path.join(model_path, "ConvNet_CIFAR10_DataAug_{}.dnn".format(epoch)))

    ### Evaluation action
    epoch_size = 10000
    minibatch_size = 16

    # process minibatches and evaluate the model
    metric_numer = 0
    metric_denom = 0
    sample_count = 0
    minibatch_index = 0

    while sample_count < epoch_size:
        current_minibatch = min(minibatch_size, epoch_size - sample_count)
        # Fetch next test min batch.
        data = reader_test.next_minibatch(current_minibatch, input_map=input_map)
        # minibatch data to be trained with
        metric_numer += trainer.test_minibatch(data) * current_minibatch
        metric_denom += current_minibatch
        # Keep track of the number of samples processed so far.
        sample_count += data[label_var].num_samples
        minibatch_index += 1

    print("")
    print("Final Results: Minibatch[1-{}]: errs = {:0.2f}% * {}".format(minibatch_index+1, (metric_numer*100.0)/metric_denom, metric_denom))
    print("")

    return metric_numer/metric_denom

if __name__=='__main__':
    distributed_after_samples = 50000
    num_quantization_bits = 32
    distributed_trainer = distributed.data_parallel_distributed_trainer(
        num_quantization_bits=num_quantization_bits,
        distributed_after=distributed_after_samples)

    reader_train = create_reader(os.path.join(data_path, 'train_map.txt'), os.path.join(data_path, 'CIFAR-10_mean.xml'), True, distributed_after_samples)
    reader_test = create_reader(os.path.join(data_path, 'test_map.txt'), os.path.join(data_path, 'CIFAR-10_mean.xml'), False)

    convnet_cifar10_dataaug(reader_train, reader_test, distributed_trainer)
    distributed.Communicator.finalize()

@@ -12,7 +12,7 @@ from cntk.blocks import default_options
 from cntk.layers import Convolution, AveragePooling, GlobalAveragePooling, Dropout, BatchNormalization, Dense
 from cntk.models import Sequential, LayerStack
 from cntk.utils import *
-from cntk.io import MinibatchSource, ImageDeserializer, StreamDef, StreamDefs
+from cntk.io import MinibatchSource, ImageDeserializer, StreamDef, StreamDefs, INFINITE_SAMPLES
 from cntk.initializer import glorot_uniform, he_normal
 from cntk import Trainer
 from cntk.learner import momentum_sgd, learning_rate_schedule, UnitType, momentum_as_time_constant_schedule
@@ -36,7 +36,7 @@ num_classes = 10
 #
 # Define the reader for both training and evaluation action.
 #
-def create_reader(map_file, mean_file, train, distributed_communicator=None):
+def create_reader(map_file, mean_file, train):
     if not os.path.exists(map_file) or not os.path.exists(mean_file):
         raise RuntimeError("File '%s' or '%s' does not exist. Please run install_cifar10.py from Examples/Image/DataSets/CIFAR-10 to fetch them" %
                            (map_file, mean_file))
@ -54,8 +54,7 @@ def create_reader(map_file, mean_file, train, distributed_communicator=None):
|
|||
# deserializer
|
||||
return MinibatchSource(ImageDeserializer(map_file, StreamDefs(
|
||||
features = StreamDef(field='image', transforms=transforms), # first column in map file is referred to as 'image'
|
||||
labels = StreamDef(field='label', shape=num_classes))), # and second as 'label'
|
||||
distributed_communicator=distributed_communicator)
|
||||
labels = StreamDef(field='label', shape=num_classes)))) # and second as 'label'
|
||||
|
||||
#
|
||||
# Resnet building blocks
|
||||
|
@ -196,7 +195,7 @@ def train_and_evaluate(reader_train, reader_test, max_epochs):
|
|||
data = reader_train.next_minibatch(min(minibatch_size, epoch_size - sample_count), input_map=input_map) # fetch minibatch.
|
||||
trainer.train_minibatch(data) # update model with it
|
||||
|
||||
sample_count += data[label_var].num_samples # count samples processed so far
|
||||
sample_count += trainer.previous_minibatch_sample_count # count samples processed so far
|
||||
progress_printer.update_with_trainer(trainer, with_metric=True) # log progress
|
||||
progress_printer.epoch_summary(with_metric=True)
|
||||
|
||||
|
|
|
@@ -4,193 +4,265 @@
# for full license information.
# ==============================================================================

# NOTE:
# This example is meant as an illustration of how to use CNTK's distributed training feature from the Python API.
# The training hyperparameters here are not necessarily optimal; for optimal convergence they need to be tuned
# for the specific parallelization degrees that you want to run the example with.

import numpy as np
import sys
import os
from cntk import Trainer, distributed, device, persist
from cntk.cntk_py import DeviceKind_GPU
import math
import numpy as np
import argparse

from cntk.blocks import default_options
from cntk.layers import Convolution, AveragePooling, GlobalAveragePooling, Dropout, BatchNormalization, Dense
from cntk.models import Sequential, LayerStack
from cntk.utils import *
from cntk.io import MinibatchSource, ImageDeserializer, StreamDef, StreamDefs, INFINITE_SAMPLES
from cntk.initializer import glorot_uniform, he_normal
from cntk import Trainer, distributed, persist
from cntk.learner import momentum_sgd, learning_rate_schedule, UnitType, momentum_as_time_constant_schedule
from cntk.ops import input_variable, constant, parameter, cross_entropy_with_softmax, combine, classification_error, times, element_times, pooling, AVG_POOLING, relu
from cntk.io import ReaderConfig, ImageDeserializer
from cntk.initializer import he_normal, glorot_uniform
from cntk.ops import cross_entropy_with_softmax, classification_error, relu
from cntk.ops import input_variable, constant, parameter, combine, times, element_times

abs_path = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.join(abs_path, "..", "..", "..", "..", "..", "Examples", "common"))
from CifarResNet import create_reader, create_resnet_model
from nn import conv_bn_relu_layer, conv_bn_layer, linear_layer, print_training_progress
#
# Paths relative to the current python file.
#
abs_path = os.path.dirname(os.path.abspath(__file__))
cntk_path = os.path.normpath(os.path.join(abs_path, "..", "..", "..", "..", ".."))
data_path = os.path.join(cntk_path, "Examples", "Image", "DataSets", "CIFAR-10")
model_path = os.path.join(abs_path, "Models")

TRAIN_MAP_FILENAME = 'train_map.txt'
MEAN_FILENAME = 'CIFAR-10_mean.xml'
TEST_MAP_FILENAME = 'test_map.txt'
# model dimensions
image_height = 32
image_width = 32
num_channels = 3  # RGB
num_classes = 10

# Trains a residual network model on the CIFAR image dataset
def cifar_resnet_distributed(data_path, run_test, num_epochs, communicator=None, save_model_filename=None, load_model_filename=None, debug_output=False):
    image_height = 32
    image_width = 32
    num_channels = 3
    num_classes = 10
#
# Define the reader for both training and evaluation action.
#
def create_reader(map_file, mean_file, train, distributed_after=INFINITE_SAMPLES):
    if not os.path.exists(map_file) or not os.path.exists(mean_file):
        raise RuntimeError("File '%s' or '%s' does not exist. Please run install_cifar10.py from Examples/Image/DataSets/CIFAR-10 to fetch them" %
                           (map_file, mean_file))

    feats_stream_name = 'features'
    labels_stream_name = 'labels'
    # transformation pipeline for the features has jitter/crop only when training
    transforms = []
    if train:
        transforms += [
            ImageDeserializer.crop(crop_type='Random', ratio=0.8, jitter_type='uniRatio')  # train uses jitter
        ]
    transforms += [
        ImageDeserializer.scale(width=image_width, height=image_height, channels=num_channels, interpolations='linear'),
        ImageDeserializer.mean(mean_file)
    ]
    # deserializer
    return MinibatchSource(ImageDeserializer(map_file, StreamDefs(
        features = StreamDef(field='image', transforms=transforms),  # first column in map file is referred to as 'image'
        labels   = StreamDef(field='label', shape=num_classes))),    # and second as 'label'
        randomize = False,
        distributed_after = distributed_after)
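
# Hedged usage sketch (not part of the commit): with the default
# distributed_after=INFINITE_SAMPLES the source never partitions data across
# workers; a finite threshold delays partitioning until the warm-start samples
# have been served identically to every worker, e.g.:
#
#     reader = create_reader(map_file, mean_file, train=True, distributed_after=50000)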

    minibatch_source = create_reader(os.path.join(data_path, 'train_map.txt'), os.path.join(data_path, 'CIFAR-10_mean.xml'), True,
                                     distributed_communicator = communicator)

    features_si = minibatch_source[feats_stream_name]
    labels_si = minibatch_source[labels_stream_name]

    # Instantiate the resnet classification model, or load from file
#
# Resnet building blocks
#
#   ResNetNode                   ResNetNodeInc
#       |                              |
#       +------+------+      +---------+----------+
#       |             |      |                    |
#       V             |      V                    V
#  +----------+       |  +--------------+   +----------------+
#  | Conv, BN |       |  | Conv x 2, BN |   | SubSample, BN  |
#  +----------+       |  +--------------+   +----------------+
#       |             |      |                    |
#       V             |      V                    |
#   +-------+         |   +-------+               |
#   | ReLU  |         |   | ReLU  |               |
#   +-------+         |   +-------+               |
#       |             |      |                    |
#       V             |      V                    |
#  +----------+       |  +----------+             |
#  | Conv, BN |       |  | Conv, BN |             |
#  +----------+       |  +----------+             |
#       |             |      |                    |
#       |    +---+    |      |       +---+        |
#       +--->| + |<---+      +------>+ + +<-------+
#            +---+                   +---+
#              |                       |
#              V                       V
#          +-------+               +-------+
#          | ReLU  |               | ReLU  |
#          +-------+               +-------+
#              |                       |
#              V                       V
#
def convolution_bn(input, filter_size, num_filters, strides=(1,1), init=he_normal(), activation=relu):
    if activation is None:
        activation = lambda x: x

    r = Convolution(filter_size, num_filters, strides=strides, init=init, activation=None, pad=True, bias=False)(input)
    r = BatchNormalization(map_rank=1)(r)
    r = activation(r)

    if load_model_filename:
        print("Loading model:", load_model_filename)
        classifier_output = persist.load_model(load_model_filename)
        image_input = classifier_output.arguments[0]
    else:
        image_input = input_variable(
            (num_channels, image_height, image_width), features_si.m_element_type)
        classifier_output = create_resnet_model(image_input, num_classes)
    return r

def resnet_basic(input, num_filters):
    c1 = convolution_bn(input, (3,3), num_filters)
    c2 = convolution_bn(c1, (3,3), num_filters, activation=None)
    p = c2 + input
    return relu(p)

def resnet_basic_inc(input, num_filters):
    c1 = convolution_bn(input, (3,3), num_filters, strides=(2,2))
    c2 = convolution_bn(c1, (3,3), num_filters, activation=None)

    s = convolution_bn(input, (1,1), num_filters, strides=(2,2), activation=None)

    p = c2 + s
    return relu(p)

def resnet_basic_stack(input, num_filters, num_stack):
    assert (num_stack > 0)

    r = input
    for _ in range(num_stack):
        r = resnet_basic(r, num_filters)
    return r

#
# Defines the residual network model for classifying images
#
def create_resnet_model(input, num_classes):
    conv = convolution_bn(input, (3,3), 16)
    r1_1 = resnet_basic_stack(conv, 16, 3)

    r2_1 = resnet_basic_inc(r1_1, 32)
    r2_2 = resnet_basic_stack(r2_1, 32, 2)

    r3_1 = resnet_basic_inc(r2_2, 64)
    r3_2 = resnet_basic_stack(r3_1, 64, 2)

    pool = GlobalAveragePooling()(r3_2)
    net = Dense(num_classes, init=he_normal(), activation=None)(pool)

    return net
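
# Hedged aside (editor's note, not in the commit): each of the three stages here
# contains 3 residual blocks of 2 convolutions, plus the initial convolution and
# the final dense layer, so the model appears to follow the 6n+2 CIFAR ResNet
# family with n = 3:
#
#     num_layers = 6*3 + 2   # = 20, i.e. the classic ResNet-20 configuration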

#
# Train and evaluate the network.
#
def train_and_evaluate(reader_train, reader_test, max_epochs, distributed_trainer):

    # Input variables denoting the features and label data
    label_var = input_variable((num_classes), features_si.m_element_type)
    input_var = input_variable((num_channels, image_height, image_width))
    label_var = input_variable((num_classes))

    ce = cross_entropy_with_softmax(classifier_output, label_var)
    pe = classification_error(classifier_output, label_var)
    # Normalize the input
    feature_scale = 1.0 / 256.0
    input_var_norm = element_times(feature_scale, input_var)

    # Instantiate the trainer object to drive the model training
    # apply model to input
    z = create_resnet_model(input_var_norm, 10)

    mb_size = 128
    num_mb_per_epoch = 100
    #
    # Training action
    #

    # loss and metric
    ce = cross_entropy_with_softmax(z, label_var)
    pe = classification_error(z, label_var)

    # training config
    epoch_size = 50000
    minibatch_size = 128

    # Set learning parameters
    lr_per_minibatch = learning_rate_schedule([1]*80 + [0.1]*40 + [0.01], UnitType.minibatch, epoch_size)
    momentum_time_constant = momentum_as_time_constant_schedule(-minibatch_size/np.log(0.9))
    l2_reg_weight = 0.0001

    num_mbs = num_mb_per_epoch * num_epochs
    # trainer object
    learner = momentum_sgd(z.parameters,
                           lr = lr_per_minibatch, momentum = momentum_time_constant,
                           l2_regularization_weight = l2_reg_weight)
    trainer = Trainer(z, ce, pe, learner, distributed_trainer)

    lr_schedule = [1.0/mb_size]*80 + [0.1/mb_size]*40 + [0.01/mb_size]
    lr_per_minibatch = learning_rate_schedule(lr_schedule, UnitType.minibatch, mb_size * num_mb_per_epoch)
    momentum_time_constant = momentum_as_time_constant_schedule(-mb_size/np.log(0.9))
    # define mapping from reader streams to network inputs
    input_map = {
        input_var: reader_train.streams.features,
        label_var: reader_train.streams.labels
    }

    # create data parallel distributed trainer if needed
    dist_trainer = distributed.data_parallel_distributed_trainer(communicator, False) if communicator else None
    log_number_of_parameters(z) ; print()
    progress_printer = ProgressPrinter(tag='Training')

    # Instantiate the trainer object to drive the model training
    trainer = Trainer(classifier_output, ce, pe,
                      [momentum_sgd(classifier_output.parameters, lr=lr_per_minibatch, momentum=momentum_time_constant, l2_regularization_weight=0.0001)],
                      distributed_trainer = dist_trainer)
    # perform model training
    for epoch in range(max_epochs):       # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = reader_train.next_minibatch(minibatch_size, input_map=input_map)  # fetch minibatch
            trainer.train_minibatch(data)  # update model with it

            sample_count += trainer.previous_minibatch_sample_count           # count samples processed so far
            progress_printer.update_with_trainer(trainer, with_metric=True)   # log progress
        progress_printer.epoch_summary(with_metric=True)
        if distributed_trainer.communicator().current_worker().global_rank == 0:
            persist.save_model(z, os.path.join(model_path, "CifarResNet_Distributed_{}.dnn".format(epoch)))

    # Get minibatches of images to train with and perform model training
    training_progress_output_freq = 100 if communicator else 20
    #
    # Evaluation action
    #
    epoch_size = 10000
    minibatch_size = 16

    if debug_output:
        training_progress_output_freq = training_progress_output_freq/4

    for i in range(0, num_mbs):

        # NOTE: depending on the network, mb_size can be changed dynamically here
        mb = minibatch_source.next_minibatch(mb_size)
    # process minibatches and evaluate the model
    metric_numer = 0
    metric_denom = 0
    sample_count = 0
    minibatch_index = 0

    #progress_printer = ProgressPrinter(freq=100, first=10, tag='Eval')
    while sample_count < epoch_size:
        current_minibatch = min(minibatch_size, epoch_size - sample_count)

        # Fetch next test minibatch.
        data = reader_test.next_minibatch(current_minibatch, input_map=input_map)

        # Specify the mapping of input variables in the model to actual
        # minibatch data to be trained with
        arguments = {
            image_input: mb[features_si],
            label_var: mb[labels_si]
        }
        trainer.train_minibatch(arguments)
        metric_numer += trainer.test_minibatch(data) * current_minibatch
        metric_denom += current_minibatch

        print_training_progress(trainer, i, training_progress_output_freq)

    if save_model_filename:
        print("Saving model:", save_model_filename)
        persist.save_model(classifier_output, save_model_filename)
        # Keep track of the number of samples processed so far.
        sample_count += data[label_var].num_samples
        minibatch_index += 1

    if run_test:
        test_minibatch_source = create_reader(os.path.join(data_path, 'test_map.txt'), os.path.join(data_path, 'CIFAR-10_mean.xml'), False)
        features_si = test_minibatch_source[feats_stream_name]
        labels_si = test_minibatch_source[labels_stream_name]
    print("")
    print("Final Results: Minibatch[1-{}]: errs = {:0.1f}% * {}".format(minibatch_index+1, (metric_numer*100.0)/metric_denom, metric_denom))
    print("")

        mb_size = 128
        num_mbs = 100
    # return evaluation error.
    return metric_numer/metric_denom

        total_error = 0.0
        for i in range(0, num_mbs):
            mb = test_minibatch_source.next_minibatch(mb_size)

            # Specify the mapping of input variables in the model to actual
            # minibatch data to be evaluated with
            arguments = {
                image_input: mb[features_si],
                label_var: mb[labels_si]
            }
            error = trainer.test_minibatch(arguments)
            total_error += error

        return total_error / num_mbs
    else:
        return 0


def train_and_evaluate(data_path, total_epochs, gpu_count=1):
    # Create a distributed communicator for 1-bit SGD for better scaling to multiple GPUs.
    # If you'd like to avoid quantization loss, use the simple one instead.
    quantization_bit = 1

    if (quantization_bit == 32):
        communicator = distributed.mpi_communicator()
    else:
        communicator = distributed.quantized_mpi_communicator(quantization_bit)

    workers = communicator.workers()
    current_worker = communicator.current_worker()
    print("List all distributed workers")
    for wk in workers:
        if current_worker.global_rank == wk.global_rank:
            print("* {} {}".format(wk.global_rank, wk.host_id))
        else:
            print("  {} {}".format(wk.global_rank, wk.host_id))

    if gpu_count == 1 and len(workers) > 1:
        print("Warning: running distributed training on 1 GPU will be slow")
        device.set_default_device(gpu(0))

    print("Training on device type:{} id:{}".format('gpu' if device.default().type() else 'cpu', device.default().id()))

    start_model = "start_model.bin"
    num_start_epochs = 1
    num_parallel_epochs = total_epochs - num_start_epochs

    # train the start model in one worker only
    if communicator.current_worker().global_rank == 0:
        cifar_resnet_distributed(data_path, save_model_filename=start_model, communicator=None, run_test=False, num_epochs=num_start_epochs)
if __name__=='__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('-q', '--quantize_bit', help='quantized bit', required=False, default='32')
    parser.add_argument('-b', '--block_size', help='block momentum block size; quantized bit is ignored if this is set', required=False)
    parser.add_argument('-e', '--epochs', help='total epochs', required=False, default='160')
    parser.add_argument('-w', '--warm_start', help='number of samples to warm start before running distributed', required=False, default='50000')
    args = vars(parser.parse_args())
    num_quantization_bits = int(args['quantize_bit'])
    epochs = int(args['epochs'])
    distributed_after_samples = int(args['warm_start'])

    communicator.barrier()

    # train in parallel
    error = cifar_resnet_distributed(data_path, load_model_filename=start_model, communicator=communicator, run_test=True, num_epochs=num_parallel_epochs)
    if args['block_size']:
        block_size = int(args['block_size'])
        print("Start training: block_size = {}, epochs = {}, warm_start = {}".format(block_size, epochs, distributed_after_samples))
        distributed_trainer = distributed.block_momentum_distributed_trainer(
            block_size=block_size,
            distributed_after=distributed_after_samples)
    else:
        print("Start training: quantize_bit = {}, epochs = {}, warm_start = {}".format(num_quantization_bits, epochs, distributed_after_samples))
        distributed_trainer = distributed.data_parallel_distributed_trainer(
            num_quantization_bits=num_quantization_bits,
            distributed_after=distributed_after_samples)
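
    # Hedged usage sketch (assumed invocations, not part of the commit):
    #   mpiexec -n 4 python <this script> -q 1 -e 160 -w 50000   # 1-bit SGD after a 50000-sample warm start
    #   mpiexec -n 4 python <this script> -b 3200 -e 160         # block momentum; -q is ignored when -b is set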

    reader_train = create_reader(os.path.join(data_path, 'train_map.txt'), os.path.join(data_path, 'CIFAR-10_mean.xml'), True, distributed_after_samples)
    reader_test = create_reader(os.path.join(data_path, 'test_map.txt'), os.path.join(data_path, 'CIFAR-10_mean.xml'), False)

    train_and_evaluate(reader_train, reader_test, max_epochs=epochs, distributed_trainer=distributed_trainer)
    distributed.Communicator.finalize()
    return error


if __name__ == '__main__':
    # check if we have multiple GPUs, and fall back to 1 GPU if not
    devices = device.all_devices()
    gpu_count = 0
    for dev in devices:
        gpu_count += (1 if dev.type() == DeviceKind_GPU else 0)
    print("Found {} GPUs".format(gpu_count))

    if gpu_count == 0:
        print("No GPU found, exiting")
        quit()

    data_path = os.path.abspath(os.path.normpath(os.path.join(
        *"../../../../Examples/Image/DataSets/CIFAR-10/".split("/"))))

    os.chdir(data_path)

    total_epochs = 11
    error = train_and_evaluate(data_path, total_epochs, gpu_count)

    print("Error: %f" % error)

@@ -105,7 +105,7 @@ def train(reader, model, max_epochs):
            # BUGBUG? The change of minibatch_size parameter vv has no effect.
            data = reader.next_minibatch(min(minibatch_size, epoch_end-t), input_map=input_map)  # fetch minibatch
            trainer.train_minibatch(data)                    # update model with it
            t += data[slot_labels].num_samples               # count samples processed so far
            t += trainer.previous_minibatch_sample_count     # count samples processed so far
            progress_printer.update_with_trainer(trainer, with_metric=True)  # log progress
    #def trace_node(name):
    #    nl = [n for n in z.parameters if n.name() == name]

Makefile
@@ -472,6 +472,7 @@ CNTKLIBRARY_TESTS_SRC =\
	$(CNTKLIBRARY_TESTS_SRC_PATH)/Seq2Seq.cpp \
	$(CNTKLIBRARY_TESTS_SRC_PATH)/TruncatedLSTMAcousticModel.cpp \
	$(CNTKLIBRARY_TESTS_SRC_PATH)/DeviceSelectionTests.cpp \
	$(CNTKLIBRARY_TESTS_SRC_PATH)/MinibatchSourceTest.cpp \
	Examples/Evaluation/CPPEvalV2Client/EvalMultithreads.cpp \

CNTKLIBRARY_TESTS:=$(BINDIR)/v2librarytests

@@ -1 +1 @@
Subproject commit c489fc304d5a60f7372ec1ee51c50707171d1696
Subproject commit fd7daba1c7b6cb336f78f77731994b460e0f42a1

@@ -3316,6 +3316,11 @@ namespace CNTK
        ///
        CNTK_API double TestMinibatch(const std::unordered_map<Variable, ValuePtr>& arguments, const DeviceDescriptor& computeDevice = DeviceDescriptor::UseDefaultDevice());

        ///
        /// Returns whether the trainer is running distributed (more than 1 MPI worker)
        ///
        CNTK_API bool IsRunningDistributed() const;

        ///
        /// Checkpoint the model and other Trainer state at the specified file location
        ///

@@ -3365,6 +3370,8 @@ namespace CNTK

    private:
        void Save(const std::wstring& modelFilePath, bool usingLegacyModelFormat, const Dictionary& state);
        bool UpdateLearners(const std::unordered_map<Parameter, NDArrayViewPtr>& gradients);
        bool HandleEmptyMinibatch(bool atEndOfData);

        FunctionPtr m_combinedTrainingFunction;
        FunctionPtr m_model;

@@ -3379,6 +3386,8 @@ namespace CNTK
        std::vector<LearnerPtr> m_parameterLearners;

        size_t m_prevMinibatchNumSamples;
        size_t m_totalSamplesSeen;
        bool m_distributed;
        ValuePtr m_prevMinibatchAggregateTrainingLossValue;
        ValuePtr m_prevMinibatchAggregateEvalCriterionValue;
    };

@@ -3431,7 +3440,8 @@ namespace CNTK
    {
    public:
        static const size_t InfinitelyRepeat = SIZE_MAX;
        static const size_t FullDataSweep = SIZE_MAX - 2; // An arbitrary sentinel value
        static const size_t FullDataSweep = SIZE_MAX - 2; // An arbitrary sentinel value
        static const size_t InfiniteSamples = SIZE_MAX;

    public:
        ///

@@ -3449,6 +3459,11 @@ namespace CNTK
            size_t minibatchSizeInSequences,
            const DeviceDescriptor& device = DeviceDescriptor::UseDefaultDevice()) = 0;

        ///
        /// Returns whether the MinibatchSource is running in a distributed manner
        ///
        virtual bool IsDistributed() const = 0;

        ///
        /// Destruct this MinibatchSource.
        ///

@@ -3497,7 +3512,7 @@ namespace CNTK
    ///
    /// Instantiate the CNTK built-in composite minibatch source.
    ///
    CNTK_API MinibatchSourcePtr CreateCompositeMinibatchSource(const Dictionary& configuration, DistributedCommunicatorPtr communicator = nullptr);
    CNTK_API MinibatchSourcePtr CreateCompositeMinibatchSource(const Dictionary& configuration);

    struct StreamConfiguration
    {

@@ -3514,7 +3529,7 @@ namespace CNTK
    ///
    /// Instantiate the CNTK built-in text format minibatch source
    ///
    inline MinibatchSourcePtr TextFormatMinibatchSource(const std::wstring& dataFilePath, const std::vector<StreamConfiguration>& streamConfigs, size_t epochSize = MinibatchSource::InfinitelyRepeat, bool randomize = true, DistributedCommunicatorPtr communicator = nullptr)
    inline MinibatchSourcePtr TextFormatMinibatchSource(const std::wstring& dataFilePath, const std::vector<StreamConfiguration>& streamConfigs, size_t epochSize = MinibatchSource::InfinitelyRepeat, bool randomize = true, size_t distributedAfterSampleCount = MinibatchSource::InfiniteSamples)
    {
        ::CNTK::Dictionary minibatchSourceConfiguration;
        minibatchSourceConfiguration[L"epochSize"] = epochSize;

@@ -3546,7 +3561,10 @@ namespace CNTK
        deserializerConfiguration[L"input"] = inputStreamsConfig;
        minibatchSourceConfiguration[L"deserializers"] = std::vector<::CNTK::DictionaryValue>({ deserializerConfiguration });

        return CreateCompositeMinibatchSource(minibatchSourceConfiguration, communicator);
        // TODO: change all these dictionary names to string constants
        minibatchSourceConfiguration[L"distributedAfterSampleCount"] = distributedAfterSampleCount;

        return CreateCompositeMinibatchSource(minibatchSourceConfiguration);
    }

    ///

@@ -3710,12 +3728,27 @@ namespace CNTK
        // Return the distributed communicator used in the distributed trainer
        CNTK_API virtual DistributedCommunicatorPtr GetCommunicator() = 0;

        // Return the distributed-after sample count
        CNTK_API size_t GetDistributedAfterSampleCount() const
        {
            return m_distributedAfterSampleCount;
        }

        virtual ~DistributedTrainer() {}

    protected:
        // Set the parallelization-start-after sample count
        DistributedTrainer(size_t distributedAfterSampleCount) :
            m_distributedAfterSampleCount(distributedAfterSampleCount)
        {}

    private:
        size_t m_distributedAfterSampleCount;
    };

    CNTK_API DistributedTrainerPtr CreateDataParallelDistributedTrainer(DistributedCommunicatorPtr communicator, bool useAsyncBufferedParameterUpdate);
    CNTK_API DistributedTrainerPtr CreateDataParallelDistributedTrainer(DistributedCommunicatorPtr communicator, bool useAsyncBufferedParameterUpdate, size_t distributedAfterSampleCount = 0);

    CNTK_API DistributedTrainerPtr CreateQuantizedDataParallelDistributedTrainer(DistributedCommunicatorPtr communicator, bool useAsyncBufferedParameterUpdate);
    CNTK_API DistributedTrainerPtr CreateQuantizedDataParallelDistributedTrainer(QuantizedDistributedCommunicatorPtr communicator, bool useAsyncBufferedParameterUpdate, size_t distributedAfterSampleCount);

    CNTK_API DistributedTrainerPtr CreateBlockMomentumDistributedTrainer(
        DistributedCommunicatorPtr communicator,

@@ -3723,14 +3756,16 @@ namespace CNTK
        double blockMomentumAsTimeConstant,
        bool useNestrovMomentum = true,
        bool resetSGDMomentumAfterAggregation = true,
        double blockLearningRate = 1.0);
        double blockLearningRate = 1.0,
        size_t distributedAfterSampleCount = 0);

    CNTK_API DistributedTrainerPtr CreateBlockMomentumDistributedTrainer(
        DistributedCommunicatorPtr communicator,
        size_t blockSize,
        bool useNestrovMomentum = true,
        bool resetSGDMomentumAfterAggregation = true,
        double blockLearningRate = 1.0);
        double blockLearningRate = 1.0,
        size_t distributedAfterSampleCount = 0);
}
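
A minimal Python-level sketch of how the new distributedAfterSampleCount parameter surfaces in the example scripts above (the exact binding surface is an assumption; the names are taken from the Python code in this commit):

    # reader and trainer share the same warm-start boundary of 50000 samples,
    # so both switch to distributed operation on the same minibatch
    reader_train = create_reader(map_file, mean_file, True, distributed_after=50000)
    distributed_trainer = distributed.data_parallel_distributed_trainer(
        num_quantization_bits=32,
        distributed_after=50000)
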
@@ -21,9 +21,9 @@ namespace CNTK
        return MakeSharedObject<QuantizedMPICommunicatorImpl>(zeroThresholdFor1Bit, useQuantizationForSelfStripe, numQuantizationBits);
    }

    DistributedTrainerPtr CreateQuantizedDataParallelDistributedTrainer(DistributedCommunicatorPtr communicator, bool useAsyncBufferedParameterUpdate)
    DistributedTrainerPtr CreateQuantizedDataParallelDistributedTrainer(QuantizedDistributedCommunicatorPtr communicator, bool useAsyncBufferedParameterUpdate, size_t distributedAfterSampleCount)
    {
        return MakeSharedObject<DataParallelDistributedTrainer>(communicator, useAsyncBufferedParameterUpdate);
        return MakeSharedObject<QuantizedDataParallelDistributedTrainer>(communicator, useAsyncBufferedParameterUpdate, distributedAfterSampleCount);
    }

    DistributedTrainerPtr CreateBlockMomentumDistributedTrainer(

@@ -31,14 +31,16 @@ namespace CNTK
        size_t blockSize,
        bool useNestrovMomentum,
        bool resetSGDMomentumAfterAggregation,
        double blockLearningRate)
        double blockLearningRate,
        size_t distributedAfterSampleCount)
    {
        return MakeSharedObject<BlockMomentumDistributedTrainer>(
            communicator,
            blockSize,
            useNestrovMomentum,
            resetSGDMomentumAfterAggregation,
            blockLearningRate);
            blockLearningRate,
            distributedAfterSampleCount);
    }

    DistributedTrainerPtr CreateBlockMomentumDistributedTrainer(

@@ -47,7 +49,8 @@ namespace CNTK
        double blockMomentumAsTimeConstant,
        bool useNestrovMomentum,
        bool resetSGDMomentumAfterAggregation,
        double blockLearningRate)
        double blockLearningRate,
        size_t distributedAfterSampleCount)
    {
        return MakeSharedObject<BlockMomentumDistributedTrainer>(
            communicator,

@@ -55,7 +58,8 @@ namespace CNTK
            useNestrovMomentum,
            resetSGDMomentumAfterAggregation,
            blockLearningRate,
            blockMomentumAsTimeConstant);
            blockMomentumAsTimeConstant,
            distributedAfterSampleCount);
    }

#else

@@ -64,7 +68,7 @@ namespace CNTK
        LogicError("Quantized MPI Communicator is not supported for this build. The 1BitSGD build is needed, see CNTK wiki for details.");
    }

    DistributedTrainerPtr CreateQuantizedDataParallelDistributedTrainer(DistributedCommunicatorPtr, bool)
    DistributedTrainerPtr CreateQuantizedDataParallelDistributedTrainer(QuantizedDistributedCommunicatorPtr, bool, size_t)
    {
        LogicError("Quantized Distributed Trainer is not supported for this build. The 1BitSGD build is needed, see CNTK wiki for details.");
    }

@@ -74,7 +78,8 @@ namespace CNTK
        size_t /*blockSize*/,
        bool /*useNestrovMomentum*/,
        bool /*resetSGDMomentumAfterAggregation*/,
        double /*blockLearningRate*/)
        double /*blockLearningRate*/,
        size_t /*distributedAfterSampleCount*/)
    {
        LogicError("Block Momentum Distributed Trainer is not supported for this build. The 1BitSGD build is needed, see CNTK wiki for details.");
    }

@@ -85,19 +90,20 @@ namespace CNTK
        double /*blockMomentumAsTimeConstant*/,
        bool /*useNestrovMomentum*/,
        bool /*resetSGDMomentumAfterAggregation*/,
        double /*blockLearningRate*/)
        double /*blockLearningRate*/,
        size_t /*distributedAfterSampleCount*/)
    {
        LogicError("Block Momentum Distributed Trainer is not supported for this build. The 1BitSGD build is needed, see CNTK wiki for details.");
    }
#endif

    DistributedTrainerPtr CreateDataParallelDistributedTrainer(DistributedCommunicatorPtr communicator, bool useAsyncBufferedParameterUpdate)
    DistributedTrainerPtr CreateDataParallelDistributedTrainer(DistributedCommunicatorPtr communicator, bool useAsyncBufferedParameterUpdate, size_t distributedAfterSampleCount)
    {
        return MakeSharedObject<DataParallelDistributedTrainer>(communicator, useAsyncBufferedParameterUpdate);
        return MakeSharedObject<DataParallelDistributedTrainer>(communicator, useAsyncBufferedParameterUpdate, distributedAfterSampleCount);
    }

    DataParallelDistributedTrainer::DataParallelDistributedTrainer(DistributedCommunicatorPtr communicator, bool useAsyncBufferedParameterUpdate)
        : DistributedTrainerBase(communicator)
    DataParallelDistributedTrainer::DataParallelDistributedTrainer(DistributedCommunicatorPtr communicator, bool useAsyncBufferedParameterUpdate, size_t distributedAfterSampleCount)
        : DistributedTrainerBase(communicator, distributedAfterSampleCount)
    {
        if (useAsyncBufferedParameterUpdate)
            LogicError("Asynchronous parameter update is not yet supported.");

@@ -106,6 +112,8 @@ namespace CNTK
    // Optional override that gets called per minibatch after finishing gradient computation but before updating model parameters
    bool DataParallelDistributedTrainer::PreParameterUpdateCallback(const Trainer& /*trainer*/, std::vector<std::pair<Parameter, NDArrayViewPtr>>& gradientValues, MinibatchInfo& info)
    {
        HandleEmptyMinibatch(gradientValues, info);

        std::vector<NDArrayViewPtr> valuesToAggregate;
        for (const auto& i : gradientValues)
            valuesToAggregate.push_back(i.second);

@@ -16,7 +16,7 @@ namespace CNTK
    class DataParallelDistributedTrainer : public DistributedTrainerBase
    {
    public:
        DataParallelDistributedTrainer(DistributedCommunicatorPtr communicator, bool useAsyncBufferedParameterUpdate);
        DataParallelDistributedTrainer(DistributedCommunicatorPtr communicator, bool useAsyncBufferedParameterUpdate, size_t distributedAfterSampleCount);

        // Optional override that gets called per minibatch after finishing gradient computation but before updating model parameters
        bool PreParameterUpdateCallback(const Trainer& trainer, std::vector<std::pair<Parameter, NDArrayViewPtr>>& gradientValues, MinibatchInfo& info) override;

@@ -9,8 +9,9 @@

namespace CNTK
{
    DistributedTrainerBase::DistributedTrainerBase(DistributedCommunicatorPtr communicator)
        : m_communicator(communicator)
    DistributedTrainerBase::DistributedTrainerBase(DistributedCommunicatorPtr communicator, size_t distributedAfterSampleCount)
        : DistributedTrainer(distributedAfterSampleCount),
          m_communicator(communicator)
    {
    }

@@ -52,4 +53,22 @@ namespace CNTK
            RuntimeError("Cannot restore from the checkpoint, 0 rank is missing.");
        return checkpoint[key].Value<Dictionary>();
    }

    void DistributedTrainerBase::HandleEmptyMinibatch(std::vector<std::pair<Parameter, NDArrayViewPtr>>& gradientValues, MinibatchInfo& info)
    {
        if (info.numberOfSamples == 0)
        {
            // Need to initialize gradients to 0 in case of an empty minibatch.
            for (auto& g : gradientValues)
            {
                auto weights = g.first.Value();
                g.second = MakeSharedObject<NDArrayView>(0, weights->GetDataType(), weights->Shape(), weights->Device());
            }

            // TODO: what if in the future the type is different?
            auto dataType = gradientValues.front().first.GetDataType();
            info.evalCriterionValue = MakeSharedObject<NDArrayView>(0, dataType, NDShape{ 1 }, DeviceDescriptor::CPUDevice());
            info.trainingLossValue = MakeSharedObject<NDArrayView>(0, dataType, NDShape{ 1 }, DeviceDescriptor::CPUDevice());
        }
    }
}

@@ -33,9 +33,11 @@ namespace CNTK
    }

    protected:
        explicit DistributedTrainerBase(DistributedCommunicatorPtr communicator);
        explicit DistributedTrainerBase(DistributedCommunicatorPtr communicator, size_t distributedAfterSampleCount);
        Dictionary CreateCheckpoint(const Dictionary& localStateToShare);

        static void HandleEmptyMinibatch(std::vector<std::pair<Parameter, NDArrayViewPtr>>& gradientValues, MinibatchInfo& info);

        DistributedCommunicatorPtr m_communicator;
    };
}

@@ -13,6 +13,7 @@
#include "Function.h"
#include <tuple>
#include "Value.h"
#include "MPIWrapper.h"

using namespace Microsoft::MSR::CNTK;

@@ -62,15 +63,23 @@ namespace CNTK
        return *(*(matchingStreamInfos.begin()));
    }

    MinibatchSourcePtr CreateCompositeMinibatchSource(const Dictionary& configuration, DistributedCommunicatorPtr communicator)
    MinibatchSourcePtr CreateCompositeMinibatchSource(const Dictionary& configuration)
    {
        return MinibatchSourcePtr(new CompositeMinibatchSource(configuration, communicator));
        return MinibatchSourcePtr(new CompositeMinibatchSource(configuration));
    }

    /*static*/ const std::wstring CompositeMinibatchSource::MinibatchSourcePositionAttributeName = L"minibatchSourcePosition";
    /*static*/ const std::wstring CompositeMinibatchSource::PositionAttributeName = L"minibatchSourcePosition";
    /*static*/ const std::wstring CompositeMinibatchSource::DistributedAfterSampleCountAttributeName = L"minibatchDistributedAfterSampleCount";

    CompositeMinibatchSource::CompositeMinibatchSource(const Dictionary& configuration, DistributedCommunicatorPtr communicator)
        : m_epochEndReached(false), m_prevMinibatchSize(0), m_epochSize(MinibatchSource::InfinitelyRepeat), m_truncationLength(0), m_communicator(communicator)
    CompositeMinibatchSource::CompositeMinibatchSource(const Dictionary& configuration)
        : m_epochEndReached(false),
          m_prevMinibatchSize(0),
          m_epochSize(MinibatchSource::InfinitelyRepeat),
          m_truncationLength(0),
          m_numWorkers(1),
          m_workerRank(0),
          m_distributed(false),
          m_distributedAfterSampleCount(MinibatchSource::InfiniteSamples)
    {
        // The CNTK reader implementation requires for each deserializer both the module and deserializer type be specified
        // This is redundant and the V2 API users will just specify type from which the module is automatically inferred

@@ -129,6 +138,9 @@ namespace CNTK

        if (m_epochSize == MinibatchSource::FullDataSweep)
            m_epochSize = Microsoft::MSR::CNTK::requestDataSize;
        // Setting a big value, but not the max, in order to avoid bit overflow.
        else if (m_epochSize == MinibatchSource::InfinitelyRepeat)
            m_epochSize = std::numeric_limits<size_t>::max() / 2;

        const wchar_t* truncatedConfigurationKey = L"truncated";
        const wchar_t* truncationLengthConfigurationKey = L"truncationLength";

@@ -139,6 +151,11 @@ namespace CNTK
            m_truncationLength = augmentedConfiguration[truncationLengthConfigurationKey].Value<size_t>();
        }

        // TODO: change all the dictionary names to string constants
        const wchar_t* distributedAfterSampleCountConfigurationKey = L"distributedAfterSampleCount";
        if (augmentedConfiguration.Contains(distributedAfterSampleCountConfigurationKey))
            m_distributedAfterSampleCount = augmentedConfiguration[distributedAfterSampleCountConfigurationKey].Value<size_t>();

        typedef Reader*(*CreateCompositeDataReaderProc)(const ConfigParameters* parameters);
        CreateCompositeDataReaderProc createReaderProc = (CreateCompositeDataReaderProc)Plugin().Load(L"CompositeDataReader", "CreateCompositeDataReader");
        std::shared_ptr<Microsoft::MSR::CNTK::Reader> compositeDataReader(createReaderProc(&config));

@@ -149,6 +166,22 @@ namespace CNTK

        m_shim = std::shared_ptr<ReaderShim<float>>(new ReaderShim<float>(compositeDataReader), [](ReaderShim<float>* x) { x->Destroy(); });
        m_shim->Init(config);

        const wchar_t* numWorkersConfigurationKey = L"numWorkers";
        if (configuration.Contains(numWorkersConfigurationKey))
        {
            m_numWorkers = configuration[numWorkersConfigurationKey].Value<size_t>();

            const wchar_t* workerRankConfigurationKey = L"workerRank";
            if (configuration.Contains(workerRankConfigurationKey))
            {
                m_workerRank = configuration[workerRankConfigurationKey].Value<size_t>();
            }
            if (m_workerRank > m_numWorkers - 1)
            {
                LogicError("Invalid worker rank %lu (numWorkers %lu)", m_workerRank, m_numWorkers);
            }
        }
    }

    /*virtual*/ const std::unordered_map<StreamInformation, MinibatchData>&

@@ -166,11 +199,30 @@ namespace CNTK
        if (minibatchSizeInSamples == 0)
            InvalidArgument("GetNextMinibatch: Requested minibatch sizes must be > 0");

        // For the first m_distributedAfterSampleCount samples, the minibatch source won't run distributed.
        bool wasDistributed = m_distributed;
        if (!m_distributed && IsDistributed())
        {
            m_distributed = true;

            if (m_numWorkers == 1)
            {
                MPIWrapperPtr mpi = MPIWrapper::GetInstance();
                if (mpi == nullptr)
                {
                    // create an MPI instance if the source is intended to be distributed
                    mpi = MPIWrapper::GetInstance(true);
                }
                m_numWorkers = mpi->NumNodesInUse();
                m_workerRank = mpi->CurrentNodeRank();
            }
        }

        if (m_prevMinibatchSize == 0)
        {
            EpochConfiguration epochConfig;
            epochConfig.m_numberOfWorkers = m_communicator ? m_communicator->Workers().size() : 1;
            epochConfig.m_workerRank = m_communicator ? m_communicator->CurrentWorker().m_globalRank : 0;
            epochConfig.m_numberOfWorkers = m_distributed ? m_numWorkers : 1;
            epochConfig.m_workerRank = m_distributed ? m_workerRank : 0;
            epochConfig.m_minibatchSizeInSamples = minibatchSizeInSamples;
            epochConfig.m_truncationSize = m_truncationLength;

@@ -203,26 +255,30 @@ namespace CNTK

            m_shim->StartEpoch(epochConfig, inputs);
            m_prevMinibatchSize = minibatchSizeInSamples;
            wasDistributed = m_distributed;
        }

        if (minibatchSizeInSamples != m_prevMinibatchSize)
        if (minibatchSizeInSamples != m_prevMinibatchSize || wasDistributed != m_distributed)
        {
            std::map<std::wstring, int> inputDescriptions;
            for (const auto& s : m_streamInfos)
                inputDescriptions[s.m_name] = AsCNTKImplDeviceId(device);

            ReaderConfiguration newConfig;
            newConfig.m_numberOfWorkers = m_communicator ? m_communicator->Workers().size() : 1;
            newConfig.m_workerRank = m_communicator ? m_communicator->CurrentWorker().m_globalRank : 0;
            newConfig.m_numberOfWorkers = m_distributed ? m_numWorkers : 1;
            newConfig.m_workerRank = m_distributed ? m_workerRank : 0;
            newConfig.m_minibatchSizeInSamples = minibatchSizeInSamples;
            newConfig.m_truncationSize = m_truncationLength;

            m_shim->SetConfiguration(newConfig, inputDescriptions);

            m_prevMinibatchSize = minibatchSizeInSamples;
        }

        auto compositeReaderMinibatchDataEmpty = m_shim->GetMinibatch(m_matrices);
        auto hasData = m_shim->GetMinibatch(m_matrices);
        m_epochEndReached = m_shim->IsEndOfEpoch();
        if (m_epochEndReached && !hasData)
            return m_minibatchData;

        for (const auto& s : m_streamInfos)
        {

@@ -230,9 +286,9 @@ namespace CNTK
            auto& currentStreamInfo = s;

            ValuePtr minibatchValuePtr;
            if (!compositeReaderMinibatchDataEmpty)
            if (!hasData)
            {
                minibatchValuePtr = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(currentStreamInfo.m_elementType, s.m_sampleLayout.AppendShape({ 0, 0 }), DeviceDescriptor::CPUDevice()));
                m_minibatchData[currentStreamInfo] = { 0, 0, nullptr };
                continue;
            }

@@ -262,13 +318,15 @@ namespace CNTK
    /*virtual*/ Dictionary CompositeMinibatchSource::GetCheckpointState() const /*override*/
    {
        Dictionary checkpointState;
        checkpointState[MinibatchSourcePositionAttributeName] = m_shim->GetCurrentSamplePosition();
        checkpointState[PositionAttributeName] = m_shim->GetCurrentSamplePosition();
        checkpointState[DistributedAfterSampleCountAttributeName] = m_distributedAfterSampleCount;
        return checkpointState;
    }

    /*virtual*/ void CompositeMinibatchSource::RestoreFromCheckpoint(const Dictionary& checkpoint) /*override*/
    {
        auto checkpointedMinibatchSourcePosition = checkpoint[MinibatchSourcePositionAttributeName].Value<size_t>();
        auto checkpointedMinibatchSourcePosition = checkpoint[PositionAttributeName].Value<size_t>();
        m_shim->SetCurrentSamplePosition(checkpointedMinibatchSourcePosition);
        m_distributedAfterSampleCount = checkpoint[DistributedAfterSampleCountAttributeName].Value<size_t>();
    }
}

@@ -16,10 +16,11 @@ namespace CNTK
{
    class CompositeMinibatchSource final : public MinibatchSource
    {
        static const std::wstring MinibatchSourcePositionAttributeName;
        static const std::wstring PositionAttributeName;
        static const std::wstring DistributedAfterSampleCountAttributeName;

    public:
        CompositeMinibatchSource(const Dictionary& configuration, DistributedCommunicatorPtr communicator);
        CompositeMinibatchSource(const Dictionary& configuration);

        virtual const std::unordered_set<StreamInformation>& StreamInfos() override { return m_streamInfos; }

@@ -30,6 +31,11 @@ namespace CNTK
        virtual Dictionary GetCheckpointState() const override;
        virtual void RestoreFromCheckpoint(const Dictionary& checkpoint) override;

        virtual bool IsDistributed() const override
        {
            return m_shim->GetCurrentSamplePosition() >= m_distributedAfterSampleCount;
        }

    private:
        static Microsoft::MSR::CNTK::InputStreamDescription GetInputStreamDescription(const StreamInformation& s, const DeviceDescriptor& device)
        {

@@ -41,9 +47,12 @@ namespace CNTK
        }

    private:
        DistributedCommunicatorPtr m_communicator;
        std::unordered_set<StreamInformation> m_streamInfos;
        bool m_epochEndReached;
        bool m_distributed;
        size_t m_numWorkers;
        size_t m_workerRank;
        size_t m_distributedAfterSampleCount;
        size_t m_prevMinibatchSize;
        size_t m_epochSize;
        size_t m_truncationLength;

@@ -13,12 +13,20 @@ namespace
{
    const std::wstring learnersPropertyName = L"Learners";
    const std::wstring distributedLearnerPropertyName = L"DistributedLearner";
    const std::wstring totalSeenSamplesPropertyName = L"TotalSeenSamples";
}

namespace CNTK
{
    Trainer::Trainer(const FunctionPtr& model, const FunctionPtr& lossFunction, const FunctionPtr& evaluationFunction, const std::vector<LearnerPtr>& parameterLearners, const DistributedTrainerPtr& distributedTrainer)
        : m_model(model), m_lossFunction(lossFunction), m_evaluationFunction(evaluationFunction), m_parameterLearners(parameterLearners), m_prevMinibatchNumSamples(1), m_distributedTrainer(distributedTrainer)
        : m_model(model),
          m_lossFunction(lossFunction),
          m_evaluationFunction(evaluationFunction),
          m_parameterLearners(parameterLearners),
          m_prevMinibatchNumSamples(1),
          m_distributedTrainer(distributedTrainer),
          m_totalSamplesSeen(0),
          m_distributed(false)
    {
        std::vector<Variable> combinedFunctionArgs = { m_model, m_lossFunction };
        if (!m_lossFunction->Output().DynamicAxes().empty())

@@ -151,14 +159,36 @@ namespace CNTK

    bool Trainer::TrainMinibatch(const std::unordered_map<Variable, ValuePtr>& arguments, std::unordered_map<Variable, ValuePtr>& outputsToFetch, const DeviceDescriptor& computeDevice /*= DeviceDescriptor::UseDefaultDevice()*/)
    {
        {
            // TODO: We should reconsider the interface,
            // probably passing a flag that the minibatch is the last one, and empty arguments in case of an empty minibatch.
            bool emptyMinibatch = arguments.empty() || (arguments.begin()->second == nullptr);
            if (emptyMinibatch)
                return HandleEmptyMinibatch(arguments.empty());
        }

        std::unordered_map<Variable, ValuePtr> outputs = { { m_aggregatedLossFunction, nullptr }, { m_trainingSampleCountVar, nullptr } };
        if (m_aggregatedEvaluationFunction)
            outputs.insert({ m_aggregatedEvaluationFunction, nullptr });

        outputs.insert(outputsToFetch.begin(), outputsToFetch.end());

        if (m_distributedTrainer)
        bool wasDistributed = m_distributed;

        // When a distributed trainer exists, parallelization starts after the specified number of samples has been seen.
        // Before that, all workers run locally without aggregation (and the minibatch source runs locally as well).
        // NOTE that this relies on reader determinism for all workers to reach the same state.
        // TODO: pass the model/parameters from worker 0 to the other workers when parallelization starts.

        m_distributed = IsRunningDistributed();

        if (m_distributed)
        {
            // when switching from not distributed, all workers need to sync up before starting cooperation
            if (!wasDistributed) m_distributedTrainer->GetCommunicator()->Barrier();

            m_distributedTrainer->PreMinibatchCallback(*this);
        }

        auto backPropSate = m_combinedTrainingFunction->Forward(arguments, outputs, computeDevice, { m_aggregatedLossFunction });
        m_prevMinibatchAggregateTrainingLossValue = outputs[m_aggregatedLossFunction];

@@ -184,19 +214,21 @@ namespace CNTK
            parameterGradients[parameter] = nullptr;
        }

        // TODO: Why does the Backward signature not take Parameter instead of Variable for gradients?
        m_combinedTrainingFunction->Backward(backPropSate, { { m_aggregatedLossFunction, rootGradientValue } }, parameterGradients);

        m_prevMinibatchNumSamples = GetSampleCount(m_trainingSampleCountVar, outputs[m_trainingSampleCountVar]);
        m_totalSamplesSeen += m_prevMinibatchNumSamples;

        // Aggregation should happen in the same order; the order of parameters is guaranteed to be the same.
        std::vector<std::pair<Parameter, NDArrayViewPtr>> gradients;
        gradients.reserve(modelParameters.size());
        for (const auto& parameter : modelParameters)
            gradients.push_back(std::make_pair(parameter, parameterGradients[parameter]->Data()));

        bool endOfData = m_prevMinibatchNumSamples == 0;
        if (m_distributedTrainer)
        if (m_distributed)
        {
            // Aggregation should happen in the same order; the order of parameters is guaranteed to be the same.
            std::vector<std::pair<Parameter, NDArrayViewPtr>> gradients;
            gradients.reserve(modelParameters.size());
            for (const auto& parameter : modelParameters)
                gradients.push_back(std::make_pair(parameter, parameterGradients[parameter]->Data()));

            MinibatchInfo info
            {
                arguments.empty(),

@@ -209,6 +241,11 @@ namespace CNTK
            m_prevMinibatchNumSamples = info.numberOfSamples;
        }

        return UpdateLearners(std::unordered_map<Parameter, NDArrayViewPtr>(gradients.begin(), gradients.end())) && !endOfData;
    }

    bool Trainer::UpdateLearners(const std::unordered_map<Parameter, NDArrayViewPtr>& gradients)
    {
        bool anyUpdatesPerformed = false;
        for (auto learner : m_parameterLearners)
        {

@@ -216,16 +253,55 @@ namespace CNTK
            const auto& learnerParameters = learner->Parameters();
            for (const auto& parameter : learnerParameters)
            {
                learnerParameterGradients[parameter] = parameterGradients[parameter]->Data();
                auto value = gradients.find(parameter);
                if (value == gradients.end())
                    LogicError("Learner contains a parameter that does not exist in the model");

                if (parameterGradients[parameter]->Mask())
                    LogicError("The gradient value for a Parameter cannot have an associated mask!");
                learnerParameterGradients[parameter] = value->second;
            }

            anyUpdatesPerformed |= learner->Update(learnerParameterGradients, m_prevMinibatchNumSamples);
        }
        return anyUpdatesPerformed;
    }

        return anyUpdatesPerformed && !endOfData;
    bool Trainer::HandleEmptyMinibatch(bool atEndOfData)
    {
        if (m_distributedTrainer == nullptr) return false;

        m_prevMinibatchNumSamples = 0;

        // Gradients do not exist for an empty minibatch.
        std::vector<std::pair<Parameter, NDArrayViewPtr>> gradients;
        auto modelParameters = m_combinedTrainingFunction->Parameters();
        gradients.reserve(modelParameters.size());
        for (const auto& parameter : modelParameters)
            gradients.push_back(std::make_pair(parameter, nullptr));

        MinibatchInfo info
        {
            atEndOfData,
            0,
            m_prevMinibatchAggregateTrainingLossValue->Data(),
            m_prevMinibatchAggregateEvalCriterionValue->Data()
        };

        bool end = m_distributedTrainer->PreParameterUpdateCallback(*this, gradients, info);
        m_prevMinibatchNumSamples = info.numberOfSamples;

        bool anyUpdatesPerformed = false;
        if (!m_prevMinibatchNumSamples)
            anyUpdatesPerformed = UpdateLearners(std::unordered_map<Parameter, NDArrayViewPtr>(gradients.begin(), gradients.end()));
        return anyUpdatesPerformed && !end;
    }

    bool Trainer::IsRunningDistributed() const
    {
        return m_distributedTrainer != nullptr &&
            // TODO: only run distributed with more than 1 worker.
            // This is disabled for now for V1 parity so that quantization also runs for 1 worker.
            //m_distributedTrainer->GetCommunicator()->Workers().size() > 1 &&
            m_totalSamplesSeen >= m_distributedTrainer->GetDistributedAfterSampleCount();
    }
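
    // Editor's note (an assumption, not part of the commit): the warm-start switch above is
    // safe only because every worker advances m_totalSamplesSeen identically from a
    // deterministic reader, so all ranks cross GetDistributedAfterSampleCount() on the same
    // minibatch and the Barrier() taken in TrainMinibatch cannot deadlock.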

    static std::wstring GetTrainerStateCheckpointFilePath(const std::wstring& modelFilePath)

@@ -258,14 +334,13 @@ namespace CNTK
        vector<DictionaryValue> learnerStates;
        for (const auto& learner : m_parameterLearners)
        {
            // TODO: add DictionaryValue(T&&)
            learnerStates.push_back(DictionaryValue(learner->Serialize()));
            learnerStates.push_back(std::move(DictionaryValue(learner->Serialize())));
        }

        // add DictionaryValue ctor that takes an rvalue!
        Dictionary state;
        state[learnersPropertyName] = learnerStates;
        state[distributedLearnerPropertyName] = distributedLearnerState;
        state[totalSeenSamplesPropertyName] = m_totalSamplesSeen;

        m_combinedTrainingFunction->SaveModel(modelFilePath, usinglegacyModelFormat);
        std::wstring trainerStateCheckpointFilePath = GetTrainerStateCheckpointFilePath(modelFilePath);

@@ -284,6 +359,7 @@ namespace CNTK
        Dictionary checkpoint;
        *ckpStream >> checkpoint;

        m_totalSamplesSeen = checkpoint[totalSeenSamplesPropertyName].Value<size_t>();
        const DictionaryValue& learners = checkpoint[learnersPropertyName];
        const vector<DictionaryValue>& learnerStates = learners.Value<vector<DictionaryValue>>();

@@ -72,6 +72,10 @@ void BlockRandomizer::StartEpoch(const EpochConfiguration& config)
        m_epochSize = config.m_totalEpochSizeInSamples;
    }

    // Sanity check: too big a value can cause invalid behavior due to overflow.
    if (m_epochSize > std::numeric_limits<size_t>::max() / 2)
        InvalidArgument("Too big epoch size can cause bit overflow");

    SetCurrentSamplePosition(m_epochSize * config.m_epochIndex);
    if (m_verbosity >= Notification)
    {

@@ -22,15 +22,29 @@
 namespace Microsoft { namespace MSR { namespace CNTK {

 template <class ElemType>
-ReaderShim<ElemType>::ReaderShim(ReaderFactory factory)
-    : m_factory(factory), m_deviceId(CPUDEVICE), m_dataTransferers(2, DataTransfererPtr()), m_currentDataTransferIndex(0), m_endOfEpoch(false)
+ReaderShim<ElemType>::ReaderShim() :
+    m_deviceId(CPUDEVICE),
+    m_dataTransferers(2, DataTransfererPtr()),
+    m_currentDataTransferIndex(0),
+    m_endOfEpoch(false),
+    m_currentSamplePosition(0),
+    m_reader(nullptr),
+    m_factory(nullptr)
 {
 }

 template <class ElemType>
-ReaderShim<ElemType>::ReaderShim(ReaderPtr reader)
-    : m_deviceId(CPUDEVICE), m_dataTransferers(2, DataTransfererPtr()), m_currentDataTransferIndex(0), m_reader(reader), m_factory(nullptr), m_endOfEpoch(false)
+ReaderShim<ElemType>::ReaderShim(ReaderFactory factory) :
+    ReaderShim()
 {
+    m_factory = factory;
 }

+template <class ElemType>
+ReaderShim<ElemType>::ReaderShim(ReaderPtr reader) :
+    ReaderShim()
+{
+    m_reader = reader;
+}
+
 template <class ElemType>

@@ -81,6 +95,61 @@ void ReaderShim<ElemType>::StartDistributedMinibatchLoop(
     StartEpoch(config, inputs);
 }

+template <class ElemType>
+void ReaderShim<ElemType>::SetCurrentSamplePosition(size_t currentSamplePosition)
+{
+    // Make sure there are no outstanding reads.
+    if (m_prefetchTask.valid())
+        m_prefetchTask.wait();
+
+    // Check that there are no outstanding copies:
+    // wait on all events if there are any pending copy operations in flight.
+    if (m_dataTransferers[m_currentDataTransferIndex])
+        m_dataTransferers[m_currentDataTransferIndex]->WaitForCopyCPUToGPU();
+
+    // Set current position.
+    m_reader->SetCurrentSamplePosition(currentSamplePosition);
+    m_currentSamplePosition = m_reader->GetCurrentSamplePosition();
+
+    // Start prefetch.
+    auto localCurrentDataTransferIndex = m_currentDataTransferIndex;
+    // Starting the prefetch task. There is always a single async read in flight.
+    // When the network requests a new minibatch, we wait for the current async to finish, swap the buffers
+    // and kick off the new prefetch.
+    m_prefetchTask = std::async(m_launchType,
+        [this, localCurrentDataTransferIndex]()
+        {
+            return PrefetchMinibatch(localCurrentDataTransferIndex);
+        });
+}
+
+template <class ElemType>
+void ReaderShim<ElemType>::SetConfiguration(const ReaderConfiguration& config, const std::map<std::wstring, int>& inputDescriptions)
+{
+    // Make sure there are no outstanding reads.
+    if (m_prefetchTask.valid())
+        m_prefetchTask.wait();
+
+    // Check that there are no outstanding copies:
+    // wait on all events if there are any pending copy operations in flight.
+    if (m_dataTransferers[m_currentDataTransferIndex])
+        m_dataTransferers[m_currentDataTransferIndex]->WaitForCopyCPUToGPU();
+
+    m_reader->SetConfiguration(config, inputDescriptions);
+    m_reader->SetCurrentSamplePosition(m_currentSamplePosition);
+
+    // Start prefetch.
+    auto localCurrentDataTransferIndex = m_currentDataTransferIndex;
+    // Starting the prefetch task. There is always a single async read in flight.
+    // When the network requests a new minibatch, we wait for the current async to finish, swap the buffers
+    // and kick off the new prefetch.
+    m_prefetchTask = std::async(m_launchType,
+        [this, localCurrentDataTransferIndex]()
+        {
+            return PrefetchMinibatch(localCurrentDataTransferIndex);
+        });
+}
+
 template <class ElemType>
 void ReaderShim<ElemType>::StartEpoch(const EpochConfiguration& config, const std::unordered_set<InputStreamDescription>& inputs)
 {

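Both new methods follow the same discipline: drain the single in-flight read, wait out any pending CPU-to-GPU copies, mutate reader state only while idle, then re-arm the prefetch. A rough Python analogue of that single-outstanding-prefetch pattern, using concurrent.futures (all names here are illustrative, not CNTK API):

    import concurrent.futures

    class PrefetchingReader:
        # Illustrative stand-in for ReaderShim's double-buffered prefetch rule.
        def __init__(self, reader):
            self.reader = reader
            self.pool = concurrent.futures.ThreadPoolExecutor(max_workers=1)
            self.prefetch = None  # at most one async read in flight

        def _drain(self):
            # Mirror of "wait for outstanding reads/copies" before touching state.
            if self.prefetch is not None:
                self.prefetch.result()
                self.prefetch = None

        def set_current_sample_position(self, pos):
            self._drain()
            self.reader.set_position(pos)  # safe: the reader is idle here
            # Re-arm: kick off the next read immediately, as the shim does.
            self.prefetch = self.pool.submit(self.reader.read_minibatch)
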
@@ -27,6 +27,9 @@ template <class ElemType>
 class ReaderShim : public IDataReader
 {
+    friend class ::CNTK::CompositeMinibatchSource;
+private:
+    ReaderShim();

 public:
     explicit ReaderShim(ReaderFactory factory);
     explicit ReaderShim(ReaderPtr reader);

@@ -88,15 +91,9 @@ public:
     virtual size_t GetCurrentSamplePosition() override;

-    void SetCurrentSamplePosition(size_t currentSamplePosition)
-    {
-        m_reader->SetCurrentSamplePosition(currentSamplePosition);
-    }
+    void SetCurrentSamplePosition(size_t currentSamplePosition);

-    void SetConfiguration(const ReaderConfiguration& config, const std::map<std::wstring, int>& inputDescriptions)
-    {
-        m_reader->SetConfiguration(config, inputDescriptions);
-    }
+    void SetConfiguration(const ReaderConfiguration& config, const std::map<std::wstring, int>& inputDescriptions);

     bool IsEndOfEpoch() const
     {

@@ -12,6 +12,7 @@ from cntk.cntk_py import DeviceKind_GPU
 from cntk.device import set_default_device
 from cntk.io import ReaderConfig, ImageDeserializer
 import pytest
+import subprocess

 abs_path = os.path.dirname(os.path.abspath(__file__))
 sys.path.append(os.path.join(abs_path, "..", "..", "..", "..", "Examples", "Image", "Classification", "ResNet", "Python"))

@@ -47,3 +48,6 @@ def test_cifar_resnet_distributed_error(device_id, is_1bit_sgd):

     assert np.allclose(test_error, expected_test_error,
                        atol=TOLERANCE_ABSOLUTE)
+
+    # test multi process
+    subprocess.check_call("mpiexec -n 2 python CifarResNet_Distributed.py", stderr=subprocess.STDOUT, shell=True)

@@ -58,4 +58,4 @@ int main(int argc, char* argv[])

     fclose(stdout);
     return 0;
 }
 }

@@ -5,6 +5,10 @@
     <Configuration>Debug</Configuration>
     <Platform>x64</Platform>
   </ProjectConfiguration>
+  <ProjectConfiguration Include="Release_NoOpt|x64">
+    <Configuration>Release_NoOpt</Configuration>
+    <Platform>x64</Platform>
+  </ProjectConfiguration>
   <ProjectConfiguration Include="Release|x64">
     <Configuration>Release</Configuration>
     <Platform>x64</Platform>

@@ -92,6 +96,7 @@
     <AdditionalOptions>/d2Zi+ %(AdditionalOptions)</AdditionalOptions>
     <TreatWarningAsError>true</TreatWarningAsError>
     <RuntimeLibrary Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MultiThreaded</RuntimeLibrary>
+    <RuntimeLibrary Condition="'$(Configuration)|$(Platform)'=='Release_NoOpt|x64'">MultiThreaded</RuntimeLibrary>
   </ClCompile>
   <Link>
     <SubSystem>Console</SubSystem>

@@ -157,7 +157,7 @@ inline CNTK::FunctionPtr FullyConnectedLinearLayer(CNTK::Variable input, size_t
     assert(input.Shape().Rank() == 1);
     size_t inputDim = input.Shape()[0];

-    auto timesParam = CNTK::Parameter({ outputDim, inputDim }, CNTK::DataType::Float, CNTK::GlorotUniformInitializer(), device, L"timesParam");
+    auto timesParam = CNTK::Parameter({ outputDim, inputDim }, CNTK::DataType::Float, CNTK::GlorotUniformInitializer(CNTK::SentinelValueForInferParamInitRank, CNTK::SentinelValueForInferParamInitRank, CNTK::DefaultParamInitScale, 1), device, L"timesParam");
     auto timesFunction = CNTK::Times(timesParam, input, L"times");

     auto plusParam = CNTK::Parameter({ outputDim }, 0.0f, device, L"plusParam");

@@ -182,7 +182,7 @@ inline CNTK::FunctionPtr FullyConnectedFeedForwardClassifierNet(CNTK::Variable i
     for (size_t i = 1; i < numHiddenLayers; ++i)
         classifierRoot = FullyConnectedDNNLayer(classifierRoot, hiddenLayerDim, device, nonLinearity);

-    auto outputTimesParam = CNTK::Parameter(CNTK::NDArrayView::RandomUniform<float>({ numOutputClasses, hiddenLayerDim }, -0.5, 0.5, CNTK::SentinelValueForAutoSelectRandomSeed, device));
+    auto outputTimesParam = CNTK::Parameter(CNTK::NDArrayView::RandomUniform<float>({ numOutputClasses, hiddenLayerDim }, -0.5, 0.5, 1, device));
     return Times(outputTimesParam, classifierRoot, 1, outputName);
 }

@@ -23,6 +23,7 @@ void TrainSequenceToSequenceTranslator();
 void TrainTruncatedLSTMAcousticModelClassifer();
 void DeviceSelectionTests();
 void MultiThreadsEvaluation(bool);
+void MinibatchSourceTests();

 int main()
 {

@@ -65,6 +66,8 @@ int main()
     TrainSequenceToSequenceTranslator();
     TrainTruncatedLSTMAcousticModelClassifer();

+    MinibatchSourceTests();
+
     MultiThreadsEvaluation(IsGPUAvailable());

     fprintf(stderr, "\nCNTKv2Library tests: Passed\n");

@@ -0,0 +1,202 @@
+//
+// Copyright (c) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
+//
+
+#include "CNTKLibrary.h"
+#include "Common.h"
+
+using namespace CNTK;
+
+// Mock communicator to simulate an MPI run
+class MockCommunicator : public DistributedCommunicator
+{
+private:
+    std::unordered_set<DistributedWorkerDescriptor> m_workers;
+    DistributedWorkerDescriptor m_self;
+
+public:
+    virtual const std::unordered_set<DistributedWorkerDescriptor>& Workers() const override
+    {
+        return m_workers;
+    }
+
+    virtual const DistributedWorkerDescriptor& CurrentWorker() const override
+    {
+        return m_self;
+    }
+
+    virtual DistributedCommunicatorPtr SubGroup(const std::unordered_set<DistributedWorkerDescriptor>&) const override
+    {
+        return nullptr;
+    }
+
+    virtual void Concatenate(
+        const std::vector<ValuePtr>&,
+        std::vector<ValuePtr>&,
+        const std::unordered_set<DistributedWorkerDescriptor>&) override
+    {}
+
+    virtual void Concatenate(
+        const std::vector<NDArrayViewPtr>&,
+        std::vector<NDArrayViewPtr>&,
+        const std::unordered_set<DistributedWorkerDescriptor>&) override
+    {}
+
+    virtual void Gather(
+        const Dictionary&,
+        std::vector<DictionaryPtr>&,
+        const std::unordered_set<DistributedWorkerDescriptor>&) override
+    {}
+
+    virtual void AggregateInPlace(
+        const std::vector<NDArrayViewPtr>&,
+        const std::unordered_set<DistributedWorkerDescriptor>&) override
+    {}
+
+    virtual void Aggregate(
+        const std::vector<NDArrayViewPtr>&,
+        std::vector<NDArrayViewPtr>&,
+        const std::unordered_set<DistributedWorkerDescriptor>&) override
+    {}
+
+    virtual void Barrier() override
+    {}
+
+    MockCommunicator(size_t numWorkers)
+    {
+        for (size_t i = 0; i < numWorkers; i++)
+        {
+            DistributedWorkerDescriptor desc;
+            desc.m_hostId = L"MockCommunicator";
+            desc.m_globalRank = i;
+
+            m_workers.insert(desc);
+        }
+        MockRank(0);
+    }
+
+    void MockRank(size_t rank)
+    {
+        m_self.m_hostId = L"MockCommunicator";
+        m_self.m_globalRank = rank;
+    }
+};
+
+MinibatchSourcePtr TextFormatMinibatchSourceWithMockCommunicator(const std::wstring& dataFilePath, const std::vector<StreamConfiguration>& streamConfigs, size_t epochSize = MinibatchSource::InfinitelyRepeat, bool randomize = true, size_t distributedAfterSampleCount = MinibatchSource::InfiniteSamples, size_t numWorkers = 2, size_t workerRank = 0)
+{
+    ::CNTK::Dictionary minibatchSourceConfiguration;
+    minibatchSourceConfiguration[L"epochSize"] = epochSize;
+
+    if (randomize)
+        minibatchSourceConfiguration[L"randomize"] = true;
+
+    ::CNTK::Dictionary deserializerConfiguration;
+    deserializerConfiguration[L"type"] = L"CNTKTextFormatDeserializer";
+    deserializerConfiguration[L"file"] = dataFilePath;
+
+    ::CNTK::Dictionary inputStreamsConfig;
+    for (auto streamConfig : streamConfigs)
+    {
+        std::wstring streamName = streamConfig.m_streamName;
+        size_t streamDim = streamConfig.m_dim;
+        bool isSparse = streamConfig.m_isSparse;
+        std::wstring streamAlias = streamConfig.m_streamAlias;
+
+        ::CNTK::Dictionary inputStreamConfig;
+        inputStreamConfig[L"dim"] = streamDim;
+        inputStreamConfig[L"format"] = isSparse ? L"sparse" : L"dense";
+        if (!streamAlias.empty())
+            inputStreamConfig[L"alias"] = streamAlias;
+
+        inputStreamsConfig[streamName] = inputStreamConfig;
+    }
+
+    deserializerConfiguration[L"input"] = inputStreamsConfig;
+    minibatchSourceConfiguration[L"deserializers"] = std::vector<::CNTK::DictionaryValue>({ deserializerConfiguration });
+    minibatchSourceConfiguration[L"distributedAfterSampleCount"] = distributedAfterSampleCount;
+    minibatchSourceConfiguration[L"numWorkers"] = numWorkers;
+    minibatchSourceConfiguration[L"workerRank"] = workerRank;
+    return CreateCompositeMinibatchSource(minibatchSourceConfiguration);
+}
+
+void TestMinibatchSourceWarmStart(size_t numMBs, size_t minibatchSize, size_t warmStartSamples, bool randomize)
+{
+    const size_t inputDim = 2;
+    const size_t numOutputClasses = 2;
+    auto featureStreamName = L"features";
+    auto labelsStreamName = L"labels";
+    const size_t numWorkers = 2;
+
+    auto minibatchSource = TextFormatMinibatchSourceWithMockCommunicator(
+        L"SimpleDataTrain_cntk_text.txt",
+        { { featureStreamName, inputDim }, { labelsStreamName, numOutputClasses } },
+        MinibatchSource::InfinitelyRepeat,
+        randomize,
+        warmStartSamples,
+        numWorkers,
+        0);
+
+    auto minibatchSource2 = TextFormatMinibatchSourceWithMockCommunicator(
+        L"SimpleDataTrain_cntk_text.txt",
+        { { featureStreamName, inputDim }, { labelsStreamName, numOutputClasses } },
+        MinibatchSource::InfinitelyRepeat,
+        randomize,
+        warmStartSamples,
+        numWorkers,
+        1);
+
+    auto featureStreamInfo = minibatchSource->StreamInfo(featureStreamName);
+    auto labelStreamInfo = minibatchSource->StreamInfo(labelsStreamName);
+
+    auto featureStreamInfo2 = minibatchSource2->StreamInfo(featureStreamName);
+    auto labelStreamInfo2 = minibatchSource2->StreamInfo(labelsStreamName);
+
+    size_t totalSamples = 0;
+    for (size_t i = 0; i < numMBs; ++i)
+    {
+        bool distributed = minibatchSource->IsDistributed();
+        bool distributed2 = minibatchSource2->IsDistributed();
+        if (distributed != (totalSamples >= warmStartSamples) || distributed != distributed2)
+        {
+            ReportFailure("TestMinibatchSourceWarmStart failed in distributed state: expected %d, actual %d",
+                totalSamples >= warmStartSamples, distributed);
+        }
+
+        auto minibatchData = minibatchSource->GetNextMinibatch(minibatchSize);
+        auto minibatchData2 = minibatchSource2->GetNextMinibatch(minibatchSize);
+
+        // NOTE: expectedNumSamples is valid only in this test scenario
+        size_t expectedNumSamples = minibatchSize;
+        size_t numSamples = minibatchData[featureStreamInfo].m_numSamples;
+        size_t numSamples2 = minibatchData2[featureStreamInfo].m_numSamples;
+
+        if (!distributed && numSamples != numSamples2)
+        {
+            ReportFailure("TestMinibatchSourceWarmStart failed in sample count: expected %lu, distributed %d (0:%lu, 1:%lu)",
+                expectedNumSamples, distributed, numSamples, numSamples2);
+        }
+
+        size_t actualNumSamples = distributed ? numSamples + numSamples2 : numSamples;
+
+        if (actualNumSamples != expectedNumSamples)
+        {
+            ReportFailure("TestMinibatchSourceWarmStart failed in sample count: expected %lu, actual %lu distributed %d (%lu+%lu)",
+                expectedNumSamples, actualNumSamples, distributed, numSamples, numSamples2);
+        }
+
+        totalSamples += actualNumSamples;
+    }
+}
+
+void MinibatchSourceTests()
+{
+    // Test no-randomize minibatch source
+    TestMinibatchSourceWarmStart(10, 64, 128, false);
+    TestMinibatchSourceWarmStart(10, 64, 0, false);
+    TestMinibatchSourceWarmStart(10, 64, 100, false);
+
+    // Test randomized minibatch source
+    TestMinibatchSourceWarmStart(10, 64, 0, true);
+    TestMinibatchSourceWarmStart(10, 64, 128, true);
+}

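The test above pins down the warm-start contract: a source reports IsDistributed() only once distributedAfterSampleCount samples have been consumed, and until then both workers read full, identical minibatches. The same contract is visible from Python through the is_distributed property added further below; a hedged sketch (the file name and stream dimensions are illustrative):

    from cntk.io import text_format_minibatch_source, StreamConfiguration

    mb_source = text_format_minibatch_source(
        'SimpleDataTrain_cntk_text.txt',
        [StreamConfiguration('features', 2), StreamConfiguration('labels', 2)],
        randomize=False,
        distributed_after=128)  # stay local for the first 128 samples

    # Before any warm-start samples are consumed, the source is still local.
    assert not mb_source.is_distributed
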
@@ -119,6 +119,7 @@
     <ClCompile Include="Common.cpp" />
     <ClCompile Include="DeviceSelectionTests.cpp" />
     <ClCompile Include="LearnerTests.cpp" />
+    <ClCompile Include="MinibatchSourceTest.cpp" />
     <ClCompile Include="Seq2Seq.cpp" />
     <ClCompile Include="SerializationTests.cpp" />
     <ClCompile Include="FeedForwardTests.cpp" />

@@ -66,6 +66,9 @@
     <ClCompile Include="CifarResNet.cpp">
       <Filter>Source Files</Filter>
     </ClCompile>
+    <ClCompile Include="MinibatchSourceTest.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
   </ItemGroup>
   <ItemGroup>
     <ClInclude Include="Common.h">

@@ -340,6 +340,37 @@ fail:
 %eq_for(NDShape, NDShape_eq)
 %eq_for(DeviceDescriptor, DeviceDescriptor_eq)

+//
+// size_t converter and extended DictionaryValue constructor
+//
+
+// declare Python type
+struct SizeTWrapper
+{
+public:
+    size_t value;
+    SizeTWrapper(int v) : value(static_cast<size_t>(v)) {}
+    SizeTWrapper(size_t v) : value(v) {}
+};
+
+// inject into C++
+%{
+struct SizeTWrapper
+{
+public:
+    size_t value;
+    SizeTWrapper(int v) : value(static_cast<size_t>(v)) {}
+    SizeTWrapper(size_t v) : value(v) {}
+};
+%}
+
+// extend constructor
+%extend CNTK::DictionaryValue {
+    DictionaryValue(const SizeTWrapper& w)
+    {
+        return new DictionaryValue(w.value);
+    }
+}
+
 %extend CNTK::Dictionary {
     CNTK::DictionaryValue __getitem__(const wchar_t* key) {

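The wrapper exists because a plain Python int does not reliably select the size_t overload when stored into a CNTK Dictionary, so values like epochSize are wrapped explicitly. Usage, as the reader configuration further below relies on it (a small sketch):

    from cntk import cntk_py

    epoch_size = 5000
    wrapped = cntk_py.SizeTWrapper(epoch_size)  # stored as size_t on the C++ side
    assert wrapped.value == epoch_size          # tests read it back via .value
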
@@ -72,50 +72,86 @@ class Communicator(cntk_py.DistributedCommunicator):

     @staticmethod
     def finalize():
-        cntk_py.DistributedCommunicator.finalize();
-
-class QuantizedCommunicator(Communicator, cntk_py.QuantizedDistributedCommunicator):
+        '''
+        calls MPI_Finalize(), and no more communication can happen afterwards
+        '''
+        cntk_py.DistributedCommunicator.finalize()
+
+class DistributedTrainer(cntk_py.DistributedTrainer):
     '''
-    A communicator interface exposing communication primitives that serve as building blocks
-    for distributed training.
+    A distributed trainer that handles data like gradients/momentums across multiple MPI workers
     '''

     @typemap
     def communicator(self):
         '''
+        Returns the distributed communicator that talks to other MPI workers
+
         Returns:
             :class:`Communicator`: descriptor of current process.
         '''
         return super().get_communicator()

+    @property
+    def distributed_after(self):
+        '''
+        number of samples to process, then parallelization starts
+        '''
+        return super().get_distributed_after_sample_count()
+
 @typemap
-def mpi_communicator():
+def data_parallel_distributed_trainer(num_quantization_bits=32, distributed_after=0, use_async_buffered_parameter_update=False):
     '''
-    Creates a mpi communicator
-
-    Returns:
-        :class:`Communicator`: a distributed communicator
-    '''
-    return cntk_py.mpicommunicator()
-
-@typemap
-def quantized_mpi_communicator(num_quantization_bits):
-    '''
-    Creates a quantized mpi communicator
+    Creates a data parallel distributed trainer

     Args:
-        num_quantization_bits (`int`): num_quantization_bits
-
-    Returns:
-        :class:`QuantizedCommunicator`: a quantized distributed communicator
-    '''
-    return cntk_py.quantized_mpicommunicator(True, True, num_quantization_bits)
-
-def data_parallel_distributed_trainer(communicator, use_async_buffered_parameter_update):
-    '''
-    Creates a data parallel distributed trainer using `communicator` with
-    option `use_async_buffered_parameter_update`.
-
-    Args:
-        communicator: a communicator or a quantized communicator
-        use_async_buffered_parameter_update (`bool`): use async buffered parameter update
+        num_quantization_bits (int): number of bits for quantization (1 to 32)
+        distributed_after (int): number of samples after which distributed training starts
+        use_async_buffered_parameter_update (bool): use async buffered parameter update

     Returns:
         a distributed trainer instance
     '''
-    if (isinstance(communicator, QuantizedCommunicator)):
-        return cntk_py.create_quantized_data_parallel_distributed_trainer(communicator, use_async_buffered_parameter_update)
+    if (num_quantization_bits < 32):
+        return cntk_py.create_quantized_data_parallel_distributed_trainer(
+            cntk_py.quantized_mpicommunicator(True, True, num_quantization_bits),
+            use_async_buffered_parameter_update,
+            distributed_after)
     else:
-        return cntk_py.create_data_parallel_distributed_trainer(communicator, use_async_buffered_parameter_update)
+        return cntk_py.create_data_parallel_distributed_trainer(
+            cntk_py.mpicommunicator(),
+            use_async_buffered_parameter_update,
+            distributed_after)

+@typemap
+def block_momentum_distributed_trainer(block_size, block_momentum_as_time_constant=None, use_nestrov_momentum=True, reset_sgd_momentum_after_aggregation=True, block_learning_rate=1.0, distributed_after=0):
+    '''
+    Creates a block momentum distributed trainer
+
+    Args:
+        block_size (int): block size
+        block_momentum_as_time_constant (float): block momentum as time constant
+        use_nestrov_momentum (bool): use nestrov momentum
+        reset_sgd_momentum_after_aggregation (bool): reset SGD momentum after aggregation
+        block_learning_rate (float): block learning rate
+        distributed_after (int): number of samples after which distributed training starts
+    Returns:
+        a distributed trainer instance
+    '''
+    if block_momentum_as_time_constant == None:
+        return cntk_py.create_block_momentum_distributed_trainer(
+            cntk_py.mpicommunicator(),
+            block_size,
+            use_nestrov_momentum,
+            reset_sgd_momentum_after_aggregation,
+            block_learning_rate,
+            distributed_after)
+    else:
+        return cntk_py.create_block_momentum_distributed_trainer(
+            cntk_py.mpicommunicator(),
+            block_size,
+            block_momentum_as_time_constant,
+            use_nestrov_momentum,
+            reset_sgd_momentum_after_aggregation,
+            block_learning_rate,
+            distributed_after)

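After this change the communicator is built inside the factory functions, so callers only choose quantization and the warm-start point. A hedged usage sketch of both factories (the hyperparameter values are illustrative):

    from cntk import distributed

    # Data-parallel SGD with 1-bit quantization and a 100-sample warm start;
    # num_quantization_bits < 32 selects the quantized MPI communicator internally.
    dp_trainer = distributed.data_parallel_distributed_trainer(
        num_quantization_bits=1,
        distributed_after=100)
    assert dp_trainer.distributed_after == 100

    # Block-momentum SGD; block_momentum_as_time_constant is optional.
    bm_trainer = distributed.block_momentum_distributed_trainer(
        block_size=1024,
        block_momentum_as_time_constant=4096)
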
@@ -1,4 +1,4 @@
-# Copyright (c) Microsoft. All rights reserved.
+# Copyright (c) Microsoft. All rights reserved.

 # Licensed under the MIT license. See LICENSE.md file in the project root
 # for full license information.

@@ -11,6 +11,7 @@ from cntk.device import use_default_device

 INFINITELY_REPEAT = cntk_py.MinibatchSource.infinitely_repeat
 FULL_DATA_SWEEP = cntk_py.MinibatchSource.full_data_sweep
+INFINITE_SAMPLES = cntk_py.MinibatchSource.infinite_samples

 class MinibatchData(cntk_py.MinibatchData):
     '''

@@ -60,13 +61,23 @@ class MinibatchSource(cntk_py.MinibatchSource):
     A `MinibatchSource` can be indexed by the stream name, which will return a
     :class:`MinibatchData` object that can be passed e.g. to the
     :func:`~cntk.trainer.Trainer.train_minibatch` function.
+
+    Args:
+        deserializers ('list', default is empty): list of deserializers
+            (:class:`ImageDeserializer` for now).
+        randomize (bool, default True): randomize images before every epoch
+        epoch_size (int): epoch size
+        distributed_after (int): sample count after which minibatch source becomes distributed
     '''
-    def __init__(self, deserializers=None, randomize=True, epoch_size=INFINITELY_REPEAT, distributed_communicator=None):
+    def __init__(self, deserializers=None, randomize=True, epoch_size=INFINITELY_REPEAT, distributed_after=INFINITE_SAMPLES):
         if not isinstance(deserializers, (list,tuple)):
             deserializers = [deserializers] # allow passing a single item or a list
-        reader_config = ReaderConfig(deserializers=deserializers, randomize=randomize, epoch_size=epoch_size)
-        source = minibatch_source(reader_config, distributed_communicator)
+        reader_config = ReaderConfig(
+            deserializers=deserializers,
+            randomize=randomize,
+            epoch_size=epoch_size,
+            distributed_after=distributed_after)
+        source = minibatch_source(reader_config)
         # transplant into this class instance
         self.__dict__ = source.__dict__
         # transplant all members of deserializers into a record called streams

@@ -174,6 +185,12 @@ class MinibatchSource(cntk_py.MinibatchSource):
         '''
         super(MinibatchSource, self).restore_from_checkpoint(checkpoint)

+    @property
+    def is_distributed(self):
+        '''
+        Whether the minibatch source is running distributed
+        '''
+        return super(MinibatchSource, self).is_distributed()

 def _py_dict_to_cntk_dict(py_dict):
     '''

@@ -204,20 +221,16 @@ def _py_dict_to_cntk_dict(py_dict):

 # TODO: This should be a private function; use MinibatchSource(deserializer, ...).
 @typemap
-def minibatch_source(config, distributed_communicator):
+def minibatch_source(config):
     '''
     Instantiate the CNTK built-in composite minibatch source which is used to stream data into the network.
     Args:
         config (dict): a dictionary containing all the key-value configuration entries.
-        distributed_communicator: optional distributed communicator
     Returns:
         :class:`MinibatchSource`
     '''
     cntk_dict = _py_dict_to_cntk_dict(config)
-    if (distributed_communicator == None):
-        return cntk_py.create_composite_minibatch_source(cntk_dict)
-    else:
-        return cntk_py.create_composite_minibatch_source(cntk_dict, distributed_communicator)
+    return cntk_py.create_composite_minibatch_source(cntk_dict)

 # TODO: This should be a private class.
 class ReaderConfig(dict):

@@ -229,30 +242,28 @@ class ReaderConfig(dict):
             (:class:`ImageDeserializer` for now).
         randomize (bool, default True): randomize images before every epoch
         epoch_size (int): epoch size
+        distributed_after (int): sample count after which reader becomes distributed
     '''
-    def __init__(self, deserializers=None, randomize=True, epoch_size=INFINITELY_REPEAT):
+    def __init__(self, deserializers=None, randomize=True, epoch_size=INFINITELY_REPEAT, distributed_after=INFINITE_SAMPLES):

-        self['epochSize'] = epoch_size
+        self['epochSize'] = cntk_py.SizeTWrapper(epoch_size) # force to store in size_t
         if not isinstance(deserializers, (list, tuple)):
             deserializers = [deserializers]
         self['deserializers'] = self.deserializers = deserializers or []
         self['randomize'] = randomize
+        self['distributedAfterSampleCount'] = cntk_py.SizeTWrapper(distributed_after)

     @typemap
-    def minibatch_source(self, distributed_communicator=None):
+    def minibatch_source(self):
         '''
         Creates an instance of :class:`MinibatchSource` from this
         instance, which can be used to feed data into the `eval()` methods of
         the graph nodes or the `train_minibatch()` of :class:`~cntk.trainer.Trainer`.

-        Args:
-            distributed_communicator (:class:`~cntk.distributed.communicator`): distributed communicator
-
         Returns:
             instance of :class:`MinibatchSource`
         '''
-        return minibatch_source(self, distributed_communicator)
+        return minibatch_source(self)


 class Deserializer(dict):

@@ -475,7 +486,7 @@ class CTFDeserializer(Deserializer):

 # TODO: This should not exist; use MinibatchSource(CTFDeserializer(...))
 @typemap
-def text_format_minibatch_source(path, stream_configs, epoch_size=INFINITELY_REPEAT, randomize=True, distributed_communicator=None):
+def text_format_minibatch_source(path, stream_configs, epoch_size=INFINITELY_REPEAT, randomize=True, distributed_after=INFINITE_SAMPLES):
     '''
     Creates a minibatch source from a CNTKTextFormatReader file.

@@ -487,15 +498,12 @@ def text_format_minibatch_source(path, stream_configs, epoch_size=INFINITELY_REP
         epoch_size (int, optional): size of an epoch. In case of 0 the size
             of the training set will be taken. Default is max of 64bit.
         randomize (bool, optional): whether to randomize the contents of data file.
-        distributed_communicator (:class:`~cntk.distributed.communicator`): optional distributed communicator
+        distributed_after (int, optional): sample count after which minibatch source becomes distributed

     Returns:
         :class:`MinibatchSource`
     '''
-    if distributed_communicator == None:
-        return cntk_py.text_format_minibatch_source(path, stream_configs, epoch_size, randomize)
-    else:
-        return cntk_py.text_format_minibatch_source(path, stream_configs, epoch_size, randomize, distributed_communicator)
+    return cntk_py.text_format_minibatch_source(path, stream_configs, epoch_size, randomize, distributed_after)

 # TODO: this should be a private class; use StreamDef instead

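With distributed_communicator gone from the io surface, a MinibatchSource is only told when to switch to distributed reading. A hedged sketch (the deserializer is a placeholder, e.g. an ImageDeserializer configured as elsewhere in the examples):

    from cntk.io import MinibatchSource, INFINITE_SAMPLES

    # `my_deserializer` is hypothetical; construct it as in the examples above.
    source = MinibatchSource(
        my_deserializer,
        randomize=True,
        distributed_after=INFINITE_SAMPLES)  # the default: never go distributed
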
@@ -78,7 +78,7 @@ def test_image():

     rc = ReaderConfig(image, randomize=False, epoch_size=epoch_size)

-    assert rc['epochSize'] == epoch_size
+    assert rc['epochSize'].value == epoch_size
     assert rc['randomize'] == False
     assert len(rc['deserializers']) == 1
     d = rc['deserializers'][0]

@@ -1,4 +1,4 @@
-# Copyright (c) Microsoft. All rights reserved.
+# Copyright (c) Microsoft. All rights reserved.

 # Licensed under the MIT license. See LICENSE.md file in the project root
 # for full license information.

@@ -13,8 +13,25 @@ from ..learner import *
 from .. import distributed
 from .. import cross_entropy_with_softmax, classification_error, parameter, \
     input_variable, times, plus, reduce_sum

+def create_data_parallel_distributed_trainer(quantized, warm_start):
+    return distributed.data_parallel_distributed_trainer(
+        use_async_buffered_parameter_update=False,
+        num_quantization_bits=(1 if quantized else 32),
+        distributed_after=warm_start)
+
-def run_distributed_trainer(tmpdir, quantized):
+def create_block_momentum_distributed_trainer(quantized, warm_start):
+    return distributed.block_momentum_distributed_trainer(
+        block_size=1024,
+        distributed_after=warm_start)
+
+def create_block_momentum_distributed_trainer_with_time_constant(quantized, warm_start):
+    return distributed.block_momentum_distributed_trainer(
+        block_size=1024,
+        block_momentum_as_time_constant=4096,
+        distributed_after=warm_start)
+
+def run_distributed_trainer(tmpdir, quantized, create_func):

     in1 = input_variable(shape=1)
     labels = input_variable(shape=1)

@@ -23,21 +40,21 @@ def run_distributed_trainer(tmpdir, quantized):
     ce = cross_entropy_with_softmax(z, labels)
     errs = classification_error(z, labels)

-    if quantized:
-        communicator = distributed.quantized_mpi_communicator(1)
-    else:
-        communicator = distributed.mpi_communicator()
+    warm_start = (100 if quantized else 0)
+
+    dist_trainer = create_func(quantized, warm_start)
+
+    assert dist_trainer.distributed_after == warm_start
+
+    communicator = dist_trainer.communicator()
     workers = communicator.workers()
     current_worker = communicator.current_worker()
     found_rank = False
     for wk in workers:
         if current_worker.global_rank == wk.global_rank:
             found_rank = True

-    assert found_rank
-
-    dist_trainer = distributed.data_parallel_distributed_trainer(communicator, False)
+    assert found_rank

     momentum_time_constant = momentum_as_time_constant_schedule(1100)
     lr_per_sample = learning_rate_schedule(0.007, UnitType.sample)

@@ -49,7 +66,7 @@ def run_distributed_trainer(tmpdir, quantized):
     arguments = {in1: in1_value, labels: label_value}
     z_output = z.output
     updated, var_map = trainer.train_minibatch(arguments, [z_output])

     p = str(tmpdir / 'checkpoint.dat')
     trainer.save_checkpoint(p)
     trainer.restore_from_checkpoint(p)

@@ -64,6 +81,9 @@ def run_distributed_trainer(tmpdir, quantized):
     assert isinstance(trainer.parameter_learners[0], Learner)

 def test_distributed(tmpdir, is_1bit_sgd):
-    run_distributed_trainer(tmpdir, quantized=(True if is_1bit_sgd==1 else False))
+    run_distributed_trainer(tmpdir, quantized=(True if is_1bit_sgd==1 else False), create_func=create_data_parallel_distributed_trainer)
+    if is_1bit_sgd == 1:
+        run_distributed_trainer(tmpdir, True, create_func=create_block_momentum_distributed_trainer)
+        run_distributed_trainer(tmpdir, True, create_func=create_block_momentum_distributed_trainer_with_time_constant)
     distributed.Communicator.finalize()

@@ -203,3 +203,10 @@ class Trainer(cntk_py.Trainer):
         The number of samples in the last minibatch trained with
         '''
         return super(Trainer, self).previous_minibatch_sample_count()
+
+    @property
+    def is_running_distributed(self):
+        '''
+        Whether the trainer is running distributed
+        '''
+        return super(Trainer, self).is_running_distributed()

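The property mirrors the minibatch source's flag on the trainer side, so scripts can gate per-phase work without tracking sample counts themselves. A small sketch (assumes an existing `trainer`):

    from cntk import distributed

    # `trainer` is assumed to be an existing cntk Trainer instance.
    if trainer.is_running_distributed:
        pass  # e.g. adjust logging; warm start keeps this False early in training

    # MPI must be finalized exactly once at the end of a distributed run.
    distributed.Communicator.finalize()
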
@@ -1,8 +1,9 @@
-# ==============================================================================
+# ==============================================================================
 # Copyright (c) Microsoft. All rights reserved.
 # Licensed under the MIT license. See LICENSE.md file in the project root
 # for full license information.
 # ==============================================================================
+from time import time

 # TODO: Let's switch to import logging in the future instead of print. [ebarsoum]
 class ProgressPrinter:

@@ -34,6 +35,7 @@ class ProgressPrinter:
         self.freq = freq
         self.first = first
         self.tag = '' if not tag else "[{}] ".format(tag)
+        self.epoch_start_time = 0

         if freq==0:
             print(' average since average since examples')

@@ -100,10 +102,16 @@ class ProgressPrinter:
         if self.freq > 0:
             self.updates = 0
         avg_loss, avg_metric, samples = self.reset_start()
+        epoch_end_time = time()
+        time_delta = epoch_end_time - self.epoch_start_time
+        speed = 0
+        if (time_delta > 0):
+            speed = samples / time_delta
+        self.epoch_start_time = epoch_end_time
         if with_metric:
-            print("Finished Epoch [{}]: {}loss = {:0.6f} * {}, metric = {:0.1f}% * {}".format(self.epochs, self.tag, avg_loss, samples, avg_metric*100.0, samples))
+            print("Finished Epoch [{}]: {}loss = {:0.6f} * {}, metric = {:0.1f}% * {} {:0.3f}s ({:5.1f} samples per second)".format(self.epochs, self.tag, avg_loss, samples, avg_metric*100.0, samples, time_delta, speed))
         else:
-            print("Finished Epoch [{}]: {}loss = {:0.6f} * {}".format(self.epochs, self.tag, avg_loss, samples))
+            print("Finished Epoch [{}]: {}loss = {:0.6f} * {} {:0.3f}s ({:5.1f} samples per second)".format(self.epochs, self.tag, avg_loss, samples, time_delta, speed))
         return avg_loss, avg_metric, samples # BUGBUG: for freq=0, we don't return anything here

     def update(self, loss, minibatch_size, metric=None):

@@ -124,6 +132,8 @@ class ProgressPrinter:
         if metric is not None:
             self.metric_since_start += metric * minibatch_size
             self.metric_since_last += metric * minibatch_size
+        if self.epoch_start_time == 0:
+            self.epoch_start_time = time()
         if self.freq == 0 and (self.updates+1) & self.updates == 0:
             avg_loss, avg_metric, samples = self.reset_last()
             if metric is not None:

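The epoch summary now also reports wall-clock time and throughput; the zero-guard matters because the very first epoch may report before epoch_start_time was ever set. The arithmetic in isolation (a minimal sketch):

    from time import time

    def epoch_speed(samples, epoch_start_time):
        # Guarded throughput: avoid division by zero on degenerate timing.
        time_delta = time() - epoch_start_time
        return (samples / time_delta) if time_delta > 0 else 0
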
@@ -15,7 +15,7 @@ def map_if_possible(obj):
     from cntk.learner import Learner
     from cntk.io import MinibatchSource, MinibatchData, StreamConfiguration
     from cntk.axis import Axis
-    from cntk.distributed import WorkerDescriptor, Communicator, QuantizedCommunicator
+    from cntk import distributed
     __typemap = {
         cntk_py.Variable: Variable,
         cntk_py.Parameter: Parameter,

@@ -26,9 +26,9 @@ def map_if_possible(obj):
         cntk_py.MinibatchData: MinibatchData,
         cntk_py.StreamConfiguration: StreamConfiguration,
         cntk_py.Axis: Axis,
-        cntk_py.DistributedWorkerDescriptor: WorkerDescriptor,
-        cntk_py.DistributedCommunicator: Communicator,
-        cntk_py.QuantizedDistributedCommunicator: QuantizedCommunicator
+        cntk_py.DistributedWorkerDescriptor: distributed.WorkerDescriptor,
+        cntk_py.DistributedCommunicator: distributed.Communicator,
+        cntk_py.DistributedTrainer: distributed.DistributedTrainer
     }

 # Some types like NumPy arrays don't let to set the __class__