From 7fd713988f9c955577d2fd1f6c931052c8ad9c99 Mon Sep 17 00:00:00 2001
From: Chirag Gupta
Date: Tue, 14 Nov 2017 06:05:11 +0000
Subject: [PATCH] Bonsai compiles; not tested

---
 BonsaiLocalDriver.cpp            |  2 +-
 src/Bonsai/Bonsai.h              |  2 +-
 src/Bonsai/BonsaiFunctions.cpp   |  2 +-
 src/Bonsai/BonsaiHyperParams.cpp |  5 ++---
 src/Bonsai/BonsaiIngestTest.cpp  |  2 +-
 src/Bonsai/BonsaiTrainer.cpp     | 23 +++++++++++++----------
 6 files changed, 19 insertions(+), 17 deletions(-)

diff --git a/BonsaiLocalDriver.cpp b/BonsaiLocalDriver.cpp
index bd68cbd7..743e57fa 100644
--- a/BonsaiLocalDriver.cpp
+++ b/BonsaiLocalDriver.cpp
@@ -38,7 +38,7 @@ int main(int argc, char **argv)
   BonsaiPredictor predictor(modelBytes, model);
   // use the constructor predictor(modelBytes, model, false) for loading a sparse model.
   predictor.importMeanVar(meanVarBytes, meanVar);
-  predictor.batchEvaluate(trainer.data.Xtest, trainer.data.Ytest, dataDir, currResultsPath);
+  predictor.batchEvaluate(trainer.data.Xvalidation, trainer.data.Yvalidation, dataDir, currResultsPath);
 
   delete[] model, meanVar;
 
diff --git a/src/Bonsai/Bonsai.h b/src/Bonsai/Bonsai.h
index c7ef7289..ab6d2239 100644
--- a/src/Bonsai/Bonsai.h
+++ b/src/Bonsai/Bonsai.h
@@ -80,7 +80,7 @@ namespace EdgeML
       int seed;
 
       int iters, epochs;
-      dataCount_t ntrain, ntest, batchSize;
+      dataCount_t ntrain, nvalidation, batchSize;
       bool isOneIndex;
 
       FP_TYPE Sigma; ///< Sigmoid parameter for prediction
diff --git a/src/Bonsai/BonsaiFunctions.cpp b/src/Bonsai/BonsaiFunctions.cpp
index b736639c..76177a26 100644
--- a/src/Bonsai/BonsaiFunctions.cpp
+++ b/src/Bonsai/BonsaiFunctions.cpp
@@ -957,7 +957,7 @@ void Bonsai::parseInput(const int& argc, const char** argv,
         required++;
         break;
       case 'E':
-        hyperParam.ntest = int(atoi(argv[i]));
+        hyperParam.nvalidation = int(atoi(argv[i]));
         required++;
         break;
       }
diff --git a/src/Bonsai/BonsaiHyperParams.cpp b/src/Bonsai/BonsaiHyperParams.cpp
index 8b925102..4326496b 100644
--- a/src/Bonsai/BonsaiHyperParams.cpp
+++ b/src/Bonsai/BonsaiHyperParams.cpp
@@ -20,7 +20,7 @@ BonsaiModel::BonsaiHyperParams::BonsaiHyperParams()
   seed = 42;
 
   ntrain = 0;
-  ntest = 0;
+  nvalidation = 0;
   batchSize = 0;
 
   iters = 0;
@@ -102,7 +102,6 @@ void BonsaiModel::BonsaiHyperParams::finalizeHyperParams()
   // Following asserts removed to faciliate support for TLC
   // which does not know how many datapoints are going to be fed before-hand!
   // assert(ntrain >= 1);
-  // assert(ntest >= 0);
 
   assert(projectionDimension <= dataDimension + 1);
   assert(numClasses > 0);
@@ -125,4 +124,4 @@
   internalClasses = (numClasses <= 2) ? 1 : numClasses;
   isModelInitialized = true;
   LOG_INFO("Dataset successfully initialized...");
-}
\ No newline at end of file
+}
diff --git a/src/Bonsai/BonsaiIngestTest.cpp b/src/Bonsai/BonsaiIngestTest.cpp
index 256430b4..b1c6c37f 100644
--- a/src/Bonsai/BonsaiIngestTest.cpp
+++ b/src/Bonsai/BonsaiIngestTest.cpp
@@ -24,7 +24,7 @@ int main()
 
   hyperParam.numClasses = 10;
 
-  hyperParam.ntest = 0;
+  hyperParam.nvalidation = 0;
   hyperParam.ntrain = 5000;
 
   hyperParam.Sigma = 1.0;
diff --git a/src/Bonsai/BonsaiTrainer.cpp b/src/Bonsai/BonsaiTrainer.cpp
index 8f50833a..cef03379 100644
--- a/src/Bonsai/BonsaiTrainer.cpp
+++ b/src/Bonsai/BonsaiTrainer.cpp
@@ -29,7 +29,7 @@ BonsaiTrainer::BonsaiTrainer(
   data(dataIngestType,
     DataFormatParams{
       model.hyperParams.ntrain,
-      model.hyperParams.ntest,
+      model.hyperParams.nvalidation,
       model.hyperParams.numClasses,
       model.hyperParams.dataDimension })
 {
@@ -47,7 +47,7 @@ BonsaiTrainer::BonsaiTrainer(
   mean = MatrixXuf::Zero(model.hyperParams.dataDimension, 1);
   variance = MatrixXuf::Zero(model.hyperParams.dataDimension, 1);
 
-  data.loadDataFromFile(model.hyperParams.dataformatType, dataDir + "/train.txt", dataDir + "/test.txt");
+  data.loadDataFromFile(model.hyperParams.dataformatType, dataDir + "/train.txt", dataDir + "/test.txt", "");
   finalizeData();
 }
 
@@ -62,7 +62,7 @@ BonsaiTrainer::BonsaiTrainer(
   data(dataIngestType,
     DataFormatParams{
       model.hyperParams.ntrain,
-      model.hyperParams.ntest,
+      model.hyperParams.nvalidation,
      model.hyperParams.numClasses,
       model.hyperParams.dataDimension })
 {
@@ -81,7 +81,7 @@ BonsaiTrainer::BonsaiTrainer(
   mean = MatrixXuf::Zero(model.hyperParams.dataDimension, 1);
   variance = MatrixXuf::Zero(model.hyperParams.dataDimension, 1);
 
-  data.loadDataFromFile(model.hyperParams.dataformatType, dataDir + "/train.txt", dataDir + "/test.txt");
+  data.loadDataFromFile(model.hyperParams.dataformatType, dataDir + "/train.txt", dataDir + "/test.txt", "");
   finalizeData();
 
   initializeModel();
@@ -96,7 +96,7 @@ BonsaiTrainer::BonsaiTrainer(
   data(dataIngestType,
     DataFormatParams{
       model.hyperParams.ntrain,
-      model.hyperParams.ntest,
+      model.hyperParams.nvalidation,
       model.hyperParams.numClasses,
       model.hyperParams.dataDimension })
 {
@@ -155,12 +155,11 @@ void BonsaiTrainer::finalizeData()
     // This condition means that the ingest type is Interface ingest,
     // hence the number of training points was not known beforehand.
     model.hyperParams.ntrain = data.Xtrain.cols();
-    assert(data.Xtest.cols() == 0);
-    model.hyperParams.ntest = 0;
+    assert(data.Xvalidation.cols() == 0);
+    model.hyperParams.nvalidation = 0;
   }
   else {
     assert(model.hyperParams.ntrain == data.Xtrain.cols());
-    // assert(model.hyperParams.ntest == data.Xtest.cols());
   }
 
   // Following asserts can only be made in finalieData since TLC
@@ -340,11 +339,15 @@ void BonsaiTrainer::exportMeanVar(
 void BonsaiTrainer::normalize()
 {
   if (model.hyperParams.normalizationType == minMax) {
-    minMaxNormalize(data.Xtrain, data.Xtest);
+    computeMinMax(data.Xtrain, data.min, data.max);
+    minMaxNormalize(data.Xtrain, data.min, data.max);
+    if (data.Xvalidation.cols() > 0)
+      minMaxNormalize(data.Xvalidation, data.min, data.max);
   }
   else if (model.hyperParams.normalizationType == l2) {
     l2Normalize(data.Xtrain);
-    l2Normalize(data.Xtest);
+    if (data.Xvalidation.cols() > 0)
+      l2Normalize(data.Xvalidation);
   }
   else;
 }