From 9cbcbf4d80424e7b917fdf467348ab697b471648 Mon Sep 17 00:00:00 2001
From: liming-vie
Date: Tue, 26 Jul 2016 16:17:47 +0800
Subject: [PATCH] Add loss for train

---
 .../LogisticRegression/src/logreg.cpp       |   6 +-
 .../LogisticRegression/src/model/model.cpp  |  17 +--
 .../LogisticRegression/src/model/model.h    |   7 +-
 .../src/objective/ftrl_objective.h          |   6 +-
 .../src/objective/objective.cpp             | 126 ++++++++++++------
 .../src/objective/objective.h               |  13 +-
 .../src/objective/sigmoid_objective.h       |   8 +-
 .../src/objective/softmax_objective.h       |   6 +-
 8 files changed, 119 insertions(+), 70 deletions(-)

diff --git a/Applications/LogisticRegression/src/logreg.cpp b/Applications/LogisticRegression/src/logreg.cpp
index caee1bb..b7287e2 100644
--- a/Applications/LogisticRegression/src/logreg.cpp
+++ b/Applications/LogisticRegression/src/logreg.cpp
@@ -54,6 +54,7 @@ void LogReg::Train(const std::string& train_file) {
   int count = 0;
   int train_epoch = config_->train_epoch;
   size_t sample_seen = 0;
+  float train_loss = 0.0f;
   size_t last = 0;
   for (int ep = 0; ep < train_epoch; ++ep) {
     reader->Reset();
@@ -65,11 +66,12 @@ void LogReg::Train(const std::string& train_file) {
     while ((count = reader->Read(buffer_size, samples))) {
       Log::Write(Debug, "model training %d samples, sample seen %d\n",
         count, sample_seen);
-      model_->Update(count, samples);
+      train_loss += model_->Update(count, samples);
       sample_seen += count;
       if (sample_seen - last >= config_->show_time_per_sample) {
+        Log::Write(Info, "Sample seen %lld, train loss %f\n", sample_seen, train_loss / (sample_seen - last));
+        train_loss = 0.0f;
         last = sample_seen;
-        Log::Write(Info, "Sample seen %lld\n", sample_seen);
         model_->DisplayTime();
       }
       reader->Free(count);
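The Train() change above accumulates the per-call loss sums returned by Model::Update and logs their mean over each reporting window of show_time_per_sample samples. A minimal standalone sketch of that windowed-average arithmetic, with invented names and hard-coded batch data standing in for the reader loop:

    #include <cstdio>
    #include <vector>

    int main() {
      // Each entry stands in for one reader buffer: Update()'s summed loss
      // and the number of samples it covered.
      std::vector<float> batch_loss_sums = {62.0f, 55.5f, 48.0f, 43.5f};
      std::vector<int> batch_counts = {100, 100, 100, 100};
      size_t show_time_per_sample = 200;  // report every 200 samples

      float train_loss = 0.0f;
      size_t sample_seen = 0, last = 0;
      for (size_t i = 0; i < batch_loss_sums.size(); ++i) {
        train_loss += batch_loss_sums[i];
        sample_seen += batch_counts[i];
        if (sample_seen - last >= show_time_per_sample) {
          // Mean loss over the window, mirroring the Log::Write(Info, ...) call.
          std::printf("Sample seen %zu, train loss %f\n",
                      sample_seen, train_loss / (sample_seen - last));
          train_loss = 0.0f;
          last = sample_seen;
        }
      }
      return 0;
    }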
diff --git a/Applications/LogisticRegression/src/model/model.cpp b/Applications/LogisticRegression/src/model/model.cpp
index ab0432a..f024c53 100644
--- a/Applications/LogisticRegression/src/model/model.cpp
+++ b/Applications/LogisticRegression/src/model/model.cpp
@@ -55,13 +55,14 @@ Model<EleType>::~Model() {
 }
 
 template <typename EleType>
-inline void Model<EleType>::GetGradient(Sample<EleType>* sample,
+inline float Model<EleType>::GetGradient(Sample<EleType>* sample,
   DataBlock<EleType>* delta) {
-  objective_->Gradient(sample, table_, delta);
+  return objective_->Gradient(sample, table_, delta);
 }
 
 template <typename EleType>
-void Model<EleType>::Update(int count, Sample<EleType>** samples) {
+float Model<EleType>::Update(int count, Sample<EleType>** samples) {
+  float train_loss = 0.0f;
   // process each batch
   for (int i = 0; i < count; i += minibatch_size_) {
     ++compute_count_;
@@ -71,7 +72,7 @@ void Model<EleType>::Update(int count, Sample<EleType>** samples) {
     int upper = i + minibatch_size_;
     upper = upper > count ? count : upper;
     for (int j = i; j < upper; ++j) {
-      GetGradient(samples[j], delta_);
+      train_loss += GetGradient(samples[j], delta_);
     }
 
     // calculate and average delta
@@ -106,6 +107,7 @@ void Model<EleType>::Update(int count, Sample<EleType>** samples) {
     // update delta
     UpdateTable(delta_);
   }
+  return train_loss;
 }
 
 template <typename EleType>
@@ -128,17 +130,12 @@ inline void Model<EleType>::UpdateTable(DataBlock<EleType>* delta) {
   computation_time_ += timer_.ElapseMilliSeconds();
 }
 
-template <typename EleType>
-inline void Model<EleType>::Predict(Sample<EleType> *sample, EleType *predict) {
-  objective_->Predict(sample, table_, predict);
-}
-
 template <typename EleType>
 int Model<EleType>::Predict(int count, Sample<EleType>**samples,
   EleType**predicts) {
   int correct(0);
   for (int i = 0; i < count; ++i) {
-    Predict(samples[i], predicts[i]);
+    this->objective_->Predict(samples[i], this->table_, predicts[i]);
     if (objective_->Correct(samples[i]->label, predicts[i])) {
       ++correct;
     }
diff --git a/Applications/LogisticRegression/src/model/model.h b/Applications/LogisticRegression/src/model/model.h
index e850117..9a563c8 100644
--- a/Applications/LogisticRegression/src/model/model.h
+++ b/Applications/LogisticRegression/src/model/model.h
@@ -29,9 +29,10 @@ public:
   explicit Model(Configure& config);
   virtual ~Model();
   // update model with #count samples
-  virtual void Update(int count, Sample<EleType>**samples);
+  // \return sum of train loss of every sample
+  virtual float Update(int count, Sample<EleType>**samples);
   // \param input one input
-  virtual void Predict(Sample<EleType>*sample, EleType*predict);
+  // \return correct number
   virtual int Predict(int count, Sample<EleType>**samples, EleType**predicts);
   // load model data from a binary file
   virtual void Load(const std::string& model_file);
@@ -48,7 +49,7 @@ public:
 
 protected:
   // copmpute update delta
-  virtual void GetGradient(Sample<EleType>* sample, DataBlock<EleType>* delta);
+  virtual float GetGradient(Sample<EleType>* sample, DataBlock<EleType>* delta);
   // update table
   virtual void UpdateTable(DataBlock<EleType>* delta);
 
diff --git a/Applications/LogisticRegression/src/objective/ftrl_objective.h b/Applications/LogisticRegression/src/objective/ftrl_objective.h
index a5ff301..7795c29 100644
--- a/Applications/LogisticRegression/src/objective/ftrl_objective.h
+++ b/Applications/LogisticRegression/src/objective/ftrl_objective.h
@@ -12,15 +12,15 @@ public:
   ~FTRLObjective();
 
-  void Gradient(Sample<EleType>* sample,
+  float Gradient(Sample<EleType>* sample,
     DataBlock<EleType>* model,
     DataBlock<EleType>* gradient);
 
-  void Predict(Sample<EleType>*sample,
+  float Predict(Sample<EleType>*sample,
     DataBlock<EleType>* model,
     EleType* predict);
 
 private:
-  void Predict(Sample<EleType>*sample,
+  float Predict(Sample<EleType>*sample,
     DataBlock<EleType>* model,
     EleType* predict, DataBlock<EleType>* w);
   EleType sgn(const EleType x);
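Update() above now threads one float through the minibatch loop: every GetGradient() call contributes a per-sample loss, and the sum for the whole buffer is handed back to the caller, which averages it over the samples seen. A small self-contained sketch of that accumulation pattern (the names and stand-in loss values are invented, not the repo's types):

    #include <cstdio>
    #include <vector>

    // Stand-in for GetGradient(): returns the loss for one sample.
    float FakeGetGradient(float per_sample_loss) { return per_sample_loss; }

    float Update(const std::vector<float>& samples, int minibatch_size) {
      float train_loss = 0.0f;
      int count = static_cast<int>(samples.size());
      for (int i = 0; i < count; i += minibatch_size) {
        int upper = i + minibatch_size;
        upper = upper > count ? count : upper;
        for (int j = i; j < upper; ++j) {
          train_loss += FakeGetGradient(samples[j]);
        }
        // ...delta averaging and table update would happen here per minibatch...
      }
      return train_loss;  // caller divides by the number of samples seen
    }

    int main() {
      std::vector<float> losses = {0.7f, 0.2f, 0.9f, 0.1f, 0.4f};
      std::printf("summed train loss: %f\n", Update(losses, 2));  // 2.3
      return 0;
    }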
diff --git a/Applications/LogisticRegression/src/objective/objective.cpp b/Applications/LogisticRegression/src/objective/objective.cpp
index ce6b171..1150b5e 100644
--- a/Applications/LogisticRegression/src/objective/objective.cpp
+++ b/Applications/LogisticRegression/src/objective/objective.cpp
@@ -13,6 +13,14 @@
 
 namespace logreg {
 
+inline float MathLog(float x) {
+  return log(x < 0.000001f ? 0.000001f : x);
+}
+
+inline int Round(float x) {
+  return x < 0.5 ? 0 : 1;
+}
+
 template <typename EleType>
 Objective<EleType>::Objective(const Configure &config) {
   this->input_size_ = config.input_size;
@@ -26,13 +34,30 @@ Objective<EleType>::~Objective() {
 }
 
 template <typename EleType>
-inline void Objective<EleType>::Gradient(Sample<EleType>* sample,
+inline float Objective<EleType>::Gradient(Sample<EleType>* sample,
   DataBlock<EleType>*model,
   DataBlock<EleType>* gradient) {
   EleType* loss = new EleType[this->output_size_];
-  Predict(sample, model, loss);
+  float train_loss = Predict(sample, model, loss);
+
   Diff(sample->label, loss);
   AddRegularization(sample, model, loss, gradient);
   delete []loss;
+
+  return train_loss;
+}
+
+template <typename EleType>
+inline float Objective<EleType>::Loss(Sample<EleType>*sample,
+  EleType* predict) {
+  if (output_size_ == 1) {
+    return pow(static_cast<float>(*predict - (sample->label == 1 ? 1.0 : 0.0)), 2);
+  }
+
+  float ret = 0.0f;
+  for (int i = 0; i < output_size_; ++i) {
+    ret += pow(static_cast<float>(predict[i] - (sample->label == i ? 1 : 0)), 2);
+  }
+  return ret / output_size_;
 }
 
@@ -47,7 +72,7 @@ inline void Objective<EleType>::AddRegularization(Sample<EleType>*sample,
     // each input
     for (size_t j = 0; j < size; ++j) {
       size_t key = sample->keys[j] + offset;
-      EleType val = (EleType)(sample->values[j] * loss[i])
+      EleType val = static_cast<EleType>(sample->values[j] * loss[i])
         + regular_->Calculate(key, model);
       EleType* pval = gradient->Get(key);
 
@@ -66,7 +91,7 @@ inline void Objective<EleType>::AddRegularization(Sample<EleType>*sample,
   size_t index = 0;
   for (int i = 0; i < this->output_size_; ++i) {
     for (size_t j = 0; j < this->input_size_; ++j) {
-      rawgrad[index] += (EleType)(rawinput[j] * loss[i]
+      rawgrad[index] += static_cast<EleType>(rawinput[j] * loss[i]
         + regular_->Calculate(index, model));
       ++index;
     }
@@ -86,17 +111,18 @@ inline void Objective<EleType>::Diff(int label, EleType*diff) {
 }
 
 template <typename EleType>
-void Objective<EleType>::Predict(Sample<EleType>*sample,
+float Objective<EleType>::Predict(Sample<EleType>*sample,
   DataBlock<EleType>*model,
   EleType* predict) {
   for (int i = 0; i < this->output_size_; ++i) {
     predict[i] = Dot((size_t)i * this->input_size_, model, sample);
   }
+  return this->Loss(sample, predict);
 }
 
 template <typename EleType>
 bool Objective<EleType>::Correct(const int label, EleType*output) {
   if (this->output_size_ == 1) {
-    return (*output - static_cast<EleType>(label == 1)) == 0;
+    return (Round(static_cast<float>(*output)) - static_cast<EleType>(label == 1)) == 0;
   }
   EleType max = *(output++);
@@ -122,28 +148,35 @@ Objective<EleType>(config) {
 }
 
 template <typename EleType>
-inline void SigmoidObjective<EleType>::Gradient(Sample<EleType>* sample,
+inline float SigmoidObjective<EleType>::Gradient(Sample<EleType>* sample,
   DataBlock<EleType>*model, DataBlock<EleType>* gradient) {
-  EleType loss = (EleType)Sigmoid(sample, model);
+  EleType loss = static_cast<EleType>(Sigmoid(sample, model));
+  float train_loss = this->Loss(sample, &loss);
   this->Diff(sample->label, &loss);
   this->AddRegularization(sample, model, &loss, gradient);
+  return train_loss;
 }
 
 template <typename EleType>
-inline void SigmoidObjective<EleType>::Predict(Sample<EleType>* sample,
+inline float SigmoidObjective<EleType>::Predict(Sample<EleType>* sample,
   DataBlock<EleType>* model, EleType* predict) {
-  *predict = Round(Sigmoid(sample, model));
+  *predict = static_cast<EleType>(Sigmoid(sample, model));
+  return this->Loss(sample, predict);
 }
 
 template <typename EleType>
-inline double SigmoidObjective<EleType>::Sigmoid(Sample<EleType>* sample,
+inline float SigmoidObjective<EleType>::Sigmoid(Sample<EleType>* sample,
   DataBlock<EleType>*model) {
-  return 1.0 / (1.0 + exp(-Dot(0, model, sample)));
+  return static_cast<float>(1.0f / (1.0f + exp(-Dot(0, model, sample))));
 }
 
 template <typename EleType>
-inline EleType SigmoidObjective<EleType>::Round(double x) {
-  return x < 0.5 ? (EleType)0 : (EleType)1;
+inline float SigmoidObjective<EleType>::Loss(Sample<EleType>*sample,
+  EleType* predict) {
+  if (sample->label == 1) {
+    return -MathLog(static_cast<float>(*predict));
+  }
+  return -MathLog(1.0f - static_cast<float>(*predict));
 }
 
 DECLARE_TEMPLATE_CLASS_WITH_BASIC_TYPE(SigmoidObjective);
@@ -157,35 +190,46 @@ Objective<EleType>(config) {
 }
 
 template <typename EleType>
-inline void SoftmaxObjective<EleType>::Predict(Sample<EleType>* sample,
+inline float SoftmaxObjective<EleType>::Predict(Sample<EleType>* sample,
   DataBlock<EleType>* model, EleType* predict) {
-  double sum = Sigmoid(sample, model, predict);
+  float sum = Sigmoid(sample, model, predict);
   for (int i = 0; i < this->output_size_; ++i) {
-    predict[i] = (EleType)(predict[i] / sum);
+    predict[i] = static_cast<EleType>(predict[i] / sum);
   }
+  return this->Loss(sample, predict);
 }
 
 template <typename EleType>
-double SoftmaxObjective<EleType>::Sigmoid(Sample<EleType>* sample,
+float SoftmaxObjective<EleType>::Sigmoid(Sample<EleType>* sample,
   DataBlock<EleType>*model, EleType*sigmoid) {
   for (int i = 0; i < this->output_size_; ++i) {
     sigmoid[i] = Dot(i*this->input_size_, model, sample);
   }
-  double max = sigmoid[0];
+  float max = static_cast<float>(sigmoid[0]);
   for (int i = 1; i < this->output_size_; ++i) {
-    max = max < sigmoid[i] ? sigmoid[i] : max;
+    max = static_cast<float>(max < sigmoid[i] ? sigmoid[i] : max);
   }
 
-  double sum = 0.0;
+  float sum = 0.0f;
   for (int i = 0; i < this->output_size_; ++i) {
-    sigmoid[i] = (EleType)exp(sigmoid[i] - max);
-    sum += sigmoid[i];
+    sigmoid[i] = static_cast<EleType>(exp(sigmoid[i] - max));
+    sum += static_cast<float>(sigmoid[i]);
   }
   return sum;
 }
 
 template <typename EleType>
-inline EleType SoftmaxObjective<EleType>::Round(double x) {
-  return x < 0.5 ? (EleType)0 : (EleType)1;
+inline float SoftmaxObjective<EleType>::Loss(Sample<EleType>*sample,
+  EleType* predict) {
+  float ret = 0.0f;
+  for (int i = 0; i < output_size_; ++i) {
+    if (sample->label == i){
+      ret -= MathLog(static_cast<float>(predict[i]));
+    }
+    else {
+      ret -= MathLog(1.0f - static_cast<float>(predict[i]));
+    }
+  }
+  return ret / output_size_;
 }
 
 DECLARE_TEMPLATE_CLASS_WITH_BASIC_TYPE(SoftmaxObjective)
@@ -214,13 +258,14 @@ FTRLObjective<EleType>::~FTRLObjective() {
 }
 
 template <typename EleType>
-void FTRLObjective<EleType>::Gradient(Sample<EleType>* sample,
+float FTRLObjective<EleType>::Gradient(Sample<EleType>* sample,
   DataBlock<EleType>* model,
   DataBlock<EleType>* gradient) {
   EleType* loss = new EleType[this->output_size_];
   auto w_ = DataBlock<EleType>::GetBlock(true, model->size());
-  Predict(sample, model, loss, w_);
+  float train_loss = Predict(sample, model, loss, w_);
+
   this->Diff(sample->label , loss);
 
   auto g = (DataBlock<FTRLGradient<EleType>>*)gradient;
 
@@ -229,10 +274,10 @@ void FTRLObjective<EleType>::Gradient(Sample<EleType>* sample,
   for (int i = 0; i < this->output_size_; ++i) {
     size_t size = sample->keys.size();
     for (size_t j = 0; j < size; ++j) {
-      double delta_z;
+      EleType delta_z;
 
-      double delta_g = sample->values[j] * loss[i];
-      double square_g = delta_g * delta_g;
+      EleType delta_g = sample->values[j] * loss[i];
+      EleType square_g = static_cast<EleType>(pow(delta_g, 2));
 
       size_t key = sample->keys[j] + offset;
       EleType *w = w_->Get(key);
@@ -241,32 +286,35 @@ void FTRLObjective<EleType>::Gradient(Sample<EleType>* sample,
       } else {
         FTRLEntry<EleType> *en = entry->Get(key);
         if (en == nullptr) {
-          delta_z = alpha_ * delta_g;
+          delta_z = static_cast<EleType>(alpha_ * delta_g);
         } else {
-          delta_z = alpha_ * (sqrt(en->n + square_g) - en->sqrtn);
+          delta_z = static_cast<EleType>(alpha_ * (sqrt(en->n + square_g) - en->sqrtn));
         }
         delta_z = delta_z * (*w) - delta_g;
       }
       // delta_n
       delta_g = -square_g;
-      g->Set(key, FTRLGradient<EleType>((EleType)delta_z, (EleType)delta_g));
+      g->Set(key, FTRLGradient<EleType>(static_cast<EleType>(delta_z),
+        static_cast<EleType>(delta_g)));
     }
     offset += this->input_size_;
   }
   delete[]loss;
   delete w_;
+  return train_loss;
 }
 
 template <typename EleType>
-void FTRLObjective<EleType>::Predict(Sample<EleType>* sample,
+float FTRLObjective<EleType>::Predict(Sample<EleType>* sample,
   DataBlock<EleType>* model, EleType* predict) {
   auto w = DataBlock<EleType>::GetBlock(true, model->size());
-  Predict(sample, model, predict, w);
+  float test_loss = Predict(sample, model, predict, w);
   delete w;
+  return test_loss;
 }
 
 template <typename EleType>
-void FTRLObjective<EleType>::Predict(Sample<EleType>*sample,
+float FTRLObjective<EleType>::Predict(Sample<EleType>*sample,
   DataBlock<EleType>* model, EleType* predict, DataBlock<EleType>* w) {
   auto entry = (DataBlock<FTRLEntry<EleType>>*)model;
   w->Clear();
@@ -276,7 +324,7 @@ void FTRLObjective<EleType>::Predict(Sample<EleType>*sample,
     for (size_t j = 0; j < sample->values.size(); ++j) {
       FTRLEntry<EleType> *en = entry->Get(sample->keys[j] + offset);
       if (en != nullptr && abs(en->z) > lambda1_) {
-        EleType val = (EleType)((sgn(en->z) * lambda1_ - en->z)
+        EleType val = static_cast<EleType>((sgn(en->z) * lambda1_ - en->z)
           / ((beta_ + en->sqrtn) * alpha_ + lambda2_));
         w->Set(sample->keys[j] + offset, val);
       }
@@ -284,12 +332,12 @@ void FTRLObjective<EleType>::Predict(Sample<EleType>*sample,
     offset += this->input_size_;
   }
 
-  objective_->Predict(sample, w, predict);
+  return objective_->Predict(sample, w, predict);
 }
 
 template <typename EleType>
 EleType FTRLObjective<EleType>::sgn(const EleType x) {
-  return (EleType)(x > 0 ? 1 : (x < 0 ? -1 : 0));
+  return static_cast<EleType>(x > 0 ? 1 : (x < 0 ? -1 : 0));
 }
 
 DECLARE_TEMPLATE_CLASS_WITH_BASIC_TYPE(FTRLObjective);
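The objective.cpp changes above introduce three loss formulas: a clamped log() helper (MathLog), a binary log loss in SigmoidObjective::Loss, and a per-output averaged log loss in SoftmaxObjective::Loss. A standalone sketch of those formulas with a few numeric spot checks (simplified copies for illustration, not the repo code):

    #include <cmath>
    #include <cstdio>
    #include <vector>

    // log() with the input clamped to 1e-6, as in the patch, so it never sees 0.
    inline float MathLog(float x) {
      return std::log(x < 0.000001f ? 0.000001f : x);
    }

    // Binary case: -log(p) for a positive label, -log(1 - p) otherwise.
    float SigmoidLoss(int label, float p) {
      return label == 1 ? -MathLog(p) : -MathLog(1.0f - p);
    }

    // Multi-class case: average the per-output log losses, treating each output
    // as its own binary target, mirroring SoftmaxObjective::Loss.
    float SoftmaxLoss(int label, const std::vector<float>& p) {
      float ret = 0.0f;
      for (int i = 0; i < static_cast<int>(p.size()); ++i) {
        ret -= (label == i) ? MathLog(p[i]) : MathLog(1.0f - p[i]);
      }
      return ret / p.size();
    }

    int main() {
      std::printf("sigmoid loss (label=1, p=0.9): %f\n", SigmoidLoss(1, 0.9f));  // ~0.105
      std::printf("sigmoid loss (label=1, p=0.0): %f\n", SigmoidLoss(1, 0.0f));  // clamped, ~13.8
      std::printf("softmax loss (label=2): %f\n",
                  SoftmaxLoss(2, {0.1f, 0.2f, 0.7f}));  // ~0.228
      return 0;
    }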
diff --git a/Applications/LogisticRegression/src/objective/objective.h b/Applications/LogisticRegression/src/objective/objective.h
index e1e7f7c..74c8068 100644
--- a/Applications/LogisticRegression/src/objective/objective.h
+++ b/Applications/LogisticRegression/src/objective/objective.h
@@ -19,16 +19,16 @@ public:
   // regular type
   explicit Objective(const Configure& config);
   virtual ~Objective();
-
-  virtual void Gradient(Sample<EleType>* sample,
+  // return train loss
+  virtual float Gradient(Sample<EleType>* sample,
     DataBlock<EleType>* model,
     DataBlock<EleType>* gradient);
-
-  virtual void Predict(Sample<EleType>*sample,
+  // return test loss
+  virtual float Predict(Sample<EleType>*sample,
     DataBlock<EleType>* model,
     EleType* predict);
-  virtual bool Correct(const int label, EleType*output);
-
+  virtual bool Correct(const int label, EleType*predict);
+
   // factory method to get a new instance
   // \param config should contain objective type
   //  and params for Objective initialization
@@ -41,6 +41,7 @@ protected:
     DataBlock<EleType>* model,
     EleType* loss,
     DataBlock<EleType>* gradient);
+  virtual float Loss(Sample<EleType>*sample, EleType* predict);
 
 protected:
   Regular<EleType> *regular_;
diff --git a/Applications/LogisticRegression/src/objective/sigmoid_objective.h b/Applications/LogisticRegression/src/objective/sigmoid_objective.h
index 6c5a6dc..ef6338c 100644
--- a/Applications/LogisticRegression/src/objective/sigmoid_objective.h
+++ b/Applications/LogisticRegression/src/objective/sigmoid_objective.h
@@ -10,17 +10,17 @@ class SigmoidObjective : public Objective<EleType> {
 public:
   explicit SigmoidObjective(const Configure& config);
-  void Gradient(Sample<EleType>* sample,
+  float Gradient(Sample<EleType>* sample,
     DataBlock<EleType>* model,
     DataBlock<EleType>* gradient);
 
-  void Predict(Sample<EleType>*sample,
+  float Predict(Sample<EleType>*sample,
     DataBlock<EleType>* model,
     EleType* predict);
 
 private:
-  double Sigmoid(Sample<EleType>* sample,
+  float Sigmoid(Sample<EleType>* sample,
     DataBlock<EleType>*model);
-  EleType Round(double x);
+  float Loss(Sample<EleType>*sample, EleType* predict);
 };
 
 } // namespace logreg
diff --git a/Applications/LogisticRegression/src/objective/softmax_objective.h b/Applications/LogisticRegression/src/objective/softmax_objective.h
index 17167c2..521bd09 100644
--- a/Applications/LogisticRegression/src/objective/softmax_objective.h
+++ b/Applications/LogisticRegression/src/objective/softmax_objective.h
@@ -10,13 +10,13 @@ class SoftmaxObjective : public Objective<EleType> {
 public:
   explicit SoftmaxObjective(const Configure& config);
-  virtual void Predict(Sample<EleType>*sample,
+  virtual float Predict(Sample<EleType>*sample,
     DataBlock<EleType>* model,
     EleType* predict);
 
 protected:
-  double Sigmoid(Sample<EleType>* sample,
+  float Sigmoid(Sample<EleType>* sample,
     DataBlock<EleType>*model, EleType*sigmoid);
-  EleType Round(double x);
+  float Loss(Sample<EleType>*sample, EleType* predict);
 };
 
 } // namespace logreg
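SoftmaxObjective::Sigmoid (changed above in objective.cpp) keeps the usual max-subtraction trick: scores are shifted by their maximum before exp(), so the exponentials stay bounded and Predict() can safely divide by the returned sum. A minimal standalone sketch of that normalization (invented names, float-only):

    #include <cmath>
    #include <cstdio>
    #include <vector>

    float StableSoftmax(std::vector<float>* x) {
      float max = (*x)[0];
      for (size_t i = 1; i < x->size(); ++i) {
        max = max < (*x)[i] ? (*x)[i] : max;
      }
      float sum = 0.0f;
      for (size_t i = 0; i < x->size(); ++i) {
        (*x)[i] = std::exp((*x)[i] - max);  // shifted, so exp() stays bounded by 1
        sum += (*x)[i];
      }
      return sum;  // caller divides each entry by this to get probabilities
    }

    int main() {
      std::vector<float> scores = {1000.0f, 1001.0f, 1002.0f};  // raw exp() would overflow
      float sum = StableSoftmax(&scores);
      for (float s : scores) {
        std::printf("%f ", s / sum);  // ~0.090, 0.245, 0.665
      }
      std::printf("\n");
      return 0;
    }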