adding source files to new repo

This commit is contained in:
Lin Xiao 2020-09-03 21:10:31 -07:00
Родитель c6147e27e2
Коммит 0933d4dc27
48 изменённых файлов: 6592 добавлений и 12 удалений

14
CONTRIBUTING.md Normal file
Просмотреть файл

@ -0,0 +1,14 @@
# Contributing
This project welcomes contributions and suggestions. Most contributions require you to agree to a
Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us
the rights to use your contribution. For details, visit https://cla.opensource.microsoft.com.
When you submit a pull request, a CLA bot will automatically determine whether you need to provide
a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions
provided by the bot. You will only need to do this once across all repos using our CLA.
This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/).
For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or
contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments.

Просмотреть файл

@ -1,14 +1,4 @@
# pSDCA
# Contributing
A variant of SDCA (Stochastic Dual Coordinate Ascent) method with primal gradient update, instead of using the proximal mapping of the conjugate loss functions, which may be hard to compute.
This project welcomes contributions and suggestions. Most contributions require you to agree to a
Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us
the rights to use your contribution. For details, visit https://cla.opensource.microsoft.com.
When you submit a pull request, a CLA bot will automatically determine whether you need to provide
a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions
provided by the bot. You will only need to do this once across all repos using our CLA.
This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/).
For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or
contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments.

229
src/LBFGS/LBFGS.cpp Normal file
Просмотреть файл

@ -0,0 +1,229 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
#include <fstream>
#include <iomanip>
#include <iostream>
#include <memory>
#include <sstream>
#include <string>
#include <mpi.h>
#include "floattype.h"
#include "utils.h"
#include "randalgms.h"
#include "lbfgs_omp.h"
using namespace functions;
using namespace distropt;
// Distributed L-BFGS driver: every MPI rank loads its own shard of the data
// (file "<data>_<rank+1>"), function/gradient values are aggregated across
// ranks with MPI_Allreduce, and the synchronous L-BFGS loop runs in lbfgs_omp().
// Required options: -data -loss -lambda -regu; all others have defaults.
int main(int argc, char* argv[])
{
	// In our implementation, only the main thread calls MPI, so we use MPI_THREAD_FUNNELED
	int mpi_thread_provided;
	MPI_Init_thread(&argc, &argv, MPI_THREAD_FUNNELED, &mpi_thread_provided);
	MPI_Comm comm_world = MPI_COMM_WORLD;
	int mpi_size, mpi_rank;
	MPI_Comm_size(comm_world, &mpi_size);
	MPI_Comm_rank(comm_world, &mpi_rank);

	//---------------------------------------------------------------------------------------------
	// parse the input arguments: options come as "-name value" pairs
	std::string data_file;
	std::string init_mthd = "none";
	char loss_type = '\0', regu_type = '\0';	// zero-initialized; must be set via -loss / -regu
	double lambda = 0.0;						// must be set via -lambda
	double rownorm = 1;							// default to normalize rows of X
	lbfgs_params params;
	int must_count = 0;							// how many of the 4 mandatory options were given

	for (int i = 1; i < argc; i += 2) {
		// guard against an option given without a value (would read past argv)
		if (i + 1 >= argc) {
			std::cout << "LBFGS: Invalid arguments, please try again." << std::endl;
			std::exit(0);
		}
		std::string optstr(argv[i]);
		std::string valstr(argv[i + 1]);
		if (optstr == "-data") {
			data_file = valstr;
			must_count++;
		}
		else if (optstr == "-loss") {
			loss_type = valstr[0];
			must_count++;
		}
		else if (optstr == "-lambda") {
			lambda = std::stod(valstr);		// stod: keep full double precision (stof would truncate)
			must_count++;
		}
		else if (optstr == "-regu") {
			regu_type = valstr[0];
			must_count++;
		}
		else if (optstr == "-init") {
			init_mthd = valstr;
		}
		else if (optstr == "-maxitrs") {
			params.max_itrs = std::stoi(valstr);
		}
		else if (optstr == "-epsgrad") {
			params.eps_grad = std::stod(valstr);
		}
		else if (optstr == "-memory") {
			params.m_memory = std::stoi(valstr);
		}
		else if (optstr == "-lsrho") {
			params.btls_rho = std::stod(valstr);
		}
		else if (optstr == "-lsdec") {
			params.btls_dec = std::stod(valstr);
		}
		else if (optstr == "-lsmax") {
			params.btls_max = std::stoi(valstr);
		}
		else if (optstr == "-lsada") {
			params.btls_ada = (std::stoi(valstr) > 0);
		}
		else if (optstr == "-display") {
			params.display = (std::stoi(valstr) > 0);
		}
		else {
			std::cout << "LBFGS: Invalid arguments, please try again." << std::endl;
			std::exit(0);
		}
	}

	if (must_count < 4) {
		std::cout << "LBFGS arguments: -data <string> -loss <char> -lambda <double> -regu <char>" << std::endl;
		std::cout << " -init <string> -maxitrs <int> -epsgrad <double> -memory <int>" << std::endl;
		std::cout << " -lsrho <double> -lsdec <double> -lsmax <int> -lsada <bool>" << std::endl;
		std::cout << " -display <int>" << std::endl;
		std::cout << "The first 4 options must be given, others can use default values." << std::endl;
		std::exit(0);
	}

	// generate the output file name reflecting the dataset, algorithm, loss+regu, and lambda value
	std::string data_name = data_file.substr(data_file.find_last_of("/\\") + 1);
	std::stringstream sslambda;
	sslambda << std::scientific << std::setprecision(1) << lambda;
	std::string slambda = sslambda.str();
	slambda.erase(slambda.find_first_of("."), 2);	// drop ".d" so e.g. "1.0e-05" becomes "1e-05"
	std::string lsada = params.btls_ada ? "ada_" : "_";
	params.filename = data_name + "_" + std::string(1, loss_type) + std::string(1, regu_type) + "_lbfgs"
		+ std::to_string(params.m_memory) + lsada
		+ std::to_string(mpi_size) + "_" + slambda + "_" + init_mthd;

	//---------------------------------------------------------------------------------------------
	// read local data file
	std::string myfile = data_file + '_' + std::to_string(mpi_rank + 1); // file labels start with 1
	std::vector<spmatT> labels;
	std::vector<spmatT> weights;
	std::vector<spmatT> values;
	std::vector<size_t> colidx;
	std::vector<size_t> rowptr;
	double time_start = MPI_Wtime();
	load_datafile(myfile, labels, weights, values, colidx, rowptr, false, false, true);
	SparseMatrixCSR X(values, colidx, rowptr, false);
	Vector y(labels);
	if (rownorm > 0) { X.normalize_rows(rownorm); }
	MPI_Barrier(comm_world);
	if (mpi_rank == 0) {
		std::cout << "Loading files took " << MPI_Wtime() - time_start << " sec" << std::endl;
	}

	// use collective communications to get nTotalSamples and nAllFeatures
	size_t nSamples = X.nrows();
	size_t nFeatures = X.ncols();
	size_t nnzs = X.nnzs();
	size_t N, D, NZ;	// global totals: examples (sum), features (max), nonzeros (sum)
	time_start = MPI_Wtime();
	MPI_Allreduce(&nSamples, &N, 1, MPI_UNSIGNED_LONG_LONG, MPI_SUM, comm_world);
	MPI_Allreduce(&nFeatures, &D, 1, MPI_UNSIGNED_LONG_LONG, MPI_MAX, comm_world);
	MPI_Allreduce(&nnzs, &NZ, 1, MPI_UNSIGNED_LONG_LONG, MPI_SUM, comm_world);
	// make sure every rank uses the same feature dimension
	X.reset_ncols(D);
	if (mpi_rank == 0) {
		std::cout << "MPI_Allreduce took " << MPI_Wtime() - time_start << " sec" << std::endl;
		std::cout << "N = " << N << std::endl;
		std::cout << "D = " << D << std::endl;
		std::cout << "NZ = " << NZ << std::endl;
	}

	//---------------------------------------------------------------------------------------------
	// construct loss function and regularization
	RegularizedLoss reguloss(X, y, loss_type, lambda, '2');
	// local (single-rank) objective/gradient, used only for the "lbfgs" warm start
	auto localfval = [&reguloss](const Vector &w) { return reguloss.regu_loss(w); };
	auto localgrad = [&reguloss](const Vector &w, Vector &g) { return reguloss.regu_grad(w, g); };
	// global objective: average loss over all N examples plus (lambda/2)*||w||^2
	auto fval = [&reguloss, &comm_world, N, lambda](const Vector &w) {
		double sumloss = reguloss.sum_loss(w);
		MPI_Allreduce(MPI_IN_PLACE, &sumloss, 1, MPI_DOUBLE, MPI_SUM, comm_world);
		return sumloss / N + (lambda / 2)*w.dot(w);
	};
	// global gradient: Allreduce the per-rank gradient sums, then average and add lambda*w
	auto grad = [&reguloss, &comm_world, N, lambda](const Vector &w, Vector &g) {
		double sumloss = reguloss.sum_loss(w);
		MPI_Allreduce(MPI_IN_PLACE, &sumloss, 1, MPI_DOUBLE, MPI_SUM, comm_world);
		reguloss.sum_grad(w, g);
		MPI_Allreduce(MPI_IN_PLACE, g.data(), g.length(), MPI_VECTOR_TYPE, MPI_SUM, comm_world);
		g.scale(1.0 / N);
		g.axpy(lambda, w);
		return sumloss / N + (lambda / 2)*w.dot(w);
	};

	//---------------------------------------------------------------------------------------------
	// compute initial condition using different methods ("none" keeps w = 0)
	Vector w(D), a(X.nrows());
	if (init_mthd == "sdca") {
		// warm start with SDCA on the local shard only
		std::unique_ptr<ISmoothLoss> f;		// unique_ptr: no leak even if sdca() throws
		switch (loss_type)
		{
		case 'L':
		case 'l':
			f.reset(new LogisticLoss());
			break;
		case 'S':
		case 's':
			f.reset(new SmoothedHinge());
			break;
		case '2':
			f.reset(new SquareLoss());
			break;
		default:
			// std::string(1, c) keeps the letter; std::to_string(c) would print its ASCII code
			throw std::runtime_error("Loss function type " + std::string(1, loss_type) + " not defined.");
		}
		SquaredL2Norm g;
		randalgms::sdca(X, y, *f, lambda, g, 20, 1e-8, w, a, 'p', 'd', params.display);
	}
	else if (init_mthd == "lbfgs") {
		// warm start by running L-BFGS on the local shard only (quietly)
		params.display = false;
		lbfgs_omp(localfval, localgrad, w, w, params);
	}
	else {}

	// Allreduce to compute the sum and then the average of local solutions
	int w_len = int(w.length());
	MPI_Allreduce(MPI_IN_PLACE, w.data(), w_len, MPI_VECTOR_TYPE, MPI_SUM, comm_world);
	// need to compute the average as starting points for all machines
	w.scale(1.0 / mpi_size);

	// LBFGS algorithm ------------------------------------------------------------------------------
	params.display = (mpi_rank == 0);		// only rank 0 prints and writes the progress file
	params.fileoutput = (mpi_rank == 0);
	MPI_Barrier(comm_world);
	lbfgs_omp(fval, grad, w, w, params);

	// always call MPI_Finalize()
	MPI_Finalize();
	return 0;
}

165
src/LBFGS/LBFGS.vcxproj Normal file
Просмотреть файл

@ -0,0 +1,165 @@
<?xml version="1.0" encoding="utf-8"?>
<!--
  MSBuild project for the LBFGS MPI console tool (VS2017 / v141 toolset).
  NOTE(review): the Win32 configurations build an Application while the x64
  configurations build a StaticLibrary, and only the x64 configurations set
  the MS-MPI include/library paths - confirm this asymmetry is intended.
-->
<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals">
<VCProjectVersion>15.0</VCProjectVersion>
<ProjectGuid>{2AF44FB9-F85D-48D0-AC4F-EE06CFFB398E}</ProjectGuid>
<Keyword>Win32Proj</Keyword>
<RootNamespace>LBFGS</RootNamespace>
<WindowsTargetPlatformVersion>10.0.16299.0</WindowsTargetPlatformVersion>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<PlatformToolset>v141</PlatformToolset>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v141</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
<ConfigurationType>StaticLibrary</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<PlatformToolset>v141</PlatformToolset>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
<ConfigurationType>StaticLibrary</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v141</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
</ImportGroup>
<ImportGroup Label="Shared">
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<LinkIncremental>false</LinkIncremental>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<LinkIncremental>true</LinkIncremental>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<LinkIncremental>true</LinkIncremental>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<LinkIncremental>false</LinkIncremental>
</PropertyGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<ClCompile>
<PrecompiledHeader>NotUsing</PrecompiledHeader>
<WarningLevel>Level3</WarningLevel>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<SDLCheck>true</SDLCheck>
<AdditionalIncludeDirectories>../Numerics;../Utils;../SDCA;$(MSMPI_INC);$(MSMPI_INC)\x64</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalLibraryDirectories>$(OutDir);C:\Program Files %28x86%29\Microsoft SDKs\MPI\Lib\x64</AdditionalLibraryDirectories>
<AdditionalDependencies>Numerics.lib;Utils.lib;msmpi.lib;Arica.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<ClCompile>
<PrecompiledHeader>Use</PrecompiledHeader>
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<SDLCheck>true</SDLCheck>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<ClCompile>
<PrecompiledHeader>NotUsing</PrecompiledHeader>
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
<PreprocessorDefinitions>_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<SDLCheck>true</SDLCheck>
<AdditionalIncludeDirectories>../Numerics;../Utils;../SDCA;$(MSMPI_INC);$(MSMPI_INC)\x64</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalLibraryDirectories>$(OutDir);C:\Program Files %28x86%29\Microsoft SDKs\MPI\Lib\x64</AdditionalLibraryDirectories>
<AdditionalDependencies>Numerics.lib;Utils.lib;msmpi.lib;Arica.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<ClCompile>
<PrecompiledHeader>Use</PrecompiledHeader>
<WarningLevel>Level3</WarningLevel>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<SDLCheck>true</SDLCheck>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<GenerateDebugInformation>true</GenerateDebugInformation>
</Link>
</ItemDefinitionGroup>
<ItemGroup>
<Text Include="ReadMe.txt" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="lbfgs_omp.h" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="LBFGS.cpp" />
<ClCompile Include="lbfgs_omp.cpp" />
<ClCompile Include="test_lbfgs.cpp" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>

Просмотреть файл

@ -0,0 +1,36 @@
<?xml version="1.0" encoding="utf-8"?>
<!-- Visual Studio filters file: maps project files to the Source/Header/Resource
     folders shown in Solution Explorer. Purely cosmetic; no effect on the build. -->
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup>
<Filter Include="Source Files">
<UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
<Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
</Filter>
<Filter Include="Header Files">
<UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
<Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
</Filter>
<Filter Include="Resource Files">
<UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
<Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
</Filter>
</ItemGroup>
<ItemGroup>
<Text Include="ReadMe.txt" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="lbfgs_omp.h">
<Filter>Header Files</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<ClCompile Include="LBFGS.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="lbfgs_omp.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="test_lbfgs.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
</Project>

37
src/LBFGS/ReadMe.txt Normal file
Просмотреть файл

@ -0,0 +1,37 @@
========================================================================
CONSOLE APPLICATION : [!output PROJECT_NAME] Project Overview
========================================================================
AppWizard has created this [!output PROJECT_NAME] application for you.
This file contains a summary of what you will find in each of the files that
make up your [!output PROJECT_NAME] application.
This is the main project file for VC++ projects generated using an Application Wizard.
It contains information about the version of Visual C++ that generated the file, and
information about the platforms, configurations, and project features selected with the
Application Wizard.
This is the filters file for VC++ projects generated using an Application Wizard.
It contains information about the association between the files in your project
and the filters. This association is used in the IDE to show grouping of files with
similar extensions under a specific node (e.g. ".cpp" files are associated with the
"Source Files" filter).
This is the main application source file.
/////////////////////////////////////////////////////////////////////////////
Other standard files:
StdAfx.h, StdAfx.cpp
These files are used to build a precompiled header (PCH) file
named [!output PROJECT_NAME].pch and a precompiled types file named StdAfx.obj.
/////////////////////////////////////////////////////////////////////////////
Other notes:
AppWizard uses "TODO:" comments to indicate parts of the source code you
should add to or customize.
/////////////////////////////////////////////////////////////////////////////

167
src/LBFGS/lbfgs_omp.cpp Normal file
Просмотреть файл

@ -0,0 +1,167 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
#include<deque>
#include<string>
#include<iostream>
#include<fstream>
#include<iomanip>
#include "timer.h"
#include "lbfgs_omp.h"
namespace distropt
{
// Writes one progress line in the fixed-column layout used both for the
// console display and the progress file:
//   iter      width  3, integer
//   stepsize  width 12, scientific, 3 digits
//   ls        width  4, integer (number of backtracking steps)
//   f(x)      width 17, fixed, 12 digits
//   |grad(x)| width 12, scientific, 3 digits
//   time      width  9, fixed, 3 digits (seconds)
void formatted_output(std::ostream &ofs, const int iters, const double stepsize, const int lscnt, const double f, const double gnorm, const double t)
{
	ofs << std::setw(3) << iters;
	ofs << std::scientific << std::setprecision(3) << std::setw(12) << stepsize;
	ofs << std::setw(4) << lscnt;
	ofs << std::fixed << std::setprecision(12) << std::setw(17) << f;
	ofs << std::scientific << std::setprecision(3) << std::setw(12) << gnorm;
	ofs << std::fixed << std::setprecision(3) << std::setw(9) << t;
	ofs << std::endl;
}
// L-BFGS with the standard two-loop recursion and backtracking line search.
//   fval   - returns f(x)
//   grad   - writes grad f(x) into its second argument and returns f(x)
//   x0     - starting point (read-only; may alias x)
//   x      - output: final iterate
//   params - algorithm settings, see lbfgs_params in lbfgs_omp.h
// Returns the number of iterations performed (stops on ||grad|| < eps_grad
// or after max_itrs iterations). Progress is optionally printed and/or
// written to params.filename.
int lbfgs_omp(std::function<double(const Vector&)> fval, std::function<double(const Vector&, Vector &)> grad,
const Vector &x0, Vector &x, const lbfgs_params &params)
{
// need more careful check, including function signature? may not be possible
assert(x0.length() == x.length());
size_t d = x0.length();
int m = params.m_memory;
// construct variables and working space
Vector g(d), p(d), x_try(d); // gradient, search direction, and next trial
Vector x_pre(d), g_pre(d), dx(d), dg(d); // vectors for L-BFGS memory
// s/y/rho hold the last m curvature pairs; the NEWEST pair is at the front
std::deque<Vector> s, y;
std::deque<double> rho;
std::vector<double> alpha(m);
// timing and recording
double t_elapsed;
HighResTimer timer;
std::vector<double> stepsizes, objvals, gradnorms, time;
std::vector<int> nls;
if (params.display) {
std::cout << std::endl << "iter stepsize ls f(x) |grad(x)| time" << std::endl;
}
// L-BFGS iterations
x.copy(x0);
int k = 0, lscnt = 0;
double stepsize = 1.0;
// compute gradient at x
double fx = grad(x, g);
double g_norm = g.norm2();
t_elapsed = timer.seconds_from_start();
// record progress (iteration 0 = starting point, no line search yet)
stepsizes.push_back(1.0);
objvals.push_back(fx);
gradnorms.push_back(g_norm);
time.push_back(t_elapsed);
nls.push_back(0);
if (params.display)
{
formatted_output(std::cout, k, stepsize, lscnt, fx, g_norm, t_elapsed);
}
for (k = 1; k <= params.max_itrs; k++)
{
// update previous vectors
x_pre.copy(x);
g_pre.copy(g);
// compute the limited-memory-matrix-vector product
// (two-loop recursion: on exit p ~ H*g with H the inverse-Hessian estimate;
//  first pass runs newest-to-oldest, which is why new pairs are push_front'ed)
p.copy(g);
for (int i = 0; i < s.size(); i++)
{
alpha[i] = rho[i] * s[i].dot(p);
p.axpy(-alpha[i], y[i]);
}
// initial Hessian scaling H0 = gamma*I with gamma = s'y/y'y of the newest pair
double gamma = s.size() > 0 ? s[0].dot(y[0]) / (y[0].dot(y[0])) : 1.0;
p.scale(gamma);
for (int i = s.size() - 1; i > -1; i--)
{
double beta = rho[i] * y[i].dot(p);
p.axpy(alpha[i] - beta, s[i]);
}
// backtracking line search (for nonconvex functions, need Wolfe conditions)
// the step is taken along -p, so the Armijo target decreases by btls_dec*stepsize*g'p
stepsize = params.btls_ada ? stepsize / params.btls_rho : 1.0;
double gdp = g.dot(p);
for (lscnt = 0; lscnt < params.btls_max; lscnt++)
{
x_try.copy(x);
x_try.axpy(-stepsize, p);
if (fval(x_try) < fx - params.btls_dec*stepsize*gdp) {
break;
}
stepsize *= params.btls_rho;
}
// NOTE(review): if the loop exhausts btls_max trials, the last (unaccepted)
// trial point is still taken below - confirm this fall-through is intended
x.copy(x_try);
// compute function value and gradient at new point
fx = grad(x, g);
g_norm = g.norm2();
t_elapsed = timer.seconds_from_start();
// record progress
stepsizes.push_back(stepsize);
objvals.push_back(fx);
gradnorms.push_back(g_norm);
time.push_back(t_elapsed);
nls.push_back(lscnt);
if (params.display)
{
formatted_output(std::cout, k, stepsize, lscnt, fx, g_norm, t_elapsed);
}
// stop criteria: norm of gradient
if (g_norm < params.eps_grad)
{
break;
}
// remove old items from queues if k > m (keeps at most m pairs)
if (k > m)
{
s.pop_back();
y.pop_back();
rho.pop_back();
}
// add new s and y to queue (newest at the front)
Vector::elem_subtract(x, x_pre, dx);
Vector::elem_subtract(g, g_pre, dg);
s.push_front(dx);
y.push_front(dg);
// NOTE(review): rho = 1/(dx'dg) assumes positive curvature (dx'dg > 0);
// with Armijo-only backtracking this can fail on nonconvex problems
rho.push_front(1.0 / dx.dot(dg));
}
//---------------------------------------------------------------------
// write progress record in a file
if (params.fileoutput) {
std::ofstream ofs(params.filename, std::ofstream::out);
ofs << "iter stepsize ls f(x) |grad(x)| time" << std::endl;
for (int i = 0; i < objvals.size(); i++) {
formatted_output(ofs, i, stepsizes[i], nls[i], objvals[i], gradnorms[i], time[i]);
}
ofs.close();
}
// return number of iterations performed
return k;
}
}

32
src/LBFGS/lbfgs_omp.h Normal file
Просмотреть файл

@ -0,0 +1,32 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
#pragma once
#include <functional>
#include "regu_loss.h"
using namespace functions;
namespace distropt {
// User-tunable settings for lbfgs_omp (defined in lbfgs_omp.cpp).
struct lbfgs_params {
int max_itrs = 100;			// maximum number of L-BFGS iterations
int m_memory = 20;			// number of (s, y) correction pairs kept
double eps_grad = 1e-8;		// stop when the gradient norm drops below this
double btls_rho = 0.5;		// backtracking: step shrink factor per trial
double btls_dec = 1e-4;		// backtracking: Armijo sufficient-decrease constant
int btls_max = 10;			// backtracking: max trials per iteration
bool btls_ada = false;		// true: start each search from previous step / btls_rho
bool display = true;		// print per-iteration progress to stdout
bool fileoutput = false;	// write the progress table to `filename`
std::string filename = "temp.txt";	// progress-file path (used when fileoutput is true)
// is this a good place to put it? probably not, good to leave with the application
//void parse(int argc, char* argv[]);
};
// Minimizes fval via L-BFGS starting from x0; the solution is written to x
// (x may alias x0). Returns the number of iterations performed.
int lbfgs_omp(std::function<double(const Vector&)> fval, std::function<double(const Vector&, Vector &)> grad,
const Vector &x0, Vector &x, const lbfgs_params &params);
}

73
src/LBFGS/test_lbfgs.cpp Normal file
Просмотреть файл

@ -0,0 +1,73 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
#include<iostream>
//#include<functional>
#include "utils.h"
#include "randalgms.h"
#include "lbfgs_omp.h"
using namespace std;
using namespace distropt;
// Standalone (single-process) smoke test for lbfgs_omp: trains a logistic
// regression on one LIBSVM-format file given as argv[1] and prints the
// training error. Renamed from main() so it can coexist with the MPI driver.
int main_test_lbfgs(int argc, char* argv[])
//int main(int argc, char* argv[])
{
if (argc < 2) {
std::cout << "Need to specify training data file." << std::endl;
std::exit(0);
}
string data_file = argv[1];
// read training and testing files
vector<spmatT> labels;
vector<spmatT> weights;
vector<spmatT> values;
vector<size_t> colidx;
vector<size_t> rowptr;
std::cout << "Loading training data ... " << std::endl;
// news20 actually has column index starting from 0, rcv1 and most others start with 1. Need to revise code!
//size_t n_examples = load_datafile(data_file, labels, weights, values, colidx, rowptr, false, true, true, 0);
size_t n_examples = load_datafile(data_file, labels, weights, values, colidx, rowptr, false, true, true);
SparseMatrixCSR X(values, colidx, rowptr, false);
Vector y(labels);
X.normalize_rows();
// -------------------------------------------------------------------------------------
// solver settings for the test run
lbfgs_params params;
params.max_itrs = 100;
params.eps_grad = 1e-8;
params.m_memory = 20;
params.btls_rho = 0.5;		// backtracking shrink factor
params.btls_dec = 1e-4;		// Armijo sufficient-decrease constant
params.btls_max = 20;		// max backtracking trials per iteration
params.btls_ada = false;
double lambda = 1.0e-5;		// L2 regularization weight
size_t dim = X.ncols();
Vector w0(dim), w(dim);		// w0: start; w: receives the solution
// -------------------------------------------------------------------------------------
// logistic loss ('l') with L2 regularization ('2')
RegularizedLoss reguloss(X, y, 'l', lambda, '2');
// Replace std::bind solution by using lambdas
//using namespace std::placeholders;
//auto fval = std::bind(&RegularizedLoss::regu_loss, &reguloss, _1);
//auto grad = std::bind(&RegularizedLoss::regu_grad, &reguloss, _1, _2);
auto fval = [&reguloss](const Vector &x) {return reguloss.regu_loss(x); };
auto grad = [&reguloss](const Vector &x, Vector &g) {return reguloss.regu_grad(x, g); };
lbfgs_omp(fval, grad, w0, w, params);
// -------------------------------------------------------------------------------------
// report training error of the learned linear classifier
float train_err = binary_error_rate(X, y, w);
std::cout << "Training error rate = " << train_err * 100 << " %" << std::endl;
return 0;
}

52
src/Numerics/Numerics.cpp Normal file
Просмотреть файл

@ -0,0 +1,52 @@
// Numerics.cpp : Defines the entry point for the console application.
#include <string>
#include <iostream>
#include <stdexcept>
#include <ctime>
#include <math.h>
#include "spmatrix.h"
#include "utils.h"
using namespace std;
using namespace Numerics;
//int main_test_sparsematrix(int argc, char* argv[])
// Smoke test / micro-benchmark for SparseMatrixCSR: loads a LIBSVM file and
// times one matrix-vector (A*x) and one transposed (A'*y) product using the
// CPU clock. The dataset path is hard-coded; edit the lines below to switch.
int main(int argc, char* argv[])
{
	//string filename = "C:/Data/LIBSVMdata/splice1percent100_1";
	string filename = "C:/Data/LIBSVMdata/ads1percent100_1";
	//string filename = "C:/Data/LIBSVMdata/rcv1_test.binary";
	//string filename = "C:/Data/LIBSVMdata/rcv1_train.binary";
	//string filename = "C:/Data/LIBSVMdata/news20.binary";
	//string filename = "C:/Data/LIBSVMdata/covtype.libsvm.binary.scale";

	vector<float> vals, labl;
	vector<size_t> cidx;
	vector<size_t> rptr;

	// ticks measures CPU time via clock(); renamed from `time` to avoid
	// shadowing the C library function of the same name
	clock_t ticks = clock();
	Load_LIBSVM_file(filename, vals, cidx, rptr, labl);
	ticks = clock() - ticks;
	std::cout << "Loading file took " << ((float)ticks) / CLOCKS_PER_SEC << " seconds." << std::endl;

	SparseMatrixCSR A(vals, cidx, rptr);
	std::cout << A.Rows() << ' ' << A.Cols() << ' ' << A.NNZs() << std::endl;

	Vector y(labl);
	Vector x(A.Cols());

	// y = 1*A*x + 0*y
	ticks = clock();
	A.aAxby(1, x, 0, y);
	ticks = clock() - ticks;
	std::cout << "Matrix-Vector multiplications took " << ((float)ticks) / CLOCKS_PER_SEC << " seconds." << std::endl;

	// x = 1*A'*y + 0*x
	ticks = clock();
	A.aATxby(1, y, 0, x);
	ticks = clock() - ticks;
	std::cout << "Matrix.T-vector multiplications took " << ((float)ticks) / CLOCKS_PER_SEC << " seconds." << std::endl;
	return 0;
}

Просмотреть файл

@ -0,0 +1,142 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals">
<ProjectGuid>{3BB4DA69-B2E0-46D0-86DD-1757C5922506}</ProjectGuid>
<RootNamespace>Numerics</RootNamespace>
<WindowsTargetPlatformVersion>10.0.16299.0</WindowsTargetPlatformVersion>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<PlatformToolset>v141</PlatformToolset>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v141</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
<ConfigurationType>StaticLibrary</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<PlatformToolset>v141</PlatformToolset>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
<ConfigurationType>StaticLibrary</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v141</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
</ImportGroup>
<ImportGroup Label="Shared">
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup />
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
<SDLCheck>true</SDLCheck>
</ClCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
<SDLCheck>true</SDLCheck>
<AdditionalIncludeDirectories>..\Utils</AdditionalIncludeDirectories>
<OpenMPSupport>false</OpenMPSupport>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<AdditionalDependencies>Utils.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalLibraryDirectories>$(OutDir)</AdditionalLibraryDirectories>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<SDLCheck>true</SDLCheck>
</ClCompile>
<Link>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<SDLCheck>true</SDLCheck>
<AdditionalIncludeDirectories>..\Utils</AdditionalIncludeDirectories>
<OpenMPSupport>true</OpenMPSupport>
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</ClCompile>
<Link>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<AdditionalLibraryDirectories>$(OutDir)</AdditionalLibraryDirectories>
<AdditionalDependencies>Utils.lib;%(AdditionalDependencies)</AdditionalDependencies>
<SubSystem>Console</SubSystem>
</Link>
</ItemDefinitionGroup>
<ItemGroup>
<ClInclude Include="dnvector.h" />
<ClInclude Include="floattype.h" />
<ClInclude Include="functions.h" />
<ClInclude Include="regu_loss.h" />
<ClInclude Include="spmatrix.h" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="dnvector.cpp" />
<ClCompile Include="functions.cpp" />
<ClCompile Include="regu_loss.cpp" />
<ClCompile Include="spmatrix.cpp" />
<ClCompile Include="test_numerics.cpp" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>

Просмотреть файл

@ -0,0 +1,51 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup>
<Filter Include="Source Files">
<UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
<Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
</Filter>
<Filter Include="Header Files">
<UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
<Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
</Filter>
<Filter Include="Resource Files">
<UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
<Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
</Filter>
</ItemGroup>
<ItemGroup>
<ClInclude Include="dnvector.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="spmatrix.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="floattype.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="functions.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="regu_loss.h">
<Filter>Header Files</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<ClCompile Include="dnvector.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="spmatrix.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="test_numerics.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="functions.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="regu_loss.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
</Project>

673
src/Numerics/dnvector.cpp Normal file
Просмотреть файл

@ -0,0 +1,673 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
#include "dnvector.h"
#include <assert.h>
#include <math.h>
#include <omp.h>
#include <algorithm>
#include <cstdint>
#include <cstring>
#include <functional>
#include <random>
// OpenMP 3.0 supports unsigned for-loop iterator, so can replace std::int64_t by size_t
using std::int64_t;
namespace numerics {
// Construct an all-zero vector of length n
template <typename T>
Vector<T>::Vector(const size_t n)
{
    _length = n;
    _elements = new T[n];
    memset(_elements, 0, n * sizeof(T));
    _need_cleanup = true;
}
// Copy constructor: deep copy of the other vector's elements
template <typename T>
Vector<T>::Vector(const Vector<T> &v)
{
    _length = v._length;
    _elements = new T[_length];
    memcpy(_elements, v._elements, _length * sizeof(T));
    _need_cleanup = true;
}
// Construct from std::vector<float> (element-wise convert to T)
template <typename T>
Vector<T>::Vector(const std::vector<float> &v)
{
    _length = v.size();
    _elements = new T[_length];
    // cannot use memcpy() here since T may not be float
    for (size_t i = 0; i < _length; i++) {
        _elements[i] = v[i];
    }
    _need_cleanup = true;
}
// Construct from std::vector<double> (element-wise convert to T)
template <typename T>
Vector<T>::Vector(const std::vector<double> &v)
{
    _length = v.size();
    _elements = new T[_length];
    // cannot use memcpy() here since T may not be double
    for (size_t i = 0; i < _length; i++) {
        _elements[i] = v[i];
    }
    _need_cleanup = true;
}
// Gather constructor: this[i] = v[indices[i]]
template <typename T>
Vector<T>::Vector(const Vector<T> &v, const std::vector<size_t> &indices)
{
    // size_t is unsigned so ">= 0" is vacuous; also guard against
    // dereferencing max_element of an empty range (UB)
    assert(indices.empty()
        || *std::max_element(std::begin(indices), std::end(indices)) < v.length());
    _length = indices.size();
    _elements = new T[_length];
    for (size_t i = 0; i < _length; i++) {
        _elements[i] = v[indices[i]];
    }
    _need_cleanup = true;
}
// Destructor, also called by derived class SubVector without cleaning up memory
template <typename T>
Vector<T>::~Vector()
{
    if (_need_cleanup) delete[] _elements;
}
// Convert to std::vector<T>
template <typename T>
std::vector<T> Vector<T>::to_vector()
{
    std::vector<T> v(_length);
    memcpy(v.data(), _elements, _length * sizeof(T));
    // return by value: NRVO/implicit move; "return std::move(v)" would
    // inhibit copy elision
    return v;
}
// find pre-permutation vector v[p[i]] = this[i]
template <typename T>
void Vector<T>::pre_permute(const std::vector<size_t> &p, Vector<T> &v)
{
    assert(v._length == _length && p.size() == _length);
    // unsigned entries cannot be negative; avoid *max_element on empty range
    assert(_length == 0
        || *std::max_element(std::begin(p), std::end(p)) < v.length());
    auto elem = _elements;
    auto len = _length;
    for (size_t i = 0; i < len; i++) {
        v[p[i]] = elem[i];
    }
}
// find post-permutation vector v[i] = this[p[i]]
template <typename T>
void Vector<T>::post_permute(const std::vector<size_t> &p, Vector<T> &v)
{
    assert(v._length == _length && p.size() == _length);
    assert(_length == 0
        || *std::max_element(std::begin(p), std::end(p)) < v.length());
    auto elem = _elements;
    auto len = _length;
    for (size_t i = 0; i < len; i++) {
        v[i] = elem[p[i]];
    }
}
// Copy from part of a longer vector v, starting at start_idx
template <typename T>
void Vector<T>::copy(const Vector<T> &v, const size_t start_idx)
{
    assert(v._length >= start_idx + _length);
    memcpy(_elements, v._elements + start_idx, _length * sizeof(T));
}
// vector concatenation: this[] = [ x[]; y[] ]
template <typename T>
void Vector<T>::concat(const Vector<T> &x, const Vector<T> &y)
{
    assert(x._length + y._length == _length);
    memcpy(_elements, x._elements, x._length * sizeof(T));
    memcpy(_elements + x._length, y._elements, y._length * sizeof(T));
}
// split vector into two: this[] = [ x[]; y[] ]
template <typename T>
void Vector<T>::split(Vector<T> &x, Vector<T> &y) const
{
    assert(x._length + y._length == _length);
    memcpy(x._elements, _elements, x._length * sizeof(T));
    memcpy(y._elements, _elements + x._length, y._length * sizeof(T));
}
// Set every element to zero
template <typename T>
void Vector<T>::fill_zero()
{
    memset(_elements, 0, _length * sizeof(T));
}
// Fill Vector with random numbers in the interval [a, b)
template <typename T>
void Vector<T>::fill_rand(const T a, const T b)
{
    std::mt19937_64 reng(std::random_device{}());
    std::uniform_real_distribution<T> unif(a, b);
    auto rgen = std::bind(unif, reng);
    std::generate(_elements, _elements + _length, rgen);
}
// Fill every element with a constant
template <typename T>
void Vector<T>::fill(const T c)
{
    auto elem = _elements;
    auto len = _length;
    #pragma omp parallel for
    for (int64_t i = 0; i < len; i++) {
        elem[i] = c;
    }
}
// Add a constant to each element
template <typename T>
void Vector<T>::add(const T c)
{
    auto elem = _elements;
    auto len = _length;
    #pragma omp parallel for
    for (int64_t i = 0; i < len; i++) {
        elem[i] += c;
    }
}
// Return sum of elements
template <typename T>
T Vector<T>::sum() const
{
    auto elem = _elements;
    auto len = _length;
    T s = 0;
    #pragma omp parallel for reduction(+:s)
    for (int64_t i = 0; i < len; i++) {
        s += elem[i];
    }
    return s;
}
// Return the maximum value. No OpenMP used. Precondition: length >= 1
template <typename T>
T Vector<T>::max() const
{
    auto elem = _elements;
    auto len = _length;
    T a = elem[0];
    for (size_t i = 1; i < len; i++) {
        if (elem[i] > a) a = elem[i];
    }
    return a;
}
// Return the minimum value. No OpenMP used. Precondition: length >= 1
template <typename T>
T Vector<T>::min() const
{
    auto elem = _elements;
    auto len = _length;
    T a = elem[0];
    for (size_t i = 1; i < len; i++) {
        if (elem[i] < a) a = elem[i];
    }
    return a;
}
// Return 1-norm: sum_i abs(x[i])
template <typename T>
T Vector<T>::norm1() const
{
    auto elem = _elements;
    auto len = _length;
    T a = 0;
    #pragma omp parallel for reduction(+:a)
    for (int64_t i = 0; i < len; i++) {
        a += fabs(elem[i]);
    }
    return a;
}
// Returns 2-norm (Euclidean norm) of the vector
template <typename T>
T Vector<T>::norm2() const
{
    auto elem = _elements;
    auto len = _length;
    T sum = 0;
    #pragma omp parallel for reduction(+:sum)
    for (int64_t i = 0; i < len; i++) {
        // x*x instead of pow(x, 2): same value, far cheaper
        sum += elem[i] * elem[i];
    }
    return sqrt(sum);
}
// Return infinity norm: max_i abs(x[i])
template <typename T>
T Vector<T>::normInf() const
{
    auto elem = _elements;
    auto len = _length;
    T a = 0;
    for (size_t i = 0; i < len; i++)
    {
        T xi_abs = fabs(elem[i]);
        if (xi_abs > a) a = xi_abs;
    }
    return a;
}
// Return a vector of signs in {-1, 0, +1}
template <typename T>
void Vector<T>::sign(Vector<T> &s) const
{
    assert(_length == s._length);
    auto elem = _elements;
    auto s_elem = s._elements;
    auto len = _length;
    #pragma omp parallel for
    for (int64_t i = 0; i < len; i++) {
        s_elem[i] = elem[i] > 0 ? T(1) : (elem[i] < 0 ? T(-1) : T(0));
    }
}
// Convert to vector of signs in {-1, 0, +1}
template <typename T>
void Vector<T>::to_sign()
{
    auto elem = _elements;
    auto len = _length;
    #pragma omp parallel for
    for (int64_t i = 0; i < len; i++) {
        elem[i] = elem[i] > 0 ? T(1) : (elem[i] < 0 ? T(-1) : T(0));
    }
}
// Count number of positive elements
template <typename T>
size_t Vector<T>::count_positive() const
{
    auto elem = _elements;
    auto len = _length;
    size_t count = 0;
    #pragma omp parallel for reduction(+:count)
    for (int64_t i = 0; i < len; i++) {
        count += elem[i] > 0 ? 1 : 0;
    }
    return count;
}
// Count number of negative elements
template <typename T>
size_t Vector<T>::count_negative() const
{
    auto elem = _elements;
    auto len = _length;
    size_t count = 0;
    #pragma omp parallel for reduction(+:count)
    for (int64_t i = 0; i < len; i++) {
        count += elem[i] < 0 ? 1 : 0;
    }
    return count;
}
// Scale itself by a constant alpha
template <typename T>
void Vector<T>::scale(const T alpha)
{
    auto elem = _elements;
    auto len = _length;
    #pragma omp parallel for
    for (int64_t i = 0; i < len; i++) {
        elem[i] *= alpha;
    }
}
// Return inner product with vector x
template <typename T>
T Vector<T>::dot(const Vector<T> &x) const
{
    assert(_length == x._length);
    auto elem = _elements;
    auto x_elem = x._elements;
    auto len = x._length;
    T a = 0;
    #pragma omp parallel for reduction(+:a)
    for (int64_t i = 0; i < len; i++) {
        a += elem[i] * x_elem[i];
    }
    return a;
}
// Static inner product: dot(x, y) = x'y
template <typename T>
T Vector<T>::dot(const Vector<T> &x, const Vector<T> &y)
{
    return x.dot(y);
}
// this = alpha * x + this
template <typename T>
void Vector<T>::axpy(const T alpha, const Vector<T> &x)
{
    assert(x._length == _length);
    auto x_elem = x._elements;
    auto y_elem = _elements;
    auto len = _length;
    #pragma omp parallel for
    for (int64_t i = 0; i < len; i++) {
        y_elem[i] += alpha * x_elem[i];
    }
}
// y = alpha * x + y
template <typename T>
void Vector<T>::axpy(const T alpha, const Vector<T> &x, Vector<T> &y)
{
    assert(x._length == y._length);
    auto x_elem = x._elements;
    auto y_elem = y._elements;
    auto len = x._length;
    #pragma omp parallel for
    for (int64_t i = 0; i < len; i++) {
        y_elem[i] += alpha * x_elem[i];
    }
}
/* A tricky mistake, this always shadowed by the previous static method!!!
// this = alpha * x + y
template <typename T>
void Vector<T>::axpy(const T alpha, const Vector<T> &x, const Vector<T> &y)
{
    this->copy(y);
    this->axpy(alpha, x);
}
*/
// Elementwise absolute value: vabs[i] = abs( v[i] )
template <typename T>
void Vector<T>::elem_abs(Vector<T> &v) const
{
    assert(v._length == _length);
    auto elem = _elements;
    auto v_elem = v._elements;
    auto len = _length;
    #pragma omp parallel for
    for (int64_t i = 0; i < len; i++) {
        v_elem[i] = fabs(elem[i]);
    }
}
// Elementwise inversion: inv[i] = 1/v[i]
template <typename T>
void Vector<T>::elem_inv(Vector<T> &v) const
{
    assert(v._length == _length);
    auto elem = _elements;
    auto v_elem = v._elements;
    auto len = _length;
    #pragma omp parallel for
    for (int64_t i = 0; i < len; i++) {
        v_elem[i] = 1 / elem[i];
    }
}
// Elementwise square root: vsqrt[i] = sqrt( v[i] )
template <typename T>
void Vector<T>::elem_sqrt(Vector<T> &v) const
{
    assert(v._length == _length);
    auto elem = _elements;
    auto v_elem = v._elements;
    auto len = _length;
    #pragma omp parallel for
    for (int64_t i = 0; i < len; i++) {
        v_elem[i] = sqrt(elem[i]);
    }
}
// Elementwise exponential: vexp[i] = exp( v[i] )
template <typename T>
void Vector<T>::elem_exp(Vector<T> &v) const
{
    assert(v._length == _length);
    auto elem = _elements;
    auto v_elem = v._elements;
    auto len = _length;
    #pragma omp parallel for
    for (int64_t i = 0; i < len; i++) {
        v_elem[i] = exp(elem[i]);
    }
}
// Elementwise natural logarithm: vlog[i] = log( v[i] )
template <typename T>
void Vector<T>::elem_log(Vector<T> &v) const
{
    assert(v._length == _length);
    auto elem = _elements;
    auto v_elem = v._elements;
    auto len = _length;
    #pragma omp parallel for
    for (int64_t i = 0; i < len; i++) {
        v_elem[i] = log(elem[i]);
    }
}
// Elementwise exponential after minus 1: vexpm1[i] = exp(v[i]) - 1
template <typename T>
void Vector<T>::elem_expm1(Vector<T> &v) const
{
    assert(v._length == _length);
    auto elem = _elements;
    auto v_elem = v._elements;
    auto len = _length;
    #pragma omp parallel for
    for (int64_t i = 0; i < len; i++) {
        v_elem[i] = expm1(elem[i]);
    }
}
// Elementwise natural logarithm of 1 plus: vlog1p[i] = log( 1 + v[i] )
template <typename T>
void Vector<T>::elem_log1p(Vector<T> &v) const
{
    assert(v._length == _length);
    auto elem = _elements;
    auto v_elem = v._elements;
    auto len = _length;
    #pragma omp parallel for
    for (int64_t i = 0; i < len; i++) {
        v_elem[i] = log1p(elem[i]);
    }
}
// Elementwise hinge loss: h[i] = max{0, 1 - yXw[i]}
template <typename T>
void Vector<T>::elem_hinge(Vector<T> &h) const
{
    assert(_length == h._length);
    auto elem = _elements;
    auto h_elem = h._elements;
    auto len = _length;
    #pragma omp parallel for
    for (int64_t i = 0; i < len; i++)
    {
        // declared inside the loop: a single variable outside the
        // "omp parallel for" would be shared across threads (data race)
        const T diff = 1 - elem[i];
        h_elem[i] = diff > 0 ? diff : 0;
    }
}
// Elementwise clip: b[i] = min( ub, max(lb, a[i]))
template <typename T>
void Vector<T>::elem_clip(const T lb, const T ub, Vector<T> &b) const
{
    assert(_length == b._length);
    auto elem = _elements;
    auto b_elem = b._elements;
    auto len = _length;
    #pragma omp parallel for
    for (int64_t i = 0; i < len; i++)
    {
        // per-iteration local avoids sharing "val" across OpenMP threads
        const T val = elem[i];
        b_elem[i] = val > ub ? ub : val < lb ? lb : val;
    }
}
// Elementwise addition: z[i] = x[i] + y[i]
template <typename T>
void Vector<T>::elem_add(const Vector<T> &x, const Vector<T> &y, Vector<T> &z)
{
    assert(x._length == y._length && x._length == z._length);
    auto x_elem = x._elements;
    auto y_elem = y._elements;
    auto z_elem = z._elements;
    auto len = x._length;
    #pragma omp parallel for
    for (int64_t i = 0; i < len; i++) {
        z_elem[i] = x_elem[i] + y_elem[i];
    }
}
// Elementwise subtraction: z[i] = x[i] - y[i]
template <typename T>
void Vector<T>::elem_subtract(const Vector<T> &x, const Vector<T> &y, Vector<T> &z)
{
    assert(x._length == y._length && x._length == z._length);
    auto x_elem = x._elements;
    auto y_elem = y._elements;
    auto z_elem = z._elements;
    auto len = x._length;
    #pragma omp parallel for
    for (int64_t i = 0; i < len; i++) {
        z_elem[i] = x_elem[i] - y_elem[i];
    }
}
// Elementwise multiplication: z[i] = x[i] * y[i]
template <typename T>
void Vector<T>::elem_multiply(const Vector<T> &x, const Vector<T> &y, Vector<T> &z)
{
    assert(x._length == y._length && x._length == z._length);
    auto x_elem = x._elements;
    auto y_elem = y._elements;
    auto z_elem = z._elements;
    auto len = x._length;
    #pragma omp parallel for
    for (int64_t i = 0; i < len; i++) {
        z_elem[i] = x_elem[i] * y_elem[i];
    }
}
// Elementwise division: z[i] = x[i] / y[i]
template <typename T>
void Vector<T>::elem_divide(const Vector<T> &x, const Vector<T> &y, Vector<T> &z)
{
    assert(x._length == y._length && x._length == z._length);
    auto x_elem = x._elements;
    auto y_elem = y._elements;
    auto z_elem = z._elements;
    auto len = x._length;
    #pragma omp parallel for
    for (int64_t i = 0; i < len; i++) {
        z_elem[i] = x_elem[i] / y_elem[i];
    }
}
// Elementwise min: z[i] = min( x[i], y[i] )
template <typename T>
void Vector<T>::elem_min(const Vector<T> &x, const Vector<T> &y, Vector<T> &z)
{
    assert(x._length == y._length && x._length == z._length);
    auto x_elem = x._elements;
    auto y_elem = y._elements;
    auto z_elem = z._elements;
    auto len = x._length;
    #pragma omp parallel for
    for (int64_t i = 0; i < len; i++) {
        z_elem[i] = fmin(x_elem[i], y_elem[i]);
    }
}
// Elementwise max: z[i] = max( x[i], y[i] )
template <typename T>
void Vector<T>::elem_max(const Vector<T> &x, const Vector<T> &y, Vector<T> &z)
{
    assert(x._length == y._length && x._length == z._length);
    auto x_elem = x._elements;
    auto y_elem = y._elements;
    auto z_elem = z._elements;
    auto len = x._length;
    #pragma omp parallel for
    for (int64_t i = 0; i < len; i++) {
        z_elem[i] = fmax(x_elem[i], y_elem[i]);
    }
}
// Member functions of SubVector, a non-owning mask over memory in a Vector
template <typename T>
SubVector<T>::SubVector(const Vector<T> &v, const size_t start_idx, const size_t length)
{
    this->recast(v, start_idx, length);
}
// Re-point the mask at a (possibly different) range of v
template <typename T>
void SubVector<T>::recast(const Vector<T> &v, const size_t start_idx, const size_t length)
{
    assert(start_idx + length <= v.length());
    this->_length = length;
    this->_elements = v.data() + start_idx;
    this->_need_cleanup = false;   // the referred Vector owns the memory
}
// instantiates template with two float types
template class Vector<float>;
template class Vector<double>;
template class SubVector<float>;
template class SubVector<double>;
}

150
src/Numerics/dnvector.h Normal file
Просмотреть файл

@ -0,0 +1,150 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
#pragma once
#include <vector>
#include <cstdint>
namespace numerics {
// Dense numeric vector of element type T (instantiated for float/double).
// Owns its heap buffer unless used as a SubVector mask (_need_cleanup false).
template <typename T>
class Vector
{
protected:
    size_t _length;     // Length of vector
    T *_elements;       // Pointer to memory storing the elements
    bool _need_cleanup; // Indicator to delete[] elements
    // Default constructor for creating the SubVector class (masks only)
    Vector() = default;
public:
    Vector(const size_t n);     // construct all-zero vector of length n
    Vector(const Vector<T> &v); // copy constructor
    // the following two functions avoid the needs for using two template parameters
    Vector(const std::vector<float> &v);  // construct from std::vector<float>
    Vector(const std::vector<double> &v); // construct from std::vector<double>
    // gather constructor: this[i] = v[indices[i]]
    Vector(const Vector<T> &v, const std::vector<size_t> &indices);
    // Destructor, also called by derived class SubVector without cleaning up memory
    // NOTE(review): destructor is non-virtual although SubVector derives from
    // this class; deleting a SubVector through a Vector* would be UB — confirm
    // SubVector is only ever used by value.
    ~Vector();
    std::vector<T> to_vector(); // convert to std::vector<T>, using C++11 std::move()
    // find pre-permutation vector v[p[i]] = this[i]
    void pre_permute(const std::vector<size_t> &p, Vector<T> &v);
    // find post-permutation vector v[i] = this[p[i]]
    void post_permute(const std::vector<size_t> &p, Vector<T> &v);
    // Do not allow copy assignment constructor due to subVector cleanup issues!
    Vector<T>& operator=(const Vector<T> &v) = delete;
    // Returns length of vector
    inline size_t length() const { return _length; }
    // Return a direct pointer to the data memory (e.g., used as MPI buffer)
    inline T* data() const { return _elements; }
    // Overloading operator [], use 0-based indexing, inlined for speed
    inline T& operator[] (const size_t i) { return _elements[i]; }
    inline const T& operator[] (const size_t i) const { return _elements[i]; }
    // To remove compilation warning of converting int64_t to size_t when using OpenMP
    inline T& operator[] (const int64_t i) { return _elements[i]; }
    inline const T& operator[] (const int64_t i) const { return _elements[i]; }
    // Copy from (part of a longer) vector x
    void copy(const Vector<T> &v, const size_t start_idx=0);
    // vector concatenation: z[] = [ x[]; y[] ]
    void concat(const Vector<T> &x, const Vector<T> &y);
    // split vector into two z[] = [ x[]; y[] ]
    void split(Vector<T> &x, Vector<T> &y) const;
    // Set every element to zero
    void fill_zero();
    // Fill Vector with random numbers in the interval [a, b)
    void fill_rand(const T a, const T b);
    // Fill every element with a constant
    void fill(const T c);
    // Add a constant to each element
    void add(const T c);
    // Return sum of elements
    T sum() const;
    // Return the maximum value of elements
    T max() const;
    // Return the minimum value of elements
    T min() const;
    // Return 1-norm of vector
    T norm1() const;
    // Returns 2-norm (Euclidean norm) of the vector
    T norm2() const;
    // Return the infinite norm
    T normInf() const;
    // Return a vector of signs in {-1, 0, +1}
    void sign(Vector<T> &s) const;
    // Convert elements to signs in {-1, 0, +1}
    void to_sign();
    // Count number of positive elements
    size_t count_positive() const;
    // Count number of negative elements
    size_t count_negative() const;
    // Scale itself by a constant alpha
    void scale(const T alpha);
    // Return inner product with vector x
    T dot(const Vector<T> &x) const;
    static T dot(const Vector<T> &x, const Vector<T> &y);
    // y = alpha * x + y
    void axpy(const T alpha, const Vector<T> &x);
    static void axpy(const T alpha, const Vector<T> &x, Vector<T> &y);
    // this = alpha * x + y // this is completely ignored as call always to the above static method!!!
    //void axpy(const T alpha, const Vector<T> &x, const Vector<T> &y);
    // Elementwise absolute value: vabs[i] = abs( v[i] )
    void elem_abs(Vector<T> &vabs) const;
    // Elementwise inversion: inv[i] = 1/v[i]
    void elem_inv(Vector<T> &vinv) const;
    // Elementwise square root: vsqrt[i] = sqrt( v[i] )
    void elem_sqrt(Vector<T> &vsqrt) const;
    // Elementwise exponential: vexp[i] = exp( v[i] )
    void elem_exp(Vector<T> &vexp) const;
    // Elementwise natural logarithm: vlog[i] = log( v[i] )
    void elem_log(Vector<T> &vlog) const;
    // Elementwise exponential after minus 1: vexpm1[i] = exp(v[i]) - 1
    void elem_expm1(Vector<T> &vexpm1) const;
    // Elementwise natural logarithm of 1 plus: vlog1p[i] = log( 1 + v[i] )
    void elem_log1p(Vector<T> &vlog1p) const;
    // Elementwise hinge loss: h[i] = max{0, 1 - yXw[i]}
    void elem_hinge(Vector<T> &h) const;
    // Elementwise clip: b[i] = min( ub, max(lb, a[i]))
    void elem_clip(const T lb, const T ub, Vector<T> &b) const;
    // Elementwise addition: z[i] = x[i] + y[i]
    static void elem_add(const Vector<T> &x, const Vector<T> &y, Vector<T> &z);
    // Elementwise subtraction: z[i] = x[i] - y[i]
    static void elem_subtract(const Vector<T> &x, const Vector<T> &y, Vector<T> &z);
    // Elementwise multiplication: z[i] = x[i] * y[i]
    static void elem_multiply(const Vector<T> &x, const Vector<T> &y, Vector<T> &z);
    // Elementwise division: z[i] = x[i] / y[i]
    static void elem_divide(const Vector<T> &x, const Vector<T> &y, Vector<T> &z);
    // Elementwise min: z[i] = min( x[i], y[i] )
    static void elem_min(const Vector<T> &x, const Vector<T> &y, Vector<T> &z);
    // Elementwise max: z[i] = max( x[i], y[i] )
    static void elem_max(const Vector<T> &x, const Vector<T> &y, Vector<T> &z);
};
// This class defines a mask for a contiguous part of a Vector.
// !!! Dangling-pointer hazard: using a SubVector after the referred Vector is
// destructed accesses freed memory — the mask never owns the buffer.
template <typename T>
class SubVector final : public Vector<T>
{
public:
    // Use 0-based indexing for starting index
    SubVector(const Vector<T> &v, const size_t start_idx, const size_t sub_length);
    // Re-point the mask at v[start_idx, start_idx + sub_length)
    void recast(const Vector<T> &v, const size_t start_idx, const size_t sub_length);
    // no memory allocated, so do not allow copy constructor and copy-assignment operator
    SubVector() = delete;
    SubVector(const SubVector<T> &) = delete;
    SubVector& operator=(const SubVector<T> &) = delete;
};
}

23
src/Numerics/floattype.h Normal file
Просмотреть файл

@ -0,0 +1,23 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
#pragma once
#include <type_traits>
// Compile-time switches selecting the floating-point precision for dense
// vectors (floatT) and sparse matrices (spmatT), and the matching MPI type.
#define DOUBLE_VECTOR true
#define DOUBLE_MATRIX true
using floatT = std::conditional<DOUBLE_VECTOR, double, float>::type;
using spmatT = std::conditional<DOUBLE_MATRIX, double, float>::type;
// otherwise can simply use the following
//using floatT = double;
//using spmatT = float;
#if DOUBLE_VECTOR
#define MPI_VECTOR_TYPE MPI_DOUBLE
#else
#define MPI_VECTOR_TYPE MPI_FLOAT
#endif
// NOTE(review): no MPI_MATRIX_TYPE counterpart exists for DOUBLE_MATRIX —
// confirm matrix data is never transferred over MPI.

740
src/Numerics/functions.cpp Normal file
Просмотреть файл

@ -0,0 +1,740 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
#include <assert.h>
#include <math.h>
#include <omp.h>
#include <cstdint>
#include <iostream>
// OpenMP 3.0 supports unsigned for-loop iterator, so can replace std::int64_t by size_t
#include "functions.h"
using std::int64_t;
namespace functions
{
// mse for linear regression: (1/n)*||X*w - y||^2 where n is length of y
floatT regression_mse(const SparseMatrixCSR &X, const Vector &y, const Vector &w)
{
    assert(X.nrows() == y.length() && X.ncols() == w.length());
    Vector yp(y);
    X.aAxby(1, w, -1, yp); // yp = X * w - y
    // yp.dot(yp) gives ||yp||^2 directly, avoiding the sqrt-then-square
    // round trip of pow(yp.norm2(), 2)
    return yp.dot(yp) / yp.length();
}
// Count predictions whose sign disagrees with the label: sign(Xi'*w) != yi
size_t binary_error_count(const SparseMatrixCSR &X, const Vector &y, const Vector &w)
{
    assert(X.nrows() == y.length() && X.ncols() == w.length());
    Vector margins(y.length());
    X.aAxby(1, w, 0, margins);                  // margins = X * w
    Vector::elem_multiply(y, margins, margins); // margins[i] = yi * (Xi'*w)
    // a negative margin means the predicted sign disagrees with the label
    return margins.count_negative();
}
// Fraction of examples with sign(Xi'*w) != yi (a zero prediction counts half)
floatT binary_error_rate(const SparseMatrixCSR &X, const Vector &y, const Vector &w)
{
    assert(X.nrows() == y.length() && X.ncols() == w.length());
    Vector pred(y.length());
    X.aAxby(1, w, 0, pred); // pred = X * w
    pred.to_sign();         // only keep signs in {-1, 0, +1}
    // pred'y / n is +1 when all predictions agree, -1 when none do;
    // map that agreement linearly onto an error rate in [0, 1]
    return (1 - pred.dot(y) / y.length()) / 2;
}
// Member functions of SquareLoss class: f(t) = (1/2)*(t-b)^2 ---------------------
// Scalar loss at t = Xi'*w with target b = yi
floatT SquareLoss::loss(const floatT Xiw, const floatT yi) const
{
    const floatT d = Xiw - yi;
    return 0.5f * d * d; // d*d instead of pow(d, 2): same value, much cheaper
}
void SquareLoss::loss(const Vector &Xw, const Vector y, Vector &loss) const
{
assert(Xw.length() == y.length() && y.length() == loss.length());
size_t len = y.length();
#pragma omp parallel for
for (int64_t i = 0; i < len; i++)
{
loss[i] = 0.5f * pow(Xw[i] - y[i], 2);
}
}
// return F(Xw) = sum_i 0.5*(Xw[i]-y[i])^2
floatT SquareLoss::sum_loss(const Vector &Xw, const Vector &y) const
{
    assert(Xw.length() == y.length());
    size_t len = y.length();
    floatT sum = 0;
    #pragma omp parallel for reduction(+:sum)
    for (int64_t i = 0; i < len; i++)
    {
        const floatT d = Xw[i] - y[i]; // d*d avoids the cost of pow(d, 2)
        sum += 0.5f * d * d;
    }
    return sum;
}
// return theta-weighted loss: sum_i 0.5*(Xw[i]-y[i])^2 * theta[i]
floatT SquareLoss::sum_loss(const Vector &theta, const Vector &Xw, const Vector &y) const
{
    assert(Xw.length() == y.length() && theta.length() == y.length());
    size_t len = y.length();
    floatT sum = 0;
    #pragma omp parallel for reduction(+:sum)
    for (int64_t i = 0; i < len; i++)
    {
        const floatT d = Xw[i] - y[i]; // d*d avoids the cost of pow(d, 2)
        sum += 0.5f * d * d * theta[i];
    }
    return sum;
}
// return di = f'(Xiw) = Xiw - yi
floatT SquareLoss::derivative(const floatT Xiw, const floatT yi) const
{
    const floatT residual = Xiw - yi;
    return residual;
}
// return d = F'(Xw) = Xw - y, computed in one vectorized subtraction
void SquareLoss::derivative(const Vector &Xw, const Vector &y, Vector &d) const
{
    assert(Xw.length() == y.length() && y.length() == d.length());
    Vector::elem_subtract(Xw, y, d); // d = Xw - y
}
// f''(t) = 1 everywhere for the square loss.
floatT SquareLoss::second_derivative(const floatT Xiw, const floatT yi) const
{
    return 1;
}
// Vector form of the (constant) second derivative: d[i] = 1 for all i.
void SquareLoss::second_derivative(const Vector &Xw, const Vector &y, Vector &d) const
{
    assert(Xw.length() == y.length() && y.length() == d.length());
    d.fill(1);
}
// Conjugate of the square loss: f*(a) = a^2/2 + y*a.
floatT SquareLoss::conjugate(const floatT ai, const floatT yi) const
{
    const floatT quadratic = 0.5f * ai*ai;
    return quadratic + yi*ai;
}
void SquareLoss::conjugate(const Vector &a, const Vector &y, Vector &conj) const
{
assert(a.length() == y.length() && y.length() == conj.length());
size_t len = y.length();
#pragma omp parallel for
for (int64_t i = 0; i < len; i++)
{
conj[i] = 0.5f * a[i] * a[i] + y[i] * a[i];
}
}
// return f*(a) = (1/2)*||a||^2 + a'*y
floatT SquareLoss::sum_conjugate(const Vector &a, const Vector &y) const
{
    assert(a.length() == y.length());
    const floatT quadratic = 0.5f * a.dot(a);
    return quadratic + a.dot(y);
}
// Weighted conjugate sum: sum_i theta[i]*(a[i]^2/2 + y[i]*a[i]).
floatT SquareLoss::sum_conjugate(const Vector &theta, const Vector &a, const Vector &y) const
{
    assert(a.length() == y.length() && theta.length() == y.length());
    const size_t n = y.length();
    floatT total = 0;
#pragma omp parallel for reduction(+:total)
    for (int64_t k = 0; k < n; k++)
    {
        total += (0.5f * a[k] * a[k] + y[k] * a[k]) * theta[k];
    }
    return total;
}
// Derivative of the conjugate: (f*)'(a) = a + y.
floatT SquareLoss::conj_derivative(const floatT ai, const floatT yi) const
{
    return ai + yi;
}
// Vectorized conjugate derivative: cd = a + y.
void SquareLoss::conj_derivative(const Vector &a, const Vector &y, Vector &cd) const
{
    assert(a.length() == y.length() && y.length() == cd.length());
    Vector::elem_add(a, y, cd); // cd = a + y
}
// Return argmin_b {sigma*(f*)(b) + (1/2)*||b - a||_2^2}
// Closed form for the square loss: b = (a - sigma*y)/(1 + sigma).
floatT SquareLoss::conj_prox(const floatT sigma, const floatT ai, const floatT yi) const
{
    const floatT shifted = ai - sigma*yi;
    return shifted / (1 + sigma);
}
// Return argmin_b {sigma*(f*)(b) + (1/2)*||b - a||_2^2}
void SquareLoss::conj_prox(const floatT sigma, const Vector &a, const Vector &y, Vector &u) const
{
assert(a.length() == y.length() && y.length() == u.length());
size_t len = a.length();
#pragma omp parallel for
for (int64_t i = 0; i < len; i++)
{
u[i] = (a[i] - sigma*y[i]) / (1 + sigma);
}
}
// Member functions of LogisticLoss class ------------------------------------------
// deltabd: margin kept from the boundary of [-1,0] before taking logarithms;
// epsilon/maxiter: stopping tolerance and cap for the Newton solver in conj_prox().
LogisticLoss::LogisticLoss(const floatT deltabd, const floatT epsilon, const int maxiter)
{
    assert(deltabd > 0 && deltabd < 1 && epsilon > 0 && maxiter > 0);
    _deltabd = deltabd;
    _epsilon = epsilon;
    _maxiter = maxiter;
    // Precompute the conjugate value and derivative used when the dual
    // variable falls within deltabd of the boundary (avoids log(0)).
    _xentrpy = deltabd*log(deltabd) + (1 - deltabd)*log1p(-deltabd);
    _xderiva = log1p(-deltabd) - log(deltabd);
}
// log(1+exp(-y*t)) with guards against exp over/underflow for |y*t| > 50.
floatT LogisticLoss::loss(const floatT Xiw, const floatT yi) const
{
    const floatT margin = yi * Xiw;
    if (margin > 50) return 0;        // exp(-margin) underflows; loss is ~0
    if (margin < -50) return -margin; // log1p(exp(-m)) ~ -m for very negative m
    return log1p(exp(-margin));
}
void LogisticLoss::loss(const Vector &Xw, const Vector y, Vector &loss) const
{
assert(Xw.length() == y.length() && y.length() == loss.length());
size_t len = y.length();
#pragma omp parallel for
for (int64_t i = 0; i < len; i++)
{
loss[i] = this->loss(Xw[i], y[i]);
}
}
// Return sum_i log(1+exp(-y[i]*Xw[i])), accumulated in double via OpenMP.
floatT LogisticLoss::sum_loss(const Vector &Xw, const Vector &y) const
{
    assert(Xw.length() == y.length());
    const size_t n = y.length();
    double total = 0; // double accumulator reduces rounding over long sums
#pragma omp parallel for reduction(+:total)
    for (int64_t k = 0; k < n; k++)
    {
        total += this->loss(Xw[k], y[k]);
    }
    return floatT(total);
}
// Weighted loss sum: sum_i theta[i]*log(1+exp(-y[i]*Xw[i])).
floatT LogisticLoss::sum_loss(const Vector &theta, const Vector &Xw, const Vector &y) const
{
    assert(Xw.length() == y.length() && theta.length() == y.length());
    const size_t n = y.length();
    double total = 0; // double accumulator reduces rounding over long sums
#pragma omp parallel for reduction(+:total)
    for (int64_t k = 0; k < n; k++)
    {
        total += this->loss(Xw[k], y[k]) * theta[k];
    }
    return floatT(total);
}
// f'(y*t) scaled by y so the gradient is sum_i d_i*X[i,:].
floatT LogisticLoss::derivative(const floatT Xiw, const floatT yi) const
{
    const floatT denom = 1 + exp(yi*Xiw);
    return -yi / denom;
}
// d[i] = -y[i]/(1+exp(y[i]*Xw[i]) so that gradient is expressed as sum_i d[i]*X[i,:]
void LogisticLoss::derivative(const Vector &Xw, const Vector &y, Vector &d) const
{
assert(Xw.length() == y.length() && Xw.length() == y.length());
size_t len = y.length();
#pragma omp parallel for
for (int64_t i = 0; i < len; i++)
{
d[i] = -y[i] / (1 + exp(y[i] * Xw[i]));
}
}
// f''(y*t) = 1/(2 + e^{y t} + e^{-y t}); always positive (convexity).
floatT LogisticLoss::second_derivative(const floatT Xiw, const floatT yi) const
{
    const floatT m = yi*Xiw;
    return 1.0 / (2 + exp(m) + exp(-m));
}
// d[i] = -y[i]/(1+exp(y[i]*Xw[i]) so that gradient is expressed as sum_i d[i]*X[i,:]
void LogisticLoss::second_derivative(const Vector &Xw, const Vector &y, Vector &d) const
{
assert(Xw.length() == y.length() && Xw.length() == y.length());
size_t len = y.length();
#pragma omp parallel for
for (int64_t i = 0; i < len; i++)
{
d[i] = 1.0 / (2 + exp(y[i] * Xw[i]) + exp(-y[i] * Xw[i]));
}
}
bool LogisticLoss::conj_feasible(const Vector &a, const Vector &y) const
{
assert(a.length() == y.length());
//Vector ya(y.length());
//Vector::elem_multiply(y, a, ya);
//return (ya.max() <= 0 && ya.min() >= -1);
size_t len = y.length();
for (size_t i = 0; i < len; i++)
{
if (!this->conj_feasible(a[i], y[i])) { return false; }
}
return true;
}
// Conjugate of the logistic loss: with t = yi*ai in [-1,0],
// f*(t) = (-t)*log(-t) + (1+t)*log(1+t)  (negative binary entropy).
// Within _deltabd of either endpoint the precomputed _xentrpy is returned;
// by symmetry of the entropy the value at t = -_deltabd equals the value at
// t = -1 + _deltabd, so one constant serves both boundaries.
floatT LogisticLoss::conjugate(const floatT ai, const floatT yi) const
{
// Need to check if all a_j belongs to [-1, 0], but can ignore if initialized this way
floatT t = yi*ai;
assert(t >= -1 && t <= 0);
return t > -_deltabd ? _xentrpy : t < -1.0 + _deltabd ? _xentrpy : (-t) * log(-t) + (1.0 + t) * log1p(t);
}
void LogisticLoss::conjugate(const Vector &a, const Vector &y, Vector &conj) const
{
assert(a.length() == y.length() && y.length() == conj.length());
size_t len = y.length();
#pragma omp parallel for
for (int64_t i = 0; i < len; i++)
{
conj[i] = this->conjugate(a[i], y[i]);
}
}
// Return (f*)(a) = sum_i (-ya[i])*log(-ya[i]) + (1+ya[i])*log(1+ya[i]), ya[i] in [-1,0].
floatT LogisticLoss::sum_conjugate(const Vector &a, const Vector &y) const
{
    assert(a.length() == y.length());
    const size_t n = a.length();
    double total = 0; // double accumulator reduces rounding over long sums
#pragma omp parallel for reduction(+:total)
    for (int64_t k = 0; k < n; k++)
    {
        total += this->conjugate(a[k], y[k]);
    }
    return floatT(total);
}
// Weighted conjugate sum: sum_i theta[i]*f*(a[i]; y[i]).
floatT LogisticLoss::sum_conjugate(const Vector &theta, const Vector &a, const Vector &y) const
{
    assert(a.length() == y.length() && theta.length() == y.length());
    const size_t n = a.length();
    double total = 0; // double accumulator reduces rounding over long sums
#pragma omp parallel for reduction(+:total)
    for (int64_t k = 0; k < n; k++)
    {
        total += this->conjugate(a[k], y[k]) * theta[k];
    }
    return floatT(total);
}
// Derivative of the conjugate w.r.t. ai: with t = yi*ai in [-1,0],
// (f*)'(ai) = yi*(log(1+t) - log(-t)).  Near the endpoints (within _deltabd)
// the log terms diverge, so the precomputed slope +/- yi*_xderiva is used instead.
floatT LogisticLoss::conj_derivative(const floatT ai, const floatT yi) const
{
floatT t = yi*ai;
assert(t >= -1 && t <= 0);
return t > -_deltabd ? yi*_xderiva : t < -1.0 + _deltabd ? -yi*_xderiva : yi*(log1p(t) - log(-t));
}
void LogisticLoss::conj_derivative(const Vector &a, const Vector &y, Vector &cd) const
{
assert(a.length() == y.length() && y.length() == cd.length());
size_t len = y.length();
#pragma omp parallel for
for (int64_t i = 0; i < len; i++)
{
cd[i] = this->conj_derivative(a[i], y[i]);
}
}
// Return argmin_b {sigma*(f*)(b) + (1/2)*(b - a)^2}
// Solved by a safeguarded Newton iteration on the optimality condition
//   b - y*a + sigma*log((1+b)/(-b)) = 0   for b in (-1, 0),
// with every iterate clamped back into [_deltabd-1, -_deltabd].
floatT LogisticLoss::conj_prox(const floatT sigma, const floatT ai, const floatT yi) const
{
// using double here is critical for numerical stability
double lb = _deltabd - 1.0, ub = -_deltabd;
double epsilon = _epsilon;
int maxiter = _maxiter;
double yai = ai*yi; // need to convert sign first to use one code
double bi = fmin(ub, fmax(lb, yai)); // start from the clamped input
double f = 0, df = 0;
for (int k = 0; k < maxiter; ++k)
{
f = bi - yai + sigma * log((1.0 + bi) / (-bi)); // residual of optimality condition
if (fabs(f) < epsilon) break;
df = 1.0 - sigma / (bi * (1.0 + bi)); // bi*(1+bi) < 0 on (-1,0), so df > 1
bi -= f / df; // Newton step
bi = fmin(ub, fmax(lb, bi)); // critical for convergence
}
return floatT(bi * yi); // map back to the original sign convention
}
// Return argmin_b {sigma*(f*)(b) + (1/2)*||b - a||_2^2}
void LogisticLoss::conj_prox(const floatT sigma, const Vector &a, const Vector &y, Vector &b) const
{
assert(a.length() == y.length() && y.length() == b.length());
size_t len = y.length();
#pragma omp parallel for
for (int64_t i = 0; i < len; i++)
{
b[i] = this->conj_prox(sigma, a[i], y[i]);
}
}
// Member functions of SmoothedHinge class ------------------------------------------
// Hinge loss with quadratic smoothing of width _delta near the kink.
floatT SmoothedHinge::loss(const floatT Xiw, const floatT yi) const
{
    const floatT margin = yi * Xiw;
    const floatT gap = 1 - margin;
    if (margin >= 1) return 0;                  // correct with margin: no loss
    if (gap >= _delta) return gap - _delta / 2; // linear (hinge) region
    return gap*gap / (2 * _delta);              // quadratic smoothing region
}
void SmoothedHinge::loss(const Vector &Xw, const Vector y, Vector &loss) const
{
assert(Xw.length() == y.length() && y.length() == loss.length());
size_t len = y.length();
#pragma omp parallel for
for (int64_t i = 0; i < len; i++)
{
loss[i] = this->loss(Xw[i], y[i]);
}
}
// Total smoothed-hinge loss over all examples.
floatT SmoothedHinge::sum_loss(const Vector &Xw, const Vector &y) const
{
    assert(Xw.length() == y.length());
    const size_t n = y.length();
    floatT total = 0;
#pragma omp parallel for reduction(+:total)
    for (int64_t k = 0; k < n; ++k)
    {
        total += this->loss(Xw[k], y[k]);
    }
    return total;
}
// Weighted total smoothed-hinge loss: sum_i theta[i]*loss_i.
floatT SmoothedHinge::sum_loss(const Vector &theta, const Vector &Xw, const Vector &y) const
{
    assert(Xw.length() == y.length() && theta.length() == y.length());
    const size_t n = y.length();
    floatT total = 0;
#pragma omp parallel for reduction(+:total)
    for (int64_t k = 0; k < n; ++k)
    {
        total += this->loss(Xw[k], y[k]) * theta[k];
    }
    return total;
}
// f'(y*t) scaled by y so the gradient is sum_i d_i*X[i,:].
floatT SmoothedHinge::derivative(const floatT Xiw, const floatT yi) const
{
    const floatT margin = yi * Xiw;
    floatT g;
    if (margin > 1) g = 0;                       // flat region
    else if (margin < 1 - _delta) g = -1;        // linear hinge region
    else g = (margin - 1) / _delta;              // smoothed region
    return yi*g;
}
// Compute d[i] so that gradient is expressed as sum_i d[i]*X[i,:]
void SmoothedHinge::derivative(const Vector &Xw, const Vector &y, Vector &d) const
{
assert(Xw.length() == y.length() && Xw.length() == y.length());
size_t len = y.length();
#pragma omp parallel for
for (int64_t i = 0; i < len; i++)
{
d[i] = this->derivative(Xw[i], y[i]);
}
}
// Second derivative of the smoothed hinge at t = Xiw: 1/_delta inside the
// smoothing interval (in terms of y*t), 0 elsewhere.
floatT SmoothedHinge::second_derivative(const floatT Xiw, const floatT yi) const
{
    floatT yXwi = yi * Xiw;
    floatT di = yXwi>1 ? 0 : yXwi < 1 - _delta ? 0 : 1.0 / _delta;
    // bug fix: the chain rule gives yi*yi*f'' = f'' for labels yi in {-1,+1};
    // the previous "return yi*di" flipped the sign for yi = -1, yielding a
    // negative curvature value (cf. LogisticLoss::second_derivative, which
    // correctly omits the yi factor).
    return di;
}
// Compute d[i] so that gradient is expressed as sum_i d[i]*X[i,:]
void SmoothedHinge::second_derivative(const Vector &Xw, const Vector &y, Vector &d) const
{
assert(Xw.length() == y.length() && Xw.length() == y.length());
size_t len = y.length();
#pragma omp parallel for
for (int64_t i = 0; i < len; i++)
{
d[i] = this->second_derivative(Xw[i], y[i]);
}
}
bool SmoothedHinge::conj_feasible(const Vector &a, const Vector &y) const
{
assert(a.length() == y.length());
//Vector ya(y.length());
//Vector::elem_multiply(y, a, ya);
//return (ya.max() <= 0 && ya.min() >= -1);
size_t len = y.length();
for (size_t i = 0; i < len; i++)
{
if (!this->conj_feasible(a[i], y[i])) { return false; }
}
return true;
}
// Conjugate of the smoothed hinge: f*(a) = y*a + (delta/2)*(y*a)^2, y*a in [-1,0].
floatT SmoothedHinge::conjugate(const floatT ai, const floatT yi) const
{
    const floatT yai = yi*ai;
    assert(yai >= -1 && yai <= 0);
    return yai + (_delta / 2)*(yai*yai);
}
void SmoothedHinge::conjugate(const Vector &a, const Vector &y, Vector &conj) const
{
assert(a.length() == y.length() && y.length() == conj.length());
size_t len = y.length();
#pragma omp parallel for
for (int64_t i = 0; i < len; i++)
{
conj[i] = this->conjugate(a[i], y[i]);
}
}
// Sum of conjugate values over all components (asserts feasibility per element).
floatT SmoothedHinge::sum_conjugate(const Vector &a, const Vector &y) const
{
    assert(a.length() == y.length());
    const size_t n = y.length();
    floatT total = 0;
#pragma omp parallel for reduction(+:total)
    for (int64_t k = 0; k < n; k++)
    {
        // scalar conjugate() performs the same feasibility assert and formula
        total += this->conjugate(a[k], y[k]);
    }
    return total;
}
// Weighted sum of conjugate values: sum_i theta[i]*f*(a[i]; y[i]).
floatT SmoothedHinge::sum_conjugate(const Vector &theta, const Vector &a, const Vector &y) const
{
    assert(a.length() == y.length() && theta.length() == y.length());
    const size_t n = y.length();
    floatT total = 0;
#pragma omp parallel for reduction(+:total)
    for (int64_t k = 0; k < n; k++)
    {
        // scalar conjugate() performs the same feasibility assert and formula
        total += this->conjugate(a[k], y[k]) * theta[k];
    }
    return total;
}
// Derivative of the conjugate: (f*)'(a) = delta*a + y (valid for y*a in [-1,0]).
floatT SmoothedHinge::conj_derivative(const floatT ai, const floatT yi) const
{
    const floatT yai = yi*ai;
    assert(yai >= -1 && yai <= 0);
    return _delta*ai + yi;
}
// Vectorized conjugate derivative: cd = y + delta*a.
void SmoothedHinge::conj_derivative(const Vector &a, const Vector &y, Vector &cd) const
{
    assert(a.length() == y.length() && y.length() == cd.length());
    cd.copy(y);                  // cd = y
    Vector::axpy(_delta, a, cd); // cd += delta * a
}
// Return argmin_b {sigma*(f*)(b) + (1/2)*||b - a||_2^2}
// Closed form followed by projection of y*b onto [-1, 0].
floatT SmoothedHinge::conj_prox(const floatT sigma, const floatT ai, const floatT yi) const
{
    // should not assert yi*ai in [-1, 0] here because ai can be arbitrary
    const floatT unclipped = (yi*ai - sigma) / (1 + sigma*_delta);
    floatT clipped;
    if (unclipped > 0) clipped = 0;
    else if (unclipped < -1) clipped = -1;
    else clipped = unclipped;
    return yi*clipped;
}
// Return argmin_b {sigma*(f*)(b) + (1/2)*||b - a||_2^2}, elementwise.
void SmoothedHinge::conj_prox(const floatT sigma, const Vector &a, const Vector &y, Vector &b) const
{
    assert(a.length() == y.length() && y.length() == b.length());
    size_t len = y.length();
    floatT delta = _delta;
    // fixed: with default(none), every referenced variable must be listed;
    // `sigma` was missing, which fails to compile under OpenMP 3.1+ compilers
    // (const variables are no longer predetermined shared there).
#pragma omp parallel for default(none) shared(a,y,b,len,delta,sigma)
    for (int64_t i = 0; i < len; i++)
    {
        floatT bi = (y[i]*a[i] - sigma) / (1 + sigma*delta);
        b[i] = y[i] * (bi > 0 ? 0 : bi < -1 ? -1 : bi);
    }
}
// Member functions of SquaredL2Norm class ------------------------------------------
// g(w) = (1/2)*||w||_2^2.
floatT SquaredL2Norm::operator()(const Vector &w) const
{
    const floatT nrm = w.norm2();
    return 0.5f * pow(nrm, 2);
}
// Compute f*(v) = max_w {v'*w - f(w)} = (1/2)*||v||_2^2 (self-conjugate).
floatT SquaredL2Norm::conjugate(const Vector &v) const
{
    const floatT nrm = v.norm2();
    return 0.5f * pow(nrm, 2);
}
// Compute f*(v) with the maximizing w already known; the closed form does not
// actually need w, so this forwards to the single-argument overload.
floatT SquaredL2Norm::conjugate(const Vector &v, const Vector &w) const
{
    assert(v.length() == w.length());
    return this->conjugate(v);
}
// prox() operator computes u = argmin_u { tau*f(u) + (1/2)*||u-w||_2^2 },
// which for the squared L2 norm is simply w/(1+tau).
void SquaredL2Norm::prox(const floatT tau, const Vector &w, Vector &u) const
{
    assert(w.length() == u.length());
    if (&w != &u) { u.copy(w); } // support in-place use (&w == &u)
    u.scale(1 / (1 + tau));
}
// Given vector v, compute g = argmax_w {v'*w - f(w)}; for (1/2)||w||^2 this is v.
void SquaredL2Norm::conj_grad(const Vector &v, Vector &w) const
{
    assert(v.length() == w.length());
    if (&w != &v) { w.copy(v); } // no-op when called in place
}
// Member functions of ElasticNet class ------------------------------------------
// g(w) = (1/2)*||w||_2^2 + lambda1*||w||_1.
floatT ElasticNet::operator()(const Vector &w) const
{
    const floatT l2part = 0.5f * pow(w.norm2(), 2);
    return l2part + _lambda1 * w.norm1();
}
// Compute f*(v) = max_w {v'*w - f(w)} by first finding the maximizer via
// conj_grad(), then evaluating the Fenchel expression.
floatT ElasticNet::conjugate(const Vector &v) const
{
    Vector maximizer(v.length());
    this->conj_grad(v, maximizer);
    return v.dot(maximizer) - (*this)(maximizer);
}
// Compute f*(v) = v'*w - f(w) with the maximizing w already computed.
floatT ElasticNet::conjugate(const Vector &v, const Vector &w) const
{
    assert(v.length() == w.length());
    const floatT inner = v.dot(w);
    return inner - (*this)(w);
}
// Compute u = argmin_u { tau*f(u) + (1/2)*||u-w||_2^2 }
// Note that we allow u and w being the same Vector, that is, allow &w==&u
void ElasticNet::prox(const floatT tau, const Vector &w, Vector &u) const
{
assert(w.length() == u.length());
floatT tl2 = 1 + tau;
floatT tl1 = tau * _lambda1;
size_t len = w.length();
#pragma omp parallel for
for(int64_t i=0; i<len; i++)
{
floatT vneg = w[i] + tl1;
floatT vpos = w[i] - tl1;
u[i] = vpos > 0 ? vpos / tl2 : vneg < 0 ? vneg / tl2 : 0;
}
}
// Given vector v, compute g = argmax_w {v'*w - f(w)} = argmin_w {-v'*w + f(w)}
void ElasticNet::conj_grad(const Vector &v, Vector &w) const
{
size_t len = v.length();
floatT lambda1 = _lambda1;
#pragma omp parallel for
for (int64_t i = 0; i < len; i++)
{
floatT vneg = v[i] + lambda1;
floatT vpos = v[i] - lambda1;
w[i] = vpos > 0 ? vpos : vneg < 0 ? vneg : 0;
}
}
// Member functions of UnitQuadratic class ------------------------------------------
// Allocates the offset point w0 and a scratch buffer of the same dimension.
OffsetQuadratic::OffsetQuadratic(const size_t dim)
{
    _w0 = new Vector(dim); // offset point w0
    _dw = new Vector(dim); // scratch storage for w - w0
}
OffsetQuadratic::~OffsetQuadratic()
{
    // delete on a null pointer is a no-op, so no guards are required
    delete _w0;
    delete _dw;
}
// R(w) = (1/2)*||w - w0||_2^2, using the preallocated *_dw as scratch storage.
// NOTE(review): this const member writes through _dw, so concurrent calls on
// the same object would race on the scratch buffer — confirm callers are
// single-threaded per instance.
floatT OffsetQuadratic::operator()(const Vector &w) const
{
Vector::elem_subtract(w, *_w0, *_dw);
//Vector _dw(w);
//Vector::axpy(-1, *_w0, *_dw);
return 0.5f * pow(_dw->norm2(), 2);
}
// f*(v) = (1/2)*||v||^2 + v'*w0 (conjugate of the shifted quadratic).
floatT OffsetQuadratic::conjugate(const Vector &v) const
{
    const floatT quadratic = 0.5f * pow(v.norm2(), 2);
    return quadratic + v.dot(*_w0);
}
// Conjugate with the maximizing w supplied; the closed form does not need w,
// so this simply forwards to the single-argument overload.
floatT OffsetQuadratic::conjugate(const Vector &v, const Vector &w) const
{
    assert(v.length() == w.length());
    return this->conjugate(v);
}
// prox: u = argmin_u { tau*(1/2)||u-w0||^2 + (1/2)||u-w||^2 } = (w + tau*w0)/(1+tau).
void OffsetQuadratic::prox(const floatT tau, const Vector &w, Vector &u) const
{
    assert(w.length() == u.length());
    if (&w != &u) { u.copy(w); }  // support in-place invocation
    Vector::axpy(tau, *_w0, u);   // u = w + tau*w0
    u.scale(1.0f / (1.0f + tau)); // divide by (1 + tau)
}
// Given vector v, compute g = argmax_w {v'*w - f(w)} = w0 + v.
// This dense version does not exploit sparsity and can be expensive;
// see the efficient implementation in sdca.
void OffsetQuadratic::conj_grad(const Vector &v, Vector &w) const
{
    assert(v.length() == w.length());
    Vector::elem_add(*_w0, v, w); // w = w0 + v
}
}

224
src/Numerics/functions.h Normal file
Просмотреть файл

@ -0,0 +1,224 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
#pragma once
#include <assert.h>
#include <stdexcept>
#include "floattype.h"
#include "dnvector.h"
#include "spmatrix.h"
namespace functions
{
using Vector = numerics::Vector<floatT>;
using SubVector = numerics::SubVector<floatT>;
using SparseMatrixCSR = numerics::SparseMatrixCSR<spmatT, floatT>;
using SubSparseMatrixCSR = numerics::SubSparseMatrixCSR<spmatT, floatT>;
// Free functions measuring predictor quality on a dataset (X, y):
// regression_mse: mean squared error (1/n)*||X*w - y||^2;
// binary_error_rate / binary_error_count: misclassification rate / count
// for labels y in {-1,+1} under the sign(Xi'*w) decision rule.
floatT regression_mse(const SparseMatrixCSR &X, const Vector &y, const Vector &w);
floatT binary_error_rate(const SparseMatrixCSR &X, const Vector &y, const Vector &w);
size_t binary_error_count(const SparseMatrixCSR &X, const Vector &y, const Vector &w);
// An abstract class to serve as interface for sum of parametrized smooth loss functions
class ISmoothLoss
{
public:
	ISmoothLoss() = default;
	// added: a polymorphic interface needs a virtual destructor so deleting
	// a derived loss through an ISmoothLoss* is well-defined
	virtual ~ISmoothLoss() = default;
	virtual char symbol() const = 0;
	virtual floatT smoothness() const = 0;	// smoothness parameter of single component function
	virtual floatT loss(const floatT Xiw, const floatT yi) const = 0;
	// the vector form of loss() is provided to faciliate weighted sum of losses
	virtual void loss(const Vector &Xw, const Vector y, Vector &loss) const = 0;
	// can also add weighted_sum_loss(), but it can be done by dot(weights, loss)
	virtual floatT sum_loss(const Vector &Xw, const Vector &y) const = 0;
	virtual floatT sum_loss(const Vector &theta, const Vector &Xw, const Vector &y) const = 0;
	// derivative() is defined such that gradient is simply expressed as sum_i d[i]*X[i,:]
	virtual floatT derivative(const floatT Xiw, const floatT yi) const = 0;
	virtual void derivative(const Vector &Xw, const Vector &y, Vector &d) const = 0;
	virtual floatT second_derivative(const floatT Xiw, const floatT yi) const = 0;
	virtual void second_derivative(const Vector &Xw, const Vector &y, Vector &d) const = 0;
	virtual bool conj_feasible(const floatT ai, const floatT yi) const = 0;
	virtual bool conj_feasible(const Vector &a, const Vector &y) const = 0;
	virtual floatT conjugate(const floatT ai, const floatT yi) const = 0;
	// the vector form of conjugate() is provided to faciliate weighted sum of losses
	virtual void conjugate(const Vector &a, const Vector &y, Vector &conj) const = 0;
	virtual floatT sum_conjugate(const Vector &a, const Vector &y) const = 0;
	virtual floatT sum_conjugate(const Vector &theta, const Vector &a, const Vector &y) const = 0;
	virtual floatT conj_derivative(const floatT ai, const floatT yi) const = 0;
	virtual void conj_derivative(const Vector &a, const Vector &y, Vector &cd) const = 0;
	virtual floatT conj_prox(const floatT sigma, const floatT ai, const floatT yi) const = 0;
	virtual void conj_prox(const floatT sigma, const Vector &a, const Vector &y, Vector &u) const = 0;
};
// f_y(t) = (1/2)*(t-y)^2 where y is a real number for linear regression
// Smoothness constant is 1; the conjugate is f*(a) = a^2/2 + y*a, defined on
// all of R, so conj_feasible() is always true.
class SquareLoss : public ISmoothLoss
{
public:
	SquareLoss() {};
	inline char symbol() const override { return '2'; };
	inline floatT smoothness() const override { return 1.0f; };  // smoothness of (1/2)*(x-b)^2
	floatT loss(const floatT Xiw, const floatT yi) const override;
	void loss(const Vector &Xw, const Vector y, Vector &loss) const override;
	floatT sum_loss(const Vector &Xw, const Vector &y) const override;
	floatT sum_loss(const Vector &theta, const Vector &Xw, const Vector &y) const override;
	floatT derivative(const floatT Xiw, const floatT yi) const override;
	void derivative(const Vector &Xw, const Vector &y, Vector &d) const override;
	floatT second_derivative(const floatT Xiw, const floatT yi) const override;
	void second_derivative(const Vector &Xw, const Vector &y, Vector &d) const override;
	bool conj_feasible(const floatT ai, const floatT yi) const override { return true; };
	bool conj_feasible(const Vector &a, const Vector &y) const override { return true; };
	floatT conjugate(const floatT ai, const floatT yi) const override;
	void conjugate(const Vector &a, const Vector &y, Vector &conj) const override;
	floatT sum_conjugate(const Vector &a, const Vector &y) const override;
	floatT sum_conjugate(const Vector &theta, const Vector &a, const Vector &y) const override;
	floatT conj_derivative(const floatT ai, const floatT yi) const override;
	void conj_derivative(const Vector &a, const Vector &y, Vector &cd) const override;
	floatT conj_prox(const floatT sigma, const floatT ai, const floatT yi) const override;
	void conj_prox(const floatT sigma, const Vector &a, const Vector &y, Vector &u) const override;
};
// f_y(t) = log(1+exp(-y*t) where y is +1 or -1 for binary classification
// The conjugate is the negative binary entropy restricted to y*a in [-1,0];
// conj_prox() solves its proximal step with a safeguarded Newton iteration.
class LogisticLoss : public ISmoothLoss
{
private:
	// need to test if the min_log trik is really necessary for numerical stability
	floatT _deltabd;	// boundary margin within [-1, 0] for taking logarithm
	floatT _epsilon;	// stopping accuracy for Newton's method
	int    _maxiter;	// maximum iteration for Newton's method
	floatT _xentrpy;	// entropy when dual variable are close to boundary of [-1,0]
	floatT _xderiva;	// derivative of conjugate function near the boundary
public:
	// may not work as desired when floatT = float, 1 + 1e-12 = 1 anyway!
	LogisticLoss(const floatT deltabd = 1.0e-12f, const floatT epsilon = 1.0e-6f, const int maxiter = 100);
	inline char symbol() const override { return 'l'; };
	inline floatT smoothness() const override { return 0.25; }	// smoothness of log(1+exp(-b*t))
	floatT loss(const floatT Xiw, const floatT yi) const override;
	void loss(const Vector &Xw, const Vector y, Vector &loss) const override;
	floatT sum_loss(const Vector &Xw, const Vector &y) const override;
	floatT sum_loss(const Vector &theta, const Vector &Xw, const Vector &y) const override;
	floatT derivative(const floatT Xiw, const floatT yi) const override;
	void derivative(const Vector &Xw, const Vector &y, Vector &d) const override;
	floatT second_derivative(const floatT Xiw, const floatT yi) const override;
	void second_derivative(const Vector &Xw, const Vector &y, Vector &d) const override;
	bool conj_feasible(const floatT ai, const floatT yi) const override { return (yi*ai <= 0 && yi*ai >= -1); };
	bool conj_feasible(const Vector &a, const Vector &y) const override;
	floatT conjugate(const floatT ai, const floatT yi) const override;
	void conjugate(const Vector &a, const Vector &y, Vector &conj) const override;
	floatT sum_conjugate(const Vector &a, const Vector &y) const override;
	floatT sum_conjugate(const Vector &theta, const Vector &a, const Vector &y) const override;
	floatT conj_derivative(const floatT ai, const floatT yi) const override;
	void conj_derivative(const Vector &a, const Vector &y, Vector &cd) const override;
	floatT conj_prox(const floatT sigma, const floatT ai, const floatT yi) const override;
	void conj_prox(const floatT sigma, const Vector &a, const Vector &y, Vector &u) const override;
};
// smoothed hinge constructed by adding strong convexity to conjugate
// Loss is hinge with a quadratic smoothing zone of width _delta before the
// kink; smoothness constant is therefore 1/_delta.
class SmoothedHinge : public ISmoothLoss
{
private:
	floatT _delta;		// conjugate smoothing parameter
public:
	SmoothedHinge(const floatT delta = 1) { assert(delta > 0); _delta = delta; };
	inline char symbol() const override { return 's'; };
	inline floatT smoothness() const override { return 1.0f / _delta; };	// smoothness by conjugate smoothing
	floatT loss(const floatT Xiw, const floatT yi) const override;
	void loss(const Vector &Xw, const Vector y, Vector &loss) const override;
	floatT sum_loss(const Vector &Xw, const Vector &y) const override;
	floatT sum_loss(const Vector &theta, const Vector &Xw, const Vector &y) const override;
	floatT derivative(const floatT Xiw, const floatT yi) const override;
	void derivative(const Vector &Xw, const Vector &y, Vector &d) const override;
	floatT second_derivative(const floatT Xiw, const floatT yi) const override;
	void second_derivative(const Vector &Xw, const Vector &y, Vector &d) const override;
	bool conj_feasible(const floatT ai, const floatT yi) const override { return (yi*ai <= 0 && yi*ai >= -1); };
	bool conj_feasible(const Vector &a, const Vector &y) const override;
	floatT conjugate(const floatT ai, const floatT yi) const override;
	void conjugate(const Vector &a, const Vector &y, Vector &conj) const override;
	floatT sum_conjugate(const Vector &a, const Vector &y) const override;
	floatT sum_conjugate(const Vector &theta, const Vector &a, const Vector &y) const override;
	floatT conj_derivative(const floatT ai, const floatT yi) const override;
	void conj_derivative(const Vector &a, const Vector &y, Vector &cd) const override;
	floatT conj_prox(const floatT sigma, const floatT ai, const floatT yi) const override;
	void conj_prox(const floatT sigma, const Vector &a, const Vector &y, Vector &u) const override;
};
//================================================================================================
// An abstract class to serve as interface for convex regularization functions
class IUnitRegularizer
{
private:
	Vector *_offset = nullptr;
public:
	IUnitRegularizer() = default;
	// added: a polymorphic interface needs a virtual destructor so deleting
	// a derived regularizer through an IUnitRegularizer* is well-defined
	virtual ~IUnitRegularizer() = default;
	virtual char symbol() const = 0;		// enable customized algorithm implementations
	virtual Vector *offset() const { return _offset; };	// enable offset for derived classes
	floatT convexity() const { return 1.0f; };	// strong convexity parameter
	virtual floatT l1penalty() const { return 0.0f; };	// return l1 penalty parameter if any
	virtual floatT operator()(const Vector &w) const = 0;	// return function value
	// Compute f*(v) = max_w {v'*w - f(w)}, can also use maxing w if already computed
	virtual floatT conjugate(const Vector &v) const = 0;
	virtual floatT conjugate(const Vector &v, const Vector &w) const = 0;
	// prox() operator computes u = argmin_u { tau*f(u) + (1/2)*||u-w||_2^2 }
	virtual void prox(const floatT tau, const Vector &w, Vector &u) const = 0;
	// return gradient of conjugate function: argmax_w {v'*w - f(w)} = argmin_w {-v'*w + f(w)}
	virtual void conj_grad(const Vector &v, Vector &w) const = 0;
};
// g(w) = (1/2)*||w||_2^2
// Self-conjugate; prox is scaling by 1/(1+tau) and conj_grad is the identity.
class SquaredL2Norm : public IUnitRegularizer
{
public:
	SquaredL2Norm() {};
	inline char symbol() const override { return '2'; };
	floatT operator()(const Vector &w) const override;
	floatT conjugate(const Vector &v) const override;
	floatT conjugate(const Vector &v, const Vector &w) const override;
	void prox(const floatT tau, const Vector &w, Vector &u) const override;
	void conj_grad(const Vector &w, Vector &g) const override;
};
// g(w) = (1/2)*||w||_2^2 + lambda1*||w||_1
// prox soft-thresholds at tau*lambda1 then shrinks; conj_grad soft-thresholds at lambda1.
class ElasticNet : public IUnitRegularizer
{
private:
	floatT _lambda1;	// weight of the l1 term
public:
	ElasticNet(const floatT lambda1) { assert(lambda1 > 0); _lambda1 = lambda1; };
	inline char symbol() const override { return 'e'; };
	inline floatT l1penalty() const override { return _lambda1; };
	floatT operator()(const Vector &w) const override;
	floatT conjugate(const Vector &v) const override;
	floatT conjugate(const Vector &v, const Vector &w) const override;
	void prox(const floatT tau, const Vector &w, Vector &u) const override;
	void conj_grad(const Vector &w, Vector &g) const override;
};
// R(w) = (1/2)*||w-w0||_2^2
class OffsetQuadratic : public IUnitRegularizer
{
private:
Vector *_w0;
Vector *_dw;
public:
OffsetQuadratic(const size_t dim);
~OffsetQuadratic();
void update_offset(const Vector &w0) { _w0->copy(w0); };
Vector *offset() const override { return _w0; };
char symbol() const { return 'q'; };
floatT operator()(const Vector &w) const override;
floatT conjugate(const Vector &v) const override;
floatT conjugate(const Vector &v, const Vector &w) const override;
void prox(const floatT tau, const Vector &w, Vector &u) const override;
void conj_grad(const Vector &w, Vector &g) const override;
};
// need to implement the following more sophisticated regularizer for sparsity
// g(w) = (3/2)log(d)||w||_q^2 + (lambda1/lambda2)||w||_1 (when feature has low infty norm)
}

Просмотреть файл

@ -0,0 +1,573 @@
#include <assert.h>
#include <math.h>
#include <omp.h>
#include <cstdint>
// OpenMP 3.0 supports unsigned for-loop iterator, so can replace std::int64_t by size_t
#include "functions.h"
using std::int64_t;
namespace randalgms
{
// mse for linear regression (1/n)*||X*w - y||^2 where n is length of y
floatT regression_mse(const SparseMatrixCSR &X, const Vector &y, const Vector &w)
{
    assert(X.nrows() == y.length() && X.ncols() == w.length());
    Vector residual(y);
    X.aAxby(1, w, -1, residual); // residual = X*w - y
    return pow(residual.norm2(), 2) / residual.length();
}
// Count the examples i for which sign(X[i,:]*w) != y[i] (prediction errors).
size_t binary_error_count(const SparseMatrixCSR &X, const Vector &y, const Vector &w)
{
    assert(X.nrows() == y.length() && X.ncols() == w.length());
    Vector pred(y.length());
    X.aAxby(1, w, 0, pred);               // pred = X * w
    Vector::elem_multiply(y, pred, pred); // pred[i] < 0 <=> error
    return pred.count_negative();
}
// Error rate: fraction of examples with sign(Xi'*w) != yi.
floatT binary_error_rate(const SparseMatrixCSR &X, const Vector &y, const Vector &w)
{
    assert(X.nrows() == y.length() && X.ncols() == w.length());
    Vector signs(y.length());
    X.aAxby(1, w, 0, signs); // signs = X * w
    signs.to_sign();         // map predictions into {-1, 0, +1}
    return (1 - signs.dot(y) / y.length()) / 2;
}
// Member functions of SquareLoss class: f(t) = (1/2)*(t-b)^2 ---------------------
// fixed: removed `inline` — an inline member function defined only in this .cpp
// provides no definition to other translation units (ODR), causing link errors;
// also declared the first parameter as floatT to match the class declaration
// when floatT is not float.
floatT SquareLoss::loss(const floatT Xiw, const floatT yi) const
{
    return pow(Xiw - yi, 2) / 2;
}
// Elementwise square losses: loss[i] = (Xw[i]-y[i])^2 / 2.
void SquareLoss::loss(const Vector &Xw, const Vector y, Vector &loss) const
{
    assert(Xw.length() == y.length() && y.length() == loss.length());
    const size_t n = y.length();
#pragma omp parallel for
    for (int64_t k = 0; k < n; k++)
    {
        loss[k] = pow(Xw[k] - y[k], 2) / 2;
    }
}
// return F(Xw) = sum_i 0.5*(Xw[i]-y[i])^2
floatT SquareLoss::sum_loss(const Vector &Xw, const Vector &y) const
{
    assert(Xw.length() == y.length());
    const size_t n = y.length();
    floatT total = 0;
#pragma omp parallel for reduction(+:total)
    for (int64_t k = 0; k < n; k++)
    {
        const floatT residual = Xw[k] - y[k];
        total += residual*residual / 2;
    }
    return total;
}
// return di = f'(Xiw) = Xiw - yi
// fixed: removed `inline` — an inline member defined only in this .cpp has no
// definition visible to other translation units, causing link errors.
floatT SquareLoss::derivative(const floatT Xiw, const floatT yi) const
{
    return Xiw - yi;
}
// return d = F'(Xw) = Xw - y, in one vectorized subtraction
void SquareLoss::derivative(const Vector &Xw, const Vector &y, Vector &d) const
{
    assert(Xw.length() == y.length() && y.length() == d.length());
    Vector::elem_subtract(Xw, y, d); // d = Xw - y
}
// Conjugate of the square loss: f*(a) = a^2/2 + y*a.
// fixed: removed `inline` (no definition for other translation units) and pass
// ai by value — a const floatT& parameter does not match a by-value declaration.
floatT SquareLoss::conjugate(const floatT ai, const floatT yi) const
{
    return ai*ai / 2 + yi*ai;
}
void SquareLoss::conjugate(const Vector &a, const Vector &y, Vector &conj) const
{
assert(a.length() == y.length() && y.length() == conj.length());
size_t len = y.length();
#pragma omp parallel for
for (int64_t i = 0; i < len; i++)
{
conj[i] = a[i] * a[i] / 2 + y[i] * a[i];
}
}
// return f*(a) = (1/2)*||a||^2 + a'*y
floatT SquareLoss::sum_conjugate(const Vector &a, const Vector &y) const
{
    assert(a.length() == y.length());
    const floatT quadratic = a.dot(a) / 2;
    return quadratic + a.dot(y);
}
// Return argmin_b {sigma*(f*)(b) + (1/2)*||b - a||_2^2}
// Closed form for the square-loss conjugate: (a - sigma*y) / (1 + sigma).
inline floatT SquareLoss::conj_prox(floatT sigma, const floatT ai, const floatT yi) const
{
return (ai - sigma*yi) / (1 + sigma);
}
// Return argmin_b {sigma*(f*)(b) + (1/2)*||b - a||_2^2}
// Element-wise closed-form proximal step: u[i] = (a[i] - sigma*y[i]) / (1 + sigma).
void SquareLoss::conj_prox(floatT sigma, const Vector &a, const Vector &y, Vector &u) const
{
	assert(a.length() == y.length() && y.length() == u.length());
	size_t n = a.length();
#pragma omp parallel for
	for (int64_t i = 0; i < n; i++)
	{
		u[i] = (a[i] - sigma*y[i]) / (1 + sigma);
	}
}
// Member functions of LogisticLoss class ------------------------------------------
// min_log: clipping margin that keeps conjugate arguments strictly inside (-1, 0);
// epsilon, maxiter: stopping tolerance and iteration cap for Newton's method in conj_prox().
LogisticLoss::LogisticLoss(const floatT min_log, const floatT epsilon, const int maxiter)
{
assert(min_log > 0 && epsilon > 0 && maxiter > 0);
_min_log = min_log;
_epsilon = epsilon;
_maxiter = maxiter;
}
// Single-sample logistic loss log(1 + exp(-yi*Xiw)), guarded against overflow:
// for large positive margins the loss is numerically 0; for large negative
// margins it equals -margin.
inline floatT LogisticLoss::loss(const float Xiw, const floatT yi) const
{
	floatT margin = yi * Xiw;
	if (margin > 50)
		return 0;
	if (margin < -50)
		return -margin;
	return log(1 + exp(-margin));
}
void LogisticLoss::loss(const Vector &Xw, const Vector y, Vector &loss) const
{
assert(Xw.length() == y.length() && y.length() == loss.length());
size_t len = y.length();
#pragma omp parallel for
for (int64_t i = 0; i < len; i++)
{
floatT yxwi = y[i] * Xw[i];
loss[i] = yxwi > 50 ? 0 : yxwi < -50 ? -yxwi : log(1 + exp(-yxwi));
}
}
// Return sum_i log(1+exp(-y[i]*Xw[i])). Need to parallelize using OpenMP
floatT LogisticLoss::sum_loss(const Vector &Xw, const Vector &y) const
{
	assert(Xw.length() == y.length());
	size_t n = y.length();
	floatT total = 0;
#pragma omp parallel for reduction(+:total)
	for (int64_t i = 0; i < n; i++)
	{
		floatT margin = y[i] * Xw[i];
		if (margin > 50)
			continue; // loss is numerically zero for large positive margins
		total += margin < -50 ? -margin : log(1 + exp(-margin));
	}
	return total;
}
// Derivative of log(1+exp(-y*t)) with respect to t: -y / (1 + exp(y*t)).
// For very large y*t, exp() overflows to +inf, which still yields the correct limit 0.
inline floatT LogisticLoss::derivative(const floatT Xiw, const floatT yi) const
{
return -yi / (1 + exp(yi*Xiw));
}
// d[i] = -y[i]/(1+exp(y[i]*Xw[i]) so that gradient is expressed as sum_i d[i]*X[i,:]
void LogisticLoss::derivative(const Vector &Xw, const Vector &y, Vector &d) const
{
assert(Xw.length() == y.length() && Xw.length() == y.length());
size_t len = y.length();
#pragma omp parallel for
for (int64_t i = 0; i < len; i++)
{
d[i] = -y[i] / (1 + exp(y[i] * Xw[i]));
}
}
// Conjugate of the logistic loss: binary entropy (-t)log(-t) + (1+t)log(1+t)
// with t = yi*ai in [-1, 0]; t is clipped to [_min_log - 1, -_min_log] so that
// both logarithm arguments stay strictly positive.
inline floatT LogisticLoss::conjugate(const floatT &ai, const floatT yi) const
{
assert(yi*ai >= -1 && yi*ai <= 0); // yi*ai in [-1, 0], may not be necessary here
// clip may not work here, especially for float 1e-12 - 1 = -1, log(1+t)=log(0)
// better approach: should directly bound entropy argument from zero!
double t = fmin(-_min_log, fmax(_min_log - 1, yi*ai)); // clip [-1 + min_log, - min_log]
return floatT((-t) * log(-t) + (1 + t) * log(1 + t));
}
// Element-wise conjugate (binary entropy of t = y[i]*a[i]), with t clipped to
// [_min_log - 1, -_min_log] so both logarithm arguments remain strictly positive.
void LogisticLoss::conjugate(const Vector &a, const Vector &y, Vector &conj) const
{
assert(a.length() == y.length() && y.length() == conj.length());
size_t len = y.length();
double lb = _min_log - 1, ub = -_min_log; // clipping bounds
#pragma omp parallel for
for (int64_t i = 0; i < len; i++)
{
assert(y[i] * a[i] >= -1 && y[i] * a[i] <= 0);
double t = fmin(ub, fmax(lb, y[i] * a[i]));
conj[i] = floatT((-t) * log(-t) + (1 + t) * log(1 + t));
}
}
// Return (f*)(a) sum_i (-ya[i])*log(-ya[i]) + (1+ya[i])*log(1+ya[i]) with ya[i] in [-1, 0]
floatT LogisticLoss::sum_conjugate(const Vector &a, const Vector &y) const
{
// Need to check if all a_j belongs to [-1, 0], but can ignore if initialized this way
assert(a.length() == y.length());
size_t len = a.length();
double lb = _min_log - 1, ub = -_min_log; // clip bounds keeping log arguments positive
double sum = 0;
// this code may have more numerical error accumulation !!?
#pragma omp parallel for reduction(+:sum)
for (int64_t i = 0; i < len; i++)
{
assert(y[i] * a[i] >= -1 && y[i] * a[i] <= 0);
double t = fmin(ub, fmax(lb, y[i] * a[i]));
sum += (-t) * log(-t) + (1 + t) * log(1 + t); // entropy term, accumulated in double
}
return floatT(sum);
}
// Return argmin_b {sigma*(f*)(b) + (1/2)*(b - a)^2}
// Safeguarded Newton's method on the stationarity condition
//   b - y*a + sigma*log((1+b)/(-b)) = 0,
// with b clipped into [_min_log - 1, -_min_log] to stay in the log domain.
floatT LogisticLoss::conj_prox(floatT sigma, const floatT ai, const floatT yi) const
{
floatT lb = _min_log - 1, ub = -_min_log;
floatT epsilon = _epsilon;
int maxiter = _maxiter;
double yai = ai*yi; // need to convert sign first to use one code
double bi = fmin(ub, fmax(lb, yai));
double f = 0, df = 0;
for (int k = 0; k < maxiter; ++k)
{
f = bi - yai + sigma * log((1 + bi) / (-bi)); // residual of optimality condition
if (fabs(f) < epsilon) break;
df = 1.0 - sigma / (bi * (1 + bi)); // derivative of the residual (bi*(1+bi) < 0)
bi -= f / df;
bi = fmin(ub, fmax(lb, bi)); // critical for convergence
}
return floatT(bi * yi); // map back to the original sign convention
}
// Return argmin_b {sigma*(f*)(b) + (1/2)*||b - a||_2^2}
// Solved per coordinate by a safeguarded Newton's method on the stationarity
// condition  b - ya + sigma*log((1+b)/(-b)) = 0, with b clipped into
// [_min_log - 1, -_min_log] to stay inside the domain of the logarithms.
void LogisticLoss::conj_prox(floatT sigma, const Vector &a, const Vector &y, Vector &b) const
{
	// Need to check if all a_j belongs to [-1, 0], but can ignore because always clipped
	assert(a.length() == y.length());
	Vector ya(y.length());
	Vector::elem_multiply(y, a, ya); // ya[i] = y[i]*a[i]. Here y[i] need to be in {-1, +1};
	floatT lb = _min_log - 1, ub = -_min_log;
	floatT epsil = _epsilon;
	int maxiter = _maxiter;
	size_t len = y.length();
	// BUGFIX: the shared() list used to name "epsilon", which does not exist in this
	// scope (the local is "epsil"); with default(none) that fails to compile.
#pragma omp parallel for default(none) shared(y,ya,b,sigma,lb,ub,epsil,maxiter,len)
	for (int64_t i = 0; i < len; i++)
	{
		double bi = fmin(ub, fmax(lb, ya[i]));
		double f = 0, df = 0;
		for (int k = 0; k < maxiter; ++k)
		{
			f = bi - ya[i] + sigma * log((1 + bi) / (-bi));
			if (fabs(f) < epsil) break;
			df = 1.0 - sigma / (bi * (1 + bi));
			bi -= f / df;
			bi = fmin(ub, fmax(lb, bi)); // critical for convergence
		}
		b[i] = floatT(bi * y[i]);
	}
}
// Member functions of SmoothedHinge class ------------------------------------------
// Single-sample smoothed hinge: 0 for margin >= 1, linear (t - delta/2) when
// t = 1 - margin >= delta, and quadratic t^2/(2*delta) in between.
inline floatT SmoothedHinge::loss(const float Xiw, const floatT yi) const
{
	floatT margin = yi * Xiw;
	floatT t = 1 - margin;
	if (margin >= 1)
		return 0;
	if (t >= _delta)
		return t - _delta / 2;
	return t*t / (2 * _delta);
}
void SmoothedHinge::loss(const Vector &Xw, const Vector y, Vector &loss) const
{
assert(Xw.length() == y.length() && y.length() == loss.length());
size_t len = y.length();
floatT delta = _delta;
#pragma omp parallel for
for (int64_t i = 0; i < len; i++)
{
floatT yXwi = y[i] * Xw[i];
floatT t = 1 - yXwi;
loss[i] = yXwi >= 1 ? 0 : t >= delta ? t - delta / 2 : t*t / (2 * delta);
}
}
// Sum of smoothed hinge losses over all samples, accumulated with an OpenMP reduction.
floatT SmoothedHinge::sum_loss(const Vector &Xw, const Vector &y) const
{
	assert(Xw.length() == y.length());
	size_t n = y.length();
	floatT delta = _delta;
	floatT total = 0;
#pragma omp parallel for reduction(+:total)
	for (int64_t i = 0; i < n; ++i)
	{
		floatT margin = y[i] * Xw[i];
		floatT t = 1.0f - margin;
		if (margin >= 1)
			continue; // zero-loss region
		total += t >= delta ? t - delta / 2 : t*t / (2 * delta);
	}
	return total;
}
// Derivative of the smoothed hinge with respect to the margin: 0 above margin 1,
// -1 below 1 - delta, linear interpolation in between.
inline floatT SmoothedHinge::derivative(const floatT Xiw, const floatT yi) const
{
floatT yXwi = yi * Xiw;
return yXwi>1 ? 0 : yXwi < 1 - _delta ? -1 : (yXwi - 1) / _delta;
}
// Compute d[i] so that gradient is expressed as sum_i d[i]*X[i,:]
void SmoothedHinge::derivative(const Vector &Xw, const Vector &y, Vector &d) const
{
assert(Xw.length() == y.length() && Xw.length() == y.length());
size_t len = y.length();
floatT delta = _delta;
#pragma omp parallel for
for (int64_t i = 0; i < len; i++)
{
floatT yXwi = y[i] * Xw[i];
d[i] = yXwi>1 ? 0 : yXwi < 1 - delta ? -1 : (yXwi - 1) / delta;
}
}
// Conjugate of the smoothed hinge: yai + (delta/2)*yai^2, defined for yai = yi*ai in [-1, 0].
inline floatT SmoothedHinge::conjugate(const floatT &ai, const floatT yi) const
{
floatT yai = yi*ai;
assert(yai >= -1 && yai <= 0);
return yai + (_delta / 2)*(yai*yai);
}
void SmoothedHinge::conjugate(const Vector &a, const Vector &y, Vector &conj) const
{
assert(a.length() == y.length() && y.length() == conj.length());
size_t len = y.length();
#pragma omp parallel for
for (int64_t i = 0; i < len; i++)
{
floatT yai = y[i] * a[i];
assert(yai >= -1 && yai <= 0);
conj[i] = yai + (_delta / 2)*(yai*yai);
}
}
// Sum of the element-wise conjugates, accumulated with an OpenMP reduction.
floatT SmoothedHinge::sum_conjugate(const Vector &a, const Vector &y) const
{
	assert(a.length() == y.length());
	size_t n = y.length();
	floatT delta = _delta;
	floatT total = 0;
#pragma omp parallel for reduction(+:total)
	for (int64_t i = 0; i < n; i++)
	{
		floatT yai = y[i] * a[i];
		assert(yai >= -1 && yai <= 0);
		total += yai + (delta / 2)*(yai*yai);
	}
	return total;
}
// Return argmin_b {sigma*(f*)(b) + (1/2)*||b - a||_2^2}
// Closed form: shrink and rescale y*a, clip into [-1, 0], then restore the sign.
inline floatT SmoothedHinge::conj_prox(floatT sigma, const floatT ai, const floatT yi) const
{
// should not assert yi*ai in [-1, 0] here because ai can be arbitrary
floatT bi = (yi*ai - sigma) / (1 + sigma*_delta);
return yi*(bi > 0 ? 0 : bi < -1 ? -1 : bi);
}
// Return argmin_b {sigma*(f*)(b) + (1/2)*||b - a||_2^2}
// Element-wise closed form: shrink y[i]*a[i] by sigma, rescale by 1/(1 + sigma*delta),
// clip into [-1, 0] (the domain of the conjugate), then restore the sign.
void SmoothedHinge::conj_prox(floatT sigma, const Vector &a, const Vector &y, Vector &b) const
{
	assert(a.length() == y.length() && y.length() == b.length());
	size_t len = y.length();
	floatT delta = _delta;
	// BUGFIX: sigma was missing from the shared() list; with default(none) every
	// variable referenced inside the loop must be listed, so this did not compile.
#pragma omp parallel for default(none) shared(a,y,b,len,delta,sigma)
	for (int64_t i = 0; i < len; i++)
	{
		floatT bi = (y[i]*a[i] - sigma) / (1 + sigma*delta);
		b[i] = y[i] * (bi > 0 ? 0 : bi < -1 ? -1 : bi);
	}
}
// Member functions of SquaredL2Norm class ------------------------------------------
// g(w) = (lambda/2)*||w||_2^2; _is_unity caches whether lambda == 1 so the
// rescaling in conj_grad() can be skipped.
SquaredL2Norm::SquaredL2Norm(const floatT lambda)
{
	assert(lambda > 0);
	_lambda = lambda;
	_is_unity = (lambda == 1.0f);
}
// Copy constructor; with unitize == true the copy is rescaled so lambda = 1.
SquaredL2Norm::SquaredL2Norm(const SquaredL2Norm &g, bool unitize)
{
	_lambda = unitize ? 1.0f : g._lambda;
	_is_unity = unitize ? true : g._is_unity;
}
// Rescale so the convexity parameter becomes 1; returns the previous lambda
// so callers can fold the scale into the loss term.
inline floatT SquaredL2Norm::unitize()
{
floatT previous_lambda = _lambda;
_lambda = 1.0f;
_is_unity = true;
return previous_lambda;
}
// Function value g(w) = (lambda/2)*||w||_2^2.
floatT SquaredL2Norm::operator()(const Vector &w) const
//floatT SquaredL2Norm::value(const Vector &w) const
{
	// square by multiplication instead of pow(x, 2): pow is a slow library call
	floatT n2 = w.norm2();
	return (0.5f *_lambda)*n2*n2;
}
// Compute f*(v) = max_w {v'*w - f(w)} = (1/(2*lambda))*||v||_2^2
floatT SquaredL2Norm::conjugate(const Vector &v) const
{
	// square by multiplication instead of pow(x, 2): pow is a slow library call
	floatT n2 = v.norm2();
	return (0.5f / _lambda)*n2*n2;
}
// Compute f*(v) = max_w {v'*w - f(w)}, with maxing w already computed
// For squared L2 the maximizer is not needed; delegate to the closed form.
floatT SquaredL2Norm::conjugate(const Vector &v, const Vector &w) const
{
return this->conjugate(v);
}
// prox() operator computes u = argmin_u { tau*f(u) + (1/2)*||u-w||_2^2 }
// For the squared L2 norm this is pure shrinkage: u = w / (1 + tau*lambda).
void SquaredL2Norm::prox(const floatT tau, const Vector &w, Vector &u) const
{
if (&w != &u) { u.copy(w); } // allow in-place use (&w == &u)
u.scale(1 / (1 + tau*_lambda));
}
// Given vector v, compute g = argmax_w {v'*w - f(w)} = argmin_w {-v'*w + f(w)}
// For f(w) = (lambda/2)||w||^2 the maximizer is v/lambda; the scaling is
// skipped when lambda == 1 (_is_unity).
void SquaredL2Norm::conj_grad(const Vector &v, Vector &w) const
{
if (&w != &v) { w.copy(v); } // allow in-place call (&w == &v)
if (!_is_unity) {
w.scale(1.0f / _lambda);
}
}
// Member functions of ElasticNet class ------------------------------------------
// g(w) = (lambda2/2)*||w||_2^2 + lambda1*||w||_1; lambda1 may be zero.
ElasticNet::ElasticNet(const floatT lambda1, const floatT lambda2)
{
	assert(lambda1 >= 0 && lambda2 > 0);
	_lambda1 = lambda1;
	_lambda2 = lambda2;
	_is_unity = (lambda2 == 1.0f);
}
// Copy constructor; with unitize == true the copy is rescaled so lambda2 = 1
// (lambda1 is divided by lambda2 to keep the same relative L1 weight).
ElasticNet::ElasticNet(const ElasticNet &g, bool unitize)
{
if (unitize) {
_lambda1 = g._lambda1 / g._lambda2;
_lambda2 = 1.0f;
_is_unity = true;
}
else {
_lambda1 = g._lambda1;
_lambda2 = g._lambda2;
_is_unity = g._is_unity;
}
}
// Rescale so the strong-convexity parameter lambda2 becomes 1; lambda1 is
// rescaled accordingly and the previous lambda2 is returned as the scale.
inline floatT ElasticNet::unitize()
{
	floatT old_lambda2 = _lambda2;
	_lambda1 /= _lambda2;
	_lambda2 = 1.0f;
	_is_unity = true;
	return old_lambda2;
}
//floatT ElasticNet::value(const Vector &w) const
// Function value g(w) = (lambda2/2)*||w||_2^2 + lambda1*||w||_1.
floatT ElasticNet::operator()(const Vector &w) const
{
	// square by multiplication instead of pow(x, 2): pow is a slow library call
	floatT n2 = w.norm2();
	return (_lambda2 / 2)*n2*n2 + _lambda1*w.norm1();
}
// Compute f*(v) = max_w {v'*w - f(w)}
floatT ElasticNet::conjugate(const Vector &v) const
{
	Vector w(v.length());
	this->conj_grad(v, w); // w = argmax_w {v'*w - f(w)}
	// BUGFIX: f must be evaluated at the maximizer w, not at v
	return v.dot(w) - (*this)(w);
}
// Compute f*(v) = max_w {v'*w - f(w)}, with maxing w already computed
floatT ElasticNet::conjugate(const Vector &v, const Vector &w) const
{
	assert(v.length() == w.length());
	// BUGFIX: f must be evaluated at the maximizer w, not at v
	return v.dot(w) - (*this)(w);
}
// Compute u = argmin_u { tau*f(u) + (1/2)*||u-w||_2^2 }
// Note that we allow u and w being the same Vector, that is, allow &w==&u
// Element-wise soft-thresholding by tau*lambda1 followed by shrinkage by 1/(1 + tau*lambda2).
void ElasticNet::prox(const floatT tau, const Vector &w, Vector &u) const
{
floatT tl2 = 1 + tau * _lambda2;
floatT tl1 = tau * _lambda1;
size_t len = w.length();
#pragma omp parallel for
for(int64_t i=0; i<len; i++)
{
floatT vneg = w[i] + tl1;
floatT vpos = w[i] - tl1;
// zero inside the threshold band [-tl1, tl1]; shifted and scaled outside it
u[i] = vpos > 0 ? vpos / tl2 : vneg < 0 ? vneg / tl2 : 0;
}
}
// Given vector v, compute g = argmax_w {v'*w - f(w)} = argmin_w {-v'*w + f(w)}
// Soft-threshold each coordinate by lambda1, then scale by 1/lambda2.
void ElasticNet::conj_grad(const Vector &v, Vector &g) const
{
size_t len = v.length();
floatT lambda1 = _lambda1;
floatT lambda2 = _lambda2;
#pragma omp parallel for
for (int64_t i = 0; i < len; i++)
{
floatT vneg = v[i] + lambda1;
floatT vpos = v[i] - lambda1;
g[i] = vpos > 0 ? vpos / lambda2 : vneg < 0 ? vneg / lambda2 : 0;
}
}
}

Просмотреть файл

@ -0,0 +1,164 @@
#pragma once
#include <assert.h>
#include <stdexcept>
#include "floattype.h"
namespace randalgms
{
// functions to compute performance of linear regression and binary classification
floatT regression_mse(const SparseMatrixCSR &X, const Vector &y, const Vector &w);
floatT binary_error_rate(const SparseMatrixCSR &X, const Vector &y, const Vector &w);
size_t binary_error_count(const SparseMatrixCSR &X, const Vector &y, const Vector &w);
// An abstract class to serve as interface for sum of parametrized smooth loss functions
class ISmoothLoss
{
public:
ISmoothLoss() = default;
virtual floatT smoothness() const = 0; // smoothness parameter of single component function
virtual floatT loss(const float Xiw, const floatT yi) const = 0;
virtual void loss(const Vector &Xw, const Vector y, Vector &loss) const = 0;
// can also add weighted_sum_loss(), but it can be done by dot(weights, loss)
virtual floatT sum_loss(const Vector &Xw, const Vector &y) const = 0;
// derivative() is defined such that gradient is simply expressed as sum_i d[i]*X[i,:]
virtual floatT derivative(const floatT Xiw, const floatT yi) const = 0;
virtual void derivative(const Vector &Xw, const Vector &y, Vector &d) const = 0;
virtual floatT conjugate(const floatT &ai, const floatT yi) const = 0;
virtual void conjugate(const Vector &a, const Vector &y, Vector &conj) const = 0;
virtual floatT sum_conjugate(const Vector &a, const Vector &y) const = 0;
virtual floatT conj_prox(floatT sigma, const floatT ai, const floatT yi) const = 0;
virtual void conj_prox(floatT sigma, const Vector &a, const Vector &y, Vector &u) const = 0;
};
// f_y(t) = (1/2)*(t-y)^2 where y is a real number for linear regression
class SquareLoss : public ISmoothLoss
{
public:
SquareLoss() {};
inline floatT smoothness() const override { return 1.0f; }; // smoothness of (1/2)*(x-b)^2
floatT loss(const float Xiw, const floatT yi) const override;
void loss(const Vector &Xw, const Vector y, Vector &loss) const override;
floatT sum_loss(const Vector &Xw, const Vector &y) const override;
floatT derivative(const floatT Xiw, const floatT yi) const override;
void derivative(const Vector &Xw, const Vector &y, Vector &d) const override;
floatT conjugate(const floatT &ai, const floatT yi) const override;
void conjugate(const Vector &a, const Vector &y, Vector &conj) const override;
floatT sum_conjugate(const Vector &a, const Vector &y) const override;
floatT conj_prox(floatT sigma, const floatT ai, const floatT yi) const override;
void conj_prox(floatT sigma, const Vector &a, const Vector &y, Vector &u) const override;
};
// f_y(t) = log(1+exp(-y*t) where y is +1 or -1 for binary classification
class LogisticLoss : public ISmoothLoss
{
private:
// need to test if the min_log trik is really necessary for numerical stability
floatT _min_log; // boundary margin within [-1, 0] for taking logarithm
floatT _epsilon; // stopping accuracy for Newton's method
int _maxiter; // maximum iteration for Newton's method
public:
// may not work as desired when floatT = float, 1 + 1e-12 = 1 anyway!
LogisticLoss(const floatT min_log = 1.0e-12F, const floatT epsilon = 1.0e-6F, const int maxiter = 10);
inline floatT smoothness() const override { return 0.25; } // smoothness of log(1+exp(-b*t))
floatT loss(const float Xiw, const floatT yi) const override;
void loss(const Vector &Xw, const Vector y, Vector &loss) const override;
floatT sum_loss(const Vector &Xw, const Vector &y) const override;
floatT derivative(const floatT Xiw, const floatT yi) const override;
void derivative(const Vector &Xw, const Vector &y, Vector &d) const override;
floatT conjugate(const floatT &ai, const floatT yi) const override;
void conjugate(const Vector &a, const Vector &y, Vector &conj) const override;
floatT sum_conjugate(const Vector &a, const Vector &y) const override;
floatT conj_prox(floatT sigma, const floatT ai, const floatT yi) const override;
void conj_prox(floatT sigma, const Vector &a, const Vector &y, Vector &u) const override;
};
// smoothed hinge constructed by adding strong convexity to conjugate
class SmoothedHinge : public ISmoothLoss
{
private:
floatT _delta; // conjugate smoothing parameter
public:
SmoothedHinge(const floatT delta = 1) { assert(delta > 0); _delta = delta; };
inline floatT smoothness() const override { return 1.0f / _delta; }; // smoothness by conjugate smoothing
floatT loss(const float Xiw, const floatT yi) const override;
void loss(const Vector &Xw, const Vector y, Vector &loss) const override;
floatT sum_loss(const Vector &Xw, const Vector &y) const override;
floatT derivative(const floatT Xiw, const floatT yi) const override;
void derivative(const Vector &Xw, const Vector &y, Vector &d) const override;
floatT conjugate(const floatT &ai, const floatT yi) const override;
void conjugate(const Vector &a, const Vector &y, Vector &conj) const override;
floatT sum_conjugate(const Vector &a, const Vector &y) const override;
floatT conj_prox(floatT sigma, const floatT ai, const floatT yi) const override;
void conj_prox(floatT sigma, const Vector &a, const Vector &y, Vector &u) const override;
};
//================================================================================================
// An abstract class to serve as interface for convex regularization functions
class IRegularizer
{
protected:
bool _is_unity = false; // _is_unity == true iff this->convexity() == 1
public:
IRegularizer() = default;
virtual char symbol() const = 0; // to enable customized algorithm implementations
virtual floatT convexity() const = 0; // strong convexity parameter
inline bool is_unity() const { return _is_unity; };
// rescale so that convexity parameter = 1, return equivalent scale (previous convexity)
virtual floatT unitize() = 0;
virtual floatT operator()(const Vector &w) const = 0; // return function value
virtual floatT conjugate(const Vector &v) const = 0; // return conjugate function value
virtual floatT conjugate(const Vector &v, const Vector &w) const = 0;
// prox() operator computes u = argmin_u { tau*f(u) + (1/2)*||u-w||_2^2 }
virtual void prox(const floatT tau, const Vector &w, Vector &u) const = 0;
// return gradient of conjugate function: argmax_w {v'*w - f(w)} = argmin_w {-v'*w + f(w)}
virtual void conj_grad(const Vector &v, Vector &w) const = 0;
};
// g(w) = (lambda2/2)*||w||_2^2
class SquaredL2Norm : public IRegularizer
{
private:
floatT _lambda;
public:
SquaredL2Norm(const floatT lambda);
SquaredL2Norm(const SquaredL2Norm &g, bool unitize);
inline char symbol() const override { return '2'; };
inline floatT convexity() const override { return _lambda; };
inline floatT unitize() override;
floatT operator()(const Vector &w) const override;
floatT conjugate(const Vector &v) const override;
floatT conjugate(const Vector &v, const Vector &w) const override;
void prox(const floatT tau, const Vector &w, Vector &u) const override;
void conj_grad(const Vector &w, Vector &g) const override;
};
// g(w) = (lambda2/2)*||w||_2^2 + lambda1*||w||_1
class ElasticNet : public IRegularizer
{
private:
floatT _lambda1, _lambda2;
public:
ElasticNet(const floatT lambda1, const floatT lambda2);
ElasticNet(const ElasticNet &g, bool unitize);
inline char symbol() const override { return 'e'; };
inline floatT convexity() const override { return _lambda2; };
inline floatT unitize() override;
floatT operator()(const Vector &w) const override;
floatT conjugate(const Vector &v) const override;
floatT conjugate(const Vector &v, const Vector &w) const override;
void prox(const floatT tau, const Vector &w, Vector &u) const override;
void conj_grad(const Vector &w, Vector &g) const override;
// member functions specific to this class, not defined in IRegularizer
inline floatT l1parameter() const { return _lambda1; };
};
// g(w) = (3/2)log(d)||w||_q^2 + (lambda1/lambda2)||w||_1 (when feature has low infty norm)
}

103
src/Numerics/regu_loss.cpp Normal file
Просмотреть файл

@ -0,0 +1,103 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
#include<string>
#include "regu_loss.h"
namespace functions
{
// Build a regularized loss over data (A, b): selects the smooth loss f by character
// code ('l'/'L' logistic, 's'/'S' smoothed hinge, '2' square) and the regularizer g
// ('2' squared L2, 'e'/'E' elastic net), and allocates working buffers.
// Throws std::runtime_error for unrecognized codes.
RegularizedLoss::RegularizedLoss(const SparseMatrixCSR &A, const Vector &b, const char f, double lambda,
	const char g, const double l1weight)
{
	assert(A.nrows() == b.length());
	_A = new SubSparseMatrixCSR(A, 0, A.nrows());
	_b = new SubVector(b, 0, b.length());
	_lambda = lambda;
	_Ax = new Vector(A.nrows());  // buffer for A*x
	_drv = new Vector(A.nrows()); // buffer for loss derivatives
	// such conditional constructions needs to be put in a reuseable function!
	switch (f)
	{
	case 'L':
	case 'l':
		_f = new LogisticLoss();
		break;
	case 'S':
	case 's':
		_f = new SmoothedHinge();
		break;
	case '2':
		_f = new SquareLoss();
		break;
	default:
		// BUGFIX: std::to_string(char) formats the numeric code (e.g. "108" for 'l');
		// show the character itself in the message instead
		throw std::runtime_error("Loss function type " + std::string(1, f) + " not defined.");
	}
	switch (g)
	{
	case '2':
		_g = new SquaredL2Norm();
		break;
	case 'E':
	case 'e':
		_g = new ElasticNet(l1weight / lambda);
		break;
	default:
		throw std::runtime_error("Regularizer type " + std::string(1, g) + " not defined.");
	}
}
// Release the owned sub-views, loss/regularizer objects, and work buffers.
// NOTE(review): raw owning pointers; std::unique_ptr members would remove the
// need for this destructor, but the header declares raw pointers.
RegularizedLoss::~RegularizedLoss()
{
delete _A;
delete _b;
delete _f;
delete _g;
delete _Ax;
delete _drv;
}
// Average loss (1/n)*sum_i f(a_i'x, b_i).
double RegularizedLoss::avg_loss(const Vector &x)
{
return this->sum_loss(x) / _b->length();
}
// Total loss sum_i f(a_i'x, b_i); refreshes the _Ax buffer with A*x.
double RegularizedLoss::sum_loss(const Vector &x)
{
_A->aAxby(1.0, x, 0, *_Ax);
return _f->sum_loss(*_Ax, *_b);
}
// Full objective: average loss plus lambda*g(x).
double RegularizedLoss::regu_loss(const Vector &x)
{
return this->sum_loss(x) / _b->length() + _lambda*(*_g)(x);
}
// Gradient of the average loss written into g; returns the average loss value.
double RegularizedLoss::avg_grad(const Vector &x, Vector &g)
{
double sum_loss = this->sum_grad(x, g);
g.scale(1.0 / _b->length());
return sum_loss / _b->length();
}
// Gradient of the total loss: g = A' * f'(Ax); returns the total loss value.
double RegularizedLoss::sum_grad(const Vector &x, Vector &g)
{
_A->aAxby(1.0, x, 0, *_Ax);
_f->derivative(*_Ax, *_b, *_drv);
_A->aATxby(1.0, *_drv, 0, g);
return _f->sum_loss(*_Ax, *_b);
}
// Gradient (or gradient mapping) of the full objective; returns its value.
double RegularizedLoss::regu_grad(const Vector &x, Vector &g)
{
double sum_loss = this->sum_grad(x, g);
g.scale(1.0 / _b->length());
g.axpy(_lambda, x); // this only works for L2 regularization!
// NOTE(review): the line above assumes grad g(x) = x, i.e. squared L2; for the
// elastic-net regularizer this is not the correct (sub)gradient — confirm callers.
return sum_loss / _b->length() + _lambda*(*_g)(x);
}
}

33
src/Numerics/regu_loss.h Normal file
Просмотреть файл

@ -0,0 +1,33 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
#pragma once
#include "functions.h"
namespace functions {
// Composite objective F(x) = (1/n)*sum_i f(a_i'x, b_i) + lambda*g(x) over a sparse
// data matrix A and labels b, with loss f and regularizer g chosen by character codes.
class RegularizedLoss {
private:
SubSparseMatrixCSR *_A; // view of the data matrix (owned)
SubVector *_b; // view of the labels (owned)
ISmoothLoss *_f; // really should be an object here.
double _lambda; // regularization weight
IUnitRegularizer *_g; // regularizer (owned)
// working buffers
Vector *_Ax, *_drv; // A*x and per-sample loss derivatives
public:
RegularizedLoss(const SparseMatrixCSR &A, const Vector &b, const char f, double lambda, const char g,
const double l1weight = 0.0);
~RegularizedLoss();
double avg_loss(const Vector &x); // (1/n) * sum of losses
double sum_loss(const Vector &x); // sum of losses
double regu_loss(const Vector &x); // average loss + lambda*g(x)
// for nonsmooth regularizers, grad means gradient mapping
double avg_grad(const Vector &x, Vector &g);
double sum_grad(const Vector &x, Vector &g);
double regu_grad(const Vector &x, Vector &g);
};
}

615
src/Numerics/spmatrix.cpp Normal file
Просмотреть файл

@ -0,0 +1,615 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
#include <assert.h>
#include <stdexcept>
#include <algorithm>
#include <numeric>
#include <cstdint>
#include <vector>
#include <omp.h>
#include "spmatrix.h"
// OpenMP 3.0 supports unsigned for-loop iterator, so can replace std::int64_t by size_t
using std::int64_t;
namespace numerics
{
// constructor from three vectors, column index start with 0-indexing, not necessarily ordered
// With make_copy == false the input vectors are swapped out (left empty on return)
// instead of copied, reducing peak memory usage.
template <typename floatT, typename T>
SparseMatrixCSR<floatT, T>::SparseMatrixCSR(std::vector<floatT> &vals, std::vector<size_t> &cidx,
std::vector<size_t> &rptr, const bool has_bias_feature, const bool make_copy)
{
_nrows = rptr.size() - 1;
_ncols = *std::max_element(cidx.begin(), cidx.end()) + 1; // ncols inferred from largest column index
_nnzs = vals.size();
if (make_copy) // make deep copies using std::vector<> overloaded operator=()
{
_mem_values = vals;
_mem_colidx = cidx;
_mem_rowptr = rptr;
}
else // swap two vector<>, so argument memory removed at exit to reduce memory footage
{
_mem_values.swap(vals);
_mem_colidx.swap(cidx);
_mem_rowptr.swap(rptr);
}
// these pointers only point to existing memories, no need to use new and delete[]
_values = _mem_values.data();
_colidx = _mem_colidx.data();
_rowstr = _mem_rowptr.data();
_rowend = _mem_rowptr.data() + 1;
// specify biased feature
_has_bias_feature = has_bias_feature;
}
// copy constructor, can take a transpose if specified. A could be a SubSparseMatrixCSR!
template <typename floatT, typename T>
SparseMatrixCSR<floatT, T>::SparseMatrixCSR(const SparseMatrixCSR<floatT, T> &A, bool transpose)
{
	if (!transpose)
	{
		// plain deep copy, row by row, so it also works when A is a SubSparseMatrixCSR
		_nrows = A._nrows;
		_ncols = A._ncols;
		_nnzs = A._nnzs;
		_mem_values.resize(_nnzs);
		_mem_colidx.resize(_nnzs);
		_mem_rowptr.resize(_nrows + 1);
		size_t k = 0;
		for (size_t i = 0; i < _nrows; i++) {
			_mem_rowptr[i] = k;
			for (size_t j = A._rowstr[i]; j < A._rowend[i]; j++)
			{
				_mem_colidx[k] = A._colidx[j];
				_mem_values[k] = A._values[j];
				k++;
			}
		}
		_mem_rowptr[_nrows] = k;
		// BUGFIX: _values and _colidx were never initialized in this branch,
		// leaving dangling pointers; any later element access was undefined behavior.
		_values = _mem_values.data();
		_colidx = _mem_colidx.data();
		_rowstr = _mem_rowptr.data();
		_rowend = _mem_rowptr.data() + 1;
		_has_bias_feature = A.has_bias_feature();
		return;
	}
	// the following code does not work if A is a SubSparseMatrix, contiguous or not
	/*
	if (!transpose) // use std::vector<>::operator= to make deep copies
	{
	_nrows = A._nrows;
	_ncols = A._ncols;
	_nnzs = A._nnzs;
	_mem_values = A._mem_values;
	_mem_colidx = A._mem_colidx;
	_mem_rowptr = A._mem_rowptr;
	_values = _mem_values.data();
	_colidx = _mem_colidx.data();
	_rowstr = _mem_rowptr.data();
	_rowend = _mem_rowptr.data() + 1;
	_has_bias_feature = A.has_bias_feature();
	return;
	}
	*/
	// SparseMatrixCSR transpose, works for un-sorted column indices
	// (counting-sort style: count entries per column, prefix-sum into the
	// row pointers of the transpose, then scatter values)
	size_t m, n, nnz, j, p, q;
	size_t *Trwp, *Tcli;
	floatT *Tval;
	const size_t* Arstr = A._rowstr;
	const size_t* Arend = A._rowend;
	const size_t* Acli = A._colidx;
	const floatT* Aval = A._values;
	m = A._nrows;
	n = A._ncols;
	nnz = A._nnzs;
	_nrows = n;
	_ncols = m;
	_nnzs = nnz;
	_mem_values.resize(nnz);
	_mem_colidx.resize(nnz);
	_mem_rowptr.resize(n + 1);
	Trwp = _mem_rowptr.data();
	Tcli = _mem_colidx.data();
	Tval = _mem_values.data();
	std::vector<size_t> ws(n, 0); // per-column counts, then per-column write cursors
	for (j = 0; j < m; j++) {
		for (p = Arstr[j]; p < Arend[j]; p++) {
			ws[Acli[p]]++;
		}
	}
	Trwp[0] = 0;
	for (j = 0; j < n; j++) {
		Trwp[j + 1] = Trwp[j] + ws[j];
		ws[j] = Trwp[j];
	}
	for (j = 0; j < m; j++) {
		for (p = Arstr[j]; p < Arend[j]; p++) {
			Tcli[q = ws[Acli[p]]++] = j;
			Tval[q] = Aval[p];
		}
	}
	_values = _mem_values.data();
	_colidx = _mem_colidx.data();
	_rowstr = _mem_rowptr.data();
	_rowend = _mem_rowptr.data() + 1;
	// hard to tell if bias_feature is meaningful in transpose
	_has_bias_feature = A.has_bias_feature();
}
// THIS ONE IS DANGEOUS TO USE FOR A TRANSPOSED DATA MATRIX WHTERE BIAS ARE THE LAST ROW!
// Overwrite the bias entry (assumed to be the last stored element of each row)
// with c. Returns whether the matrix has a bias feature at all (no-op when false).
template <typename floatT, typename T>
bool SparseMatrixCSR<floatT, T>::set_bias_feature(const floatT c)
{
if (_has_bias_feature) {
auto vals = _values;
auto rend = _rowend;
auto nrows = _nrows;
for (size_t i = 0; i < nrows; i++) {
vals[rend[i]-1] = c; // last stored element of row i is the bias column
}
}
return _has_bias_feature;
}
// THIS ONE IS DANGEOUS TO USE FOR A TRANSPOSED DATA MATRIX WHTERE BIAS ARE THE LAST ROW!
// reset number of columns only if n is larger than this->nClos;
// Grows the column dimension to n; with a bias feature, the bias entries are moved
// to the new last column (n - 1). Returns false when n <= current ncols.
template <typename floatT, typename T>
bool SparseMatrixCSR<floatT, T>::reset_ncols(const size_t n)
{
if (n <= _ncols) return false;
_ncols = n;
if (_has_bias_feature) {
auto cidx = _colidx;
auto rend = _rowend;
auto nrows = _nrows;
for (size_t i = 0; i < nrows; i++) {
cidx[rend[i] - 1] = n - 1;
}
}
return true;
}
// return number of elements in each column
template <typename floatT, typename T>
void SparseMatrixCSR<floatT, T>::col_elem_counts(std::vector<size_t> &counts) const
{
	// size and zero the histogram in a single call
	counts.assign(_ncols, 0);
	for (size_t row = 0; row < _nrows; row++) {
		for (size_t p = _rowstr[row]; p < _rowend[row]; p++) {
			counts[_colidx[p]]++;
		}
	}
}
// return number of elements in each row: counts[i] = rowend[i] - rowstr[i]
template <typename floatT, typename T>
void SparseMatrixCSR<floatT, T>::row_elem_counts(std::vector<size_t> &counts) const
{
	counts.resize(_nrows);
	// no zero-fill needed (unlike col_elem_counts): every entry is assigned below
	for (size_t i = 0; i < _nrows; i++) {
		counts[i] = _rowend[i] - _rowstr[i];
	}
}
// scale ith row of sparse CSR matrix by y[i]
// In-place row scaling; rows are independent, so the outer loop is parallelized.
template <typename floatT, typename T>
void SparseMatrixCSR<floatT, T>::scale_rows(const Vector<T> &y)
{
assert(y.length() == _nrows);
floatT *vals = _values;
size_t nrows = _nrows;
#pragma omp parallel for
for (int64_t i = 0; i < nrows; i++)
{
floatT yi = y[i];
size_t endidx = _rowend[i];
for (size_t j = _rowstr[i]; j < endidx; j++)
{
vals[j] *= yi;
}
}
}
// return value: L[i] is 2-norm squared of ith row of the sparse CSR matrix
template <typename floatT, typename T>
void SparseMatrixCSR<floatT, T>::row_sqrd2norms(Vector<T> & sqrd2norms) const
{
assert(sqrd2norms.length() == _nrows);
floatT *vals = _values;
size_t nrows = _nrows;
#pragma omp parallel for
for (int64_t i = 0; i < nrows; i++)
{
T s = 0; // accumulate in T (may be wider than the floatT storage type)
size_t endidx = _rowend[i];
for (size_t j = _rowstr[i]; j < endidx; j++)
{
s += vals[j] * vals[j];
}
sqrd2norms[i] = s;
}
}
// scale each row so that each rom has target 2-norm
// Rows with zero norm are left unchanged (they cannot be normalized).
template <typename floatT, typename T>
void SparseMatrixCSR<floatT, T>::normalize_rows(const T tgt2norm)
{
floatT *vals = _values;
size_t nrows = _nrows;
#pragma omp parallel for
for (int64_t i = 0; i < nrows; i++)
{
T s = 0; // squared 2-norm of row i
size_t endidx = _rowend[i];
for (size_t j = _rowstr[i]; j < endidx; j++)
{
s += vals[j] * vals[j];
}
if (s > 0) {
s = tgt2norm / sqrt(s); // rescaling factor for this row
for (size_t j = _rowstr[i]; j < endidx; j++)
{
vals[j] *= s;
}
}
}
}
// return Frobenious norm of matrix
// (sqrt of the sum of squared entries, via per-row squared norms; the
// method name keeps the original spelling used by callers)
template <typename floatT, typename T>
T SparseMatrixCSR<floatT, T>::Frobenious_norm() const
{
Vector<T> L(_nrows);
this->row_sqrd2norms(L);
return sqrt(L.sum());
}
// to test if two matrices has the same values
// Returns -1 when the dimensions or sparsity patterns differ; otherwise the
// Frobenius norm of the element-wise difference.
template <typename floatT, typename T>
T SparseMatrixCSR<floatT, T>::norm_of_difference(SparseMatrixCSR<floatT, T> &A)
{
if (_nrows != A._nrows || _ncols != A._ncols || _nnzs != A._nnzs) return -1;
T diff_sqrd = 0.0;
floatT *vals = _values;
floatT *A_vals = A._values;
size_t *colidx = _colidx;
size_t *A_colidx = A._colidx;
size_t nrows = _nrows;
for (int64_t i = 0; i < nrows; i++)
{
if (_rowend[i] - _rowstr[i] != A._rowend[i] - A._rowstr[i]) return -1;
size_t ni = _rowend[i] - _rowstr[i];
size_t jj, jA;
for (size_t j = 0; j < ni; j++)
{
jj = _rowstr[i] + j;
jA = A._rowstr[i] + j;
if (colidx[jj] != A_colidx[jA]) return -1; // sparsity patterns must match entry by entry
diff_sqrd += std::pow(vals[jj] - A_vals[jA], 2);
}
}
return std::sqrt(diff_sqrd);
}
// y = alpha * A * x + beta * y
// Row-parallel CSR matrix-vector product; each row's dot product is private
// to one thread, so no synchronization is needed.
template <typename floatT, typename T>
void SparseMatrixCSR<floatT, T>::aAxby(const T alpha, const Vector<T> &x, const T beta, Vector<T> &y) const
{
assert(y.length() == _nrows && x.length() == _ncols);
floatT *vals = _values;
size_t *cidx = _colidx;
size_t nrows = _nrows;
#pragma omp parallel for
for (int64_t i = 0; i < nrows; i++)
{
T Aix = 0; // dot product of row i with x
size_t endidx = _rowend[i];
for (size_t j = _rowstr[i]; j < endidx; j++)
{
Aix += vals[j] * x[cidx[j]];
}
y[i] *= beta;
y[i] += alpha*Aix;
}
}
// y[i] = alpha * A[perm[i],:] * x + beta * y[i]
// Same as aAxby() but reads rows through the permutation rowperm.
// NOTE(review): the min_element >= 0 check below is trivially true for size_t.
template <typename floatT, typename T>
void SparseMatrixCSR<floatT, T>::aAxby(const T alpha, const Vector<T> &x, const T beta, Vector<T> &y, std::vector<size_t> &rowperm) const
{
assert(y.length() == _nrows && x.length() == _ncols && rowperm.size() == _nrows);
assert(*std::min_element(std::begin(rowperm), std::end(rowperm)) >= 0
&& *std::max_element(std::begin(rowperm), std::end(rowperm)) < _nrows);
floatT *vals = _values;
size_t *cidx = _colidx;
size_t nrows = _nrows;
#pragma omp parallel for
for (int64_t i = 0; i < nrows; i++)
{
T Aix = 0; // dot product of row rowperm[i] with x
size_t endidx = _rowend[rowperm[i]];
for (size_t j = _rowstr[rowperm[i]]; j < endidx; j++)
{
Aix += vals[j] * x[cidx[j]];
}
y[i] *= beta;
y[i] += alpha*Aix;
}
}
// block matrix vector product: y = alpha * A(:,start:end) * x + beta * y. Vector x has length end-start+1
// Scans all nonzeros of each row and keeps only those whose column index falls
// in the window [start, start + x.length()).
template <typename floatT, typename T>
void SparseMatrixCSR<floatT, T>::aAxbyBlock(const T alpha, const size_t start, const Vector<T> &x,
const T beta, Vector<T> &y) const
{
assert(y.length() == _nrows && start + x.length() <= _ncols);
floatT *vals = _values;
size_t *cidx = _colidx;
size_t nrows = _nrows;
size_t blockend = start + x.length();
#pragma omp parallel for
for (int64_t i = 0; i < nrows; i++)
{
T Aix = 0;
size_t endidx = _rowend[i];
size_t idx;
for (size_t j = _rowstr[i]; j < endidx; j++)
{
idx = cidx[j];
if (idx >= start && idx < blockend)
{
Aix += vals[j] * x[idx - start]; // shift into the block's local indexing
}
}
y[i] *= beta;
y[i] += alpha*Aix;
}
}
// y = alpha * A' * x + beta * y
// Transpose product implemented as a scatter over the CSR rows: row i of A
// contributes alpha * A[i,j] * x[i] to y[j] for each stored (i,j).
template <typename floatT, typename T>
void SparseMatrixCSR<floatT, T>::aATxby(const T alpha, const Vector<T> &x, const T beta, Vector<T> &y) const
{
    assert(x.length() == _nrows && y.length() == _ncols);
    // scale y up front: the scatter updates below run concurrently, so the
    // beta-scaling cannot be fused into the parallel loop
    y.scale(beta);
    floatT *vals = _values;
    size_t *cidx = _colidx;
    size_t nrows = _nrows;
#pragma omp parallel for
    for (int64_t i = 0; i < nrows; i++)
    {
        size_t endidx = _rowend[i];
        for (size_t j = _rowstr[i]; j < endidx; j++)
        {
            // different rows may hit the same output column, hence the atomic
#pragma omp atomic
            y[cidx[j]] += alpha * vals[j] * x[i];
        }
    }
}
// y = alpha * A[rowperm,:]' * x + beta * y
// i.e. y += alpha * sum_i A[rowperm[i],:] * x[i]; x[i] multiplies the
// permuted row rowperm[i].
template <typename floatT, typename T>
void SparseMatrixCSR<floatT, T>::aATxby(const T alpha, const Vector<T> &x, const T beta, Vector<T> &y, std::vector<size_t> &rowperm) const
{
    assert(x.length() == _nrows && y.length() == _ncols && rowperm.size() == _nrows);
    // note: the >= 0 half of this check is vacuous since size_t is unsigned
    assert(*std::min_element(std::begin(rowperm), std::end(rowperm)) >= 0
        && *std::max_element(std::begin(rowperm), std::end(rowperm)) < _nrows);
    // scale y up front: the scatter updates below run concurrently, so the
    // beta-scaling cannot be fused into the parallel loop
    y.scale(beta);
    floatT *vals = _values;
    size_t *cidx = _colidx;
    size_t nrows = _nrows;
#pragma omp parallel for
    for (int64_t i = 0; i < nrows; i++)
    {
        size_t endidx = _rowend[rowperm[i]];
        for (size_t j = _rowstr[rowperm[i]]; j < endidx; j++)
        {
            // different rows may hit the same output column, hence the atomic
#pragma omp atomic
            y[cidx[j]] += alpha * vals[j] * x[i];
        }
    }
}
// block matrix Transpose vector product y = alpha * A(:,start:end)' * x + beta * y. Vector y has length end-start+1
template <typename floatT, typename T>
void SparseMatrixCSR<floatT, T>::aATxbyBlock(const T alpha, const Vector<T> &x, const T beta,
    const size_t start, Vector<T> &y) const
{
    assert(x.length() == _nrows && start + y.length() <= _ncols);
    // scale y up front: the scatter updates below run concurrently, so the
    // beta-scaling cannot be fused into the parallel loop
    y.scale(beta);
    floatT *vals = _values;
    size_t *cidx = _colidx;
    size_t nrows = _nrows;
    // BUG FIX: the column block is delimited by y (the output block of length
    // end-start+1), not by x. In this transpose version x always has length
    // _nrows, so the old `start + x.length()` selected the wrong column range
    // (compare with the assert above and with aAxbyBlock, where x is the block).
    size_t blockend = start + y.length();
#pragma omp parallel for
    for (int64_t i = 0; i < nrows; i++)
    {
        size_t endidx = _rowend[i];
        size_t cj;
        for (size_t j = _rowstr[i]; j < endidx; j++)
        {
            cj = cidx[j];
            // only columns inside [start, blockend) map into y
            if (cj >= start && cj < blockend)
            {
                // different rows may hit the same output column, hence the atomic
#pragma omp atomic
                y[cj - start] += alpha * vals[j] * x[i];
            }
        }
    }
}
// return inner product of A(row,:) and vector x (row index is zero-based)
template <typename floatT, typename T>
T SparseMatrixCSR<floatT, T>::row_dot(const size_t row, const Vector<T> &x) const
{
    assert(row < _nrows && x.length() == _ncols);
    const floatT *val_ptr = _values;
    const size_t *col_ptr = _colidx;
    T result = 0;
    // kept serial: an OpenMP reduction here was found to slow rows with few
    // nonzeros down dramatically, and a conditional pragma did not help either
    for (size_t k = _rowstr[row], last = _rowend[row]; k < last; k++)
        result += val_ptr[k] * x[col_ptr[k]];
    return result;
}
// y = alpha * A(row,:) + y, where row is zero-based indexing
// Scatters the nonzeros of one row, scaled by alpha, into the dense vector y.
template <typename floatT, typename T>
void SparseMatrixCSR<floatT, T>::row_add(const size_t row, const T alpha, Vector<T> &y) const
{
    assert(row < _nrows && y.length() == _ncols);
    floatT *vals = _values;
    size_t *cidx = _colidx;
    size_t stridx = _rowstr[row];
    size_t endidx = _rowend[row];
    // kept serial on purpose: parallelizing this short scatter loop was measured to hurt
    //#pragma omp parallel for // dramatic slow down for small number of features, need condition
    //#pragma omp parallel for if (endidx-stridx > 10000) // even this is not really helpful
    for (int64_t j = stridx; j < endidx; j++)
    {
        y[cidx[j]] += alpha * vals[j];
    }
}
// Sparse soft-thresholding with threshold c, only using the sparsity pattern of the
// row (its stored values are never read). Reads v and writes w only at positions
// corresponding to nonzero elements of the specified row; other entries untouched.
template <typename floatT, typename T>
void SparseMatrixCSR<floatT, T>::row_sst(const size_t row, const T c, const Vector<T> &v, Vector<T> &w) const
{
    assert(v.length() == _ncols && w.length() == _ncols);
    const size_t *col_ptr = _colidx;
    const size_t first = _rowstr[row];
    const size_t last = _rowend[row];
    // serial on purpose: parallelizing short rows was measured to hurt
    for (size_t k = first; k < last; k++)
    {
        const size_t col = col_ptr[k];
        const T vi = v[col];
        // shrink vi toward zero by c
        if (vi > c)
            w[col] = vi - c;
        else if (vi < -c)
            w[col] = vi + c;
        else
            w[col] = 0;
    }
}
// Block-rows submatrix of SparseMatrixCSR with 0-based indexing. The nonzero
// values and column indices are shared with A (no copy); only per-row
// start/end pointer arrays are allocated inside recast, so construction
// is O(num_rows).
template <typename floatT, typename T>
SubSparseMatrixCSR<floatT, T>::SubSparseMatrixCSR(const SparseMatrixCSR<floatT, T> &A, const size_t row_start, const size_t num_rows)
{
    this->recast(A, row_start, num_rows);
}
// this will work for submatrix of submatrix, but only contiguous submatrices!
template <typename floatT, typename T>
void SubSparseMatrixCSR<floatT, T>::recast(const SparseMatrixCSR<floatT, T> &A, const size_t row_start, const size_t num_rows)
{
    // delegate to the index-list overload: build the contiguous index set
    // [row_start, row_start + num_rows) and recast through it, which also
    // works when A is itself a row-subset view with non-contiguous storage
    std::vector<size_t> idx(num_rows);
    std::iota(std::begin(idx), std::end(idx), row_start);
    this->recast(A, idx);
    // The following code does not work for submatrix of submatrix if the rows are not contiguous!
    /*
    assert(num_rows > 0 && row_start + num_rows <= A._nrows);
    _nrows = num_rows;
    _ncols = A._ncols;
    _nnzs = A._rowend[row_start + num_rows - 1] - A._rowstr[row_start];
    _values = A._values;
    _colidx = A._colidx;
    _rowstr = A._rowstr + row_start;
    _rowend = A._rowend + row_start;
    _has_bias_feature = A.has_bias_feature();
    */
}
// construct a row-subset view of A from an arbitrary (not necessarily
// contiguous or sorted) list of row indices; delegates to recast
template <typename floatT, typename T>
SubSparseMatrixCSR<floatT, T>::SubSparseMatrixCSR(const SparseMatrixCSR<floatT, T> &A, const std::vector<size_t> &row_indices)
{
    this->recast(A, row_indices);
}
// Re-point this view at the rows of A listed in row_indices. Values and
// column indices are shared with A; only the per-row start/end pointer
// arrays are rebuilt here.
template <typename floatT, typename T>
void SubSparseMatrixCSR<floatT, T>::recast(const SparseMatrixCSR<floatT, T> &A, const std::vector<size_t> &row_indices)
{
    // BUG FIX: the old asserts dereferenced *max_element on an empty list
    // (undefined behavior) and compared an unsigned min_element against 0,
    // which is vacuously true. An empty list now yields an empty 0 x ncols view.
    assert(row_indices.empty() ||
        *std::max_element(row_indices.begin(), row_indices.end()) < A._nrows);
    _nrows = row_indices.size();
    _ncols = A._ncols;
    _values = A._values;
    _colidx = A._colidx;
    _mem_rowstr.resize(_nrows);
    _mem_rowend.resize(_nrows);
    _nnzs = 0;
    for (size_t i = 0; i < _nrows; i++) {
        // copy the parent's row extents in the requested order
        _mem_rowstr[i] = A._rowstr[row_indices[i]];
        _mem_rowend[i] = A._rowend[row_indices[i]];
        _nnzs += _mem_rowend[i] - _mem_rowstr[i];
    }
    _rowstr = _mem_rowstr.data();
    _rowend = _mem_rowend.data();
    _has_bias_feature = A.has_bias_feature();
}
// instantiates template with different float types
template class SparseMatrixCSR<float, float>;
template class SparseMatrixCSR<float, double>;
template class SparseMatrixCSR<double, float>;
template class SparseMatrixCSR<double, double>;
template class SubSparseMatrixCSR<float, float>;
template class SubSparseMatrixCSR<float, double>;
template class SubSparseMatrixCSR<double, float>;
template class SubSparseMatrixCSR<double, double>;
}

126
src/Numerics/spmatrix.h Normal file
Просмотреть файл

@ -0,0 +1,126 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
#pragma once
#include <vector>
#include "dnvector.h"
namespace numerics{
template <typename, typename> class SubSparseMatrixCSR;
// Sparse matrix in CSR (compressed sparse row) storage with value type floatT,
// interacting with Vector of type T
template <typename floatT, typename T>
class SparseMatrixCSR
{
    // derived class cannot access private/protected members of instances of parent class
    friend class SubSparseMatrixCSR<floatT, T>;
private:
    // std::vector<>'s are used to better interface with memory allocated when reading from file
    std::vector<floatT> _mem_values;
    std::vector<size_t> _mem_colidx;
    std::vector<size_t> _mem_rowptr; // compact storage, only use NIST format for pointers!
protected:
    size_t _nrows; // number of rows.
    size_t _ncols; // number of columns.
    size_t _nnzs; // number of non-zero elements.
    // pointers to stored memory, convenient for Python interface and derived SubSparseMatrixCSR
    floatT *_values; // size of _nnzs, pointing to nonzero float/double elements in sparse matrix
    size_t *_colidx; // size of _nnzs, column indices (0-based) for elements in _values
    size_t *_rowstr; // size of _nrows, rowstr[i]-rowstr[0] is index of first non-zero in row i
    size_t *_rowend; // size of _nrows, rowend[i]-rowstr[0] is index ONE PAST the last non-zero in row i
    bool _has_bias_feature = false;
    // this constructor allows derived classes, but protected to prevent explicit use
    SparseMatrixCSR() = default;
public:
    // This constructor swaps vector<>'s by default, so input memory is lost after calling
    SparseMatrixCSR(std::vector<floatT> &vals, std::vector<size_t> &cidx, std::vector<size_t> &rptr,
        const bool has_bias_feature = false, const bool make_copy = false);
    // copy constructor, can take a transpose if specified, works with SubSparseMatrixCSR as input
    SparseMatrixCSR(const SparseMatrixCSR<floatT, T> &A, bool transpose = false);
    // Explicit Destructor is not necessary because std::vector<> used to store data
    //~SparseMatrixCSR();
    // For ease of memory management, we do not allow copy-assignment operator
    SparseMatrixCSR<floatT, T>& operator=(const SparseMatrixCSR<floatT, T> &A) = delete;
    // to test if two matrices have the same values: returns -1 if the sparsity
    // patterns differ, otherwise the Frobenius norm of the elementwise difference
    T norm_of_difference(SparseMatrixCSR<floatT, T> &A);
    inline size_t nrows() const { return _nrows; }
    inline size_t ncols() const { return _ncols; }
    inline size_t nnzs() const { return _nnzs; }
    inline bool has_bias_feature() const { return _has_bias_feature; }
    // THE FOLLOWING TWO ARE DANGEROUS TO USE FOR A TRANSPOSED DATA MATRIX WHERE BIAS IS THE LAST ROW!
    bool set_bias_feature(const floatT c); // only works if _has_bias_feature == true
    bool reset_ncols(const size_t n); // also need to reset bias feature index if exist
    // return number of elements in each column
    void col_elem_counts(std::vector<size_t> &counts) const;
    void row_elem_counts(std::vector<size_t> &counts) const;
    // scale ith row of sparse CSR matrix by y[i]
    void scale_rows(const Vector<T> &y);
    // return value: L[i] is squared 2-norm of ith row of the sparse CSR matrix
    void row_sqrd2norms(Vector<T> &sqr2dnorms) const;
    // scale each row so that each row has target 2-norm
    void normalize_rows(const T tgt2norm = 1);
    // return Frobenius norm of matrix (identifier keeps the historical spelling)
    T Frobenious_norm() const;
    // y = alpha * A * x + beta * y
    void aAxby(const T alpha, const Vector<T> &x, const T beta, Vector<T> &y) const;
    // y[i] = alpha * A[rowperm[i],:] * x + beta * y[i]
    void aAxby(const T alpha, const Vector<T> &x, const T beta, Vector<T> &y, std::vector<size_t> &rowperm) const;
    // y = alpha * A' * x + beta * y
    void aATxby(const T alpha, const Vector<T> &x, const T beta, Vector<T> &y) const;
    // y = alpha * A[rowperm,:]' * x + beta * y
    void aATxby(const T alpha, const Vector<T> &x, const T beta, Vector<T> &y, std::vector<size_t> &rowperm) const;
    // The following two functions are not efficient to take advantage of submatrix
    // block matrix vector product: y = alpha * A(:,start:end) * x + beta * y. Vector x has length end-start+1
    void aAxbyBlock(const T alpha, const size_t start, const Vector<T> &x, const T beta, Vector<T> &y) const;
    // block matrix Transpose vector product y = alpha * A(:,start:end)' * x + beta * y. y has length end-start+1
    void aATxbyBlock(const T alpha, const Vector<T> &x, const T beta, const size_t start, Vector<T> &y) const;
    // return inner product of A(idx,:) and vector x
    T row_dot(const size_t rowidx, const Vector<T> &x) const;
    // y = alpha * A(idx,:) + y
    void row_add(const size_t rowidx, const T alpha, Vector<T> &y) const;
    // sparse soft-thresholding using the sparse pattern of one particular row
    void row_sst(const size_t row, const T c, const Vector<T> &v, Vector<T> &w) const;
};
// This class defines a mask for a submatrix consisting of a selected set of
// rows of a sparse matrix (contiguous via the range constructor, or an
// arbitrary index list). Values/columns are shared with the parent matrix.
template <typename floatT, typename T>
class SubSparseMatrixCSR final : public SparseMatrixCSR<floatT, T>
{
private:
    std::vector<size_t> _mem_rowstr; // store NIST format row indices when sub-matrix
    std::vector<size_t> _mem_rowend; // is constructed from a subset of row indices
public:
    // constructor from a SparseMatrixCSR with 0-based indexing
    SubSparseMatrixCSR(const SparseMatrixCSR<floatT, T> &A, const size_t row_start, const size_t num_rows);
    SubSparseMatrixCSR(const SparseMatrixCSR<floatT, T> &A, const std::vector<size_t> &row_indices);
    // re-point this view at a different set of rows of A (no copy of values)
    void recast(const SparseMatrixCSR<floatT, T> &A, const size_t row_start, const size_t num_rows);
    void recast(const SparseMatrixCSR<floatT, T> &A, const std::vector<size_t> &row_indices);
    // For ease of memory management, we do not allow copy constructor and copy-assignment operator
    SubSparseMatrixCSR() = delete;
    SubSparseMatrixCSR(const SubSparseMatrixCSR<floatT, T>&) = delete;
    SubSparseMatrixCSR(const SubSparseMatrixCSR<floatT, T>&, bool) = delete;
    SubSparseMatrixCSR<floatT, T>& operator=(const SubSparseMatrixCSR<floatT, T>&) = delete;
};
}

Просмотреть файл

@ -0,0 +1,124 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
#include <vector>
#include <iostream>
#include <chrono>
#include <random>
#include <numeric>
#include <algorithm>
#include "utils.h"
#include "timer.h"
#include "spmatrix.h"
#include "floattype.h"
using Vector = numerics::Vector<floatT>;
using SubVector = numerics::SubVector<floatT>;
using SparseMatrixCSR = numerics::SparseMatrixCSR<spmatT, floatT>;
using SubSparseMatrixCSR = numerics::SubSparseMatrixCSR<spmatT, floatT>;
// Smoke test for SparseMatrixCSR / SubSparseMatrixCSR: loads a LIBSVM file,
// times X*v and X'*u, then verifies that the same products computed through a
// random m x K block partition of X agree with the full-matrix results
// (the printed difference norms should be ~0).
int main(int argc, char* argv[])
{
    // alternative datasets kept around for quick switching while benchmarking
    //string filename = "C:/Data/LIBSVM/splice1percent100_1";
    //string filename = "C:/Data/LIBSVM/ads1percent100_1";
    //string filename = "C:/Data/LIBSVM/rcv1_test.binary";
    string filename = "C:/Data/LIBSVM/rcv1_train.binary";
    //string filename = "C:/Data/LIBSVM/news20.binary";
    //string filename = "C:/Data/LIBSVM/covtype.libsvm.binary.scale";
    vector<spmatT> labels;
    vector<spmatT> weights;
    vector<spmatT> values;
    vector<size_t> colidx;
    vector<size_t> rowptr;
    // NOTE(review): the three boolean flags are passed positionally — confirm
    // their meaning against load_datafile's declaration in utils.h
    load_datafile(filename, labels, weights, values, colidx, rowptr, false, true, true);
    // last two flags: no bias feature, and swap (don't copy) the input vectors
    SparseMatrixCSR X(values, colidx, rowptr, false, false);
    Vector y(labels);
    size_t N = X.nrows();
    size_t D = X.ncols();
    std::cout << "Sparse matrix with " << N << " rows, " << D << " cols, " << X.nnzs() << " nnzs." << std::endl;
    // test matrix-vector multiplication -----------------------------------------
    Vector v(D), Xv(N), u(N), XTu(D);
    u.fill_rand(0, 1);
    v.fill_rand(0, 1);
    HighResTimer timer;
    X.aAxby(1, v, 0, Xv);      // Xv = X * v
    float t_elpsd = timer.seconds_from_last();
    std::cout << "Matrix-Vector multiplications took " << t_elpsd << " seconds)." << std::endl;
    timer.reset();
    X.aATxby(1, u, 0, XTu);    // XTu = X' * u
    t_elpsd = timer.seconds_from_last();
    std::cout << "Matrix.T-Vector multiplications took " << t_elpsd << " seconds)." << std::endl;
    // ---------------------------------------------------------------------------
    // test submatrix splitting with random indices, prepare for DSCOVR algorithms
    size_t m = 10;   // number of row blocks
    size_t K = 20;   // number of column blocks
    std::vector<size_t> row_ids = random_permutation(N);
    std::vector<size_t> col_ids = random_permutation(D);
    std::vector<SparseMatrixCSR*> XiT(m);                  // XiT[i] = transpose of row block i
    std::vector<std::vector<SubSparseMatrixCSR*>> XiTk(m); // XiTk[i][k] = block k of XiT[i]
    for (size_t i = 0; i < m; i++) { XiTk[i].resize(K); }
    // remainder rows/cols are distributed one extra per leading block
    size_t row_stride = N / m;
    size_t row_remain = N % m;
    size_t col_stride = D / K;
    size_t col_remain = D % K;
    SubSparseMatrixCSR Xi(X, 0, 1); // temporary initialization
    size_t row_start = 0;
    for (size_t i = 0; i < m; i++) {
        size_t n_rows = i < row_remain ? row_stride + 1 : row_stride;
        std::vector<size_t> subrows(row_ids.begin() + row_start, row_ids.begin() + (row_start + n_rows));
        Xi.recast(X, subrows);
        // deep copy with transpose (see the copy constructor's transpose flag)
        XiT[i] = new SparseMatrixCSR(Xi, true);
        row_start += n_rows;
        size_t col_start = 0;
        for (size_t k = 0; k < K; k++) {
            size_t n_cols = k < col_remain ? col_stride + 1 : col_stride;
            std::vector<size_t> subcols(col_ids.begin() + col_start, col_ids.begin() + (col_start + n_cols));
            XiTk[i][k] = new SubSparseMatrixCSR(*XiT[i], subcols); // actually rows of XiT after transpose
            col_start += n_cols;
        }
    }
    // test matrix vector multiplications using the split submatrices
    Vector Xv2(N), Xv3(N), uperm(N), XTu2(D);
    SubVector Xiv(Xv2, 0, N);
    u.post_permute(row_ids, uperm);
    SubVector ui(uperm, 0, N);
    row_start = 0;
    for (size_t i = 0; i < m; i++) {
        // XiT[i] is Xi transposed, so its ncols() is the number of rows in block i
        Xiv.recast(Xv2, row_start, XiT[i]->ncols());
        XiT[i]->aATxby(1, v, 0, Xiv);    // (Xi')' * v = Xi * v into the i-th slice
        ui.recast(uperm, row_start, XiT[i]->ncols());
        XiT[i]->aAxby(1, ui, 1, XTu2);   // accumulate Xi' * u_i
        row_start += Xiv.length();
    }
    std::cout << "||X*v|| = " << Xv.norm2() << std::endl;
    std::cout << "||X1*v; ...; Xm*v|| = " << Xv2.norm2() << std::endl;
    // NOTE(review): pre_permute appears to invert post_permute's reordering — confirm in dnvector.h
    Xv2.pre_permute(row_ids, Xv3);
    Vector::axpy(-1, Xv, Xv3);
    std::cout << "||X*v - [X1*v; ...; Xm*v]|| = " << Xv3.norm2() << std::endl;
    std::cout << "||X'*u|| = " << XTu.norm2() << std::endl;
    std::cout << "||X1'*u1 + ... + Xm'*um|| = " << XTu2.norm2() << std::endl;
    Vector::axpy(-1, XTu, XTu2);
    std::cout << "||X'*u - (X1'*u1 + ... + Xm'*um)|| = " << XTu2.norm2() << std::endl;
    // delete pointers allocated with "new"
    for (size_t i = 0; i < m; i++) {
        delete XiT[i];
        for (size_t k = 0; k < K; k++) {
            delete XiTk[i][k];
        }
    }
    return 0;
}

141
src/SDCA/SDCA.vcxproj Normal file
Просмотреть файл

@ -0,0 +1,141 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<ItemGroup>
<ClInclude Include="display.h" />
<ClInclude Include="randalgms.h" />
<ClInclude Include="sampling.h" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="display.cpp" />
<ClCompile Include="rpcd_cocoa.cpp" />
<ClCompile Include="sdca.cpp" />
<ClCompile Include="sdca_weighted.cpp" />
<ClCompile Include="test_sdca.cpp" />
</ItemGroup>
<PropertyGroup Label="Globals">
<ProjectGuid>{F2136A94-90B2-44EE-B812-9FA8C0EDD175}</ProjectGuid>
<RootNamespace>Arica</RootNamespace>
<ProjectName>SDCA</ProjectName>
<WindowsTargetPlatformVersion>10.0.16299.0</WindowsTargetPlatformVersion>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<PlatformToolset>v141</PlatformToolset>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v141</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
<ConfigurationType>StaticLibrary</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<PlatformToolset>v141</PlatformToolset>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
<ConfigurationType>StaticLibrary</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v141</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
</ImportGroup>
<ImportGroup Label="Shared">
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup />
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
<SDLCheck>true</SDLCheck>
</ClCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
<SDLCheck>true</SDLCheck>
<AdditionalIncludeDirectories>../Numerics;../Utils</AdditionalIncludeDirectories>
<OpenMPSupport>false</OpenMPSupport>
</ClCompile>
<Link>
<AdditionalDependencies>Numerics.lib;Utils.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalLibraryDirectories>$(OutDir)</AdditionalLibraryDirectories>
<SubSystem>Console</SubSystem>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<SDLCheck>true</SDLCheck>
</ClCompile>
<Link>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<SDLCheck>true</SDLCheck>
<AdditionalIncludeDirectories>../Numerics;../Utils</AdditionalIncludeDirectories>
<OpenMPSupport>true</OpenMPSupport>
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</ClCompile>
<Link>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<AdditionalLibraryDirectories>$(OutDir)</AdditionalLibraryDirectories>
<AdditionalDependencies>Numerics.lib;Utils.lib;%(AdditionalDependencies)</AdditionalDependencies>
<SubSystem>Console</SubSystem>
</Link>
</ItemDefinitionGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>

54
src/SDCA/display.cpp Normal file
Просмотреть файл

@ -0,0 +1,54 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
#include <string>
#include <iostream>
#include <iomanip>
#include <vector>
#include <numeric>
#include <random>
#include <algorithm>
#include "display.h"
namespace randalgms {
// print a blank line, the algorithm banner, then the column captions
void display_header(std::string algm_info)
{
    std::cout << std::endl
              << algm_info << std::endl
              << "epoch primal dual gap time" << std::endl;
}
// print one progress row: epoch counter, primal/dual objectives (fixed,
// 6 decimals), duality gap (scientific, 3 decimals), elapsed seconds
void display_progress(int epoch, double primal_obj, double dual_obj, double time_from_start)
{
    auto &os = std::cout;
    os << std::setw(3) << epoch;
    os << std::fixed << std::setprecision(6)
       << std::setw(10) << primal_obj
       << std::setw(10) << dual_obj;
    os << std::scientific << std::setprecision(3)
       << std::setw(12) << primal_obj - dual_obj;
    os << std::fixed << std::setprecision(3)
       << std::setw(9) << time_from_start;
    os << std::endl;
}
// banner variant for multi-stage algorithms (adds a leading "stage" column)
void display_header_stage(std::string algm_info)
{
    std::cout << std::endl
              << algm_info << std::endl
              << "stage epoch primal dual gap time" << std::endl;
}
// progress row including the outer stage counter before the epoch counter
void display_progress_stage(int stage, int epoch, double primal_obj, double dual_obj, double time_from_start)
{
    auto &os = std::cout;
    os << std::setw(3) << stage
       << std::setw(6) << epoch;
    os << std::fixed << std::setprecision(6)
       << std::setw(10) << primal_obj
       << std::setw(10) << dual_obj;
    os << std::scientific << std::setprecision(3)
       << std::setw(12) << primal_obj - dual_obj;
    os << std::fixed << std::setprecision(3)
       << std::setw(9) << time_from_start;
    os << std::endl;
}
}

14
src/SDCA/display.h Normal file
Просмотреть файл

@ -0,0 +1,14 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
#pragma once
#include<string>
// Console-progress helpers shared by the randomized algorithms (SDCA, RPCD):
// a banner/caption header followed by one formatted row per epoch.
namespace randalgms
{
    // print the algorithm banner and the epoch/primal/dual/gap/time captions
    void display_header(std::string algm_info);
    // print one row: epoch, primal and dual objectives, duality gap, elapsed seconds
    void display_progress(int epoch, double primal_obj, double dual_obj, double time_from_start);
    // banner variant for multi-stage algorithms (adds a leading "stage" column)
    void display_header_stage(std::string algm_info);
    // per-epoch row including the outer stage counter
    void display_progress_stage(int stage, int epoch, double primal_obj, double dual_obj, double time_from_start);
}

25
src/SDCA/randalgms.h Normal file
Просмотреть файл

@ -0,0 +1,25 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
#pragma once
#include "floattype.h"
#include "functions.h"
using namespace functions;
namespace randalgms
{
    // SDCA need the regularization function to have strongly convex parameter 1 (unit strong convexity)
    // opt_sample: 'u' = i.i.d. uniform, 'w' = weighted by squared row norms,
    // 'p' = fresh random permutation each epoch (see sampling.h).
    // wio/aio are the primal/dual variables: read as warm start, overwritten with the result.
    int sdca(const SparseMatrixCSR &X, const Vector &y, const ISmoothLoss &f, const double lambda,
        const IUnitRegularizer &g, const int max_epoch, const double eps_gap, Vector &wio, Vector &aio,
        const char opt_sample = 'p', const char opt_update = 'd', const bool display = true);
    // variant taking an extra vector theta (one entry per example)
    // NOTE(review): theta semantics inferred from the name "sdca_weighted.cpp" — confirm there
    int sdca(const SparseMatrixCSR &X, const Vector &y, const Vector &theta, const ISmoothLoss &f, const double lambda,
        const IUnitRegularizer &g, const int max_epoch, const double eps_gap, Vector &wio, Vector &aio,
        const char opt_sample = 'p', const char opt_update = 'd', const bool display = true);
    // RPCD algorithm for solving CoCoA local optimization problems;
    // a0 is the starting dual point, a1 receives the result; returns the number of epochs run
    int rpcd_cocoa(const SparseMatrixCSR &X, const Vector &y, const ISmoothLoss &f, const double lambda_N, const double sigma,
        const int max_epoch, const Vector &Xw, const Vector &a0, Vector &a1, const char opt_sample = 'p', const bool display = true);
}

111
src/SDCA/rpcd_cocoa.cpp Normal file
Просмотреть файл

@ -0,0 +1,111 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
#include <stdexcept>
#include <string>
#include "randalgms.h"
#include "sampling.h"
#include "timer.h"
#include "display.h"
namespace randalgms {
// RPCD algorithm for solving CoCoA local optimization problems.
// X, y: local data block and labels; f: smooth loss (accessed via conjugate);
// lambda_N: lambda * N; sigma: CoCoA aggregation parameter; Xw: fixed vector
// X*w from the outer iteration; a0: starting dual point; a1: output dual point.
// Returns the number of epochs performed.
int rpcd_cocoa(const SparseMatrixCSR &X, const Vector &y, const ISmoothLoss &f, const double lambda_N, const double sigma,
    const int max_epoch, const Vector &Xw, const Vector &a0, Vector &a1, const char opt_sample, const bool display)
{
    size_t n = X.nrows();
    size_t d = X.ncols();
    if (y.length() != n || Xw.length() != n || a0.length() != n || a1.length() != n) {
        throw std::runtime_error("RPCD(CoCoA): Input/output matrix and vector dimensions do not match.");
    }
    // start tracking time elapsed.
    HighResTimer timer;
    // L[i] = ||X_i||^2, the coordinate-wise curvature bound for row i
    Vector L(n);
    X.row_sqrd2norms(L);
    // construct the random index generator after vector L is computed
    // (the weighted sampler needs the row norms)
    IRandomIndexGenerator *randidx;
    switch (opt_sample) {
    case 'u': // i.i.d. uniform sampling
        randidx = new RandIndex_Unifom(n);
        break;
    case 'w': // i.i.d. weighted sampling proportional to L[i]
        randidx = new RandIndex_Weighted(L.to_vector());
        break;
    case 'p': // random permutations for each epoch
        randidx = new RandIndex_Permut(n);
        break;
    default:
        throw std::runtime_error("RPCD(CoCoA): sampling method " + std::string(1, opt_sample) + " is not defined.");
    }
    // scale L for convenience of computation: L[i] = ||X_i||^2 * sigma / (lambda * n)
    L.scale(sigma / lambda_N);
    // fall back to a = 0 if the warm-start dual point is infeasible for f*
    a1.copy(a0);
    if (!f.conj_feasible(a1, y)) {
        a1.fill_zero();
    }
    double Da = f.sum_conjugate(a1, y); // objective value with initial a1 = a0
    Da /= n;
    double t_elpsd = timer.seconds_from_start();
    if (display) {
        display_header("RPCD(CoCoA) with options: sample(" + std::string(1, opt_sample) + ").");
        display_progress(0, 0, Da, t_elpsd);
    }
    // XTda = X'*(a1 - a_init), dimension d; maintained incrementally via row_add
    // and used for the gradient of the quadratic term (sigma/(2*lambda_N))||X'(a-a0)||^2
    Vector XTda(d);
    int epoch = 0;
    while (epoch < max_epoch)
    {
        // iterate over data once for each epoch
        for (int64_t iter = 0; iter < n; iter++)
        {
            size_t i = randidx->next();
            double ai = a1[i];
            double yi = y[i];
            double Li = L[i];
            // partial gradient of the quadratic part at coordinate i
            double gi = -Xw[i] + (sigma / lambda_N)*X.row_dot(i, XTda);
            // BUG FIX: ai1 used to be declared outside the loop and assigned
            // only when Li > 0, so for an all-zero row the comparison below
            // read an indeterminate value (undefined behavior). A zero row has
            // no quadratic coupling; leave that coordinate unchanged.
            double ai1 = (Li > 0) ? f.conj_prox(1.0 / Li, ai - gi / Li, yi) : ai;
            if (ai1 != ai)
            {
                X.row_add(i, ai1 - ai, XTda);
                a1[i] = ai1;
            }
        }
        epoch++;
        // dual objective: (1/n)( sum_i f*(a_i) - <Xw, a> + (sigma/(2*lambda_N))||X'(a-a0)||^2 )
        Da = f.sum_conjugate(a1, y) - Xw.dot(a1) + 0.5*(sigma / lambda_N)*pow(XTda.norm2(), 2);
        Da /= n;
        t_elpsd = timer.seconds_from_start();
        if (display) {
            display_progress(epoch, 0, Da, t_elpsd);
        }
    }
    // delete pointers initialized with "new"
    delete randidx;
    return epoch;
}
}

78
src/SDCA/sampling.h Normal file
Просмотреть файл

@ -0,0 +1,78 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
#pragma once
#include <algorithm>
#include <numeric>
#include <random>
#include <vector>
namespace randalgms {
// an abstract class to serve as interface to random index generators
class IRandomIndexGenerator
{
protected:
    size_t _n;            // indices are drawn from {0, 1, ..., _n - 1}
    std::mt19937_64 _rng; // random number generator, seeded in constructor
public:
    // seeds the engine non-deterministically from std::random_device
    IRandomIndexGenerator(const size_t n) : _n(n) {
        std::random_device rd;
        _rng.seed(rd());
    };
    // BUG FIX: concrete generators are deleted through base-class pointers
    // (`delete randidx;` in the SDCA/RPCD drivers), which is undefined
    // behavior without a virtual destructor.
    virtual ~IRandomIndexGenerator() = default;
    virtual size_t operator()() = 0;
    virtual size_t next() = 0;
};
// i.i.d. uniform sampling over {0, ..., n-1}
// (class-name typo "Unifom" is kept: callers reference it by this spelling)
class RandIndex_Unifom : public IRandomIndexGenerator
{
private:
    std::uniform_int_distribution<size_t> _unif; // closed range [0, n-1]
public:
    RandIndex_Unifom(const size_t n) : IRandomIndexGenerator(n), _unif(0, n - 1) {};
    size_t operator()() override { return this->next(); };
    size_t next() override { return _unif(_rng); };
};
// cycles through a fresh random permutation of {0, ..., n-1} every n draws
class RandIndex_Permut : public IRandomIndexGenerator
{
private:
    std::vector<size_t> _perm; // current random permutation of 0..n-1
    size_t _pos;               // how many entries of _perm were already handed out
public:
    RandIndex_Permut(const size_t n) : IRandomIndexGenerator(n), _perm(n), _pos(0) {
        std::iota(_perm.begin(), _perm.end(), 0);      // 0, 1, ..., n-1
        std::shuffle(_perm.begin(), _perm.end(), _rng); // random permutation
    };
    size_t operator()() override { return this->next(); };
    size_t next() override {
        // reshuffle once the whole permutation has been consumed
        if (_pos == _n) {
            std::shuffle(_perm.begin(), _perm.end(), _rng);
            _pos = 0;
        }
        return _perm[_pos++];
    };
};
// i.i.d. sampling with probability proportional to the given weights
class RandIndex_Weighted : public IRandomIndexGenerator
{
private:
    std::discrete_distribution<size_t> _distr; // normalizes the weights internally
public:
    // PERF FIX: the weight vectors were taken by value, copying a potentially
    // large array on every construction; const& is backward-compatible for
    // all callers and only the iterators are needed.
    RandIndex_Weighted(const std::vector<float> &weights)
        : IRandomIndexGenerator(weights.size()), _distr(weights.begin(), weights.end()) {};
    RandIndex_Weighted(const std::vector<double> &weights)
        : IRandomIndexGenerator(weights.size()), _distr(weights.begin(), weights.end()) {};
    size_t operator()() override { return _distr(_rng); };
    size_t next() override { return _distr(_rng); };
};
}

192
src/SDCA/sdca.cpp Normal file
Просмотреть файл

@ -0,0 +1,192 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
#include <stdexcept>
#include <string>
#include "randalgms.h"
#include "sampling.h"
#include "timer.h"
#include "display.h"
namespace randalgms {

	// SDCA (stochastic dual coordinate ascent) for regularized ERM:
	//     minimize_w  (1/n) * sum_i f(X_i' w, y_i) + lambda * g(w)
	// SDCA needs the regularization function g to have strongly convex parameter 1
	// (unit strong convexity); lambda scales the overall regularization.
	//
	// Parameters:
	//   X          n-by-d data matrix (one example per row)
	//   y          length-n label vector
	//   f          smooth loss (provides derivative, conjugate, prox, smoothness)
	//   lambda     regularization parameter
	//   g          unit-strongly-convex regularizer
	//   max_epoch  maximum number of passes over the data
	//   eps_gap    stop when duality gap P(w) - D(a) < eps_gap
	//   w, a       in/out primal and dual iterates; a seeds the run and is reset
	//              to zero if infeasible for f's conjugate
	//   opt_sample 'u' uniform, 'w' weighted by row norms, 'p' per-epoch permutation
	//   opt_update 'd' dual prox update, 'p' primal-gradient update, 'b' Bregman
	//              (conjugate-free) update
	//   display    print per-epoch progress when true
	// Returns the number of epochs performed.
	// Throws std::runtime_error on dimension mismatch or unknown opt_sample.
	int sdca(const SparseMatrixCSR &X, const Vector &y, const ISmoothLoss &f, const double lambda,
		const IUnitRegularizer &g, const int max_epoch, const double eps_gap, Vector &w, Vector &a,
		const char opt_sample, const char opt_update, const bool display)
	{
		size_t n = X.nrows();
		size_t d = X.ncols();

		if (y.length() != n || w.length() != d || a.length() != n) {
			throw std::runtime_error("SDCA: Input/output matrix and vector dimensions do not match.");
		}

		// start tracking time elapsed.
		HighResTimer timer;

		// row-wise squared 2-norms act as per-coordinate Lipschitz constants
		Vector L(n);
		X.row_sqrd2norms(L);

		// construct the random index generator after vector L is computed
		IRandomIndexGenerator *randidx;
		switch (opt_sample) {
		case 'u':	// i.i.d. uniform sampling
			randidx = new RandIndex_Unifom(n);
			break;
		case 'w':	// i.i.d. weighted sampling proportional to L[i]
			randidx = new RandIndex_Weighted(L.to_vector());
			break;
		case 'p':	// random permutations for each epoch
			randidx = new RandIndex_Permut(n);
			break;
		default:
			throw std::runtime_error("SDCA: sampling method " + std::string(1, opt_sample) + " is not defined.");
		}

		double gammainv = f.smoothness();	// f is 1/gamma smooth
		double lambda_n = lambda*n;			// sdca supposes g has unit strong convexity
		L.scale(1.0 / lambda_n);			// scale L for easy computation: L[i] = ||X_i||^2 / (lambda * n)

		// test if initial a is feasible for f's conjugate; if not, restart from zero
		if (!f.conj_feasible(a, y)) {
			a.fill_zero();
		}

		// initial dual variable a is used (not w) because SDCA is fundamentally a dual algorithm
		Vector v(d);	// intermediate variable to compute next w
		Vector Xw(n);	// vector to store X * w
		X.aATxby(-1.0 / lambda_n, a, 0, v);	// v = - (X' * a) / (lambda * n)
		g.conj_grad(v, w);					// w = \nabla g*(v) determined by a
		X.aAxby(1.0f, w, 0, Xw);			// Xw = X * w

		// compute primal and dual objective values, and duality gap
		double Pw = f.sum_loss(Xw, y) / n + lambda*g(w);
		double Da = -f.sum_conjugate(a, y) / n - lambda*g.conjugate(v, w);
		double Pw_Da = Pw - Da;

		Vector beta(n);		// vector for conjugate-free ('b') update
		f.conj_derivative(a, y, beta);

		double t_elpsd = timer.seconds_from_start();
		if (display) {
			display_header("SDCA with options: sample(" + std::string(1, opt_sample)
				+ "), update(" + std::string(1, opt_update) + ").");
			display_progress(0, Pw, Da, t_elpsd);
		}

		// SDCA main loop
		size_t i;
		double ai, yi, Li, Xiw, ai1, da;	// working scalars for coordinate i
		double cfp = 2;						// damping factor for the 'b' update
		int epoch = 0;
		while (epoch < max_epoch)
		{
			// iterate over data once for each epoch
			for (int64_t iter = 0; iter < n; iter++)
			{
				i = randidx->next();
				ai = a[i];
				yi = y[i];
				Li = L[i];

				// compute Xi'*w, exploiting the structure of g when possible
				switch (g.symbol()) {
				case 'q':
					Xiw = X.row_dot(i, v) + X.row_dot(i, *g.offset());
					break;
				default:
					Xiw = X.row_dot(i, w);
					break;
				}

				da = 0;
				if (Li > 0)
				{
					switch (opt_update) {
					case 'b':	// primal update, use Bregman divergence
						if (epoch == 0) {
							da = (f.derivative(Xiw, yi) - ai) / (1.0 + Li * gammainv);
							ai1 = ai + da;
							beta[i] = f.conj_derivative(ai1, yi);
						}
						else {
							beta[i] = ((Li*gammainv / cfp)*beta[i] + Xiw) / (1.0 + Li * gammainv / cfp);
							ai1 = f.derivative(beta[i], yi);
							da = ai1 - ai;
						}
						break;
					case 'p':	// primal update, use primal function gradient for update
						da = (f.derivative(Xiw, yi) - ai) / (1.0 + Li*gammainv);
						ai1 = ai + da;
						break;
					case 'd':	// dual update, use dual (conjugate) function prox mapping
					default:
						ai1 = f.conj_prox(1.0f / Li, ai + Xiw / Li, yi);
						da = ai1 - ai;
						break;
					}
				}

				// apply the update only when it is numerically significant.
				// NOTE: the previous code used abs(da); without <cmath> in scope
				// the C integer abs() may be selected, truncating |da| < 1 to 0
				// and silently skipping updates — compare the double directly.
				if (da > 1e-8 || da < -1e-8)
				{
					switch (g.symbol()) {
					case '2':	// regularization g is unit L2 norm squared
						X.row_add(i, -da / lambda_n, w);
						break;
					case 'q':	// quadratic with offset: maintain v only
						X.row_add(i, -da / lambda_n, v);
						break;
					case 'e':	// regularization g is elastic net (unit L2 + L1)
						X.row_add(i, -da / lambda_n, v);
						X.row_sst(i, g.l1penalty(), v, w);
						break;
					default:	// generic update w = \nabla g*(v)
						X.row_add(i, -da / lambda_n, v);
						g.conj_grad(v, w);
						break;
					}
					a[i] = ai1;
				}
			}
			epoch++;

			// re-calculate v if necessary to combat numerical error accumulation (useful for async?)
			//X.aATxby(-1.0 / lambda_n, a, 0, v);
			//g.conj_grad(v, w);

			// compute primal and dual objective values and duality gap
			if (g.symbol() == 'q') {
				Vector::elem_add(*g.offset(), v, w);
			}
			X.aAxby(1.0f, w, 0, Xw);
			Pw = f.sum_loss(Xw, y) / n + lambda*g(w);
			if (g.symbol() == '2') {
				Da = -f.sum_conjugate(a, y) / n - lambda*g.conjugate(w);
			}
			else {
				Da = -f.sum_conjugate(a, y) / n - lambda*g.conjugate(v);
			}

			t_elpsd = timer.seconds_from_start();
			if (display) {
				display_progress(epoch, Pw, Da, t_elpsd);
			}

			// stop when the duality gap certifies eps_gap-accuracy
			Pw_Da = Pw - Da;
			if (Pw_Da < eps_gap) {
				break;
			}
		}

		// delete pointers initialized with "new"
		delete randidx;

		return epoch;
	}
}

178
src/SDCA/sdca_weighted.cpp Normal file
Просмотреть файл

@ -0,0 +1,178 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
#include <stdexcept>
#include <string>
#include "randalgms.h"
#include "sampling.h"
#include "timer.h"
#include "display.h"
namespace randalgms {

	// Weighted variant of SDCA: each example i carries a weight theta[i] that
	// scales its contribution to the loss and to the dual updates (the exact
	// weighted objective is defined by ISmoothLoss::sum_loss(theta, ., .) —
	// NOTE(review): presumably (1/n) sum_i theta_i f(X_i'w, y_i) + lambda*g(w);
	// confirm against the loss implementation).
	// SDCA needs the regularization function g to have strongly convex parameter 1
	// (unit strong convexity).
	//
	// Parameters mirror the unweighted sdca() plus:
	//   theta  length-n vector of per-example weights
	// Returns the number of epochs performed.
	// Throws std::runtime_error on dimension mismatch or unknown opt_sample.
	int sdca(const SparseMatrixCSR &X, const Vector &y, const Vector &theta, const ISmoothLoss &f, const double lambda,
		const IUnitRegularizer &g, const int max_epoch, const double eps_gap, Vector &w, Vector &a,
		const char opt_sample, const char opt_update, const bool display)
	{
		size_t n = X.nrows();
		size_t d = X.ncols();

		if (y.length() != n || theta.length() != n || w.length() != d || a.length() != n) {
			throw std::runtime_error("SDCA: Input/output matrix and vector dimensions do not match.");
		}

		// start tracking time elapsed.
		HighResTimer timer;

		// initialize primal-dual variables and working space variables
		Vector L(n);	// vector to store row-wise Lipschitz constant
		X.row_sqrd2norms(L);

		// construct the random index generator after vector L is computed
		IRandomIndexGenerator *randidx;
		switch (opt_sample) {
		case 'u':	// i.i.d. uniform sampling
			randidx = new RandIndex_Unifom(n);
			break;
		case 'w':	// i.i.d. weighted sampling proportional to L[i]
			randidx = new RandIndex_Weighted(L.to_vector());
			break;
		case 'p':	// random permutations for each epoch
			randidx = new RandIndex_Permut(n);
			break;
		default:
			throw std::runtime_error("SDCA: sampling method " + std::string(1, opt_sample) + " is not defined.");
		}

		double gammainv = f.smoothness();	// f is 1/gamma smooth
		double lambda_n = lambda*n;			// sdca supposes g has unit strong convexity
		L.scale(1.0 / lambda_n);			// scale L for easy computation: L[i] = ||X_i||^2 / (lambda * n)
		Vector::elem_multiply(L, theta, L);	// fold per-example weights into L for the weighted update

		// need to test if initial a is feasible using f.conj_feasible() return true or false
		if (!f.conj_feasible(a, y)) {
			a.fill_zero();
		}

		// initial dual variable a is used (not w) because SDCA is fundamentally a dual algorithm
		Vector v(d);	// intermediate variable to compute next w
		Vector Xw(n);	// vector to store X * w
		Vector::elem_multiply(theta, a, Xw);	// borrow memory of Xw for computing v (not affecting later)
		X.aATxby(-1.0 / lambda_n, Xw, 0, v);	// v = - (X' * (theta.*a)) / (lambda * n)
		g.conj_grad(v, w);						// w = \nabla g*(v) determined by a
		X.aAxby(1.0f, w, 0, Xw);				// Xw = X * w

		// compute primal and dual objective values, and duality gap
		double Pw = f.sum_loss(theta, Xw, y) / n + lambda*g(w);
		double Da = -f.sum_conjugate(theta, a, y) / n - lambda*g.conjugate(v, w);
		double Pw_Da = Pw - Da;

		double t_elpsd = timer.seconds_from_start();
		if (display) {
			display_header("SDCA with options: sample(" + std::string(1, opt_sample)
				+ "), update(" + std::string(1, opt_update) + ").");
			display_progress(0, Pw, Da, t_elpsd);
		}

		// SDCA main loop
		size_t i;
		double ai, yi, Li, Xiw, ai1, da;	// working scalars for the sampled coordinate
		double cfp = 2;						// NOTE(review): unused here (only the 'b' update in sdca.cpp uses it)
		int epoch = 0;
		while (epoch < max_epoch)
		{
			// iterate over data once for each epoch
			for (int64_t iter = 0; iter < n; iter++)
			{
				i = randidx->next();
				ai = a[i];
				yi = y[i];
				Li = L[i];

				// compute Xi'*w, exploit structure of g
				switch (g.symbol()) {
				case 'q':
					Xiw = X.row_dot(i, v) + X.row_dot(i, *g.offset());
					break;
				default:
					Xiw = X.row_dot(i, w);
					break;
				}

				da = 0;
				if (Li > 0)
				{
					switch (opt_update) {
					case 'p':	// primal update, use primal function gradient for update
						da = (f.derivative(Xiw, yi) - ai) / (1.0 + Li*gammainv);
						ai1 = ai + da;
						break;
					case 'd':	// dual update, use dual (conjugate) function prox mapping
					default:
						ai1 = f.conj_prox(1.0f / Li, ai + Xiw / Li, yi);
						da = ai1 - ai;
						break;
					}
				}

				if (da != 0)
				{
					// propagate the dual change; the step is scaled by theta[i]
					switch (g.symbol()) {
					case '2':	// regularization g is unit L2 norm squared
						X.row_add(i, - theta[i] * da / lambda_n, w);
						break;
					case 'q':	// quadratic with offset: maintain v only
						X.row_add(i, - theta[i] * da / lambda_n, v);
						break;
					case 'e':	// regularization g is elastic net (unit L2 + L1)
						X.row_add(i, - theta[i] * da / lambda_n, v);
						X.row_sst(i, g.l1penalty(), v, w);
						break;
					default:	// generic update w = \nabla g*(v)
						X.row_add(i, - theta[i] * da / lambda_n, v);
						g.conj_grad(v, w);
						break;
					}
					a[i] = ai1;
				}
			}
			epoch++;

			// re-calculate v if necessary to combat numerical error accumulation (useful for async?)
			//X.aATxby(-1.0 / lambda_n, a, 0, v);	// here need to multiply by theta as well!
			//g.conj_grad(v, w);

			// compute primal and dual objective values and duality gap
			if (g.symbol() == 'q') {
				Vector::elem_add(*g.offset(), v, w);
			}
			X.aAxby(1.0f, w, 0, Xw);
			Pw = f.sum_loss(theta, Xw, y) / n + lambda*g(w);
			if (g.symbol() == '2') {
				Da = -f.sum_conjugate(theta, a, y) / n - lambda*g.conjugate(w);
			}
			else {
				Da = -f.sum_conjugate(theta, a, y) / n - lambda*g.conjugate(v);
			}

			t_elpsd = timer.seconds_from_start();
			if (display) {
				display_progress(epoch, Pw, Da, t_elpsd);
			}

			// stop when the duality gap certifies eps_gap-accuracy
			Pw_Da = Pw - Da;
			if (Pw_Da < eps_gap) {
				break;
			}
		}

		// delete pointers initialized with "new"
		delete randidx;

		return epoch;
	}
}

110
src/SDCA/test_sdca.cpp Normal file
Просмотреть файл

@ -0,0 +1,110 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
#include <string>
#include <iostream>
#include <algorithm>
#include <random>
#include <numeric>
#include <stdexcept>
#include <ctime>
#include <math.h>
#include "utils.h"
#include "randalgms.h"
using namespace std;
using namespace randalgms;
//int main_test_sdca(int argc, char* argv[])
// Test driver for SDCA: trains a binary classifier on a data file and reports
// training (and optionally testing) error rates.
// Usage: test_sdca <training_file> [<testing_file>]
int main(int argc, char* argv[])
{
	if (argc < 2) {
		std::cout << "Need to specify training data file." << std::endl;
		std::exit(0);
	}

	string trainingfile = argv[1];
	string testingfile;
	if (argc >= 3){ testingfile = argv[2]; }

	// read training and testing files
	vector<spmatT> labels;
	vector<spmatT> weights;
	vector<spmatT> values;
	vector<size_t> colidx;
	vector<size_t> rowptr;

	std::cout << "Loading training data ... " << std::endl;
	size_t n_examples = load_datafile(trainingfile, labels, weights, values, colidx, rowptr, false, true, true);
	SparseMatrixCSR X(values, colidx, rowptr, false);
	Vector y(labels);
	size_t dim = X.ncols();

	SparseMatrixCSR *X_test = nullptr;
	Vector *y_test = nullptr;
	if (!testingfile.empty()){
		std::cout << "Loading test data ... " << std::endl;
		// load_datafile overwrites the same label/value/index buffers
		load_datafile(testingfile, labels, weights, values, colidx, rowptr, false, true, true);
		X_test = new SparseMatrixCSR(values, colidx, rowptr, false);
		y_test = new Vector(labels);
		// pad both matrices to the larger feature dimension so one w fits both
		dim = X.ncols() >= X_test->ncols() ? X.ncols() : X_test->ncols();
		X.reset_ncols(dim);
		X_test->reset_ncols(dim);
	}

	// training using SDCA and display error rates
	Vector w(dim), a(X.nrows());
	// choose the loss; alternatives kept for quick experimentation
	ISmoothLoss *f = new LogisticLoss();
	//ISmoothLoss *f = new SmoothedHinge();
	//ISmoothLoss *f = new SquareLoss();
	// choose the regularizer; OffsetQuadratic q is stack-allocated (see delete below)
	IUnitRegularizer *g = new SquaredL2Norm();
	//IUnitRegularizer *g = new ElasticNet(1);
	OffsetQuadratic q(dim);
	//IUnitRegularizer *g = &q;
	q.update_offset(w);

	double lambda = 1.0e-5;
	int max_epoch = 100;
	double eps_gap = 1e-8;

	X.normalize_rows();
	// run SDCA with permutation sampling under the three update rules;
	// a is reset between runs (w is re-derived from a inside sdca)
	sdca(X, y, *f, lambda, *g, max_epoch, eps_gap, w, a, 'p', 'd');
	a.fill_zero();
	sdca(X, y, *f, lambda, *g, max_epoch, eps_gap, w, a, 'p', 'p');
	a.fill_zero();
	sdca(X, y, *f, lambda, *g, max_epoch, eps_gap, w, a, 'p', 'b');

	/*
	q.update_offset(w);
	a.fill_zero();
	sdca(X, y, *f, lambda, *g, max_epoch, eps_gap, w, a, 'u', 'd');

	// shuffle the row indices and do it again
	std::vector<size_t> idx(X.nrows());
	std::iota(std::begin(idx), std::end(idx), 0);
	std::shuffle(std::begin(idx), std::end(idx), std::default_random_engine(0));
	SubSparseMatrixCSR Xshfl(X, idx);
	Vector yshfl(y, idx);
	// w.fill_zero(); // no need to clean w because only a is used to initialize SDCA
	a.fill_zero();
	Vector ashfl(a, idx);
	sdca(Xshfl, yshfl, *f, lambda, *g, max_epoch, eps_gap, w, ashfl, 'w', 'd');
	*/

	float train_err = binary_error_rate(X, y, w);
	std::cout << "Training error rate = " << train_err * 100 << " %" << std::endl;

	if (!testingfile.empty()) {
		float test_err = binary_error_rate(*X_test, *y_test, w);
		std::cout << "Testing error rate = " << test_err * 100 << " %" << std::endl;
	}

	// do not forget to delete object pointers!
	if (X_test != nullptr) { delete X_test; }
	if (y_test != nullptr) { delete y_test; }
	delete f;
	// g may alias the stack-allocated q ('q' symbol); only delete heap regularizers
	if (g->symbol() != 'q') delete g;

	return 0;
}

40
src/SplitData/ReadMe.txt Normal file
Просмотреть файл

@ -0,0 +1,40 @@
========================================================================
CONSOLE APPLICATION : SplitData Project Overview
========================================================================
AppWizard has created this SplitData application for you.
This file contains a summary of what you will find in each of the files that
make up your SplitData application.
SplitData.vcxproj
This is the main project file for VC++ projects generated using an Application Wizard.
It contains information about the version of Visual C++ that generated the file, and
information about the platforms, configurations, and project features selected with the
Application Wizard.
SplitData.vcxproj.filters
This is the filters file for VC++ projects generated using an Application Wizard.
It contains information about the association between the files in your project
and the filters. This association is used in the IDE to show grouping of files with
similar extensions under a specific node (e.g., ".cpp" files are associated with the
"Source Files" filter).
SplitData.cpp
This is the main application source file.
/////////////////////////////////////////////////////////////////////////////
Other standard files:
StdAfx.h, StdAfx.cpp
These files are used to build a precompiled header (PCH) file
named SplitData.pch and a precompiled types file named StdAfx.obj.
/////////////////////////////////////////////////////////////////////////////
Other notes:
AppWizard uses "TODO:" comments to indicate parts of the source code you
should add to or customize.
/////////////////////////////////////////////////////////////////////////////

Просмотреть файл

@ -0,0 +1,48 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
// SplitData.cpp : Defines the entry point for the console application.
#include <string>
#include <iostream>
#include <iomanip>
#include <vector>
#include <algorithm>
#include <omp.h>
#include "timer.h"
#include "utils.h"
#include "functions.h"
#include "split_file.h"
using namespace functions;
// Entry point for the SplitData console tool.
// Usage: SplitData <input_file> <num_splits> [s|S]
// The optional third argument selects the sorted split; anything else (or no
// argument) selects the random split. Prints the line count and elapsed time.
int main(int argc, char* argv[])
{
	if (argc < 3) {
		std::cout << "Need to specify input file name and number of files to split." << std::endl;
		std::exit(0);
	}

	const std::string input_file(argv[1]);
	const int num_pieces = std::stoi(std::string(argv[2]));

	// third argument (if any): 's'/'S' requests the sorted split
	char mode = 'n';
	if (argc >= 4) {
		mode = argv[3][0];
	}

	// record time for reading file and writing files
	HighResTimer timer;
	timer.start();

	size_t num_lines;
	if (mode == 's' || mode == 'S') {
		num_lines = split_data_sorted(input_file, num_pieces);
	}
	else {
		num_lines = split_datafile(input_file, num_pieces);
	}

	std::cout << "Read and wrote " << num_lines << " lines in " << timer.seconds_from_start() << " seconds." << std::endl;
	return 0;
}

Просмотреть файл

@ -0,0 +1,160 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals">
<VCProjectVersion>15.0</VCProjectVersion>
<ProjectGuid>{AF0B3A60-1C23-4229-96EC-E1071F4DAFDB}</ProjectGuid>
<Keyword>Win32Proj</Keyword>
<RootNamespace>SplitData</RootNamespace>
<WindowsTargetPlatformVersion>10.0.16299.0</WindowsTargetPlatformVersion>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<PlatformToolset>v141</PlatformToolset>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v141</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<PlatformToolset>v141</PlatformToolset>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v141</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
</ImportGroup>
<ImportGroup Label="Shared">
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<LinkIncremental>false</LinkIncremental>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<LinkIncremental>true</LinkIncremental>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<LinkIncremental>true</LinkIncremental>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<LinkIncremental>false</LinkIncremental>
</PropertyGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<PrecompiledHeader>NotUsing</PrecompiledHeader>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<OpenMPSupport>true</OpenMPSupport>
<AdditionalIncludeDirectories>../Utils;../Numerics</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<AdditionalLibraryDirectories>$(OutDir)</AdditionalLibraryDirectories>
<AdditionalDependencies>Utils.lib;Numerics.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<ClCompile>
<PrecompiledHeader>
</PrecompiledHeader>
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<ClCompile>
<PrecompiledHeader>
</PrecompiledHeader>
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
<PreprocessorDefinitions>_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>../Utils;../Numerics</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<AdditionalLibraryDirectories>$(OutDir)</AdditionalLibraryDirectories>
<AdditionalDependencies>Utils.lib;Numerics.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<PrecompiledHeader>
</PrecompiledHeader>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
</Link>
</ItemDefinitionGroup>
<ItemGroup>
<Text Include="ReadMe.txt" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="SplitData.cpp" />
<ClCompile Include="split_file.cpp" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="split_file.h" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>

Просмотреть файл

@ -0,0 +1,33 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup>
<Filter Include="Source Files">
<UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
<Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
</Filter>
<Filter Include="Header Files">
<UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
<Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
</Filter>
<Filter Include="Resource Files">
<UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
<Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
</Filter>
</ItemGroup>
<ItemGroup>
<Text Include="ReadMe.txt" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="SplitData.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="split_file.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="split_file.h">
<Filter>Header Files</Filter>
</ClInclude>
</ItemGroup>
</Project>

Просмотреть файл

@ -0,0 +1,126 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include <algorithm>
#include <numeric>
#include <stdexcept>
#include <cstdio>
#include <cstdint>
#include <random>
#include <omp.h>
#include "utils.h"
#include "timer.h"
#include "split_file.h"
// OpenMP 3.0 supports unsigned for-loop iterator, so can replace std::int64_t by size_t
using std::int64_t;
// Split a data file into n smaller files by assigning each line to a
// uniformly random output file. Output files are named
// "<dataname><n>_1" ... "<dataname><n>_<n>".
// Returns the number of lines read (and written).
// Throws std::runtime_error if the input file cannot be opened.
// (std:: names are qualified explicitly rather than relying on a
// using-directive leaked from an included header.)
size_t split_datafile(const std::string dataname, const unsigned int n)
{
	std::ifstream ifs(dataname);
	if (!ifs.is_open()) {
		throw std::runtime_error("Data file " + dataname + " cannot be opened.");
	}

	// give each output stream a large private buffer to reduce flush frequency
	constexpr size_t buffer_size = 256 * 1024;
	struct FileBuffer {
		char buf[buffer_size];
	};
	std::vector<FileBuffer> buffer(n);
	std::vector<std::ofstream> ofs(n);
	for (unsigned int i = 0; i < n; i++) {
		// pubsetbuf must be called before open() to take effect
		ofs[i].rdbuf()->pubsetbuf(buffer[i].buf, buffer_size);
		std::string filename = dataname + std::to_string(n) + '_' + std::to_string(i + 1);
		ofs[i].open(filename);
	}

	// uniform random generator over output file indices
	std::random_device rdv;
	std::mt19937_64 rng(rdv());
	std::uniform_int_distribution<size_t> unif(0, n - 1);

	std::string line;
	size_t count = 0;
	while (std::getline(ifs, line)) {
		size_t i = unif(rng);
		//size_t i = count % n;	// deterministic round-robin alternative
		ofs[i] << line << '\n';
		count++;
	}

	// close all files
	ifs.close();
	for (unsigned int i = 0; i < n; i++) {
		ofs[i].close();
	}
	return count;
}
// Split a data file into n smaller files after ordering lines by their
// two-character prefix in DESCENDING order (only the first two characters of
// each line are kept in memory for the sort). Lines are written back in their
// original file order, but output file k receives the k-th chunk of the sorted
// order. When the line count is not divisible by n, the remaining
// (count % n) lines keep the default assignment to the first output file.
// Output files are named "<dataname><n>_s0_1" ... "<dataname><n>_s0_<n>".
// Returns the total number of lines processed.
// Throws std::runtime_error if the input file cannot be opened.
size_t split_data_sorted(const std::string dataname, const unsigned int n)
{
	std::ifstream ifs(dataname);
	if (!ifs.is_open()) {
		throw std::runtime_error("Data file " + dataname + " cannot be opened.");
	}

	// first pass: keep only a 2-character prefix of each line to save memory;
	// the sort below therefore orders by this prefix only
	std::vector<std::string> alllines;
	std::string line;
	size_t count = 0;
	while (std::getline(ifs, line)) {
		alllines.push_back(line.substr(0, 2));
		count++;
	}
	ifs.close();

	// sort line indices (not the lines themselves) by descending prefix
	std::vector<size_t> indices(count);
	std::vector<size_t> file_id(count, 0);
	std::iota(indices.begin(), indices.end(), 0);
	std::sort(indices.begin(), indices.end(),
		[&alllines](size_t i1, size_t i2) { return (alllines[i1] > alllines[i2]); });

	// assign the k-th chunk of the sorted order to file k
	size_t quotient = count / n;
	for (size_t i = 0; i < n * quotient; i++) {
		file_id[indices[i]] = i / quotient;
	}

	// second pass: reopen the input file and route each line to its file,
	// using large private stream buffers to reduce flush frequency
	constexpr size_t buffer_size = 256 * 1024;
	struct FileBuffer {
		char buf[buffer_size];
	};
	std::vector<FileBuffer> buffer(n);
	std::vector<std::ofstream> ofs(n);
	for (unsigned int i = 0; i < n; i++) {
		ofs[i].rdbuf()->pubsetbuf(buffer[i].buf, buffer_size);
		std::string filename = dataname + std::to_string(n) + "_s0_" + std::to_string(i + 1);
		ofs[i].open(filename);
	}
	ifs.open(dataname);	// open() clears the EOF state from the first pass (C++11)
	count = 0;
	while (std::getline(ifs, line)) {
		ofs[file_id[count]] << line << '\n';
		count++;
	}

	// close all files
	ifs.close();
	for (unsigned int i = 0; i < n; i++) {
		ofs[i].close();
	}
	return count;
}

Просмотреть файл

@ -0,0 +1,10 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
#pragma once

#include <string>

// Split the data file into n smaller files.
// split_datafile:    each line goes to a uniformly random output file.
// split_data_sorted: lines are bucketed by a sorted order of their prefixes
//                    (see split_file.cpp); each output file gets one chunk.
// Both return the number of lines processed and throw std::runtime_error
// if the input file cannot be opened.
size_t split_datafile(const std::string dataname, const unsigned int n);
size_t split_data_sorted(const std::string dataname, const unsigned int n);

132
src/Utils/Utils.vcxproj Normal file
Просмотреть файл

@ -0,0 +1,132 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals">
<ProjectGuid>{3E2AC248-2A09-4F04-9FD8-4F2DEA665B1C}</ProjectGuid>
<RootNamespace>Utils</RootNamespace>
<WindowsTargetPlatformVersion>10.0.16299.0</WindowsTargetPlatformVersion>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<PlatformToolset>v141</PlatformToolset>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v141</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
<ConfigurationType>StaticLibrary</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<PlatformToolset>v141</PlatformToolset>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
<ConfigurationType>StaticLibrary</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v141</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
</ImportGroup>
<ImportGroup Label="Shared">
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup />
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
<SDLCheck>true</SDLCheck>
</ClCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
<SDLCheck>true</SDLCheck>
<OpenMPSupport>false</OpenMPSupport>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<SDLCheck>true</SDLCheck>
</ClCompile>
<Link>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<SDLCheck>true</SDLCheck>
<OpenMPSupport>true</OpenMPSupport>
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</ClCompile>
<Link>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<SubSystem>Console</SubSystem>
</Link>
</ItemDefinitionGroup>
<ItemGroup>
<ClCompile Include="loaddata.cpp" />
<ClCompile Include="loadlist.cpp" />
<ClCompile Include="test_utils.cpp" />
<ClCompile Include="utils.cpp" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="timer.h" />
<ClInclude Include="utils.h" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>

Просмотреть файл

@ -0,0 +1,39 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup>
<Filter Include="Source Files">
<UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
<Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
</Filter>
<Filter Include="Header Files">
<UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
<Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
</Filter>
<Filter Include="Resource Files">
<UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
<Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
</Filter>
</ItemGroup>
<ItemGroup>
<ClCompile Include="loaddata.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="test_utils.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="utils.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="loadlist.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="utils.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="timer.h">
<Filter>Header Files</Filter>
</ClInclude>
</ItemGroup>
</Project>

167
src/Utils/loaddata.cpp Normal file
Просмотреть файл

@ -0,0 +1,167 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include <algorithm>
#include <stdexcept>
#include <cstdio>
#include <cstdint>
#include <chrono>
#include <omp.h>
#include "utils.h"
#include "timer.h"
// OpenMP 3.0 supports unsigned for-loop iterator, so can replace std::int64_t by size_t
using std::int64_t;
using namespace std;
// load datafile with line format: <label>[:<weight>] [<colidx>[:<value>]] ...
template <typename T>
size_t load_datafile(const string filename, vector<T> &labels, vector<T> &weights,
vector<T> &values, vector<size_t> &colidx, vector<size_t> &rowptr,
const bool add_bias, const bool disp_timing, const bool binary, const size_t start_colidx,
const bool skip_first)
{
ifstream ifs(filename);
if (!ifs.is_open()) {
throw runtime_error("Data file " + filename + " cannot be opened.");
}
// record time for reading file and post-processing
HighResTimer timer;
timer.start();
// Work with new vectors and then swap them with output vectors in the end
vector<T> _labels;
vector<T> _weights;
vector<T> _values;
vector<size_t> _colidx;
vector<size_t> _rowptr;
string line;
T lbl, wgh, val;
size_t idx, max_idx;
const char *pStart;
char *pEnd;
try {
while (getline(ifs, line))
{
// pStart now has a null-character '\0' appended at the end as c_str()
pStart = line.c_str();
// first read the label, and possibly a weight for the example
lbl = (T)strtod(pStart, &pEnd);
if (*pEnd == ':') {
wgh = (T)strtod(pEnd + 1, &pEnd); // pEnd + 1 to skip ':'
}
else {
wgh = 1.0f;
}
if (binary) { lbl = lbl > 0.5 ? 1 : -1; } // use {+1, -1} for binary labels
_labels.push_back(lbl);
_weights.push_back(wgh);
_rowptr.push_back(_values.size());
// some file has redundant information about dimensionality of feature vector
if (skip_first) {
idx = strtoul(pEnd, &pEnd, 10); // idx is unsigned long int with base 10
}
// read features in example. test for the null character '\0' at the line end
max_idx = 0;
while (*pEnd) // if not reaching end of line '\0'
{
idx = strtoul(pEnd, &pEnd, 10); // idx is unsigned long int with base 10
if (*pEnd == ':') {
val = (T)strtod(pEnd + 1, &pEnd); // pEnd + 1 to skip ':'
}
else {
val = 1.0f;
}
_values.push_back(val);
_colidx.push_back(idx);
max_idx = idx > max_idx ? idx : max_idx;
// skip white space, only necessary at end of each line to move to '\0'
while (isspace(*pEnd)) { pEnd++; }
}
if (add_bias) // only add bias after processing all features in the example
{
_values.push_back(1.0f); // can reset value in SparseMatrixCSR
_colidx.push_back(max_idx + 1); // will reset after reading all data
}
}
// set the last element of row pointers in SparseMatrixCSR format
_rowptr.push_back(_values.size());
}
catch (...)
{
throw runtime_error("Something wrong when reading data file: " + filename);
}
ifs.close();
float t_loading = timer.seconds_from_last();
// make sure column index starts with 0, as needed by sparse matrix operations
size_t num_vals = _colidx.size();
size_t min_idx = *min_element(_colidx.begin(), _colidx.end());
// for distributed computing, it is necessary to specify?!
if (min_idx < start_colidx) {
throw runtime_error("Error in locading data file " + filename + ": min_idx smaller than start_colidx.");
}
if (start_colidx > 0)
{
#pragma omp parallel for
for (int64_t i = 0; i < num_vals; i++) {
_colidx[i] -= start_colidx;
}
}
// reset bias index for all examples to be the same last index
size_t num_examples = _labels.size();
max_idx = *max_element(_colidx.begin(), _colidx.end());
if (add_bias)
{
#pragma omp parallel for
for (int64_t i = 0; i < num_examples; i++) {
size_t bias_idx = _rowptr[i + 1] - 1;
_colidx[bias_idx] = max_idx; // this already include bias indexing
}
}
// swap vectors to avoid reallocate memory, and clean up content of input vectors
labels.swap(_labels);
weights.swap(_weights);
values.swap(_values);
colidx.swap(_colidx);
rowptr.swap(_rowptr);
// display timing if disp_timing == true
float t_process = timer.seconds_from_last();
if (disp_timing) {
std::cout << std::endl << "Loading datafile: " << filename << std::endl;
std::cout << "Time for loading datafile: " << t_loading << " seconds." << std::endl;
std::cout << "Time for post-processing: " << t_process << " seconds." << std::endl;
std::cout << "Number of examples: " << num_examples << std::endl;
std::cout << "Number of features: " << max_idx + 1 << std::endl;
std::cout << "Number of nonzeros: " << values.size() << std::endl;
std::cout << std::endl;
}
return num_examples;
}
// Instantiate function templates with float and double types
template size_t load_datafile<float>(const string filename, vector<float> &labels, vector<float> &weights,
vector<float> &vals, vector<size_t> &cidx, vector<size_t> &rptr,
const bool add_bias, const bool disp_timing, const bool binary, const size_t start_colidx, const bool skip_first);
template size_t load_datafile<double>(const string filename, vector<double> &labels, vector<double> &weights,
vector<double> &vals, vector<size_t> &cidx, vector<size_t> &rptr,
const bool add_bias, const bool disp_timing, const bool binary, const size_t start_colidx, const bool skip_first);

172
src/Utils/loadlist.cpp Normal file
Просмотреть файл

@ -0,0 +1,172 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include <algorithm>
#include <stdexcept>
#include <cstdio>
#include <cstdint>
#include <chrono>
#include <omp.h>
#include "utils.h"
#include "timer.h"
// OpenMP 3.0 supports unsigned for-loop iterator, so can replace std::int64_t by size_t
using std::int64_t;
using namespace std;
// load datafile with line format: <label>[:<weight>] [<colidx>[:<value>]] ...
template <typename T>
size_t load_datafile(const list<string> filelist, vector<T> &labels, vector<T> &weights,
vector<T> &values, vector<size_t> &colidx, vector<size_t> &rowptr,
const bool add_bias, const bool disp_timing, const bool binary, const size_t start_colidx,
const bool skip_first)
{
// Work with new vectors and then swap them with output vectors in the end
vector<T> _labels;
vector<T> _weights;
vector<T> _values;
vector<size_t> _colidx;
vector<size_t> _rowptr;
string line;
T lbl, wgh, val;
size_t idx, max_idx;
const char *pStart;
char *pEnd;
// record time for reading file and post-processing
HighResTimer timer;
timer.start();
for (auto & filename : filelist)
{
ifstream ifs(filename);
if (!ifs.is_open()) {
throw runtime_error("Data file " + filename + " cannot be opened.");
}
try {
while (getline(ifs, line))
{
// pStart now has a null-character '\0' appended at the end as c_str()
pStart = line.c_str();
// first read the label, and possibly a weight for the example
lbl = (T)strtod(pStart, &pEnd);
if (*pEnd == ':') {
wgh = (T)strtod(pEnd + 1, &pEnd); // pEnd + 1 to skip ':'
}
else {
wgh = 1.0f;
}
if (binary) { lbl = lbl > 0.5 ? 1 : -1; } // use {+1, -1} for binary labels
_labels.push_back(lbl);
_weights.push_back(wgh);
_rowptr.push_back(_values.size());
// some file has redundant information about dimensionality of feature vector
if (skip_first) {
idx = strtoul(pEnd, &pEnd, 10); // idx is unsigned long int with base 10
}
// read features in example. test for the null character '\0' at the line end
max_idx = 0;
while (*pEnd) // if not reaching end of line '\0'
{
idx = strtoul(pEnd, &pEnd, 10); // idx is unsigned long int with base 10
if (*pEnd == ':') {
val = (T)strtod(pEnd + 1, &pEnd); // pEnd + 1 to skip ':'
}
else {
val = 1.0f;
}
_values.push_back(val);
_colidx.push_back(idx);
max_idx = idx > max_idx ? idx : max_idx;
// skip white space, only necessary at end of each line to move to '\0'
while (isspace(*pEnd)) { pEnd++; }
}
if (add_bias) // only add bias after processing all features in the example
{
_values.push_back(1.0f); // can reset value in SparseMatrixCSR
_colidx.push_back(max_idx + 1); // will reset after reading all data
}
}
}
catch (...)
{
throw runtime_error("Something wrong when reading data file: " + filename);
}
ifs.close();
}
// set the last element of row pointers in SparseMatrixCSR format
_rowptr.push_back(_values.size());
float t_loading = timer.seconds_from_last();
// make sure column index starts with 0, as needed by sparse matrix operations
size_t num_vals = _colidx.size();
size_t min_idx = *min_element(_colidx.begin(), _colidx.end());
// for distributed computing, it is necessary to specify?!
if (min_idx < start_colidx) {
throw runtime_error("Error in locading data file: min_idx smaller than start_colidx.");
}
if (start_colidx > 0)
{
#pragma omp parallel for
for (int64_t i = 0; i < num_vals; i++) {
_colidx[i] -= start_colidx;
}
}
// reset bias index for all examples to be the same last index
size_t num_examples = _labels.size();
max_idx = *max_element(_colidx.begin(), _colidx.end());
if (add_bias)
{
#pragma omp parallel for
for (int64_t i = 0; i < num_examples; i++) {
size_t bias_idx = _rowptr[i + 1] - 1;
_colidx[bias_idx] = max_idx; // this already include bias indexing
}
}
// swap vectors to avoid reallocate memory, and clean up content of input vectors
labels.swap(_labels);
weights.swap(_weights);
values.swap(_values);
colidx.swap(_colidx);
rowptr.swap(_rowptr);
// display timing if disp_timing == true
float t_process = timer.seconds_from_last();
if (disp_timing) {
std::cout << std::endl << "Loading datafiles:" << std::endl;
for (auto & filename : filelist) {
std::cout << " " << filename << std::endl;
}
std::cout << "Time for loading datafiles: " << t_loading << " seconds." << std::endl;
std::cout << "Time for post-processing: " << t_process << " seconds." << std::endl;
std::cout << "Number of examples: " << num_examples << std::endl;
std::cout << "Number of features: " << max_idx + 1 << std::endl;
std::cout << "Number of nonzeros: " << values.size() << std::endl;
std::cout << std::endl;
}
return num_examples;
}
// Instantiate function templates with float and double types
template size_t load_datafile<float>(const list<string> filelist, vector<float> &labels, vector<float> &weights,
vector<float> &vals, vector<size_t> &cidx, vector<size_t> &rptr,
const bool add_bias, const bool disp_timing, const bool binary, const size_t start_colidx, const bool skip_first);
template size_t load_datafile<double>(const list<string> filelist, vector<double> &labels, vector<double> &weights,
vector<double> &vals, vector<size_t> &cidx, vector<size_t> &rptr,
const bool add_bias, const bool disp_timing, const bool binary, const size_t start_colidx, const bool skip_first);

47
src/Utils/test_utils.cpp Normal file
Просмотреть файл

@ -0,0 +1,47 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
#include <iostream>
#include <algorithm>
#include <vector>
#include "utils.h"
// Smoke-test driver for the data-loading utilities: loads hard-coded data
// files one-by-one and then again via the file-list overload, printing the
// timing statistics for each (disp_timing = true).
// Note: `int main` replaces the original non-standard `void main`.
int main(int argc, char* argv[])
{
    (void)argc; // command-line arguments currently unused; see the
    (void)argv; // commented-out `filename = argv[1];` alternative below
    using floatT = float;
    vector<floatT> labels;
    vector<floatT> weights;
    vector<floatT> values;
    vector<size_t> colidx;
    vector<size_t> rowptr;
    //string filename;
    //filename = argv[1];
    //filename = "C:/Data/LIBSVMdata/covtype.libsvm.binary.scale"; // 4 seconds
    //filename = "C:/Data/LIBSVMdata/splice1percent100_1"; // 12 seconds
    //filename = "C:/Data/LIBSVMdata/ads1percent100_1"; // 4 seconds
    //filename = "C:/Data/LIBSVMdata/rcv1_test.binary"; // 50 seconds
    //filename = "C:/Data/LIBSVMdata/rcv1_train.binary"; // 2 seconds
    //filename = "C:/Data/LIBSVMdata/news20.binary"; // 6 seconds
    //filename = "C:/Data/MSFT_Ads/TrainingFIDOnly2.10G"; // 6 minutes
    //filename = "//gcr/scratch/AZ-USCentral/lixiao/Data/MSFT_Ads/TrainingFIDOnly2.10G"; // 25 minutes
    const string files[] =
    {
        //"C:/Data/LIBSVMdata/ads1percent100_1",
        //"C:/Data/LIBSVMdata/rcv1_test.binary",
        //"C:/Data/LIBSVMdata/news20.binary"
        "C:/Data/part-00000.txt",
        "C:/Data/part-r-00001.txt",
    };
    // single-file overload: add_bias=false, disp_timing=true, binary=true
    for (const auto & filename : files) {
        (void)load_datafile(filename, labels, weights, values, colidx, rowptr, false, true, true);
    }
    // file-list overload over the same two files
    std::list<string> filelist;
    filelist.push_back("C:/Data/part-00000.txt");
    filelist.push_back("C:/Data/part-r-00001.txt");
    (void)load_datafile(filelist, labels, weights, values, colidx, rowptr, false, true, true);
    return 0;
}

32
src/Utils/timer.h Normal file
Просмотреть файл

@ -0,0 +1,32 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
#pragma once
#include <chrono>
// Simple wall-clock timer built on std::chrono::high_resolution_clock,
// reporting elapsed time in (float) seconds.
using HighResClock = std::chrono::high_resolution_clock;
using FloatSeconds = std::chrono::duration<float>;

class HighResTimer
{
public:
    // The timer begins running as soon as it is constructed.
    HighResTimer() { reset(); }

    // (Re)start the timer; start() and reset() are synonyms.
    void start() { reset(); }

    // Set both the start mark and the "last" mark to now.
    void reset()
    {
        t_start = HighResClock::now();
        t_last = t_start;
    }

    // Seconds elapsed since construction/reset.
    // Also advances the "last" mark to now, like the original implementation.
    float seconds_from_start()
    {
        t_last = HighResClock::now();
        return FloatSeconds(t_last - t_start).count();
    }

    // Seconds elapsed since the previous timing call (lap time).
    float seconds_from_last()
    {
        const auto t_now = HighResClock::now();
        const float elapsed = FloatSeconds(t_now - t_last).count();
        t_last = t_now;
        return elapsed;
    }

private:
    HighResClock::time_point t_start, t_last;
};

20
src/Utils/utils.cpp Normal file
Просмотреть файл

@ -0,0 +1,20 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
#include <vector>
#include <numeric>
#include <random>
#include <algorithm>
#include "utils.h"
// Generate a random permutation of the indices 0, 1, 2, ..., N-1.
// A positive seed produces a reproducible shuffle; for seed <= 0 a
// nondeterministic seed is drawn from std::random_device.
std::vector<size_t> random_permutation(const size_t N, const int seed)
{
    std::vector<size_t> p(N);
    std::iota(p.begin(), p.end(), 0);
    std::random_device rdev;
    const unsigned int rand_seed = seed > 0 ? (unsigned int)seed : rdev();
    std::shuffle(p.begin(), p.end(), std::default_random_engine(rand_seed));
    // plain return enables NRVO; `return std::move(p)` would inhibit it
    return p;
}

28
src/Utils/utils.h Normal file
Просмотреть файл

@ -0,0 +1,28 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
#pragma once
#include<list>
#include<string>
#include<vector>
// NOTE(review): `using namespace std;` at header scope leaks into every
// includer (test_utils.cpp currently relies on it for unqualified vector/
// string/list); prefer qualifying the declarations with std:: once callers
// are updated.
using namespace std;
// generate a random permutation vector for indices 0, 1, 2, ..., N-1
// (a positive seed makes the shuffle reproducible; seed <= 0 draws a
// nondeterministic seed from std::random_device — see utils.cpp)
std::vector<size_t> random_permutation(const size_t N, const int seed = -1);
// Load datafile with line format: <label>[:<weight>] [<colidx>[:<value>]] ...
// By default LIBSVM colidx starts with 1, but SparseMatrixCSR starts with 0,
// hence start_colidx = 1 is subtracted from every column index by default.
// Returns the number of examples read; outputs go into the reference args.
template <typename T>
size_t load_datafile(const string filename, vector<T> &labels, vector<T> &weights,
    vector<T> &values, vector<size_t> &colidx, vector<size_t> &rowptr,
    const bool add_bias = false, const bool disp_timing = false, const bool binary = false,
    const size_t start_colidx = 1, const bool skip_first = false);
// Same as above, but concatenates every file in filelist into one dataset.
// skip_first discards the first index token of each row (needed to read the
// criteo dataset, which stores extra per-row information there).
template <typename T>
size_t load_datafile(const list<string> filelist, vector<T> &labels, vector<T> &weights,
    vector<T> &values, vector<size_t> &colidx, vector<size_t> &rowptr,
    const bool add_bias = false, const bool disp_timing = false, const bool binary = false,
    const size_t start_colidx = 1, const bool skip_first = false);

87
src/pSDCA.sln Normal file
Просмотреть файл

@ -0,0 +1,87 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio 15
VisualStudioVersion = 15.0.26730.8
MinimumVisualStudioVersion = 10.0.40219.1
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Numerics", "Numerics\Numerics.vcxproj", "{3BB4DA69-B2E0-46D0-86DD-1757C5922506}"
ProjectSection(ProjectDependencies) = postProject
{3E2AC248-2A09-4F04-9FD8-4F2DEA665B1C} = {3E2AC248-2A09-4F04-9FD8-4F2DEA665B1C}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Utils", "Utils\Utils.vcxproj", "{3E2AC248-2A09-4F04-9FD8-4F2DEA665B1C}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "SplitData", "SplitData\SplitData.vcxproj", "{AF0B3A60-1C23-4229-96EC-E1071F4DAFDB}"
ProjectSection(ProjectDependencies) = postProject
{3E2AC248-2A09-4F04-9FD8-4F2DEA665B1C} = {3E2AC248-2A09-4F04-9FD8-4F2DEA665B1C}
{3BB4DA69-B2E0-46D0-86DD-1757C5922506} = {3BB4DA69-B2E0-46D0-86DD-1757C5922506}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "LBFGS", "LBFGS\LBFGS.vcxproj", "{2AF44FB9-F85D-48D0-AC4F-EE06CFFB398E}"
ProjectSection(ProjectDependencies) = postProject
{3E2AC248-2A09-4F04-9FD8-4F2DEA665B1C} = {3E2AC248-2A09-4F04-9FD8-4F2DEA665B1C}
{3BB4DA69-B2E0-46D0-86DD-1757C5922506} = {3BB4DA69-B2E0-46D0-86DD-1757C5922506}
{F2136A94-90B2-44EE-B812-9FA8C0EDD175} = {F2136A94-90B2-44EE-B812-9FA8C0EDD175}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "SDCA", "SDCA\SDCA.vcxproj", "{F2136A94-90B2-44EE-B812-9FA8C0EDD175}"
ProjectSection(ProjectDependencies) = postProject
{3E2AC248-2A09-4F04-9FD8-4F2DEA665B1C} = {3E2AC248-2A09-4F04-9FD8-4F2DEA665B1C}
{3BB4DA69-B2E0-46D0-86DD-1757C5922506} = {3BB4DA69-B2E0-46D0-86DD-1757C5922506}
EndProjectSection
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|x64 = Debug|x64
Debug|x86 = Debug|x86
Release|x64 = Release|x64
Release|x86 = Release|x86
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{3BB4DA69-B2E0-46D0-86DD-1757C5922506}.Debug|x64.ActiveCfg = Debug|x64
{3BB4DA69-B2E0-46D0-86DD-1757C5922506}.Debug|x64.Build.0 = Debug|x64
{3BB4DA69-B2E0-46D0-86DD-1757C5922506}.Debug|x86.ActiveCfg = Debug|Win32
{3BB4DA69-B2E0-46D0-86DD-1757C5922506}.Debug|x86.Build.0 = Debug|Win32
{3BB4DA69-B2E0-46D0-86DD-1757C5922506}.Release|x64.ActiveCfg = Release|x64
{3BB4DA69-B2E0-46D0-86DD-1757C5922506}.Release|x64.Build.0 = Release|x64
{3BB4DA69-B2E0-46D0-86DD-1757C5922506}.Release|x86.ActiveCfg = Release|Win32
{3BB4DA69-B2E0-46D0-86DD-1757C5922506}.Release|x86.Build.0 = Release|Win32
{3E2AC248-2A09-4F04-9FD8-4F2DEA665B1C}.Debug|x64.ActiveCfg = Debug|x64
{3E2AC248-2A09-4F04-9FD8-4F2DEA665B1C}.Debug|x64.Build.0 = Debug|x64
{3E2AC248-2A09-4F04-9FD8-4F2DEA665B1C}.Debug|x86.ActiveCfg = Debug|Win32
{3E2AC248-2A09-4F04-9FD8-4F2DEA665B1C}.Debug|x86.Build.0 = Debug|Win32
{3E2AC248-2A09-4F04-9FD8-4F2DEA665B1C}.Release|x64.ActiveCfg = Release|x64
{3E2AC248-2A09-4F04-9FD8-4F2DEA665B1C}.Release|x64.Build.0 = Release|x64
{3E2AC248-2A09-4F04-9FD8-4F2DEA665B1C}.Release|x86.ActiveCfg = Release|Win32
{3E2AC248-2A09-4F04-9FD8-4F2DEA665B1C}.Release|x86.Build.0 = Release|Win32
{AF0B3A60-1C23-4229-96EC-E1071F4DAFDB}.Debug|x64.ActiveCfg = Debug|x64
{AF0B3A60-1C23-4229-96EC-E1071F4DAFDB}.Debug|x64.Build.0 = Debug|x64
{AF0B3A60-1C23-4229-96EC-E1071F4DAFDB}.Debug|x86.ActiveCfg = Debug|Win32
{AF0B3A60-1C23-4229-96EC-E1071F4DAFDB}.Debug|x86.Build.0 = Debug|Win32
{AF0B3A60-1C23-4229-96EC-E1071F4DAFDB}.Release|x64.ActiveCfg = Release|x64
{AF0B3A60-1C23-4229-96EC-E1071F4DAFDB}.Release|x64.Build.0 = Release|x64
{AF0B3A60-1C23-4229-96EC-E1071F4DAFDB}.Release|x86.ActiveCfg = Release|Win32
{AF0B3A60-1C23-4229-96EC-E1071F4DAFDB}.Release|x86.Build.0 = Release|Win32
{2AF44FB9-F85D-48D0-AC4F-EE06CFFB398E}.Debug|x64.ActiveCfg = Debug|x64
{2AF44FB9-F85D-48D0-AC4F-EE06CFFB398E}.Debug|x64.Build.0 = Debug|x64
{2AF44FB9-F85D-48D0-AC4F-EE06CFFB398E}.Debug|x86.ActiveCfg = Debug|Win32
{2AF44FB9-F85D-48D0-AC4F-EE06CFFB398E}.Debug|x86.Build.0 = Debug|Win32
{2AF44FB9-F85D-48D0-AC4F-EE06CFFB398E}.Release|x64.ActiveCfg = Release|x64
{2AF44FB9-F85D-48D0-AC4F-EE06CFFB398E}.Release|x64.Build.0 = Release|x64
{2AF44FB9-F85D-48D0-AC4F-EE06CFFB398E}.Release|x86.ActiveCfg = Release|Win32
{2AF44FB9-F85D-48D0-AC4F-EE06CFFB398E}.Release|x86.Build.0 = Release|Win32
{F2136A94-90B2-44EE-B812-9FA8C0EDD175}.Debug|x64.ActiveCfg = Debug|x64
{F2136A94-90B2-44EE-B812-9FA8C0EDD175}.Debug|x64.Build.0 = Debug|x64
{F2136A94-90B2-44EE-B812-9FA8C0EDD175}.Debug|x86.ActiveCfg = Debug|Win32
{F2136A94-90B2-44EE-B812-9FA8C0EDD175}.Debug|x86.Build.0 = Debug|Win32
{F2136A94-90B2-44EE-B812-9FA8C0EDD175}.Release|x64.ActiveCfg = Release|x64
{F2136A94-90B2-44EE-B812-9FA8C0EDD175}.Release|x64.Build.0 = Release|x64
{F2136A94-90B2-44EE-B812-9FA8C0EDD175}.Release|x86.ActiveCfg = Release|Win32
{F2136A94-90B2-44EE-B812-9FA8C0EDD175}.Release|x86.Build.0 = Release|Win32
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {53CC80D2-85F9-430F-8BEE-5A3F752CE6C2}
EndGlobalSection
EndGlobal