Visual Studio Solution cleanup:

moved minibatchsourcehelpers.h to Common/Include, as it is shared amongst readers (exception: the HTK reader, which has a different version);
fixed some incorrect #include of basetypes.h;
copying of DLLs now skips the copy when the target file already exists and is up to date;
prebuild.bat now only updates buildinfo.h if its content has changed, so that CNTK.cpp is not recompiled each time (both build-script techniques are sketched after this list);
fixed a few build warnings in the CUDA kernels (uninitialized variables);
removed unused PTasks references in Linux makefiles
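
The DLL-copy and prebuild.bat items above rely on two standard batch techniques: xcopy's /D switch, which copies a file only when the source is newer than an existing destination copy, and writing the regenerated header to a temporary file and replacing the real one only when the content differs. A minimal sketch, assuming placeholder paths and header content rather than the project's actual values:

:: Copy the DLL only when the source is newer than any copy already in the target
:: directory: /D (without a date) compares file times, /Y suppresses the overwrite
:: prompt, /I treats the destination as a directory.
xcopy /I /D /Y "%ProgramW6432%\NVIDIA Corporation\NVSMI\nvml*.dll" "%TargetDir%"

:: Regenerate the header into a temporary file, then replace the real header only
:: if the content differs, so sources that #include it are not rebuilt needlessly.
echo #define _EXAMPLE_VALUE_ "example" > buildinfo.h$$
fc buildinfo.h$$ buildinfo.h > NUL
if errorlevel 1 move /Y buildinfo.h$$ buildinfo.h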
This commit is contained in:
Frank Seide 2015-05-18 20:32:39 -07:00
Parent 08625d482d
Commit 178555d0a4
31 changed files with 71 additions and 4650 deletions

View file

@ -7,7 +7,7 @@
//
#include "stdafx.h"
#include "basetypes.h"
#include "Basics.h"
#include "fileutil.h"
#include "FileTest.h"
#include "File.h"

View file

@ -1,117 +0,0 @@
//
// <copyright file="minibatchsourcehelpers.h" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
// minibatchsourcehelpers.h -- helper classes for minibatch sources
//
#pragma once
#include "Basics.h"
#include <stdio.h>
#include <vector>
#include <algorithm>
namespace msra { namespace dbn {
// ---------------------------------------------------------------------------
// randomordering -- class to help manage randomization of input data
// ---------------------------------------------------------------------------
static inline size_t rand (const size_t begin, const size_t end)
{
const size_t randno = ::rand() * RAND_MAX + ::rand(); // BUGBUG: still only covers 32-bit range
return begin + randno % (end - begin);
}
class randomordering // note: NOT thread-safe at all
{
// constants for randomization
const static size_t randomizeDisable=0;
typedef unsigned int INDEXTYPE; // don't use size_t, as this saves HUGE amounts of RAM
std::vector<INDEXTYPE> map; // [t] -> t' indices in randomized order
size_t currentseed; // seed for current sequence
size_t randomizationrange; // t - randomizationrange/2 <= t' < t + randomizationrange/2 (we support this to enable swapping)
// special values (randomizeDisable)
void invalidate() { currentseed = (size_t) -1; }
public:
randomordering() { invalidate(); randomizationrange = randomizeDisable;}
void resize (size_t len, size_t p_randomizationrange) { randomizationrange = p_randomizationrange; if (len > 0) map.resize (len); invalidate(); }
// return the randomized feature bounds for a time range
std::pair<size_t,size_t> bounds (size_t ts, size_t te) const
{
size_t tbegin = max (ts, randomizationrange/2) - randomizationrange/2;
size_t tend = min (te + randomizationrange/2, map.size());
return std::make_pair<size_t,size_t> (move(tbegin), move(tend));
}
// this returns the map directly (read-only) and will lazily initialize it for a given seed
const std::vector<INDEXTYPE> & operator() (size_t seed) //throw()
{
// if wrong seed then lazily recache the sequence
if (seed != currentseed && randomizationrange != randomizeDisable)
{
// test for numeric overflow
if (map.size()-1 != (INDEXTYPE) (map.size()-1))
throw std::runtime_error ("randomordering: INDEXTYPE has too few bits for this corpus");
// 0, 1, 2...
foreach_index (t, map) map[t] = (INDEXTYPE) t;
if (map.size() > RAND_MAX * (size_t) RAND_MAX)
throw std::runtime_error ("randomordering: too large training set: need to change to different random generator!");
srand ((unsigned int) seed);
size_t retries = 0;
foreach_index (t, map)
{
for (int tries = 0; tries < 5; tries++)
{
// swap current pos with a random position
// Random positions are limited to t+randomizationrange.
// This ensures some locality suitable for paging with a sliding window.
const size_t tbegin = max ((size_t) t, randomizationrange/2) - randomizationrange/2; // range of window --TODO: use bounds() function above
const size_t tend = min (t + randomizationrange/2, map.size());
assert (tend >= tbegin); // (guard against potential numeric-wraparound bug)
const size_t trand = rand (tbegin, tend); // random number within windows
assert ((size_t) t <= trand + randomizationrange/2 && trand < (size_t) t + randomizationrange/2);
// if range condition is fulfilled then swap
if (trand <= map[t] + randomizationrange/2 && map[t] < trand + randomizationrange/2
&& (size_t) t <= map[trand] + randomizationrange/2 && map[trand] < (size_t) t + randomizationrange/2)
{
::swap (map[t], map[trand]);
break;
}
// but don't multi-swap stuff out of its range (for swapping positions that have been swapped before)
// instead, try again with a different random number
retries++;
}
}
fprintf (stderr, "randomordering: %d retries for %d elements (%.1f%%) to ensure window condition\n", retries, map.size(), 100.0 * retries / map.size());
// ensure the window condition
foreach_index (t, map) assert ((size_t) t <= map[t] + randomizationrange/2 && map[t] < (size_t) t + randomizationrange/2);
#if 0 // and a live check since I don't trust myself here yet
foreach_index (t, map) if (!((size_t) t <= map[t] + randomizationrange/2 && map[t] < (size_t) t + randomizationrange/2))
{
fprintf (stderr, "randomordering: windowing condition violated %d -> %d\n", t, map[t]);
throw std::logic_error ("randomordering: windowing condition violated");
}
#endif
#if 0 // test whether it is indeed a unique complete sequence
auto map2 = map;
::sort (map2.begin(), map2.end());
foreach_index (t, map2) assert (map2[t] == (size_t) t);
#endif
fprintf (stderr, "randomordering: recached sequence for seed %d: %d, %d, ...\n", (int) seed, (int) map[0], (int) map[1]);
currentseed = seed;
}
return map; // caller can now access it through operator[]
}
size_t CurrentSeed() {return currentseed;}
};
typedef unsigned short CLASSIDTYPE; // type to store state ids; don't use size_t --saves HUGE amounts of RAM
};};

View file

@ -1,118 +0,0 @@
//
// <copyright file="minibatchsourcehelpers.h" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
// minibatchsourcehelpers.h -- helper classes for minibatch sources
//
#pragma once
#include "Basics.h"
#include <stdio.h>
#include <vector>
#include <algorithm>
namespace msra { namespace dbn {
// ---------------------------------------------------------------------------
// randomordering -- class to help manage randomization of input data
// ---------------------------------------------------------------------------
static inline size_t rand (const size_t begin, const size_t end)
{
const size_t randno = ::rand() * RAND_MAX + ::rand(); // BUGBUG: still only covers 32-bit range
return begin + randno % (end - begin);
}
class randomordering // note: NOT thread-safe at all
{
// constants for randomization
const static size_t randomizeDisable=0;
typedef unsigned int INDEXTYPE; // don't use size_t, as this saves HUGE amounts of RAM
std::vector<INDEXTYPE> map; // [t] -> t' indices in randomized order
size_t currentseed; // seed for current sequence
size_t randomizationrange; // t - randomizationrange/2 <= t' < t + randomizationrange/2 (we support this to enable swapping)
// special values (randomizeDisable)
void invalidate() { currentseed = (size_t) -1; }
public:
randomordering() { invalidate(); randomizationrange = randomizeDisable;}
void resize (size_t len, size_t p_randomizationrange) { randomizationrange = p_randomizationrange; if (len > 0) map.resize (len); invalidate(); }
// return the randomized feature bounds for a time range
std::pair<size_t,size_t> bounds (size_t ts, size_t te) const
{
size_t tbegin = max (ts, randomizationrange/2) - randomizationrange/2;
size_t tend = min (te + randomizationrange/2, map.size());
return std::make_pair<size_t,size_t> (move(tbegin), move(tend));
}
// this returns the map directly (read-only) and will lazily initialize it for a given seed
const std::vector<INDEXTYPE> & operator() (size_t seed) //throw()
{
// if wrong seed then lazily recache the sequence
if (seed != currentseed && randomizationrange != randomizeDisable)
{
// test for numeric overflow
if (map.size()-1 != (INDEXTYPE) (map.size()-1))
throw std::runtime_error ("randomordering: INDEXTYPE has too few bits for this corpus");
// 0, 1, 2...
foreach_index (t, map) map[t] = (INDEXTYPE) t;
if (map.size() > RAND_MAX * (size_t) RAND_MAX)
throw std::runtime_error ("randomordering: too large training set: need to change to different random generator!");
srand ((unsigned int) seed);
size_t retries = 0;
foreach_index (t, map)
{
for (int tries = 0; tries < 5; tries++)
{
// swap current pos with a random position
// Random positions are limited to t+randomizationrange.
// This ensures some locality suitable for paging with a sliding window.
const size_t tbegin = max ((size_t) t, randomizationrange/2) - randomizationrange/2; // range of window --TODO: use bounds() function above
const size_t tend = min (t + randomizationrange/2, map.size());
assert (tend >= tbegin); // (guard against potential numeric-wraparound bug)
const size_t trand = rand (tbegin, tend); // random number within windows
assert ((size_t) t <= trand + randomizationrange/2 && trand < (size_t) t + randomizationrange/2);
// if range condition is fulfilled then swap
if (trand <= map[t] + randomizationrange/2 && map[t] < trand + randomizationrange/2
&& (size_t) t <= map[trand] + randomizationrange/2 && map[trand] < (size_t) t + randomizationrange/2)
{
::swap (map[t], map[trand]);
break;
}
// but don't multi-swap stuff out of its range (for swapping positions that have been swapped before)
// instead, try again with a different random number
retries++;
}
}
fprintf (stderr, "randomordering: %d retries for %d elements (%.1f%%) to ensure window condition\n", retries, map.size(), 100.0 * retries / map.size());
// ensure the window condition
foreach_index (t, map) assert ((size_t) t <= map[t] + randomizationrange/2 && map[t] < (size_t) t + randomizationrange/2);
#if 0 // and a live check since I don't trust myself here yet
foreach_index (t, map) if (!((size_t) t <= map[t] + randomizationrange/2 && map[t] < (size_t) t + randomizationrange/2))
{
fprintf (stderr, "randomordering: windowing condition violated %d -> %d\n", t, map[t]);
throw std::logic_error ("randomordering: windowing condition violated");
}
#endif
#if 0 // test whether it is indeed a unique complete sequence
auto map2 = map;
::sort (map2.begin(), map2.end());
foreach_index (t, map2) assert (map2[t] == (size_t) t);
#endif
fprintf (stderr, "randomordering: recached sequence for seed %d: %d, %d, ...\n", (int) seed, (int) map[0], (int) map[1]);
currentseed = seed;
}
return map; // caller can now access it through operator[]
}
size_t CurrentSeed() {return currentseed;}
};
typedef unsigned short CLASSIDTYPE; // type to store state ids; don't use size_t --saves HUGE amounts of RAM
};};

View file

@ -1,117 +0,0 @@
//
// <copyright file="minibatchsourcehelpers.h" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
// minibatchsourcehelpers.h -- helper classes for minibatch sources
//
#pragma once
#include "Basics.h"
#include <stdio.h>
#include <vector>
#include <algorithm>
namespace msra { namespace dbn {
// ---------------------------------------------------------------------------
// randomordering -- class to help manage randomization of input data
// ---------------------------------------------------------------------------
static inline size_t rand (const size_t begin, const size_t end)
{
const size_t randno = ::rand() * RAND_MAX + ::rand(); // BUGBUG: still only covers 32-bit range
return begin + randno % (end - begin);
}
class randomordering // note: NOT thread-safe at all
{
// constants for randomization
const static size_t randomizeDisable=0;
typedef unsigned int INDEXTYPE; // don't use size_t, as this saves HUGE amounts of RAM
std::vector<INDEXTYPE> map; // [t] -> t' indices in randomized order
size_t currentseed; // seed for current sequence
size_t randomizationrange; // t - randomizationrange/2 <= t' < t + randomizationrange/2 (we support this to enable swapping)
// special values (randomizeDisable)
void invalidate() { currentseed = (size_t) -1; }
public:
randomordering() { invalidate(); randomizationrange = randomizeDisable;}
void resize (size_t len, size_t p_randomizationrange) { randomizationrange = p_randomizationrange; if (len > 0) map.resize (len); invalidate(); }
// return the randomized feature bounds for a time range
std::pair<size_t,size_t> bounds (size_t ts, size_t te) const
{
size_t tbegin = max (ts, randomizationrange/2) - randomizationrange/2;
size_t tend = min (te + randomizationrange/2, map.size());
return std::make_pair<size_t,size_t> (move(tbegin), move(tend));
}
// this returns the map directly (read-only) and will lazily initialize it for a given seed
const std::vector<INDEXTYPE> & operator() (size_t seed) //throw()
{
// if wrong seed then lazily recache the sequence
if (seed != currentseed && randomizationrange != randomizeDisable)
{
// test for numeric overflow
if (map.size()-1 != (INDEXTYPE) (map.size()-1))
throw std::runtime_error ("randomordering: INDEXTYPE has too few bits for this corpus");
// 0, 1, 2...
foreach_index (t, map) map[t] = (INDEXTYPE) t;
if (map.size() > RAND_MAX * (size_t) RAND_MAX)
throw std::runtime_error ("randomordering: too large training set: need to change to different random generator!");
srand ((unsigned int) seed);
size_t retries = 0;
foreach_index (t, map)
{
for (int tries = 0; tries < 5; tries++)
{
// swap current pos with a random position
// Random positions are limited to t+randomizationrange.
// This ensures some locality suitable for paging with a sliding window.
const size_t tbegin = max ((size_t) t, randomizationrange/2) - randomizationrange/2; // range of window --TODO: use bounds() function above
const size_t tend = min (t + randomizationrange/2, map.size());
assert (tend >= tbegin); // (guard against potential numeric-wraparound bug)
const size_t trand = rand (tbegin, tend); // random number within windows
assert ((size_t) t <= trand + randomizationrange/2 && trand < (size_t) t + randomizationrange/2);
// if range condition is fulfilled then swap
if (trand <= map[t] + randomizationrange/2 && map[t] < trand + randomizationrange/2
&& (size_t) t <= map[trand] + randomizationrange/2 && map[trand] < (size_t) t + randomizationrange/2)
{
::swap (map[t], map[trand]);
break;
}
// but don't multi-swap stuff out of its range (for swapping positions that have been swapped before)
// instead, try again with a different random number
retries++;
}
}
fprintf (stderr, "randomordering: %d retries for %d elements (%.1f%%) to ensure window condition\n", retries, map.size(), 100.0 * retries / map.size());
// ensure the window condition
foreach_index (t, map) assert ((size_t) t <= map[t] + randomizationrange/2 && map[t] < (size_t) t + randomizationrange/2);
#if 0 // and a live check since I don't trust myself here yet
foreach_index (t, map) if (!((size_t) t <= map[t] + randomizationrange/2 && map[t] < (size_t) t + randomizationrange/2))
{
fprintf (stderr, "randomordering: windowing condition violated %d -> %d\n", t, map[t]);
throw std::logic_error ("randomordering: windowing condition violated");
}
#endif
#if 0 // test whether it is indeed a unique complete sequence
auto map2 = map;
::sort (map2.begin(), map2.end());
foreach_index (t, map2) assert (map2[t] == (size_t) t);
#endif
fprintf (stderr, "randomordering: recached sequence for seed %d: %d, %d, ...\n", (int) seed, (int) map[0], (int) map[1]);
currentseed = seed;
}
return map; // caller can now access it through operator[]
}
size_t CurrentSeed() {return currentseed;}
};
typedef unsigned short CLASSIDTYPE; // type to store state ids; don't use size_t --saves HUGE amounts of RAM
};};

View file

@ -1,117 +0,0 @@
//
// <copyright file="minibatchsourcehelpers.h" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
// minibatchsourcehelpers.h -- helper classes for minibatch sources
//
#pragma once
#include "Basics.h"
#include <stdio.h>
#include <vector>
#include <algorithm>
namespace msra { namespace dbn {
// ---------------------------------------------------------------------------
// randomordering -- class to help manage randomization of input data
// ---------------------------------------------------------------------------
static inline size_t rand (const size_t begin, const size_t end)
{
const size_t randno = ::rand() * RAND_MAX + ::rand(); // BUGBUG: still only covers 32-bit range
return begin + randno % (end - begin);
}
class randomordering // note: NOT thread-safe at all
{
// constants for randomization
const static size_t randomizeDisable=0;
typedef unsigned int INDEXTYPE; // don't use size_t, as this saves HUGE amounts of RAM
std::vector<INDEXTYPE> map; // [t] -> t' indices in randomized order
size_t currentseed; // seed for current sequence
size_t randomizationrange; // t - randomizationrange/2 <= t' < t + randomizationrange/2 (we support this to enable swapping)
// special values (randomizeDisable)
void invalidate() { currentseed = (size_t) -1; }
public:
randomordering() { invalidate(); randomizationrange = randomizeDisable;}
void resize (size_t len, size_t p_randomizationrange) { randomizationrange = p_randomizationrange; if (len > 0) map.resize (len); invalidate(); }
// return the randomized feature bounds for a time range
std::pair<size_t,size_t> bounds (size_t ts, size_t te) const
{
size_t tbegin = max (ts, randomizationrange/2) - randomizationrange/2;
size_t tend = min (te + randomizationrange/2, map.size());
return std::make_pair<size_t,size_t> (move(tbegin), move(tend));
}
// this returns the map directly (read-only) and will lazily initialize it for a given seed
const std::vector<INDEXTYPE> & operator() (size_t seed) //throw()
{
// if wrong seed then lazily recache the sequence
if (seed != currentseed && randomizationrange != randomizeDisable)
{
// test for numeric overflow
if (map.size()-1 != (INDEXTYPE) (map.size()-1))
throw std::runtime_error ("randomordering: INDEXTYPE has too few bits for this corpus");
// 0, 1, 2...
foreach_index (t, map) map[t] = (INDEXTYPE) t;
if (map.size() > RAND_MAX * (size_t) RAND_MAX)
throw std::runtime_error ("randomordering: too large training set: need to change to different random generator!");
srand ((unsigned int) seed);
size_t retries = 0;
foreach_index (t, map)
{
for (int tries = 0; tries < 5; tries++)
{
// swap current pos with a random position
// Random positions are limited to t+randomizationrange.
// This ensures some locality suitable for paging with a sliding window.
const size_t tbegin = max ((size_t) t, randomizationrange/2) - randomizationrange/2; // range of window --TODO: use bounds() function above
const size_t tend = min (t + randomizationrange/2, map.size());
assert (tend >= tbegin); // (guard against potential numeric-wraparound bug)
const size_t trand = rand (tbegin, tend); // random number within windows
assert ((size_t) t <= trand + randomizationrange/2 && trand < (size_t) t + randomizationrange/2);
// if range condition is fulfilled then swap
if (trand <= map[t] + randomizationrange/2 && map[t] < trand + randomizationrange/2
&& (size_t) t <= map[trand] + randomizationrange/2 && map[trand] < (size_t) t + randomizationrange/2)
{
::swap (map[t], map[trand]);
break;
}
// but don't multi-swap stuff out of its range (for swapping positions that have been swapped before)
// instead, try again with a different random number
retries++;
}
}
fprintf (stderr, "randomordering: %d retries for %d elements (%.1f%%) to ensure window condition\n", retries, map.size(), 100.0 * retries / map.size());
// ensure the window condition
foreach_index (t, map) assert ((size_t) t <= map[t] + randomizationrange/2 && map[t] < (size_t) t + randomizationrange/2);
#if 0 // and a live check since I don't trust myself here yet
foreach_index (t, map) if (!((size_t) t <= map[t] + randomizationrange/2 && map[t] < (size_t) t + randomizationrange/2))
{
fprintf (stderr, "randomordering: windowing condition violated %d -> %d\n", t, map[t]);
throw std::logic_error ("randomordering: windowing condition violated");
}
#endif
#if 0 // test whether it is indeed a unique complete sequence
auto map2 = map;
::sort (map2.begin(), map2.end());
foreach_index (t, map2) assert (map2[t] == (size_t) t);
#endif
fprintf (stderr, "randomordering: recached sequence for seed %d: %d, %d, ...\n", (int) seed, (int) map[0], (int) map[1]);
currentseed = seed;
}
return map; // caller can now access it through operator[]
}
size_t CurrentSeed() {return currentseed;}
};
typedef unsigned short CLASSIDTYPE; // type to store state ids; don't use size_t --saves HUGE amounts of RAM
};};

View file

@ -1,130 +0,0 @@
//
// <copyright file="minibatchsourcehelpers.h" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
// minibatchsourcehelpers.h -- helper classes for minibatch sources
//
// F. Seide, Oct 2012
//
// $Log: /Speech_To_Speech_Translation/dbn/dbn/minibatchsourcehelpers.h $
//
// 3 10/09/12 7:23p Fseide
// moved class minibatchiterator to minibatchiterator.h, and dealt with
// the fallout
//
// 2 10/09/12 7:12p Fseide
// moved all minibatch sources to respective new source files
//
// 1 10/09/12 6:45p Fseide
// began to move the minibatch sources to separate source files
#pragma once
#include "basetypes.h"
#include <stdio.h>
#include <vector>
#include <algorithm>
namespace msra { namespace dbn {
// ---------------------------------------------------------------------------
// randomordering -- class to help manage randomization of input data
// ---------------------------------------------------------------------------
static inline size_t rand (const size_t begin, const size_t end)
{
const size_t randno = ::rand() * RAND_MAX + ::rand(); // BUGBUG: still only covers 32-bit range
return begin + randno % (end - begin);
}
class randomordering // note: NOT thread-safe at all
{
// constants for randomization
const static size_t randomizeDisable=0;
typedef unsigned int INDEXTYPE; // don't use size_t, as this saves HUGE amounts of RAM
std::vector<INDEXTYPE> map; // [t] -> t' indices in randomized order
size_t currentseed; // seed for current sequence
size_t randomizationrange; // t - randomizationrange/2 <= t' < t + randomizationrange/2 (we support this to enable swapping)
// special values (randomizeDisable)
void invalidate() { currentseed = (size_t) -1; }
public:
randomordering() { invalidate(); randomizationrange = randomizeDisable;}
void resize (size_t len, size_t p_randomizationrange) { randomizationrange = p_randomizationrange; if (len > 0) map.resize (len); invalidate(); }
// return the randomized feature bounds for a time range
std::pair<size_t,size_t> bounds (size_t ts, size_t te) const
{
size_t tbegin = max (ts, randomizationrange/2) - randomizationrange/2;
size_t tend = min (te + randomizationrange/2, map.size());
return std::make_pair<size_t,size_t> (move(tbegin), move(tend));
}
// this returns the map directly (read-only) and will lazily initialize it for a given seed
const std::vector<INDEXTYPE> & operator() (size_t seed) //throw()
{
// if wrong seed then lazily recache the sequence
if (seed != currentseed && randomizationrange != randomizeDisable)
{
// test for numeric overflow
if (map.size()-1 != (INDEXTYPE) (map.size()-1))
throw std::runtime_error ("randomordering: INDEXTYPE has too few bits for this corpus");
// 0, 1, 2...
foreach_index (t, map) map[t] = (INDEXTYPE) t;
if (map.size() > RAND_MAX * (size_t) RAND_MAX)
throw std::runtime_error ("randomordering: too large training set: need to change to different random generator!");
srand ((unsigned int) seed);
size_t retries = 0;
foreach_index (t, map)
{
for (int tries = 0; tries < 5; tries++)
{
// swap current pos with a random position
// Random positions are limited to t+randomizationrange.
// This ensures some locality suitable for paging with a sliding window.
const size_t tbegin = max ((size_t) t, randomizationrange/2) - randomizationrange/2; // range of window --TODO: use bounds() function above
const size_t tend = min (t + randomizationrange/2, map.size());
assert (tend >= tbegin); // (guard against potential numeric-wraparound bug)
const size_t trand = rand (tbegin, tend); // random number within windows
assert ((size_t) t <= trand + randomizationrange/2 && trand < (size_t) t + randomizationrange/2);
// if range condition is fulfilled then swap
if (trand <= map[t] + randomizationrange/2 && map[t] < trand + randomizationrange/2
&& (size_t) t <= map[trand] + randomizationrange/2 && map[trand] < (size_t) t + randomizationrange/2)
{
::swap (map[t], map[trand]);
break;
}
// but don't multi-swap stuff out of its range (for swapping positions that have been swapped before)
// instead, try again with a different random number
retries++;
}
}
fprintf (stderr, "randomordering: %d retries for %d elements (%.1f%%) to ensure window condition\n", retries, map.size(), 100.0 * retries / map.size());
// ensure the window condition
foreach_index (t, map) assert ((size_t) t <= map[t] + randomizationrange/2 && map[t] < (size_t) t + randomizationrange/2);
#if 0 // and a live check since I don't trust myself here yet
foreach_index (t, map) if (!((size_t) t <= map[t] + randomizationrange/2 && map[t] < (size_t) t + randomizationrange/2))
{
fprintf (stderr, "randomordering: windowing condition violated %d -> %d\n", t, map[t]);
throw std::logic_error ("randomordering: windowing condition violated");
}
#endif
#if 0 // test whether it is indeed a unique complete sequence
auto map2 = map;
::sort (map2.begin(), map2.end());
foreach_index (t, map2) assert (map2[t] == (size_t) t);
#endif
fprintf (stderr, "randomordering: recached sequence for seed %d: %d, %d, ...\n", (int) seed, (int) map[0], (int) map[1]);
currentseed = seed;
}
return map; // caller can now access it through operator[]
}
size_t CurrentSeed() {return currentseed;}
};
typedef unsigned short CLASSIDTYPE; // type to store state ids; don't use size_t --saves HUGE amounts of RAM
};};

View file

@ -19,7 +19,7 @@
#include "ModelEditLanguage.h"
#include "SGD.h"
#include <string>
#include <basetypes.h>
#include "Basics.h"
#include "commandArgUtil.h"
#include "SimpleEvaluator.h"
#include "SimpleOutputWriter.h"

View file

@ -83,7 +83,7 @@
<DelayLoadDLLs>CNTKMath.dll; nvml.dll; cudart64_70.dll</DelayLoadDLLs>
</Link>
<PostBuildEvent>
<Command>XCOPY /I /Y "%ProgramW6432%\NVIDIA Corporation\NVSMI\nvml*.dll" $(TargetDir)</Command>
<Command>xcopy /I /D /Y "%ProgramW6432%\NVIDIA Corporation\NVSMI\nvml*.dll" $(TargetDir)</Command>
<Message>Copying NVidia GDK extension DLL to target folder</Message>
</PostBuildEvent>
<CustomBuildStep>
@ -125,7 +125,7 @@
<AdditionalLibraryDirectories>"c:\Program Files\NVIDIA Corporation\GDK\gdk_win7_amd64_release\nvml\lib"</AdditionalLibraryDirectories>
</Link>
<PostBuildEvent>
<Command>XCOPY /I /Y "%ProgramW6432%\NVIDIA Corporation\NVSMI\nvml*.dll" $(TargetDir)</Command>
<Command>xcopy /I /D /Y "%ProgramW6432%\NVIDIA Corporation\NVSMI\nvml*.dll" $(TargetDir)</Command>
<Message>Copying NVidia GDK extension DLL to target folder</Message>
</PostBuildEvent>
<CustomBuildStep>
@ -151,7 +151,6 @@
<Text Include="modelEditorFromScratch.txt" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\..\Common\hostname.h" />
<ClInclude Include="..\..\Common\Include\basetypes.h" />
<ClInclude Include="..\..\Common\Include\Basics.h" />
<ClInclude Include="..\..\Common\Include\BestGpu.h" />
@ -161,6 +160,7 @@
<ClInclude Include="..\..\Common\Include\File.h" />
<ClInclude Include="..\..\Common\Include\fileutil.h" />
<ClInclude Include="..\..\Common\Include\hostname.h" />
<ClInclude Include="..\..\Common\Include\minibatchsourcehelpers.h" />
<ClInclude Include="..\..\Common\Include\nvml.h" />
<ClInclude Include="..\..\Common\Include\TimerUtility.h" />
<ClInclude Include="CompositeComputationNode.h" />
@ -203,6 +203,9 @@
<ClCompile Include="stdafx.cpp" />
<ClCompile Include="tests.cpp" />
</ItemGroup>
<ItemGroup>
<None Include="prebuild.bat" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets" />
</Project>

View file

@ -1,9 +1,6 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup>
<ClCompile Include="..\..\Common\BestGpu.cpp">
<Filter>Common</Filter>
</ClCompile>
<ClCompile Include="..\..\Common\ConfigFile.cpp">
<Filter>Common</Filter>
</ClCompile>
@ -41,17 +38,14 @@
<Filter>Common</Filter>
</ClCompile>
<ClCompile Include="CNTK.cpp" />
<ClCompile Include="..\..\Common\BestGpu.cpp">
<Filter>GPU Interfacing</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\..\Common\Include\basetypes.h">
<Filter>Common\Include</Filter>
</ClInclude>
<ClInclude Include="..\..\Common\Include\BestGpu.h">
<Filter>Common\Include</Filter>
</ClInclude>
<ClInclude Include="..\..\Common\hostname.h">
<Filter>Common</Filter>
</ClInclude>
<ClInclude Include="..\..\Common\Include\commandArgUtil.h">
<Filter>Common\Include</Filter>
</ClInclude>
@ -61,9 +55,6 @@
<ClInclude Include="..\..\Common\Include\File.h">
<Filter>Common\Include</Filter>
</ClInclude>
<ClInclude Include="..\..\Common\Include\nvml.h">
<Filter>Common\Include</Filter>
</ClInclude>
<ClInclude Include="..\..\Common\Include\DataReader.h">
<Filter>Common\Include</Filter>
</ClInclude>
@ -136,6 +127,15 @@
<ClInclude Include="..\..\Common\Include\Basics.h">
<Filter>Common\Include</Filter>
</ClInclude>
<ClInclude Include="..\..\Common\Include\nvml.h">
<Filter>GPU Interfacing</Filter>
</ClInclude>
<ClInclude Include="..\..\Common\Include\minibatchsourcehelpers.h">
<Filter>Common\Include</Filter>
</ClInclude>
<ClInclude Include="..\..\Common\Include\BestGpu.h">
<Filter>Common\Include</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<Text Include="modelEditor.txt">
@ -170,5 +170,13 @@
<Filter Include="Misc">
<UniqueIdentifier>{3c119a92-ffb2-4850-adae-01778324974d}</UniqueIdentifier>
</Filter>
<Filter Include="GPU Interfacing">
<UniqueIdentifier>{8d99b2cc-5209-40e4-8b4b-a7616973ae3b}</UniqueIdentifier>
</Filter>
</ItemGroup>
<ItemGroup>
<None Include="prebuild.bat">
<Filter>Misc</Filter>
</None>
</ItemGroup>
</Project>

View file

@ -1,7 +0,0 @@
#include "stdafx.h"
#include "ComputationNetwork.h"
#include "PTaskGraphBuilder.h"
namespace Microsoft { namespace MSR { namespace CNTK {
}}}

View file

@ -1,17 +0,0 @@
//
// <copyright file="PTask.h" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
#pragma once
// A single include file to collect together all includes for PTask.
#define CUDA_SUPPORT
#include "PTaskAPI.h"
#include "primitive_types.h"
#include "HostTask.h"
using namespace PTask;
// TODO: Base the path on the properties specified in config.txt.
#define PTASK_GRAPH_VIZ_FILE "C:\\temp\\PTaskGraph.dot"

View file

@ -1,58 +0,0 @@
//
// <copyright file="PTaskComputationNetwork.h" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
#pragma once
#include <string>
#include "ComputationNetwork.h"
#include "PTask.h"
#include "PTaskGraphBuilder.h"
namespace Microsoft { namespace MSR { namespace CNTK {
template<class ElemType>
class PTaskComputationNetwork : public ComputationNetwork<ElemType>
{
public:
PTaskComputationNetwork(short deviceId=AUTOPLACEMATRIX)
: ComputationNetwork<ElemType>(deviceId)
{
m_PTaskGraphBuilder = new PTaskGraphBuilder<ElemType>();
}
virtual ~PTaskComputationNetwork() { }
virtual void LoadFromFile(const std::wstring& fileName, FileOptions fileFormat = FileOptions::fileOptionsBinary) override
{
// Let the base class implementation deserialize all the state
// and construct its regular CN ...
this->ComputationNetwork<ElemType>::LoadFromFile(fileName, fileFormat);
// ... then use that state to create the corresponding PTask graph.
m_PTaskGraphBuilder->BuildFromComputationNetwork(this);
}
virtual void ComputeGradient(ComputationNodePtr rootNode) override
{
//printf("PTaskComputationNetwork::ComputeGradient called.\n");
this->ComputationNetwork<ElemType>::ComputeGradient(rootNode);
}
private:
// Copy constructor, should never be called.
PTaskComputationNetwork(const PTaskComputationNetwork<ElemType>& deepCopyFrom) {};
// Assignment operator, should never be called.
PTaskComputationNetwork<ElemType>& operator=(const PTaskComputationNetwork<ElemType>& deepCopyFrom)
{
assert(false);
return const_cast<PTaskComputationNetwork<ElemType>&>(deepCopyFrom); // return a value to avoid compile errors
};
PTaskGraphBuilder<ElemType>* m_PTaskGraphBuilder;
};
}}}

View file

@ -1,209 +0,0 @@
//
// <copyright file="PTaskExecutionEngine.h" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
#pragma once
#include "PTaskComputationNetwork.h"
namespace Microsoft { namespace MSR { namespace CNTK {
// PTaskNodeEvaluator
// Process the Network Description Language into a Computation Network useable
// by PTaskExecutionEngine.
template <typename ElemType>
class PTaskNodeEvaluator : public NDLNodeEvaluator<ElemType>
{
public:
// Constructor - create evaluator
PTaskNodeEvaluator(PTaskComputationNetwork<ElemType>& cn)
: m_net(cn)
{ }
// Evaluate - evaluate a node and translate into underlying
// node - node we are evaluating
// baseName - base name for all symbols at this level
virtual void Evaluate(NDLNode<ElemType>* node, const wstring& baseName, const int pass)
{
// constants don't need to be evaluated, they just translate into numbers...
if (node->GetType() == ndlTypeConstant
|| node->GetType() == ndlTypeArray) // currently arrays only used for node lists, in the future may be used for other things
return;
// get the parameters
std::vector<NDLNode<ElemType>*> parameter = node->GetParameters();
if (parameter.size() < 1)
{
Error("Node with no parameters, %s\n", node->GetName().c_str());
}
// get the name for the symbol to be used by CN nodes
std::wstring name = msra::strfun::utf16(node->GetName());
if (!baseName.empty())
{
name = baseName + L"." + name;
}
if (node->GetValue() == "InputValue")
{
if (pass > 0)
return;
// get dimensions of input
size_t rows = parameter[0]->GetScalar();
// check for second dimension, otherwise default to 1
size_t cols = 1;
if (parameter.size() > 1)
{
cols = parameter[1]->GetScalar();
}
ComputationNodePtr input = m_net.CreateInputNode(name, rows, cols);
node->SetEvalValue(input);
}
else if (node->GetValue() == "LearnableParameter")
{
// get dimensions of input
size_t rows = parameter[0]->GetScalar();
// check for second dimension, otherwise default to 1
size_t cols = 1;
if (parameter.size() > 1)
{
cols = parameter[1]->GetScalar();
}
if (pass == 0)
{
bool needGradient = true;
ComputationNodePtr nodePtr = m_net.CreateLearnableParameter(name, rows, cols);
node->SetEvalValue(nodePtr);
nodePtr->NeedGradient() = needGradient;
}
else
{
static int randomSeed = 1;
ComputationNodePtr nodePtr = (ComputationNodePtr)m_net.GetNodeFromName(name);
bool init = true;
bool uniformInit=true;
if (init)
{
InitLearnableParameters(nodePtr, cols, randomSeed++, uniformInit);
}
}
}
else if (node->GetValue() == "ConstantScalarParameter")
{
if (pass > 0)
return;
size_t rows = 1;
size_t cols = 1;
bool needGradient = false;
bool init = false;
ElemType val = parameter[0]->GetScalar();
ComputationNodePtr nodePtr = m_net.CreateLearnableParameter(name, rows, cols);
node->SetEvalValue(nodePtr);
nodePtr->NeedGradient() = needGradient;
nodePtr->FunctionValues().SetValue(val);
}
else
{
ComputationNodePtr nodePtr = NULL;
if (pass == 0)
{
nodePtr = m_net.CreateComputationNode(node->GetValue(), name);
node->SetEvalValue(nodePtr);
}
std::vector<void*> inputs = EvaluateParameters(node, baseName, pass);
if (pass == 0)
{
switch (inputs.size())
{
case 1:
nodePtr->AttachInputs(ComputationNodePtr(inputs[0]));
break;
case 2:
nodePtr->AttachInputs(ComputationNodePtr(inputs[0]), ComputationNodePtr(inputs[1]));
break;
case 3:
nodePtr->AttachInputs(ComputationNodePtr(inputs[0]), ComputationNodePtr(inputs[1]), ComputationNodePtr(inputs[2]));
break;
default:
Error("Invalid number of parameters name = '%s' call = '%s'\n", node->GetName().c_str(), node->GetValue().c_str());
}
}
}
}
virtual ~PTaskNodeEvaluator()
{
}
private:
PTaskComputationNetwork<ElemType>& m_net;
typedef ComputationNode<ElemType>* ComputationNodePtr;
void InitLearnableParameters(ComputationNodePtr node, const size_t inputSize, ULONG randomSeed, bool uniformInit)
{
ElemType initValueScale = (ElemType)1.0;
if (uniformInit)
{
ElemType randRange = (ElemType)0.05; //initValueScale/sqrt(inputSize);
node->FunctionValues().SetUniformRandomValue(-randRange, randRange, randomSeed);
}
else
{
ElemType randInitstd = (ElemType)0.2 * initValueScale/sqrt((ElemType)inputSize);
node->FunctionValues().SetGaussianRandomValue(0,randInitstd,randomSeed);
}
}
};
template class PTaskComputationNetwork<float>;
template class PTaskComputationNetwork<double>;
// PTaskExecutionEngine
template <typename ElemType>
class PTaskExecutionEngine : public IExecutionEngine<ElemType>
{
public:
PTaskExecutionEngine()
{
m_nodeEvaluator = new PTaskNodeEvaluator<ElemType>(m_computationNetwork);
}
virtual ~PTaskExecutionEngine()
{
}
ComputationNetwork<ElemType>& GetComputationNetwork()
{
return m_computationNetwork;
}
NDLNodeEvaluator<ElemType>& GetNodeEvaluator()
{
return *m_nodeEvaluator;
}
private:
PTaskComputationNetwork<ElemType> m_computationNetwork;
PTaskNodeEvaluator<ElemType>* m_nodeEvaluator;
};
template class PTaskExecutionEngine<float>;
template class PTaskExecutionEngine<double>;
}}}

File diff suppressed because it is too large. Load Diff

View file

@ -1,405 +0,0 @@
//
// <copyright file="PTaskGraphBuilder.h" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
#pragma once
#ifdef USE_PTASK
#include "PTask.h"
#else
typedef void Port;
typedef void Graph;
typedef void Task;
typedef void Channel;
typedef int CONTROLSIGNAL;
#define DBCTLC_BOF 1
#define DBCTLC_EOF 2
#define DBCTLC_NONE 3
#endif
#ifndef _WIN32 // BUGBUG: fix this once we need it
typedef unsigned int UINT;
typedef long long (*FARPROC)();
#endif
#include <string>
//#include <cuda_runtime.h>
namespace Microsoft { namespace MSR { namespace CNTK {
#if 0 // TODO: where is this used? It creates the dependency on cuda_runtime.h, which we prefer to not have
extern __declspec(thread) cudaStream_t t_stream;
// class for stream overrides for PTask
// auto-class to set stream override inside a function
// usage at each function that calls CUDA:
// onstream override(stream);
class onstream
{
cudaStream_t prevStream;
public:
onstream (cudaStream_t stream) { prevStream = GetStream(); SetStream(stream);}
~onstream() { SetStream(prevStream); }
};
#endif
// any pointer/reference and all scalar types that fit into 8 bytes can be parameters
enum ParamType
{
paramTypeNone,
paramTypeMatrix,
paramTypePointer,
paramTypeReference = paramTypePointer, // both the same in memory
paramTypeShort,
paramTypeInteger,
paramTypeLong = paramTypeInteger,
paramTypeSizet,
paramTypeLongLong = paramTypeSizet,
paramTypeSingle,
paramTypeDouble,
paramTypeChar,
paramTypeBool,
paramTypeNode, // pass the node as first parameter, to be used ONLY for CONSTANT values
};
enum ParamOptions
{
paramOptionsNull = 0, // invalid value
paramOptionsInput = 1, // and input value for a task
paramOptionsOutput = 2, // an output value from a task
paramOptionsTemporary = 4, // for variables only used within one task (and then thrown away)
paramOptionsInitialize = 8, // initialize buffer with value
paramOptionsRecurrantIterator = 16, // iterator for recurrancy
paramOptionsConstant = 32, // constant value
paramOptionsMaintainValue = 64, // maintain value for in/out parameters, if not specified in/out means pass-through variable
paramOptionsNoPush = 128, // flag for parameter routines to not push the values
paramOptionsInitOnBOF = 256, // Initialize the buffer on BOF signal
paramOptionsSaveOnEOF = 512, // Save on EOF
paramOptionsInitalValuesOnDestinations = 1024, // Initial values need to be set on all destination ports, needed for Update
};
enum TaskType
{
taskNull, // invalid value
taskEvaluate, // EvaluateThisNode() task
taskComputeInputPartial, // ComputeInputPartial() task
taskEvaluateRecurrent, // EvaluateThisNode() Recurrent task
taskComputeInputPartialRecurrent, // ComputeInputPartial() Recurrent task
taskUpdate, // update weight matricies
taskOutput, // output node, copy back to ComputationNode structure
};
// ParamData - parameter data, explains the parameter type
template<class ElemType>
class ParamData
{
public:
ParamType type; // data type of the parameter
void *assocData; // associated data (i.e. for Matrix, the matrix is the node this corresponds to)
Port* port; // PTask port that is created for this parameter
Port* portOut; // output port for in/out parameters
UINT options; // parameter options (see ParamOptions above)
ElemType initValue; // initialization data for "paramOptionsInitialize"
std::string name; // name of the parameter
// constructors
ParamData(ParamType type, const std::string& name, UINT options) : type(type), name(name), assocData(NULL), port(NULL), portOut(NULL), options(options), initValue(0)
{}
ParamData(ParamType type, const std::string& name, void* data, UINT options) : type(type), name(name), assocData(data), port(NULL), portOut(NULL), options(options), initValue(0)
{}
// Initialize a parameter with an inital value
// initValue - value to initialize the port with
void SetInitialize(ElemType initVal)
{
initValue = initVal;
options |= paramOptionsInitialize;
}
// SetConstant - Set a constant value to a port
// data - pointer to the data
// sizeData - size in bytes of the data
void SetConstant(void* data, int sizeData)
{
#ifdef USE_PTASK
GraphInputChannel* channel = (GraphInputChannel*)port->GetChannel(0);
Datablock* pblock = PTask::Runtime::AllocateDatablock(port->GetTemplate(), (void *)data, sizeData, NULL);
channel->Push(pblock);
pblock->Release();
#endif
}
};
// predeclaration
template<class ElemType>
class ComputationNetwork;
template<class ElemType>
class ComputationNode;
// Describes a PTask task.
// One instance is created for each actual task that will be added to the PTask graph.
// The descriptors are created first, to support phased assembly of the information
// about the tasks and their relationships.
template<class ElemType>
class TaskDescriptor
{
protected:
typedef ComputationNode<ElemType>* ComputationNodePtr;
public:
TaskDescriptor(
const ComputationNode<ElemType>* node,
TaskType taskType,
size_t input=0
);
virtual ~TaskDescriptor();
const std::string& TaskName() const { return m_taskName; }
std::string& TaskName() { return m_taskName; }
bool IsForwardTask() const { return m_taskType == taskEvaluate || m_taskType == taskEvaluateRecurrent; }
TaskType GetTaskType() const { return m_taskType;}
const ComputationNodePtr GetNode() const {return m_node;}
const Task* GetTask() const {return m_task;}
ParamData<ElemType>* GradientParam(int index=-1, UINT options=paramOptionsInput, ElemType initValue=ElemType(0.0));
ParamData<ElemType>* FunctionParam(int index=-1, UINT options=paramOptionsOutput);
ParamData<ElemType>* MatrixParam(const Matrix<ElemType>& matrix, const std::string& name, UINT options=paramOptionsInput);
ParamData<ElemType>* Param(ParamType paramType, const std::string& name, UINT options=paramOptionsInput, void* data=nullptr);
void SetFunction(FARPROC function) {m_function = function;}
#ifdef USE_PTASK
FARPROC GetFunction() {return m_function;}
void ConfigureInputsAndOutputs(UINT& uidCounter, std::map<const std::string, Port*>& valueNameToProducerPortMap);
void CreateTask(Graph* graph);
void CreateChannelsForInputs(
Graph* graph,
std::map<const std::string, Port*>& valueNameToProducerPortMap,
std::map<const std::string, std::vector<PTask::GraphInputChannel*>*>& inputNameToChannelsMap,
int verbosity);
void CreateInitializerChannel(
Graph* graph,
Port* port,
Matrix<ElemType>& matrix,
const std::string& name
);
void CreateBackAndInitChannel(Graph* graph, std::map<const std::string, PTask::GraphOutputChannel*>& outputNameToChannelsMap);
void FindEmptyOutPorts(Graph* graph);
// GetParamData - return the parameter data in parameter order
const std::vector<ParamData<ElemType>*>& GetParameters() const {return m_paramData;}
private:
Port* TaskDescriptor<ElemType>::CreatePortForTemplate(DatablockTemplate* dt,
UINT portType,
std::string& valueName,
UINT portIndex, UINT inoutPort, bool gpuBuffer,
UINT& uidCounter,
std::map<const std::string, Port*>& valueNameToProducerPortMap
);
std::vector<const std::string> m_inputNames;
std::vector<const std::string> m_outputNames;
UINT m_numInputPorts;
Port** m_inputPorts;
UINT m_numOutputPorts;
Port** m_outputPorts;
static DatablockTemplate* s_descriptorTemplate;
#endif
private:
std::vector<ParamData<ElemType>*> m_paramData; // parameter data for CNTK task
ComputationNodePtr m_node;
TaskType m_taskType;
std::string m_taskName;
Task* m_task;
FARPROC m_function;
};
template<class ElemType>
class PTaskGraphBuilder
{
private:
typedef ComputationNode<ElemType>* ComputationNodePtr;
typedef TaskDescriptor<ElemType>* TaskDescriptorPtr;
public:
PTaskGraphBuilder();
virtual ~PTaskGraphBuilder();
virtual void BuildFromComputationNetwork(ComputationNetwork<ElemType>* cn);
void StartPTaskGraph();
void UpdateParameters(void* sgd, const ElemType learnRatePerSample, const size_t expectedMBSize);
void PushActualMBSize(const std::list<ComputationNodePtr>& learnableNodes, size_t actualMBSize, CONTROLSIGNAL signal=DBCTLC_NONE);
void PushData(std::map<std::wstring, Matrix<ElemType>*>& data, CONTROLSIGNAL signal=DBCTLC_NONE);
void PushMatrix(const Matrix<ElemType>& matrix, Channel* channel, CONTROLSIGNAL signal=DBCTLC_NONE);
ElemType GetValue(ComputationNodePtr node);
void GetValue(ComputationNode<ElemType>* node, Matrix<ElemType>& matTo);
#ifdef USE_PTASK
//static void WINAPI OutputParameter(const ComputationalNode<ElemType>* node, Matrix<ElemType> &functionValues);
static void __stdcall ApplicationContextCallback(
APPLICATIONCONTEXTCALLBACKPOINT eCallbackPoint,
const Datablock * pDatablock,
void ** ppApplicationContext
);
private:
// Copy constructor, should never be called.
PTaskGraphBuilder(const PTaskGraphBuilder<ElemType>& deepCopyFrom) {};
// Assignment operator, should never be called.
PTaskGraphBuilder<ElemType>& operator=(const PTaskGraphBuilder<ElemType>& deepCopyFrom) {assert(false); return *this; /* NOTE: just doing this to appease the compiler*/};
// Create descriptors to model PTask tasks that ComputationNodes will be mapped to.
void CreateTaskDescriptorsForComputationNodes();
void CreateTaskDescriptorForNode(ComputationNodePtr node, TaskType taskFlag);
void CreateInputName(std::string inputName);
// Configure task inputs and outputs.
void ConfigureTaskInputsAndOutputs()
{
if (m_verbosity >= 1) fprintf(stderr, "\nConfiguring task inputs and outputs ...\n");
for (auto taskIter=m_taskNameToTaskDescriptorMap.begin(); taskIter != m_taskNameToTaskDescriptorMap.end(); taskIter++)
{
if (m_verbosity >= 2) fprintf(stderr, " Task %s\n", taskIter->first.c_str());
TaskDescriptorPtr taskDescriptor = taskIter->second;
taskDescriptor->ConfigureInputsAndOutputs(m_portUIDCounter, m_valueNameToProducerPortMap);
}
}
// Create actual PTask tasks from task descriptors.
void CreateTasksFromDescriptors()
{
if (m_verbosity >= 1) fprintf(stderr, "\nCreating tasks from descriptors ...\n");
for (auto taskIter=m_taskNameToTaskDescriptorMap.begin(); taskIter != m_taskNameToTaskDescriptorMap.end(); taskIter++)
{
if (m_verbosity >= 2) fprintf(stderr, " Task %s\n", taskIter->first.c_str());
TaskDescriptorPtr taskDescriptor = taskIter->second;
taskDescriptor->CreateTask(m_PTaskGraph);
}
}
// Create PTask channels.
void CreateChannels()
{
if (m_verbosity >= 1) fprintf(stderr, "\nCreating PTask channels ...\n");
for (auto taskIter=m_taskNameToTaskDescriptorMap.begin(); taskIter != m_taskNameToTaskDescriptorMap.end(); taskIter++)
{
if (m_verbosity >= 2) fprintf(stderr, " Task %s\n", taskIter->first.c_str());
TaskDescriptorPtr taskDescriptor = taskIter->second;
taskDescriptor->CreateChannelsForInputs(m_PTaskGraph, m_valueNameToProducerPortMap, m_inputNameToChannelsMap, m_verbosity);
}
}
void CreateBackAndInitChannels()
{
if (m_verbosity >= 1) fprintf(stderr, "\nCreating LearnableParameter extra channels...\n");
for (auto taskIter=m_taskNameToTaskDescriptorMap.begin(); taskIter != m_taskNameToTaskDescriptorMap.end(); taskIter++)
{
if (m_verbosity >= 2) fprintf(stderr, " Task %s\n", taskIter->first.c_str());
TaskDescriptorPtr taskDescriptor = taskIter->second;
taskDescriptor->CreateBackAndInitChannel(m_PTaskGraph, m_outputNameToChannelsMap);
}
}
void FindEmptyOutPorts()
{
if (m_verbosity >= 1) fprintf(stderr, "\nFinding empty ports to plug...\n");
for (auto taskIter=m_taskNameToTaskDescriptorMap.begin(); taskIter != m_taskNameToTaskDescriptorMap.end(); taskIter++)
{
TaskDescriptorPtr taskDescriptor = taskIter->second;
taskDescriptor->FindEmptyOutPorts(m_PTaskGraph);
}
}
void CreatePropogationPath();
void CreateOutputChannels(const vector<ComputationNodePtr>& nodes);
// LimitAccelerators - Limit the Accelerators to the ones chosen by the user (or decided by the "bestGPU" algorithm)
void LimitAccelerators()
{
UINT uiIndex = 0;
// get the devices from here
std::vector<int>::iterator vi;
std::vector<int> devices = g_bestGpu->GetDevices(BestGpu::RequeryDevices, bestGpuRequery);
for(vi=devices.begin(); vi!=devices.end(); vi++)
{
if (m_verbosity >= 1)
printf("-- CUDA accelerator with CUDA id %d ENABLED for PTask\n", *vi);
PTask::Runtime::EnableAccelerator(ACCELERATOR_CLASS_CUDA, *vi);
}
}
// Start the PTask graph executing.
void StartGraph()
{
// Output the graph in Graphviz 'dot' format.
// Obtain Graphviz from http://www.graphviz.org/Download_windows.php
// Currently using version 2.34. Add C:\Program Files (x86)\Graphviz2.34\bin to PATH.
// Use:
// dot -Tpng C:\temp\PTaskGraph.dot -o PTaskGraph.png
// to render as PNG image.
if (m_verbosity >= 1)
{
fprintf(stderr, "Outputting graph to %s in Graphviz 'dot' format ...\n", PTASK_GRAPH_VIZ_FILE);
fprintf(stderr, " Convert to .png with: dot -Tpng C:\\temp\\PTaskGraph.dot -o PTaskGraph.png\n");
fprintf(stderr, " See PTaskGraphBuilder::StartGraph() for details.\n");
}
m_PTaskGraph->WriteDOTFile(PTASK_GRAPH_VIZ_FILE);
if (m_verbosity >= 1) fprintf(stderr, "Checking graph semantics ...\n");
Runtime::CheckGraphSemantics(m_PTaskGraph, TRUE, TRUE);
if (m_verbosity >= 1) fprintf(stderr, "Starting graph ...\n");
m_PTaskGraph->Run(TRUE); // single threaded for debugging
}
bool IsRunning()
{
return m_PTaskGraph->IsRunning();
}
TaskDescriptor<ElemType>* PTaskGraphBuilder<ElemType>::GetPTaskDescriptorOutput(ComputationNodePtr node) const;
std::vector<ComputationNodePtr> m_computationNodes;
std::map<const std::string, TaskDescriptorPtr> m_taskNameToTaskDescriptorMap;
std::map<const std::string, Port*> m_valueNameToProducerPortMap;
std::map<const std::string,
std::vector<PTask::GraphInputChannel*>*> m_inputNameToChannelsMap;
std::map<const std::string,
PTask::GraphOutputChannel*> m_outputNameToChannelsMap;
UINT m_portUIDCounter;
Graph* m_PTaskGraph;
int m_verbosity;
// state for PTask nodes
ComputationNetwork<ElemType>* m_cn;
#endif
};
#ifdef USE_PTASK
// the Host Task driver
template <class ElemType>
static void __stdcall
HostTaskDriver(LPDEPENDENTCONTEXT depContext);
#endif
}}}

File diff suppressed because it is too large. Load Diff

View file

@ -1,9 +1,11 @@
@echo off
setlocal enabledelayedexpansion
echo #ifndef _BUILDINFO_H > buildinfo.h
echo #define _BUILDINFO_H >> buildinfo.h
::: This is called as a pre-build step for the CNTK executable.
::: It creates buildinfo.h, which makes version information available to the executable itself.
echo #ifndef _BUILDINFO_H > buildinfo.h$$
echo #define _BUILDINFO_H >> buildinfo.h$$
FOR /F "usebackq" %%i IN (`hostname`) DO SET HOST=%%i
:: assuming hostname always exists
@ -11,22 +13,25 @@ FOR /F "usebackq" %%i IN (`hostname`) DO SET HOST=%%i
:: not sure whether git in path ?
git --version 2 > nul
if not %ERRORLEVEL% == 9009 (
echo #define _GIT_EXIST >> buildinfo.h
echo #define _GIT_EXIST >> buildinfo.h$$
FOR /F "usebackq" %%i IN (`git rev-parse --abbrev-ref HEAD`) DO SET BRANCH=%%i
FOR /F "usebackq" %%i IN (`git rev-parse HEAD`) DO SET COMMIT=%%i
echo #define _BUILDBRANCH_ "!BRANCH!" >> buildinfo.h
echo #define _BUILDSHA1_ "!COMMIT!" >> buildinfo.h
)
echo #define _BUILDBRANCH_ "!BRANCH!" >> buildinfo.h$$
echo #define _BUILDSHA1_ "!COMMIT!" >> buildinfo.h$$
)
echo #define _BUILDER_ "%USERNAME%" >> buildinfo.h
echo #define _BUILDMACHINE_ "!HOST!" >> buildinfo.h
echo #define _BUILDER_ "%USERNAME%" >> buildinfo.h$$
echo #define _BUILDMACHINE_ "!HOST!" >> buildinfo.h$$
set a=%~dp0
set buildpath="%a:\=\\%"
echo #define _BUILDPATH_ %buildpath% >> buildinfo.h
echo #define _BUILDPATH_ %buildpath% >> buildinfo.h$$
set cuda_path="%CUDA_PATH:\=\\%"
echo #define _CUDA_PATH_ %cuda_path% >> buildinfo.h
echo #define _CUDA_PATH_ %cuda_path% >> buildinfo.h$$
echo #endif >> buildinfo.h
echo #endif >> buildinfo.h$$
::: update file only if it changed (otherwise CNTK.cpp will get rebuilt each time)
fc buildinfo.h$$ buildinfo.h > NUL
if ERRORLEVEL 1 move /Y buildinfo.h$$ buildinfo.h

View file

@ -78,7 +78,7 @@
<DelayLoadDLLs>CNTKMath.dll; nvml.dll; cudart64_70.dll</DelayLoadDLLs>
</Link>
<PostBuildEvent>
<Command>XCOPY /I /Y "%ProgramW6432%\NVIDIA Corporation\NVSMI\nvml*.dll" $(TargetDir)</Command>
<Command>xcopy /I /D /Y "%ProgramW6432%\NVIDIA Corporation\NVSMI\nvml*.dll" $(TargetDir)</Command>
<Message>Copying NVidia GDK extension DLL to target folder</Message>
</PostBuildEvent>
</ItemDefinitionGroup>
@ -108,12 +108,13 @@
<DelayLoadDLLs>CNTKMath.dll; nvml.dll; cudart64_70.dll</DelayLoadDLLs>
</Link>
<PostBuildEvent>
<Command>XCOPY /I /Y "%ProgramW6432%\NVIDIA Corporation\NVSMI\nvml*.dll" $(TargetDir)</Command>
<Command>xcopy /I /D /Y "%ProgramW6432%\NVIDIA Corporation\NVSMI\nvml*.dll" $(TargetDir)</Command>
<Message>Copying NVidia GDK extension DLL to target folder</Message>
</PostBuildEvent>
</ItemDefinitionGroup>
<ItemGroup>
<ClInclude Include="..\..\Common\Include\basetypes.h" />
<ClInclude Include="..\..\Common\Include\Basics.h" />
<ClInclude Include="..\..\Common\Include\Eval.h" />
<ClInclude Include="..\..\Common\Include\File.h" />
<ClInclude Include="..\..\Common\Include\fileutil.h" />

View file

@ -43,6 +43,9 @@
<ClInclude Include="..\..\Common\Include\TimerUtility.h">
<Filter>Common\Include</Filter>
</ClInclude>
<ClInclude Include="..\..\Common\Include\Basics.h">
<Filter>Common\Include</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<Filter Include="Common">

View file

@ -1,29 +0,0 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio 2012
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cn", "cn\cn.vcxproj", "{E6F26F9A-FF64-4F0A-B749-CD309EE357EE}"
EndProject
Global
GlobalSection(TeamFoundationVersionControl) = preSolution
SccNumberOfProjects = 2
SccEnterpriseProvider = {4CA58AB2-18FA-4F8D-95D4-32DDF27D184C}
SccTeamFoundationServer = http://vstfcodebox:8080/tfs/kappa
SccLocalPath0 = .
SccProjectUniqueName1 = cn\\cn.vcxproj
SccProjectName1 = cn
SccLocalPath1 = cn
EndGlobalSection
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|x64 = Debug|x64
Release|x64 = Release|x64
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{E6F26F9A-FF64-4F0A-B749-CD309EE357EE}.Debug|x64.ActiveCfg = Debug|x64
{E6F26F9A-FF64-4F0A-B749-CD309EE357EE}.Debug|x64.Build.0 = Debug|x64
{E6F26F9A-FF64-4F0A-B749-CD309EE357EE}.Release|x64.ActiveCfg = Release|x64
{E6F26F9A-FF64-4F0A-B749-CD309EE357EE}.Release|x64.Build.0 = Release|x64
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
EndGlobal

View file

@ -1,10 +0,0 @@
""
{
"FILE_VERSION" = "9237"
"ENLISTMENT_CHOICE" = "NEVER"
"PROJECT_FILE_RELATIVE_PATH" = ""
"NUMBER_OF_EXCLUDED_FILES" = "0"
"ORIGINAL_PROJECT_FILE_PATH" = ""
"NUMBER_OF_NESTED_PROJECTS" = "0"
"SOURCE_CONTROL_SETTINGS_PROVIDER" = "PROJECT"
}

View file

@ -68,7 +68,7 @@ COMMON_SRC = Common/fileutil.cpp Common/DataWriter.cpp Common/ConfigFile.cpp Com
MATH_SRC = Math/Math/Matrix.cpp Math/Math/CPUMatrix.cpp Math/Math/CPUSparseMatrix.cpp Math/Math/NoGPU.cpp
CN_SRC = MachineLearning/CNTK/NetworkDescriptionLanguage.cpp MachineLearning/CNTK/CNTK.cpp MachineLearning/CNTK/ComputationNode.cpp \
MachineLearning/CNTK/ModelEditLanguage.cpp MachineLearning/CNTK/PTaskGraphBuilder.cpp \
MachineLearning/CNTK/ModelEditLanguage.cpp \
MachineLearning/CNTK/SimpleNetworkBuilder.cpp MachineLearning/CNTK/tests.cpp MachineLearning/CNTKEval/CNTKEval.cpp
BINARYREADER_SRC = DataReader/BinaryReader/BinaryWriter.cpp DataReader/BinaryReader/BinaryReader.cpp DataReader/BinaryReader/BinaryFile.cpp
HTKMLFREADER_SRC = DataReader/HTKMLFReader_linux/HTKMLFWriter.cpp DataReader/HTKMLFReader_linux/DataWriter.cpp DataReader/HTKMLFReader_linux/DataReader.cpp DataReader/HTKMLFReader_linux/HTKMLFReader.cpp

View File

@ -77,7 +77,7 @@ COMMON_SRC = Common/fileutil.cpp Common/DataWriter.cpp Common/ConfigFile.cpp Com
MATH_SRC = Math/Math/Matrix.cpp Math/Math/GPUMatrix.cu Math/Math/GPUMatrixCUDAKernels.cu Math/Math/GPUSparseMatrix.cu Math/Math/GPUWatcher.cu \
Math/Math/CPUMatrix.cpp Math/Math/CPUSparseMatrix.cpp #Math/Math/InstantiateTemplates.cu
CN_SRC = MachineLearning/CNTK/NetworkDescriptionLanguage.cpp MachineLearning/CNTK/CNTK.cpp MachineLearning/CNTK/ComputationNode.cpp \
MachineLearning/CNTK/ModelEditLanguage.cpp MachineLearning/CNTK/PTaskGraphBuilder.cpp \
MachineLearning/CNTK/ModelEditLanguage.cpp \
MachineLearning/CNTK/SimpleNetworkBuilder.cpp MachineLearning/CNTK/tests.cpp MachineLearning/CNTKEval/CNTKEval.cpp
BINARYREADER_SRC = #DataReader/BinaryReader/BinaryWriter.cpp DataReader/BinaryReader/BinaryReader.cpp DataReader/BinaryReader/BinaryFile.cpp
HTKMLFREADER_SRC = DataReader/HTKMLFReader_linux/HTKMLFWriter.cpp DataReader/HTKMLFReader_linux/DataWriter.cpp DataReader/HTKMLFReader_linux/DataReader.cpp DataReader/HTKMLFReader_linux/HTKMLFReader.cpp

View File

@ -73,7 +73,7 @@ COMMON_SRC = Common/fileutil.cpp Common/DataWriter.cpp Common/ConfigFile.cpp Com
MATH_SRC = Math/Math/Matrix.cpp Math/Math/CPUMatrix.cpp Math/Math/CPUSparseMatrix.cpp Math/Math/NoGPU.cpp
CN_SRC = MachineLearning/CNTK/NetworkDescriptionLanguage.cpp MachineLearning/CNTK/CNTK.cpp MachineLearning/CNTK/ComputationNode.cpp \
MachineLearning/CNTK/ModelEditLanguage.cpp MachineLearning/CNTK/PTaskGraphBuilder.cpp \
MachineLearning/CNTK/ModelEditLanguage.cpp \
MachineLearning/CNTK/SimpleNetworkBuilder.cpp MachineLearning/CNTK/tests.cpp MachineLearning/CNTKEval/CNTKEval.cpp
BINARYREADER_SRC = DataReader/BinaryReader/BinaryWriter.cpp DataReader/BinaryReader/BinaryReader.cpp DataReader/BinaryReader/BinaryFile.cpp
HTKMLFREADER_SRC = DataReader/HTKMLFReader_linux/HTKMLFWriter.cpp DataReader/HTKMLFReader_linux/DataWriter.cpp DataReader/HTKMLFReader_linux/DataReader.cpp DataReader/HTKMLFReader_linux/HTKMLFReader.cpp

View File

@ -80,7 +80,7 @@ COMMON_SRC = Common/fileutil.cpp Common/DataWriter.cpp Common/ConfigFile.cpp Com
MATH_SRC = Math/Math/Matrix.cpp Math/Math/GPUMatrix.cu Math/Math/GPUMatrixCUDAKernels.cu Math/Math/GPUSparseMatrix.cu Math/Math/GPUWatcher.cu \
Math/Math/CPUMatrix.cpp Math/Math/CPUSparseMatrix.cpp #Math/Math/InstantiateTemplates.cu
CN_SRC = MachineLearning/CNTK/NetworkDescriptionLanguage.cpp MachineLearning/CNTK/CNTK.cpp MachineLearning/CNTK/ComputationNode.cpp \
MachineLearning/CNTK/ModelEditLanguage.cpp MachineLearning/CNTK/PTaskGraphBuilder.cpp \
MachineLearning/CNTK/ModelEditLanguage.cpp \
MachineLearning/CNTK/SimpleNetworkBuilder.cpp MachineLearning/CNTK/tests.cpp MachineLearning/CNTKEval/CNTKEval.cpp
BINARYREADER_SRC = #DataReader/BinaryReader/BinaryWriter.cpp DataReader/BinaryReader/BinaryReader.cpp DataReader/BinaryReader/BinaryFile.cpp
HTKMLFREADER_SRC = DataReader/HTKMLFReader_linux/HTKMLFWriter.cpp DataReader/HTKMLFReader_linux/DataWriter.cpp DataReader/HTKMLFReader_linux/DataReader.cpp DataReader/HTKMLFReader_linux/HTKMLFReader.cpp

View File

@ -7,10 +7,10 @@
#include <string>
#include "CppUnitTest.h"
#include "..\Math\Matrix.h"
#include "..\..\common\include\basetypes.h"
#include "..\..\common\include\Basics.h"
#include "..\..\common\include\fileutil.h"
#include "..\..\common\include\file.h"
#include "..\..\common\file.cpp"
#include "..\..\common\include\File.h"
#include "..\..\common\File.cpp"
#include "..\..\common\fileutil.cpp"

View File

@ -86,10 +86,10 @@
<Profile>true</Profile>
</Link>
<PostBuildEvent>
<Command>xcopy /D /I /Y "$(CUDA_PATH)\bin\cudart64_70.dll" $(OutputPath)</Command>
<Command>xcopy /D /I /Y "$(CUDA_PATH)\bin\cudart64_*.dll" $(OutputPath)</Command>
</PostBuildEvent>
<PostBuildEvent>
<Message>Copying cudart64_65.dll</Message>
<Message>Copying cudart64 DLL</Message>
</PostBuildEvent>
<CudaCompile>
<TargetMachinePlatform>64</TargetMachinePlatform>
@ -133,8 +133,8 @@
<DelayLoadDLLs>cublas64_65.dll;cusparse64_65.dll;curand64_65.dll;cudart64_65.dll;%(DelayLoadDLLs)</DelayLoadDLLs>
</Link>
<PostBuildEvent>
<Command>xcopy /D /I /Y "$(CUDA_PATH)\bin\cudart64_70.dll" $(OutputPath)</Command>
<Message>Copying cudart64_65.dll</Message>
<Command>xcopy /D /I /Y "$(CUDA_PATH)\bin\cudart64_*.dll" $(OutputPath)</Command>
<Message>Copying cudart64 DLL</Message>
</PostBuildEvent>
<CudaCompile>
<FastMath>true</FastMath>
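The post-build step above now matches cudart64_*.dll instead of one hard-coded runtime version, so whichever CUDA runtime is installed gets copied. A hedged C++ sketch of the same wildcard copy (directory arguments and the helper name are placeholders):

// Sketch: copy every cudart64_*.dll from the CUDA bin directory, the way the
// wildcard in the post-build step does. Assumes C++17 <filesystem>.
#include <filesystem>
#include <string>

namespace fs = std::filesystem;

static void CopyCudartDlls(const fs::path& cudaBin, const fs::path& outDir)
{
    fs::create_directories(outDir);
    for (const auto& entry : fs::directory_iterator(cudaBin))
    {
        const std::string name = entry.path().filename().string();
        if (name.rfind("cudart64_", 0) == 0 && entry.path().extension() == ".dll")
            fs::copy_file(entry.path(), outDir / name, fs::copy_options::update_existing);
    }
}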

View File

@ -1741,8 +1741,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Resize(a.GetNumRows(), a.GetNumCols());
PrepareDevice();
LONG64 N=(LONG64)GetNumElements();
int blocksPerGrid =(int)ceil(1.0*N/threadsPerBlock);
//LONG64 N=(LONG64)GetNumElements();
//int blocksPerGrid =(int)ceil(1.0*N/threadsPerBlock);
cudaEvent_t done = nullptr;
if (do_sync) CUDA_CALL(cudaEventCreate(&done));
@ -1760,7 +1760,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
UNCONST(ElemType, a, my_a);
UNCONST(ElemType, b, my_b);
cudaEvent_t done;
cudaEvent_t done = nullptr;
if (do_sync) CUDA_CALL(cudaEventCreate(&done));
int p = 512;
@ -1806,7 +1806,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
UNCONST(ElemType, a, my_a);
UNCONST(ElemType, b, my_b);
cudaEvent_t done;
cudaEvent_t done = nullptr;
if (do_sync) CUDA_CALL(cudaEventCreate(&done));
int p = 512;
int width = a.GetNumCols();
@ -1837,6 +1837,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
assert(GetNumRows() == a.GetNumRows());
assert(GetNumCols() == b.GetNumRows());
assert(a.GetNumCols() == b.GetNumRows());
c; // TODO: this function seems like a stub
/*
EnsureAuxMemory();
int p = 512;
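In the hunks above, done is initialized to nullptr because cudaEventCreate is only called when do_sync is set, so the handle would otherwise be left uninitialized on the non-sync path. A hedged CUDA C++ sketch of the pattern (CUDA_CALL and the function name stand in for the project's own macro and code; the kernel launch is elided):

// Sketch of the optional-sync pattern: the event handle gets a defined value
// even when do_sync is false and cudaEventCreate is never called.
#include <cuda_runtime.h>
#include <cstdio>
#include <cstdlib>

#define CUDA_CALL(expr) do { cudaError_t e_ = (expr); \
    if (e_ != cudaSuccess) { std::fprintf(stderr, "CUDA error %d\n", (int)e_); std::exit(1); } } while (0)

static void LaunchWithOptionalSync(bool do_sync)
{
    cudaEvent_t done = nullptr;                 // defined value even if never created
    if (do_sync) CUDA_CALL(cudaEventCreate(&done));

    // ... kernel launch would go here ...

    if (do_sync) CUDA_CALL(cudaEventRecord(done));
    if (do_sync) CUDA_CALL(cudaEventSynchronize(done));
    if (do_sync) CUDA_CALL(cudaEventDestroy(done));
}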

View File

@ -3245,7 +3245,7 @@ __global__ void _assignNceDerivative(
int rowIndex = i / sampleCount;
ElemType er = tmp[i]; // precalculated error for this output node
ElemType log_pnw = val[2 * i + 1];
//ElemType log_pnw = val[2 * i + 1];
// calculate gradients
int loadPerThread = (width + blockDim.x - 1) / blockDim.x;

View File

@ -51,7 +51,7 @@
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<LinkIncremental>true</LinkIncremental>
<LinkIncremental>false</LinkIncremental>
<IncludePath>..\..\common\include;$(ACML_PATH)\include;$(CUDA_PATH)\include;$(IncludePath)</IncludePath>
<LibraryPath>$(SolutionDir)$(Platform)\$(Configuration);$(ACML_PATH)\lib;$(CUDA_PATH)\lib\$(Platform);$(LibraryPath)</LibraryPath>
<IntDir>$(Platform)\$(Configuration)\$(ProjectName)\</IntDir>
@ -81,7 +81,7 @@
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalDependencies>libacml_mp_dll.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalLibraryDirectories>$(SolutionDir)$(Platform)\$(Configuration)\</AdditionalLibraryDirectories>
<DelayLoadDLLs>cublas64_70.dll;cusparse64_70.dll;curand64_70.dll;cudart64_70.dll;libacml_dll.dll;libacml_mp_dll.dll;nvcuda.dll;%(DelayLoadDLLs)</DelayLoadDLLs>
<DelayLoadDLLs>cublas64_70.dll; cusparse64_70.dll; curand64_70.dll; cudart64_70.dll; libacml_dll.dll; libacml_mp_dll.dll; %(DelayLoadDLLs)</DelayLoadDLLs>
<Profile>true</Profile>
</Link>
<PostBuildEvent>
@ -132,7 +132,7 @@
<AdditionalLibraryDirectories>$(SolutionDir)$(Platform)\$(Configuration)\</AdditionalLibraryDirectories>
<AdditionalDependencies>libacml_mp_dll.lib;%(AdditionalDependencies)</AdditionalDependencies>
<Profile>true</Profile>
<DelayLoadDLLs>cublas64_70.dll;cusparse64_70.dll;curand64_70.dll;cudart64_70.dll;libacml_dll.dll;libacml_mp_dll.dll;nvcuda.dll;%(DelayLoadDLLs)</DelayLoadDLLs>
<DelayLoadDLLs>cublas64_70.dll; cusparse64_70.dll; curand64_70.dll; cudart64_70.dll; libacml_dll.dll; libacml_mp_dll.dll; %(DelayLoadDLLs)</DelayLoadDLLs>
</Link>
<PostBuildEvent>
<Command>xcopy /D /I /Y "$(ACML_PATH)\lib\*.dll" $(OutputPath)</Command>