Add Proposal Layer as a native UDF

Alexey Reznichenko 2017-08-07 18:34:57 +02:00
Parent a69d06231a
Commit 65471cdcd9
13 changed files: 830 additions and 7 deletions

View file

@@ -1613,6 +1613,13 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "CNTKLibraryCSTrainingTest",
{50EF9EE6-5018-453E-A063-F77044EF1A97} = {50EF9EE6-5018-453E-A063-F77044EF1A97}
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "ProposalLayer", "ProposalLayer", "{3631994A-59E6-4CD6-99A4-6D332F8DABE2}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ProposalLayerLib", "Examples\Extensibility\ProposalLayer\ProposalLayerLib\ProposalLayerLib.vcxproj", "{91EA9F28-B9B6-4FC7-A47D-9838F5915700}"
ProjectSection(ProjectDependencies) = postProject
{E5606ECE-48CA-4464-BB12-09D81D02B9EF} = {E5606ECE-48CA-4464-BB12-09D81D02B9EF}
EndProjectSection
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug_CpuOnly|x64 = Debug_CpuOnly|x64
@@ -2297,6 +2304,18 @@ Global
{0DF2109B-BB85-4718-82DE-1C0536D4F2C3}.Release_UWP|x64.ActiveCfg = Release_NoOpt|x64
{0DF2109B-BB85-4718-82DE-1C0536D4F2C3}.Release|x64.ActiveCfg = Release|x64
{0DF2109B-BB85-4718-82DE-1C0536D4F2C3}.Release|x64.Build.0 = Release|x64
{91EA9F28-B9B6-4FC7-A47D-9838F5915700}.Debug_CpuOnly|x64.ActiveCfg = Debug_CpuOnly|x64
{91EA9F28-B9B6-4FC7-A47D-9838F5915700}.Debug_CpuOnly|x64.Build.0 = Debug_CpuOnly|x64
{91EA9F28-B9B6-4FC7-A47D-9838F5915700}.Debug_UWP|x64.ActiveCfg = Debug_CpuOnly|x64
{91EA9F28-B9B6-4FC7-A47D-9838F5915700}.Debug|x64.ActiveCfg = Debug|x64
{91EA9F28-B9B6-4FC7-A47D-9838F5915700}.Debug|x64.Build.0 = Debug|x64
{91EA9F28-B9B6-4FC7-A47D-9838F5915700}.Release_CpuOnly|x64.ActiveCfg = Release_CpuOnly|x64
{91EA9F28-B9B6-4FC7-A47D-9838F5915700}.Release_CpuOnly|x64.Build.0 = Release_CpuOnly|x64
{91EA9F28-B9B6-4FC7-A47D-9838F5915700}.Release_NoOpt|x64.ActiveCfg = Release_NoOpt|x64
{91EA9F28-B9B6-4FC7-A47D-9838F5915700}.Release_NoOpt|x64.Build.0 = Release_NoOpt|x64
{91EA9F28-B9B6-4FC7-A47D-9838F5915700}.Release_UWP|x64.ActiveCfg = Release_CpuOnly|x64
{91EA9F28-B9B6-4FC7-A47D-9838F5915700}.Release|x64.ActiveCfg = Release|x64
{91EA9F28-B9B6-4FC7-A47D-9838F5915700}.Release|x64.Build.0 = Release|x64
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
@@ -2521,5 +2540,7 @@ Global
{B6DED59B-B52A-4D44-9B61-26FF0382764A} = {6F19321A-65E7-4829-B00C-3886CD6C6EDE}
{B3B46744-DBB5-42C2-BAD7-9151D9486045} = {6E565B48-1923-49CE-9787-9BBB9D96F4C5}
{0DF2109B-BB85-4718-82DE-1C0536D4F2C3} = {B3B46744-DBB5-42C2-BAD7-9151D9486045}
{3631994A-59E6-4CD6-99A4-6D332F8DABE2} = {3BF56127-6F0F-41CF-BFCE-31165A0A5E73}
{91EA9F28-B9B6-4FC7-A47D-9838F5915700} = {3631994A-59E6-4CD6-99A4-6D332F8DABE2}
EndGlobalSection
EndGlobal

View file

@@ -0,0 +1,49 @@
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
#pragma once
#include <vector>
#include "Rectangle2D.h"
// TODO: the current implementation is O(N^2); it should be possible to do this in O(N log N),
// see the sweeping-line algorithm: http://algs4.cs.princeton.edu/93intersection/
// Returns the first topN elements of the input vector that do not overlap any preceding
// rectangle by more than the given threshold ratio (intersection over union).
std::vector<Rectangle2D> NonMaximumSupression(const std::vector<Rectangle2D>& in, float threshold, size_t topN)
{
std::vector<Rectangle2D> out;
out.reserve(topN);
std::vector<bool> suppressed(in.size(), false);
for (size_t i = 0; i < in.size(); i++)
{
const auto& box1 = in[i];
if (suppressed[i])
continue;
out.push_back(box1);
if (out.size() == topN)
break;
for (auto j = i+1; j < in.size(); j++)
{
if (suppressed[j])
continue;
const auto& box2 = in[j];
float overlap = box1.Overlap(box2);
float overlapRatio = overlap / (box1.Area() + box2.Area() - overlap);
if (overlapRatio >= threshold)
suppressed[j] = true;
}
}
return out;
}
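A minimal standalone sketch of the greedy scheme above (hypothetical, not part of the commit; it assumes NmsUtils.h is on the include path and that the input is already sorted by descending score, as it is at the call site in ProposalLayerLib.h):

#include <cassert>
#include <vector>
#include "NmsUtils.h"

int main()
{
    // Boxes are assumed to be pre-sorted by descending score.
    std::vector<Rectangle2D> boxes{
        { 0.f, 0.f, 9.f, 9.f },    // 10x10 pixels, area 100
        { 1.f, 1.f, 10.f, 10.f },  // IoU with the first box is 81/119, roughly 0.68
        { 20.f, 20.f, 29.f, 29.f } // disjoint from both
    };
    auto kept = NonMaximumSupression(boxes, /*threshold*/ 0.5f, /*topN*/ 10);
    assert(kept.size() == 2); // the second box is suppressed by the first
    return 0;
}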

View file

@@ -0,0 +1,12 @@
#include "ProposalLayerLib.h"
using namespace CNTK;
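// Factory method exported from this module. The registration machinery resolves it by
// name ("CreateProposalLayer") and calls it to construct ProposalLayer instances from
// the three operands (objectness scores, bbox regression deltas, image info) and the
// attribute dictionary.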
extern "C"
#ifdef _WIN32
__declspec (dllexport)
#endif
Function* CreateProposalLayer(const Variable* operands, size_t /*numOperands*/, const Dictionary* attributes, const wchar_t* name)
{
return new ProposalLayer({operands[0], operands[1], operands[2]}, *attributes, name);
}

View file

@@ -0,0 +1,243 @@
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
#pragma once
#include "CNTKLibrary.h"
#include "RpnUtils.h"
#include "NmsUtils.h"
#include <queue>
using namespace CNTK;
using ScoredRectangle2D = std::pair<float, Rectangle2D>;
// Orders boxes by descending score (ties broken by larger area); with std::priority_queue
// this yields a min-heap whose top is the lowest-scoring retained box.
static auto cmp = [](const ScoredRectangle2D& left, const ScoredRectangle2D& right)
{
if (left.first == right.first)
return left.second.Area() > right.second.Area();
return (left.first > right.first);
};
using SortedRectangle2DPriorityQueue = std::priority_queue<ScoredRectangle2D, std::vector<ScoredRectangle2D>, decltype(cmp)>;
class ProposalLayer final : public Function
{
uint32_t m_featStride{16};
std::vector<Rectangle2D> m_anchors;
size_t m_preNMSTopN, m_postNMSTopN;
float m_NMSThresh, m_minSize;
bool m_trainMode;
enum Inputs : uint32_t {
SCORES,
BBOX_DELTAS,
IM_INFO
};
public:
ProposalLayer(const std::vector<Variable>& inputs, const Dictionary& attributes, const std::wstring& name = L"ProposalLayer")
: Function(inputs, attributes, name),
m_preNMSTopN{ 0 }, m_postNMSTopN{ 0 }, m_NMSThresh{ 0 }, m_minSize{0}, m_trainMode(true)
{
if (attributes.Contains(L"feat_stride"))
m_featStride = static_cast<uint32_t>(attributes[L"feat_stride"].Value<int>());
std::vector<uint32_t> scales{ { 8, 16, 32 } };
if (attributes.Contains(L"scales"))
{
scales.clear();
const auto& valueVector = attributes[L"scales"].Value<std::vector<DictionaryValue>>();
for (const auto& value : valueVector)
scales.push_back(static_cast<uint32_t>(value.Value<int>()));
}
ChangeConfiguration(m_trainMode);
m_anchors = GenerateAnchors(scales);
}
private:
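// Select the train- or test-time proposal settings (top-N before/after NMS, NMS threshold,
// minimum box size) from the attribute dictionary.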
void ChangeConfiguration(bool train)
{
const auto& attributes = Attributes();
if (train)
{
m_preNMSTopN = attributes[L"train_pre_nms_topN"].Value<int>();
m_postNMSTopN = attributes[L"train_post_nms_topN"].Value<int>();
m_NMSThresh = static_cast<float>(attributes[L"train_nms_thresh"].Value<double>());
m_minSize = static_cast<float>(attributes[L"train_min_size"].Value<double>());
}
else
{
m_preNMSTopN = attributes[L"test_pre_nms_topN"].Value<int>();
m_postNMSTopN = attributes[L"test_post_nms_topN"].Value<int>();
m_NMSThresh = static_cast<float>(attributes[L"test_nms_thresh"].Value<double>());
m_minSize = static_cast<float>(attributes[L"test_min_size"].Value<double>());
}
}
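// Replicate every base anchor at every spatial position of the score map, shifted by
// the feature stride; produces w * h * m_anchors.size() candidate boxes.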
std::vector<Rectangle2D> GetAllShiftedAnchors(size_t w, size_t h)
{
std::vector<Rectangle2D> anchors;
anchors.reserve(w * h * m_anchors.size());
for (const auto& anchor : m_anchors)
{
for (size_t j = 0; j < h; j++)
{
auto y = float(j * m_featStride);
for (size_t i = 0; i < w; i++)
{
auto x = float(i * m_featStride);
anchors.emplace_back(anchor.xmin + x, anchor.ymin + y, anchor.xmax + x, anchor.ymax + y);
}
}
}
return anchors;
}
BackPropStatePtr Forward(const std::vector<ValuePtr>& inputValues,
std::unordered_map<Variable, ValuePtr>& outputs,
const DeviceDescriptor& computeDevice,
const std::unordered_set<Variable>& outputsToRetainBackwardStateFor) override
{
bool trainMode = outputsToRetainBackwardStateFor.size() != 0;
if (m_trainMode != trainMode)
{
m_trainMode = trainMode;
ChangeConfiguration(m_trainMode);
}
if (computeDevice.Type() != DeviceKind::CPU)
throw std::runtime_error("ProposalLayer: only CPU evaluation is supported at the moment.");
auto scoresShape = inputValues[Inputs::SCORES]->Shape();
if (scoresShape[scoresShape.Rank() - 1] != 1)
throw std::runtime_error("ProposalLayer: only single item batches are supported");
auto height = scoresShape[0];
auto width = scoresShape[1];
// the first set of N (= m_anchors.size()) channels are bg probs
// the second set are the fg probs, which we want
std::vector<size_t> offsets{ 0,0,0,0 };
offsets[2] = m_anchors.size();
auto extent = scoresShape.Dimensions();
extent[2] -= m_anchors.size();
auto fgSlice = inputValues[Inputs::SCORES]->Data()->SliceView(offsets, extent, true);
auto scores = fgSlice->DataBuffer<float>();
// Enumerate all shifted anchors, which will produce (A * H * W) anchors
auto anchors = GetAllShiftedAnchors(width, height);
// Convert anchors into proposals via bbox transformations,
// input bbox deltas are stored as (H, W, 4 * A, 1)
const float* bboxDeltas = inputValues[Inputs::BBOX_DELTAS]->Data()->DataBuffer<float>();
auto proposals = TransformBboxInv(anchors, bboxDeltas, width * height);
// 2. clip predicted boxes to image
const float* imInfo = inputValues[Inputs::IM_INFO]->Data()->DataBuffer<float>();
ClipBoxes(proposals, imInfo);
// Use a local copy so that the configured top-N is not overwritten across Forward calls.
size_t preNMSTopN = (m_preNMSTopN > 0) ? std::min(proposals.size(), m_preNMSTopN) : proposals.size();
// 3. remove predicted boxes with either height or width < threshold
// (NOTE: convert min_size to input image scale. Original size = im_info[4:6], scaled size = im_info[2:4])
// 4. Take top pre_nms_topN(e.g. 6000) proposals with highest scores.
// Priority queue here is effectively a min heap, the top of the pq has the lowest score
// among the top-N elements.
SortedRectangle2DPriorityQueue pq(cmp);
auto cntkImageScale = imInfo[2] / imInfo[4];
auto minSize = m_minSize * cntkImageScale;
for (size_t i = 0; i < proposals.size(); i++)
{
const auto& box = proposals[i];
if (box.Width() < minSize || box.Height() < minSize)
continue;
pq.emplace(scores[i], box);
if (pq.size() > preNMSTopN)
pq.pop();
}
proposals.erase(proposals.begin() + pq.size(), proposals.end());
while (!pq.empty())
{
proposals[pq.size() - 1] = pq.top().second;
pq.pop();
}
// at this point, proposals contains m_preNMSTopN boxes sorted in the descending order of
// their scores (from the highest to lowest).
// 5. apply nms(e.g.threshold = 0.7), take after_nms_topN(e.g. 300) top proposals.
size_t postNMSTopN = (m_postNMSTopN > 0) ? std::min(proposals.size(), m_postNMSTopN) : proposals.size();
proposals = NonMaximumSupression(proposals, m_NMSThresh, postNMSTopN);
auto numFoundProposals = proposals.size();
// 6. pad with zeros if too few rois were found
if (numFoundProposals < postNMSTopN)
{
proposals.resize(postNMSTopN, Rectangle2D(0, 0, 0, 0));
numFoundProposals = proposals.size();
}
// 7. create the output value and copy the data.
NDArrayView outputData(DataType::Float, NDShape({ 4, numFoundProposals }),
proposals.data(), numFoundProposals * 4 * sizeof(float), computeDevice);
auto outputValue = MakeSharedObject<Value>(
MakeSharedObject<NDArrayView>(DataType::Float, NDShape({ 4, numFoundProposals }), computeDevice));
outputValue->Data()->CopyFrom(outputData);
// Output rois blob
outputs[this->Output()] = outputValue;
return nullptr;
}
void Backward(const BackPropStatePtr& /*state*/,
const std::unordered_map<Variable, ValuePtr>& /*rootGradientValues*/,
std::unordered_map<Variable, ValuePtr>& /*backPropagatedGradientValuesForInputs*/) override
{
// No gradient is propagated through the proposal layer.
}
const std::wstring& OpName() const override
{
static const std::wstring opName = L"ProposalLayerOp";
return opName;
}
size_t CurrentVersion() const override { NOT_IMPLEMENTED; }
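// The single output is a [4 x N] tensor of proposal rectangles; N is a free dimension
// because the number of surviving proposals is only known at evaluation time.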
void InferOutputs(std::vector<Variable>& outputs) override
{
auto firstOperand = Function::Inputs()[0];
auto proposalShape = NDShape({ 4, NDShape::FreeDimension });
auto dtype = firstOperand.GetDataType();
auto dynamicAxes = firstOperand.DynamicAxes();
outputs.push_back(OutputVariable(proposalShape, dtype, dynamicAxes, false, L"rpn_rois_raw"));
}
FunctionPtr Clone(const std::vector<Variable>& clonedInputs) override
{
return AsComposite(MakeSharedObject<ProposalLayer>(clonedInputs, this->Attributes(), this->Name()));
}
};
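To build an instance of this op from C++ code, the op has to be registered once and then instantiated with an attribute dictionary that contains every key read by the constructor and ChangeConfiguration above. The sketch below is a hypothetical illustration only: it assumes the C++ counterparts of the Python helpers used later in this commit (Function::RegisterNativeUserFunction and Function::NativeUserFunction), the module name/version string is illustrative, and the numeric values are the usual Faster R-CNN defaults rather than anything mandated by this commit.

#include <vector>
#include "CNTKLibrary.h"
using namespace CNTK;

FunctionPtr MakeNativeProposalLayer(const Variable& scores, const Variable& bboxDeltas, const Variable& imInfo)
{
    // One-time registration: op name -> (module, exported factory symbol).
    Function::RegisterNativeUserFunction(L"ProposalLayerOp",
        L"Cntk.ProposalLayerLib-2.1", L"CreateProposalLayer");

    // Every key read above must be present; the values below are common Faster R-CNN settings.
    Dictionary attributes;
    attributes[L"feat_stride"] = 16;
    attributes[L"scales"] = DictionaryValue(std::vector<DictionaryValue>{ 8, 16, 32 });
    attributes[L"train_pre_nms_topN"] = 12000;
    attributes[L"train_post_nms_topN"] = 2000;
    attributes[L"train_nms_thresh"] = 0.7;
    attributes[L"train_min_size"] = 16.0;
    attributes[L"test_pre_nms_topN"] = 6000;
    attributes[L"test_post_nms_topN"] = 300;
    attributes[L"test_nms_thresh"] = 0.7;
    attributes[L"test_min_size"] = 16.0;

    return Function::NativeUserFunction(L"ProposalLayerOp",
        { scores, bboxDeltas, imInfo }, attributes, L"native_proposal_layer");
}

Note that Forward above only supports CPU evaluation, so a graph containing this op has to be evaluated on the CPU device.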

View file

@@ -0,0 +1,124 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release_NoOpt|x64">
<Configuration>Release_NoOpt</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug_CpuOnly|x64">
<Configuration>Debug_CpuOnly</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release_CpuOnly|x64">
<Configuration>Release_CpuOnly</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<ItemGroup>
<ClCompile Include="ProposalLayerLib.cpp" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="NmsUtils.h" />
<ClInclude Include="ProposalLayerLib.h" />
<ClInclude Include="Rectangle2D.h" />
<ClInclude Include="RpnUtils.h" />
</ItemGroup>
<PropertyGroup Label="Globals">
<ProjectGuid>{91EA9F28-B9B6-4FC7-A47D-9838F5915700}</ProjectGuid>
<Keyword>Win32Proj</Keyword>
<RootNamespace>ProposalLayerLib</RootNamespace>
<ProjectName>ProposalLayerLib</ProjectName>
</PropertyGroup>
<Import Project="$(SolutionDir)\CNTK.Cpp.props" />
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Condition="$(DebugBuild)" Label="Configuration">
<ConfigurationType>DynamicLibrary</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<PlatformToolset>v140</PlatformToolset>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="$(ReleaseBuild)" Label="Configuration">
<ConfigurationType>DynamicLibrary</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v140</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings" />
<ImportGroup Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup Condition="$(DebugBuild)">
<LinkIncremental>true</LinkIncremental>
<TargetName>Cntk.$(ProjectName)-$(CntkComponentVersion)</TargetName>
</PropertyGroup>
<PropertyGroup Condition="$(ReleaseBuild)">
<LinkIncremental>false</LinkIncremental>
<TargetName>Cntk.$(ProjectName)-$(CntkComponentVersion)</TargetName>
</PropertyGroup>
<ItemDefinitionGroup>
<ClCompile>
<AdditionalIncludeDirectories>$(SolutionDir)Source\CNTKv2LibraryDll\API</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<AdditionalLibraryDirectories>$(OutDir);$(SolutionDir)$(Platform)\$(Configuration)</AdditionalLibraryDirectories>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="$(DebugBuild)">
<ClCompile>
<PrecompiledHeader>NotUsing</PrecompiledHeader>
<WarningLevel>Level4</WarningLevel>
<Optimization>Disabled</Optimization>
<PreprocessorDefinitions>WIN32;_DEBUG;_USRDLL;_WINDOWS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<SDLCheck>true</SDLCheck>
<TreatWarningAsError>false</TreatWarningAsError>
<AdditionalOptions>/bigobj %(AdditionalOptions)</AdditionalOptions>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalDependencies>Cntk.Core-$(CntkComponentVersion).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="$(ReleaseBuild)">
<ClCompile>
<WarningLevel>Level4</WarningLevel>
<PrecompiledHeader>NotUsing</PrecompiledHeader>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>WIN32;NDEBUG;_USRDLL;_WINDOWS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<SDLCheck>true</SDLCheck>
<FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
<AdditionalOptions>/d2Zi+ %(AdditionalOptions)</AdditionalOptions>
<TreatWarningAsError>false</TreatWarningAsError>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<AdditionalDependencies>Cntk.Core-$(CntkComponentVersion).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="$(CpuOnlyBuild)">
<ClCompile>
<PreprocessorDefinitions>CPUONLY;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<RuntimeLibrary Condition="'$(Configuration)|$(Platform)'=='Debug_CpuOnly|x64'">MultiThreadedDebug</RuntimeLibrary>
<RuntimeLibrary Condition="'$(Configuration)|$(Platform)'=='Release_CpuOnly|x64'">MultiThreaded</RuntimeLibrary>
</ClCompile>
</ItemDefinitionGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>

View file

@@ -0,0 +1,36 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup>
<Filter Include="Source Files">
<UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
<Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
</Filter>
<Filter Include="Header Files">
<UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
<Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
</Filter>
<Filter Include="Resource Files">
<UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
<Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
</Filter>
</ItemGroup>
<ItemGroup>
<ClCompile Include="ProposalLayerLib.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="ProposalLayerLib.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="NmsUtils.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="Rectangle2D.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="RpnUtils.h">
<Filter>Header Files</Filter>
</ClInclude>
</ItemGroup>
</Project>

View file

@@ -0,0 +1,72 @@
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
#pragma once
#include <algorithm>
#include <stdexcept>
//
// A two-dimensional, axis-aligned rectangle with float-valued coordinates.
// The rectangle is closed: it includes the points on its boundary.
//
// Ported from the Java version available at http://algs4.cs.princeton.edu/code/edu/princeton/cs/algs4/Rectangle2D.java.html
//
#pragma pack(push, 1)
struct Point2D {
float x, y;
};
// A 2D axis-aligned rectangle with a closed, pixel-style extent (Width() == xmax - xmin + 1).
struct Rectangle2D {
float xmin, ymin; // minimum x- and y-coordinates
float xmax, ymax; // maximum x- and y-coordinates
Rectangle2D(float xmin, float ymin, float xmax, float ymax)
:xmin{ xmin }, ymin{ ymin }, xmax{ xmax }, ymax{ ymax }
{
if (xmax < xmin || ymax < ymin)
throw std::invalid_argument("Invalid rectangle");
}
Rectangle2D(float w, float h, const Point2D& center)
:xmin{ center.x - 0.5f * w },
ymin{ center.y - 0.5f * h },
xmax{ center.x + 0.5f * w - 1.f },
ymax{ center.y + 0.5f * h - 1.f }
{
if (xmax < xmin || ymax < ymin)
throw std::invalid_argument("Invalid rectangle");
}
float Width() const { return xmax - xmin + 1.f; }
float Height() const { return ymax - ymin + 1.f; }
float Area() const { return Width() * Height(); }
bool Intersects(const Rectangle2D& that) const
{
return xmax >= that.xmin && ymax >= that.ymin
&& that.xmax >= xmin && that.ymax >= ymin;
}
float Overlap(const Rectangle2D& that) const
{
if (!Intersects(that))
return 0;
Rectangle2D overlap{ std::max(xmin, that.xmin), std::max(ymin, that.ymin),
std::min(xmax, that.xmax), std::min(ymax, that.ymax) };
return overlap.Area();
}
Point2D Center() const {
return { 0.5f * (xmin + xmax + 1.f), 0.5f * (ymin + ymax + 1.f) };
}
};
#pragma pack(pop)
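The +1 terms above give the rectangle a closed, pixel-style extent: the box (0, 0, 1, 1) is two pixels wide and two pixels tall. A tiny hypothetical check of that convention:

#include <cassert>
#include "Rectangle2D.h"

int main()
{
    Rectangle2D a{ 0.f, 0.f, 1.f, 1.f };
    Rectangle2D b{ 1.f, 1.f, 2.f, 2.f };
    assert(a.Width() == 2.f && a.Area() == 4.f); // a 2x2 pixel patch
    assert(a.Overlap(b) == 1.f);                 // only the shared pixel (1, 1)
    assert(a.Center().x == 1.f);                 // 0.5 * (xmin + xmax + 1)
    return 0;
}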

View file

@@ -0,0 +1,103 @@
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
#pragma once
#include <cmath>
#include <vector>
#include <stdint.h>
#include "Rectangle2D.h"
// Generate anchor (reference) windows by enumerating aspect ratios and scales
// with respect to a reference (0, 0, 15, 15) window.
std::vector<Rectangle2D> GenerateAnchors(const std::vector<uint32_t>& scales, const std::vector<float>& ratios = { 0.5, 1, 2 }, float baseSize = 16.f)
{
Rectangle2D base(0.f, 0.f, baseSize-1, baseSize-1);
auto area = base.Area();
auto center = base.Center();
std::vector<Rectangle2D> anchors;
anchors.reserve(ratios.size() * scales.size());
for (const auto& ratio : ratios)
{
auto areaRatio = area / ratio;
auto w = std::round(std::sqrt(areaRatio));
auto h = std::round(w * ratio);
for (const auto& scale : scales)
{
auto scaledW = w * scale;
auto scaledH = h * scale;
anchors.emplace_back(scaledW, scaledH, center);
}
}
return anchors;
}
// Deltas contain a tuple of 4 floats (dx, dy, dw, dh) for every box
// in the boxes vector. Returns a vector of boxes transformed according
// to the following rule:
// new_center_x = dx * width + center_x
// new_w = exp(dw) * width
// new_xmin = new_center_x - 0.5 * new_w
std::vector<Rectangle2D> TransformBboxInv(const std::vector<Rectangle2D>& boxes, const float* deltas, const size_t stride)
{
std::vector<Rectangle2D> predBoxes;
predBoxes.reserve(boxes.size());
size_t index = 0;
for (const auto& box : boxes)
{
auto offset = index++;
if (index % stride == 0)
// each block consists of four strides, index points to the end of the first stride,
// jump over the remaining three.
index += 3 * stride;
float dx = deltas[offset]; offset += stride;
float dy = deltas[offset]; offset += stride;
float dw = deltas[offset]; offset += stride;
float dh = deltas[offset];
auto center = box.Center();
auto x = std::min(dx, 10.f) * box.Width() + center.x;
auto y = std::min(dy, 10.f) * box.Height() + center.y;
auto w = std::exp(std::min(dw, 10.f)) * box.Width();
auto h = std::exp(std::min(dh, 10.f)) * box.Height();
// The following seems incorrect, but it matches the Caffe implementation.
predBoxes.emplace_back(x - 0.5f * w, y - 0.5f * h, x + 0.5f * w, y + 0.5f * h);
}
return predBoxes;
}
// Clip boxes to image boundaries.
// imInfo - a tuple of 6 floats (pad_width, pad_height, scaled_image_width, scaled_image_height, orig_img_width, orig_img_height),
// e.g. (1000, 1000, 1000, 600, 500, 300) for an original 500x300 image that is scaled and padded to 1000x1000.
void ClipBoxes(std::vector<Rectangle2D>& boxes, const float* imInfo)
{
auto i = 0;
auto pad_width = imInfo[i++];
auto pad_height = imInfo[i++];
auto scaled_image_width = imInfo[i++];
auto scaled_image_height = imInfo[i++];
auto xmin = (pad_width - scaled_image_width) / 2;
auto xmax = xmin + scaled_image_width - 1;
auto ymin = (pad_height - scaled_image_height) / 2;
auto ymax = ymin + scaled_image_height - 1;
for (auto& box : boxes)
{
box.xmin = std::max(std::min(box.xmin, xmax), xmin);
box.xmax = std::max(std::min(box.xmax, xmax), xmin);
box.ymin = std::max(std::min(box.ymin, ymax), ymin);
box.ymax = std::max(std::min(box.ymax, ymax), ymin);
}
}
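As a sanity check on GenerateAnchors (a hypothetical sketch, not part of the commit): with the default ratios {0.5, 1, 2} and scales {8, 16, 32}, the first anchor derived from the 16x16 reference window works out to (-84, -40, 99, 55), which matches the canonical Faster R-CNN anchor generation.

#include <cassert>
#include "RpnUtils.h"

int main()
{
    auto anchors = GenerateAnchors({ 8, 16, 32 });
    assert(anchors.size() == 9); // 3 aspect ratios x 3 scales
    // ratio 0.5, scale 8: a 184x96 window centered on the base box center (8, 8)
    assert(anchors[0].xmin == -84.f && anchors[0].ymin == -40.f);
    assert(anchors[0].xmax == 99.f && anchors[0].ymax == 55.f);
    return 0;
}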

View file

@@ -0,0 +1,88 @@
# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license. See LICENSE.md file in the project root
# for full license information.
# ==============================================================================
import os, sys, argparse, copy
import cntk as C
from cntk import ops
abs_path = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.join(abs_path, "..", "..", "Image", "Detection", "FasterRCNN"))
C.device.try_set_default_device(C.device.cpu())
from FasterRCNN import eval_faster_rcnn_mAP, set_global_vars
from config import cfg
ops.register_native_user_function('ProposalLayerOp', 'Cntk.ProposalLayerLib-' + C.__version__.rstrip('+'), 'CreateProposalLayer')
def clone_with_native_proposal_layer(model):
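# Replace every Python ProposalLayer user-function node with the registered native op,
# carrying over its attributes and adding the train/test NMS settings from the config.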
def filter(x):
return type(x) == C.Function and \
x.op_name == 'UserFunction' and \
x.name == 'ProposalLayer'
def converter(x):
layer_config = copy.deepcopy(x.attributes)
layer_config["test_pre_nms_topN"] = cfg["TEST"].RPN_PRE_NMS_TOP_N
layer_config["test_post_nms_topN"] = cfg["TEST"].RPN_POST_NMS_TOP_N
layer_config["test_nms_thresh"] = float(cfg["TEST"].RPN_NMS_THRESH)
layer_config["test_min_size"] = float(cfg["TEST"].RPN_MIN_SIZE)
layer_config["train_pre_nms_topN"] = cfg["TRAIN"].RPN_PRE_NMS_TOP_N
layer_config["train_post_nms_topN"] = cfg["TRAIN"].RPN_POST_NMS_TOP_N
layer_config["train_nms_thresh"] = float(cfg["TRAIN"].RPN_NMS_THRESH)
layer_config["train_min_size"] = float(cfg["TRAIN"].RPN_MIN_SIZE)
return ops.native_user_function('ProposalLayerOp', list(x.inputs), layer_config, 'native_proposal_layer')
return C.misc.convert(model, filter, converter)
def convert(model_path):
device = C.cpu()
model = C.Function.load(model_path, device=device)
# Replace all python proposal layer user-functions with native proposal layer
# user functions.
return clone_with_native_proposal_layer(model)
def evaluate(model_path):
# ProposalLayer currently only runs on the CPU
eval_device = C.cpu()
model = C.Function.load(model_path, device=eval_device)
set_global_vars(False)
return eval_faster_rcnn_mAP(model)
#############################
# main function boilerplate #
#############################
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('-path', '--model_path',
help='Filepath of a model created with FasterRCNN.py', required=True)
parser.add_argument('-eval', '--eval_model',
help='Evaluate a FasterRCNN model (with or without a native Proposal Layer)',
required=False, default=False)
args = parser.parse_args()
if args.eval_model:
evaluate(args.model_path)
else:
model = convert(args.model_path)
path = os.path.dirname(args.model_path)
filename = 'native_proposal_layer_' + os.path.basename(args.model_path)
model.save(os.path.join(path, filename))

View file

@@ -26,7 +26,6 @@ class ProposalLayer(UserFunction):
'''
def __init__(self, arg1, arg2, arg3, name='ProposalLayer', param_str=None):
super(ProposalLayer, self).__init__([arg1, arg2, arg3], name=name)
self.param_str_ = param_str if param_str is not None else "'feat_stride': 16\n'scales':\n - 8 \n - 16 \n - 32"
# parse the layer parameter string, which must be valid YAML
@@ -36,6 +35,10 @@ class ProposalLayer(UserFunction):
self._anchors = generate_anchors(scales=np.array(anchor_scales))
self._num_anchors = self._anchors.shape[0]
attributes = {'feat_stride' : self._feat_stride, 'scales' : anchor_scales}
super(ProposalLayer, self).__init__([arg1, arg2, arg3], attributes=attributes, name=name)
if DEBUG:
print ('feat_stride: {}'.format(self._feat_stride))
print ('anchors:')

View file

@@ -565,6 +565,27 @@ $(BINARY_CONVOLUTION_EXAMPLE_LIB): $(BINARY_CONVOLUTION_EXAMPLE_LIBRARY_OBJ) | $
$(CXX) $(LDFLAGS) -shared $(patsubst %,-L%, $(LIBDIR)) $(patsubst %,$(RPATH)%, $(LIBDIR) $(ORIGINDIR)) -o $@ $^ -l$(CNTKLIBRARY) $(SOURCEDIR)/../Examples/Extensibility/BinaryConvolution/BinaryConvolutionLib/halide/halide_convolve_nofeatures.a
##############################################
# Native implementation of the Proposal Layer
##############################################
PROPOSAL_LAYER_LIBRARY_SRC =\
$(SOURCEDIR)/../Examples/Extensibility/ProposalLayer/ProposalLayerLib/ProposalLayerLib.cpp
PROPOSAL_LAYER_LIBRARY_OBJ := $(patsubst %.cpp, $(OBJDIR)/%.o, $(PROPOSAL_LAYER_LIBRARY_SRC))
PROPOSAL_LAYER_LIB:= $(LIBDIR)/Cntk.ProposalLayerLib-$(CNTK_COMPONENT_VERSION).so
ALL_LIBS += $(PROPOSAL_LAYER_LIB)
PYTHON_LIBS += $(PROPOSAL_LAYER_LIB)
SRC += $(PROPOSAL_LAYER_LIBRARY_SRC)
$(PROPOSAL_LAYER_LIB): $(PROPOSAL_LAYER_LIBRARY_OBJ) | $(CNTKLIBRARY_LIB)
@echo $(SEPARATOR)
@echo creating $@ for $(ARCH) with build type $(BUILDTYPE)
@mkdir -p $(dir $@)
$(CXX) $(LDFLAGS) -shared $(patsubst %,-L%, $(LIBDIR)) $(patsubst %,$(RPATH)%, $(LIBDIR) $(ORIGINDIR)) -o $@ $^ -l$(CNTKLIBRARY)
########################################
# LibEval
########################################

View file

@@ -5,12 +5,11 @@
# ==============================================================================
import numpy as np
import os
import os, sys
import pytest
import sys
from cntk import load_model
from cntk import load_model, cntk_py
from cntk.cntk_py import DeviceKind_GPU
from cntk.device import try_set_default_device, gpu
from cntk.device import try_set_default_device, gpu, cpu
from cntk.logging.graph import get_node_outputs
from cntk.ops.tests.ops_test_utils import cntk_device
from _cntk_py import force_deterministic_algorithms
@@ -19,6 +18,7 @@ force_deterministic_algorithms()
abs_path = os.path.dirname(os.path.abspath(__file__))
sys.path.append(abs_path)
sys.path.append(os.path.join(abs_path, "..", "..", "..", "..", "Examples", "Image", "Detection", "FasterRCNN"))
sys.path.append(os.path.join(abs_path, "..", "..", "..", "..", "Examples", "Extensibility", "ProposalLayer"))
from prepare_test_data import prepare_Grocery_data, prepare_alexnet_v0_model
grocery_path = prepare_Grocery_data()
@@ -63,6 +63,53 @@ def test_fasterrcnn_grocery_training_e2e(device_id):
meanAP = eval_faster_rcnn_mAP(eval_model)
assert meanAP > 0.01
@win35_linux34
def test_native_fasterrcnn_eval(tmpdir, device_id):
from config import cfg
cfg["CNTK"].FORCE_DETERMINISTIC = True
cfg["CNTK"].DEBUG_OUTPUT = False
cfg["CNTK"].VISUALIZE_RESULTS = False
cfg["CNTK"].FAST_MODE = True
cfg["CNTK"].MAP_FILE_PATH = grocery_path
from FasterRCNN import set_global_vars
set_global_vars(False)
if cntk_device(device_id).type() != DeviceKind_GPU:
pytest.skip('test only runs on GPU') # it runs very slowly on CPU
try_set_default_device(cntk_device(device_id))
# since we do not use a reader for evaluation we need unzipped data
externalData = 'CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY' in os.environ
if externalData:
extPath = os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY']
model_file = os.path.join(extPath, "PreTrainedModels", "AlexNet", "v0", "AlexNet.model")
else:
model_file = os.path.join(abs_path, *"../../../../Examples/Image/PretrainedModels/AlexNet.model".split("/"))
from FasterRCNN import train_faster_rcnn_e2e, eval_faster_rcnn_mAP
np.random.seed(seed=3)
eval_model = train_faster_rcnn_e2e(model_file, debug_output=False)
meanAP_python = eval_faster_rcnn_mAP(eval_model)
cntk_py.always_allow_setting_default_device()
try_set_default_device(cpu())
from native_proposal_layer import clone_with_native_proposal_layer
model_with_native_pl = clone_with_native_proposal_layer(eval_model)
meanAP_native = eval_faster_rcnn_mAP(model_with_native_pl)
# 0.2067 (Python) vs 0.2251 (native) -- the difference stems
# from different sorting algorithms: quicksort in Python and
# heapsort in C++ (neither is stable).
assert abs(meanAP_python - meanAP_native) < 0.02
@win35_linux34
def test_fasterrcnn_grocery_training_4stage(device_id):
from config import cfg

View file

@@ -1621,8 +1621,12 @@ class UserFunction(Function):
name (str): name of this function
'''
def __init__(self, inputs, as_numpy=True, name=''):
super(UserFunction, self).__init__(inputs, name)
def __init__(self, inputs, as_numpy=True, attributes=None, name=''):
if attributes is None:
super(UserFunction, self).__init__(inputs, name)
else:
attributes = _py_dict_to_cntk_dict(attributes)
super(UserFunction, self).__init__(inputs, attributes, name)
self.set_native(False)
self.as_numpy = as_numpy