Merge remote-tracking branch 'remotes/origin/master' into guoguo/linuxBuildFix
This commit is contained in:
Коммит
b415924234
81
CNTK.sln
81
CNTK.sln
|
@ -1,7 +1,7 @@
|
|||
|
||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||
# Visual Studio 2013
|
||||
VisualStudioVersion = 12.0.31101.0
|
||||
VisualStudioVersion = 12.0.21005.1
|
||||
MinimumVisualStudioVersion = 10.0.40219.1
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CNTKMathDll", "Math\Math\Math.vcxproj", "{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}"
|
||||
ProjectSection(ProjectDependencies) = postProject
|
||||
|
@ -204,6 +204,7 @@ EndProject
|
|||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CNTKComputationNetworkLib", "MachineLearning\CNTKComputationNetworkLib\CNTKComputationNetworkLib.vcxproj", "{928ABD1B-4D3B-4017-AEF1-0FA1B4467513}"
|
||||
ProjectSection(ProjectDependencies) = postProject
|
||||
{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5} = {60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}
|
||||
{EAD17188-072C-4726-B840-A769C36DAD1B} = {EAD17188-072C-4726-B840-A769C36DAD1B}
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CNTKSGDLib", "MachineLearning\CNTKSGDLib\CNTKSGDLib.vcxproj", "{DE3C54E5-D7D0-47AF-A783-DFDCE59E7937}"
|
||||
|
@ -350,127 +351,85 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "FullUtterance", "FullUttera
|
|||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|Win32 = Debug|Win32
|
||||
Debug|x64 = Debug|x64
|
||||
Release|Win32 = Release|Win32
|
||||
Release|x64 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}.Debug|Win32.ActiveCfg = Debug|x64
|
||||
{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}.Debug|x64.Build.0 = Debug|x64
|
||||
{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}.Release|Win32.ActiveCfg = Release|x64
|
||||
{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}.Release|x64.ActiveCfg = Release|x64
|
||||
{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}.Release|x64.Build.0 = Release|x64
|
||||
{E6F26F9A-FF64-4F0A-B749-CD309EE357EE}.Debug|Win32.ActiveCfg = Debug|x64
|
||||
{E6F26F9A-FF64-4F0A-B749-CD309EE357EE}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{E6F26F9A-FF64-4F0A-B749-CD309EE357EE}.Debug|x64.Build.0 = Debug|x64
|
||||
{E6F26F9A-FF64-4F0A-B749-CD309EE357EE}.Release|Win32.ActiveCfg = Release|x64
|
||||
{E6F26F9A-FF64-4F0A-B749-CD309EE357EE}.Release|x64.ActiveCfg = Release|x64
|
||||
{E6F26F9A-FF64-4F0A-B749-CD309EE357EE}.Release|x64.Build.0 = Release|x64
|
||||
{6CEE834A-8104-46A8-8902-64C81BD7928F}.Debug|Win32.ActiveCfg = Debug|x64
|
||||
{6CEE834A-8104-46A8-8902-64C81BD7928F}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{6CEE834A-8104-46A8-8902-64C81BD7928F}.Debug|x64.Build.0 = Debug|x64
|
||||
{6CEE834A-8104-46A8-8902-64C81BD7928F}.Release|Win32.ActiveCfg = Release|x64
|
||||
{6CEE834A-8104-46A8-8902-64C81BD7928F}.Release|x64.ActiveCfg = Release|x64
|
||||
{6CEE834A-8104-46A8-8902-64C81BD7928F}.Release|x64.Build.0 = Release|x64
|
||||
{33D2FD22-DEF2-4507-A58A-368F641AEBE5}.Debug|Win32.ActiveCfg = Debug|x64
|
||||
{33D2FD22-DEF2-4507-A58A-368F641AEBE5}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{33D2FD22-DEF2-4507-A58A-368F641AEBE5}.Debug|x64.Build.0 = Debug|x64
|
||||
{33D2FD22-DEF2-4507-A58A-368F641AEBE5}.Release|Win32.ActiveCfg = Release|x64
|
||||
{33D2FD22-DEF2-4507-A58A-368F641AEBE5}.Release|x64.ActiveCfg = Release|x64
|
||||
{33D2FD22-DEF2-4507-A58A-368F641AEBE5}.Release|x64.Build.0 = Release|x64
|
||||
{668BEED5-AC07-4F35-B3AE-EE65A7F9C976}.Debug|Win32.ActiveCfg = Debug|x64
|
||||
{668BEED5-AC07-4F35-B3AE-EE65A7F9C976}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{668BEED5-AC07-4F35-B3AE-EE65A7F9C976}.Debug|x64.Build.0 = Debug|x64
|
||||
{668BEED5-AC07-4F35-B3AE-EE65A7F9C976}.Release|Win32.ActiveCfg = Release|x64
|
||||
{668BEED5-AC07-4F35-B3AE-EE65A7F9C976}.Release|x64.ActiveCfg = Release|x64
|
||||
{E6646FFE-3588-4276-8A15-8D65C22711C1}.Debug|Win32.ActiveCfg = Debug|x64
|
||||
{E6646FFE-3588-4276-8A15-8D65C22711C1}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{E6646FFE-3588-4276-8A15-8D65C22711C1}.Debug|x64.Build.0 = Debug|x64
|
||||
{E6646FFE-3588-4276-8A15-8D65C22711C1}.Release|Win32.ActiveCfg = Release|x64
|
||||
{E6646FFE-3588-4276-8A15-8D65C22711C1}.Release|x64.ActiveCfg = Release|x64
|
||||
{E6646FFE-3588-4276-8A15-8D65C22711C1}.Release|x64.Build.0 = Release|x64
|
||||
{1D5787D4-52E4-45DB-951B-82F220EE0C6A}.Debug|Win32.ActiveCfg = Debug|x64
|
||||
{1D5787D4-52E4-45DB-951B-82F220EE0C6A}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{1D5787D4-52E4-45DB-951B-82F220EE0C6A}.Debug|x64.Build.0 = Debug|x64
|
||||
{1D5787D4-52E4-45DB-951B-82F220EE0C6A}.Release|Win32.ActiveCfg = Release|x64
|
||||
{1D5787D4-52E4-45DB-951B-82F220EE0C6A}.Release|x64.ActiveCfg = Release|x64
|
||||
{1D5787D4-52E4-45DB-951B-82F220EE0C6A}.Release|x64.Build.0 = Release|x64
|
||||
{62836DC1-DF77-4B98-BF2D-45C943B7DDC6}.Debug|Win32.ActiveCfg = Debug|x64
|
||||
{62836DC1-DF77-4B98-BF2D-45C943B7DDC6}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{62836DC1-DF77-4B98-BF2D-45C943B7DDC6}.Debug|x64.Build.0 = Debug|x64
|
||||
{62836DC1-DF77-4B98-BF2D-45C943B7DDC6}.Release|Win32.ActiveCfg = Release|x64
|
||||
{62836DC1-DF77-4B98-BF2D-45C943B7DDC6}.Release|x64.ActiveCfg = Release|x64
|
||||
{62836DC1-DF77-4B98-BF2D-45C943B7DDC6}.Release|x64.Build.0 = Release|x64
|
||||
{482999D1-B7E2-466E-9F8D-2119F93EAFD9}.Debug|Win32.ActiveCfg = Debug|x64
|
||||
{482999D1-B7E2-466E-9F8D-2119F93EAFD9}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{482999D1-B7E2-466E-9F8D-2119F93EAFD9}.Debug|x64.Build.0 = Debug|x64
|
||||
{482999D1-B7E2-466E-9F8D-2119F93EAFD9}.Release|Win32.ActiveCfg = Release|x64
|
||||
{482999D1-B7E2-466E-9F8D-2119F93EAFD9}.Release|x64.ActiveCfg = Release|x64
|
||||
{482999D1-B7E2-466E-9F8D-2119F93EAFD9}.Release|x64.Build.0 = Release|x64
|
||||
{0F30EBCF-09F3-4EED-BF54-4214BCE53FEC}.Debug|Win32.ActiveCfg = Debug|x64
|
||||
{0F30EBCF-09F3-4EED-BF54-4214BCE53FEC}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{0F30EBCF-09F3-4EED-BF54-4214BCE53FEC}.Debug|x64.Build.0 = Debug|x64
|
||||
{0F30EBCF-09F3-4EED-BF54-4214BCE53FEC}.Release|Win32.ActiveCfg = Release|x64
|
||||
{0F30EBCF-09F3-4EED-BF54-4214BCE53FEC}.Release|x64.ActiveCfg = Release|x64
|
||||
{0F30EBCF-09F3-4EED-BF54-4214BCE53FEC}.Release|x64.Build.0 = Release|x64
|
||||
{B3DD765E-694E-4494-BAD7-37BBF2942517}.Debug|Win32.ActiveCfg = Debug|x64
|
||||
{B3DD765E-694E-4494-BAD7-37BBF2942517}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{B3DD765E-694E-4494-BAD7-37BBF2942517}.Debug|x64.Build.0 = Debug|x64
|
||||
{B3DD765E-694E-4494-BAD7-37BBF2942517}.Release|Win32.ActiveCfg = Release|x64
|
||||
{B3DD765E-694E-4494-BAD7-37BBF2942517}.Release|x64.ActiveCfg = Release|x64
|
||||
{B3DD765E-694E-4494-BAD7-37BBF2942517}.Release|x64.Build.0 = Release|x64
|
||||
{9A2F2441-5972-4EA8-9215-4119FCE0FB68}.Debug|Win32.ActiveCfg = Debug|x64
|
||||
{9A2F2441-5972-4EA8-9215-4119FCE0FB68}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{9A2F2441-5972-4EA8-9215-4119FCE0FB68}.Debug|x64.Build.0 = Debug|x64
|
||||
{9A2F2441-5972-4EA8-9215-4119FCE0FB68}.Release|Win32.ActiveCfg = Release|x64
|
||||
{9A2F2441-5972-4EA8-9215-4119FCE0FB68}.Release|x64.ActiveCfg = Release|x64
|
||||
{9A2F2441-5972-4EA8-9215-4119FCE0FB68}.Release|x64.Build.0 = Release|x64
|
||||
{014DA766-B37B-4581-BC26-963EA5507931}.Debug|Win32.ActiveCfg = Debug|x64
|
||||
{014DA766-B37B-4581-BC26-963EA5507931}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{014DA766-B37B-4581-BC26-963EA5507931}.Debug|x64.Build.0 = Debug|x64
|
||||
{014DA766-B37B-4581-BC26-963EA5507931}.Release|Win32.ActiveCfg = Release|x64
|
||||
{014DA766-B37B-4581-BC26-963EA5507931}.Release|x64.ActiveCfg = Release|x64
|
||||
{014DA766-B37B-4581-BC26-963EA5507931}.Release|x64.Build.0 = Release|x64
|
||||
{D667AF32-028A-4A5D-BE19-F46776F0F6B2}.Debug|Win32.ActiveCfg = Debug|x64
|
||||
{D667AF32-028A-4A5D-BE19-F46776F0F6B2}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{D667AF32-028A-4A5D-BE19-F46776F0F6B2}.Debug|x64.Build.0 = Debug|x64
|
||||
{D667AF32-028A-4A5D-BE19-F46776F0F6B2}.Release|Win32.ActiveCfg = Release|x64
|
||||
{D667AF32-028A-4A5D-BE19-F46776F0F6B2}.Release|x64.ActiveCfg = Release|x64
|
||||
{D667AF32-028A-4A5D-BE19-F46776F0F6B2}.Release|x64.Build.0 = Release|x64
|
||||
{DBB3C106-B0B4-4059-8477-C89528CEC1B0}.Debug|Win32.ActiveCfg = Debug|x64
|
||||
{DBB3C106-B0B4-4059-8477-C89528CEC1B0}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{DBB3C106-B0B4-4059-8477-C89528CEC1B0}.Release|Win32.ActiveCfg = Release|x64
|
||||
{DBB3C106-B0B4-4059-8477-C89528CEC1B0}.Release|x64.ActiveCfg = Release|x64
|
||||
{CE429AA2-3778-4619-8FD1-49BA3B81197B}.Debug|Win32.ActiveCfg = Debug|x64
|
||||
{CE429AA2-3778-4619-8FD1-49BA3B81197B}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{CE429AA2-3778-4619-8FD1-49BA3B81197B}.Debug|x64.Build.0 = Debug|x64
|
||||
{CE429AA2-3778-4619-8FD1-49BA3B81197B}.Release|Win32.ActiveCfg = Release|x64
|
||||
{CE429AA2-3778-4619-8FD1-49BA3B81197B}.Release|x64.ActiveCfg = Release|x64
|
||||
{CE429AA2-3778-4619-8FD1-49BA3B81197B}.Release|x64.Build.0 = Release|x64
|
||||
{7C4E77C9-6B17-4B02-82C1-DB62EEE2635B}.Debug|Win32.ActiveCfg = Debug|x64
|
||||
{7C4E77C9-6B17-4B02-82C1-DB62EEE2635B}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{7C4E77C9-6B17-4B02-82C1-DB62EEE2635B}.Debug|x64.Build.0 = Debug|x64
|
||||
{7C4E77C9-6B17-4B02-82C1-DB62EEE2635B}.Release|Win32.ActiveCfg = Release|x64
|
||||
{7C4E77C9-6B17-4B02-82C1-DB62EEE2635B}.Release|x64.ActiveCfg = Release|x64
|
||||
{7C4E77C9-6B17-4B02-82C1-DB62EEE2635B}.Release|x64.Build.0 = Release|x64
|
||||
{928ABD1B-4D3B-4017-AEF1-0FA1B4467513}.Debug|Win32.ActiveCfg = Debug|x64
|
||||
{928ABD1B-4D3B-4017-AEF1-0FA1B4467513}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{928ABD1B-4D3B-4017-AEF1-0FA1B4467513}.Debug|x64.Build.0 = Debug|x64
|
||||
{928ABD1B-4D3B-4017-AEF1-0FA1B4467513}.Release|Win32.ActiveCfg = Release|x64
|
||||
{928ABD1B-4D3B-4017-AEF1-0FA1B4467513}.Release|x64.ActiveCfg = Release|x64
|
||||
{928ABD1B-4D3B-4017-AEF1-0FA1B4467513}.Release|x64.Build.0 = Release|x64
|
||||
{DE3C54E5-D7D0-47AF-A783-DFDCE59E7937}.Debug|Win32.ActiveCfg = Debug|x64
|
||||
{DE3C54E5-D7D0-47AF-A783-DFDCE59E7937}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{DE3C54E5-D7D0-47AF-A783-DFDCE59E7937}.Debug|x64.Build.0 = Debug|x64
|
||||
{DE3C54E5-D7D0-47AF-A783-DFDCE59E7937}.Release|Win32.ActiveCfg = Release|x64
|
||||
{DE3C54E5-D7D0-47AF-A783-DFDCE59E7937}.Release|x64.ActiveCfg = Release|x64
|
||||
{DE3C54E5-D7D0-47AF-A783-DFDCE59E7937}.Release|x64.Build.0 = Release|x64
|
||||
{EAD17188-072C-4726-B840-A769C36DAD1B}.Debug|Win32.ActiveCfg = Debug|x64
|
||||
{EAD17188-072C-4726-B840-A769C36DAD1B}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{EAD17188-072C-4726-B840-A769C36DAD1B}.Debug|x64.Build.0 = Debug|x64
|
||||
{EAD17188-072C-4726-B840-A769C36DAD1B}.Release|Win32.ActiveCfg = Release|x64
|
||||
{EAD17188-072C-4726-B840-A769C36DAD1B}.Release|x64.ActiveCfg = Release|x64
|
||||
{EAD17188-072C-4726-B840-A769C36DAD1B}.Release|x64.Build.0 = Release|x64
|
||||
EndGlobalSection
|
||||
|
@ -478,46 +437,46 @@ Global
|
|||
HideSolutionNode = FALSE
|
||||
EndGlobalSection
|
||||
GlobalSection(NestedProjects) = preSolution
|
||||
{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5} = {DD043083-71A4-409A-AA91-F9C548DCF7EC}
|
||||
{E6F26F9A-FF64-4F0A-B749-CD309EE357EE} = {DD043083-71A4-409A-AA91-F9C548DCF7EC}
|
||||
{482999D1-B7E2-466E-9F8D-2119F93EAFD9} = {DD043083-71A4-409A-AA91-F9C548DCF7EC}
|
||||
{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5} = {DD043083-71A4-409A-AA91-F9C548DCF7EC}
|
||||
{B3DD765E-694E-4494-BAD7-37BBF2942517} = {DD043083-71A4-409A-AA91-F9C548DCF7EC}
|
||||
{928ABD1B-4D3B-4017-AEF1-0FA1B4467513} = {DD043083-71A4-409A-AA91-F9C548DCF7EC}
|
||||
{DE3C54E5-D7D0-47AF-A783-DFDCE59E7937} = {DD043083-71A4-409A-AA91-F9C548DCF7EC}
|
||||
{EAD17188-072C-4726-B840-A769C36DAD1B} = {DD043083-71A4-409A-AA91-F9C548DCF7EC}
|
||||
{6CEE834A-8104-46A8-8902-64C81BD7928F} = {D45DF403-6781-444E-B654-A96868C5BE68}
|
||||
{33D2FD22-DEF2-4507-A58A-368F641AEBE5} = {33EBFE78-A1A8-4961-8938-92A271941F94}
|
||||
{668BEED5-AC07-4F35-B3AE-EE65A7F9C976} = {D45DF403-6781-444E-B654-A96868C5BE68}
|
||||
{0F30EBCF-09F3-4EED-BF54-4214BCE53FEC} = {D45DF403-6781-444E-B654-A96868C5BE68}
|
||||
{DBB3C106-B0B4-4059-8477-C89528CEC1B0} = {D45DF403-6781-444E-B654-A96868C5BE68}
|
||||
{C47CDAA5-6D6C-429E-BC89-7CA0F868FDC8} = {D45DF403-6781-444E-B654-A96868C5BE68}
|
||||
{7C4E77C9-6B17-4B02-82C1-DB62EEE2635B} = {D45DF403-6781-444E-B654-A96868C5BE68}
|
||||
{5E666C53-2D82-49C9-9127-3FDDC321C741} = {D45DF403-6781-444E-B654-A96868C5BE68}
|
||||
{E6646FFE-3588-4276-8A15-8D65C22711C1} = {33EBFE78-A1A8-4961-8938-92A271941F94}
|
||||
{1D5787D4-52E4-45DB-951B-82F220EE0C6A} = {33EBFE78-A1A8-4961-8938-92A271941F94}
|
||||
{62836DC1-DF77-4B98-BF2D-45C943B7DDC6} = {33EBFE78-A1A8-4961-8938-92A271941F94}
|
||||
{482999D1-B7E2-466E-9F8D-2119F93EAFD9} = {DD043083-71A4-409A-AA91-F9C548DCF7EC}
|
||||
{0F30EBCF-09F3-4EED-BF54-4214BCE53FEC} = {D45DF403-6781-444E-B654-A96868C5BE68}
|
||||
{B3DD765E-694E-4494-BAD7-37BBF2942517} = {DD043083-71A4-409A-AA91-F9C548DCF7EC}
|
||||
{33D2FD22-DEF2-4507-A58A-368F641AEBE5} = {33EBFE78-A1A8-4961-8938-92A271941F94}
|
||||
{9A2F2441-5972-4EA8-9215-4119FCE0FB68} = {33EBFE78-A1A8-4961-8938-92A271941F94}
|
||||
{014DA766-B37B-4581-BC26-963EA5507931} = {33EBFE78-A1A8-4961-8938-92A271941F94}
|
||||
{D667AF32-028A-4A5D-BE19-F46776F0F6B2} = {33EBFE78-A1A8-4961-8938-92A271941F94}
|
||||
{3ED0465D-23E7-4855-9694-F788717B6533} = {39E42C4B-A078-4CA4-9D92-B883D8129601}
|
||||
{CE429AA2-3778-4619-8FD1-49BA3B81197B} = {33EBFE78-A1A8-4961-8938-92A271941F94}
|
||||
{065AF55D-AF02-448B-BFCD-52619FDA4BD0} = {39E42C4B-A078-4CA4-9D92-B883D8129601}
|
||||
{3ED0465D-23E7-4855-9694-F788717B6533} = {39E42C4B-A078-4CA4-9D92-B883D8129601}
|
||||
{3E9C89B1-C045-4F42-92B2-F9FFFFC2DBD4} = {39E42C4B-A078-4CA4-9D92-B883D8129601}
|
||||
{C70E1572-20FF-496C-A0A9-10AA6755A07C} = {39E42C4B-A078-4CA4-9D92-B883D8129601}
|
||||
{98D2C32B-0C1F-4E19-A626-65F7BA4600CF} = {065AF55D-AF02-448B-BFCD-52619FDA4BD0}
|
||||
{EA67F51F-1FE8-462D-9F3E-01161685AD59} = {065AF55D-AF02-448B-BFCD-52619FDA4BD0}
|
||||
{DE1A06BA-EC5C-4E0D-BCA8-3EA555310C58} = {065AF55D-AF02-448B-BFCD-52619FDA4BD0}
|
||||
{63024704-A2D7-497E-AD4B-5C10C6AA1374} = {065AF55D-AF02-448B-BFCD-52619FDA4BD0}
|
||||
{F9BEB27E-8AF5-464E-8D45-0000D5AFA2D3} = {EA67F51F-1FE8-462D-9F3E-01161685AD59}
|
||||
{889C1CCF-92B3-450B-B00D-FC9A9D5BE464} = {EA67F51F-1FE8-462D-9F3E-01161685AD59}
|
||||
{DBB3C106-B0B4-4059-8477-C89528CEC1B0} = {D45DF403-6781-444E-B654-A96868C5BE68}
|
||||
{CE429AA2-3778-4619-8FD1-49BA3B81197B} = {33EBFE78-A1A8-4961-8938-92A271941F94}
|
||||
{C47CDAA5-6D6C-429E-BC89-7CA0F868FDC8} = {D45DF403-6781-444E-B654-A96868C5BE68}
|
||||
{4BBF2950-3DBD-469A-AD57-6CACBEBAF541} = {C47CDAA5-6D6C-429E-BC89-7CA0F868FDC8}
|
||||
{5F733BBA-FE83-4668-8F83-8B0E78A36619} = {C47CDAA5-6D6C-429E-BC89-7CA0F868FDC8}
|
||||
{19EE975B-232D-49F0-94C7-6F1C6424FB53} = {C47CDAA5-6D6C-429E-BC89-7CA0F868FDC8}
|
||||
{7C4E77C9-6B17-4B02-82C1-DB62EEE2635B} = {D45DF403-6781-444E-B654-A96868C5BE68}
|
||||
{928ABD1B-4D3B-4017-AEF1-0FA1B4467513} = {DD043083-71A4-409A-AA91-F9C548DCF7EC}
|
||||
{DE3C54E5-D7D0-47AF-A783-DFDCE59E7937} = {DD043083-71A4-409A-AA91-F9C548DCF7EC}
|
||||
{5E666C53-2D82-49C9-9127-3FDDC321C741} = {D45DF403-6781-444E-B654-A96868C5BE68}
|
||||
{88F85A64-105D-4CDA-8199-B7A312FC8A27} = {19EE975B-232D-49F0-94C7-6F1C6424FB53}
|
||||
{8241108A-7824-4FF2-BECA-7521A9D89DCF} = {19EE975B-232D-49F0-94C7-6F1C6424FB53}
|
||||
{6D1353D6-F196-466F-B886-F16D48759B20} = {5E666C53-2D82-49C9-9127-3FDDC321C741}
|
||||
{B6725C9F-A6D2-4269-9B74-7888A90F7884} = {5E666C53-2D82-49C9-9127-3FDDC321C741}
|
||||
{B27DD434-EECD-4EE0-A03B-1150EB87258E} = {B6725C9F-A6D2-4269-9B74-7888A90F7884}
|
||||
{A4884465-CFBB-4A64-A9DE-690E1A63EF7E} = {B6725C9F-A6D2-4269-9B74-7888A90F7884}
|
||||
{3E9C89B1-C045-4F42-92B2-F9FFFFC2DBD4} = {39E42C4B-A078-4CA4-9D92-B883D8129601}
|
||||
{C70E1572-20FF-496C-A0A9-10AA6755A07C} = {39E42C4B-A078-4CA4-9D92-B883D8129601}
|
||||
{EAD17188-072C-4726-B840-A769C36DAD1B} = {DD043083-71A4-409A-AA91-F9C548DCF7EC}
|
||||
{88F85A64-105D-4CDA-8199-B7A312FC8A27} = {19EE975B-232D-49F0-94C7-6F1C6424FB53}
|
||||
{8241108A-7824-4FF2-BECA-7521A9D89DCF} = {19EE975B-232D-49F0-94C7-6F1C6424FB53}
|
||||
EndGlobalSection
|
||||
EndGlobal
|
||||
|
|
|
@ -102,6 +102,14 @@ void Eval<ElemType>::GetNodeDimensions(std::map<std::wstring, size_t>& dimension
|
|||
m_eval->GetNodeDimensions(dimensions, nodeGroup);
|
||||
}
|
||||
|
||||
// StartEvaluateMinibatchLoop - Prepare network for Evaluate() calls.
|
||||
// ouputNodeName - name of node that will be evaluated
|
||||
template<class ElemType>
|
||||
void Eval<ElemType>::StartEvaluateMinibatchLoop(const std::wstring & outputNodeName)
|
||||
{
|
||||
m_eval->StartEvaluateMinibatchLoop(outputNodeName);
|
||||
}
|
||||
|
||||
// Evaluate - Evalute using the model with the given inputs and outputs
|
||||
// inputs - map from node name to input vector
|
||||
// outputs - map from node name to output vector, outputs vectors need to be preallocated by caller, sizing will happen during evaluation
|
||||
|
@ -122,4 +130,4 @@ void Eval<ElemType>::ResetState()
|
|||
template class Eval<double>;
|
||||
template class Eval<float>;
|
||||
|
||||
}}}
|
||||
}}}
|
||||
|
|
|
@ -47,7 +47,8 @@ public:
|
|||
|
||||
virtual void LoadModel(const std::wstring& modelFileName)=0;
|
||||
virtual void GetNodeDimensions(std::map<std::wstring, size_t>& dimensions, NodeGroup nodeGroup)=0;
|
||||
virtual void Evaluate(std::map<std::wstring, std::vector<ElemType>*>& inputs, std::map<std::wstring, std::vector<ElemType>*>& outputs)=0;
|
||||
virtual void StartEvaluateMinibatchLoop(const std::wstring & outputNodeName) = 0;
|
||||
virtual void Evaluate(std::map<std::wstring, std::vector<ElemType>*>& inputs, std::map<std::wstring, std::vector<ElemType>*>& outputs) = 0;
|
||||
virtual void ResetState() = 0;
|
||||
};
|
||||
|
||||
|
@ -92,6 +93,10 @@ public:
|
|||
// nodeGroup - type of node we are requesting (input/output/specified)
|
||||
virtual void GetNodeDimensions(std::map<std::wstring, size_t>& dimensions, NodeGroup nodeGroup);
|
||||
|
||||
// StartEvaluateMinibatchLoop - Prepare network for Evaluate() calls.
|
||||
// ouputNodeName - name of node that will be evaluated
|
||||
virtual void StartEvaluateMinibatchLoop(const std::wstring & outputNodeName);
|
||||
|
||||
// Evaluate - Evalute using the model with the given inputs and outputs
|
||||
// inputs - map from node name to input vector
|
||||
// outputs - map from node name to output vector, outputs vectors need to be preallocated by caller, sizing will happen during evaluation
|
||||
|
|
|
@ -1320,15 +1320,16 @@ class argvector: public std::vector<T>
|
|||
}
|
||||
|
||||
// convert wstring toks2[0] to T val and check type
|
||||
static void parse(const std::wstring& in, size_t& val)
|
||||
template<typename INT>
|
||||
static void parseint(const std::wstring& in, INT& val)
|
||||
{
|
||||
float fval = (float) msra::strfun::todouble(in);
|
||||
val = (size_t) fval;
|
||||
if (val != fval)
|
||||
{
|
||||
double dval = msra::strfun::todouble(in);
|
||||
val = (INT)dval;
|
||||
if (val != dval)
|
||||
RuntimeError("argvector: invalid arg value");
|
||||
}
|
||||
}
|
||||
static void parse(const std::wstring& in, size_t& val) { parseint(in, val); }
|
||||
static void parse(const std::wstring& in, int& val) { parseint(in, val); }
|
||||
static void parse(const std::wstring& in, std::wstring& val)
|
||||
{
|
||||
val = in;
|
||||
|
|
|
@ -984,8 +984,9 @@ public:
|
|||
void getedgeacscores (std::vector<float> & edgeacscores);
|
||||
void getedgealignments (std::vector<unsigned short> & edgealignments);
|
||||
//to work with CNTK's GPU memory
|
||||
void setmode(bool cpumode/*, size_t DeviceId*/);
|
||||
void release(bool cpumode);
|
||||
void setdevice(size_t DeviceId);
|
||||
size_t getdevice();
|
||||
void release(bool cpumode);
|
||||
void setloglls(const Microsoft::MSR::CNTK::Matrix<float>& loglls);
|
||||
void setloglls(const Microsoft::MSR::CNTK::Matrix<double>& loglls);
|
||||
void getgamma(Microsoft::MSR::CNTK::Matrix<float>& loglls);
|
||||
|
|
|
@ -13,6 +13,7 @@
|
|||
#include <string> // for the error message in checkoverflow() only
|
||||
#include <stdexcept>
|
||||
#include <stdint.h>
|
||||
#include <cstdio>
|
||||
|
||||
#undef INITIAL_STRANGE // [v-hansu] intialize structs to strange values
|
||||
#define PARALLEL_SIL // [v-hansu] process sil on CUDA, used in other files, please search this
|
||||
|
@ -25,7 +26,12 @@ static void checkoverflow (size_t fieldval, size_t targetval, const char * field
|
|||
if (fieldval != targetval)
|
||||
{
|
||||
char buf[1000];
|
||||
sprintf(buf, "lattice: bit field %s too small for value 0x%x (cut from 0x%x)", fieldname, (unsigned int)targetval, (unsigned int)fieldval);
|
||||
#if defined(_MSC_VER) && _MSC_VER < 1900
|
||||
sprintf_s
|
||||
#else
|
||||
std::snprintf
|
||||
#endif
|
||||
(buf, sizeof(buf), "lattice: bit field %s too small for value 0x%x (cut from 0x%x)", fieldname, (unsigned int)targetval, (unsigned int)fieldval);
|
||||
throw std::runtime_error (buf);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,108 +0,0 @@
|
|||
///-------------------------------------------------------------------------------------------------
|
||||
// file: AcceleratorManager.h
|
||||
//
|
||||
// summary: Declares the accelerator manager class
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
#pragma once
|
||||
#include <deque>
|
||||
#include <vector>
|
||||
#include <set>
|
||||
#include "accelerator.h"
|
||||
#include "PhysicalDevice.h"
|
||||
#include "Lockable.h"
|
||||
#include <map>
|
||||
|
||||
namespace PTask {
|
||||
|
||||
class Task;
|
||||
|
||||
class AcceleratorManager : public Lockable
|
||||
{
|
||||
public:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Default constructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/20/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
AcceleratorManager();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/20/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual ~AcceleratorManager(void);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Adds a device. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/20/2011. </remarks>
|
||||
///
|
||||
/// <param name="pAccelerator"> [in,out] If non-null, the accelerator. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void AddDevice(Accelerator * pAccelerator);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if 'pAccelerator' is available. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/20/2011. </remarks>
|
||||
///
|
||||
/// <param name="pAccelerator"> [in,out] If non-null, the accelerator. </param>
|
||||
///
|
||||
/// <returns> true if available, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL IsAvailable(Accelerator * pAccelerator);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Searches for the first available. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/20/2011. </remarks>
|
||||
///
|
||||
/// <param name="cls"> The cls. </param>
|
||||
/// <param name="v"> [in,out] [in,out] If non-null, the v. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL FindAvailable(ACCELERATOR_CLASS cls, std::vector<Accelerator*> &v);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the physical accelerator count. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/20/2011. </remarks>
|
||||
///
|
||||
/// <returns> The physical accelerator count. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual UINT GetPhysicalAcceleratorCount();
|
||||
|
||||
protected:
|
||||
|
||||
/// <summary> The devices </summary>
|
||||
std::vector<PhysicalDevice*> m_devices;
|
||||
|
||||
/// <summary> The available devices </summary>
|
||||
std::vector<PhysicalDevice*> m_available;
|
||||
|
||||
/// <summary> The inflight devices </summary>
|
||||
std::vector<PhysicalDevice*> m_inflight;
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Searches for the first match for the given accelerator*. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/20/2011. </remarks>
|
||||
///
|
||||
/// <param name="pAccelerator"> [in,out] If non-null, the accelerator. </param>
|
||||
///
|
||||
/// <returns> null if it fails, else. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
PhysicalDevice * Find(Accelerator* pAccelerator);
|
||||
};
|
||||
};
|
|
@ -1,633 +0,0 @@
|
|||
///-------------------------------------------------------------------------------------------------
|
||||
// file: AsyncContext.h
|
||||
//
|
||||
// summary: Declares the asynchronous context class
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
#ifndef __ASYNC_CONTEXT_H__
|
||||
#define __ASYNC_CONTEXT_H__
|
||||
|
||||
#include <stdio.h>
|
||||
#include <crtdbg.h>
|
||||
#include <deque>
|
||||
#include <set>
|
||||
#include "ReferenceCounted.h"
|
||||
|
||||
namespace PTask {
|
||||
|
||||
class Task;
|
||||
class SyncPoint;
|
||||
class AsyncDependence;
|
||||
class Accelerator;
|
||||
|
||||
class AsyncContext : public ReferenceCounted {
|
||||
public:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Constructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/24/2012. </remarks>
|
||||
///
|
||||
/// <param name="pDeviceContext"> [in] non-null, context for the device. </param>
|
||||
/// <param name="pTaskContext"> [in] non-null, context for the task. </param>
|
||||
/// <param name="eAsyncContextType"> Type of the asynchronous context. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
AsyncContext(
|
||||
__in Accelerator * pDeviceContext,
|
||||
__in Task * pTaskContext,
|
||||
__in ASYNCCONTEXTTYPE eAsyncContextType
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/24/2012. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual ~AsyncContext();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Creates a dependence on the synchronization point. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/24/2012. </remarks>
|
||||
///
|
||||
/// <returns> null if it fails, else. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual AsyncDependence *
|
||||
CreateDependence(
|
||||
__in ASYNCHRONOUS_OPTYPE eOperationType
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Creates a dependence on the synchronization point. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/24/2012. </remarks>
|
||||
///
|
||||
/// <returns> null if it fails, else. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual AsyncDependence *
|
||||
CreateDependence(
|
||||
__in SyncPoint * pSyncPoint,
|
||||
__in ASYNCHRONOUS_OPTYPE eOperationType
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Creates a synchronization point. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/25/2012. </remarks>
|
||||
///
|
||||
/// <returns> null if it fails, else. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual SyncPoint * CreateSyncPoint(void * pPSSyncObject);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destroys a synchronization point. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/25/2012. </remarks>
|
||||
///
|
||||
/// <returns> null if it fails, else. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL DestroySyncPoint(SyncPoint * pSyncPoint);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Synchronizes the context. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL SynchronizeContext();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Notifies the device synchronized. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 7/8/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void NotifyDeviceSynchronized();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Wait for dependence--asynchronous; puts a fence in the command queue
|
||||
/// for this context. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/24/2012. </remarks>
|
||||
///
|
||||
/// <param name="pDependence"> [in,out] If non-null, the dependence. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
OrderSubsequentOperationsAfter(
|
||||
__in AsyncDependence * pDependence
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Wait for dependence--synchronous </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/24/2012. </remarks>
|
||||
///
|
||||
/// <param name="pDependence"> [in,out] If non-null, the dependence. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static BOOL
|
||||
SynchronousWait(
|
||||
__in AsyncDependence * pDependence
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Synchronous wait for dependence resolution. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/25/2013. </remarks>
|
||||
///
|
||||
/// <param name="pSyncPoint"> [in,out] If non-null, the synchronise point. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static BOOL
|
||||
SynchronousWait(
|
||||
__in SyncPoint * pSyncPoint
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if 'pDependence' is dependence resolved. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/24/2012. </remarks>
|
||||
///
|
||||
/// <param name="pDependence"> [in,out] If non-null, the dependence. </param>
|
||||
///
|
||||
/// <returns> true if dependence resolved, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
QueryDependenceOutstanding(
|
||||
__in AsyncDependence * pDependence
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the device context. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/25/2012. </remarks>
|
||||
///
|
||||
/// <returns> null if it fails, else the device context. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual Accelerator * GetDeviceContext();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the task context. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/13/2012. </remarks>
|
||||
///
|
||||
/// <returns> null if it fails, else the task context. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual Task * GetTaskContext();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the platform context object. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/25/2012. </remarks>
|
||||
///
|
||||
/// <returns> null if it fails, else the platform context object. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void * GetPlatformContextObject()=0;
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Initializes this object. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/25/2012. </remarks>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL Initialize()=0;
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Return true if this accelerator backing this context encapsulates a backend
|
||||
/// framework that provides explicit APIs for managing outstanding (Asynchronous)
|
||||
/// operations. When this is the case, the corresponding AsyncContext subclass can
|
||||
/// manage outstanding dependences explicitly to increase concurrency and avoid
|
||||
/// syncing with the device. When it is *not* the case, we must synchronize when we move
|
||||
/// data to and from this accelerator context and contexts that *do* support an
|
||||
/// explicit async API. For example, CUDA supports the stream and event API to
|
||||
/// explicitly manage dependences and we use this feature heavily to allow task
|
||||
/// dispatch to get far ahead of device- side dispatch. However when data moves
|
||||
/// between CUAccelerators and other accelerator classes, we must use synchronous
|
||||
/// operations or provide a way to wait for outstanding dependences from those
|
||||
/// contexts to resolve. This method is used to tell us whether we can create an
|
||||
/// outstanding dependence after making calls that queue work, or whether we need to
|
||||
/// synchronize.
|
||||
///
|
||||
/// This method simply calls the method of the same name on the (device context)
|
||||
/// accelerator, and is only provided for convenience.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/25/2012. </remarks>
|
||||
///
|
||||
/// <returns> true if the backing accelerator's framework provides explicit async APIs, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL SupportsExplicitAsyncOperations();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Locks the accelerator. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/26/2013. </remarks>
|
||||
///
|
||||
/// <returns> Nothing; the method is declared VOID. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual VOID LockAccelerator();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Unlocks the accelerator. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/26/2013. </remarks>
|
||||
///
|
||||
/// <returns> Nothing; the method is declared VOID. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual VOID UnlockAccelerator();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Non-blocking check whether the dependence is still outstanding. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 7/2/2013. </remarks>
|
||||
///
|
||||
/// <param name="pDep"> [in,out] If non-null, the dep. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
NonblockingQueryOutstanding(
|
||||
__inout AsyncDependence * pDep
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Synchronous wait for outstanding async op--do not acquire locks
|
||||
/// required to update async and device context state in response
|
||||
/// to a successful query or wait. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 7/2/2013. </remarks>
|
||||
///
|
||||
/// <param name="pDep"> [in,out] If non-null, the dep. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
LocklessWaitOutstanding(
|
||||
__inout AsyncDependence * pDep
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the type (dedicated purpose) of the asynchronous context. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 7/8/2013. </remarks>
|
||||
///
|
||||
/// <returns> The asynchronous context type. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual ASYNCCONTEXTTYPE
|
||||
GetAsyncContextType(
|
||||
VOID
|
||||
);
|
||||
|
||||
protected:
|
||||
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Wait for dependence--synchronously. Because we may have to make backend
|
||||
/// framework calls (e.g. to wait or check CUDA event states) we may require
|
||||
/// a number of fairly coarse locks, including an accelerator lock. When calling
|
||||
/// this from task dispatch context, the caller must acquire all locks up front
|
||||
/// since there are lock ordering disciplines such as (Accelerator->Datablock) that
|
||||
/// are there to prevent deadlock for concurrent tasks.
|
||||
///
|
||||
/// This version assumes (or rather only asserts) that accelerator locks are held
|
||||
/// already, so it can be called from dispatch context: Task is a friend class
|
||||
/// to enable this while minimizing the potential for abuse.
|
||||
///
|
||||
/// This is a building block for the public version, which first collects locks,
|
||||
/// but which cannot be called from a dispatch context as a result.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/24/2012. </remarks>
|
||||
///
|
||||
/// <param name="pDependence"> [in,out] If non-null, the dependence. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
BOOL
|
||||
__SynchronousWaitLocksHeld(
|
||||
__in AsyncDependence * pDependence
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Wait for dependence--synchronously. Because we may have to make backend
|
||||
/// framework calls (e.g. to wait or check CUDA event states) we may require
|
||||
/// a number of fairly coarse locks, including an accelerator lock. When calling
|
||||
/// this from task dispatch context, the caller must acquire all locks up front
|
||||
/// since there are lock ordering disciplines such as (Accelerator->Datablock) that
|
||||
/// are there to prevent deadlock for concurrent tasks.
|
||||
///
|
||||
/// This version assumes (or rather only asserts) that accelerator locks are held
|
||||
/// already, so it can be called from dispatch context: Task is a friend class
|
||||
/// to enable this while minimizing the potential for abuse.
|
||||
///
|
||||
/// This is a building block for the public version, which first collects locks,
|
||||
/// but which cannot be called from a dispatch context as a result.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/25/2013. </remarks>
|
||||
///
|
||||
/// <param name="pSyncPoint"> [in,out] If non-null, the synchronise point. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
BOOL
|
||||
__SynchronousWaitLocksHeld(
|
||||
__in SyncPoint * pSyncPoint
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Sync points, once marked resolved, can never return to the outstanding state.
|
||||
/// Consequently, if a lock-free check of the outstanding flag returns false, there is
|
||||
/// no danger of a race. Conversely, checking if the state is unknown requires
|
||||
/// accelerator and context locks which restrict concurrency and have lock ordering
|
||||
/// disciplines that make it difficult to *always* have these locks when this check
|
||||
/// is required. So a quick check without a lock that can avoid locks when they are
|
||||
/// unnecessary is a handy tool.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/26/2013. </remarks>
|
||||
///
|
||||
/// <param name="pSyncPoint"> [in,out] If non-null, the synchronise point. </param>
|
||||
///
|
||||
/// <returns> true if the sync point is already marked resolved (checked without taking a lock), false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static BOOL
|
||||
__IsSyncPointResolvedNoLock(
|
||||
__in SyncPoint * pSyncPoint
|
||||
);
|
||||
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> DEBUG instrumentation for analyzing the composition of outstanding dependences
|
||||
/// on this async context. How many are flagged as resolved, how many are *actually*
|
||||
/// resolved, is the queue monotonic?
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 3/31/2014. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void
|
||||
AnalyzeOutstandingQueue(
|
||||
VOID
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> garbage collect the outstanding queue. Anything no longer outstanding
|
||||
/// can be removed from the queue. The original version is very
|
||||
/// conservative about how much it actually cleans up--it only checks flags
|
||||
/// (and thus avoids back-end API calls to check event status), which is
|
||||
/// good for performance until the number of outstanding deps piles up.
|
||||
/// This version attempts to balance these effects by making API calls
|
||||
/// if the number of outstanding deps goes beyond a threshold. The conservative version
|
||||
/// can be reinstated with a static member variable s_bUseConservativeGC.
|
||||
/// The threshold at which to start making API calls is controlled by
|
||||
/// PTask::Runtime::[Get|Set]AsyncContextGCQueryThreshold().
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 3/31/2014. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void
|
||||
GarbageCollectOutstandingQueue(
|
||||
VOID
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> garbage collect the outstanding queue. Anything no longer outstanding
|
||||
/// can be removed from the queue. This (old, obsolete) version is very
|
||||
/// conservative about how much it actually cleans up--it only checks flags
|
||||
/// (and thus avoids back-end API calls to check event status), which is
|
||||
/// good for performance until the number of outstanding deps piles up.
|
||||
/// The new version attempts to balance these effects by making API calls
|
||||
/// if the number of outstanding deps goes beyond a threshold. This version
|
||||
/// can be reinstated with a static member variable s_bUseConservativeGC.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/25/2012. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void
|
||||
GarbageCollectOutstandingQueueConservatively(
|
||||
VOID
|
||||
);
|
||||
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Truncate queue. Only to be called when the context is known to be synchronized!
|
||||
/// Marks all outstanding sync points as resolved, and removes them from the
|
||||
/// outstanding queue.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/25/2012. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void
|
||||
TruncateOutstandingQueue(
|
||||
__in BOOL bContextSynchronized
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Truncate queue. Only to be called when the context is known to be synchronized!
|
||||
/// Marks all outstanding sync points as resolved, and removes them from the
|
||||
/// outstanding queue.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/25/2012. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void TruncateOutstandingQueueFrom(
|
||||
__in SyncPoint * pSyncPoint
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Wait for dependence asynchronously by inserting a dependence
|
||||
/// in the current context (stream) on the event in the sync point.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/24/2012. </remarks>
|
||||
///
|
||||
/// <param name="pSyncPoint"> [in] If non-null, the synchronise point. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
InsertFence(
|
||||
__in SyncPoint * pSyncPoint
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Platform specific create synchronization point. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/25/2012. </remarks>
|
||||
///
|
||||
/// <returns> null if it fails, else the newly created sync point. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual SyncPoint *
|
||||
PlatformSpecificCreateSyncPoint(
|
||||
void * pPSSyncObject
|
||||
)=0;
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Platform specific destroy synchronization point. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/25/2012. </remarks>
|
||||
///
|
||||
/// <param name="pSyncPoint"> [in,out] If non-null, the synchronise point. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
PlatformSpecificDestroySynchronizationPoint(
|
||||
__in SyncPoint * pSyncPoint
|
||||
)=0;
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Wait for dependence asynchronously. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/24/2012. </remarks>
|
||||
///
|
||||
/// <param name="pSyncPoint"> [in] If non-null, the synchronise point. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
PlatformSpecificInsertFence(
|
||||
__in SyncPoint * pSyncPoint
|
||||
)=0;
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Wait for dependence synchronously. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/24/2012. </remarks>
|
||||
///
|
||||
/// <param name="pSyncPoint"> [in] If non-null, the synchronise point. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
PlatformSpecificSynchronousWait(
|
||||
__in SyncPoint * pSyncPoint
|
||||
)=0;
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Wait for dependence synchronously without locking the async context
|
||||
/// or underlying accelerator: this simplifies lock acquisition for such
|
||||
/// waits, but at the expense of leaving live dependences that are
|
||||
/// actually resolved. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/24/2012. </remarks>
|
||||
///
|
||||
/// <param name="pSyncPoint"> [in] If non-null, the synchronise point. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
PlatformSpecificLocklessSynchronousWait(
|
||||
__in SyncPoint * pSyncPoint
|
||||
)=0;
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Platform-specific synchronization of the context. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
PlatformSpecificSynchronizeContext(
|
||||
VOID
|
||||
)=0;
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Determines if the sync point is resolved (and marks it if so). </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
PlatformSpecificQueryOutstanding(
|
||||
__inout SyncPoint * pSyncPoint
|
||||
)=0;
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Platform specific nonblocking check whether the event remains outstanding. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 7/2/2013. </remarks>
|
||||
///
|
||||
/// <param name="pSyncPoint"> [in,out] If non-null, the synchronise point. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
PlatformSpecificNonblockingQueryOutstanding(
|
||||
__inout SyncPoint * pSyncPoint
|
||||
)=0;
|
||||
|
||||
std::deque<SyncPoint*> m_qOutstanding;
|
||||
Accelerator * m_pDeviceContext;
|
||||
Task * m_pTaskContext;
|
||||
ASYNCCONTEXTTYPE m_eAsyncContextType;
|
||||
static BOOL s_bUseConservativeGC;
|
||||
|
||||
friend class AsyncDependence;
|
||||
friend class SyncPoint;
|
||||
friend class Task;
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets a string describing this refcount object. Allows subclasses to
|
||||
/// provide overrides that make leaks easier to find when detected by the
|
||||
/// rc profiler.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/9/2013. </remarks>
|
||||
///
|
||||
/// <returns> a string describing this refcount object, for use by the RC profiler. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual std::string GetRCProfileDescriptor();
|
||||
|
||||
};
|
||||
|
||||
};
|
||||
|
||||
#endif
|
|
@ -1,180 +0,0 @@
|
|||
///-------------------------------------------------------------------------------------------------
|
||||
// file: AsyncDependence.h
|
||||
//
|
||||
// summary: Declares the asynchronous dependence class
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
#ifndef __ASYNC_DEPENDENCE_H__
|
||||
#define __ASYNC_DEPENDENCE_H__
|
||||
|
||||
#include "ReferenceCounted.h"
|
||||
|
||||
namespace PTask {
|
||||
|
||||
class SyncPoint;
|
||||
class AsyncContext;
|
||||
class PBuffer;
|
||||
|
||||
/// Reference-counted dependence object (derives from ReferenceCounted). Its constructor takes
/// an AsyncContext, a SyncPoint and an ASYNCHRONOUS_OPTYPE, and the class declares one
/// protected member of each of those types.
class AsyncDependence : public ReferenceCounted {
|
||||
|
||||
public:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Constructor. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 5/24/2012. </remarks>
|
||||
///
|
||||
/// <param name="pAsyncContext"> [in,out] If non-null, context for the outstanding
|
||||
/// asynchronous operations. </param>
|
||||
/// <param name="pSyncPoint"> [in,out] If non-null, the sync point on which to depend. </param>
|
||||
/// <param name="eOperationType"> Type of the operation. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
AsyncDependence(
|
||||
__in AsyncContext * pAsyncContext,
|
||||
__in SyncPoint * pSyncPoint,
|
||||
__in ASYNCHRONOUS_OPTYPE eOperationType
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/20/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual ~AsyncDependence();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the context. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/24/2012. </remarks>
|
||||
///
|
||||
/// <returns> null if it fails, else the context. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual AsyncContext * GetContext();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the platform context object. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/25/2012. </remarks>
|
||||
///
|
||||
/// <returns> null if it fails, else the platform context object. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void * GetPlatformContextObject();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the platform wait object. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/25/2012. </remarks>
|
||||
///
|
||||
/// <returns> null if it fails, else the platform wait object. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void * GetPlatformWaitObject();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the synchronise point. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/25/2012. </remarks>
|
||||
///
|
||||
/// <returns> null if it fails, else the synchronise point. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
SyncPoint * GetSyncPoint();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets operation type. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 5/1/2013. </remarks>
|
||||
///
|
||||
/// <returns> The operation type. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
ASYNCHRONOUS_OPTYPE GetOperationType();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if this object is outstanding. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/25/2013. </remarks>
|
||||
///
|
||||
/// <returns> true if outstanding, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
BOOL IsOutstanding();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Blocking wait until complete. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/25/2013. </remarks>
|
||||
///
|
||||
/// <returns> NOTE(review): BOOL result is undocumented; presumably true on success --
///           confirm against the implementation. </returns>
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
BOOL SynchronousExclusiveWait();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Lockless wait outstanding: without acquiring any locks attempt to perform a
|
||||
/// synchronous wait for any outstanding async dependences on this buffer that
|
||||
/// conflict with an operation of the given type. This is an experimental API,
|
||||
/// enable/disable with PTask::Runtime::*etTaskDispatchLocklessIncomingDepWait(),
|
||||
/// attempting to leverage the fact that CUDA apis for waiting on events (which
|
||||
/// appear to be thread-safe and decoupled from a particular device context)
|
||||
/// to minimize serialization associated with outstanding dependences on data
|
||||
/// consumed by tasks that do not require accelerators for any other reason than to
|
||||
/// wait for such operations to complete.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 7/1/2013. </remarks>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
BOOL LocklessWaitOutstanding();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Determines if the dependence is outstanding without acquiring device
|
||||
/// and context locks required to react to resolution if we detect it.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 7/2/2013. </remarks>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
BOOL NonblockingQueryOutstanding();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Determines if the sync point this dependence encapsulates has been
|
||||
/// marked resolved or not. The transition from outstanding to resolved
|
||||
/// is monotonic, so we can make this check without a lock, provided
|
||||
/// that only a FALSE return value is considered actionable.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 7/2/2013. </remarks>
|
||||
///
|
||||
/// <returns> NOTE(review): presumably the outstanding flag as currently marked (TRUE while
///           still flagged outstanding) -- confirm. Per the summary, only a FALSE result
///           may be acted on without holding a lock. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
BOOL QueryOutstandingFlag();
|
||||
|
||||
protected:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Blocking wait until complete--locks already held. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/25/2013. </remarks>
|
||||
///
/// NOTE(review): the name and summary say the caller already holds the required locks;
/// which locks those are is not visible in this header -- confirm in the implementation.
/// The BOOL result is undocumented; presumably true on success.
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
BOOL __SynchronousWaitLocksHeld();
|
||||
|
||||
// Context, sync point and operation type. Presumably these hold the constructor's
// arguments and back GetContext()/GetSyncPoint()/GetOperationType() -- confirm in the .cpp.
AsyncContext * m_pAsyncContext;
|
||||
SyncPoint * m_pSyncPoint;
|
||||
ASYNCHRONOUS_OPTYPE m_eOperationType;
|
||||
|
||||
// Friendship gives PBuffer access to the protected members above,
// including __SynchronousWaitLocksHeld().
friend class PBuffer;
|
||||
|
||||
};
|
||||
|
||||
};
|
||||
|
||||
#endif
|
|
@ -1,556 +0,0 @@
|
|||
///-------------------------------------------------------------------------------------------------
|
||||
// file: BlockPool.h
|
||||
//
|
||||
// summary: Declares the block pool class
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
#ifndef __BLOCK_POOL_H__
|
||||
#define __BLOCK_POOL_H__
|
||||
|
||||
#include <stdio.h>
|
||||
#include <crtdbg.h>
|
||||
|
||||
#include "datablocktemplate.h"
|
||||
#include "channel.h"
|
||||
#include "port.h"
|
||||
#include "PBuffer.h"
|
||||
#include <deque>
|
||||
#include <vector>
|
||||
#include "BlockPoolOwner.h"
|
||||
|
||||
namespace PTask {
|
||||
|
||||
class BlockPool : public Lockable {
|
||||
public:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Constructor. Takes the datablock template, access permissions, pool size and pool owner. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/20/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
BlockPool(
|
||||
__in DatablockTemplate * pTemplate,
|
||||
__in BUFFERACCESSFLAGS ePermissions,
|
||||
__in UINT uiPoolSize,
|
||||
__in BlockPoolOwner * pPoolOwner
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/20/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual ~BlockPool();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Sets request page locked. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
|
||||
/// <param name="bPageLocked"> true to lock, false to unlock the page. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void SetRequestsPageLocked(BOOL bPageLocked);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets request page locked. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
BOOL GetRequestsPageLocked();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Sets whether this pool is growable. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
|
||||
/// <param name="bGrowable"> true if growable. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void SetGrowable(BOOL bGrowable);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if this object is growable. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
|
||||
/// <returns> true if growable, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
BOOL IsGrowable();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Sets eager device materialize. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
|
||||
/// <param name="bEager"> true to enable eager device materialization, false to disable it. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void SetEagerDeviceMaterialize(BOOL bEager);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets eager device materialize. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
|
||||
/// <returns> true if eager device materialization is enabled, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
BOOL GetEagerDeviceMaterialize();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Sets pool size. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
|
||||
/// <param name="uiSize"> The size. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void SetPoolSize(UINT uiSize);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets pool size. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
|
||||
/// <returns> The pool size. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
UINT GetPoolSize();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Adds a view memory space. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
|
||||
/// <param name="pAccelerator"> [in,out] If non-null, the accelerator. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void AddViewMemorySpace(Accelerator* pAccelerator);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Adds a view memory space. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
|
||||
/// <param name="uiMemorySpace"> The memory space. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void AddViewMemorySpace(UINT uiMemorySpace);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets a destination buffer for a block with an upstream
|
||||
/// allocator. Succeeds only if the pool happens to have blocks
|
||||
/// backed by sufficient resources in all channels that are backed.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/20/2011. </remarks>
|
||||
///
|
||||
/// <param name="pAccelerator"> (optional) [in,out] If non-null, the accelerator. </param>
|
||||
///
|
||||
/// <returns> null if it fails, else the destination buffer. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual Datablock *
|
||||
GetPooledBlock(
|
||||
__in Accelerator * pAccelerator=NULL,
|
||||
__in UINT uiDataBytes=0,
|
||||
__in UINT uiMetaBytes=0,
|
||||
__in UINT uiTemplateBytes=0
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Adds to the pool. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/20/2011. </remarks>
|
||||
///
|
||||
/// <param name="pBlock"> [in,out] If non-null, the block. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void AddNewBlock(Datablock * pBlock);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> return to the pool. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/20/2011. </remarks>
|
||||
///
|
||||
/// <param name="pBlock"> [in,out] If non-null, the block. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void ReturnBlock(Datablock * pBlock);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if this object has block pool. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
|
||||
/// <returns> true if block pool, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL HasBlockPool();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Force block pooling for a port that has an up-stream allocator. In general, when
|
||||
/// we have an upstream allocator (meta) port, the runtime will not create a block
|
||||
/// pool for the corresponding output port. This turns out to put device-side
|
||||
/// allocation on the critical path in some cases, so we provide a way to override
|
||||
/// that behavior and allow a port to create a pool based on some size hints. When
|
||||
/// there is a block available with sufficient space in the pool, the meta port can
|
||||
/// avoid the allocation and draw from the pool.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 9/25/2012. </remarks>
|
||||
///
|
||||
/// <param name="nPoolSize"> Size of the block pool. </param>
|
||||
/// <param name="nStride"> The stride. </param>
|
||||
/// <param name="nDataBytes"> The data in bytes. </param>
|
||||
/// <param name="nMetaBytes"> The meta in bytes. </param>
|
||||
/// <param name="nTemplateBytes"> The template in bytes. </param>
|
||||
/// <param name="bPageLockHostViews"> (optional) the page lock host views. </param>
|
||||
/// <param name="bEagerMaterialize"> (optional) the eager materialize. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void
|
||||
ForceBlockPoolHint(
|
||||
__in UINT nPoolSize,
|
||||
__in UINT nStride,
|
||||
__in UINT nDataBytes,
|
||||
__in UINT nMetaBytes,
|
||||
__in UINT nTemplateBytes,
|
||||
__in BOOL bPageLockHostViews=FALSE,
|
||||
__in BOOL bEagerMaterialize=FALSE
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Allocate block pool. Attempt to preallocate blocks on this port.
|
||||
/// Synchronous version: allocates buffers and populates any device side
|
||||
/// views in one go. If graph construction performance matters, this is
|
||||
/// not a good way to do it, since memory allocation causes synchronization.
|
||||
/// The asynchronous variant does it in several passes, allowing us
|
||||
/// to overlap the copy.
|
||||
///
|
||||
/// Allocation of data-blocks and platform-specific buffers can be a significant
|
||||
/// latency expense at dispatch time. We can actually preallocate output datablocks
|
||||
/// and create device- side buffers at graph construction time. For each node in the
|
||||
/// graph, allocate data blocks on any output ports, and create device-specific
|
||||
/// buffers for all accelerators capable of executing the node.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/15/2012. </remarks>
|
||||
///
|
||||
/// <param name="pAccelerators"> [in] If non-null, the accelerators on which views of blocks
|
||||
/// allocated in the pool may be required. </param>
|
||||
/// <param name="uiPoolSize"> [in] (optional) Size of the pool. If zero/defaulted,
|
||||
/// Runtime::GetICBlockPoolSize() will be used to determine the
|
||||
/// size of the pool. </param>
|
||||
///
|
||||
/// <returns> True if it succeeds, false if it fails. If a port type doesn't actually implement
|
||||
/// pooling, return false as well.
|
||||
/// </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
AllocateBlockPool(
|
||||
__in std::vector<Accelerator*>* pAccelerators,
|
||||
__in unsigned int uiPoolSize=0
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destroys the block pool. AddRef everything in the pool, set its owner
|
||||
/// to null, and then release it. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/17/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void
|
||||
DestroyBlockPool(
|
||||
VOID
|
||||
);
|
||||
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Allocate block pool. Attempt to preallocate blocks on this port.
|
||||
/// Asynchronous version. Only allocates device-space buffers
|
||||
/// in the first pass. Second pass queues all the copies.
|
||||
/// This function handles only the first pass.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/15/2012. </remarks>
|
||||
///
|
||||
/// <param name="pAccelerators"> [in] If non-null, the accelerators on which views of blocks
|
||||
/// allocated in the pool may be required. </param>
|
||||
/// <param name="uiPoolSize"> [in] (optional) Size of the pool. If zero/defaulted,
|
||||
/// Runtime::GetICBlockPoolSize() will be used to determine the
|
||||
/// size of the pool. </param>
|
||||
///
|
||||
/// <returns> True if it succeeds, false if it fails. If a port type doesn't actually implement
|
||||
/// pooling, return false as well.
|
||||
/// </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
AllocateBlockPoolAsync(
|
||||
__in std::vector<Accelerator*>* pAccelerators,
|
||||
__in unsigned int uiPoolSize=0
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Allocate block pool. Attempt to preallocate blocks on this port.
|
||||
/// Asynchronous version. Only allocates device-space buffers
|
||||
/// in the first pass. Second pass queues all the copies.
|
||||
/// This function handles the second pass.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/15/2012. </remarks>
|
||||
///
|
||||
/// <param name="pAccelerators"> [in] If non-null, the accelerators on which views of blocks
|
||||
/// allocated in the pool may be required. </param>
|
||||
/// <param name="uiPoolSize"> [in] (optional) Size of the pool. If zero/defaulted,
|
||||
/// Runtime::GetICBlockPoolSize() will be used to determine the
|
||||
/// size of the pool. </param>
|
||||
///
|
||||
/// <returns> True if it succeeds, false if it fails. If a port type doesn't actually implement
|
||||
/// pooling, return false as well.
|
||||
/// </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
FinalizeBlockPoolAsync(
|
||||
VOID
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if this object is enabled. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/18/2013. </remarks>
|
||||
///
|
||||
/// <returns> true if enabled, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
BOOL IsEnabled();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets high water mark. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/19/2013. </remarks>
|
||||
///
|
||||
/// <returns> The high water mark. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
UINT GetHighWaterMark();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the low water mark. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/19/2013. </remarks>
|
||||
///
|
||||
/// <returns> The low water mark. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
UINT GetLowWaterMark();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the currently available count. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/19/2013. </remarks>
|
||||
///
|
||||
/// <returns> The number of currently available blocks. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
UINT GetAvailableBlockCount();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the total number of blocks owned by the pool. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/19/2013. </remarks>
|
||||
///
|
||||
/// <returns> The total number of blocks owned by the pool. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
UINT GetOwnedBlockCount();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Sets the number of blocks by which the pool should grow if
|
||||
/// it grows in response to dynamic demand.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/20/2013. </remarks>
|
||||
///
|
||||
/// <param name="uiBlockCount"> Number of blocks. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void SetGrowIncrement(UINT uiBlockCount);
|
||||
|
||||
UINT GetGrowIncrement();
|
||||
|
||||
protected:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Allocate a block based on the hint size (rather than the template!).
|
||||
/// We do not support an async variant of this yet.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/20/2011. </remarks>
|
||||
///
|
||||
/// <param name="pAccelerator"> (optional) If non-null, the accelerator. </param>
|
||||
/// <param name="uiDataBytes"> The data in bytes. </param>
|
||||
/// <param name="uiMetaBytes"> The meta in bytes. </param>
|
||||
/// <param name="uiTemplateBytes"> The template in bytes. </param>
|
||||
///
|
||||
/// <returns> null if it fails, else the new block. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
Datablock *
|
||||
AllocateBlockWithPoolHint(
|
||||
__in UINT uiDataBytes,
|
||||
__in UINT uiMetaBytes,
|
||||
__in UINT uiTemplateBytes
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Allocate block. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/20/2011. </remarks>
|
||||
///
|
||||
/// <param name="pAccelerator"> [in,out] (optional) If non-null, the accelerator. </param>
|
||||
/// <param name="pAsyncContext"> [in,out] If non-null, context for the asynchronous. </param>
|
||||
/// <param name="bPooled"> true to pooled. </param>
|
||||
///
|
||||
/// <returns> null if it fails, else the newly allocated block. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
Datablock *
|
||||
AllocateBlockForPool(
|
||||
VOID
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Allocate a block as part of asynchronous pool construction.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/30/2013. </remarks>
|
||||
///
|
||||
/// <param name="bFinalized"> [in,out] The finalized. </param>
|
||||
///
|
||||
/// <returns> null if it fails, else the newly allocated block. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
Datablock *
|
||||
AllocateBlockForPoolAsync(
|
||||
__out BOOL &bFinalized
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Finalize a block allocated with the async variant. Basically
|
||||
/// we need to populate any views on this pass.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/30/2013. </remarks>
|
||||
///
|
||||
/// <param name="pBlock"> [in,out] If non-null, the block. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void FinalizeBlock(
|
||||
__in Datablock * pBlock
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Grows the pool by the given number of blocks. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/20/2013. </remarks>
|
||||
///
|
||||
/// <param name="uiBlockCount"> Number of blocks. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void Grow(UINT uiBlockCount);
|
||||
|
||||
/// <summary> The template. </summary>
|
||||
DatablockTemplate * m_pTemplate;
|
||||
/// <summary> Size of the maximum block pool </summary>
|
||||
int m_nMaxPoolSize;
|
||||
/// <summary> The block pool </summary>
|
||||
std::deque<Datablock*> m_pBlockPool;
|
||||
/// <summary> True if we have provided hints for block pool management
|
||||
/// that are not present in the template.
|
||||
/// </summary>
|
||||
BOOL m_bPoolHintsSet;
|
||||
/// <summary> If the m_bPoolHintsSet member is true, this member
|
||||
/// controls the size of the block pool.
|
||||
/// </summary>
|
||||
UINT m_nPoolHintPoolSize;
|
||||
/// <summary> If the m_bPoolHintsSet member is true, this member
|
||||
/// controls the stride of the block pool.
|
||||
/// </summary>
|
||||
UINT m_nPoolHintStride;
|
||||
/// <summary> If the m_bPoolHintsSet member is true, this member
|
||||
/// controls the data channel size of the block pool.
|
||||
/// </summary>
|
||||
UINT m_nPoolHintDataBytes;
|
||||
/// <summary> If the m_bPoolHintsSet member is true, this member
|
||||
/// controls the meta channel size of the block pool.
|
||||
/// </summary>
|
||||
UINT m_nPoolHintMetaBytes;
|
||||
/// <summary> If the m_bPoolHintsSet member is true, this member
|
||||
/// controls the template channel size of the block pool.
|
||||
/// </summary>
|
||||
UINT m_nPoolHintTemplateBytes;
|
||||
/// <summary> True if host buffers for datablocks in this pool
|
||||
/// should be allocated from page-locked memory
|
||||
/// </summary>
|
||||
BOOL m_bPageLockHostViews;
|
||||
/// <summary> true to eager device materialize. </summary>
|
||||
BOOL m_bEagerDeviceMaterialize;
|
||||
/// <summary> The memory spaces in which these blocks can reasonably
|
||||
/// require a view. </summary>
|
||||
std::set<Accelerator*> m_vAccelerators;
|
||||
/// <summary> The permissions for blocks in this pool. </summary>
|
||||
BUFFERACCESSFLAGS m_ePermissions;
|
||||
/// <summary> true if growable. </summary>
|
||||
BOOL m_bGrowable;
|
||||
/// <summary> true if this object has an initial value. </summary>
|
||||
BOOL m_bHasInitialValue;
|
||||
/// <summary> The initial value. </summary>
|
||||
HOSTMEMORYEXTENT m_vInitialValue;
|
||||
/// <summary> The owner of the pool. </summary>
|
||||
BlockPoolOwner * m_pPoolOwner;
|
||||
/// <summary> blocks allocated with async variant that require finalization. </summary>
|
||||
std::vector<Datablock*> m_vOutstandingBlocks;
|
||||
/// <summary> The set of dirty blocks. </summary>
|
||||
std::set<Datablock*> m_vDirty;
|
||||
/// <summary> The block count high water mark. </summary>
|
||||
UINT m_uiHighWaterMark;
|
||||
/// <summary> The block count low water mark. </summary>
|
||||
UINT m_uiLowWaterMark;
|
||||
/// <summary> The owned blocks. </summary>
|
||||
UINT m_uiOwnedBlocks;
|
||||
/// <summary> The grow increment. </summary>
|
||||
UINT m_uiGrowIncrement;
|
||||
/// <summary> true to enable, false to disable. </summary>
|
||||
BOOL m_bEnabled;
|
||||
|
||||
BOOL Contains(Datablock * pBlock);
|
||||
void ReleaseBlocks();
|
||||
void LockTargetAccelerators();
|
||||
void UnlockTargetAccelerators();
|
||||
|
||||
friend class Port;
|
||||
|
||||
public:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Check that the block pool contains only datablocks with no control signals. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 3/2/2012. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void CheckBlockPoolStates();
|
||||
};
|
||||
|
||||
};
|
||||
#endif
|
|
@ -1,445 +0,0 @@
|
|||
///-------------------------------------------------------------------------------------------------
|
||||
// file: BlockPoolOwner.h
|
||||
//
|
||||
// summary: Declares the block pool owner class
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
#ifndef __BLOCK_POOL_OWNER_H__
|
||||
#define __BLOCK_POOL_OWNER_H__
|
||||
|
||||
#include <stdio.h>
|
||||
#include <crtdbg.h>
|
||||
#include <deque>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
|
||||
namespace PTask {
|
||||
|
||||
class Graph;
|
||||
class Datablock;
|
||||
class DatablockTemplate;
|
||||
|
||||
class BlockPoolOwner {
|
||||
public:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Initializes the block pool manager. Because PTask objects are
|
||||
/// reference counted, it is difficult to enforce life-cycle relationships
|
||||
/// that appear to be implied by member containment. For block pools, it
|
||||
/// is entirely possible that user code (or internal code) keeps a reference to a datablock
|
||||
/// after the block pool from which it came is destroyed or deleted. Consequently,
|
||||
/// the block pool owner pointer is not guaranteed to be valid when a block is released,
|
||||
/// and we must keep a global list of what block pool objects are actually valid and
|
||||
/// active to avoid attempting to return a block to a pool that has been deleted.
|
||||
/// This method creates the data structures pertinent to maintaining that information.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/18/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static void
|
||||
InitializeBlockPoolManager(
|
||||
VOID
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destroy the block pool manager. Because PTask objects are
|
||||
/// reference counted, it is difficult to enforce life-cycle relationships
|
||||
/// that appear to be implied by member containment. For block pools, it
|
||||
/// is entirely possible that user code (or internal code) keeps a reference to a datablock
|
||||
/// after the block pool from which it came is destroyed or deleted. Consequently,
|
||||
/// the block pool owner pointer is not guaranteed to be valid when a block is released,
|
||||
/// and we must keep a global list of what block pool objects are actually valid and
|
||||
/// active to avoid attempting to return a block to a pool that has been deleted.
|
||||
/// This method cleans up the data structures pertinent to maintaining that information.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/18/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static void
|
||||
DestroyBlockPoolManager(
|
||||
VOID
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Is a block pool owner pointer valid? Because PTask objects are reference counted,
|
||||
/// it is difficult to enforce life-cycle relationships that appear to be implied by
|
||||
/// member containment. For block pools, it is entirely possible that user code (or
|
||||
/// internal code) keeps a reference to a datablock after the block pool from which
|
||||
/// it came is destroyed or deleted. Consequently, the block pool owner pointer is
|
||||
/// not guaranteed to be valid when a block is released, and we must keep a global
|
||||
/// list of what block pool objects are actually valid and active to avoid attempting
|
||||
/// to return a block to a pool that has been deleted.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/18/2013. </remarks>
|
||||
///
|
||||
/// <param name="pOwner"> [in,out] If non-null, the owner. </param>
|
||||
///
|
||||
/// <returns> true if a pool owner is active, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static BOOL
|
||||
IsPoolOwnerActive(
|
||||
__in BlockPoolOwner * pOwner
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Add a new block pool owner to the global list. Because PTask objects are
|
||||
/// reference counted, it is difficult to enforce life-cycle relationships that
|
||||
/// appear to be implied by member containment. For block pools, it is entirely
|
||||
/// possible that user code (or internal code) keeps a reference to a datablock after
|
||||
/// the block pool from which it came is destroyed or deleted. Consequently, the
|
||||
/// block pool owner pointer is not guaranteed to be valid when a block is released,
|
||||
/// and we must keep a global list of what block pool objects are actually valid and
|
||||
/// active to avoid attempting to return a block to a pool that has been deleted.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/18/2013. </remarks>
|
||||
///
|
||||
/// <param name="pGraph"> [in,out] If non-null, the graph. </param>
|
||||
/// <param name="pOwner"> [in,out] If non-null, the owner. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static void
|
||||
RegisterActivePoolOwner(
|
||||
__in Graph * pGraph,
|
||||
__in BlockPoolOwner * pOwner
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Retire a block pool owner from the global list. Because PTask objects are
|
||||
/// reference counted, it is difficult to enforce life-cycle relationships that
|
||||
/// appear to be implied by member containment. For block pools, it is entirely
|
||||
/// possible that user code (or internal code) keeps a reference to a datablock after
|
||||
/// the block pool from which it came is destroyed or deleted. Consequently, the
|
||||
/// block pool owner pointer is not guaranteed to be valid when a block is released,
|
||||
/// and we must keep a global list of what block pool objects are actually valid and
|
||||
/// active to avoid attempting to return a block to a pool that has been deleted.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/18/2013. </remarks>
|
||||
///
|
||||
/// <param name="pOwner"> [in,out] If non-null, the owner. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static void
|
||||
RetirePoolOwner(
|
||||
__in BlockPoolOwner * pOwner
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Retire all block pool owners belonging to the given graph. Because PTask objects are
|
||||
/// reference counted, it is difficult to enforce life-cycle relationships that
|
||||
/// appear to be implied by member containment. For block pools, it is entirely
|
||||
/// possible that user code (or internal code) keeps a reference to a datablock after
|
||||
/// the block pool from which it came is destroyed or deleted. Consequently, the
|
||||
/// block pool owner pointer is not guaranteed to be valid when a block is released,
|
||||
/// and we must keep a global list of what block pool objects are actually valid and
|
||||
/// active to avoid attempting to return a block to a pool that has been deleted.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/18/2013. </remarks>
|
||||
///
|
||||
/// <param name="pGraph"> [in] If non-null, the graph whose block pool owners are retired. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static void
|
||||
RetireGraph(
|
||||
__in Graph * pGraph
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> add a new block to the pool. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
|
||||
/// <param name="pBlock"> [in,out] If non-null, the block. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void AddNewBlock(Datablock * pBlock)=0;
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> return a block to the pool. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
|
||||
/// <param name="pBlock"> [in,out] If non-null, the block. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void ReturnToPool(Datablock * pBlock)=0;
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if the owned pool is a global pool. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/30/2013. </remarks>
|
||||
///
|
||||
/// <returns> true if global pool, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL BlockPoolIsGlobal()=0;
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Allocate block pool. Attempt to preallocate blocks on this port.
|
||||
///
|
||||
/// Allocation of data-blocks and platform-specific buffers can be a significant
|
||||
/// latency expense at dispatch time. We can actually preallocate output datablocks
|
||||
/// and create device-side buffers at graph construction time. For each node in the
|
||||
/// graph, allocate data blocks on any output ports, and create device-specific
|
||||
/// buffers for all accelerators capable of executing the node.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/15/2012. </remarks>
|
||||
///
|
||||
/// <param name="pAccelerators"> [in] If non-null, the accelerators on which views of blocks
|
||||
/// allocated in the pool may be required. </param>
|
||||
/// <param name="uiPoolSize"> [in] (optional) Size of the pool. If zero/defaulted,
|
||||
/// Runtime::GetICBlockPoolSize() will be used to determine the
|
||||
/// size of the pool. </param>
|
||||
///
|
||||
/// <returns> True if it succeeds, false if it fails. If a port type doesn't actually implement
|
||||
/// pooling, return false as well.
|
||||
/// </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
AllocateBlockPool(
|
||||
__in std::vector<Accelerator*>* pAccelerators,
|
||||
__in unsigned int uiPoolSize=0
|
||||
)=0;
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Allocate block pool. Attempt to preallocate blocks on this port.
|
||||
/// Asynchronous version. Only allocates device-space buffers
|
||||
/// in the first pass. Second pass queues all the copies.
|
||||
/// This function handles only the first pass.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/15/2012. </remarks>
|
||||
///
|
||||
/// <param name="pAccelerators"> [in] If non-null, the accelerators on which views of blocks
|
||||
/// allocated in the pool may be required. </param>
|
||||
/// <param name="uiPoolSize"> [in] (optional) Size of the pool. If zero/defaulted,
|
||||
/// Runtime::GetICBlockPoolSize() will be used to determine the
|
||||
/// size of the pool. </param>
|
||||
///
|
||||
/// <returns> True if it succeeds, false if it fails. If a port type doesn't actually implement
|
||||
/// pooling, return false as well.
|
||||
/// </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
AllocateBlockPoolAsync(
|
||||
__in std::vector<Accelerator*>* pAccelerators,
|
||||
__in unsigned int uiPoolSize=0
|
||||
)=0;
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Allocate block pool. Attempt to preallocate blocks on this port.
|
||||
/// Asynchronous version. Only allocates device-space buffers
|
||||
/// in the first pass. Second pass queues all the copies.
|
||||
/// This function handles the second pass.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/15/2012.
|
||||
///
|
||||
/// This function takes no parameters (VOID). The accelerator list and pool
|
||||
/// size are arguments of the first pass, AllocateBlockPoolAsync, not of
|
||||
/// this second pass.
|
||||
/// </remarks>
|
||||
///
|
||||
///
|
||||
/// <returns> True if it succeeds, false if it fails. If a port type doesn't actually implement
|
||||
/// pooling, return false as well.
|
||||
/// </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
FinalizeBlockPoolAsync(
|
||||
VOID
|
||||
)=0;
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destroys the block pool. AddRef everything in the pool, set its owner
|
||||
/// to null, and then release it. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/17/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void
|
||||
DestroyBlockPool(
|
||||
VOID
|
||||
)=0;
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if this object has block pool. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
|
||||
/// <returns> true if block pool, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL HasBlockPool()=0;
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Force block pooling for a port that has an up-stream allocator. In general, when
|
||||
/// we have an upstream allocator (meta) port, the runtime will not create a block
|
||||
/// pool for the corresponding output port. This turns out to put device-side
|
||||
/// allocation on the critical path in some cases, so we provide a way to override
|
||||
/// that behavior and allow a port to create a pool based on some size hints. When
|
||||
/// there is a block available with sufficient space in the pool, the meta port can
|
||||
/// avoid the allocation and draw from the pool.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 9/25/2012. </remarks>
|
||||
///
|
||||
/// <param name="nPoolSize"> Size of the block pool. </param>
|
||||
/// <param name="nStride"> The stride. </param>
|
||||
/// <param name="nDataBytes"> The data in bytes. </param>
|
||||
/// <param name="nMetaBytes"> The meta in bytes. </param>
|
||||
/// <param name="nTemplateBytes"> The template in bytes. </param>
|
||||
/// <param name="bPageLockHostViews"> (optional) the page lock host views. </param>
|
||||
/// <param name="bEagerDeviceMaterialize"> (optional) the eager device materialize. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void
|
||||
ForceBlockPoolHint(
|
||||
__in UINT nPoolSize,
|
||||
__in UINT nStride,
|
||||
__in UINT nDataBytes,
|
||||
__in UINT nMetaBytes,
|
||||
__in UINT nTemplateBytes,
|
||||
__in BOOL bPageLockHostViews=FALSE,
|
||||
__in BOOL bEagerDeviceMaterialize=FALSE
|
||||
)=0;
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets pool size. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
|
||||
/// <returns> The pool size. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual UINT
|
||||
GetPoolSize()=0;
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Sets request page locked. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
|
||||
/// <param name="bPageLocked"> true to lock, false to unlock the page. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void SetRequestsPageLocked(BOOL bPageLocked)=0;
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets request page locked. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL GetRequestsPageLocked()=0;
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the owner name. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/18/2013. </remarks>
|
||||
///
|
||||
/// <returns> null if it fails, else the owner name. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual char * GetPoolOwnerName()=0;
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Queries if a block pool is active and able to deliver/return blocks. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/18/2013. </remarks>
|
||||
///
|
||||
/// <returns> true if a block pool is active, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL IsBlockPoolActive()=0;
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets high water mark. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/19/2013. </remarks>
|
||||
///
|
||||
/// <returns> The high water mark. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual UINT GetHighWaterMark()=0;
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the total number of blocks owned by the pool. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/19/2013. </remarks>
|
||||
///
|
||||
/// <returns> The total number of blocks owned by the pool (whether they are queued or not). </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual UINT GetOwnedBlockCount()=0;
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the low water mark. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/19/2013. </remarks>
|
||||
///
|
||||
/// <returns> The low water mark. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual UINT GetLowWaterMark()=0;
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the currently available count. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/19/2013. </remarks>
|
||||
///
|
||||
/// <returns> The currently available block count. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual UINT GetAvailableBlockCount()=0;
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets a destination buffer for a block with an upstream
|
||||
/// allocator. Succeeds only if the pool happens to have blocks
|
||||
/// backed by sufficient resources in all channels that are backed.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/20/2011. </remarks>
|
||||
///
|
||||
/// <param name="pAccelerator"> (optional) [in] If non-null, the accelerator. </param>
/// <param name="uiDataBytes"> (optional) [in] Data size in bytes; defaults to 0.
/// NOTE(review): presumably the data capacity a pooled block must already
/// back to qualify -- confirm against the implementations. </param>
/// <param name="uiMetaBytes"> (optional) [in] Meta size in bytes; defaults to 0.
/// NOTE(review): same assumption as uiDataBytes -- confirm. </param>
/// <param name="uiTemplateBytes"> (optional) [in] Template size in bytes; defaults to 0.
/// NOTE(review): same assumption as uiDataBytes -- confirm. </param>
|
||||
///
|
||||
/// <returns> null if it fails, else the destination buffer. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual Datablock *
|
||||
GetBlockFromPool(
|
||||
__in Accelerator * pAccelerator=NULL,
|
||||
__in UINT uiDataBytes=0,
|
||||
__in UINT uiMetaBytes=0,
|
||||
__in UINT uiTemplateBytes=0
|
||||
)=0;
|
||||
|
||||
/// <summary> The lock for the block pool owners. </summary>
|
||||
static CRITICAL_SECTION s_csBlockPoolOwners;
|
||||
|
||||
/// <summary> true if block pool owner management is initialized. </summary>
|
||||
static LONG s_bPoolOwnersInit;
|
||||
|
||||
/// <summary> The active pool owners. </summary>
|
||||
static std::map<BlockPoolOwner*, Graph*> s_vActivePoolOwners;
|
||||
|
||||
/// <summary> The dead pool owners. </summary>
|
||||
static std::map<BlockPoolOwner*, Graph*> s_vDeadPoolOwners;
|
||||
|
||||
};
|
||||
|
||||
};
|
||||
|
||||
#endif
|
|
@ -1,213 +0,0 @@
|
|||
///-------------------------------------------------------------------------------------------------
|
||||
// file: CLAsyncContext.h
|
||||
//
|
||||
// summary: Declares the OpenCL asynchronous context class
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
#ifndef __CL_ASYNC_CONTEXT_H__
|
||||
#define __CL_ASYNC_CONTEXT_H__
|
||||
#ifdef OPENCL_SUPPORT
|
||||
|
||||
#include "primitive_types.h"
|
||||
#include "accelerator.h"
|
||||
#include "claccelerator.h"
|
||||
#include "task.h"
|
||||
#include "channel.h"
|
||||
#include "hrperft.h"
|
||||
#include "AsyncContext.h"
|
||||
#include "AsyncDependence.h"
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include <list>
|
||||
|
||||
namespace PTask {
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> OpenCL asynchronous context. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/18/2012.
|
||||
///
|
||||
/// FIXME: TODO:
|
||||
/// -------------------
|
||||
/// OpenCL supports events and command queues such that we can implement fine grain
|
||||
/// dependences exactly as they are implemented for the cuda backend. Currently there
|
||||
/// just isn't enough demand for the OpenCL backend to justify prioritizing that
|
||||
/// development effort. Hence, all OpenCL calls are currently synchronous, and the
|
||||
/// platform-specific work of managing dependences and waiting for them to resolve can
|
||||
/// be completely elided.
|
||||
///
|
||||
/// </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
class CLAsyncContext : public AsyncContext {
|
||||
public:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Constructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/24/2012. </remarks>
|
||||
///
|
||||
/// <param name="pDeviceContext"> [in] If non-null, context for the device. </param>
|
||||
/// <param name="pTaskContext"> [in] If non-null, context for the task. </param>
|
||||
/// <param name="eAsyncContextType"> [in] Type of the asynchronous context. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
CLAsyncContext(
|
||||
__in Accelerator * pDeviceContext,
|
||||
__in Task * pTaskContext,
|
||||
__in ASYNCCONTEXTTYPE eAsyncContextType
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/24/2012. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual ~CLAsyncContext();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Initializes this object. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/25/2012. </remarks>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL Initialize();
|
||||
|
||||
protected:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Platform specific create synchronization point. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/25/2012. </remarks>
|
||||
///
/// <param name="pPSSyncObject"> If non-null, an opaque sync object.
/// NOTE(review): presumably the platform-specific
/// object the new sync point wraps -- confirm. </param>
///
|
||||
/// <returns> null if it fails, else the sync point. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual SyncPoint *
|
||||
PlatformSpecificCreateSyncPoint(
|
||||
void * pPSSyncObject
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Platform specific destroy synchronization point. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/25/2012. </remarks>
|
||||
///
|
||||
/// <param name="pSyncPoint"> [in] If non-null, the sync point. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
PlatformSpecificDestroySynchronizationPoint(
|
||||
__in SyncPoint * pSyncPoint
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Determines if we can platform specific synchronize context. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
PlatformSpecificSynchronizeContext(
|
||||
VOID
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Wait for dependence asynchronously. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/24/2012. </remarks>
|
||||
///
|
||||
/// <param name="pSyncPoint"> [in] If non-null, the sync point. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
PlatformSpecificInsertFence(
|
||||
__in SyncPoint * pSyncPoint
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Wait for dependence synchronously. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/24/2012. </remarks>
|
||||
///
|
||||
/// <param name="pSyncPoint"> [in] If non-null, the sync point. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
PlatformSpecificSynchronousWait(
|
||||
__in SyncPoint * pSyncPoint
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Determines if the sync point is resolved (and marks it if so). </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
/// <param name="pSyncPoint"> [in,out] If non-null, the sync point. </param>
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
PlatformSpecificQueryOutstanding(
|
||||
__inout SyncPoint * pSyncPoint
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Platform specific nonblocking check whether the event remains outstanding. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 7/2/2013. </remarks>
|
||||
///
|
||||
/// <param name="pSyncPoint"> [in,out] If non-null, the sync point. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
PlatformSpecificNonblockingQueryOutstanding(
|
||||
__inout SyncPoint * pSyncPoint
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Wait for dependence synchronously without locking the async context
|
||||
/// or underlying accelerator: this simplifies lock acquisition for such
|
||||
/// waits, but at the expense of leaving live dependences that are
|
||||
/// actually resolved. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/24/2012. </remarks>
|
||||
///
|
||||
/// <param name="pSyncPoint"> [in] If non-null, the sync point. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
PlatformSpecificLocklessSynchronousWait(
|
||||
__in SyncPoint * pSyncPoint
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the platform context object. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/25/2012. </remarks>
|
||||
///
|
||||
/// <returns> null if it fails, else the platform context object. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void *
|
||||
GetPlatformContextObject();
|
||||
|
||||
};
|
||||
|
||||
};
|
||||
#endif
|
||||
#endif
|
|
@ -1,249 +0,0 @@
|
|||
///-------------------------------------------------------------------------------------------------
|
||||
// file: CUAsyncContext.h
|
||||
//
|
||||
// summary: Declares the CUDA asynchronous context class
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
#ifndef __CUDA_ASYNC_CONTEXT_H__
|
||||
#define __CUDA_ASYNC_CONTEXT_H__
|
||||
#ifdef CUDA_SUPPORT
|
||||
|
||||
#include "primitive_types.h"
|
||||
#include "accelerator.h"
|
||||
#include "AsyncContext.h"
|
||||
#include "cuhdr.h"
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include <list>
|
||||
|
||||
namespace PTask {
|
||||
|
||||
class Task;
|
||||
class SyncPoint;
|
||||
class Accelerator;
|
||||
class AsyncDependence;
|
||||
|
||||
class CUAsyncContext : public AsyncContext {
|
||||
public:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Constructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/24/2012. </remarks>
|
||||
///
|
||||
/// <param name="pDeviceContext"> [in,out] If non-null, context for the device. </param>
|
||||
/// <param name="pTaskContext"> [in,out] If non-null, context for the task. </param>
|
||||
/// <param name="eAsyncContextType"> Type of the asynchronous context. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
CUAsyncContext(
|
||||
__in Accelerator * pDeviceContext,
|
||||
__in Task * pTaskContext,
|
||||
__in ASYNCCONTEXTTYPE eAsyncContextType
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/24/2012. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual ~CUAsyncContext();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Initializes this object. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/25/2012. </remarks>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL Initialize();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Notifies the device synchronized. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 7/8/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void NotifyDeviceSynchronized();
|
||||
|
||||
protected:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Platform specific create synchronization point. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/25/2012. </remarks>
|
||||
///
|
||||
/// <returns> null if it fails, else. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual SyncPoint *
|
||||
PlatformSpecificCreateSyncPoint(
|
||||
void * pPSSyncObject
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Platform specific destroy synchronization point. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/25/2012. </remarks>
|
||||
///
|
||||
/// <param name="pSyncPoint"> [in,out] If non-null, the synchronise point. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
PlatformSpecificDestroySynchronizationPoint(
|
||||
__in SyncPoint * pSyncPoint
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Determines if we can platform specific synchronize context. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
PlatformSpecificSynchronizeContext(
|
||||
VOID
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Wait for dependence asynchronously. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/24/2012. </remarks>
|
||||
///
|
||||
/// <param name="pSyncPoint"> [in] If non-null, the sync point. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
PlatformSpecificInsertFence(
|
||||
__in SyncPoint * pSyncPoint
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Wait for dependence synchronously. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/24/2012. </remarks>
|
||||
///
|
||||
/// <param name="pSyncPoint"> [in] If non-null, the sync point. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
PlatformSpecificSynchronousWait(
|
||||
__in SyncPoint * pSyncPoint
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Determines if the sync point is resolved (and marks it if so). </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
PlatformSpecificQueryOutstanding(
|
||||
__inout SyncPoint * pSyncPoint
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Platform specific nonblocking check whether the event remains outstanding. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 7/2/2013. </remarks>
|
||||
///
|
||||
/// <param name="pSyncPoint"> [in,out] If non-null, the synchronise point. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
PlatformSpecificNonblockingQueryOutstanding(
|
||||
__inout SyncPoint * pSyncPoint
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Wait for dependence synchronously without locking the async context
|
||||
/// or underlying accelerator: this simplifies lock acquisition for such
|
||||
/// waits, but at the expense of leaving live dependences that are
|
||||
/// actually resolved. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/24/2012. </remarks>
|
||||
///
|
||||
/// <param name="pSyncPoint"> [in] The sync point to wait for. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
PlatformSpecificLocklessSynchronousWait(
|
||||
__in SyncPoint * pSyncPoint
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the platform context object. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/25/2012. </remarks>
|
||||
///
|
||||
/// <returns> null if it fails, else the platform context object. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void *
|
||||
GetPlatformContextObject();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets stream priority. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 3/20/2014. </remarks>
|
||||
///
|
||||
/// <returns> The stream priority. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
int GetStreamPriority();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Sets stream priority. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 3/20/2014. </remarks>
|
||||
///
|
||||
/// <param name="nPriority"> The priority. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void SetStreamPriority(int nPriority);
|
||||
|
||||
/// <summary> The stream. </summary>
|
||||
CUstream m_hStream;
|
||||
|
||||
/// <summary> The last fence. </summary>
|
||||
CUevent m_hLastFence;
|
||||
|
||||
/// <summary> The event. </summary>
|
||||
CUevent m_hEvent;
|
||||
|
||||
/// <summary> The stream priority. </summary>
|
||||
int m_nStreamPriority;
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets a string describing this refcount object. Allows subclasses to
|
||||
/// provide overrides that make leaks easier to find when detected by the
|
||||
/// rc profiler.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/9/2013. </remarks>
|
||||
///
|
||||
/// <returns> A string describing this refcount object. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual std::string GetRCProfileDescriptor();
|
||||
};
|
||||
|
||||
};
|
||||
#endif // CUDA_SUPPORT
|
||||
#endif
|
|
@ -1,181 +0,0 @@
|
|||
///-------------------------------------------------------------------------------------------------
|
||||
// file: ChannelProfiler.h
|
||||
//
|
||||
// summary: Declares the channel profiler class
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
#ifndef __CHANNEL_PROFILER_H__
|
||||
#define __CHANNEL_PROFILER_H__
|
||||
|
||||
#include "primitive_types.h"
|
||||
#include <sstream>
|
||||
|
||||
namespace PTask {
|
||||
|
||||
class Channel;
|
||||
|
||||
typedef struct __channel_stats_t {
|
||||
|
||||
/// <summary> The block throughput limit. </summary>
|
||||
UINT uiBlockTransitLimit;
|
||||
|
||||
/// <summary> The blocks delivered. </summary>
|
||||
UINT uiBlocksDelivered;
|
||||
|
||||
/// <summary> The maximum occupancy. </summary>
|
||||
UINT uiMaxOccupancy;
|
||||
|
||||
/// <summary> The cumulative occupancy. </summary>
|
||||
UINT uiCumulativeOccupancy;
|
||||
|
||||
/// <summary> The occupancy samples. </summary>
|
||||
UINT uiOccupancySamples;
|
||||
|
||||
/// <summary> The capacity. </summary>
|
||||
UINT uiCapacity;
|
||||
|
||||
/// <summary> true if the channel is/was a pool owner. </summary>
|
||||
BOOL bPoolOwner;
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Resets the stats object. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/19/2013. </remarks>
|
||||
///
|
||||
/// <param name="parameter1"> The first parameter. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void Reset(
|
||||
VOID
|
||||
)
|
||||
{
|
||||
uiBlockTransitLimit = 0;
|
||||
uiBlocksDelivered = 0;
|
||||
uiMaxOccupancy = 0;
|
||||
uiCumulativeOccupancy = 0;
|
||||
uiOccupancySamples = 0;
|
||||
uiCapacity = 0;
|
||||
bPoolOwner = 0;
|
||||
}
|
||||
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Updates the stats object with a current snapshot of the channel state. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/19/2013. </remarks>
|
||||
///
|
||||
/// <param name="pChannel"> [in,out] If non-null, the channel. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void Update(
|
||||
Channel * pChannel
|
||||
)
|
||||
{
|
||||
uiBlockTransitLimit = pChannel->GetBlockTransitLimit();
|
||||
uiBlocksDelivered = pChannel->GetCumulativeBlockTransit();
|
||||
uiMaxOccupancy = pChannel->GetMaxOccupancy();
|
||||
uiCumulativeOccupancy = pChannel->GetCumulativeOccupancy();
|
||||
uiOccupancySamples = pChannel->GetOccupancySamples();
|
||||
uiCapacity = pChannel->GetCapacity();
|
||||
bPoolOwner = pChannel->IsPoolOwner();
|
||||
}
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Default constructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/18/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
__channel_stats_t::__channel_stats_t(
|
||||
VOID
|
||||
)
|
||||
{
|
||||
Reset();
|
||||
}
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Constructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/19/2013. </remarks>
|
||||
///
|
||||
/// <param name="pChannel"> [in,out] If non-null, the channel. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
__channel_stats_t::__channel_stats_t(
|
||||
Channel * pChannel
|
||||
)
|
||||
{
|
||||
Update(pChannel);
|
||||
}
|
||||
|
||||
} CHANNELSTATISTICS;
|
||||
|
||||
class ChannelProfiler {
|
||||
|
||||
public:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Constructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/18/2013. </remarks>
|
||||
///
|
||||
/// <param name="pChannel"> [in,out] If non-null, the channel. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
ChannelProfiler(Channel * pChannel);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/18/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual ~ChannelProfiler();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Initializes this object. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/18/2013. </remarks>
|
||||
///
|
||||
/// <param name="bEnable"> true to enable, false to disable. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static void Initialize(BOOL bEnable);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> De-initialises this object and frees any resources it is using. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/18/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static void Deinitialize();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Reports the given ss. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/18/2013. </remarks>
|
||||
///
|
||||
/// <param name="ss"> [in,out] The ss. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static void Report(std::ostream& ss);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Merge instance statistics. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/18/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void MergeInstanceStatistics();
|
||||
|
||||
protected:
|
||||
|
||||
Channel * m_pChannel;
|
||||
static BOOL m_bChannelProfile;
|
||||
static BOOL m_bChannelProfileInit;
|
||||
static CRITICAL_SECTION m_csChannelStats;
|
||||
static std::map<std::string, std::map<std::string, CHANNELSTATISTICS*>*> m_vChannelStats;
|
||||
};
|
||||
|
||||
};
|
||||
#endif
|
|
@ -1,998 +0,0 @@
|
|||
///-------------------------------------------------------------------------------------------------
|
||||
// file: CoherenceProfiler.h
|
||||
//
|
||||
// summary: Declares the coherence profiler class
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
#ifndef __COHERENCE_PROFILER_H__
|
||||
#define __COHERENCE_PROFILER_H__
|
||||
|
||||
#include "primitive_types.h"
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <assert.h>
|
||||
|
||||
class CHighResolutionTimer;
|
||||
|
||||
namespace PTask {
|
||||
|
||||
class Port;
|
||||
class Task;
|
||||
class Datablock;
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Event types that can cause a coherence state transition. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 9/18/2012. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
typedef enum COHERENCEEVENT_t {

    /// <summary> No triggering event was specified for the transition. </summary>
    CET_UNSPECIFIED = 0,

    /// <summary> Transition triggered by binding the block to a task input. </summary>
    CET_BIND_INPUT = 1,

    /// <summary> Transition triggered by binding the block to a task output. </summary>
    CET_BIND_OUTPUT = 2,

    /// <summary> Transition triggered by binding the block to a task constant port. </summary>
    CET_BIND_CONSTANT = 3,

    /// <summary> Transition triggered by pushing the block into multiple consumer
    ///           channels. </summary>
    CET_PUSH_DOWNSTREAM_SHARE = 4,

    /// <summary> Transition triggered by a user request for a pointer in host space. </summary>
    CET_POINTER_REQUEST = 5,

    /// <summary> Transition triggered by deletion of the block. </summary>
    CET_BLOCK_DELETE = 6,

    /// <summary> Transition triggered by cloning of the block. </summary>
    CET_BLOCK_CLONE = 7,

    /// <summary> Transition triggered by allocation of the block. </summary>
    CET_BLOCK_CREATE = 8,

    /// <summary> The host view of the block is being updated, but the information
    ///           needed to identify the triggering action is not available.
    ///           Most likely a user request.
    ///           </summary>
    CET_HOST_VIEW_UPDATE = 9,

    /// <summary> The device view of the block is being updated, but the information
    ///           needed to identify the triggering action is not available.
    ///           Most likely a user request.
    ///           </summary>
    CET_ACCELERATOR_VIEW_UPDATE = 10,

    /// <summary> Buffers are being allocated for this block. </summary>
    CET_BUFFER_ALLOCATE = 11,

    /// <summary> A request to grow the buffer caused buffer reallocation and,
    ///           potentially, view updates as a side effect. </summary>
    CET_GROW_BUFFER = 12,

    /// <summary> A request to synthesize a metadata block caused the traffic. </summary>
    CET_SYNTHESIZE_BLOCK = 13,

    /// <summary> A pinned host buffer was needed in addition to a device buffer. </summary>
    CET_PINNED_HOST_VIEW_CREATE = 14,

} COHERENCEEVENTTYPE;
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Defines a structure for collecting detailed data for
|
||||
/// a coherence state transition. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 9/18/2012. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
typedef struct COHERENCETRANSITION_t {
|
||||
public:
|
||||
|
||||
/// <summary> True if this transition has completed and this record should
|
||||
/// no longer be allowed to change.
|
||||
/// </summary>
|
||||
BOOL bFinalized;
|
||||
|
||||
/// <summary> True if a data transfer occurred for this transition. </summary>
|
||||
BOOL bXferOccurred;
|
||||
|
||||
/// <summary> The timestamp at the start of the transition. </summary>
|
||||
double nStartTimestamp;
|
||||
|
||||
/// <summary> The timestamp at the end of the transition. </summary>
|
||||
double nEndTimestamp;
|
||||
|
||||
/// <summary> Identifier for the source memory space. </summary>
|
||||
UINT uiSrcMemorySpaceId;
|
||||
|
||||
/// <summary> Identifier for the destination memory space. </summary>
|
||||
UINT uiDstMemorySpaceId;
|
||||
|
||||
/// <summary> The event that triggered this transition. </summary>
|
||||
COHERENCEEVENTTYPE eTriggerEvent;
|
||||
|
||||
/// <summary> The requested state of the block in response to the event. </summary>
|
||||
BUFFER_COHERENCE_STATE eTargetState;
|
||||
|
||||
/// <summary> The start state of the block (snapshot of the state per memory space). </summary>
|
||||
BUFFER_COHERENCE_STATE eStartState[MAX_MEMORY_SPACES];
|
||||
|
||||
/// <summary> The end state of the block (snapshot of the state per memory space). </summary>
|
||||
BUFFER_COHERENCE_STATE eEndState[MAX_MEMORY_SPACES];
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Constructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 9/18/2012. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
COHERENCETRANSITION_t(
|
||||
double dStartTimestamp
|
||||
)
|
||||
{
|
||||
bFinalized = FALSE;
|
||||
bXferOccurred = FALSE;
|
||||
nStartTimestamp = dStartTimestamp;
|
||||
eTriggerEvent = CET_UNSPECIFIED;
|
||||
eTargetState = BSTATE_NO_ENTRY;
|
||||
uiSrcMemorySpaceId = HOST_MEMORY_SPACE_ID;
|
||||
uiDstMemorySpaceId = HOST_MEMORY_SPACE_ID;
|
||||
for(int i=0; i<MAX_MEMORY_SPACES; i++) {
|
||||
eStartState[i] = BSTATE_NO_ENTRY;
|
||||
eEndState[i] = BSTATE_NO_ENTRY;
|
||||
}
|
||||
}
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Finalizes this record. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 9/19/2012. </remarks>
|
||||
///
|
||||
/// <param name="dEndTimestamp"> The end timestamp. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void
|
||||
Finalize(
|
||||
double dEndTimestamp,
|
||||
BOOL bTransfer
|
||||
)
|
||||
{
|
||||
nEndTimestamp = dEndTimestamp;
|
||||
bFinalized = TRUE;
|
||||
bXferOccurred = bTransfer;
|
||||
}
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Check that all the memory spaces have compatible states in the snapshot. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 9/18/2012. </remarks>
|
||||
///
|
||||
/// <param name="pSnapshot"> [in,out] If non-null, the snapshot. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
UINT
|
||||
GetNumberOfValidCopies(
|
||||
__in BUFFER_COHERENCE_STATE * pSnapshot
|
||||
)
|
||||
{
|
||||
UINT nValidEntries = 0;
|
||||
UINT nMemSpaces = MemorySpace::GetNumberOfMemorySpaces();
|
||||
|
||||
for(UINT i=HOST_MEMORY_SPACE_ID; i<nMemSpaces; i++) {
|
||||
|
||||
// count up number of copies in various states.
|
||||
BUFFER_COHERENCE_STATE uiCoherenceState = pSnapshot[i];
|
||||
switch(uiCoherenceState) {
|
||||
case BSTATE_NO_ENTRY: break;
|
||||
case BSTATE_INVALID: break;
|
||||
case BSTATE_SHARED: nValidEntries++; break;
|
||||
case BSTATE_EXCLUSIVE: nValidEntries++; break;
|
||||
}
|
||||
}
|
||||
|
||||
return nValidEntries;
|
||||
}
|
||||
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Check that all the memory spaces have compatible states in the snapshot. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 9/18/2012. </remarks>
|
||||
///
|
||||
/// <param name="pSnapshot"> [in,out] If non-null, the snapshot. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
BOOL
|
||||
ValidState(
|
||||
__in BUFFER_COHERENCE_STATE * pSnapshot
|
||||
)
|
||||
{
|
||||
UINT nInvalidEntries = 0;
|
||||
UINT nNoEntryEntries = 0;
|
||||
UINT nExclusiveCopies = 0;
|
||||
UINT nSharedCopies = 0;
|
||||
UINT nMemSpaces = MemorySpace::GetNumberOfMemorySpaces();
|
||||
|
||||
for(UINT i=HOST_MEMORY_SPACE_ID; i<nMemSpaces; i++) {
|
||||
|
||||
// count up number of copies in various states.
|
||||
BUFFER_COHERENCE_STATE uiCoherenceState = pSnapshot[i];
|
||||
switch(uiCoherenceState) {
|
||||
case BSTATE_NO_ENTRY: nInvalidEntries++; break;
|
||||
case BSTATE_INVALID: nNoEntryEntries++; break;
|
||||
case BSTATE_SHARED: nSharedCopies++; break;
|
||||
case BSTATE_EXCLUSIVE: nExclusiveCopies++; break;
|
||||
}
|
||||
}
|
||||
|
||||
BOOL bCorrectSharedState = (nSharedCopies >= 0 && nExclusiveCopies == 0);
|
||||
BOOL bCorrectExclusiveState = (nSharedCopies == 0 && nExclusiveCopies == 1);
|
||||
assert(bCorrectSharedState || bCorrectExclusiveState);
|
||||
return bCorrectSharedState || bCorrectExclusiveState;
|
||||
}
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Check that all the memory spaces have compatible states in the snapshot. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 9/18/2012. </remarks>
|
||||
///
|
||||
/// <param name="pSnapshot"> [in,out] If non-null, the snapshot. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
BUFFER_COHERENCE_STATE
|
||||
GetCollectiveState(
|
||||
__in BUFFER_COHERENCE_STATE * pSnapshot
|
||||
)
|
||||
{
|
||||
assert(ValidState(pSnapshot));
|
||||
UINT nInvalidEntries = 0;
|
||||
UINT nNoEntryEntries = 0;
|
||||
UINT nExclusiveCopies = 0;
|
||||
UINT nSharedCopies = 0;
|
||||
UINT nMemSpaces = MemorySpace::GetNumberOfMemorySpaces();
|
||||
|
||||
for(UINT i=HOST_MEMORY_SPACE_ID; i<nMemSpaces; i++) {
|
||||
|
||||
// count up number of copies in various states.
|
||||
BUFFER_COHERENCE_STATE uiCoherenceState = pSnapshot[i];
|
||||
switch(uiCoherenceState) {
|
||||
case BSTATE_NO_ENTRY: nInvalidEntries++; break;
|
||||
case BSTATE_INVALID: nNoEntryEntries++; break;
|
||||
case BSTATE_SHARED: nSharedCopies++; break;
|
||||
case BSTATE_EXCLUSIVE: nExclusiveCopies++; break;
|
||||
}
|
||||
}
|
||||
|
||||
if(nExclusiveCopies > 0) return BSTATE_EXCLUSIVE;
|
||||
if(nSharedCopies > 0) return BSTATE_SHARED;
|
||||
if(nInvalidEntries > 0) return BSTATE_INVALID;
|
||||
return BSTATE_NO_ENTRY;
|
||||
}
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets a start state. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 9/18/2012. </remarks>
|
||||
///
|
||||
/// <param name="pSnapshot"> If non-null, the snapshot. </param>
|
||||
///
|
||||
/// <returns> The start state. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
BUFFER_COHERENCE_STATE
|
||||
GetStartState(
|
||||
VOID
|
||||
)
|
||||
{
|
||||
return GetCollectiveState(eStartState);
|
||||
}
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the final state for the state transition. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 9/18/2012. </remarks>
|
||||
///
|
||||
/// <param name="pSnapshot"> If non-null, the snapshot. </param>
|
||||
///
|
||||
/// <returns> The start state. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
BUFFER_COHERENCE_STATE
|
||||
GetFinalState(
|
||||
VOID
|
||||
)
|
||||
{
|
||||
return GetCollectiveState(eEndState);
|
||||
}
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> number of valid copies in an accelerator space. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 9/18/2012. </remarks>
|
||||
///
|
||||
/// <param name="pSnapshot"> [in,out] If non-null, the snapshot. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
UINT
|
||||
GetNumberOfValidAcceleratorCopies(
|
||||
__in BUFFER_COHERENCE_STATE * pSnapshot
|
||||
)
|
||||
{
|
||||
UINT nValidEntries = 0;
|
||||
UINT nMemSpaces = MemorySpace::GetNumberOfMemorySpaces();
|
||||
|
||||
for(UINT i=HOST_MEMORY_SPACE_ID+1; i<nMemSpaces; i++) {
|
||||
|
||||
// count up number of copies in various states.
|
||||
BUFFER_COHERENCE_STATE uiCoherenceState = pSnapshot[i];
|
||||
switch(uiCoherenceState) {
|
||||
case BSTATE_NO_ENTRY: break;
|
||||
case BSTATE_INVALID: break;
|
||||
case BSTATE_SHARED: nValidEntries++; break;
|
||||
case BSTATE_EXCLUSIVE: nValidEntries++; break;
|
||||
}
|
||||
}
|
||||
|
||||
return nValidEntries;
|
||||
}
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> number of valid copies in host space. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 9/18/2012. </remarks>
|
||||
///
|
||||
/// <param name="pSnapshot"> [in,out] If non-null, the snapshot. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
UINT
|
||||
GetNumberOfValidHostCopies(
|
||||
__in BUFFER_COHERENCE_STATE * pSnapshot
|
||||
)
|
||||
{
|
||||
|
||||
BUFFER_COHERENCE_STATE uiCoherenceState = pSnapshot[HOST_MEMORY_SPACE_ID];
|
||||
switch(uiCoherenceState) {
|
||||
case BSTATE_NO_ENTRY: return 0;
|
||||
case BSTATE_INVALID: return 0;
|
||||
case BSTATE_SHARED: return 1;
|
||||
case BSTATE_EXCLUSIVE: return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> number of valid copies in an accelerator space. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 9/18/2012. </remarks>
|
||||
///
|
||||
/// <param name="pSnapshot"> [in,out] If non-null, the snapshot. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
UINT
|
||||
GetStartNumberOfValidAcceleratorCopies(
|
||||
void
|
||||
)
|
||||
{
|
||||
return GetNumberOfValidAcceleratorCopies(eStartState);
|
||||
}
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> number of valid copies in an accelerator space. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 9/18/2012. </remarks>
|
||||
///
|
||||
/// <param name="pSnapshot"> [in,out] If non-null, the snapshot. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
UINT
|
||||
GetFinalNumberOfValidAcceleratorCopies(
|
||||
void
|
||||
)
|
||||
{
|
||||
return GetNumberOfValidAcceleratorCopies(eEndState);
|
||||
}
|
||||
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> number of valid copies in host space. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 9/18/2012. </remarks>
|
||||
///
|
||||
/// <param name="pSnapshot"> [in,out] If non-null, the snapshot. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
UINT
|
||||
GetStartNumberOfValidHostCopies(
|
||||
void
|
||||
)
|
||||
{
|
||||
return GetNumberOfValidHostCopies(eStartState);
|
||||
}
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> number of valid copies in host space. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 9/18/2012. </remarks>
|
||||
///
|
||||
/// <param name="pSnapshot"> [in,out] If non-null, the snapshot. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
UINT
|
||||
GetFinalNumberOfValidHostCopies(
|
||||
void
|
||||
)
|
||||
{
|
||||
return GetNumberOfValidHostCopies(eEndState);
|
||||
}
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> was this transfer a Host -> Device transfer? </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 9/20/2012. </remarks>
|
||||
///
|
||||
/// <returns> true if h to d xfer, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
BOOL
|
||||
IsHToDXfer(
|
||||
VOID
|
||||
)
|
||||
{
|
||||
if(!bXferOccurred) return FALSE;
|
||||
UINT nValidHostViewsS = GetStartNumberOfValidHostCopies();
|
||||
UINT nValidAccViewsS = GetStartNumberOfValidAcceleratorCopies();
|
||||
UINT nValidAccViewsF = GetFinalNumberOfValidAcceleratorCopies();
|
||||
switch(GetFinalState()) {
|
||||
case BSTATE_NO_ENTRY: assert(FALSE); break; // why transfer if there is no buffer?
|
||||
case BSTATE_INVALID: assert(FALSE); break; // why transfer to create an invalid entry?
|
||||
case BSTATE_SHARED: return nValidAccViewsF > nValidAccViewsS && (nValidHostViewsS > 0 || uiSrcMemorySpaceId == HOST_MEMORY_SPACE_ID);
|
||||
case BSTATE_EXCLUSIVE: return nValidAccViewsF > 0 && (nValidHostViewsS > 0 || uiSrcMemorySpaceId == HOST_MEMORY_SPACE_ID);
|
||||
}
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> was this transfer a Device -> Host transfer? </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 9/20/2012. </remarks>
|
||||
///
|
||||
/// <returns> true if d to h xfer, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
BOOL
|
||||
IsDToHXfer(
|
||||
VOID
|
||||
)
|
||||
{
|
||||
if(!bXferOccurred) return FALSE;
|
||||
UINT nValidHostViewsS = GetStartNumberOfValidHostCopies();
|
||||
UINT nValidHostViewsF = GetFinalNumberOfValidHostCopies();
|
||||
UINT nValidAccViewsS = GetStartNumberOfValidAcceleratorCopies();
|
||||
UINT nValidAccViewsF = GetFinalNumberOfValidAcceleratorCopies();
|
||||
switch(GetFinalState()) {
|
||||
case BSTATE_NO_ENTRY: assert(FALSE); break; // why transfer if there is no buffer?
|
||||
case BSTATE_INVALID: assert(FALSE); break; // why transfer to create an invalid entry?
|
||||
case BSTATE_SHARED: return nValidAccViewsS > 0 && nValidHostViewsS == 0 && nValidHostViewsF > 0;
|
||||
case BSTATE_EXCLUSIVE: return nValidAccViewsF < nValidAccViewsS && nValidHostViewsF > 0;
|
||||
}
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> was this transfer a Device -> Device transfer? </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 9/20/2012. </remarks>
|
||||
///
|
||||
/// <returns> true if d to d xfer, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
BOOL
IsDToDXfer(
    VOID
    )
{
    // Nothing moved, so there is nothing to classify.
    if(!bXferOccurred)
        return FALSE;

    UINT uiHostCopiesAtStart = GetStartNumberOfValidHostCopies();
    UINT uiAccCopiesAtStart  = GetStartNumberOfValidAcceleratorCopies();
    UINT uiAccCopiesAtEnd    = GetFinalNumberOfValidAcceleratorCopies();

    // A device-to-device transfer needs a valid device view both before
    // (something to copy from) and after (something copied to).
    if(uiAccCopiesAtStart == 0 || uiAccCopiesAtEnd == 0)
        return FALSE;

    switch(GetFinalState()) {

    case BSTATE_NO_ENTRY:   // why transfer if there is no buffer?
    case BSTATE_INVALID:    // why transfer to create an invalid entry?
        assert(FALSE);
        break;

    case BSTATE_EXCLUSIVE:
        // Ending exclusive means the memory space holding the valid view
        // changed. D->D transfers generally go through the host (with some
        // obvious exceptions), so a valid host view would have been preferred
        // as the source had one existed: only the absence of a host view at
        // the start indicates a D->D transfer.
        return uiHostCopiesAtStart == 0;

    case BSTATE_SHARED:
        // Ending shared with a valid device view at the start: the number of
        // device views must have strictly grown. Otherwise either no new
        // device view was created (no X->D transfer) or a device view was
        // invalidated, which the system would not do.
        if(uiAccCopiesAtEnd <= uiAccCopiesAtStart)
            return FALSE;

        switch(GetStartState()) {

        case BSTATE_NO_ENTRY:
        case BSTATE_INVALID:
            return FALSE;

        case BSTATE_EXCLUSIVE:
            // Only one copy existed to begin with, so the source was a
            // device iff a valid device view existed.
            return uiAccCopiesAtStart > 0;

        case BSTATE_SHARED:
            // The copy could have come from host or device: with no valid
            // host view it *had* to come from a device; otherwise go by the
            // recorded source memory space.
            return (uiHostCopiesAtStart == 0) ||
                   (uiSrcMemorySpaceId != HOST_MEMORY_SPACE_ID);
        }

        // Start state not covered by the cases above.
        return uiAccCopiesAtStart > 0 && uiAccCopiesAtEnd > uiAccCopiesAtStart;
    }

    return FALSE;
}
|
||||
|
||||
} COHERENCETRANSITION;
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Defines a structure for tracking per-datablock instance
|
||||
/// history of coherence traffic participation. If the PROFILE_MIGRATION
|
||||
/// compiler directive is selected, each datablock will maintain
|
||||
/// its own history in this structure, and each history will be merged
|
||||
///             into the static view defined below upon deletion. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 9/18/2012. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
typedef struct COHERENCEHISTORY_t {
|
||||
public:
|
||||
/// <summary> History of all ports to which a block has been bound. </summary>
|
||||
std::map<__int64, Port*>* pvPortBindHistory;
|
||||
|
||||
/// <summary> History of all IO consumer ports to which a block has been bound. </summary>
|
||||
std::map<__int64, Port*>* pvIOCPortBindHistory;
|
||||
|
||||
/// <summary> The set of all tasks which have touched this block. </summary>
|
||||
std::map<__int64, Task*>* pvTaskBindHistory;
|
||||
|
||||
/// <summary> The accelerator bind history (tracked as accelerator id). </summary>
|
||||
std::map<__int64, UINT>* pvAcceleratorBindHistory;
|
||||
|
||||
/// <summary> The accelerator bind history (tracked as accelerator id). </summary>
|
||||
std::map<__int64, UINT>* pvDepAcceleratorBindHistory;
|
||||
|
||||
/// <summary> The coherence state history. </summary>
|
||||
std::map<__int64, COHERENCETRANSITION*>* pvStateHistory;
|
||||
|
||||
/// <summary> The dbuid of the datablock for which this occurred. </summary>
|
||||
UINT uiDBUID;
|
||||
|
||||
/// <summary> The number of times this block required D->H xfer. </summary>
|
||||
LONG nDToHCopies;
|
||||
|
||||
/// <summary> The number of times this block required H->D xfer. </summary>
|
||||
LONG nHToDCopies;
|
||||
|
||||
/// <summary> The number of times this block required D->D xfer. </summary>
|
||||
LONG nDToDCopies;
|
||||
|
||||
/// <summary> The number of times this block required H->H xfer.
|
||||
/// This is a sanity check--it better be 0!
|
||||
/// </summary>
|
||||
LONG nHToHCopies;
|
||||
|
||||
/// <summary> The total number of bytes transferred over the life cycle of
|
||||
/// this datablock. </summary>
|
||||
LONG nTotalSyncBytes;
|
||||
|
||||
/// <summary> The number of times a block was bound concurrently
|
||||
/// to multiple ports. This may have some error due to
|
||||
/// the resolution of the timer, and may need to be revised. </summary>
|
||||
UINT uiConcurrentPortBindings;
|
||||
|
||||
/// <summary> The number of times a block was bound concurrently
|
||||
/// to multiple ports. This may have some error due to
|
||||
/// the resolution of the timer, and may need to be revised. </summary>
|
||||
UINT uiConcurrentTaskBindings;
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Default constructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 9/18/2012. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
COHERENCEHISTORY_t(
|
||||
UINT uiDatablockID
|
||||
)
|
||||
{
|
||||
pvPortBindHistory = new std::map<__int64, Port*>();
|
||||
pvIOCPortBindHistory = new std::map<__int64, Port*>();
|
||||
pvTaskBindHistory = new std::map<__int64, Task*>();
|
||||
pvAcceleratorBindHistory = new std::map<__int64, UINT>();
|
||||
pvDepAcceleratorBindHistory = new std::map<__int64, UINT>();
|
||||
pvStateHistory = new std::map<__int64, COHERENCETRANSITION*>();
|
||||
nDToHCopies = 0;
|
||||
nHToDCopies = 0;
|
||||
nDToDCopies = 0;
|
||||
nHToHCopies = 0;
|
||||
nTotalSyncBytes = 0;
|
||||
uiConcurrentPortBindings = 0;
|
||||
uiConcurrentTaskBindings = 0;
|
||||
uiDBUID = uiDatablockID;
|
||||
}
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 9/18/2012. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
~COHERENCEHISTORY_t(
|
||||
VOID
|
||||
)
|
||||
{
|
||||
std::map<__int64, COHERENCETRANSITION*>::iterator mi;
|
||||
for(mi=pvStateHistory->begin(); mi!=pvStateHistory->end(); mi++)
|
||||
delete mi->second;
|
||||
delete pvPortBindHistory;
|
||||
delete pvIOCPortBindHistory;
|
||||
delete pvTaskBindHistory;
|
||||
delete pvStateHistory;
|
||||
delete pvAcceleratorBindHistory;
|
||||
delete pvDepAcceleratorBindHistory;
|
||||
}
|
||||
|
||||
} COHERENCEHISTORY;
|
||||
|
||||
class CoherenceProfiler {
|
||||
|
||||
public:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Constructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/19/2013. </remarks>
|
||||
///
|
||||
/// <param name="pDatablock"> [in,out] If non-null, the datablock. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
CoherenceProfiler(Datablock * pDatablock);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/19/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual ~CoherenceProfiler();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Initializes the coherence traffic profiler. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 2/24/2012. </remarks>
|
||||
///
|
||||
/// <param name="bEnable"> true to enable, false to disable. </param>
|
||||
/// <param name="bVerbose"> true to verbose. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static BOOL Initialize(BOOL bEnable, BOOL bVerbose=FALSE);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Deinitializes the coherence traffic profiler. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 2/24/2012. </remarks>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static BOOL Deinitialize();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Dumps the coherence traffic statistics. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 2/24/2012. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static void Report(std::ostream& ios);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Dumps the coherence traffic statistics. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 2/24/2012. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static std::stringstream * GetReport();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Dumps the coherence traffic statistics. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 2/24/2012. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static void GetDetailedReport(std::ostream& ios);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Coherence tracker record view update start. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 9/18/2012. </remarks>
|
||||
///
|
||||
/// <param name="pDatablock"> If non-null, the datablock. </param>
|
||||
/// <param name="nDestMemorySpaceID"> Identifier for the memory space. </param>
|
||||
/// <param name="eEventType"> Type of the event. </param>
|
||||
///
|
||||
/// <returns> new transition object. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
COHERENCETRANSITION *
|
||||
RecordViewUpdateStart(
|
||||
__in UINT nDestMemorySpaceID,
|
||||
__in COHERENCEEVENTTYPE eEventType
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Coherence tracker record view update end. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 9/18/2012. </remarks>
|
||||
///
|
||||
/// <param name="pDatablock"> If non-null, the datablock. </param>
|
||||
/// <param name="nSrcMemorySpaceID"> Identifier for the source memory space. </param>
|
||||
/// <param name="uiRequestedState"> The requested coherence state. This affects whether other
|
||||
/// accelerator views require invalidation. </param>
|
||||
/// <param name="bTransferOccurred"> The transfer occurred. </param>
|
||||
/// <param name="pTx"> non-null, the state transition descriptor. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void
|
||||
RecordViewUpdateEnd(
|
||||
__in UINT nSrcMemorySpaceID,
|
||||
__in BUFFER_COHERENCE_STATE uiRequestedState,
|
||||
__in BOOL bTransferOccurred,
|
||||
__in COHERENCETRANSITION * pTx
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Record port binding. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 9/19/2012. </remarks>
|
||||
///
|
||||
/// <param name="pPort"> (optional) [in] If non-null, the port the block will occupy. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void RecordPortBinding(Port * pPort);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Record task binding. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 9/19/2012. </remarks>
|
||||
///
|
||||
/// <param name="pPort"> (optional) [in] If non-null, the port the block will occupy. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void RecordTaskBinding(Task * pTask);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Record binding. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 9/20/2012. </remarks>
|
||||
///
|
||||
/// <param name="pPort"> (optional) [in] If non-null, the port the block will occupy. </param>
|
||||
/// <param name="pTask"> [in,out] If non-null, the task. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void RecordBinding(Port * pPort, Task * pTask, Port * pIOConsumer);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Coherence tracker set detailed. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 9/21/2012. </remarks>
|
||||
///
|
||||
/// <param name="bDetailed"> true to collect detailed stats. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void SetDetailed(BOOL bDetailed);
|
||||
|
||||
protected:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Initializes the coherence history for this block. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 9/18/2012. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void InitializeInstanceHistory();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Deinitializes the coherence history for this block. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 9/18/2012. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void DeinitializeInstanceHistory();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Merge the coherence history for this block with the static view. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 9/18/2012. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void MergeHistory();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Merge the coherence histories for all blocks into the static view. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 9/18/2012. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static void MergeHistories();
|
||||
|
||||
/// <summary> The datablock this profiler object is tracking. </summary>
|
||||
Datablock * m_pDatablock;
|
||||
|
||||
/// <summary> Coherence history and statistics for this block, including:
|
||||
/// 1. all ports to which this block has been bound.
|
||||
/// 2. The set of all tasks which have touched this block
|
||||
/// 3. The number of times this block required D->H xfer.
|
||||
/// 4. The number of times this block required H->D xfer.
|
||||
/// 5. The number of times this block required D->D xfer.
|
||||
/// 6. The number of times this block required H->H xfer.
|
||||
/// 7. The total number of bytes transferred over the life cycle of
|
||||
/// this datablock.
|
||||
/// 8. The history of state transitions. </summary>
|
||||
COHERENCEHISTORY* m_pCoherenceHistory;
|
||||
|
||||
/// <summary> True if we are in the middle of recording a state transition
|
||||
/// in the coherence profiler. Helps us catch situations where we
|
||||
/// accidentally attempt nested recording of transitions, which
|
||||
/// would deeply screw up the results.
|
||||
/// </summary>
|
||||
BOOL m_bCoherenceProfilerTransitionActive;
|
||||
|
||||
/// <summary> The dev to dev migrations with invalidation. </summary>
|
||||
static LONG m_nDToDMigrationsExclusive;
|
||||
|
||||
/// <summary> The dev to dev migrations with shared state. </summary>
|
||||
static LONG m_nDToDMigrationsShared;
|
||||
|
||||
/// <summary> The host to dev migrations with invalidation. </summary>
|
||||
static LONG m_nHToDMigrationsExclusive;
|
||||
|
||||
/// <summary> The host to dev migrations without invalidation. </summary>
|
||||
static LONG m_nHToDMigrationsShared;
|
||||
|
||||
/// <summary> The dev to host migrations with invalidation. </summary>
|
||||
static LONG m_nDToHMigrationsExclusive;
|
||||
|
||||
/// <summary> The dev to host migrations without invalidation. </summary>
|
||||
static LONG m_nDToHMigrationsShared;
|
||||
|
||||
/// <summary> The number of times a coherence event caused multiple
|
||||
/// valid views to be abandoned. </summary>
|
||||
static LONG m_nMultiViewInvalidations;
|
||||
|
||||
/// <summary> The number of state transitions whose cause was unspecified. </summary>
|
||||
static LONG m_nCETUnspecified;
|
||||
|
||||
/// <summary> The number of state transitions triggered by a binding to task input</summary>
|
||||
static LONG m_nCETBindInput;
|
||||
|
||||
/// <summary> The number of state transitions triggered by a binding to taks output</summary>
|
||||
static LONG m_nCETBindOutput;
|
||||
|
||||
/// <summary> The number of state transitions triggered by a binding to a task constant port</summary>
|
||||
static LONG m_nCETBindConstant;
|
||||
|
||||
/// <summary> The number of state transitions triggered by pushing into multiple consumer channels </summary>
|
||||
static LONG m_nCETDownstreamShare;
|
||||
|
||||
/// <summary> The number of state transitions triggered by a user request for a pointer in host space</summary>
|
||||
static LONG m_nCETPointerRequest;
|
||||
|
||||
/// <summary> The number of state transitions triggered by the deletion of the block</summary>
|
||||
static LONG m_nCETBlockDelete;
|
||||
|
||||
/// <summary> The number of state transitions triggered by the cloning of the block </summary>
|
||||
static LONG m_nCETBlockClone;
|
||||
|
||||
/// <summary> The number of state transitions triggered by block allocation </summary>
|
||||
static LONG m_nCETBlockCreate;
|
||||
|
||||
/// <summary> The number of state transitions triggered when we are updating the host view of
|
||||
/// the block, but don't actually have access to the information we need to figure
|
||||
/// out what action triggered the view update. Most likely a user request.
|
||||
/// </summary>
|
||||
static LONG m_nCETHostViewUpdate;
|
||||
|
||||
/// <summary> The number of state transitions triggered when we are updating the device view of
|
||||
/// the block, but don't actually have access to the information we need to figure
|
||||
/// out what action triggered the view update. Most likely a user request.
|
||||
/// </summary>
|
||||
static LONG m_nCETAcceleratorViewUpdate;
|
||||
|
||||
/// <summary> The number of state transitions triggered when Buffers are being allocated for a
|
||||
/// block.
|
||||
/// </summary>
|
||||
static LONG m_nCETBufferAllocate;
|
||||
|
||||
/// <summary> The number of state transitions triggered when a request to grow the buffer
|
||||
/// caused some buffer reallocation and potentially view updates as a side effect.
|
||||
/// </summary>
|
||||
static LONG m_nCETGrowBuffer;
|
||||
|
||||
/// <summary> The number of state transitions triggered when a request to synthesize
|
||||
/// a metadata block caused the traffic </summary>
|
||||
static LONG m_nCETSynthesizeBlock;
|
||||
|
||||
/// <summary> The number of state transitions triggered when
|
||||
/// needed a pinned host buffer in addition to a dev buffer </summary>
|
||||
static LONG m_nCETPinnedHostView;
|
||||
|
||||
/// <summary> Is the profiler initialised? </summary>
|
||||
static LONG m_nCoherenceProfilerInit;
|
||||
|
||||
/// <summary> Is the profiler enabled? </summary>
|
||||
static LONG m_nCoherenceProfilerEnabled;
|
||||
|
||||
/// <summary> true if the coherence tracker should emit copious text. </summary>
|
||||
static BOOL m_bCoherenceProfilerVerbose;
|
||||
|
||||
/// <summary> The detailed statistics. </summary>
|
||||
static BOOL m_bCoherenceStatisticsDetailed;
|
||||
|
||||
/// <summary> The per task histories. </summary>
|
||||
static std::map<UINT, COHERENCEHISTORY*> m_vHistories;
|
||||
|
||||
/// <summary> The timer. </summary>
|
||||
static CHighResolutionTimer * m_pTimer;
|
||||
|
||||
/// <summary> List of names task names. Required because we will no longer have
|
||||
/// valid task pointers when we check for leaks (all tasks *should* be
|
||||
/// deleted by that point), and we want to be able to find the task
|
||||
/// that allocated a block if it was leaked and provide it's name as
|
||||
/// a debug assist.
|
||||
/// </summary>
|
||||
static std::map<PTask::Task*, std::string> m_vTaskNames;
|
||||
|
||||
/// <summary> List of port names. Required because we will no longer have
|
||||
/// valid port pointers when we check for leaks (all ports *should* be
|
||||
/// deleted by that point), and we want to be able to find the last
|
||||
/// port that touched any leaked blocks.
|
||||
/// </summary>
|
||||
static std::map<PTask::Port*, std::string> m_vPortNames;
|
||||
|
||||
/// <summary> The coherence profiler lock. Protects the static data structures
|
||||
/// collecting data xfer statistics.
|
||||
/// </summary>
|
||||
static CRITICAL_SECTION m_csCoherenceProfiler;
|
||||
|
||||
friend class Datablock;
|
||||
|
||||
};
|
||||
|
||||
};
|
||||
#endif
|
|
@ -1,256 +0,0 @@
|
|||
//--------------------------------------------------------------------------------------
|
||||
// File: CompiledKernel.h
|
||||
// Maintainer: crossbac@microsoft.com
|
||||
//--------------------------------------------------------------------------------------
|
||||
#ifndef _COMPILED_KERNEL_H_
|
||||
#define _COMPILED_KERNEL_H_
|
||||
#include "accelerator.h"
|
||||
#include "ptlock.h"
|
||||
#include <map>
|
||||
|
||||
namespace PTask {
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> function signature for host tasks that have dependences on other accelerators.
|
||||
/// The BOOL array contains entries which are true if that entry corresponds to an
|
||||
/// input already materialized on the dependent device, false otherwise. The
|
||||
/// pvDeviceBindings array contains entries which are meaningful when the entry at
|
||||
/// the same index in the BOOL array is true, and is a platform-specific device id.
|
||||
/// Generated code must know how to use these IDs.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/16/2012. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
// Pointer to a __stdcall function taking a thread id and a device id and
// returning nothing.
// NOTE(review): the summary above talks about a BOOL array and a
// pvDeviceBindings array, neither of which appears in this signature; the text
// looks inherited from a different typedef -- confirm and update.
typedef void (__stdcall *LPFNTASKINITIALIZER)(DWORD dwThreadId, int nDeviceId);
|
||||
|
||||
class CompiledKernel
|
||||
{
|
||||
public:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Constructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <param name="lpszSourceFile"> [in] non-null, source file. </param>
|
||||
/// <param name="lpszOperation"> [in] non-null, the operation. </param>
|
||||
/// <param name="lpszInitializerBinary"> [in,out] If non-null, the initializer binary. </param>
|
||||
/// <param name="lpszInitializerEntryPoint"> [in,out] If non-null, the initializer entry
|
||||
/// point. </param>
|
||||
/// <param name="eInitializerPSObjectClass"> (Optional) the initializer ps object class. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
CompiledKernel(
|
||||
__in char * lpszSourceFile,
|
||||
__in char * lpszOperation,
|
||||
__in char * lpszInitializerBinary,
|
||||
__in char * lpszInitializerEntryPoint,
|
||||
__in ACCELERATOR_CLASS eInitializerPSObjectClass=ACCELERATOR_CLASS_UNKNOWN
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual ~CompiledKernel(void);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary>
|
||||
/// Gets the platform specific binary associated with the given accelerator. Generally
|
||||
/// speaking, we will compile a kernel separately for every accelerator in the system capable
|
||||
/// of running it, since the accelerators may have different capabilities. This method
|
||||
/// retrieves the result of that compilation, which is an object whose type depends on the
|
||||
/// platform supported by the accelerator. For example, in directX, this retrieves a compute
|
||||
/// shader interface.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <param name="pAccelerator"> [in] non-null, the accelerator. </param>
|
||||
///
|
||||
/// <returns> null if it fails, else the platform specific binary. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void * GetPlatformSpecificBinary(Accelerator * pAccelerator);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Sets a platform specific binary. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <param name="pAccelerator"> [in] non-null, the accelerator. </param>
|
||||
/// <param name="pPlatformSpecificBinary"> [in] non-null, the platform specific binary. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void SetPlatformSpecificBinary(Accelerator * pAccelerator, void * pPlatformSpecificBinary);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets a platform specific module. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <param name="pAccelerator"> [in] non-null, the accelerator. </param>
|
||||
///
|
||||
/// <returns> null if it fails, else the platform specific module. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void * GetPlatformSpecificModule(Accelerator * pAccelerator);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Sets a platform specific module. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <param name="pAccelerator"> [in] non-null, the accelerator. </param>
|
||||
/// <param name="pPlatformSpecificModule"> [in,out] If non-null, the platform specific module. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void SetPlatformSpecificModule(Accelerator * pAccelerator, void * pPlatformSpecificModule);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the source file. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <returns> null if it fails, else the source file. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual const char * GetSourceFile();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the operation. The operation is the top-level entry
|
||||
/// point into kernel code, and must be specified, since a single
|
||||
/// source file may contain many such entry points.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <returns> null if it fails, else the operation. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual const char * GetOperation();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the source binary for init routine. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <returns> null if it fails, else the source file. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual const char * GetInitializerBinary();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the entry point for any initializer routines.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <returns> null if it fails, else the operation. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual const char * GetInitializerEntryPoint();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Sets the initializer binary. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/13/2013. </remarks>
|
||||
///
|
||||
/// <param name="hModule"> The module. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void SetInitializerBinary(HMODULE hModule);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Sets the initializer entry point. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/13/2013. </remarks>
|
||||
///
|
||||
/// <param name="lpvProcAddress"> [in,out] If non-null, the lpv proc address. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void SetInitializerEntryPoint(void * lpvProcAddress);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if this kernel has a static initializer that should be called as part
|
||||
/// of putting the graph in the run state. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/13/2013. </remarks>
|
||||
///
|
||||
/// <returns> true if static initializer, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL HasStaticInitializer();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Determines if any present initializer routines requires platform-specific
|
||||
/// device objects to be provided when called. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/13/2013. </remarks>
|
||||
///
|
||||
/// <returns> true if the initializer requires platform-specific device objects, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL InitializerRequiresPSObjects();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets initializer required ps classes. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/13/2013. </remarks>
|
||||
///
|
||||
/// <returns> null if it fails, else the initializer required ps classes. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual ACCELERATOR_CLASS GetInitializerRequiredPSClass();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Executes the initializer, with a list of platform specific resources.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/13/2013. </remarks>
|
||||
///
|
||||
/// <param name="vPSDeviceObjects"> [in] The set of platform-specific device objects. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
InvokeInitializer(
|
||||
__in DWORD dwThreadId,
|
||||
__in std::set<Accelerator*>& vPSDeviceObjects
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Executes the initializer, if present.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/13/2013. </remarks>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL InvokeInitializer(DWORD dwThreadId);
|
||||
|
||||
protected:
|
||||
char * m_lpszSourceFile;
|
||||
char * m_lpszOperation;
|
||||
char * m_lpszInitializerBinary;
|
||||
char * m_lpszInitializerEntryPoint;
|
||||
ACCELERATOR_CLASS m_eInitializerPSObjectClass;
|
||||
std::map<Accelerator *, void *> m_vPlatformSpecificKernels;
|
||||
std::map<Accelerator *, void *> m_vPlatformSpecificModules;
|
||||
HANDLE m_lpvInitializerModule;
|
||||
void * m_lpvInitializerProcAddress;
|
||||
BOOL m_bInitializerInvoked;
|
||||
|
||||
static std::map<std::string, HMODULE> m_vLoadedDlls;
|
||||
static std::map<std::string, std::map<std::string, FARPROC>> m_vEntryPoints;
|
||||
static PTLock m_vModuleLock;
|
||||
};
|
||||
|
||||
};
|
||||
#endif
|
||||
|
|
@ -1,195 +0,0 @@
|
|||
///-------------------------------------------------------------------------------------------------
|
||||
// file: DXAsyncContext.h
|
||||
//
|
||||
// summary: Declares the DirectX asynchronous context class
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
#ifndef __DX_ASYNC_CONTEXT_H__
|
||||
#define __DX_ASYNC_CONTEXT_H__
|
||||
|
||||
#include "primitive_types.h"
|
||||
#include "accelerator.h"
|
||||
#include "dxaccelerator.h"
|
||||
#include "task.h"
|
||||
#include "channel.h"
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include <list>
|
||||
#include "hrperft.h"
|
||||
#include "AsyncContext.h"
|
||||
#include "AsyncDependence.h"
|
||||
|
||||
namespace PTask {
|
||||
|
||||
class DXAsyncContext : public AsyncContext {
|
||||
public:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Constructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/24/2012. </remarks>
|
||||
///
|
||||
/// <param name="pDeviceContext"> [in,out] If non-null, context for the device. </param>
|
||||
/// <param name="pTaskContext"> [in,out] If non-null, context for the task. </param>
|
||||
/// <param name="eAsyncContextType"> Type of the asynchronous context. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
DXAsyncContext(
|
||||
__in Accelerator * pDeviceContext,
|
||||
__in Task * pTaskContext,
|
||||
__in ASYNCCONTEXTTYPE eAsyncContextType
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/24/2012. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual ~DXAsyncContext();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Initializes this object. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/25/2012. </remarks>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL Initialize();
|
||||
|
||||
protected:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Platform specific create synchronization point. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/25/2012. </remarks>
|
||||
///
|
||||
/// <returns> null if it fails, else the new synchronization point. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual SyncPoint *
|
||||
PlatformSpecificCreateSyncPoint(
|
||||
void * pPSSyncObject
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Platform specific destroy synchronization point. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/25/2012. </remarks>
|
||||
///
|
||||
/// <param name="pSyncPoint"> [in,out] If non-null, the synchronise point. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
PlatformSpecificDestroySynchronizationPoint(
|
||||
__in SyncPoint * pSyncPoint
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Platform specific synchronize context. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
PlatformSpecificSynchronizeContext(
|
||||
VOID
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Wait for dependence asynchronously. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/24/2012. </remarks>
|
||||
///
|
||||
/// <param name="pSyncPoint"> [in,out] If non-null, the synchronise point. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
PlatformSpecificInsertFence(
|
||||
__in SyncPoint * pSyncPoint
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Wait for dependence synchronously. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/24/2012. </remarks>
|
||||
///
|
||||
/// <param name="pSyncPoint"> [in,out] If non-null, the synchronise point. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
PlatformSpecificSynchronousWait(
|
||||
__in SyncPoint * pSyncPoint
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Determines if the sync point is resolved (and marks it if so). </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
PlatformSpecificQueryOutstanding(
|
||||
__inout SyncPoint * pSyncPoint
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Platform specific nonblocking check whether the event remains outstanding. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 7/2/2013. </remarks>
|
||||
///
|
||||
/// <param name="pSyncPoint"> [in,out] If non-null, the synchronise point. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
PlatformSpecificNonblockingQueryOutstanding(
|
||||
__inout SyncPoint * pSyncPoint
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Wait for dependence synchronously without locking the async context
|
||||
/// or underlying accelerator: this simplifies lock acquisition for such
|
||||
/// waits, but at the expense of leaving live dependences that are
|
||||
/// actually resolved. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/24/2012. </remarks>
|
||||
///
|
||||
/// <param name="pSyncPoint"> [in,out] If non-null, the synchronise point. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
PlatformSpecificLocklessSynchronousWait(
|
||||
__in SyncPoint * pSyncPoint
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the platform context object. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/25/2012. </remarks>
|
||||
///
|
||||
/// <returns> null if it fails, else the platform context object. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void *
|
||||
GetPlatformContextObject();
|
||||
|
||||
ID3D11DeviceContext * m_pDXContext;
|
||||
};
|
||||
|
||||
};
|
||||
#endif
|
|
@ -1,202 +0,0 @@
|
|||
///-------------------------------------------------------------------------------------------------
|
||||
// file: DatablockProfiler.h
|
||||
//
|
||||
// summary: Declares the datablock profiler class
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
#ifndef __DATABLOCK_PROFILER_H__
|
||||
#define __DATABLOCK_PROFILER_H__
|
||||
|
||||
#include "primitive_types.h"
|
||||
#include "ReferenceCounted.h"
|
||||
#include <map>
|
||||
#include <set>
|
||||
|
||||
class CHighResolutionTimer;
|
||||
|
||||
namespace PTask {
|
||||
|
||||
class Port;
|
||||
class Task;
|
||||
class BlockPool;
|
||||
class BlockPoolOwner;
|
||||
class Datablock;
|
||||
|
||||
class DatablockProfiler {
|
||||
|
||||
public:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Constructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/19/2013. </remarks>
|
||||
///
|
||||
/// <param name="pDatablock"> [in,out] If non-null, the datablock. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
DatablockProfiler(Datablock * pDatablock);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/19/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual ~DatablockProfiler();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Initializes the datablock profiler. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 2/24/2012. </remarks>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static BOOL Initialize(BOOL bEnable, BOOL bVerbose=FALSE);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Deinitializes the datablock profiler. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 2/24/2012. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static BOOL Deinitialize();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Dumps the datablock profiler leaks. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 2/24/2012. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static void Report(std::ostream& ios);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Profile allocation. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 2/24/2012. </remarks>
|
||||
///
|
||||
/// <param name="pBlock"> [in,out] If non-null, the block. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static void RecordAllocation(Datablock * pBlock);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Profile deletion. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 2/24/2012. </remarks>
|
||||
///
|
||||
/// <param name="pBlock"> [in,out] If non-null, the block. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static void RecordDeletion(Datablock*pBlock);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Record port binding. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 9/19/2012. </remarks>
|
||||
///
|
||||
/// <param name="pPort"> (optional) [in] If non-null, the port the block will occupy. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void RecordBinding(Port * pPort);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Record task binding. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 9/19/2012. </remarks>
|
||||
///
|
||||
/// <param name="pTask"> [in] If non-null, the task to record a binding for. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void RecordBinding(Task * pTask);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Record binding. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 9/20/2012. </remarks>
|
||||
///
|
||||
/// <param name="pPort"> (optional) [in] If non-null, the port the block will occupy. </param>
|
||||
/// <param name="pTask"> [in,out] If non-null, the task. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void RecordBinding(Port * pPort, Task * pTask, Port * pIOConsumer);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Record pool binding. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/19/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void RecordPoolBinding();
|
||||
|
||||
/// <summary> The set of all ports to which this block has been bound. </summary>
|
||||
std::set<Port*> m_vPortBindings;
|
||||
|
||||
/// <summary> The set of all tasks which have touched this block. </summary>
|
||||
std::set<Task*> m_vTaskBindings;
|
||||
|
||||
/// <summary> The set of pool owners with block pools used to allocate blocks.
|
||||
/// Necessary because block pooling can cause blocks to be reused
|
||||
/// between allocation and deletion. Maintained as a map to string
|
||||
/// since the owner may be deleted by the time we attempt deletion.
|
||||
/// </summary>
|
||||
std::map<BlockPoolOwner*, std::string> m_vPools;
|
||||
|
||||
/// <summary> List of task names. Required because we will no longer have
|
||||
/// valid task pointers when we check for leaks (all tasks *should* be
|
||||
/// deleted by that point), and we want to be able to find the task
|
||||
/// that allocated a block if it was leaked and provide its name as
|
||||
/// a debug assist.
|
||||
/// </summary>
|
||||
static std::map<PTask::Task*, std::string> m_vTaskNames;
|
||||
|
||||
/// <summary> List of port names. Required because we will no longer have
|
||||
/// valid port pointers when we check for leaks (all ports *should* be
|
||||
/// deleted by that point), and we want to be able to find the last
|
||||
/// port that touched any leaked blocks.
|
||||
/// </summary>
|
||||
static std::map<PTask::Port*, std::string> m_vPortNames;
|
||||
|
||||
protected:
|
||||
|
||||
Datablock * m_pDatablock;
|
||||
|
||||
/// <summary> The number of datablock allocations. </summary>
|
||||
static LONG m_nDBAllocations;
|
||||
|
||||
/// <summary> The datablock deletion count. </summary>
|
||||
static LONG m_nDBDeletions;
|
||||
|
||||
/// <summary> The number of clone allocations. </summary>
|
||||
static LONG m_nDBCloneAllocations;
|
||||
|
||||
/// <summary> The number of clone deletions. </summary>
|
||||
static LONG m_nDBCloneDeletions;
|
||||
|
||||
/// <summary> Is the profiler initialised? </summary>
|
||||
static LONG m_nDBProfilerInit;
|
||||
|
||||
/// <summary> Is the profiler enabled? </summary>
|
||||
static LONG m_nDBProfilerEnabled;
|
||||
|
||||
/// <summary> true if the allocation tracker should emit copious text. </summary>
|
||||
static BOOL m_bDBProfilerVerbose;
|
||||
|
||||
/// <summary> The set of datablocks currently allocated but not yet deleted. </summary>
|
||||
static std::set<PTask::Datablock*> m_vAllAllocations;
|
||||
|
||||
// these structures are also needed by the coherence profiler.
|
||||
// if both compile-time options are selected, then these are already
|
||||
// defined by the time we get here
|
||||
|
||||
/// <summary> The profiler lock. Protects the allocation counts,
|
||||
/// the allocation set, and the port and task maps.
|
||||
/// </summary>
|
||||
static CRITICAL_SECTION m_csDBProfiler;
|
||||
|
||||
friend class Datablock;
|
||||
|
||||
};
|
||||
|
||||
};
|
||||
#endif
|
|
@ -1,291 +0,0 @@
|
|||
///-------------------------------------------------------------------------------------------------
|
||||
// file: DeviceMemoryStatus.h
|
||||
//
|
||||
// summary: Declares the device memory status class
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
#ifndef __DEVICE_MEMORY_STATUS_H__
|
||||
#define __DEVICE_MEMORY_STATUS_H__
|
||||
|
||||
#include "primitive_types.h"
|
||||
#include "Lockable.h"
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <stdlib.h>
|
||||
#include <map>
|
||||
|
||||
namespace PTask {
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Memory status for a memory type on a device.
|
||||
/// Currently we track global and page-locked memory.
|
||||
/// Could easily expand to track other types. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 3/15/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
typedef struct DeviceMemoryStatus_t {
|
||||
|
||||
/// <summary> The name. </summary>
|
||||
std::string m_name;
|
||||
|
||||
/// <summary> A record of all allocations: maps the pointer to the size </summary>
|
||||
std::map<void *, unsigned __int64> m_vAllocations;
|
||||
|
||||
/// <summary> The size in bytes of the memory space. </summary>
|
||||
unsigned __int64 m_uiMemorySpaceSize;
|
||||
|
||||
/// <summary> The size in bytes of the smallest allocated extent. </summary>
|
||||
unsigned __int64 m_uiMinAllocExtentSize;
|
||||
|
||||
/// <summary> The size in bytes of the largest allocated extent. </summary>
|
||||
unsigned __int64 m_uiMaxAllocExtentSize;
|
||||
|
||||
/// <summary> (historical) the low water mark for total allocated bytes. </summary>
|
||||
unsigned __int64 m_uiLowWaterMarkBytes;
|
||||
|
||||
/// <summary> (historical) the high water mark for total allocated bytes. </summary>
|
||||
unsigned __int64 m_uiHighWaterMarkBytes;
|
||||
|
||||
/// <summary> (current state) the total bytes currently allocated. </summary>
|
||||
unsigned __int64 m_uiCurrentlyAllocatedBytes;
|
||||
|
||||
/// <summary> (current state) the total number of currently allocated buffers. </summary>
|
||||
unsigned __int64 m_uiCurrentlyAllocatedBuffers;
|
||||
|
||||
/// <summary> The total number of allocation requests. </summary>
|
||||
unsigned __int64 m_uiAllocationRequests;
|
||||
|
||||
/// <summary> The total deallocation requests. </summary>
|
||||
unsigned __int64 m_uiDeallocationRequests;
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Constructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 3/15/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
DeviceMemoryStatus_t(
|
||||
std::string &szName,
|
||||
char * lpszUniquifier
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 3/15/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
~DeviceMemoryStatus_t();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Resets this object. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 3/15/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void Reset(
|
||||
VOID
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Record a memory allocation. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 3/15/2013. </remarks>
|
||||
///
|
||||
/// <param name="pMemoryExtent"> [in,out] If non-null, extent of the memory. </param>
|
||||
/// <param name="uiBytes"> The bytes. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void
|
||||
RecordAllocation(
|
||||
__in void * pMemoryExtent,
|
||||
__in size_t uiBytes
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Record a memory deallocation. We provide "require entry" flag to
|
||||
/// simplify tracking of page-locked allocations which are a strict subset
|
||||
/// of all allocations. If we are removing an entry from the global tracking,
|
||||
/// we require that an entry for it be found, otherwise we complain. If
|
||||
/// we are removing entries from the page-locked tracking, it is not an
|
||||
/// error if there is no entry present.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 3/15/2013. </remarks>
|
||||
///
|
||||
/// <param name="pMemoryExtent"> [in,out] If non-null, extent of the memory. </param>
|
||||
/// <param name="bRequireEntry"> true if an entry for the extent must be found (a missing entry is an error); false if a missing entry is acceptable. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void
|
||||
RecordDeallocation(
|
||||
__in void * pMemoryExtent,
|
||||
__in BOOL bRequireEntry
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Dumps the allocation statistics. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 3/15/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void Report(
|
||||
std::ostream &ios
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Updates the memory space size described by uiBytes. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 3/15/2013. </remarks>
|
||||
///
|
||||
/// <param name="uiBytes"> The bytes. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void
|
||||
UpdateMemorySpaceSize(
|
||||
unsigned __int64 uiBytes
|
||||
);
|
||||
|
||||
} MEMSTATEDESC;
|
||||
|
||||
typedef struct GlobalDeviceMemoryState_t {
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Constructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 3/15/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
GlobalDeviceMemoryState_t(
|
||||
std::string& szDeviceName
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 3/15/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
~GlobalDeviceMemoryState_t(
|
||||
VOID
|
||||
);
|
||||
|
||||
/// <summary> synchronization. </summary>
|
||||
void Lock();
|
||||
void Unlock();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Resets the stats. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 3/15/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void Reset(
|
||||
VOID
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Record a memory allocation. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 3/15/2013. </remarks>
|
||||
///
|
||||
/// <param name="pMemoryExtent"> [in,out] If non-null, extent of the memory. </param>
|
||||
/// <param name="uiBytes"> The bytes. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void
|
||||
RecordAllocation(
|
||||
__in void * pMemoryExtent,
|
||||
__in size_t uiBytes,
|
||||
__in BOOL bPinned
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Record a memory deallocation. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 3/15/2013. </remarks>
|
||||
///
|
||||
/// <param name="pMemoryExtent"> [in,out] If non-null, extent of the memory. </param>
|
||||
/// <param name="bPinnedAllocation"> true to pinned allocation. </param>
|
||||
/// <param name="uiBytes"> The bytes. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void
|
||||
RecordDeallocation(
|
||||
__in void * pMemoryExtent
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Dumps the allocation statistics. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 3/15/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void Report(
|
||||
std::ostream &ios
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets global memory state. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 3/15/2013. </remarks>
|
||||
///
|
||||
/// <returns> null if it fails, else the global memory state. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
MEMSTATEDESC *
|
||||
GetGlobalMemoryState(
|
||||
VOID
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets page-locked memory state. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 3/15/2013. </remarks>
|
||||
///
|
||||
/// <returns> null if it fails, else the page-locked memory state. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
MEMSTATEDESC *
|
||||
GetPageLockedMemoryState(
|
||||
VOID
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Updates the memory space size described by uiBytes. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 3/15/2013. </remarks>
|
||||
///
|
||||
/// <param name="uiBytes"> The bytes. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void
|
||||
UpdateMemorySpaceSize(
|
||||
unsigned __int64 uiBytes
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Return the percentage of this memory space that is allocated. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 9/10/2013. </remarks>
|
||||
///
|
||||
/// <returns> The allocated percent. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
UINT
|
||||
GetAllocatedPercent(
|
||||
void
|
||||
);
|
||||
|
||||
protected:
|
||||
|
||||
MEMSTATEDESC m_global;
|
||||
MEMSTATEDESC m_pagelocked;
|
||||
CRITICAL_SECTION m_lock;
|
||||
|
||||
} DEVICEMEMORYSTATE;
|
||||
|
||||
};
|
||||
|
||||
#endif // __DEVICE_MEMORY_STATUS_H__
|
|
@ -1,299 +0,0 @@
|
|||
///-------------------------------------------------------------------------------------------------
|
||||
// file: GeometryEstimator.h
|
||||
//
|
||||
// summary: Declares the geometry estimator class
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
#ifndef __GEOMETRY_ESTIMATOR_H__
|
||||
#define __GEOMETRY_ESTIMATOR_H__
|
||||
|
||||
#include "PTaskRuntime.h"
|
||||
#include <map>
|
||||
|
||||
namespace PTask {
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Argument descriptor: provided the peeked value of a datablock and the source port
|
||||
/// from which it was peeked.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 5/1/2012. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
typedef struct _ptask_arg_t { // one estimator argument: a peeked datablock plus the port it was peeked from
|
||||
Datablock * pBlock; // the peeked datablock value
|
||||
Port * pSourcePort; // the port from which pBlock was peeked
|
||||
Port * pAllocator; // NOTE(review): presumably the allocator port associated with the block, if any -- confirm
|
||||
DatablockTemplate * pPortTemplate; // NOTE(review): presumably the datablock template of the port -- confirm
|
||||
} PTASKARGDESC, *PPTASKARGDESC;
|
||||
|
||||
static const int PTGE_DEFAULT_BASIC_GROUP = 256; // NOTE(review): presumably the default basic group size handed to the estimators -- confirm usage
|
||||
static const int PTGE_DEFAULT_BASIC_GROUP_X = 32; // NOTE(review): estimator docs in this header cite defaults of both 512 and 32 for nBasicGroupSizeX -- confirm which is current
|
||||
static const int PTGE_DEFAULT_BASIC_GROUP_Y = 32; // NOTE(review): presumably the default for nBasicGroupSizeY -- confirm
|
||||
static const int PTGE_DEFAULT_BASIC_GROUP_Z = 1; // NOTE(review): presumably the default for nBasicGroupSizeZ -- confirm
|
||||
static const int PTGE_DEFAULT_ELEMENTS_PER_THREAD = 1; // matches the "Default is 1" documented for nElementsPerThread on the estimators
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Geometry estimator callback function prototype. Allows the user to provide a
|
||||
/// custom estimator function.
|
||||
/// </summary>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
typedef void // parameter list matches the static estimators declared on GeometryEstimator in this header
|
||||
(__stdcall *LPFNGEOMETRYESTIMATOR)(
|
||||
UINT nArguments, // the number of arguments
|
||||
PTASKARGDESC ** ppArguments, // argument descriptors (peeked block + source port)
|
||||
PTASKDIM3 * pBlockDims, // NOTE(review): presumably [out] thread block dimensions, as on the static estimators -- confirm
|
||||
PTASKDIM3 * pGridDims, // NOTE(review): presumably [out] grid dimensions, as on the static estimators -- confirm
|
||||
int nElementsPerThread,
|
||||
int nBasicGroupSizeX,
|
||||
int nBasicGroupSizeY,
|
||||
int nBasicGroupSizeZ
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Values that represent canonical estimator functions. Most estimators are so
|
||||
/// common that it makes no sense to force the user to code them explicitly. These
|
||||
/// values provide a library of common estimators.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 5/1/2012. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
typedef enum _estimator_fns {
|
||||
|
||||
/// <summary> No size estimator function has been provided.
|
||||
/// </summary>
|
||||
NO_SIZE_ESTIMATOR = 0,
|
||||
|
||||
/// <summary> Estimate the geometry based on the size of the
|
||||
/// datablock bound to the first port.
|
||||
/// </summary>
|
||||
BASIC_INPUT_SIZE_ESTIMATOR = 1, // see BasicInputSizeGeometryEstimator, which its own docs flag as a legacy function
|
||||
|
||||
/// <summary> Estimate the geometry based on the max of the
|
||||
/// record counts over all input datablocks.
|
||||
/// </summary>
|
||||
MAX_INPUT_SIZE_ESTIMATOR = 2,
|
||||
|
||||
/// <summary> Estimate the geometry based on the max of the
|
||||
/// record counts over all output datablocks.
|
||||
/// </summary>
|
||||
MAX_OUTPUT_SIZE_ESTIMATOR = 3,
|
||||
|
||||
/// <summary> Ports are bound to a particular dimension
|
||||
/// of the iteration space. This estimator
|
||||
/// looks for explicit port bindings and assembles
|
||||
/// the iteration space accordingly. </summary>
|
||||
EXPLICIT_DIMENSION_ESTIMATOR = 4,
|
||||
|
||||
/// <summary> The user commits to provide a callback to
|
||||
/// estimate the dispatch dimensions.
|
||||
/// </summary>
|
||||
USER_DEFINED_ESTIMATOR = 5
|
||||
|
||||
// ....
|
||||
|
||||
} GEOMETRYESTIMATORTYPE;
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Geometry estimator. Functions for estimating dispatch dimensions based on
|
||||
/// dynamically available information.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 5/1/2012. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
class GeometryEstimator {
|
||||
|
||||
public:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Basic Input size geometry estimator. Accepts as input all the datablocks that
|
||||
/// will be bound to inputs for a given task, but examines only the block bound to
|
||||
/// parameter 0. This is a legacy function: achtung!
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 12/20/2011. </remarks>
|
||||
///
|
||||
/// <param name="nArguments"> The number of arguments. </param>
|
||||
/// <param name="ppArguments"> [in] non-null, a vector of input data blocks. </param>
|
||||
/// <param name="pBlockDims"> [out] non-null, the thread block dimensions. </param>
|
||||
/// <param name="pGridDims"> [out] non-null, the grid dimensions . </param>
|
||||
/// <param name="nElementsPerThread"> (optional) The elements assumed by kernel code to be
|
||||
/// assigned to each thread. Default is 1. </param>
|
||||
/// <param name="nBasicGroupSizeX"> (optional) size of the basic group. Default is 512. </param>
|
||||
/// <param name="nBasicGroupSizeY"> The basic group size y coordinate. </param>
|
||||
/// <param name="nBasicGroupSizeZ"> The basic group size z coordinate. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static void
|
||||
BasicInputSizeGeometryEstimator(
|
||||
__in UINT nArguments,
|
||||
__in PTask::PTASKARGDESC ** ppArguments,
|
||||
__out PTask::PTASKDIM3 * pBlockDims,
|
||||
__out PTask::PTASKDIM3 * pGridDims,
|
||||
__in int nElementsPerThread,
|
||||
__in int nBasicGroupSizeX,
|
||||
__in int nBasicGroupSizeY,
|
||||
__in int nBasicGroupSizeZ
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Max Input size geometry estimator. Accepts as input all the datablocks that will
|
||||
/// be bound to inputs for a given task, and takes the max over all the record counts
|
||||
/// to find the conservative maximum number of thread blocks that will be required to
|
||||
/// ensure each input element is processed.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 12/20/2011. </remarks>
|
||||
///
|
||||
/// <param name="nArguments"> The number of arguments. </param>
|
||||
/// <param name="ppArguments"> [in] non-null, a vector of input data blocks. </param>
|
||||
/// <param name="pBlockDims"> [out] non-null, the thread block dimensions. </param>
|
||||
/// <param name="pGridDims"> [out] non-null, the grid dimensions . </param>
|
||||
/// <param name="nElementsPerThread"> (optional) The elements assumed by kernel code to be
|
||||
/// assigned to each thread. Default is 1. </param>
|
||||
/// <param name="nBasicGroupSizeX"> (optional) size of the basic group. Default is 512. </param>
|
||||
/// <param name="nBasicGroupSizeY"> The basic group size y coordinate. </param>
|
||||
/// <param name="nBasicGroupSizeZ"> The basic group size z coordinate. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static void
|
||||
MaxInputSizeGeometryEstimator(
|
||||
__in UINT nArguments,
|
||||
__in PTask::PTASKARGDESC ** ppArguments,
|
||||
__out PTask::PTASKDIM3 * pBlockDims,
|
||||
__out PTask::PTASKDIM3 * pGridDims,
|
||||
__in int nElementsPerThread,
|
||||
__in int nBasicGroupSizeX,
|
||||
__in int nBasicGroupSizeY,
|
||||
__in int nBasicGroupSizeZ
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Max output size geometry estimator. Accepts as input all the datablocks that will
|
||||
/// be bound to outputs for a given task, and takes the max over all the record
|
||||
/// counts to find the conservative maximum number of thread blocks that will be
|
||||
/// required to ensure each input element is processed. Note that this is a somewhat
|
||||
/// more subtle task than examining input blocks because output blocks with MetaPorts
|
||||
/// serving as input allocator will not be allocated yet.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 12/20/2011. </remarks>
|
||||
///
|
||||
/// <param name="nArguments"> The number of arguments. </param>
|
||||
/// <param name="ppArguments"> [in] non-null, a vector of input data blocks. </param>
|
||||
/// <param name="pBlockDims"> [out] non-null, the thread block dimensions. </param>
|
||||
/// <param name="pGridDims"> [out] non-null, the grid dimensions . </param>
|
||||
/// <param name="nElementsPerThread"> (optional) The elements assumed by kernel code to be
|
||||
/// assigned to each thread. Default is 1. </param>
|
||||
/// <param name="nBasicGroupSizeX"> (optional) size of the basic group. Default is 512. </param>
|
||||
/// <param name="nBasicGroupSizeY"> The basic group size y coordinate. </param>
|
||||
/// <param name="nBasicGroupSizeZ"> The basic group size z coordinate. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static void
|
||||
MaxOutputSizeGeometryEstimator(
|
||||
__in UINT nArguments,
|
||||
__in PTask::PTASKARGDESC ** ppArguments,
|
||||
__out PTask::PTASKDIM3 * pBlockDims,
|
||||
__out PTask::PTASKDIM3 * pGridDims,
|
||||
__in int nElementsPerThread,
|
||||
__in int nBasicGroupSizeX,
|
||||
__in int nBasicGroupSizeY,
|
||||
__in int nBasicGroupSizeZ
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Ports are bound to dimensions of the iteration space such that the datablock size
|
||||
/// maps directly to one dimension of space. Accept all port/block pairs and use
|
||||
/// those with an explicit binding to assemble the iteration space.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 12/20/2011. </remarks>
|
||||
///
|
||||
/// <param name="nArguments"> The number of arguments. </param>
|
||||
/// <param name="ppArguments"> [in] non-null, a vector of input data blocks. </param>
|
||||
/// <param name="pBlockDims"> [out] non-null, the thread block dimensions. </param>
|
||||
/// <param name="pGridDims"> [out] non-null, the grid dimensions . </param>
|
||||
/// <param name="nElementsPerThread"> (optional) The elements assumed by kernel code to be
|
||||
/// assigned to each thread. Default is 1. </param>
|
||||
/// <param name="nBasicGroupSizeX"> (optional) size of the basic group. Default is 32. </param>
|
||||
/// <param name="nBasicGroupSizeY"> (optional) the basic group size y coordinate. </param>
|
||||
/// <param name="nBasicGroupSizeZ"> (optional) the basic group size z coordinate. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static void
|
||||
ExplicitDimensionEstimator(
|
||||
UINT nArguments,
|
||||
PTask::PTASKARGDESC ** ppArguments,
|
||||
PTask::PTASKDIM3 * pBlockDims,
|
||||
PTask::PTASKDIM3 * pGridDims,
|
||||
int nElementsPerThread,
|
||||
int nBasicGroupSizeX,
|
||||
int nBasicGroupSizeY,
|
||||
int nBasicGroupSizeZ
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Adds peeked blocks from all the ports in the given map to the argument list.
|
||||
/// Helps assemble the argument list input for an estimator.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 5/1/2012. </remarks>
|
||||
///
|
||||
/// <param name="pPortMap"> [in,out] If non-null, the port map. </param>
|
||||
/// <param name="ppArgs"> [in,out] If non-null, the arguments. </param>
|
||||
/// <param name="nPortIndex"> [in,out] Zero-based index of the n port. </param>
|
||||
/// <param name="nMaxToAdd"> (optional) the maximum number of ports to add. -1 means
|
||||
/// unbounded. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static void
|
||||
AddToArgumentList(
|
||||
std::map<UINT, Port*>* pPortMap,
|
||||
PTASKARGDESC ** ppArgs,
|
||||
int &nPortIndex,
|
||||
int nMaxToAdd=-1
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Adds peeked blocks from all the ports for all relevant port maps to the argument
|
||||
/// list. Helps assemble the argument list input for an estimator.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 5/1/2012. </remarks>
|
||||
///
|
||||
/// <param name="pTask"> [in,out] If non-null, the task. </param>
|
||||
/// <param name="pppArgs"> [in,out] If non-null, the ppp arguments. </param>
|
||||
///
|
||||
/// <returns> the number of arguments in the given list. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static int
|
||||
CreateEstimatorArgumentList(
|
||||
Task * pTask,
|
||||
PTASKARGDESC *** pppArgs
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Estimate task geometry for a cuda task. This implementation is
|
||||
/// platform specific because the interface for specifying launch dimensions
|
||||
/// is specific to cuda.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 5/1/2012. </remarks>
|
||||
///
|
||||
/// <param name="pTask"> [in,out] If non-null, the task. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static void
|
||||
EstimateCUTaskGeometry(
|
||||
Task * pTask
|
||||
);
|
||||
};
|
||||
|
||||
|
||||
}
|
||||
|
||||
#endif // __GEOMETRY_ESTIMATOR_H__
|
|
@ -1,423 +0,0 @@
|
|||
///-------------------------------------------------------------------------------------------------
|
||||
// file: GlobalBlockPool.h
|
||||
//
|
||||
// summary: Declares the global block pool class
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
#ifndef __GLOBAL_BLOCK_POOL_H__
|
||||
#define __GLOBAL_BLOCK_POOL_H__
|
||||
|
||||
#include <stdio.h>
|
||||
#include <crtdbg.h>
|
||||
#include <Windows.h>
|
||||
#include "datablock.h"
|
||||
#include "BlockPoolOwner.h"
|
||||
#include "BlockPool.h"
|
||||
#include <deque>
|
||||
|
||||
namespace PTask {
|
||||
|
||||
class GlobalBlockPool : public BlockPoolOwner, public Lockable {
|
||||
public:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Constructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <param name="pDatablockTemplate"> [in] If non-null, the datablock template. </param>
|
||||
/// <param name="eAcceleratorClass"> acc class. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
GlobalBlockPool(
|
||||
__in DatablockTemplate * pDatablockTemplate,
|
||||
__in ACCELERATOR_CLASS eAcceleratorClass,
|
||||
__in BUFFERACCESSFLAGS ePermissions
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Constructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
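/// <param name="nDataBytes"> The data in bytes. </param>
|
||||
/// <param name="nMetaBytes"> The meta in bytes. </param>
|
||||
/// <param name="nTemplateBytes"> The template in bytes. </param>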
|
||||
/// <param name="eAcceleratorClass"> acc class. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
GlobalBlockPool(
|
||||
__in UINT nDataBytes,
|
||||
__in UINT nMetaBytes,
|
||||
__in UINT nTemplateBytes,
|
||||
__in ACCELERATOR_CLASS eAcceleratorClass,
|
||||
__in BUFFERACCESSFLAGS ePermissions
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual ~GlobalBlockPool();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if this object has block pool. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
|
||||
/// <returns> true if block pool, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL HasBlockPool();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if this object is global pool. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/30/2013. </remarks>
|
||||
///
|
||||
/// <returns> true if global pool, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL BlockPoolIsGlobal();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Force block pooling for a port that has an up-stream allocator. In general, when
|
||||
/// we have an upstream allocator (meta) port, the runtime will not create a block
|
||||
/// pool for the corresponding output port. This turns out to put device-side
|
||||
/// allocation on the critical path in some cases, so we provide a way to override
|
||||
/// that behavior and allow a port to create a pool based on some size hints. When
|
||||
/// there is a block available with sufficient space in the pool, the meta port can
|
||||
/// avoid the allocation and draw from the pool.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 9/25/2012. </remarks>
|
||||
///
|
||||
/// <param name="nPoolSize"> Size of the block pool. </param>
|
||||
/// <param name="nStride"> The stride. </param>
|
||||
/// <param name="nDataBytes"> The data in bytes. </param>
|
||||
/// <param name="nMetaBytes"> The meta in bytes. </param>
|
||||
/// <param name="nTemplateBytes"> The template in bytes. </param>
|
||||
/// <param name="bPageLockHostViews"> (optional) the page lock host views. </param>
|
||||
/// <param name="bEagerDeviceMaterialize"> (optional) the eager device materialize. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void
|
||||
ForceBlockPoolHint(
|
||||
__in UINT nPoolSize,
|
||||
__in UINT nStride,
|
||||
__in UINT nDataBytes,
|
||||
__in UINT nMetaBytes,
|
||||
__in UINT nTemplateBytes,
|
||||
__in BOOL bPageLockHostViews=FALSE,
|
||||
__in BOOL bEagerDeviceMaterialize=FALSE
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Allocate block pool. Attempt to preallocate blocks on this port.
|
||||
///
|
||||
/// Allocation of data-blocks and platform-specific buffers can be a significant
|
||||
/// latency expense at dispatch time. We can actually preallocate output datablocks
|
||||
/// and create device- side buffers at graph construction time. For each node in the
|
||||
/// graph, allocate data blocks on any output ports, and create device-specific
|
||||
/// buffers for all accelerators capable of executing the node.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/15/2012. </remarks>
|
||||
///
|
||||
/// <param name="pAccelerators"> [in] If non-null, the accelerators on which views of blocks
|
||||
/// allocated in the pool may be required. </param>
|
||||
/// <param name="uiPoolSize"> [in] (optional) Size of the pool. If zero/defaulted,
|
||||
/// Runtime::GetICBlockPoolSize() will be used to determine the
|
||||
/// size of the pool. </param>
|
||||
///
|
||||
/// <returns> True if it succeeds, false if it fails. If a port type doesn't actually implement
|
||||
/// pooling, return false as well.
|
||||
/// </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
AllocateBlockPool(
|
||||
__in std::vector<Accelerator*>* pAccelerators,
|
||||
__in unsigned int uiPoolSize=0
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Configure a block pool, but do not perform allocations on it yet.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/15/2012. </remarks>
|
||||
///
|
||||
/// <param name="pAccelerators"> [in] If non-null, the accelerators on which views of blocks
|
||||
/// allocated in the pool may be required. </param>
|
||||
/// <param name="uiPoolSize"> [in] (optional) Size of the pool. If zero/defaulted,
|
||||
/// Runtime::GetICBlockPoolSize() will be used to determine the
|
||||
/// size of the pool. </param>
|
||||
///
|
||||
/// <returns> True if it succeeds, false if it fails. If a port type doesn't actually implement
|
||||
/// pooling, return false as well.
|
||||
/// </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
ConfigureBlockPool(
|
||||
__in unsigned int uiPoolSize=0
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destroys the block pool. AddRef everything in the pool, set its owner
|
||||
/// to null, and then release it. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/17/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void
|
||||
DestroyBlockPool(
|
||||
VOID
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Queries if a block pool is active and able to deliver/return blocks. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/18/2013. </remarks>
|
||||
///
|
||||
/// <returns> true if a block pool is active, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
IsBlockPoolActive(
|
||||
VOID
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the owner name. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/18/2013. </remarks>
|
||||
///
|
||||
/// <returns> null if it fails, else the owner name. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual char *
|
||||
GetPoolOwnerName(
|
||||
VOID
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets high water mark. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/19/2013. </remarks>
|
||||
///
|
||||
/// <returns> The high water mark. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual UINT GetHighWaterMark();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the total number of blocks owned by the pool. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/19/2013. </remarks>
|
||||
///
|
||||
/// <returns> The total number of blocks owned by the pool (whether they are queued or not). </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual UINT GetOwnedBlockCount();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the low water mark. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/19/2013. </remarks>
|
||||
///
|
||||
/// <returns> The low water mark. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual UINT GetLowWaterMark();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the currently available count. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/19/2013. </remarks>
|
||||
///
|
||||
/// <returns> The number of currently available blocks. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual UINT GetAvailableBlockCount();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Allocate block pool. Attempt to preallocate blocks on this port.
|
||||
/// Asynchronous version. Only allocates device-space buffers
|
||||
/// in the first pass. Second pass queues all the copies.
|
||||
/// This function handles only the first pass.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/15/2012. </remarks>
|
||||
///
|
||||
/// <param name="pAccelerators"> [in] If non-null, the accelerators on which views of blocks
|
||||
/// allocated in the pool may be required. </param>
|
||||
/// <param name="uiPoolSize"> [in] (optional) Size of the pool. If zero/defaulted,
|
||||
/// Runtime::GetICBlockPoolSize() will be used to determine the
|
||||
/// size of the pool. </param>
|
||||
///
|
||||
/// <returns> True if it succeeds, false if it fails. If a port type doesn't actually implement
|
||||
/// pooling, return false as well.
|
||||
/// </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
AllocateBlockPoolAsync(
|
||||
__in std::vector<Accelerator*>* pAccelerators,
|
||||
__in unsigned int uiPoolSize=0
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Allocate block pool. Attempt to preallocate blocks on this port.
|
||||
/// Asynchronous version. Only allocates device-space buffers
|
||||
/// in the first pass. Second pass queues all the copies.
|
||||
/// This function handles the second pass.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/15/2012. </remarks>
|
||||
///
|
||||
/// <param name="pAccelerators"> [in] If non-null, the accelerators on which views of blocks
|
||||
/// allocated in the pool may be required. </param>
|
||||
/// <param name="uiPoolSize"> [in] (optional) Size of the pool. If zero/defaulted,
|
||||
/// Runtime::GetICBlockPoolSize() will be used to determine the
|
||||
/// size of the pool. </param>
|
||||
///
|
||||
/// <returns> True if it succeeds, false if it fails. If a port type doesn't actually implement
|
||||
/// pooling, return false as well.
|
||||
/// </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
FinalizeBlockPoolAsync(
|
||||
VOID
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> add a new block to the pool. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
|
||||
/// <param name="pBlock"> [in,out] If non-null, the block. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void AddNewBlock(Datablock * pBlock);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> return a block to the pool. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
|
||||
/// <param name="pBlock"> [in,out] If non-null, the block. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void ReturnToPool(Datablock * pBlock);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets pool size. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
|
||||
/// <returns> The pool size. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual UINT GetPoolSize();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Sets request page locked. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
|
||||
/// <param name="bPageLocked"> true to lock, false to unlock the page. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void SetRequestsPageLocked(BOOL bPageLocked);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets request page locked. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL GetRequestsPageLocked();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets pooled block. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
|
||||
/// <returns> null if it fails, else the pooled block. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual Datablock * GetPooledBlock();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if this request matches what is present in this pool. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/14/2013. </remarks>
|
||||
///
|
||||
/// <param name="nDataBytes"> The data in bytes. </param>
|
||||
/// <param name="nMetaBytes"> The meta in bytes. </param>
|
||||
/// <param name="nTemplateBytes"> The template in bytes. </param>
|
||||
///
|
||||
/// <returns> true if matching request, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
IsMatchingRequest(
|
||||
__in UINT nDataBytes,
|
||||
__in UINT nMetaBytes,
|
||||
__in UINT nTemplateBytes
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets a destination buffer for a block with an upstream
|
||||
/// allocator. Succeeds only if the pool happens to have blocks
|
||||
/// backed by sufficient resources in all channels that are backed.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/20/2011. </remarks>
|
||||
///
|
||||
/// <param name="pAccelerator"> (optional) [in,out] If non-null, the accelerator. </param>
|
||||
///
|
||||
/// <returns> null if it fails, else the destination buffer. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual Datablock *
|
||||
GetBlockFromPool(
|
||||
__in Accelerator * pAccelerator=NULL,
|
||||
__in UINT uiDataBytes=0,
|
||||
__in UINT uiMetaBytes=0,
|
||||
__in UINT uiTemplateBytes=0
|
||||
);
|
||||
|
||||
protected:
|
||||
|
||||
/// <summary> The block pool. </summary>
|
||||
BlockPool * m_pBlockPool;
|
||||
|
||||
/// <summary> true if this object has block pool. </summary>
|
||||
BOOL m_bHasBlockPool;
|
||||
|
||||
/// <summary> The accelerator class. </summary>
|
||||
ACCELERATOR_CLASS m_eAcceleratorClass;
|
||||
|
||||
/// <summary> The permissions. </summary>
|
||||
BUFFERACCESSFLAGS m_ePermissions;
|
||||
|
||||
/// <summary> The template. </summary>
|
||||
DatablockTemplate * m_pTemplate;
|
||||
|
||||
/// <summary> The data in bytes. </summary>
|
||||
UINT m_nDataBytes;
|
||||
|
||||
/// <summary> The meta in bytes. </summary>
|
||||
UINT m_nMetaBytes;
|
||||
|
||||
/// <summary> The template in bytes. </summary>
|
||||
UINT m_nTemplateBytes;
|
||||
};
|
||||
|
||||
};
|
||||
#endif
|
|
@ -1,257 +0,0 @@
|
|||
///-------------------------------------------------------------------------------------------------
|
||||
// file: GlobalPoolManager.h
|
||||
//
|
||||
// summary: Declares the global pool manager class
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
#ifndef __GLOBAL_POOL_MANAGER__
|
||||
#define __GLOBAL_POOL_MANAGER__
|
||||
|
||||
#include <stdio.h>
|
||||
#include <crtdbg.h>
|
||||
#include <Windows.h>
|
||||
#include "datablock.h"
|
||||
#include "GlobalBlockPool.h"
|
||||
#include "ptlock.h"
|
||||
#include <deque>
|
||||
#include <map>
|
||||
#include <tuple>
|
||||
|
||||
namespace PTask {
|
||||
|
||||
class CompiledKernel;
|
||||
class Graph;
|
||||
class Channel;
|
||||
class Port;
|
||||
class Task;
|
||||
class Datablock;
|
||||
class DatablockTemplate;
|
||||
|
||||
class GlobalPoolManager : public Lockable {
|
||||
|
||||
public:
|
||||
static GlobalPoolManager * Create();
|
||||
static void Destroy();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Require block pool. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/20/2013. </remarks>
|
||||
///
|
||||
/// <param name="pTemplate"> [in,out] If non-null, the template. </param>
|
||||
/// <param name="nDataSize"> Size of the data. </param>
|
||||
/// <param name="nMetaSize"> Size of the meta. </param>
|
||||
/// <param name="nTemplateSize"> Size of the template. </param>
|
||||
/// <param name="nBlocks"> (Optional) The blocks. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static BOOL
|
||||
RequireBlockPool(
|
||||
__in DatablockTemplate * pTemplate,
|
||||
__in int nDataSize,
|
||||
__in int nMetaSize,
|
||||
__in int nTemplateSize,
|
||||
__in int nBlocks=0
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Require block pool. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/20/2013. </remarks>
|
||||
///
|
||||
/// <param name="nDataSize"> Size of the data. </param>
|
||||
/// <param name="nMetaSize"> Size of the meta. </param>
|
||||
/// <param name="nTemplateSize"> Size of the template. </param>
|
||||
/// <param name="nBlocks"> (Optional) The blocks. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static BOOL
|
||||
RequireBlockPool(
|
||||
__in int nDataSize,
|
||||
__in int nMetaSize,
|
||||
__in int nTemplateSize,
|
||||
__in int nBlocks=0
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Require block pool. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/20/2013. </remarks>
|
||||
///
|
||||
/// <param name="pTemplate"> [in,out] If non-null, the template. </param>
|
||||
/// <param name="nBlocks"> (Optional) The blocks. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static BOOL
|
||||
RequireBlockPool(
|
||||
__in DatablockTemplate * pTemplate,
|
||||
__in int nBlocks=0
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Find a block pool for the block. If there is no good fit,
|
||||
/// create one if the bCreateIfNotFound flag is set.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/30/2013. </remarks>
|
||||
///
|
||||
/// <param name="pBlock"> [in,out] If non-null, the block. </param>
|
||||
/// <param name="bCreateIfNotFound"> true to create a pool if no good fit is found. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static BOOL
|
||||
AddBlockToBestFitPool(
|
||||
__in Datablock * pBlock,
|
||||
__in BOOL bCreateIfNotFound
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Allocates the pools. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/20/2013. </remarks>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
BOOL AllocatePools();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destroys the pools. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/20/2013. </remarks>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
BOOL DestroyPools();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Allocate datablock. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/20/2013. </remarks>
|
||||
///
|
||||
/// <param name="pTemplate"> [in,out] If non-null, the template. </param>
|
||||
/// <param name="uiDataSize"> Size of the data. </param>
|
||||
/// <param name="uiMetaSize"> Size of the meta. </param>
|
||||
/// <param name="uiTemplateSize"> Size of the template. </param>
|
||||
///
|
||||
/// <returns> null if it fails, else the allocated datablock. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
Datablock *
|
||||
AllocateDatablock(
|
||||
__in DatablockTemplate * pTemplate,
|
||||
__in UINT uiDataSize,
|
||||
__in UINT uiMetaSize,
|
||||
__in UINT uiTemplateSize
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Request a pooled block. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/21/2013. </remarks>
|
||||
///
|
||||
/// <param name="pTemplate"> [in,out] If non-null, the template. </param>
|
||||
/// <param name="uiDataSize"> Size of the data. </param>
|
||||
/// <param name="uiMetaSize"> Size of the meta. </param>
|
||||
/// <param name="uiTemplateSize"> Size of the template. </param>
|
||||
///
|
||||
/// <returns> null if it fails, else the requested datablock. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static Datablock *
|
||||
RequestBlock(
|
||||
__in DatablockTemplate * pTemplate,
|
||||
__in UINT uiDataSize,
|
||||
__in UINT uiMetaSize,
|
||||
__in UINT uiTemplateSize
|
||||
);
|
||||
|
||||
protected:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Default constructor. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/21/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
GlobalPoolManager();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destructor. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/21/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual ~GlobalPoolManager();
|
||||
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Searches for the first matching pool. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/20/2013. </remarks>
|
||||
///
|
||||
/// <param name="pTemplate"> [in,out] If non-null, the template. </param>
|
||||
/// <param name="uiDataSize"> Size of the data. </param>
|
||||
/// <param name="uiMetaSize"> Size of the meta. </param>
|
||||
/// <param name="uiTemplateSize"> Size of the template. </param>
|
||||
/// <param name="uiBlockControlCode"> The block control code. </param>
|
||||
///
|
||||
/// <returns> null if it fails, else the found matching pool. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
GlobalBlockPool *
|
||||
FindMatchingPool(
|
||||
__in DatablockTemplate * pTemplate,
|
||||
__in UINT uiDataSize,
|
||||
__in UINT uiMetaSize,
|
||||
__in UINT uiTemplateSize
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Find a block pool for the block. If there is no good fit,
|
||||
/// create one if the bCreateIfNotFound flag is set.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/30/2013. </remarks>
|
||||
///
|
||||
/// <param name="pBlock"> [in,out] If non-null, the block. </param>
|
||||
/// <param name="bCreateIfNotFound"> true to create a pool if no good fit is found. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
BOOL
|
||||
__AddBlockToBestFitPool(
|
||||
__in Datablock * pBlock,
|
||||
__in BOOL bCreateIfNotFound
|
||||
);
|
||||
|
||||
static void WarnIfInitialized(char * lspzFunction);
|
||||
typedef std::tuple<DatablockTemplate*, int, int, int, int> POOLDESCRIPTOR;
|
||||
static GlobalPoolManager * g_pGlobalPoolManager;
|
||||
static BOOL g_bPoolsAllocated;
|
||||
static PTLock g_vPoolsLock;
|
||||
static std::map<int, POOLDESCRIPTOR> g_vRequiredPoolsUntyped;
|
||||
static std::map<DatablockTemplate*, POOLDESCRIPTOR> g_vRequiredPoolsTyped;
|
||||
std::map<int, GlobalBlockPool*> g_vUntypedBlockPools;
|
||||
std::map<DatablockTemplate*, GlobalBlockPool*> g_vTypedBlockPools;
|
||||
|
||||
virtual GlobalPoolManager * GetPoolManager() { return g_pGlobalPoolManager; }
|
||||
virtual BOOL ArePoolsAllocated() { return g_bPoolsAllocated; }
|
||||
virtual PTLock * GetPoolLock() { return &g_vPoolsLock; }
|
||||
virtual std::map<int, POOLDESCRIPTOR>* GetRequiredPoolsUntyped() { return &g_vRequiredPoolsUntyped; }
|
||||
virtual std::map<DatablockTemplate*, POOLDESCRIPTOR>* GetRequiredPoolsTyped() { return &g_vRequiredPoolsTyped; }
|
||||
};
|
||||
|
||||
};
|
||||
|
||||
#endif
|
|
@ -1,160 +0,0 @@
|
|||
///-------------------------------------------------------------------------------------------------
|
||||
// file: GraphProfiler.h
|
||||
//
|
||||
// summary: Declares the graph profiler class
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
#ifndef __GRAPH_PROFILER_H__
|
||||
#define __GRAPH_PROFILER_H__
|
||||
|
||||
#include "primitive_types.h"
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <sstream>
|
||||
#include <iostream>
|
||||
|
||||
namespace PTask {
|
||||
|
||||
class Task;
|
||||
class Graph;
|
||||
|
||||
class GraphProfiler
|
||||
{
|
||||
public:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Constructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/19/2013. </remarks>
|
||||
///
|
||||
/// <param name="pGraph"> [in,out] If non-null, the graph. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
GraphProfiler(Graph * pGraph);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/19/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual ~GraphProfiler();
|
||||
|
||||
protected:
|
||||
|
||||
Graph * m_pGraph;
|
||||
|
||||
/// <summary> Lock for ad hoc graph stats. </summary>
|
||||
CRITICAL_SECTION m_csGraphStats;
|
||||
|
||||
/// <summary> The minimum number of concurrent inflight task threads. </summary>
|
||||
UINT m_uiMinConcurrentInflightThreads;
|
||||
|
||||
/// <summary> The maximum number of concurrent inflight task threads. </summary>
|
||||
UINT m_uiMaxConcurrentInflightThreads;
|
||||
|
||||
/// <summary> The concurrent inflight thread accumulator. </summary>
|
||||
UINT m_uiConcurrentInflightThreadAccumulator;
|
||||
|
||||
/// <summary> The minimum number of concurrent inflight dispatch attempts. </summary>
|
||||
UINT m_uiMinConcurrentInflightDispatches;
|
||||
|
||||
/// <summary> The maximum number of concurrent inflight dispatch attempts. </summary>
|
||||
UINT m_uiMaxConcurrentInflightDispatches;
|
||||
|
||||
/// <summary> The concurrent inflight dispatch accumulator. </summary>
|
||||
UINT m_uiConcurrentInflightDispatchAccumulator;
|
||||
|
||||
/// <summary> The minimum task queue occupancy. </summary>
|
||||
UINT m_uiMinTaskQueueOccupancy;
|
||||
|
||||
/// <summary> The maximum task queue occupancy. </summary>
|
||||
UINT m_uiMaxTaskQueueOccupancy;
|
||||
|
||||
/// <summary> The task queue occupancy accumulator. </summary>
|
||||
UINT m_uiTaskQueueOccupancyAccumulator;
|
||||
|
||||
/// <summary> The task queue samples. </summary>
|
||||
UINT m_uiTaskQueueSamples;
|
||||
|
||||
/// <summary> The current number of alive threads. </summary>
|
||||
UINT m_uiAliveThreads;
|
||||
|
||||
/// <summary> The awake threads. </summary>
|
||||
UINT m_uiAwakeThreads;
|
||||
|
||||
/// <summary> The blocked threads. </summary>
|
||||
UINT m_uiBlockedRunningThreads;
|
||||
|
||||
/// <summary> The blocked threads. </summary>
|
||||
UINT m_uiBlockedTaskAvailableThreads;
|
||||
|
||||
/// <summary> The exited threads. </summary>
|
||||
UINT m_uiExitedThreads;
|
||||
|
||||
/// <summary> The current number of inflight threads. </summary>
|
||||
UINT m_uiInflightThreads;
|
||||
|
||||
/// <summary> The current number of inflight dispatches. </summary>
|
||||
UINT m_uiInflightDispatchAttempts;
|
||||
|
||||
/// <summary> The number of updates to the inflight thread count. </summary>
|
||||
UINT m_uiInflightThreadUpdates;
|
||||
|
||||
/// <summary> The number of updates to the inflight dispatch count. </summary>
|
||||
UINT m_uiInflightDispatchUpdates;
|
||||
|
||||
/// <summary> The total number of dispatch attempts. </summary>
|
||||
UINT m_uiDispatchAttempts;
|
||||
|
||||
/// <summary> The successful dispatch attempts. </summary>
|
||||
UINT m_uiSuccessfulDispatchAttempts;
|
||||
|
||||
/// <summary> The total number of dequeue attempts. </summary>
|
||||
UINT m_uiDequeueAttempts;
|
||||
|
||||
/// <summary> The successful dequeue attempts. </summary>
|
||||
UINT m_uiSuccessfulDequeueAttempts;
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Initialises the graph statistics. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 7/1/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void Initialize();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destroys the graph statistics. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 7/1/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void Destroy();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Print graph statistics. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 7/1/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void Report(std::ostream& ss);
|
||||
|
||||
void OnTaskThreadAlive();
|
||||
void OnTaskThreadExit();
|
||||
void OnTaskThreadBlockRunningGraph();
|
||||
void OnTaskThreadWakeRunningGraph();
|
||||
void OnTaskThreadBlockTasksAvailable();
|
||||
void OnTaskThreadWakeTasksAvailable();
|
||||
void OnTaskThreadDequeueAttempt();
|
||||
void OnTaskThreadDequeueComplete(Task * pTask);
|
||||
void OnTaskThreadDispatchAttempt();
|
||||
void OnTaskThreadDispatchComplete(BOOL bSuccess);
|
||||
|
||||
friend class Graph;
|
||||
};
|
||||
|
||||
};
|
||||
#endif
|
|
@ -1,194 +0,0 @@
|
|||
///-------------------------------------------------------------------------------------------------
|
||||
// file: HostAsyncContext.h
|
||||
//
|
||||
// summary: Declares the host asynchronous context class
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
#ifndef __HOST_ASYNC_CONTEXT_H__
|
||||
#define __HOST_ASYNC_CONTEXT_H__
|
||||
|
||||
#include "primitive_types.h"
|
||||
#include "accelerator.h"
|
||||
#include "hostaccelerator.h"
|
||||
#include "task.h"
|
||||
#include "channel.h"
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include <list>
|
||||
#include "hrperft.h"
|
||||
#include "AsyncContext.h"
|
||||
#include "AsyncDependence.h"
|
||||
|
||||
namespace PTask {
|
||||
|
||||
class HostAsyncContext : public AsyncContext {
|
||||
public:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Constructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/24/2012. </remarks>
|
||||
///
|
||||
/// <param name="pDeviceContext"> [in,out] If non-null, context for the device. </param>
|
||||
/// <param name="pTaskContext"> [in,out] If non-null, context for the task. </param>
|
||||
/// <param name="eAsyncContextType"> Type of the asynchronous context. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
HostAsyncContext(
|
||||
__in Accelerator * pDeviceContext,
|
||||
__in Task * pTaskContext,
|
||||
__in ASYNCCONTEXTTYPE eAsyncContextType
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/24/2012. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual ~HostAsyncContext();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Initializes this object. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/25/2012. </remarks>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL Initialize();
|
||||
|
||||
protected:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Platform specific create synchronization point. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/25/2012. </remarks>
|
||||
///
|
||||
/// <returns> null if it fails, else the new sync point. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual SyncPoint *
|
||||
PlatformSpecificCreateSyncPoint(
|
||||
void * pPSSyncObject
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Platform specific destroy synchronization point. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/25/2012. </remarks>
|
||||
///
|
||||
/// <param name="pSyncPoint"> [in,out] If non-null, the synchronise point. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
PlatformSpecificDestroySynchronizationPoint(
|
||||
__in SyncPoint * pSyncPoint
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Platform specific synchronize context. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
PlatformSpecificSynchronizeContext(
|
||||
VOID
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Wait for dependence asynchronously. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/24/2012. </remarks>
|
||||
///
|
||||
/// <param name="pSyncPoint"> [in,out] If non-null, the synchronise point. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
PlatformSpecificInsertFence(
|
||||
__in SyncPoint * pSyncPoint
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Wait for dependence synchronously. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/24/2012. </remarks>
|
||||
///
|
||||
/// <param name="pSyncPoint"> [in,out] If non-null, the synchronise point. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
PlatformSpecificSynchronousWait(
|
||||
__in SyncPoint * pSyncPoint
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Determines if the sync point is resolved (and marks it if so). </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
PlatformSpecificQueryOutstanding(
|
||||
__inout SyncPoint * pSyncPoint
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Platform specific nonblocking check whether the event remains outstanding. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 7/2/2013. </remarks>
|
||||
///
|
||||
/// <param name="pSyncPoint"> [in,out] If non-null, the synchronise point. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
PlatformSpecificNonblockingQueryOutstanding(
|
||||
__inout SyncPoint * pSyncPoint
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Wait for dependence synchronously without locking the async context
|
||||
/// or underlying accelerator: this simplifies lock acquisition for such
|
||||
/// waits, but at the expense of leaving live dependences that are
|
||||
/// actually resolved. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/24/2012. </remarks>
|
||||
///
|
||||
/// <param name="pSyncPoint"> [in,out] If non-null, the synchronise point. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
PlatformSpecificLocklessSynchronousWait(
|
||||
__in SyncPoint * pSyncPoint
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the platform context object. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/25/2012. </remarks>
|
||||
///
|
||||
/// <returns> null if it fails, else the platform context object. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void *
|
||||
GetPlatformContextObject();
|
||||
|
||||
};
|
||||
|
||||
};
|
||||
#endif
|
|
@ -1,521 +0,0 @@
|
|||
//--------------------------------------------------------------------------------------
|
||||
// File: InitializerChannel.h
|
||||
// Maintainer: crossbac@microsoft.com
|
||||
//--------------------------------------------------------------------------------------
|
||||
#ifndef _INITIALIZER_CHANNEL_H_
|
||||
#define _INITIALIZER_CHANNEL_H_
|
||||
|
||||
#include "primitive_types.h"
|
||||
#include "channel.h"
|
||||
#include "BlockPoolOwner.h"
|
||||
#include <deque>
|
||||
|
||||
namespace PTask {
|
||||
|
||||
class BlockPool;
|
||||
class Datablock;
|
||||
class DatablockTemplate;
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> InitializerChannel. Channel subclass specialized to allocate data based
|
||||
/// on downstream Port template when pulled. Push is meaningless. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/28/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
class InitializerChannel : public Channel, public BlockPoolOwner {
|
||||
public:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Constructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/28/2011. </remarks>
|
||||
///
|
||||
/// <param name="pGraph"> [in,out] If non-null, the graph. </param>
|
||||
/// <param name="pDatablockTemplate"> [in,out] If non-null, the datablock template. </param>
|
||||
/// <param name="hRuntimeTerminateEvent"> Handle of the graph terminate event. </param>
|
||||
/// <param name="hGraphTeardownEvt"> The graph teardown event. </param>
|
||||
/// <param name="hGraphStopEvent"> Handle of the graph stop event. </param>
|
||||
/// <param name="lpszChannelName"> [in,out] If non-null, name of the channel. </param>
|
||||
/// <param name="bHasBlockPool"> true if the channel has a block pool. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
InitializerChannel(
|
||||
__in Graph * pGraph,
|
||||
__in DatablockTemplate * pDatablockTemplate,
|
||||
__in HANDLE hRuntimeTerminateEvent,
|
||||
__in HANDLE hGraphTeardownEvt,
|
||||
__in HANDLE hGraphStopEvent,
|
||||
__in char * lpszChannelName,
|
||||
__in BOOL bHasBlockPool
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/28/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual ~InitializerChannel();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if the channel is (or can be) connected to a data source or sink that can be
|
||||
/// streamed. Generally speaking, this is a property of the primitive whose IO
|
||||
/// resources are being exposed by this port; consequently this property must be set
|
||||
/// explicitly by the programmer when graph structures that are stateful are
|
||||
/// constructed. For example, in a sort primitive, the main input can be streamed
|
||||
/// (broken into multiple blocks) only if there is a merge network downstream of the
|
||||
/// node performing the sort. Code that feeds the main input port needs to know this
|
||||
/// to decide whether to grow blocks until all data is present, or to push partial
|
||||
/// input.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/20/2011. </remarks>
|
||||
///
|
||||
/// <returns> true if the port can stream data, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL CanStream();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if channel is ready. This has a different meaning depending on the channel
|
||||
/// subtype in question, but in general means "is the channel ready to produce or
|
||||
/// consume datablocks?".
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/19/2011. </remarks>
|
||||
///
|
||||
/// <param name="type"> (optional) the channel endpoint type. </param>
|
||||
///
|
||||
/// <returns> true if ready, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL IsReady(CHANNELENDPOINTTYPE type=CE_DST);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Pulls a datablock from the channel, potentially timing out after dwTimeout
|
||||
/// milliseconds.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/19/2011. </remarks>
|
||||
///
|
||||
/// <param name="dwTimeout"> (optional) the timeout in milliseconds. Use 0xFFFFFFFF for no
|
||||
/// timeout. </param>
|
||||
///
|
||||
/// <returns> null if it fails, else the datablock pulled from the channel. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual Datablock * Pull(DWORD dwTimeout=0xFFFFFFFF);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Returns the first available datablock on the channel without removing it. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/19/2011. </remarks>
|
||||
///
|
||||
/// <returns> null if it fails, else the currently available datablock object. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual Datablock * Peek();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Pushes a datablock into this channel, blocking until there is capacity
|
||||
/// for an optional timeout in milliseconds. Default timeout is infinite.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/19/2011. </remarks>
|
||||
///
|
||||
/// <param name="pBlock"> [in,out] If non-null, the block. </param>
|
||||
/// <param name="dwTimeout"> (optional) the timeout in milliseconds. Use 0xFFFFFFFF for no
|
||||
/// timeout. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL Push(Datablock* pBlock, DWORD dwTimeout=0xFFFFFFFF);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if this object is block pool candidate. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/30/2013. </remarks>
|
||||
///
|
||||
/// <returns> true if block pool candidate, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL IsBlockPoolCandidate();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if this object is an accelerator-only block pool candidate. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/30/2013. </remarks>
|
||||
///
|
||||
/// <returns> true if accelerator-only block pool candidate, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL IsAcceleratorOnlyBlockPoolCandidate();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if this object is a page-locked block pool candidate. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/30/2013. </remarks>
|
||||
///
|
||||
/// <returns> true if page-locked block pool candidate, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL IsPagelockedBlockPoolCandidate();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if this object has block pool. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
|
||||
/// <returns> true if block pool, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL HasBlockPool();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if this object's block pool is global. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/30/2013. </remarks>
|
||||
///
|
||||
/// <returns> true if global pool, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL BlockPoolIsGlobal();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Force block pooling for a port that has an up-stream allocator. In general, when
|
||||
/// we have an upstream allocator (meta) port, the runtime will not create a block
|
||||
/// pool for the corresponding output port. This turns out to put device-side
|
||||
/// allocation on the critical path in some cases, so we provide a way to override
|
||||
/// that behavior and allow a port to create a pool based on some size hints. When
|
||||
/// there is a block available with sufficient space in the pool, the meta port can
|
||||
/// avoid the allocation and draw from the pool.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 9/25/2012. </remarks>
|
||||
///
|
||||
/// <param name="nPoolSize"> Size of the block pool. </param>
|
||||
/// <param name="nStride"> The stride. </param>
|
||||
/// <param name="nDataBytes"> The data in bytes. </param>
|
||||
/// <param name="nMetaBytes"> The meta in bytes. </param>
|
||||
/// <param name="nTemplateBytes"> The template in bytes. </param>
|
||||
/// <param name="bPageLockHostViews"> (optional) the page lock host views. </param>
|
||||
/// <param name="bEagerDeviceMaterialize"> (optional) the eager device materialize. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void
|
||||
ForceBlockPoolHint(
|
||||
__in UINT nPoolSize,
|
||||
__in UINT nStride,
|
||||
__in UINT nDataBytes,
|
||||
__in UINT nMetaBytes,
|
||||
__in UINT nTemplateBytes,
|
||||
__in BOOL bPageLockHostViews=FALSE,
|
||||
__in BOOL bEagerDeviceMaterialize=FALSE
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Allocate block pool. Attempt to preallocate blocks on this port.
|
||||
///
|
||||
/// Allocation of data-blocks and platform-specific buffers can be a significant
|
||||
/// latency expense at dispatch time. We can actually preallocate output datablocks
|
||||
/// and create device-side buffers at graph construction time. For each node in the
|
||||
/// graph, allocate data blocks on any output ports, and create device-specific
|
||||
/// buffers for all accelerators capable of executing the node.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/15/2012. </remarks>
|
||||
///
|
||||
/// <param name="pAccelerators"> [in] If non-null, the accelerators on which views of blocks
|
||||
/// allocated in the pool may be required. </param>
|
||||
/// <param name="uiPoolSize"> [in] (optional) Size of the pool. If zero/defaulted,
|
||||
/// Runtime::GetICBlockPoolSize() will be used to determine the
|
||||
/// size of the pool. </param>
|
||||
///
|
||||
/// <returns> True if it succeeds, false if it fails. If a port type doesn't actually implement
|
||||
/// pooling, return false as well.
|
||||
/// </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
AllocateBlockPool(
|
||||
__in std::vector<Accelerator*>* pAccelerators,
|
||||
__in unsigned int uiPoolSize=0
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destroys the block pool. AddRef everything in the pool, set its owner
|
||||
/// to null, and then release it. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/17/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void
|
||||
DestroyBlockPool(
|
||||
VOID
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Queries if a block pool is active and able to deliver/return blocks. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/18/2013. </remarks>
|
||||
///
|
||||
/// <returns> true if a block pool is active, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
IsBlockPoolActive(
|
||||
VOID
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the owner name. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/18/2013. </remarks>
|
||||
///
|
||||
/// <returns> null if it fails, else the owner name. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual char *
|
||||
GetPoolOwnerName(
|
||||
VOID
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Allocate block pool. Attempt to preallocate blocks on this port.
|
||||
/// Asynchronous version. Only allocates device-space buffers
|
||||
/// in the first pass. Second pass queues all the copies.
|
||||
/// This function handles only the first pass.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/15/2012. </remarks>
|
||||
///
|
||||
/// <param name="pAccelerators"> [in] If non-null, the accelerators on which views of blocks
|
||||
/// allocated in the pool may be required. </param>
|
||||
/// <param name="uiPoolSize"> [in] (optional) Size of the pool. If zero/defaulted,
|
||||
/// Runtime::GetICBlockPoolSize() will be used to determine the
|
||||
/// size of the pool. </param>
|
||||
///
|
||||
/// <returns> True if it succeeds, false if it fails. If a port type doesn't actually implement
|
||||
/// pooling, return false as well.
|
||||
/// </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
AllocateBlockPoolAsync(
|
||||
__in std::vector<Accelerator*>* pAccelerators,
|
||||
__in unsigned int uiPoolSize=0
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Allocate block pool. Attempt to preallocate blocks on this port.
|
||||
/// Asynchronous version. Only allocates device-space buffers
|
||||
/// in the first pass. Second pass queues all the copies.
|
||||
/// This function handles the second pass.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/15/2012. </remarks>
|
||||
///
|
||||
/// (No parameters: the accelerators and pool size are supplied to the first pass,
|
||||
/// AllocateBlockPoolAsync.)
|
||||
///
|
||||
/// <returns> True if it succeeds, false if it fails. If a port type doesn't actually implement
|
||||
/// pooling, return false as well.
|
||||
/// </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
FinalizeBlockPoolAsync(
|
||||
VOID
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets high water mark. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/19/2013. </remarks>
|
||||
///
|
||||
/// <returns> The high water mark. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual UINT GetHighWaterMark();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the total number of blocks owned by the pool. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/19/2013. </remarks>
|
||||
///
|
||||
/// <returns> The total number of blocks owned by the pool (whether they are queued or not). </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual UINT GetOwnedBlockCount();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the low water mark. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/19/2013. </remarks>
|
||||
///
|
||||
/// <returns> The low water mark. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual UINT GetLowWaterMark();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the currently available count. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/19/2013. </remarks>
|
||||
///
|
||||
/// <returns> The number of currently available blocks. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual UINT GetAvailableBlockCount();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> add a new block to the pool. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
|
||||
/// <param name="pBlock"> [in,out] If non-null, the block. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void AddNewBlock(Datablock * pBlock);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> return a block to the pool. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
|
||||
/// <param name="pBlock"> [in,out] If non-null, the block. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void ReturnToPool(Datablock * pBlock);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets pool size. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
|
||||
/// <returns> The pool size. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual UINT GetPoolSize();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Sets request page locked. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
|
||||
/// <param name="bPageLocked"> true to lock, false to unlock the page. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void SetRequestsPageLocked(BOOL bPageLocked);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets request page locked. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
|
||||
/// <returns> the current requests-page-locked flag. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL GetRequestsPageLocked();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if this channel has downstream writers. An output channel is
|
||||
/// considered a writer because we must conservatively assume consumed
|
||||
/// blocks will be written.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/15/2013. </remarks>
|
||||
///
|
||||
/// <returns> true if downstream writers, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL HasDownstreamWriters();
|
||||
|
||||
protected:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Check type-specific semantics. Return true if all the structures are initialized
|
||||
/// for this channel in a way that is consistent with a well-formed graph. Called by
|
||||
/// CheckSemantics()
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/27/2011. </remarks>
|
||||
///
|
||||
/// <param name="pos"> [in,out] output string stream. </param>
|
||||
/// <param name="pGraph"> [in,out] non-null, the graph. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL CheckTypeSpecificSemantics(std::ostream * pos,
|
||||
PTask::Graph * pGraph);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Allocate a datablock. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <param name="pAsyncContext"> [in,out] If non-null, the asynchronous context. </param>
|
||||
///
|
||||
/// <returns> null if it fails, else the allocated datablock. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual Datablock * AllocateBlock(AsyncContext * pAsyncContext);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Return true if the block that is (or would be) produced in demand to a pull call
|
||||
/// passes all/any predicates.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 2/1/2012. </remarks>
|
||||
///
|
||||
/// <param name="ppDemandAllocatedBlock"> [out] If non-null, on exit, the demand allocated
|
||||
/// block if all predicates are passed. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL PassesPredicates(Datablock ** ppDemandAllocatedBlock);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets a destination buffer for a block with an upstream
|
||||
/// allocator. Succeeds only if the pool happens to have blocks
|
||||
/// backed by sufficient resources in all channels that are backed.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/20/2011. </remarks>
|
||||
///
|
||||
/// <param name="pAccelerator"> (optional) [in,out] If non-null, the accelerator. </param>
|
||||
///
|
||||
/// <returns> null if it fails, else the destination buffer. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual Datablock *
|
||||
GetBlockFromPool(
|
||||
__in Accelerator * pAccelerator=NULL,
|
||||
__in UINT uiDataBytes=0,
|
||||
__in UINT uiMetaBytes=0,
|
||||
__in UINT uiTemplateBytes=0
|
||||
);
|
||||
|
||||
/// <summary> The peeked control propagation signal source. </summary>
|
||||
Datablock * m_pPeekedControlPropagationSignalSrc;
|
||||
|
||||
/// <summary> true if a data block was peeked to derive a control propagation signal. </summary>
|
||||
BOOL m_bControlBlockPeeked;
|
||||
|
||||
/// <summary> The code for the peeked control signal. </summary>
|
||||
CONTROLSIGNAL m_luiPeekedControlSignal;
|
||||
|
||||
/// <summary> The block pool. </summary>
|
||||
BlockPool * m_pBlockPool;
|
||||
|
||||
};
|
||||
|
||||
};
|
||||
#endif
|
|
@ -1,346 +0,0 @@
|
|||
//--------------------------------------------------------------------------------------
|
||||
// File: InitializerPort.h
|
||||
// Maintainer: crossbac@microsoft.com
|
||||
//--------------------------------------------------------------------------------------
|
||||
#ifndef _INITIALIZER_PORT_H_
|
||||
#define _INITIALIZER_PORT_H_
|
||||
|
||||
#include "primitive_types.h"
|
||||
#include "InputPort.h"
|
||||
|
||||
namespace PTask {
|
||||
|
||||
class Task;
|
||||
class Graph;
|
||||
class Datablock;
|
||||
class DatablockTemplate;
|
||||
class AsyncContext;
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary>
|
||||
/// Initializer port. An initializer port is always full, and when pulled, will return a new
|
||||
/// datablock with the value derived from the datablock template with which this port was
|
||||
/// created. When peeked, an initializer port always returns NULL.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
class InitializerPort : public InputPort {
|
||||
public:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Default constructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
InitializerPort();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual ~InitializerPort();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if this port is occupied. Initializer ports are always occupied, by
|
||||
/// definition.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <returns> true if occupied, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL IsOccupied();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Pulls on this port to read the next available datablock. Return a new datablock
|
||||
/// initialized according to the DatablockTemplate with which this port was created.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <returns> null if it fails, else a new datablock initialized from the port's template. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual Datablock * Pull();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Peek at the next datablock on this port. Peek on an InitializerPort always
|
||||
/// returns NULL, because datablocks are created on demand in response to a pull.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <returns> Always NULL: datablocks are created on demand in response to a pull. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual Datablock * Peek();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Pushes an object into this port. This is a NO-OP for InitializerPorts, since this
|
||||
/// port type is designed only to be bound to input resources.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <param name="p"> [in,out] If non-null, the Datablock* to push. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL Push(Datablock* p);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Allocate a datablock. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <param name="pAsyncContext"> [in] (optional) If non-null, the async context where the
|
||||
/// block will be first used. </param>
|
||||
/// <param name="bPooled"> [in] true if the block is to be pooled. </param>
|
||||
///
|
||||
/// <returns> null if it fails, else the allocated datablock. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual Datablock *
|
||||
AllocateBlock(
|
||||
__in AsyncContext * pAsyncContext,
|
||||
__in BOOL bPooled
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Creates a new InitializerPort. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <param name="pTemplate"> [in,out] If non-null, the template. </param>
|
||||
/// <param name="uiId"> The identifier. </param>
|
||||
/// <param name="lpszVariableBinding"> [in,out] If non-null, the variable binding. </param>
|
||||
/// <param name="nParmIdx"> Zero-based parameter index. </param>
|
||||
/// <param name="nInOutRouteIdx"> Zero-based in/out route index. </param>
|
||||
///
|
||||
/// <returns> null if it fails, else the new InitializerPort. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static Port *
|
||||
Create(
|
||||
__in DatablockTemplate * pTemplate,
|
||||
__in UINT uiId,
|
||||
__in char * lpszVariableBinding,
|
||||
__in int nParmIdx,
|
||||
__in int nInOutRouteIdx
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Check type-specific semantics. Return true if all the structures are initialized
|
||||
/// for this port in a way that is consistent with a well-formed graph. Called by
|
||||
/// CheckSemantics()
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/27/2011. </remarks>
|
||||
///
|
||||
/// <param name="pos"> [in,out] output string stream. </param>
|
||||
/// <param name="pGraph"> [in] non-null, the graph. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
CheckTypeSpecificSemantics(
|
||||
__inout std::ostream * pos,
|
||||
__in PTask::Graph * pGraph
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if this object has block pool. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
|
||||
/// <returns> true if block pool, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL HasBlockPool();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Force block pooling for a port that has an up-stream allocator. In general, when
|
||||
/// we have an upstream allocator (meta) port, the runtime will not create a block
|
||||
/// pool for the corresponding output port. This turns out to put device-side
|
||||
/// allocation on the critical path in some cases, so we provide a way to override
|
||||
/// that behavior and allow a port to create a pool based on some size hints. When
|
||||
/// there is a block available with sufficient space in the pool, the meta port can
|
||||
/// avoid the allocation and draw from the pool.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 9/25/2012. </remarks>
|
||||
///
|
||||
/// <param name="nPoolSize"> Size of the block pool. </param>
|
||||
/// <param name="nStride"> The stride. </param>
|
||||
/// <param name="nDataBytes"> The data in bytes. </param>
|
||||
/// <param name="nMetaBytes"> The meta in bytes. </param>
|
||||
/// <param name="nTemplateBytes"> The template in bytes. </param>
|
||||
/// <param name="bPageLockHostViews"> (optional) the page lock host views. </param>
|
||||
/// <param name="bEagerDeviceMaterialize"> (optional) the eager device materialize. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void
|
||||
ForceBlockPoolHint(
|
||||
__in UINT nPoolSize,
|
||||
__in UINT nStride,
|
||||
__in UINT nDataBytes,
|
||||
__in UINT nMetaBytes,
|
||||
__in UINT nTemplateBytes,
|
||||
__in BOOL bPageLockHostViews=FALSE,
|
||||
__in BOOL bEagerDeviceMaterialize=FALSE
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> add a new block to the pool. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
|
||||
/// <param name="pBlock"> [in,out] If non-null, the block. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void AddNewBlock(Datablock * pBlock);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> return a block to the pool. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
|
||||
/// <param name="pBlock"> [in,out] If non-null, the block. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void ReturnToPool(Datablock * pBlock);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets pool size. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
|
||||
/// <returns> The pool size. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual UINT GetPoolSize();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Sets request page locked. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
|
||||
/// <param name="bPageLocked"> true to lock, false to unlock the page. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void SetRequestsPageLocked(BOOL bPageLocked);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets request page locked. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL GetRequestsPageLocked();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Allocate block pool. Attempt to preallocate blocks on this port.
|
||||
///
|
||||
/// Allocation of data-blocks and platform-specific buffers can be a significant
|
||||
/// latency expense at dispatch time. We can actually preallocate output datablocks
|
||||
/// and create device-side buffers at graph construction time. For each node in the
|
||||
/// graph, allocate data blocks on any output ports, and create device-specific
|
||||
/// buffers for all accelerators capable of executing the node.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/15/2012. </remarks>
|
||||
///
|
||||
/// <param name="pAccelerators"> [in] If non-null, the accelerators on which views of blocks
|
||||
/// allocated in the pool may be required. </param>
|
||||
/// <param name="uiPoolSize"> [in] (optional) Size of the pool. If zero/defaulted,
|
||||
/// Runtime::GetICBlockPoolSize() will be used to determine the
|
||||
/// size of the pool. </param>
|
||||
///
|
||||
/// <returns> True if it succeeds, false if it fails. If a port type doesn't actually implement
|
||||
/// pooling, return false as well.
|
||||
/// </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
AllocateBlockPool(
|
||||
__in std::vector<Accelerator*>* pAccelerators,
|
||||
__in unsigned int uiPoolSize=0
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destroys the block pool. AddRef everything in the pool, set its owner
|
||||
/// to null, and then release it. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/17/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void
|
||||
DestroyBlockPool(
|
||||
VOID
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Allocate block pool. Attempt to preallocate blocks on this port.
|
||||
/// Asynchronous version. Only allocates device-space buffers
|
||||
/// in the first pass. Second pass queues all the copies.
|
||||
/// This function handles only the first pass.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/15/2012. </remarks>
|
||||
///
|
||||
/// <param name="pAccelerators"> [in] If non-null, the accelerators on which views of blocks
|
||||
/// allocated in the pool may be required. </param>
|
||||
/// <param name="uiPoolSize"> [in] (optional) Size of the pool. If zero/defaulted,
|
||||
/// Runtime::GetICBlockPoolSize() will be used to determine the
|
||||
/// size of the pool. </param>
|
||||
///
|
||||
/// <returns> True if it succeeds, false if it fails. If a port type doesn't actually implement
|
||||
/// pooling, return false as well.
|
||||
/// </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
AllocateBlockPoolAsync(
|
||||
__in std::vector<Accelerator*>* pAccelerators,
|
||||
__in unsigned int uiPoolSize=0
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Finalize asynchronous block pool allocation on this port.
|
||||
/// The asynchronous version only allocates device-space buffers
|
||||
/// in the first pass. Second pass queues all the copies.
|
||||
/// This function handles the second pass.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/15/2012. </remarks>
|
||||
///
|
||||
/// <remarks> Takes no parameters (VOID). The first pass is AllocateBlockPoolAsync,
|
||||
/// which receives the accelerator list and the pool size.
|
||||
/// NOTE(review): presumably this pass must be called after
|
||||
/// AllocateBlockPoolAsync on the same port -- confirm against the
|
||||
/// implementation. </remarks>
|
||||
///
|
||||
/// <returns> True if it succeeds, false if it fails. If a port type doesn't actually implement
|
||||
/// pooling, return false as well.
|
||||
/// </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
FinalizeBlockPoolAsync(
|
||||
VOID
|
||||
);
|
||||
|
||||
};
|
||||
|
||||
};
|
||||
#endif
|
|
@ -1,493 +0,0 @@
|
|||
//--------------------------------------------------------------------------------------
|
||||
// File: InputPort.h
|
||||
// Maintainer: crossbac@microsoft.com
|
||||
//--------------------------------------------------------------------------------------
|
||||
#ifndef _INPUT_PORT_H_
|
||||
#define _INPUT_PORT_H_
|
||||
|
||||
#include "primitive_types.h"
|
||||
#include "port.h"
|
||||
|
||||
namespace PTask {
|
||||
|
||||
class Channel;
|
||||
class Datablock;
|
||||
class DatablockTemplate;
|
||||
class Accelerator;
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> InputPort: a port subclass specialized to handle binding to input resources in
|
||||
/// Task nodes.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/28/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
class InputPort : public Port {
|
||||
public:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Default constructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/28/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
InputPort();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/28/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual ~InputPort();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if this object is occupied. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/28/2011. </remarks>
|
||||
///
|
||||
/// <returns> true if occupied, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL IsOccupied();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary>
|
||||
/// Pulls a datablock from this port, potentially blocking until one becomes available.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/19/2011. </remarks>
|
||||
///
|
||||
/// <returns> null if it fails, else the pulled datablock. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual Datablock * Pull();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Sets an iteration source. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 2/28/2012. </remarks>
|
||||
///
|
||||
/// <param name="pPort"> [in,out] If non-null, the port. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void SetIterationSource(Port * pPort);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the iteration source. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 2/28/2012. </remarks>
|
||||
///
|
||||
/// <returns> null if it fails, else the iteration source. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual Port * GetIterationSource();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Returns the datablock occupying this port without removing it. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/19/2011. </remarks>
|
||||
///
|
||||
/// <returns> null if it fails, else the current block. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual Datablock * Peek();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Pushes a datablock into this port. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/28/2011. </remarks>
|
||||
///
|
||||
/// <param name="pDatablockToPush"> [in,out] If non-null, the Datablock* to push. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL Push(Datablock* pDatablockToPush);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Bind control channel. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/28/2011. </remarks>
|
||||
///
|
||||
/// <param name="pChannelToBind"> [in,out] If non-null, the channel to bind. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void BindControlChannel(Channel * pChannelToBind);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Unbind control channel. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/28/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void UnbindControlChannel();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the destination buffer. Should be a no-op for InputPort.</summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/28/2011. </remarks>
|
||||
///
|
||||
/// <param name="pAccelerator"> (optional) [in,out] If non-null, the accelerator. </param>
|
||||
///
|
||||
/// <returns> null if it fails, else the destination buffer. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual Datablock * GetDestinationBuffer(Accelerator * pAccelerator=NULL);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Sets a destination buffer. No-op for InputPort. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/28/2011. </remarks>
|
||||
///
|
||||
/// <param name="pDatablock"> [in,out] If non-null, the datablock. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void SetDestinationBuffer(Datablock * pDatablock);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Sets an in out consumer. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/28/2011. </remarks>
|
||||
///
|
||||
/// <param name="pInOutConsumerPort"> [in,out] If non-null, the in out consumer port. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void SetInOutConsumer(Port* pInOutConsumerPort);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Sets a block to be the permanently sticky block for this port. Obviously, only
|
||||
/// valid for certain kinds of ports (input varieties). Use for blocks that will have
|
||||
/// only one value for the lifetime of the graph, to avoid creating and managing an
|
||||
/// exposed channel or initializer channel that will only ever be used once. Do not
|
||||
/// connect an upstream channel to ports that have been configured with a permanent
|
||||
/// block.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/19/2011. </remarks>
|
||||
///
|
||||
/// <param name="p"> If non-null, the Datablock* to use as the permanent block. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void SetPermanentBlock(Datablock * p);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the in out consumer. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/28/2011. </remarks>
|
||||
///
|
||||
/// <returns> null if it fails, else the in out consumer. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual Port* GetInOutConsumer();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Releases the replayable block. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 2/24/2012. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void ReleaseReplayableBlock();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Start iteration. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 2/28/2012. </remarks>
|
||||
///
|
||||
/// <param name="uiIterations"> The iterations. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void BeginIterationScope(UINT uiIterations);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Complete scoped iteration. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 2/28/2012. </remarks>
|
||||
///
|
||||
/// <remarks> Takes no parameters; the iteration count is passed to BeginIterationScope. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void EndIterationScope();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Creates this object. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/28/2011. </remarks>
|
||||
///
|
||||
/// <param name="pDatablockTemplate"> [in,out] If non-null, the datablock template. </param>
|
||||
/// <param name="uiUniqueIdentifier"> Unique identifier. </param>
|
||||
/// <param name="lpszVariableBinding"> [in,out] If non-null, the variable binding. </param>
|
||||
/// <param name="nParameterIndex"> Zero-based index of the parameter. </param>
|
||||
/// <param name="nInOutRouteIdx"> Zero-based index of the in out route. </param>
|
||||
///
|
||||
/// <returns> null if it fails, else the created port. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static Port * Create(DatablockTemplate * pDatablockTemplate,
|
||||
UINT uiUniqueIdentifier,
|
||||
char * lpszVariableBinding,
|
||||
int nParameterIndex,
|
||||
int nInOutRouteIdx
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if this object has block pool. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
|
||||
/// <returns> true if block pool, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL HasBlockPool();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Force block pooling for a port that has an up-stream allocator. In general, when
|
||||
/// we have an upstream allocator (meta) port, the runtime will not create a block
|
||||
/// pool for the corresponding output port. This turns out to put device-side
|
||||
/// allocation on the critical path in some cases, so we provide a way to override
|
||||
/// that behavior and allow a port to create a pool based on some size hints. When
|
||||
/// there is a block available with sufficient space in the pool, the meta port can
|
||||
/// avoid the allocation and draw from the pool.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 9/25/2012. </remarks>
|
||||
///
|
||||
/// <param name="nPoolSize"> Size of the block pool. </param>
|
||||
/// <param name="nStride"> The stride. </param>
|
||||
/// <param name="nDataBytes"> The data in bytes. </param>
|
||||
/// <param name="nMetaBytes"> The meta in bytes. </param>
|
||||
/// <param name="nTemplateBytes"> The template in bytes. </param>
|
||||
/// <param name="bPageLockHostViews"> (optional) the page lock host views. </param>
|
||||
/// <param name="bEagerDeviceMaterialize"> (optional) the eager device materialize. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void
|
||||
ForceBlockPoolHint(
|
||||
__in UINT nPoolSize,
|
||||
__in UINT nStride,
|
||||
__in UINT nDataBytes,
|
||||
__in UINT nMetaBytes,
|
||||
__in UINT nTemplateBytes,
|
||||
__in BOOL bPageLockHostViews=FALSE,
|
||||
__in BOOL bEagerDeviceMaterialize=FALSE
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> add a new block to the pool. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
|
||||
/// <param name="pBlock"> [in,out] If non-null, the block. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void AddNewBlock(Datablock * pBlock);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> return a block to the pool. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
|
||||
/// <param name="pBlock"> [in,out] If non-null, the block. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void ReturnToPool(Datablock * pBlock);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets pool size. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
|
||||
/// <returns> The pool size. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual UINT GetPoolSize();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Sets request page locked. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
|
||||
/// <param name="bPageLocked"> true to lock, false to unlock the page. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void SetRequestsPageLocked(BOOL bPageLocked);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets request page locked. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL GetRequestsPageLocked();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Allocate block pool. Attempt to preallocate blocks on this port.
|
||||
///
|
||||
/// Allocation of data-blocks and platform-specific buffers can be a significant
|
||||
/// latency expense at dispatch time. We can actually preallocate output datablocks
|
||||
/// and create device-side buffers at graph construction time. For each node in the
|
||||
/// graph, allocate data blocks on any output ports, and create device-specific
|
||||
/// buffers for all accelerators capable of executing the node.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/15/2012. </remarks>
|
||||
///
|
||||
/// <param name="pAccelerators"> [in] If non-null, the accelerators on which views of blocks
|
||||
/// allocated in the pool may be required. </param>
|
||||
/// <param name="uiPoolSize"> [in] (optional) Size of the pool. If zero/defaulted,
|
||||
/// Runtime::GetICBlockPoolSize() will be used to determine the
|
||||
/// size of the pool. </param>
|
||||
///
|
||||
/// <returns> True if it succeeds, false if it fails. If a port type doesn't actually implement
|
||||
/// pooling, return false as well.
|
||||
/// </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
AllocateBlockPool(
|
||||
__in std::vector<Accelerator*>* pAccelerators,
|
||||
__in unsigned int uiPoolSize=0
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destroys the block pool. AddRef everything in the pool, set its owner
|
||||
/// to null, and then release it. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/17/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void
|
||||
DestroyBlockPool(
|
||||
VOID
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Queries if a block pool is active and able to deliver/return blocks. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/18/2013. </remarks>
|
||||
///
|
||||
/// <returns> true if a block pool is active, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
IsBlockPoolActive(
|
||||
VOID
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Allocate block pool. Attempt to preallocate blocks on this port.
|
||||
/// Asynchronous version. Only allocates device-space buffers
|
||||
/// in the first pass. Second pass queues all the copies.
|
||||
/// This function handles only the first pass.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/15/2012. </remarks>
|
||||
///
|
||||
/// <param name="pAccelerators"> [in] If non-null, the accelerators on which views of blocks
|
||||
/// allocated in the pool may be required. </param>
|
||||
/// <param name="uiPoolSize"> [in] (optional) Size of the pool. If zero/defaulted,
|
||||
/// Runtime::GetICBlockPoolSize() will be used to determine the
|
||||
/// size of the pool. </param>
|
||||
///
|
||||
/// <returns> True if it succeeds, false if it fails. If a port type doesn't actually implement
|
||||
/// pooling, return false as well.
|
||||
/// </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
AllocateBlockPoolAsync(
|
||||
__in std::vector<Accelerator*>* pAccelerators,
|
||||
__in unsigned int uiPoolSize=0
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Finalize asynchronous block pool allocation on this port.
|
||||
/// The asynchronous version only allocates device-space buffers
|
||||
/// in the first pass. Second pass queues all the copies.
|
||||
/// This function handles the second pass.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/15/2012. </remarks>
|
||||
///
|
||||
/// <remarks> Takes no parameters (VOID). The first pass is AllocateBlockPoolAsync,
|
||||
/// which receives the accelerator list and the pool size.
|
||||
/// NOTE(review): presumably this pass must be called after
|
||||
/// AllocateBlockPoolAsync on the same port -- confirm against the
|
||||
/// implementation. </remarks>
|
||||
///
|
||||
/// <returns> True if it succeeds, false if it fails. If a port type doesn't actually implement
|
||||
/// pooling, return false as well.
|
||||
/// </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
FinalizeBlockPoolAsync(
|
||||
VOID
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Find the maximal capacity downstream port/channel path starting at this port.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 1/3/2014. </remarks>
|
||||
///
|
||||
/// <param name="vTasksVisited"> [in,out] The set of tasks visited (passed by reference). </param>
|
||||
/// <param name="vPath"> [in,out] Vector of channels (passed by reference). NOTE(review): presumably the channels on the maximal-capacity path -- confirm. </param>
|
||||
///
|
||||
/// <returns> The found maximal downstream capacity. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual UINT
|
||||
FindMaximalDownstreamCapacity(
|
||||
__inout std::set<Task*>& vTasksVisited,
|
||||
__inout std::vector<Channel*>& vPath
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if this port is an explicit memory space transition point.
|
||||
/// We return true only when we know for certain that this task
|
||||
/// executes on one GPU and at least one downstream task definitely
|
||||
/// needs a view of our outputs on another GPU. In general we can only
|
||||
/// tell this with high precision when there is task affinity involved.
|
||||
/// We use this to set the sharing hint on the access flags for blocks
|
||||
/// allocated, which in turn allows some back ends to better optimize GPU-side
|
||||
/// buffer allocation and data transfer.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 3/13/2014. </remarks>
|
||||
///
|
||||
/// <returns> true if explicit memory space transition point, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL IsExplicitMemorySpaceTransitionPoint();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Check type-specific semantics. Return true if all the structures are initialized
|
||||
/// for this port in a way that is consistent with a well-formed graph. Called by
|
||||
/// CheckSemantics()
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/27/2011. </remarks>
|
||||
///
|
||||
/// <param name="pos"> [in,out] output string stream. </param>
|
||||
/// <param name="pGraph"> [in,out] non-null, the graph. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL CheckTypeSpecificSemantics(std::ostream * pos,
|
||||
PTask::Graph * pGraph);
|
||||
protected:
|
||||
|
||||
/// <summary> The output port that is the consumer
|
||||
/// if this port is part of an in/out pair
|
||||
/// </summary>
|
||||
Port * m_pInOutConsumer;
|
||||
|
||||
};
|
||||
|
||||
};
|
||||
#endif
|
|
@ -1,153 +0,0 @@
|
|||
///-------------------------------------------------------------------------------------------------
|
||||
// file: Lockable.h
|
||||
//
|
||||
// summary: Declares the lockable object class
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
#ifndef __LOCKABLE_OBJECT_H__
|
||||
#define __LOCKABLE_OBJECT_H__
|
||||
|
||||
#include <Windows.h>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <stdio.h>
|
||||
#include <crtdbg.h>
|
||||
#include "primitive_types.h"
|
||||
|
||||
namespace PTask {
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Lockable object. Super-class for all PTask runtime objects that implement coarse
|
||||
/// object-level locking with CRITICAL_SECTION objects. Since CRITICAL_SECTIONs are
|
||||
/// re-entrant, so are Lockables.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/27/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
class Lockable {
|
||||
public:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Default constructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/27/2011. </remarks>
|
||||
///
|
||||
/// <param name="lpszProtectedObjectName"> [in] If non-null, name of the protected object. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
Lockable(char * lpszProtectedObjectName);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/27/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual ~Lockable();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Lock this object. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/27/2011. </remarks>
|
||||
///
|
||||
/// <returns> the new lock depth. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
int Lock();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Unlock this object. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/27/2011. </remarks>
|
||||
///
|
||||
/// <returns> the new lock depth. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
int Unlock();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if this object is locked. This method is to be used in asserts that the
|
||||
/// current thread holds the lock, and *not* to be used to implement TryLock
|
||||
/// semantics!
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/27/2011. </remarks>
|
||||
///
|
||||
/// <returns> true if held, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
BOOL LockIsHeld();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Return the lock depth. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/28/2011. </remarks>
|
||||
///
|
||||
/// <returns> The lock depth. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
int GetLockDepth();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> In debug mode, enables/disables tracking for a particular object, returns
|
||||
/// true if tracking is enabled after the call. When tracking is enabled,
|
||||
/// all lock/unlock calls are logged to the console. A handy tool for teasing
|
||||
/// apart deadlocks.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/29/2013. </remarks>
|
||||
///
|
||||
/// <param name="bEnable"> (Optional) TRUE to enable tracking, FALSE to disable it. </param>
|
||||
///
|
||||
/// <returns> true if tracking is enabled after the call, false otherwise. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
BOOL TrackLockActivity(BOOL bEnable=TRUE);
|
||||
|
||||
private:
|
||||
|
||||
/// <summary> The lock </summary>
|
||||
CRITICAL_SECTION m_lock;
|
||||
|
||||
/// <summary> Depth of the lock </summary>
|
||||
int m_nLockDepth;
|
||||
|
||||
/// <summary> Name of the protected object </summary>
|
||||
char * m_lpszProtectedObjectName;
|
||||
|
||||
/// <summary> Identifier of the owning thread, if we are in debug mode. </summary>
|
||||
DWORD m_dwOwningThreadId;
|
||||
|
||||
/// <summary> true if we should log lock/unlock activity for this object. </summary>
|
||||
BOOL m_bTrack;
|
||||
|
||||
/// <summary> The unnested acquires. </summary>
|
||||
UINT m_uiUnnestedAcquires;
|
||||
|
||||
/// <summary> The unnested releases. </summary>
|
||||
UINT m_uiUnnestedReleases;
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Updates the owning thread identifier. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/27/2011. </remarks>
|
||||
///
|
||||
/// <param name="bLocking"> true if this update is for the lock operation, otherwise this update
|
||||
/// is for an unlock. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void UpdateOwningThreadId(BOOL bLocking);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Logs lock activity. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/29/2013. </remarks>
|
||||
///
|
||||
/// <param name="bLocking"> true if the activity being logged is a lock operation, otherwise it is an unlock. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void LogLockActivity(BOOL bLocking);
|
||||
};
|
||||
};
|
||||
|
||||
#endif // __LOCKABLE_OBJECT_H__
|
|
@ -1,448 +0,0 @@
|
|||
///-------------------------------------------------------------------------------------------------
|
||||
// file: MemorySpace.h
|
||||
//
|
||||
// summary: Simple class describing a memory space
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
#ifndef __MEMORY_SPACE_H__
|
||||
#define __MEMORY_SPACE_H__
|
||||
|
||||
#include "primitive_types.h"
|
||||
#include "Lockable.h"
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
|
||||
namespace PTask {
|
||||
|
||||
static const UINT HOST_MEMORY_SPACE_ID = 0;
|
||||
static const UINT MAX_MEMORY_SPACES = 12;
|
||||
static const UINT UNKNOWN_MEMORY_SPACE_ID = 0xFFFFFFFF;
|
||||
|
||||
class Accelerator;
|
||||
|
||||
typedef void * (__stdcall *LPFNSTATICALLOCATOR)(ULONG, ULONG);
|
||||
typedef void (__stdcall *LPFNSTATICDEALLOCATOR)(void*);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Memory status for a memory type on a device.
|
||||
/// Currently we track global and page-locked memory.
|
||||
/// Could easily expand to track other types. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 3/15/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
struct DeviceMemoryStatus_t;
|
||||
struct GlobalDeviceMemoryState_t;
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Memory space object. Encapsulates data about what accelerators are associated
|
||||
/// with the space, whether there are specialized allocators for managing buffers
|
||||
/// created in other spaces that must communicate with this one, whether we need an
|
||||
/// accelerator object to perform allocations in this space (or any static allocators
|
||||
/// otherwise).
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/27/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
class MemorySpace : public Lockable {
|
||||
public:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the number of memory spaces active in the system. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/28/2011. </remarks>
|
||||
///
|
||||
/// <returns> The number of memory spaces. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static UINT GetNumberOfMemorySpaces();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the accelerator from memory space identifier. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/30/2011. </remarks>
|
||||
///
|
||||
/// <param name="id"> The identifier. </param>
|
||||
///
|
||||
/// <returns> null if it fails, else the accelerator from memory space identifier. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static Accelerator * GetAcceleratorFromMemorySpaceId(UINT id);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the memory space object for the given memory space identifier. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/30/2011. </remarks>
|
||||
///
|
||||
/// <param name="id"> The identifier. </param>
|
||||
///
|
||||
/// <returns> null if it fails, else the memory space with the given identifier. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static MemorySpace * GetMemorySpaceFromId(UINT id);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the percentage of the memory space with the given identifier that is already allocated. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 9/10/2013. </remarks>
|
||||
///
|
||||
/// <returns> The allocated percent. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static UINT GetAllocatedPercent(UINT uiMemorySpaceId);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets allocation percentages. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 9/10/2013. </remarks>
|
||||
///
|
||||
/// <param name="vDeviceMemories"> [in,out] The device memories. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static void GetAllocationPercentages(std::map<UINT, UINT>& vDeviceMemories);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if the memory space with the given identifier has a static allocator. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/30/2011. </remarks>
|
||||
///
|
||||
/// <param name="id"> The identifier. </param>
|
||||
///
|
||||
/// <returns> true if the memory space has a static allocator, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static BOOL HasStaticAllocator(UINT id);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Allocate an extent in this memory space. Fails if
|
||||
/// no static allocator is present. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/30/2011. </remarks>
|
||||
///
|
||||
/// <param name="uiMemorySpace"> The identifier. </param>
|
||||
/// <param name="ulBytesToAllocate"> The ul bytes to allocate. </param>
|
||||
/// <param name="ulFlags"> The ul flags. </param>
|
||||
///
|
||||
/// <returns> null if it fails, else a pointer to the allocated memory extent. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static void * AllocateMemoryExtent(UINT uiMemorySpace, ULONG ulBytesToAllocate, ULONG ulFlags);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Deallocate an extent in this memory space. Fails if no static allocator is present.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/30/2011. </remarks>
|
||||
///
|
||||
/// <param name="uiMemorySpace"> The identifier. </param>
|
||||
/// <param name="pMemoryExtent"> [in,out] If non-null, the memory extent to deallocate. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static void DeallocateMemoryExtent(UINT uiMemorySpace, void * pMemoryExtent);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Registers the memory space. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/30/2011. </remarks>
|
||||
///
|
||||
/// <param name="pSpace"> [in,out] memory space. </param>
|
||||
/// <param name="pAccelerator"> [in,out] If non-null, the accelerator. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static void RegisterMemorySpace(MemorySpace * pSpace, Accelerator * pAccelerator);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Associate the accelerator with the memory space. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/30/2011. </remarks>
|
||||
///
|
||||
/// <param name="id"> The memory space identifier. </param>
|
||||
/// <param name="pAccelerator"> [in,out] If non-null, the accelerator. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static void RegisterMemorySpaceId(UINT id, Accelerator * pAccelerator);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Initializes the memory space map. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/6/2012. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static void InitializeMemorySpaces();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Unregisters the memory spaces at tear-down time. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/6/2012. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static void UnregisterMemorySpaces();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Constructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/27/2011. </remarks>
|
||||
///
|
||||
/// <param name="szDeviceName"> [in,out] Name of the device. </param>
|
||||
/// <param name="nMemorySpaceId"> Identifier for the memory space. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
MemorySpace(std::string& szDeviceName, UINT nMemorySpaceId);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/6/2012. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual ~MemorySpace();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if this memory space has a static buffer allocator function. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/6/2012. </remarks>
|
||||
///
|
||||
/// <returns> true if static allocator, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
BOOL HasStaticAllocator();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Allocate a memory extent in this memory space of the given
|
||||
/// size. If this memory space does not have a static allocator,
|
||||
/// return NULL. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/6/2012. </remarks>
|
||||
///
|
||||
/// <param name="ulNumberOfBytes"> The ul number of in bytes. </param>
|
||||
/// <param name="ulFlags"> The ul flags. </param>
|
||||
///
|
||||
/// <returns> null if it fails, else a pointer to the allocated memory extent. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void * AllocateMemoryExtent(ULONG ulNumberOfBytes, ULONG ulFlags);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Deallocate memory extent. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/6/2012. </remarks>
|
||||
///
|
||||
/// <param name="pvMemoryExtent"> [in,out] If non-null, extent of the pv memory. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void DeallocateMemoryExtent(void* pvMemoryExtent);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the percentage of this space already allocated. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 9/10/2013. </remarks>
|
||||
///
|
||||
/// <returns> The allocated percent. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
UINT __GetAllocatedPercent();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets a pointer to any accelerator mapped to this space. Most spaces
|
||||
/// have just one, so this simplifies the process of getting an object
|
||||
/// that can provide allocation services if no static allocator is present.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/6/2012. </remarks>
|
||||
///
|
||||
/// <returns> null if it fails, else any accelerator. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
Accelerator * GetAnyAccelerator();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the number of accelerators mapped to this space. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/6/2012. </remarks>
|
||||
///
|
||||
/// <returns> The number of accelerators. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
UINT GetNumberOfAccelerators();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets all accelerators in this space, by putting them in the user-provided buffer.
|
||||
/// At most nMaxAccelerators will be provided.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/6/2012. </remarks>
|
||||
///
|
||||
/// <param name="ppAccelerators"> [in,out] If non-null, the accelerators. </param>
|
||||
/// <param name="nMaxAccelerators"> The maximum accelerators. </param>
|
||||
///
|
||||
/// <returns> The number of accelerators in the result buffer, which may be different from
|
||||
/// nMaxAccelerators!
|
||||
/// </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
UINT GetAccelerators(Accelerator ** ppAccelerators, UINT nMaxAccelerators);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Assign a unique memory space identifier. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/28/2011. </remarks>
|
||||
///
|
||||
/// <returns> The assigned memory space identifier. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static UINT AssignUniqueMemorySpaceIdentifier();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Sets a static allocator function for this memory space. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/6/2012. </remarks>
|
||||
///
|
||||
/// <param name="lpfnStaticAllocatorFunction"> The lpfn static allocator function. </param>
|
||||
/// <param name="lpfnStaticDeallocatorFunction"> The lpfn static deallocator function. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void SetStaticAllocator(LPFNSTATICALLOCATOR lpfnStaticAllocatorFunction,
|
||||
LPFNSTATICDEALLOCATOR lpfnStaticDeallocatorFunction
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Adds a deferred allocation entry for the proxy accelerator, indicating that
|
||||
/// allocations for this space should be deferred to accelerators for that space,
|
||||
/// when the resulting buffers will be used to communicate between those spaces.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/6/2012. </remarks>
|
||||
///
|
||||
/// <param name="pProxyAllocatorAccelerator"> [in,out] If non-null, the proxy allocator
|
||||
/// accelerator. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void AddDeferredAllocationEntry(Accelerator* pProxyAllocatorAccelerator);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Adds an accelerator to this memory space. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/6/2012. </remarks>
|
||||
///
|
||||
/// <param name="pAccelerator"> [in,out] If non-null, the accelerator. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void AddAccelerator(Accelerator * pAccelerator);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Updates the space size bytes described by uiBytes. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 3/15/2013. </remarks>
|
||||
///
|
||||
/// <param name="uiBytes"> The bytes. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void UpdateSpaceSizeBytes(unsigned __int64 uiBytes);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Resets the memory state. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 3/15/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void Reset();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Record a memory allocation. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 3/15/2013. </remarks>
|
||||
///
|
||||
/// <param name="pMemoryExtent"> [in,out] If non-null, extent of the memory. </param>
|
||||
/// <param name="uiBytes"> The bytes. </param>
|
||||
/// <param name="bPinned"> true for a pinned allocation. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void
|
||||
RecordAllocation(
|
||||
__in void * pMemoryExtent,
|
||||
__in size_t uiBytes,
|
||||
__in BOOL bPinned
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Record a memory deallocation. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 3/15/2013. </remarks>
|
||||
///
|
||||
/// <param name="pMemoryExtent"> [in,out] If non-null, extent of the memory. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void
|
||||
RecordDeallocation(
|
||||
__in void * pMemoryExtent
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Dumps the allocation statistics. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 3/15/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void Report(
|
||||
std::ostream &ios
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets memory state. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 3/15/2013. </remarks>
|
||||
///
|
||||
/// <returns> null if it fails, else the memory state. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
struct GlobalDeviceMemoryState_t * GetMemoryState();
|
||||
|
||||
private:
|
||||
|
||||
/// <summary> Name of the device. </summary>
|
||||
std::string m_strDeviceName;
|
||||
|
||||
/// <summary> Identifier for the memory space </summary>
|
||||
UINT m_nMemorySpaceId;
|
||||
|
||||
/// <summary> Pointer to a static allocator function, if
|
||||
/// one exists for this memory space.
|
||||
/// </summary>
|
||||
LPFNSTATICALLOCATOR m_lpfnStaticAllocator;
|
||||
|
||||
/// <summary> Pointer to a static de-allocator function, if
|
||||
/// one exists for this memory space.
|
||||
/// </summary>
|
||||
LPFNSTATICDEALLOCATOR m_lpfnStaticDeallocator;
|
||||
|
||||
/// <summary> The deferred allocator map. Each entry in this
|
||||
/// set indicates that memory allocations in this space
|
||||
/// should be deferred to allocators provided by
|
||||
/// accelerators mapped to the space identified by the
|
||||
/// entry. For example, if this memory space describes
|
||||
/// the host memory space, it will contain an entry for
|
||||
/// every CUDA memory space because we should be using
|
||||
/// cuda APIs to allocate host memory for best performance.
|
||||
/// </summary>
|
||||
std::set<UINT> m_pDeferredAllocatorSpaces;
|
||||
|
||||
/// <summary> The accelerators mapped to this space. </summary>
|
||||
std::set<Accelerator*> m_pAccelerators;
|
||||
|
||||
/// <summary> State of the memory. </summary>
|
||||
struct GlobalDeviceMemoryState_t * m_pMemoryState;
|
||||
|
||||
/// <summary> Counter for assigning unique identifiers
|
||||
/// to Memory spaces objects.
|
||||
/// </summary>
|
||||
static UINT m_uiMemorySpaceIdCounter;
|
||||
|
||||
/// <summary> static MemorySpace map </summary>
|
||||
static MemorySpace* m_vMemorySpaceMap[MAX_MEMORY_SPACES];
|
||||
};
|
||||
};
|
||||
|
||||
#endif // __MEMORY_SPACE_H__
|
|
@ -1,686 +0,0 @@
|
|||
//--------------------------------------------------------------------------------------
|
||||
// File: MetaPort.h
|
||||
// Maintainer: crossbac@microsoft.com
|
||||
//--------------------------------------------------------------------------------------
|
||||
#ifndef _META_PORT_H_
|
||||
#define _META_PORT_H_
|
||||
|
||||
#include "primitive_types.h"
|
||||
#include "port.h"
|
||||
|
||||
namespace PTask {
|
||||
|
||||
class Channel;
|
||||
class Datablock;
|
||||
class DatablockTemplate;
|
||||
class Accelerator;
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Meta port. A meta port is a port that consumes datablocks but does not bind them
|
||||
/// to Task inputs. Rather, the runtime uses the contained information to perform
|
||||
/// operations on behalf of the Task for which the MetaPort is an input. Currently,
|
||||
/// the only operation of this class is allocation of Datablocks on OutputPorts,
|
||||
/// although the mechanism will be generalized in the future. A MetaPort consumes a
|
||||
/// datablock, expecting it to contain a single integer value, which is then
|
||||
/// interpreted as the allocation size for the OutputPort specified in the
|
||||
/// m_pAllocatorPort member.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/28/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
class MetaPort : public Port {
|
||||
public:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Default constructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/28/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
MetaPort();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/28/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual ~MetaPort();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if this object is occupied. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/28/2011. </remarks>
|
||||
///
|
||||
/// <returns> true if occupied, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL IsOccupied();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Pulls the next datablock. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/28/2011. </remarks>
|
||||
///
|
||||
/// <returns> null if it fails, else. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual Datablock * Pull();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Peek at the next datablock on this port. Peek on an InitializerPort always
|
||||
/// returns NULL, because datablocks are created on demand in response to a pull.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <returns> null if it fails, else the current top-of-stack object. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual Datablock * Peek();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Pushes an object into this port. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/28/2011. </remarks>
|
||||
///
|
||||
/// <param name="pDatablock"> [in,out] If non-null, the Datablock* to push. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL Push(Datablock* pDatablock);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Bind control channel. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/28/2011. </remarks>
|
||||
///
|
||||
/// <param name="pChannel"> [in,out] If non-null, the channel. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void BindControlChannel(Channel * pChannel);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Unbind control channel. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/28/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void UnbindControlChannel();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Sets the allocation port. This port must be an output port, and is the port on
|
||||
/// which a new datablock will be allocated when a block is consumed from this
|
||||
/// MetaPort.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/28/2011. </remarks>
|
||||
///
|
||||
/// <param name="pPort"> [in,out] If non-null, the port. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void SetAllocationPort(Port * pPort);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the allocation port. This port must be an output port, and is the port on
|
||||
/// which a new datablock will be allocated when a block is consumed from this
|
||||
/// MetaPort.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/28/2011. </remarks>
|
||||
///
|
||||
/// <returns> null if it fails, else the allocation port. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual Port * GetAllocationPort();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Adds an iteration target to the list. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 2/28/2012. </remarks>
|
||||
///
|
||||
/// <param name="pPort"> [in,out] If non-null, the port. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void BindIterationTarget(Port * pPort);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Configure iteration targets. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 2/28/2012. </remarks>
|
||||
///
/// <param name="pBlock"> [in] The datablock used when configuring the iteration targets.
/// NOTE(review): the exact role of this block is not visible in this
/// header; confirm against the implementation. </param>
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void ConfigureIterationTargets(Datablock * pBlock);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets an integer value from a block consumed from this MetaPort. Should not be
|
||||
/// called when the port is unoccupied because it will block on a Pull call. On exit,
|
||||
/// bControlBlock is TRUE if the consumed block carried a control signal;
|
||||
/// uiControlCode will be set accordingly if this is the case. The integer value can
|
||||
/// be used by iteration control or output allocation meta functions.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/28/2011. </remarks>
|
||||
///
|
||||
/// <param name="bControlBlock"> [out] True on exit if the block pulled to compute the
|
||||
/// allocation size carried a control signal. </param>
|
||||
/// <param name="luiControlSignal"> [out] If the block pulled to compute the allocation size
|
||||
/// carried a control signal, the control code from that block. </param>
|
||||
///
|
||||
/// <returns> The integer value at offset 0 in the datablock's data channel. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual UINT
|
||||
GetIntegerValue(
|
||||
BOOL &bControlBlock,
|
||||
CONTROLSIGNAL &luiControlSignal
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Creates a new MetaPort. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/28/2011. </remarks>
|
||||
///
|
||||
/// <param name="pDatablockTemplate"> [in] If non-null, the datablock template. </param>
|
||||
/// <param name="uiUniqueIdentifier"> Unique identifier (caller-supplied, uniqueness not
|
||||
/// enforced). </param>
|
||||
/// <param name="lpszVariableBinding"> [in] If non-null, the variable binding. </param>
|
||||
/// <param name="nBoundParameterIndex"> Zero-based index of the bound parameter. </param>
|
||||
/// <param name="nInOutRouteIdx"> Zero-based index of the in/out route. </param>
|
||||
///
|
||||
/// <returns> null if it fails, else the newly created port. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static Port *
|
||||
Create(
|
||||
__in DatablockTemplate * pDatablockTemplate,
|
||||
__in UINT uiUniqueIdentifier,
|
||||
__in char * lpszVariableBinding,
|
||||
__in int nBoundParameterIndex,
|
||||
__in int nInOutRouteIdx
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Check type-specific semantics. Return true if all the structures are initialized
|
||||
/// for this port in a way that is consistent with a well-formed graph. Called by
|
||||
/// CheckSemantics()
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/27/2011. </remarks>
|
||||
///
|
||||
/// <param name="pos"> [in,out] output string stream. </param>
|
||||
/// <param name="pGraph"> [in,out] non-null, the graph. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL CheckTypeSpecificSemantics(std::ostream * pos,
|
||||
PTask::Graph * pGraph);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets a destination buffer occupying this output port. Meaningless for MetaPorts,
|
||||
/// but required by the abstract superclass Port.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/19/2011. </remarks>
|
||||
///
|
||||
/// <param name="pAccelerator"> (optional) [in] If non-null, an accelerator object to assist
|
||||
/// creating a datablock if none is available. </param>
|
||||
///
|
||||
/// <returns> null if it fails, else the destination buffer. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual Datablock * GetDestinationBuffer(Accelerator * pAccelerator=NULL);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Sets a destination buffer. Meaningless for MetaPorts, but required by the
|
||||
/// abstract superclass Port.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/19/2011. </remarks>
|
||||
///
|
||||
/// <param name="p"> [in,out] If non-null, the Datablock* to set as the destination buffer. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void SetDestinationBuffer(Datablock * p);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Sets a block to be the permanently sticky block for this port. Obviously, only
|
||||
/// valid for certain kinds of ports (input varieties). Use for blocks that will have
|
||||
/// only one value for the lifetime of the graph, to avoid creating and managing an
|
||||
/// exposed channel or initializer channel that will only ever be used once. Do not
|
||||
/// connect an upstream channel to ports that have been configured with a permanent
|
||||
/// block.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/19/2011. </remarks>
|
||||
///
|
||||
/// <param name="p"> If non-null, the Datablock* to push. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void SetPermanentBlock(Datablock * p);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Sets a meta function. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/10/2012. </remarks>
|
||||
///
|
||||
/// <param name="eMetaFunctionSpecifier"> Information describing the meta function. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void SetMetaFunction(METAFUNCTION eMetaFunctionSpecifier);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the meta function. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/10/2012. </remarks>
|
||||
///
|
||||
/// <returns> The meta function. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual METAFUNCTION GetMetaFunction();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Perform the work associated with this port's meta function. For example, if the
|
||||
/// port is an allocator, allocate a block for the downstream output port. If it is
|
||||
/// an iterator, set the iteration count on the Task.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/10/2012. </remarks>
|
||||
///
|
||||
/// <param name="pDispatchAccelerator"> [in,out] If non-null, the dispatch accelerator. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void
|
||||
PerformMetaFunction(
|
||||
__in Accelerator * pDispatchAccelerator
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Perform any post-dispatch work associated with this port's meta function. For
|
||||
/// example, if the port is an iteration construct, reset the loop bounds and
|
||||
/// propagate any control signals associated with the iteration.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/10/2012. </remarks>
|
||||
///
|
||||
/// <param name="pDispatchAccelerator"> [in,out] If non-null, the dispatch accelerator. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void FinalizeMetaFunction(Accelerator * pDispatchAccelerator);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if this object has a block pool. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
|
||||
/// <returns> true if this object has a block pool, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL HasBlockPool();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Force block pooling for a port that has an up-stream allocator. In general, when
|
||||
/// we have an upstream allocator (meta) port, the runtime will not create a block
|
||||
/// pool for the corresponding output port. This turns out to put device-side
|
||||
/// allocation on the critical path in some cases, so we provide a way to override
|
||||
/// that behavior and allow a port to create a pool based on some size hints. When
|
||||
/// there is a block available with sufficient space in the pool, the meta port can
|
||||
/// avoid the allocation and draw from the pool.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 9/25/2012. </remarks>
|
||||
///
|
||||
/// <param name="nPoolSize"> Size of the block pool. </param>
|
||||
/// <param name="nStride"> The stride. </param>
|
||||
/// <param name="nDataBytes"> The data in bytes. </param>
|
||||
/// <param name="nMetaBytes"> The meta in bytes. </param>
|
||||
/// <param name="nTemplateBytes"> The template in bytes. </param>
|
||||
/// <param name="bPageLockHostViews"> (optional) the page lock host views. </param>
|
||||
/// <param name="bEagerDeviceMaterialize"> (optional) the eager device materialize. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void
|
||||
ForceBlockPoolHint(
|
||||
__in UINT nPoolSize,
|
||||
__in UINT nStride,
|
||||
__in UINT nDataBytes,
|
||||
__in UINT nMetaBytes,
|
||||
__in UINT nTemplateBytes,
|
||||
__in BOOL bPageLockHostViews=FALSE,
|
||||
__in BOOL bEagerDeviceMaterialize=FALSE
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> add a new block to the pool. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
|
||||
/// <param name="pBlock"> [in,out] If non-null, the block. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void AddNewBlock(Datablock * pBlock);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> return a block to the pool. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
|
||||
/// <param name="pBlock"> [in,out] If non-null, the block. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void ReturnToPool(Datablock * pBlock);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets pool size. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
|
||||
/// <returns> The pool size. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual UINT GetPoolSize();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Sets request page locked. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
|
||||
/// <param name="bPageLocked"> true to lock, false to unlock the page. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void SetRequestsPageLocked(BOOL bPageLocked);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets request page locked. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
|
||||
/// <returns> The request-page-locked flag (presumably the value set via SetRequestsPageLocked; confirm against the implementation). </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL GetRequestsPageLocked();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Allocate block pool. Attempt to preallocate blocks on this port.
|
||||
///
|
||||
/// Allocation of data-blocks and platform-specific buffers can be a significant
|
||||
/// latency expense at dispatch time. We can actually preallocate output datablocks
|
||||
/// and create device-side buffers at graph construction time. For each node in the
|
||||
/// graph, allocate data blocks on any output ports, and create device-specific
|
||||
/// buffers for all accelerators capable of executing the node.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/15/2012. </remarks>
|
||||
///
|
||||
/// <param name="pAccelerators"> [in] If non-null, the accelerators on which views of blocks
|
||||
/// allocated in the pool may be required. </param>
|
||||
/// <param name="uiPoolSize"> [in] (optional) Size of the pool. If zero/defaulted,
|
||||
/// Runtime::GetICBlockPoolSize() will be used to determine the
|
||||
/// size of the pool. </param>
|
||||
///
|
||||
/// <returns> True if it succeeds, false if it fails. If a port type doesn't actually implement
|
||||
/// pooling, return false as well.
|
||||
/// </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
AllocateBlockPool(
|
||||
__in std::vector<Accelerator*>* pAccelerators,
|
||||
__in unsigned int uiPoolSize=0
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destroys the block pool. AddRef everything in the pool, set its owner
|
||||
/// to null, and then release it. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/17/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void
|
||||
DestroyBlockPool(
|
||||
VOID
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Queries if a block pool is active and able to deliver/return blocks. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/18/2013. </remarks>
|
||||
///
|
||||
/// <returns> true if a block pool is active, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
IsBlockPoolActive(
|
||||
VOID
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Allocate block pool. Attempt to preallocate blocks on this port.
|
||||
/// Asynchronous version. Only allocates device-space buffers
|
||||
/// in the first pass. Second pass queues all the copies.
|
||||
/// This function handles only the first pass.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/15/2012. </remarks>
|
||||
///
|
||||
/// <param name="pAccelerators"> [in] If non-null, the accelerators on which views of blocks
|
||||
/// allocated in the pool may be required. </param>
|
||||
/// <param name="uiPoolSize"> [in] (optional) Size of the pool. If zero/defaulted,
|
||||
/// Runtime::GetICBlockPoolSize() will be used to determine the
|
||||
/// size of the pool. </param>
|
||||
///
|
||||
/// <returns> True if it succeeds, false if it fails. If a port type doesn't actually implement
|
||||
/// pooling, return false as well.
|
||||
/// </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
AllocateBlockPoolAsync(
|
||||
__in std::vector<Accelerator*>* pAccelerators,
|
||||
__in unsigned int uiPoolSize=0
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Allocate block pool. Attempt to preallocate blocks on this port.
|
||||
/// Asynchronous version. Only allocates device-space buffers
|
||||
/// in the first pass. Second pass queues all the copies.
|
||||
/// This function handles the second pass.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/15/2012. </remarks>
|
||||
///
|
||||
/// <remarks> Takes no parameters. The pAccelerators and uiPoolSize arguments are
|
||||
/// accepted by the first pass (AllocateBlockPoolAsync), not by this
|
||||
/// function.
|
||||
/// NOTE(review): how the second pass obtains the accelerators and pool
|
||||
/// size is not visible in this header; confirm in the implementation. </remarks>
|
||||
///
|
||||
/// <returns> True if it succeeds, false if it fails. If a port type doesn't actually implement
|
||||
/// pooling, return false as well.
|
||||
/// </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
FinalizeBlockPoolAsync(
|
||||
VOID
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Searches for collaborating meta ports: if this port is an allocator
|
||||
/// for output ports with descriptor ports, block allocation may have
|
||||
/// dependences on other meta ports for the bound task. We need to know this
|
||||
/// at dispatch time, but it is a static property of the graph, so
|
||||
/// we pre-compute it as a side-effect of OnGraphComplete().
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 2/15/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void FindCollaboratingMetaPorts();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Sets an allocation hint. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/21/2013. </remarks>
|
||||
///
|
||||
/// <param name="uiAllocationHint"> The allocation hint. </param>
|
||||
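/// <param name="bForceAllocHint"> true to force use of the allocation hint (presumably recorded in m_bForceAllocHint, i.e. the hint takes precedence over the value received on the incoming channel; confirm against the implementation). </param>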
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void
|
||||
SetAllocationHint(
|
||||
__in UINT uiAllocationHint,
|
||||
__in BOOL bForceAllocHint
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if this port has been configured with a statically known allocation size.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/21/2013. </remarks>
|
||||
///
|
||||
/// <returns> true if static allocation size, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
BOOL IsStaticAllocationSize();
|
||||
|
||||
protected:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the channel allocation size when this meta port is an allocator for an
|
||||
/// output port with descriptor ports (meaning another meta port is responsible for
|
||||
/// computing that allocation size). If this meta port is not involved in such a
|
||||
/// graph structure, return 0.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 2/15/2013. </remarks>
|
||||
///
|
||||
/// <param name="pDispatchAccelerator"> [in,out] If non-null, the dispatch accelerator. </param>
|
||||
/// <param name="eFunc"> The function. </param>
|
||||
/// <param name="ppPortTemplate"> [out] on exit the template for the related collaborative
|
||||
/// port, if one is available. These are needed when initial
|
||||
/// values are supplied by the template. </param>
|
||||
///
|
||||
/// <returns> The meta buffer allocation size. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
UINT GetCollaborativeAllocationSize(
|
||||
__in Accelerator * pDispatchAccelerator,
|
||||
__in DESCRIPTORFUNC eFunc,
|
||||
__out DatablockTemplate ** ppPortTemplate
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Finalize collaborative allocations. If this port has completed a collaborative
|
||||
/// allocation (where other meta ports determine meta/template channel sizes)
|
||||
/// we need to finish the binding of an output block at those ports. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 2/15/2013. </remarks>
|
||||
///
|
||||
/// <param name="pDispatchAccelerator"> [in] non-null, the dispatch accelerator. </param>
|
||||
/// <param name="pBlock"> [in,out] non-null, the block. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void FinalizeCollaborativeAllocations(
|
||||
__in Accelerator * pDispatchAccelerator,
|
||||
__inout Datablock * pBlock
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Perform allocation. In this case, a datablock on a metaport provides an integer-
|
||||
/// valued allocation size for another output port on the ptask. Hence, this function
|
||||
/// looks at all metaports, and performs output datablock allocation as needed.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/10/2012. </remarks>
|
||||
///
|
||||
/// <param name="pDispatchAccelerator"> [in,out] If non-null, the dispatch accelerator. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void
|
||||
PerformAllocation(
|
||||
__in Accelerator * pDispatchAccelerator
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Configure simple iteration. Simple iteration is distinguished from general
|
||||
/// iteration because it involves iterative invocation of a single PTask node. The
|
||||
/// mechanisms required to build this are so much simpler than those required to
|
||||
/// build general iteration over arbitrary subgraphs that it is worth bothering to
|
||||
/// distinguish the case. Here, the datablock received on this port contains an
|
||||
/// integer-valued iteration count, which we set on the task directly. Task::Dispatch
|
||||
/// is responsible for clearing the iteration count after dispatch.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/10/2012. </remarks>
|
||||
///
|
||||
/// NOTE: this function takes no parameters.
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void ConfigureSimpleIteration();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Configure general iteration. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/10/2012. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void ConfigureGeneralIteration();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Finalize general iteration. (Update iteration state after task dispatch,
|
||||
/// and propagate control signals where appropriate). </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/10/2012. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void FinalizeGeneralIteration();
|
||||
|
||||
/// <summary> The allocation port. This port must be an output port, and is the port on
|
||||
/// which a new datablock will be allocated when a block is consumed from this
|
||||
/// MetaPort.
|
||||
/// </summary>
|
||||
Port * m_pAllocationPort;
|
||||
|
||||
/// <summary> The meta function </summary>
|
||||
METAFUNCTION m_eMetaFunction;
|
||||
|
||||
/// <summary> The general iteration block </summary>
|
||||
Datablock * m_pGeneralIterationBlock;
|
||||
|
||||
/// <summary> Number of general iterations </summary>
|
||||
UINT m_nGeneralIterationCount;
|
||||
|
||||
/// <summary> The general iteration maximum </summary>
|
||||
UINT m_nGeneralIterationMax;
|
||||
|
||||
/// <summary> if this object is collaborative allocator and another meta port is responsible
|
||||
/// for computing the allocation size of the metadata buffer channel on the block
|
||||
/// allocated by *this* meta-port, we keep a pointer to that other port. Since
|
||||
/// deciding requires traversing part of the graph structure, we set this once so we
|
||||
/// don't have to do it again.
|
||||
/// </summary>
|
||||
Port * m_pCollaborativeMetaAllocator;
|
||||
|
||||
/// <summary> if this object is collaborative allocator and another meta port is responsible
|
||||
/// for computing the allocation size of the template buffer channel on the block
|
||||
/// allocated by *this* meta-port, we keep a pointer to that other port. Since
|
||||
/// deciding requires traversing part of the graph structure, we set this once so we
|
||||
/// don't have to do it again.
|
||||
/// </summary>
|
||||
Port * m_pCollaborativeTemplateAllocator;
|
||||
|
||||
/// <summary> An allocation size hint. </summary>
|
||||
UINT m_uiAllocHint;
|
||||
|
||||
/// <summary> true if the allocation hint takes precedence over the value
|
||||
/// received on the incoming channel for this port. </summary>
|
||||
BOOL m_bForceAllocHint;
|
||||
|
||||
};
|
||||
|
||||
};
|
||||
#endif
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -1,96 +0,0 @@
|
|||
///-------------------------------------------------------------------------------------------------
|
||||
// file: PBufferProfiler.h
|
||||
//
|
||||
// summary: Declares the buffer profiler class
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
#ifndef _PBUFFER_PROFILER_H_
|
||||
#define _PBUFFER_PROFILER_H_
|
||||
|
||||
#include "ptaskutils.h"
|
||||
#include "primitive_types.h"
|
||||
#include <deque>
|
||||
#include <set>
|
||||
#include <map>
|
||||
#include "hrperft.h"
|
||||
|
||||
namespace PTask {
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Buffer profiler. Class encapsulating profiling/statistics tools for PBuffers.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/17/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
// Declares the profiling/statistics helper for PBuffer allocations: lifecycle
// (Initialize/Deinitialize), a per-allocation Record() entry point, and a
// Report() that dumps the collected data to a stream. Only declarations are
// visible here; all behaviour lives in the implementation file.
class PBufferProfiler {
|
||||
|
||||
public:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Default constructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/17/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
PBufferProfiler();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/28/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual ~PBufferProfiler();
|
||||
|
||||
// NOTE(review): redundant access specifier; the section above is already public.
public:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Initializes the allocation profiler. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 9/25/2012. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void Initialize();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Deinitializes the allocation profiler. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 9/25/2012. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void Deinitialize();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Dumps the allocation profiler data. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 9/25/2012. </remarks>
|
||||
///
/// <param name="ios"> [in,out] The output stream; presumably the destination the report
/// is written to (inferred from the signature, confirm). </param>
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void Report(std::ostream &ios);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Records data for one allocation. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 9/25/2012. </remarks>
|
||||
///
|
||||
/// <param name="uiAllocBytes"> The allocation size in bytes. </param>
|
||||
/// <param name="uiAccID"> Identifier; presumably of the accelerator on which the allocation was made (confirm). </param>
|
||||
/// <param name="dLatency"> The latency (units not visible in this header). </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void Record(UINT uiAllocBytes, UINT uiAccID, double dLatency);
|
||||
|
||||
// NOTE(review): the data members below are public (no other access specifier
// follows the 'public:' above). Their meanings are inferred from their names
// and from Record()'s parameters; confirm against the implementation.

/// <summary> Presumably per-allocation sizes in bytes (cf. Record's uiAllocBytes). </summary>
std::map<UINT, UINT> m_vAllocationSizes;
|
||||
/// <summary> Presumably per-allocation device/accelerator identifiers (cf. Record's uiAccID). </summary>
std::map<UINT, UINT> m_vAllocationDevices;
|
||||
/// <summary> Presumably per-allocation latencies (cf. Record's dLatency). </summary>
std::map<UINT, double> m_vAllocationLatencies;
|
||||
/// <summary> Presumably the number of allocations recorded so far. </summary>
UINT m_nAllocations;
|
||||
/// <summary> Timer object; presumably used to measure allocation latency. </summary>
CHighResolutionTimer * m_pAllocationTimer;
|
||||
/// <summary> Critical section; presumably guards the profiler state above. </summary>
LPCRITICAL_SECTION m_pcsAllocProfiler;
|
||||
/// <summary> NOTE(review): the 'b' prefix suggests an "initialized" boolean flag, but the
/// declared type is UINT; confirm the intended type and use. </summary>
UINT m_bAllocProfilerInit;
|
||||
|
||||
};
|
||||
|
||||
};
|
||||
#endif
|
||||
|
|
@ -1,252 +0,0 @@
|
|||
//--------------------------------------------------------------------------------------
|
||||
// File: pclbuffer.h
|
||||
// Maintainer: crossbac@microsoft.com
|
||||
//--------------------------------------------------------------------------------------
|
||||
#ifndef _PCLBUFFER_H_
|
||||
#define _PCLBUFFER_H_
|
||||
#ifdef OPENCL_SUPPORT
|
||||
#include <stdio.h>
|
||||
#include <crtdbg.h>
|
||||
#include "pbuffer.h"
|
||||
#include "ptaskutils.h"
|
||||
#include <map>
|
||||
|
||||
namespace PTask {
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Platform-specific buffer class for the OpenCL runtime; a PBuffer subclass that is
|
||||
/// only declared when OPENCL_SUPPORT is defined. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/11/2012. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
class PCLBuffer :
|
||||
public PBuffer
|
||||
{
|
||||
public:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Constructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/11/2012. </remarks>
|
||||
///
|
||||
/// <param name="pParent"> [in,out] If non-null, the parent datablock. </param>
|
||||
/// <param name="bufferAccessFlags"> The buffer access flags. </param>
|
||||
/// <param name="nChannelIndex"> Zero-based channel index (presumably the datablock channel this buffer backs -- TODO confirm). </param>
|
||||
/// <param name="pAccelerator"> (optional) [in,out] If non-null, the accelerator. </param>
|
||||
/// <param name="pAllocatorAccelerator"> (optional) [in,out] If non-null, the allocator
|
||||
/// accelerator. </param>
|
||||
/// <param name="uiUID"> (optional) the unique identifier; defaults to ptaskutils::nextuid(). </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
PCLBuffer(Datablock * pParent,
|
||||
BUFFERACCESSFLAGS bufferAccessFlags,
|
||||
UINT nChannelIndex,
|
||||
Accelerator * pAccelerator=NULL,
|
||||
Accelerator * pAllocatorAccelerator=NULL,
|
||||
UINT uiUID=ptaskutils::nextuid()
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/11/2012. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual ~PCLBuffer(void);
|
||||
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Force synchronize. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/11/2012. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void ForceSynchronize();
|
||||
|
||||
protected:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Materialize host view. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 1/11/2012. </remarks>
|
||||
///
|
||||
/// <param name="pAsyncContext"> [in] If non-null, the asynchronous context. </param>
|
||||
/// <param name="pBuffer"> [in] The host memory extent for the host view
|
||||
/// (presumably where the data is materialized -- TODO confirm). </param>
|
||||
/// <param name="bForceSynchronous"> [in] Synchronization flag (presumably forces a synchronous operation -- TODO confirm). </param>
|
||||
/// <param name="bRequestOutstanding"> [out] Set by the callee (presumably whether a request is still outstanding -- TODO confirm). </param>
|
||||
///
|
||||
/// <returns> A UINT whose meaning is not documented here -- TODO confirm against the implementation. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual UINT
|
||||
__populateHostView(
|
||||
__in AsyncContext * pAsyncContext,
|
||||
__in HOSTMEMORYEXTENT * pBuffer,
|
||||
__in BOOL bForceSynchronous,
|
||||
__out BOOL &bRequestOutstanding
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Materialize mutable accelerator view. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/25/2012. </remarks>
|
||||
///
|
||||
/// <param name="pAsyncContext"> [in] If non-null, the asynchronous context. </param>
|
||||
/// <param name="uiBufferSizeBytes"> The buffer size in bytes. </param>
|
||||
/// <param name="pInitialData"> [in] If non-null, the initial data. </param>
|
||||
/// <param name="bOutstanding"> [out] Set by the callee (presumably whether a request is still outstanding -- TODO confirm). </param>
|
||||
/// <param name="pModule"> [in] If non-null, the module. </param>
|
||||
/// <param name="lpszBinding"> [in] The binding. </param>
|
||||
/// <returns> A UINT whose meaning is not documented here -- TODO confirm against the implementation. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual UINT
|
||||
__populateMutableAcceleratorView(
|
||||
__in AsyncContext * pAsyncContext,
|
||||
__in UINT uiBufferSizeBytes,
|
||||
__in HOSTMEMORYEXTENT * pInitialData,
|
||||
__out BOOL& bOutstanding,
|
||||
__in void * pModule,
|
||||
__in const char * lpszBinding
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Materialize immutable accelerator view. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/25/2012. </remarks>
|
||||
///
|
||||
/// <param name="pAsyncContext"> [in] If non-null, the asynchronous context. </param>
|
||||
/// <param name="uiBufferSizeBytes"> The buffer size in bytes. </param>
|
||||
/// <param name="pInitialData"> [in] If non-null, the initial data. </param>
|
||||
/// <param name="bOutstanding"> [out] Set by the callee (presumably whether a request is still outstanding -- TODO confirm). </param>
|
||||
/// <param name="pModule"> [in] If non-null, the module. </param>
|
||||
/// <param name="lpszBinding"> [in] The binding. </param>
|
||||
/// <returns> A UINT whose meaning is not documented here -- TODO confirm against the implementation. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual UINT
|
||||
__populateImmutableAcceleratorView(
|
||||
__in AsyncContext * pAsyncContext,
|
||||
__in UINT uiBufferSizeBytes,
|
||||
__in HOSTMEMORYEXTENT * pInitialData,
|
||||
__out BOOL& bOutstanding,
|
||||
__in void * pModule,
|
||||
__in const char * lpszBinding
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Initializes a device-side buffer that is expected to be bound to mutable device
|
||||
/// resources (not in constant memory).
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/4/2012. </remarks>
|
||||
///
|
||||
/// <param name="pAsyncContext"> [in,out] (optional) If non-null, context for the
|
||||
/// asynchronous. </param>
|
||||
/// <param name="uiBufferSizeBytes"> The buffer size in bytes. </param>
|
||||
/// <param name="pInitialBufferContents"> (optional) [in] If non-null, the initial buffer
|
||||
/// contents. </param>
|
||||
/// <param name="strDebugBufferName"> (optional) [in] If non-null, a name to assign to the
|
||||
/// buffer which will be used to label runtime- specific
|
||||
/// objects to aid in debugging. Ignored on release
|
||||
/// builds. </param>
|
||||
/// <param name="bByteAddressable"> (optional) true if the buffer should be byte
|
||||
/// addressable. </param>
|
||||
///
|
||||
/// <returns> PTRESULT (use PTSUCCESS/PTFAILED macros) </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual PTRESULT
|
||||
CreateMutableBuffer(
|
||||
__in AsyncContext * pAsyncContext,
|
||||
__in UINT uiBufferSizeBytes,
|
||||
__in HOSTMEMORYEXTENT * pInitialBufferContents,
|
||||
__in char * strDebugBufferName=NULL,
|
||||
__in bool bByteAddressable=true
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Initializes a device-side buffer that is expected to be bound to immutable device
|
||||
/// resources (i.e. those in constant memory).
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/4/2012. </remarks>
|
||||
///
|
||||
/// <param name="pAsyncContext"> [in,out] If non-null, context for the
|
||||
/// asynchronous. </param>
|
||||
/// <param name="uiBufferSizeBytes"> The buffer size in bytes. </param>
|
||||
/// <param name="pInitialBufferContents"> (optional) [in] If non-null, the initial buffer
|
||||
/// contents. </param>
|
||||
/// <param name="strDebugBufferName"> (optional) [in] If non-null, a name to assign to
|
||||
/// the buffer which will be used to label runtime-
|
||||
/// specific objects to aid in debugging. Ignored on
|
||||
/// release builds. </param>
|
||||
/// <param name="bByteAddressable"> (optional) true if the buffer should be byte
|
||||
/// addressable. </param>
|
||||
///
|
||||
/// <returns> PTRESULT (use PTSUCCESS/PTFAILED macros) </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual PTRESULT
|
||||
CreateImmutableBuffer(
|
||||
__in AsyncContext * pAsyncContext,
|
||||
__in UINT uiBufferSizeBytes,
|
||||
__in HOSTMEMORYEXTENT * pInitialBufferContents,
|
||||
__in char * strDebugBufferName=NULL,
|
||||
__in bool bByteAddressable=true
|
||||
);
|
||||
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Creates readable bindable objects if the access flags indicate they will be
|
||||
/// required at dispatch time.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/4/2012. </remarks>
|
||||
///
|
||||
/// <param name="szname"> [in] If non-null, a string used to label the object that can be
|
||||
/// used for debugging. </param>
|
||||
///
|
||||
/// <returns> PTRESULT (use PTSUCCESS/PTFAILED macros) </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual PTRESULT CreateBindableObjectsReadable(char * szname = NULL);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Creates writable bindable objects if the access flags indicate they will be
|
||||
/// required at dispatch time.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/4/2012. </remarks>
|
||||
///
|
||||
/// <param name="szname"> [in] If non-null, a string used to label the object that can be
|
||||
/// used for debugging. </param>
|
||||
///
|
||||
/// <returns> PTRESULT (use PTSUCCESS/PTFAILED macros) </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual PTRESULT CreateBindableObjectsWriteable(char * szname = NULL);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Creates immutable bindable objects if needed for dispatch. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/4/2012. </remarks>
|
||||
///
|
||||
/// <param name="szname"> [in] If non-null, a string used to label the object that can be
|
||||
/// used for debugging. </param>
|
||||
///
|
||||
/// <returns> PTRESULT (use PTSUCCESS/PTFAILED macros) </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual PTRESULT CreateBindableObjectsImmutable(char * szname = NULL);
|
||||
|
||||
/// <summary> true if this buffer is going to be bound to a device-side
|
||||
/// scalar variable. </summary>
|
||||
BOOL m_bScalarBinding;
|
||||
};
|
||||
|
||||
};
|
||||
#endif
|
||||
#endif
|
|
@ -1,437 +0,0 @@
|
|||
//--------------------------------------------------------------------------------------
|
||||
// File: pcubuffer.h
|
||||
// Maintainer: crossbac@microsoft.com
|
||||
//--------------------------------------------------------------------------------------
|
||||
#ifndef _PCUBUFFER_H_
|
||||
#define _PCUBUFFER_H_
|
||||
#include <stdio.h>
|
||||
#include <crtdbg.h>
|
||||
#include <cuda.h>
|
||||
#include "pbuffer.h"
|
||||
#include "ptaskutils.h"
|
||||
#include <map>
|
||||
|
||||
namespace PTask {
|
||||
|
||||
class CUAccelerator;
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Platform-specific buffer for CUDA (a PBuffer subclass). </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/11/2012. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
class PCUBuffer :
|
||||
public PBuffer
|
||||
{
|
||||
public:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Constructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/11/2012. </remarks>
|
||||
///
|
||||
/// <param name="pParent"> [in,out] If non-null, the parent datablock. </param>
|
||||
/// <param name="bufferAccessFlags"> The buffer access flags. </param>
|
||||
/// <param name="nChannelIndex"> Zero-based channel index (presumably the datablock channel this buffer backs -- TODO confirm). </param>
|
||||
/// <param name="pAccelerator"> (optional) [in,out] If non-null, the accelerator. </param>
|
||||
/// <param name="pAllocatorAccelerator"> (optional) [in,out] If non-null, the allocator
|
||||
/// accelerator. </param>
|
||||
/// <param name="uiUID"> (optional) the unique identifier; defaults to ptaskutils::nextuid(). </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
PCUBuffer(Datablock * pParent,
|
||||
BUFFERACCESSFLAGS bufferAccessFlags,
|
||||
UINT nChannelIndex,
|
||||
Accelerator * pAccelerator=NULL,
|
||||
Accelerator * pAllocatorAccelerator=NULL,
|
||||
UINT uiUID=ptaskutils::nextuid()
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/11/2012. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual ~PCUBuffer(void);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Force synchronize. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/11/2012. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void ForceSynchronize();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Device to device transfer. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/25/2012. </remarks>
|
||||
///
|
||||
/// <param name="pDstBuffer"> [in,out] If non-null, the destination buffer. </param>
|
||||
/// <param name="pAsyncContext"> [in] If non-null, the asynchronous context. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
DeviceToDeviceTransfer(
|
||||
__inout PBuffer * pDstBuffer,
|
||||
__in AsyncContext * pAsyncContext
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Device memcpy of an explicit number of bytes. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/25/2012. </remarks>
|
||||
///
|
||||
/// <param name="pDstBuffer"> [in,out] If non-null, the destination buffer. </param>
|
||||
/// <param name="pSrcBuffer"> [in,out] If non-null, buffer for source data. </param>
|
||||
/// <param name="pAsyncContext"> [in] If non-null, the asynchronous context. </param>
|
||||
/// <param name="uiCopyBytes"> The number of bytes to copy. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
Copy(
|
||||
__inout PBuffer * pDstBuffer,
|
||||
__inout PBuffer * pSrcBuffer,
|
||||
__in AsyncContext * pAsyncContext,
|
||||
__in UINT uiCopyBytes
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Device memcpy (overload without an explicit byte count). </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/25/2012. </remarks>
|
||||
///
|
||||
/// <param name="pDstBuffer"> [in,out] If non-null, the destination buffer. </param>
|
||||
/// <param name="pSrcBuffer"> [in,out] If non-null, buffer for source data. </param>
|
||||
/// <param name="pAsyncContext"> [in] If non-null, the asynchronous context. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
Copy(
|
||||
__inout PBuffer * pDstBuffer,
|
||||
__inout PBuffer * pSrcBuffer,
|
||||
__in AsyncContext * pAsyncContext
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> return true if the derived class supports a memset API. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/14/2013. </remarks>
|
||||
///
|
||||
/// <returns> true if a memset API is supported, false otherwise. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL SupportsMemset();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> memset. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/14/2013. </remarks>
|
||||
///
|
||||
/// <param name="nValue"> The value. </param>
|
||||
/// <param name="szExtentBytes"> (optional) The extent in bytes; defaults to 0. </param>
|
||||
///
|
||||
/// <returns> the number of bytes set </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual size_t
|
||||
FillExtent(
|
||||
__in int nValue,
|
||||
__in size_t szExtentBytes=0
|
||||
);
|
||||
|
||||
protected:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Materialize host view. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 1/11/2012. </remarks>
|
||||
///
|
||||
/// <param name="pAsyncContext"> [in] If non-null, the asynchronous context. </param>
|
||||
/// <param name="pBuffer"> [in] The host memory extent for the host view. </param>
|
||||
/// <param name="bForceSynchronous"> [in] Synchronization flag (presumably forces a synchronous operation -- TODO confirm). </param>
|
||||
/// <param name="bRequestOutstanding"> [out] Set by the callee (presumably whether a request is still outstanding -- TODO confirm). </param>
|
||||
///
|
||||
/// <returns> A UINT whose meaning is not documented here -- TODO confirm against the implementation. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual UINT
|
||||
__populateHostView(
|
||||
__in AsyncContext * pAsyncContext,
|
||||
__in HOSTMEMORYEXTENT * pBuffer,
|
||||
__in BOOL bForceSynchronous,
|
||||
__out BOOL &bRequestOutstanding
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Materialize mutable accelerator view. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/25/2012. </remarks>
|
||||
///
|
||||
/// <param name="pAsyncContext"> [in] If non-null, the asynchronous context. </param>
|
||||
/// <param name="uiBufferSizeBytes"> The buffer size in bytes. </param>
|
||||
/// <param name="pInitialData"> [in] If non-null, the initial data. </param>
|
||||
/// <param name="bOutstanding"> [out] Set by the callee (presumably whether a request is still outstanding -- TODO confirm). </param>
|
||||
/// <param name="pModule"> [in] If non-null, the module. </param>
|
||||
/// <param name="lpszBinding"> [in] The binding. </param>
|
||||
///
|
||||
/// <returns> A UINT whose meaning is not documented here -- TODO confirm against the implementation. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual UINT
|
||||
__populateMutableAcceleratorView(
|
||||
__in AsyncContext * pAsyncContext,
|
||||
__in UINT uiBufferSizeBytes,
|
||||
__in HOSTMEMORYEXTENT * pInitialData,
|
||||
__out BOOL& bOutstanding,
|
||||
__in void * pModule,
|
||||
__in const char * lpszBinding
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Materialize immutable accelerator view. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/25/2012. </remarks>
|
||||
///
|
||||
/// <param name="pAsyncContext"> [in] If non-null, the asynchronous context. </param>
|
||||
/// <param name="uiBufferSizeBytes"> The buffer size in bytes. </param>
|
||||
/// <param name="pInitialData"> [in] If non-null, the initial data. </param>
|
||||
/// <param name="bOutstanding"> [out] Set by the callee (presumably whether a request is still outstanding -- TODO confirm). </param>
|
||||
/// <param name="pModule"> [in] If non-null, the module. </param>
|
||||
/// <param name="lpszBinding"> [in] The binding. </param>
|
||||
/// <returns> A UINT whose meaning is not documented here -- TODO confirm against the implementation. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual UINT
|
||||
__populateImmutableAcceleratorView(
|
||||
__in AsyncContext * pAsyncContext,
|
||||
__in UINT uiBufferSizeBytes,
|
||||
__in HOSTMEMORYEXTENT * pInitialData,
|
||||
__out BOOL& bOutstanding,
|
||||
__in void * pModule,
|
||||
__in const char * lpszBinding
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Initializes a device-side buffer that is expected to be bound to mutable device
|
||||
/// resources (not in constant memory).
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/4/2012. </remarks>
|
||||
///
|
||||
/// <param name="pAsyncContext"> [in,out] (optional) If non-null, context for the
|
||||
/// asynchronous. </param>
|
||||
/// <param name="uiBufferSizeBytes"> The buffer size in bytes. </param>
|
||||
/// <param name="pInitialBufferContents"> (optional) [in] If non-null, the initial buffer
|
||||
/// contents. </param>
|
||||
/// <param name="strDebugBufferName"> (optional) [in] If non-null, a name to assign to the
|
||||
/// buffer which will be used to label runtime- specific
|
||||
/// objects to aid in debugging. Ignored on release
|
||||
/// builds. </param>
|
||||
/// <param name="bByteAddressable"> (optional) true if the buffer should be byte
|
||||
/// addressable. </param>
|
||||
///
|
||||
/// <returns> PTRESULT (use PTSUCCESS/PTFAILED macros) </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual PTRESULT
|
||||
CreateMutableBuffer(
|
||||
__in AsyncContext * pAsyncContext,
|
||||
__in UINT uiBufferSizeBytes,
|
||||
__in HOSTMEMORYEXTENT * pInitialBufferContents,
|
||||
__in char * strDebugBufferName=NULL,
|
||||
__in bool bByteAddressable=true
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Initializes a device-side buffer that is expected to be bound to immutable device
|
||||
/// resources (i.e. those in constant memory).
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/4/2012. </remarks>
|
||||
///
|
||||
/// <param name="pAsyncContext"> [in,out] If non-null, context for the
|
||||
/// asynchronous. </param>
|
||||
/// <param name="uiBufferSizeBytes"> The buffer size in bytes. </param>
|
||||
/// <param name="pInitialBufferContents"> (optional) [in] If non-null, the initial buffer
|
||||
/// contents. </param>
|
||||
/// <param name="strDebugBufferName"> (optional) [in] If non-null, a name to assign to
|
||||
/// the buffer which will be used to label runtime-
|
||||
/// specific objects to aid in debugging. Ignored on
|
||||
/// release builds. </param>
|
||||
/// <param name="bByteAddressable"> (optional) true if the buffer should be byte
|
||||
/// addressable. </param>
|
||||
///
|
||||
/// <returns> PTRESULT (use PTSUCCESS/PTFAILED macros) </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual PTRESULT
|
||||
CreateImmutableBuffer(
|
||||
__in AsyncContext * pAsyncContext,
|
||||
__in UINT uiBufferSizeBytes,
|
||||
__in HOSTMEMORYEXTENT * pInitialBufferContents,
|
||||
__in char * strDebugBufferName=NULL,
|
||||
__in bool bByteAddressable=true
|
||||
);
|
||||
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Creates readable bindable objects if the access flags indicate they will be
|
||||
/// required at dispatch time.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/4/2012. </remarks>
|
||||
///
|
||||
/// <param name="szname"> [in] If non-null, a string used to label the object that can be
|
||||
/// used for debugging. </param>
|
||||
///
|
||||
/// <returns> PTRESULT (use PTSUCCESS/PTFAILED macros) </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual PTRESULT CreateBindableObjectsReadable(char * szname = NULL);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Creates writable bindable objects if the access flags indicate they will be
|
||||
/// required at dispatch time.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/4/2012. </remarks>
|
||||
///
|
||||
/// <param name="szname"> [in] If non-null, a string used to label the object that can be
|
||||
/// used for debugging. </param>
|
||||
///
|
||||
/// <returns> PTRESULT (use PTSUCCESS/PTFAILED macros) </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual PTRESULT CreateBindableObjectsWriteable(char * szname = NULL);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Creates immutable bindable objects if needed for dispatch. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/4/2012. </remarks>
|
||||
///
|
||||
/// <param name="szname"> [in] If non-null, a string used to label the object that can be
|
||||
/// used for debugging. </param>
|
||||
///
|
||||
/// <returns> PTRESULT (use PTSUCCESS/PTFAILED macros) </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual PTRESULT CreateBindableObjectsImmutable(char * szname = NULL);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Return true if a device-side view of this data can be materialized
|
||||
/// using memset APIs rather than memcpy APIs. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 7/10/2012. </remarks>
|
||||
///
|
||||
/// <param name="uiBufferBytes"> The buffer size in bytes. </param>
|
||||
/// <param name="pInitialData"> [in] If non-null, the initial data. </param>
|
||||
/// <param name="uiInitialDataBytes"> The size of the initial data in bytes. </param>
|
||||
///
|
||||
/// <returns> true if device view memsettable, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
BOOL
|
||||
IsDeviceViewMemsettable(
|
||||
__in UINT uiBufferBytes,
|
||||
__in void * pInitialData,
|
||||
__in UINT uiInitialDataBytes
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets a device memset stride. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 7/10/2012. </remarks>
|
||||
///
|
||||
/// <param name="uiBufferBytes"> The buffer size in bytes. </param>
|
||||
/// <param name="uiInitialDataBytes"> The size of the initial data in bytes. </param>
|
||||
///
|
||||
/// <returns> The device memset stride. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
UINT
|
||||
GetDeviceMemsetStride(
|
||||
__in UINT uiBufferBytes,
|
||||
__in UINT uiInitialDataBytes
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets a device memset count. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 7/10/2012. </remarks>
|
||||
///
|
||||
/// <param name="uiBufferBytes"> The buffer size in bytes. </param>
|
||||
/// <param name="uiInitialDataBytes"> The size of the initial data in bytes. </param>
|
||||
///
|
||||
/// <returns> The device memset count. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
UINT
|
||||
GetDeviceMemsetCount(
|
||||
__in UINT uiBufferBytes,
|
||||
__in UINT uiInitialDataBytes
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets a device memset value. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 7/10/2012. </remarks>
|
||||
///
|
||||
/// <param name="pInitialValue"> [in] Pointer to the initial value. </param>
|
||||
///
|
||||
/// <returns> The device memset value. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
VOID *
|
||||
GetDeviceMemsetValue(
|
||||
__in void * pInitialValue
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the CUDA stream. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 7/10/2012. </remarks>
|
||||
///
|
||||
/// <param name="pAsyncContext"> [in] If non-null, the asynchronous context. </param>
|
||||
///
|
||||
/// <returns> The stream. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
CUstream GetStream(AsyncContext * pAsyncContext);
|
||||
|
||||
/// <summary> The platform-specific accelerator. For convenience--we could typecast
|
||||
/// m_pAccelerator inherited from the super-class every time we want one,
|
||||
/// but it's ugly, and happens a lot. </summary>
|
||||
CUAccelerator * m_pPSAcc;
|
||||
|
||||
/// <summary> Buffer for page locked allocations. Asynchronous transfers in CUDA
|
||||
/// require the host-side to be page-locked. When we create a buffer that
|
||||
/// requires asynchronous transfers, we will page-lock the initial data if it is
|
||||
/// provided, remembering to un-pin it at delete time, or allocate a page-locked
|
||||
/// buffer if it is not provided. </summary>
|
||||
void * m_pPageLockedBuffer;
|
||||
|
||||
/// <summary> true if the page locked buffer is owned by this object, and must
|
||||
/// therefore be freed (instead of un-pinned) at deletion time. </summary>
|
||||
BOOL m_bPageLockedBufferOwned;
|
||||
|
||||
/// <summary> true if the device buffer was created using cuMemAlloc and we
|
||||
/// are responsible for freeing it. If the device buffer was
|
||||
/// created by finding the device-side mapping for a page-locked
|
||||
/// buffer, then it shares the fate of the page-locked buffer
|
||||
/// and we must be careful
|
||||
/// not to free it. </summary>
|
||||
BOOL m_bDeviceBufferOwned;
|
||||
|
||||
};
|
||||
};
|
||||
#endif
|
||||
|
|
@ -1,335 +0,0 @@
|
|||
//--------------------------------------------------------------------------------------
|
||||
// File: pdxbuffer.h
|
||||
// Maintainer: crossbac@microsoft.com
|
||||
//--------------------------------------------------------------------------------------
|
||||
#ifndef _PDXBUFFER_H_
|
||||
#define _PDXBUFFER_H_
|
||||
#include <stdio.h>
|
||||
#include <crtdbg.h>
|
||||
#include "ptdxhdr.h"
|
||||
#include "pbuffer.h"
|
||||
#include "ptaskutils.h"
|
||||
#include <map>
|
||||
|
||||
namespace PTask {
|
||||
|
||||
class PDXBuffer :
|
||||
public PBuffer
|
||||
{
|
||||
public:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Constructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/28/2011. </remarks>
|
||||
///
|
||||
/// <param name="pParent"> [in,out] If non-null, the parent datablock. </param>
|
||||
/// <param name="f"> The buffer access flags. </param>
|
||||
/// <param name="nChannelIndex"> Zero-based index of the datablock channel this
|
||||
/// PBuffer is backing. </param>
|
||||
/// <param name="p"> (optional) [in,out] If non-null, the accelerator. </param>
|
||||
/// <param name="pAllocator"> (optional) [in,out] If non-null, the allocating
|
||||
/// accelerator. </param>
|
||||
/// <param name="uiUID"> (optional) unique identifier. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
PDXBuffer(Datablock * pParent,
|
||||
BUFFERACCESSFLAGS f,
|
||||
UINT nChannelIndex,
|
||||
Accelerator * p=NULL,
|
||||
Accelerator * pAllocator=NULL,
|
||||
UINT uiUID=ptaskutils::nextuid()
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/11/2012. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual ~PDXBuffer(void);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Force synchronize. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/11/2012. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void ForceSynchronize();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Complete any outstanding ops. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 3/12/2014. </remarks>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
BOOL CompleteOutstandingOps();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Check for any outstanding ops. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 3/12/2014. </remarks>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
BOOL HasOutstandingOps();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Device to device transfer. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/25/2012. </remarks>
|
||||
///
|
||||
/// <param name="pDstBuffer"> [in,out] If non-null, the destination buffer. </param>
|
||||
/// <param name="pAsyncContext"> [in,out] If non-null, context for the asynchronous. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
DeviceToDeviceTransfer(
|
||||
__inout PBuffer * pDstBuffer,
|
||||
__in AsyncContext * pAsyncContext
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Platform-specific synchronization acquire for this buffer. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 3/14/2014. </remarks>
|
||||
///
|
||||
/// <param name="uiAcquireKey"> The acquire key. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
PDXBuffer *
|
||||
PlatformSpecificAcquireSync(
|
||||
__in UINT64 uiAcquireKey
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Platform-specific synchronization release for this buffer. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 3/14/2014. </remarks>
|
||||
///
|
||||
/// <param name="uiReleaseKey"> The release key. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void
|
||||
PlatformSpecificReleaseSync(
|
||||
__in UINT64 uiReleaseKey
|
||||
);
|
||||
|
||||
|
||||
protected:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Materialize host view. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 1/11/2012. </remarks>
|
||||
///
|
||||
/// <param name="pAsyncContext"> [in,out] If non-null, information describing the lpv. </param>
|
||||
/// <param name="pBuffer"> [in,out] The data. </param>
|
||||
/// <param name="bForceSynchronous"> (optional) the elide synchronization. </param>
|
||||
/// <param name="bRequestOutstanding"> [in,out] The request outstanding. </param>
|
||||
///
|
||||
/// <returns> . </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual UINT
|
||||
__populateHostView(
|
||||
__in AsyncContext * pAsyncContext,
|
||||
__in HOSTMEMORYEXTENT * pBuffer,
|
||||
__in BOOL bForceSynchronous,
|
||||
__out BOOL &bRequestOutstanding
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Materialize mutable accelerator view. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/25/2012. </remarks>
|
||||
///
|
||||
/// <param name="pAsyncContext"> [in,out] If non-null, context for the asynchronous. </param>
|
||||
/// <param name="uiBufferSizeBytes"> The buffer size in bytes. </param>
|
||||
/// <param name="pInitialData"> [in,out] If non-null, the data. </param>
|
||||
/// <param name="bOutstanding"> [in,out] The outstanding. </param>
|
||||
/// <param name="pModule"> [in,out] (optional) If non-null, the module. </param>
|
||||
/// <param name="lpszBinding"> (optional) the binding. </param>
|
||||
///
|
||||
/// <returns> . </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual UINT
|
||||
__populateMutableAcceleratorView(
|
||||
__in AsyncContext * pAsyncContext,
|
||||
__in UINT uiBufferSizeBytes,
|
||||
__in HOSTMEMORYEXTENT * pInitialData,
|
||||
__out BOOL& bOutstanding,
|
||||
__in void * pModule,
|
||||
__in const char * lpszBinding
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Materialize immutable accelerator view. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/25/2012. </remarks>
|
||||
///
|
||||
/// <param name="pAsyncContext"> [in,out] If non-null, context for the asynchronous. </param>
|
||||
/// <param name="uiBufferSizeBytes"> The buffer size in bytes. </param>
|
||||
/// <param name="pInitialData"> [in,out] If non-null, the data. </param>
|
||||
/// <param name="bOutstanding"> [in,out] The outstanding. </param>
|
||||
/// <param name="pModule"> [in,out] (optional) If non-null, the module. </param>
|
||||
/// <param name="lpszBinding"> (optional) the binding. </param>
|
||||
///
|
||||
/// <returns> . </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual UINT
|
||||
__populateImmutableAcceleratorView(
|
||||
__in AsyncContext * pAsyncContext,
|
||||
__in UINT uiBufferSizeBytes,
|
||||
__in HOSTMEMORYEXTENT * pInitialData,
|
||||
__out BOOL& bOutstanding,
|
||||
__in void * pModule,
|
||||
__in const char * lpszBinding
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Initializes a device-side buffer that is expected to be bound to mutable device
|
||||
/// resources (not in constant memory).
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/4/2012. </remarks>
|
||||
///
|
||||
/// <param name="pAsyncContext"> [in,out] (optional) If non-null, context for the
|
||||
/// asynchronous. </param>
|
||||
/// <param name="uiBufferSizeBytes"> The buffer size in bytes. </param>
|
||||
/// <param name="pInitialBufferContents"> (optional) [in] If non-null, the initial buffer
|
||||
/// contents. </param>
|
||||
/// <param name="strDebugBufferName"> (optional) [in] If non-null, a name to assign to the
|
||||
/// buffer which will be used to label runtime- specific
|
||||
/// objects to aid in debugging. Ignored on release
|
||||
/// builds. </param>
|
||||
/// <param name="bByteAddressable"> (optional) true if the buffer should be byte
|
||||
/// addressable. </param>
|
||||
///
|
||||
/// <returns> PTRESULT (use PTSUCCESS/PTFAILED macros) </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual PTRESULT
|
||||
CreateMutableBuffer(
|
||||
__in AsyncContext * pAsyncContext,
|
||||
__in UINT uiBufferSizeBytes,
|
||||
__in HOSTMEMORYEXTENT * pInitialBufferContents,
|
||||
__in char * strDebugBufferName=NULL,
|
||||
__in bool bByteAddressable=true
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Initializes a device-side buffer that is expected to be bound to immutable device
|
||||
/// resources (i.e. those in constant memory).
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/4/2012. </remarks>
|
||||
///
|
||||
/// <param name="pAsyncContext"> [in,out] If non-null, context for the
|
||||
/// asynchronous. </param>
|
||||
/// <param name="pInitialBufferContents"> (optional) [in] If non-null, the initial buffer
|
||||
/// contents. </param>
|
||||
/// <param name="uiBufferSizeBytes"> The buffer size in bytes. </param>
|
||||
/// <param name="strDebugBufferName"> (optional) [in] If non-null, a name to assign to
|
||||
/// the buffer which will be used to label runtime-
|
||||
/// specific objects to aid in debugging. Ignored on
|
||||
/// release builds. </param>
|
||||
/// <param name="bByteAddressable"> (optional) true if the buffer should be byte
|
||||
/// addressable. </param>
|
||||
///
|
||||
/// <returns> PTRESULT (use PTSUCCESS/PTFAILED macros) </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual PTRESULT
|
||||
CreateImmutableBuffer(
|
||||
__in AsyncContext * pAsyncContext,
|
||||
__in UINT uiBufferSizeBytes,
|
||||
__in HOSTMEMORYEXTENT * pInitialBufferContents,
|
||||
__in char * strDebugBufferName=NULL,
|
||||
__in bool bByteAddressable=true
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Creates readable bindable objects if the access flags indicate they will be
|
||||
/// required at dispatch time.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/4/2012. </remarks>
|
||||
///
|
||||
/// <param name="szname"> [in] If non-null, a string used to label the object that can be
|
||||
/// used for debugging. </param>
|
||||
///
|
||||
/// <returns> . </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual PTRESULT CreateBindableObjectsReadable(char * szname = NULL);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Creates writable bindable objects if the access flags indicate they will be
|
||||
/// required at dispatch time.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/4/2012. </remarks>
|
||||
///
|
||||
/// <param name="szname"> [in] If non-null, a string used to label the object that can be
|
||||
/// used for debugging. </param>
|
||||
///
|
||||
/// <returns> . </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual PTRESULT CreateBindableObjectsWriteable(char * szname = NULL);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Creates immutable bindable objects if needed for dispatch. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/4/2012. </remarks>
|
||||
///
|
||||
/// <param name="szname"> [in] If non-null, a string used to label the object that can be
|
||||
/// used for debugging. </param>
|
||||
///
|
||||
/// <returns> . </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual PTRESULT CreateBindableObjectsImmutable(char * szname = NULL);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Creates device to host staging buffer. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 3/11/2014. </remarks>
|
||||
///
|
||||
/// <param name="pDevice"> [in,out] If non-null, the device. </param>
|
||||
///
|
||||
/// <returns> HRESULT for the staging buffer creation (check with SUCCEEDED/FAILED). </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
// Declared unqualified: a 'PDXBuffer::' prefix on an in-class member declaration is
|
||||
// ill-formed in standard C++ (GCC/Clang reject it; MSVC only accepts it as an extension).
|
||||
HRESULT
|
||||
CreateStagingBuffer(
|
||||
__in ID3D11Device * pDevice
|
||||
);
|
||||
|
||||
|
||||
HANDLE m_hDXGIHandle;
|
||||
IDXGIKeyedMutex * m_pDXGIKeyedMutex;
|
||||
IDXGIResource * m_pDXGIResource;
|
||||
ID3D11Query * m_pOutstandingQuery;
|
||||
ID3D11Buffer * m_pStageBuffer;
|
||||
ID3D11Buffer * m_pOutstandingOpBuffer;
|
||||
HOSTMEMORYEXTENT * m_pOutstandingHtoDTarget;
|
||||
HOSTMEMORYEXTENT * m_pOutstandingDtoHTarget;
|
||||
BOOL m_bHtoDStagePopulated;
|
||||
BOOL m_bDtoHStagePopulated;
|
||||
BOOL m_bP2PShareable;
|
||||
BOOL m_bP2PLocked;
|
||||
|
||||
};
|
||||
|
||||
};
|
||||
#endif
|
||||
|
|
@ -1,360 +0,0 @@
|
|||
///-------------------------------------------------------------------------------------------------
|
||||
// file: PDXTextureBuffer.h
|
||||
//
|
||||
// summary: Implements a PBuffer subclass over the DirectX backend that uses
|
||||
// ID3D11Textures* objects instead of ID3D11Buffer objects. The goal of the
|
||||
// implementation was to enable cross-GPU sharing of resources through
|
||||
// DX APIs, in hopes of avoiding the device-sync that is currently required
|
||||
// by any GPU-host copyback. The APIs in question work only on Texture2D
|
||||
// objects with no mip-maps: so I wrote a version that backs PBuffers with those
|
||||
// instead. Unfortunately, the sharing APIs *still* didn't work. Moreover, you can't
|
||||
// bind textures to compute shaders, so the whole thing wound up being a dead
|
||||
// end. Enough code was involved that it seemed worth preserving despite its
|
||||
// out-of-the-box obsolescence.
|
||||
//
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
#ifndef _PDXTEXTUREBUFFER_H_
|
||||
#define _PDXTEXTUREBUFFER_H_
|
||||
#include <stdio.h>
|
||||
#include <crtdbg.h>
|
||||
#include "ptdxhdr.h"
|
||||
#include "pbuffer.h"
|
||||
#include "ptaskutils.h"
|
||||
#include <map>
|
||||
|
||||
namespace PTask {
|
||||
|
||||
class PDXTextureBuffer :
|
||||
public PBuffer
|
||||
{
|
||||
public:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Constructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/28/2011. </remarks>
|
||||
///
|
||||
/// <param name="pParent"> [in,out] If non-null, the parent datablock. </param>
|
||||
/// <param name="f"> The buffer access flags. </param>
|
||||
/// <param name="nChannelIndex"> Zero-based index of the datablock channel this
|
||||
/// PBuffer is backing. </param>
|
||||
/// <param name="p"> (optional) [in,out] If non-null, the accelerator. </param>
|
||||
/// <param name="pAllocator"> (optional) [in,out] If non-null, the allocating
|
||||
/// accelerator. </param>
|
||||
/// <param name="uiUID"> (optional) unique identifier. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
PDXTextureBuffer(
|
||||
__in Datablock * pParent,
|
||||
__in BUFFERACCESSFLAGS f,
|
||||
__in UINT nChannelIndex,
|
||||
__in Accelerator * p=NULL,
|
||||
__in Accelerator * pAllocator=NULL,
|
||||
__in UINT uiUID=ptaskutils::nextuid()
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/11/2012. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual ~PDXTextureBuffer(void);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Force synchronize. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/11/2012. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void ForceSynchronize();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Complete any outstanding ops. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 3/12/2014. </remarks>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
BOOL CompleteOutstandingOps();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Check for any outstanding ops. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 3/12/2014. </remarks>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
BOOL HasOutstandingOps();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Device to device transfer. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/25/2012. </remarks>
|
||||
///
|
||||
/// <param name="pDstBuffer"> [in,out] If non-null, the destination buffer. </param>
|
||||
/// <param name="pAsyncContext"> [in,out] If non-null, context for the asynchronous. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
DeviceToDeviceTransfer(
|
||||
__inout PBuffer * pDstBuffer,
|
||||
__in AsyncContext * pAsyncContext
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Platform-specific synchronization acquire for this buffer. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 3/14/2014. </remarks>
|
||||
///
|
||||
/// <param name="uiAcquireKey"> The acquire key. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
PDXTextureBuffer *
|
||||
PlatformSpecificAcquireSync(
|
||||
__in UINT64 uiAcquireKey
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Platform-specific synchronization release for this buffer. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 3/14/2014. </remarks>
|
||||
///
|
||||
/// <param name="uiReleaseKey"> The release key. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void
|
||||
PlatformSpecificReleaseSync(
|
||||
__in UINT64 uiReleaseKey
|
||||
);
|
||||
|
||||
|
||||
protected:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Materialize host view. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 1/11/2012. </remarks>
|
||||
///
|
||||
/// <param name="pAsyncContext"> [in,out] If non-null, information describing the lpv. </param>
|
||||
/// <param name="pBuffer"> [in,out] The data. </param>
|
||||
/// <param name="bForceSynchronous"> (optional) the elide synchronization. </param>
|
||||
/// <param name="bRequestOutstanding"> [in,out] The request outstanding. </param>
|
||||
///
|
||||
/// <returns> . </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual UINT
|
||||
__populateHostView(
|
||||
__in AsyncContext * pAsyncContext,
|
||||
__in HOSTMEMORYEXTENT * pBuffer,
|
||||
__in BOOL bForceSynchronous,
|
||||
__out BOOL &bRequestOutstanding
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Materialize mutable accelerator view. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/25/2012. </remarks>
|
||||
///
|
||||
/// <param name="pAsyncContext"> [in,out] If non-null, context for the asynchronous. </param>
|
||||
/// <param name="uiBufferSizeBytes"> The buffer size in bytes. </param>
|
||||
/// <param name="pInitialData"> [in,out] If non-null, the data. </param>
|
||||
/// <param name="bOutstanding"> [in,out] The outstanding. </param>
|
||||
/// <param name="pModule"> [in,out] (optional) If non-null, the module. </param>
|
||||
/// <param name="lpszBinding"> (optional) the binding. </param>
|
||||
///
|
||||
/// <returns> . </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual UINT
|
||||
__populateMutableAcceleratorView(
|
||||
__in AsyncContext * pAsyncContext,
|
||||
__in UINT uiBufferSizeBytes,
|
||||
__in HOSTMEMORYEXTENT * pInitialData,
|
||||
__out BOOL& bOutstanding,
|
||||
__in void * pModule,
|
||||
__in const char * lpszBinding
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Materialize immutable accelerator view. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/25/2012. </remarks>
|
||||
///
|
||||
/// <param name="pAsyncContext"> [in,out] If non-null, context for the asynchronous. </param>
|
||||
/// <param name="uiBufferSizeBytes"> The buffer size in bytes. </param>
|
||||
/// <param name="pInitialData"> [in,out] If non-null, the data. </param>
|
||||
/// <param name="bOutstanding"> [in,out] The outstanding. </param>
|
||||
/// <param name="pModule"> [in,out] (optional) If non-null, the module. </param>
|
||||
/// <param name="lpszBinding"> (optional) the binding. </param>
|
||||
///
|
||||
/// <returns> . </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual UINT
|
||||
__populateImmutableAcceleratorView(
|
||||
__in AsyncContext * pAsyncContext,
|
||||
__in UINT uiBufferSizeBytes,
|
||||
__in HOSTMEMORYEXTENT * pInitialData,
|
||||
__out BOOL& bOutstanding,
|
||||
__in void * pModule,
|
||||
__in const char * lpszBinding
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Initializes a device-side buffer that is expected to be bound to mutable device
|
||||
/// resources (not in constant memory).
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/4/2012. </remarks>
|
||||
///
|
||||
/// <param name="pAsyncContext"> [in,out] (optional) If non-null, context for the
|
||||
/// asynchronous. </param>
|
||||
/// <param name="uiBufferSizeBytes"> The buffer size in bytes. </param>
|
||||
/// <param name="pInitialBufferContents"> (optional) [in] If non-null, the initial buffer
|
||||
/// contents. </param>
|
||||
/// <param name="strDebugBufferName"> (optional) [in] If non-null, a name to assign to the
|
||||
/// buffer which will be used to label runtime- specific
|
||||
/// objects to aid in debugging. Ignored on release
|
||||
/// builds. </param>
|
||||
/// <param name="bByteAddressable"> (optional) true if the buffer should be byte
|
||||
/// addressable. </param>
|
||||
///
|
||||
/// <returns> PTRESULT (use PTSUCCESS/PTFAILED macros) </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual PTRESULT
|
||||
CreateMutableBuffer(
|
||||
__in AsyncContext * pAsyncContext,
|
||||
__in UINT uiBufferSizeBytes,
|
||||
__in HOSTMEMORYEXTENT * pInitialBufferContents,
|
||||
__in char * strDebugBufferName=NULL,
|
||||
__in bool bByteAddressable=true
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Initializes a device-side buffer that is expected to be bound to immutable device
|
||||
/// resources (i.e. those in constant memory).
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/4/2012. </remarks>
|
||||
///
|
||||
/// <param name="pAsyncContext"> [in,out] If non-null, context for the
|
||||
/// asynchronous. </param>
|
||||
/// <param name="pInitialBufferContents"> (optional) [in] If non-null, the initial buffer
|
||||
/// contents. </param>
|
||||
/// <param name="uiBufferSizeBytes"> The buffer size in bytes. </param>
|
||||
/// <param name="strDebugBufferName"> (optional) [in] If non-null, a name to assign to
|
||||
/// the buffer which will be used to label runtime-
|
||||
/// specific objects to aid in debugging. Ignored on
|
||||
/// release builds. </param>
|
||||
/// <param name="bByteAddressable"> (optional) true if the buffer should be byte
|
||||
/// addressable. </param>
|
||||
///
|
||||
/// <returns> PTRESULT (use PTSUCCESS/PTFAILED macros) </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual PTRESULT
|
||||
CreateImmutableBuffer(
|
||||
__in AsyncContext * pAsyncContext,
|
||||
__in UINT uiBufferSizeBytes,
|
||||
__in HOSTMEMORYEXTENT * pInitialBufferContents,
|
||||
__in char * strDebugBufferName=NULL,
|
||||
__in bool bByteAddressable=true
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Creates readable bindable objects if the access flags indicate they will be
|
||||
/// required at dispatch time.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/4/2012. </remarks>
|
||||
///
|
||||
/// <param name="szname"> [in] If non-null, a string used to label the object that can be
|
||||
/// used for debugging. </param>
|
||||
///
|
||||
/// <returns> . </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual PTRESULT CreateBindableObjectsReadable(char * szname = NULL);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Creates writable bindable objects if the access flags indicate they will be
|
||||
/// required at dispatch time.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/4/2012. </remarks>
|
||||
///
|
||||
/// <param name="szname"> [in] If non-null, a string used to label the object that can be
|
||||
/// used for debugging. </param>
|
||||
///
|
||||
/// <returns> . </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual PTRESULT CreateBindableObjectsWriteable(char * szname = NULL);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Creates immutable bindable objects if needed for dispatch. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/4/2012. </remarks>
|
||||
///
|
||||
/// <param name="szname"> [in] If non-null, a string used to label the object that can be
|
||||
/// used for debugging. </param>
|
||||
///
|
||||
/// <returns> . </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual PTRESULT CreateBindableObjectsImmutable(char * szname = NULL);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Creates host to device stage buffer. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 3/11/2014. </remarks>
|
||||
///
|
||||
/// <param name="pDevice"> [in,out] If non-null, the device. </param>
|
||||
///
|
||||
/// <returns> HRESULT for the host-to-device stage buffer creation (check with SUCCEEDED/FAILED). </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
HRESULT
|
||||
CreateHtoDStageBuffer(
|
||||
__in ID3D11Device * pDevice
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Creates device to host staging buffer. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 3/11/2014. </remarks>
|
||||
///
|
||||
/// <param name="pDevice"> [in,out] If non-null, the device. </param>
|
||||
///
|
||||
/// <returns> HRESULT for the device-to-host stage buffer creation (check with SUCCEEDED/FAILED). </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
HRESULT
|
||||
CreateDtoHStageBuffer(
|
||||
__in ID3D11Device * pDevice
|
||||
);
|
||||
|
||||
|
||||
HANDLE m_hDXGIHandle;
|
||||
IDXGIKeyedMutex * m_pDXGIKeyedMutex;
|
||||
IDXGIResource * m_pDXGIResource;
|
||||
ID3D11Resource * m_pDtoHStageBuffer;
|
||||
ID3D11Resource * m_pHtoDStageBuffer;
|
||||
BOOL m_bHtoDStagePopulated;
|
||||
BOOL m_bDtoHStagePopulated;
|
||||
BOOL m_bP2PShareable;
|
||||
BOOL m_bP2PLocked;
|
||||
|
||||
};
|
||||
|
||||
};
|
||||
#endif
|
||||
|
|
@ -1,335 +0,0 @@
|
|||
//--------------------------------------------------------------------------------------
|
||||
// File: phbuffer.h
|
||||
// Maintainer: crossbac@microsoft.com
|
||||
//--------------------------------------------------------------------------------------
|
||||
#ifndef _PHBUFFER_H_
|
||||
#define _PHBUFFER_H_
|
||||
#include <stdio.h>
|
||||
#include <crtdbg.h>
|
||||
#include "pbuffer.h"
|
||||
#include "ptaskutils.h"
|
||||
#include <map>
|
||||
|
||||
namespace PTask {
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Device specific buffer representing host memory. This should be a wrapper around
|
||||
/// a simple buffer created with malloc (or potentially run-time specific allocator
|
||||
/// from another platform). The essential idea is that in a given Datablock's buffer
|
||||
/// map, the PHBuffer entry should always be the place to look for a host-accessible
|
||||
/// buffer. When an up-to-date one is not available, then we start materializing
|
||||
/// views from other memory spaces.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/28/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
class PHBuffer :
|
||||
public PBuffer
|
||||
{
|
||||
public:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Constructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <param name="pParent"> [in,out] If non-null, the parent. </param>
|
||||
/// <param name="accessFlags"> The access flags. </param>
|
||||
/// <param name="nChannelIndex"> Zero-based index of the n channel. </param>
|
||||
/// <param name="pAccelerator"> (optional) [in,out] If non-null, the accelerator. </param>
|
||||
/// <param name="pAllocAccelerator"> (optional) [in,out] If non-null, the allocate
|
||||
/// accelerator. </param>
|
||||
/// <param name="uiUID"> (optional) the uid. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
PHBuffer(Datablock * pParent,
|
||||
BUFFERACCESSFLAGS accessFlags,
|
||||
UINT nChannelIndex,
|
||||
Accelerator * pAccelerator=NULL,
|
||||
Accelerator * pAllocAccelerator=NULL,
|
||||
UINT uiUID=ptaskutils::nextuid()
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual ~PHBuffer(void);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Force synchronize. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void ForceSynchronize();
|
||||
|
||||
protected:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Materialize host view. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 1/11/2012. </remarks>
|
||||
///
|
||||
/// <param name="pAsyncContext"> [in,out] If non-null, the asynchronous context. </param>
|
||||
/// <param name="pBuffer"> [in,out] The data. </param>
|
||||
/// <param name="bForceSynchronous"> Presumably true to force the operation to complete synchronously (TODO confirm). </param>
|
||||
/// <param name="bRequestOutstanding"> [in,out] The request outstanding. </param>
|
||||
///
|
||||
/// <returns> . </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual UINT
|
||||
__populateHostView(
|
||||
__in AsyncContext * pAsyncContext,
|
||||
__in HOSTMEMORYEXTENT * pBuffer,
|
||||
__in BOOL bForceSynchronous,
|
||||
__out BOOL &bRequestOutstanding
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Materialize mutable accelerator view. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/25/2012. </remarks>
|
||||
///
|
||||
/// <param name="pAsyncContext"> [in,out] If non-null, context for the asynchronous. </param>
|
||||
/// <param name="uiBufferSizeBytes"> The buffer size in bytes. </param>
|
||||
/// <param name="pInitialData"> [in,out] If non-null, the data. </param>
|
||||
/// <param name="bOutstanding"> [in,out] The outstanding. </param>
|
||||
/// <param name="pModule"> [in,out] (optional) If non-null, the module. </param>
|
||||
/// <param name="lpszBinding"> (optional) the binding. </param>
|
||||
///
|
||||
/// <returns> . </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual UINT
|
||||
__populateMutableAcceleratorView(
|
||||
__in AsyncContext * pAsyncContext,
|
||||
__in UINT uiBufferSizeBytes,
|
||||
__in HOSTMEMORYEXTENT * pInitialData,
|
||||
__out BOOL& bOutstanding,
|
||||
__in void * pModule,
|
||||
__in const char * lpszBinding
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Materialize immutable accelerator view. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/25/2012. </remarks>
|
||||
///
|
||||
/// <param name="pAsyncContext"> [in,out] If non-null, context for the asynchronous. </param>
|
||||
/// <param name="uiBufferSizeBytes"> The buffer size in bytes. </param>
|
||||
/// <param name="pInitialData"> [in,out] If non-null, the data. </param>
|
||||
/// <param name="bOutstanding"> [in,out] The outstanding. </param>
|
||||
/// <param name="pModule"> [in,out] (optional) If non-null, the module. </param>
|
||||
/// <param name="lpszBinding"> (optional) the binding. </param>
|
||||
///
|
||||
/// <returns> . </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual UINT
|
||||
__populateImmutableAcceleratorView(
|
||||
__in AsyncContext * pAsyncContext,
|
||||
__in UINT uiBufferSizeBytes,
|
||||
__in HOSTMEMORYEXTENT * pInitialData,
|
||||
__out BOOL& bOutstanding,
|
||||
__in void * pModule,
|
||||
__in const char * lpszBinding
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Initializes a device-side buffer that is expected to be bound to mutable device
|
||||
/// resources (not in constant memory).
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/4/2012. </remarks>
|
||||
///
|
||||
/// <param name="pAsyncContext"> [in,out] (optional) If non-null, context for the
|
||||
/// asynchronous. </param>
|
||||
/// <param name="uiBufferSizeBytes"> The buffer size in bytes. </param>
|
||||
/// <param name="pInitialBufferContents"> (optional) [in] If non-null, the initial buffer
|
||||
/// contents. </param>
|
||||
/// <param name="strDebugBufferName"> (optional) [in] If non-null, a name to assign to the
|
||||
/// buffer which will be used to label runtime- specific
|
||||
/// objects to aid in debugging. Ignored on release
|
||||
/// builds. </param>
|
||||
/// <param name="bByteAddressable"> (optional) true if the buffer should be byte
|
||||
/// addressable. </param>
|
||||
///
|
||||
/// <returns> PTRESULT (use PTSUCCESS/PTFAILED macros) </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual PTRESULT
|
||||
CreateMutableBuffer(
|
||||
__in AsyncContext * pAsyncContext,
|
||||
__in UINT uiBufferSizeBytes,
|
||||
__in HOSTMEMORYEXTENT * pInitialBufferContents,
|
||||
__in char * strDebugBufferName=NULL,
|
||||
__in bool bByteAddressable=true
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Initializes a device-side buffer that is expected to be bound to immutable device
|
||||
/// resources (i.e. those in constant memory).
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/4/2012. </remarks>
|
||||
///
|
||||
/// <param name="pAsyncContext"> [in,out] If non-null, context for the
|
||||
/// asynchronous. </param>
|
||||
/// <param name="pInitialBufferContents"> (optional) [in] If non-null, the initial buffer
|
||||
/// contents. </param>
|
||||
/// <param name="uiBufferSizeBytes"> The buffer size in bytes. </param>
|
||||
/// <param name="strDebugBufferName"> (optional) [in] If non-null, a name to assign to
|
||||
/// the buffer which will be used to label runtime-
|
||||
/// specific objects to aid in debugging. Ignored on
|
||||
/// release builds. </param>
|
||||
/// <param name="bByteAddressable"> (optional) true if the buffer should be byte
|
||||
/// addressable. </param>
|
||||
///
|
||||
/// <returns> PTRESULT (use PTSUCCESS/PTFAILED macros) </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual PTRESULT
|
||||
CreateImmutableBuffer(
|
||||
__in AsyncContext * pAsyncContext,
|
||||
__in UINT uiBufferSizeBytes,
|
||||
__in HOSTMEMORYEXTENT * pInitialBufferContents,
|
||||
__in char * strDebugBufferName=NULL,
|
||||
__in bool bByteAddressable=true
|
||||
);
|
||||
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Creates readable bindable objects if the access flags indicate they will be
|
||||
/// required at dispatch time.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/4/2012. </remarks>
|
||||
///
|
||||
/// <param name="szname"> [in] If non-null, a string used to label the object that can be
|
||||
/// used for debugging. </param>
|
||||
///
|
||||
/// <returns> . </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual PTRESULT CreateBindableObjectsReadable(char * szname = NULL);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Creates writable bindable objects if the access flags indicate they will be
|
||||
/// required at dispatch time.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/4/2012. </remarks>
|
||||
///
|
||||
/// <param name="szname"> [in] If non-null, a string used to label the object that can be
|
||||
/// used for debugging. </param>
|
||||
///
|
||||
/// <returns> . </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual PTRESULT CreateBindableObjectsWriteable(char * szname = NULL);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Creates immutable bindable objects if needed for dispatch. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/4/2012. </remarks>
|
||||
///
|
||||
/// <param name="szname"> [in] If non-null, a string used to label the object that can be
|
||||
/// used for debugging. </param>
|
||||
///
|
||||
/// <returns> . </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual PTRESULT CreateBindableObjectsImmutable(char * szname = NULL);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> return true if the derived class supports a memset API. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/14/2013. </remarks>
|
||||
///
|
||||
/// <returns> true if a memset API is supported, false otherwise. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL SupportsMemset();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> memset. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/14/2013. </remarks>
|
||||
///
|
||||
/// <param name="nValue"> The value. </param>
|
||||
/// <param name="szExtentBytes"> The extent in bytes. </param>
|
||||
///
|
||||
/// <returns> the number of bytes set </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual size_t
|
||||
FillExtent(
|
||||
__in int nValue,
|
||||
__in size_t szExtentBytes=0
|
||||
);
|
||||
|
||||
protected:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Initializes a host buffer. Since most of the views created by required overrides
|
||||
/// in PBuffer are meaningless in host memory (e.g. immutability)
|
||||
/// we provide one routine to create buffers, and map all the required overrides to
|
||||
/// it.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/4/2012. </remarks>
|
||||
///
|
||||
/// <param name="pAsyncContext"> [in,out] (optional) If non-null, context for the
|
||||
/// asynchronous. </param>
|
||||
/// <param name="uiBufferSizeBytes"> The buffer size in bytes. </param>
|
||||
/// <param name="pInitialBufferContents"> (optional) [in] If non-null, the initial buffer
|
||||
/// contents. </param>
|
||||
/// <param name="strDebugBufferName"> (optional) [in] If non-null, a name to assign to the
|
||||
/// buffer which will be used to label runtime- specific
|
||||
/// objects to aid in debugging. Ignored on release
|
||||
/// builds. </param>
|
||||
/// <param name="bByteAddressable"> (optional) true if the buffer should be byte
|
||||
/// addressable. </param>
|
||||
///
|
||||
/// <returns> PTRESULT (use PTSUCCESS/PTFAILED macros) </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
PTRESULT
|
||||
InitializeBuffer(
|
||||
__in AsyncContext * pAsyncContext,
|
||||
__in UINT uiBufferSizeBytes,
|
||||
__in HOSTMEMORYEXTENT * pInitialBufferContents,
|
||||
__in char * strDebugBufferName,
|
||||
__in bool bByteAddressable
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Finalize the dimensions of the device buffer that will be created to back this
|
||||
/// PHBuffer. We specialize the host buffer implementation to not
|
||||
/// require the block to be sealed to allocate buffers!
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/4/2012. </remarks>
|
||||
///
|
||||
/// <param name="bByteAddressable"> [out] (optional) true if the buffer should be byte
|
||||
/// addressable. </param>
|
||||
/// <param name="uiBufferSizeBytes"> (optional) the buffer size in bytes. </param>
|
||||
///
|
||||
/// <returns> . </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual UINT
|
||||
FinalizeDimensions(
|
||||
__out bool &bByteAddressable,
|
||||
__in UINT uiBufferSizeBytes
|
||||
);
|
||||
|
||||
};
|
||||
};
|
||||
#endif
|
||||
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -1,165 +0,0 @@
|
|||
///-------------------------------------------------------------------------------------------------
|
||||
// file: Partitioner.h
|
||||
//
|
||||
// summary: Declares the partitioner class
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
#ifndef __PARTITIONER_H__
|
||||
#define __PARTITIONER_H__
|
||||
|
||||
#include <Windows.h>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <stdio.h>
|
||||
#include <crtdbg.h>
|
||||
#include "primitive_types.h"
|
||||
|
||||
namespace PTask {
|
||||
|
||||
class Graph;
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Graph partitioner class. Based on Renato et al.'s optimal cut partitioner.
|
||||
///
|
||||
/// Currently, calls out to a .exe. In the future will use a DLL-based version directly.
|
||||
/// Work preparing for the DLL-based version is currently guarded by
|
||||
/// #ifdef USE_GRAPH_PARTITIONER_DLL
|
||||
/// </summary>
|
||||
/// <remarks> Crossbac, 12/10/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
class Partitioner {
|
||||
public:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Constructor. </summary>
|
||||
///
|
||||
/// <remarks> jcurrey, 2/1/2014. </remarks>
|
||||
///
|
||||
/// TODO JC params
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
Partitioner(
|
||||
Graph * graph,
|
||||
int numPartitions = 2,
|
||||
const char * workingDir = NULL,
|
||||
const char * fileNamePrefix = NULL
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destructor. </summary>
|
||||
///
|
||||
/// <remarks> jcurrey 2/1/2014. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual ~Partitioner();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Partition the ptask graph into nPartition. If successful, return true.
|
||||
///
|
||||
/// Currently only 2 partitions are supported.
|
||||
///
|
||||
/// <remarks> jcurrey, 2/1/2014. </remarks>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
BOOL Partition();
|
||||
|
||||
protected:
|
||||
friend class Graph;
|
||||
|
||||
/// <summary> The input ptask graph being partitioned. </summary>
|
||||
Graph * m_graph;
|
||||
|
||||
/// <summary> The number of partitions to divide the graph into. </summary>
|
||||
int m_numPartitions;
|
||||
|
||||
/// <summary> The directory in which files related to the execution of the partitioner will be written. </summary>
|
||||
std::string m_workingDir;
|
||||
|
||||
/// <summary> The prefix of the names of the files which will be written. </summary>
|
||||
std::string m_fileNamePrefix;
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Read the partitioner's solution from a file into an array. </summary>
|
||||
///
|
||||
/// <remarks> jcurrey, 2/1/2014. </remarks>
|
||||
///
|
||||
/// TODO JC params
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
BOOL ReadSolutionFile(
|
||||
const char * fileName,
|
||||
int expectedNumValues,
|
||||
int * values
|
||||
);
|
||||
|
||||
#ifdef USE_GRAPH_PARTITIONER_DLL
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Default constructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/10/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
Partitioner(Graph * pGraph);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/10/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual ~Partitioner();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Partition the ptask graph into nPartition. If successful, return true, and set
|
||||
/// nSolutionValue and nSolutionEvaluation, which are (somewhat obscure)
|
||||
/// metrics of the quality of the solution.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/10/2013. </remarks>
|
||||
///
|
||||
/// <param name="nPartitions"> The partitions. </param>
|
||||
/// <param name="nSolutionValue"> [out] The solution value. </param>
|
||||
/// <param name="nSolutionEvaluation"> [out] The solution evaluation. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
BOOL Partition(int nPartitions, int& nSolutionValue, int& nSolutionEvaluation);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Assign the partition created by a successful call to Partition to the
|
||||
/// underlying PTask graph. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/10/2013. </remarks>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
BOOL AssignPartition();
|
||||
|
||||
protected:
|
||||
|
||||
/// <summary> The input ptask graph being partitioned. </summary>
|
||||
Graph * m_pGraph;
|
||||
|
||||
/// <summary> The solution: an integer-valued partition id per node in m_pGraph </summary>
|
||||
int * m_pSolution;
|
||||
|
||||
/// <summary> true if the operation was a success, false if it failed. </summary>
|
||||
BOOL m_bSolutionValid;
|
||||
|
||||
/// <summary> The solution value. </summary>
|
||||
int m_nSolutionValue;
|
||||
|
||||
/// <summary> The solution evaluation. </summary>
|
||||
int m_nSolutionEvaluation;
|
||||
|
||||
friend class Graph;
|
||||
#endif // USE_GRAPH_PARTITIONER_DLL
|
||||
|
||||
};
|
||||
};
|
||||
|
||||
#endif // __PARTITIONER_H__
|
|
@ -1,290 +0,0 @@
|
|||
///-------------------------------------------------------------------------------------------------
|
||||
// file: PhysicalDevice.h
|
||||
//
|
||||
// summary: Declares the physical device class
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
#pragma once
|
||||
#include <deque>
|
||||
#include <vector>
|
||||
#include <set>
|
||||
#include "oclhdr.h"
|
||||
#include "ptdxhdr.h"
|
||||
#include "cuhdr.h"
|
||||
#include "accelerator.h"
|
||||
#include "primitive_types.h"
|
||||
#include "PhysicalDevice.h"
|
||||
#include "Lockable.h"
|
||||
#include <map>
|
||||
|
||||
|
||||
namespace PTask {
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// Forward declarations
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
class Task;
|
||||
class DXAccelerator;
|
||||
#ifdef CUDA_SUPPORT
|
||||
class CUAccelerator;
|
||||
#endif
|
||||
#ifdef OPENCL_SUPPORT
|
||||
class CLAccelerator;
|
||||
#endif
|
||||
class HostAccelerator;
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> DIRECTX_DEVICERECORD: everything we have available to uniquely identify a device
|
||||
/// through the DXGI API.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/28/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
typedef struct dxdevrec_t {
|
||||
/// <summary> DXGI adapter interface pointer for the device. </summary>
IDXGIAdapter * pAdapter;
|
||||
/// <summary> DXGI adapter description structure for the device. </summary>
DXGI_ADAPTER_DESC desc;
|
||||
} DIRECTX_DEVICERECORD;
|
||||
|
||||
#ifdef OPENCL_SUPPORT
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> OPENCL_DEVICERECORD: everything we have available to uniquely identify a device
|
||||
/// through OpenCL.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/28/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
typedef struct cldevrec_t {
|
||||
/// <summary> OpenCL platform id for the device. </summary>
cl_platform_id platform;
|
||||
/// <summary> OpenCL device id for the device. </summary>
cl_device_id device;
|
||||
} OPENCL_DEVICERECORD;
|
||||
#endif
|
||||
|
||||
#ifdef CUDA_SUPPORT
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> CUDA_DEVICERECORD: everything we have available to uniquely identify a device
|
||||
/// through CUDA APIs.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/28/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
typedef struct cudevrec_t {
|
||||
/// <summary> CUDA device handle (CUdevice) for the device. </summary>
CUdevice device;
|
||||
} CUDA_DEVICERECORD;
|
||||
#endif
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Physical device object mapping a unique physical accelerator such as a GPU card
|
||||
/// to Accelerator objects that use it through the various back-end runtimes that
|
||||
/// PTask supports (DirectX, CUDA, OpenCL).
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/28/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
class PhysicalDevice : public Lockable
|
||||
{
|
||||
public:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Default constructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/28/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
PhysicalDevice();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/28/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual ~PhysicalDevice(void);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if this physical device is busy. We need this at the physical device layer
|
||||
/// because a physical device may be busy through its CUDA accelerator interface but
|
||||
/// not through its DirectX interface, for example.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/28/2011. </remarks>
|
||||
///
|
||||
/// <returns> true if busy, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL IsBusy();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Mark this device as busy, meaning it is performing a dispatch for some Task.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/28/2011. </remarks>
|
||||
///
|
||||
/// <param name="b"> true to mark the device busy (presumably false clears the busy state; TODO confirm). </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void SetBusy(BOOL b);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if the Accelerator 'pAccelerator' is a runtime-specific interface
|
||||
/// for this physical device.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/28/2011. </remarks>
|
||||
///
|
||||
/// <param name="pAccelerator"> [in] non-null, the accelerator. </param>
|
||||
///
|
||||
/// <returns> true if same device, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL IsSameDevice(Accelerator * pAccelerator);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if 'pDevice' is same device. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/28/2011. </remarks>
|
||||
///
|
||||
/// <param name="pDevice"> [in,out] If non-null, the device. </param>
|
||||
/// <param name="pDesc"> [in,out] If non-null, the description. </param>
|
||||
/// <param name="nPlatformIndex"> Zero-based index of the n platform. </param>
|
||||
///
|
||||
/// <returns> true if same device, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL IsSameDevice(IDXGIAdapter * pDevice, DXGI_ADAPTER_DESC * pDesc, UINT nPlatformIndex);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if 'platform'/'device' is same device. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/28/2011. </remarks>
|
||||
///
|
||||
/// <param name="platform"> The platform. </param>
|
||||
/// <param name="device"> The device. </param>
|
||||
/// <param name="nPlatformIndex"> Zero-based index of the n platform. </param>
|
||||
///
|
||||
/// <returns> true if same device, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
#ifdef OPENCL_SUPPORT
|
||||
virtual BOOL IsSameDevice(cl_platform_id platform, cl_device_id device, UINT nPlatformIndex);
|
||||
#endif
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if 'device' is same device. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/28/2011. </remarks>
|
||||
///
|
||||
/// <param name="device"> The device. </param>
|
||||
/// <param name="nPlatformIndex"> Zero-based index of the n platform. </param>
|
||||
///
|
||||
/// <returns> true if same device, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
#ifdef CUDA_SUPPORT
|
||||
virtual BOOL IsSameDevice(CUdevice device, UINT nPlatformIndex);
|
||||
#endif
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Adds an Accelerator interface to this physical device record. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/28/2011. </remarks>
|
||||
///
|
||||
/// <param name="pAccelerator"> [in] non-null, the accelerator. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL AddInterface(Accelerator * pAccelerator);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Return true if this physical device has an Accelerator interface that can be used
|
||||
/// to execute tasks with the given accelerator class.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/28/2011. </remarks>
|
||||
///
|
||||
/// <param name="cls"> The accelerator class. </param>
|
||||
///
|
||||
/// <returns> true if the device has an interface of the given class, false otherwise. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL Supports(ACCELERATOR_CLASS cls);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets an accelerator interface on this physical device that can be used to execute
|
||||
/// tasks of the given accelerator class. Return NULL if no such interface is present.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/28/2011. </remarks>
|
||||
///
|
||||
/// <param name="cls"> The accelerator class. </param>
|
||||
///
|
||||
/// <returns> null if no appropriate interface is available, else the accelerator interface.
|
||||
/// </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual Accelerator * GetAcceleratorInterface(ACCELERATOR_CLASS cls);
|
||||
|
||||
protected:
|
||||
// note that we track a device entry per supported runtime: this is because the device may have
|
||||
// support from only a subset of the runtimes (e.g. Tesla cards, which do not enumerate as
|
||||
// Adapters and therefore enjoy OpenCL and CUDA support, but no DirectX support).
|
||||
|
||||
/// <summary> Data that uniquely identify the physical device
|
||||
/// using DirectX/DXGI APIs. NULL if no DirectX support is available
|
||||
/// for this physical device.
|
||||
/// </summary>
|
||||
DIRECTX_DEVICERECORD * m_pDirectXDevice;
|
||||
|
||||
/// <summary> Data that uniquely identify the physical device
|
||||
/// using OpenCL APIs. NULL if no OpenCL support is available
|
||||
/// for this physical device.
|
||||
/// </summary>
|
||||
#ifdef OPENCL_SUPPORT
|
||||
OPENCL_DEVICERECORD * m_pOpenCLDevice;
|
||||
#endif
|
||||
|
||||
/// <summary> Data that uniquely identify the physical device
|
||||
/// using CUDA APIs. NULL if no CUDA support is available
|
||||
/// for this physical device.
|
||||
/// </summary>
|
||||
#ifdef CUDA_SUPPORT
|
||||
CUDA_DEVICERECORD * m_pCUDADevice;
|
||||
#endif
|
||||
|
||||
/// <summary> The DirectX Accelerator object that maps to this physical
|
||||
/// device. NULL if no DirectX support is available
|
||||
/// for this physical device.
|
||||
/// </summary>
|
||||
DXAccelerator * m_pDXAccelerator;
|
||||
|
||||
/// <summary> The CUDA Accelerator object that maps to this physical
|
||||
/// device. NULL if no CUDA support is available
|
||||
/// for this physical device.
|
||||
/// </summary>
|
||||
#ifdef CUDA_SUPPORT
|
||||
CUAccelerator * m_pCUAccelerator;
|
||||
#endif
|
||||
|
||||
/// <summary> The OpenCL Accelerator object that maps to this physical
|
||||
/// device. NULL if no OpenCL support is available
|
||||
/// for this physical device.
|
||||
/// </summary>
|
||||
#ifdef OPENCL_SUPPORT
|
||||
CLAccelerator * m_pCLAccelerator;
|
||||
#endif
|
||||
|
||||
/// <summary> The Host Accelerator object that maps to this physical
|
||||
/// device. Not used.
|
||||
/// </summary>
|
||||
HostAccelerator * m_pHostAccelerator;
|
||||
|
||||
/// <summary> true if this device is in flight, meaning it is currently
|
||||
/// being used in the dispatch of a Task.
|
||||
/// </summary>
|
||||
BOOL m_bInFlight;
|
||||
};
|
||||
};
|
|
@ -1,249 +0,0 @@
|
|||
//--------------------------------------------------------------------------------------
|
||||
// File: Recorder.h
|
||||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
//--------------------------------------------------------------------------------------
|
||||
#ifndef _RECORDER_H_
|
||||
#define _RECORDER_H_
|
||||
|
||||
#ifndef XMLSUPPORT
|
||||
|
||||
//namespace PTask {
|
||||
// class BindDescriptorPort { public : BindDescriptorPort(void * pDescribedPort, void * pDescriberPort, int func) {} };
|
||||
// class BindControlPort { public : BindControlPort(void * pDescribedPort, void * pDescriberPort, int func) {} };
|
||||
// class BindControlPropagationPort { public : BindControlPropagationPort(void * pDescribedPort, void * pDescriberPort) {} };
|
||||
// class SetPredicationType { public : SetPredicationType(void * pDescribedPort, int pDescriberPort, int func) {} };
|
||||
// class SetComputeGeometry { public : SetComputeGeometry(void * pDescribedPort, int pDescriberPort, int func, int blah) {} };
|
||||
// class Recorder { public: static void Record(void * action); };
|
||||
//}
|
||||
|
||||
// Recording is compiled out in this branch (XMLSUPPORT is not defined): every
// macro below has an empty replacement list, so a use of it expands to nothing
// and its arguments are never evaluated.
#define INITRECORDER()
|
||||
#define DESTROYRECORDER()
|
||||
#define RECORDACTION(x,y,z,w)
|
||||
#define RECORDACTION2P(x,y,z)
|
||||
#define RECORDACTION4P(x,y,z,w,t)
|
||||
#else
|
||||
// Recording macros for the XMLSUPPORT build.
//   INITRECORDER / DESTROYRECORDER forward to Recorder::Initialize / Recorder::Destroy.
//   RECORDACTION2P / RECORDACTION / RECORDACTION4P heap-allocate a PTask::<x>
//   action from 2 / 3 / 4 constructor arguments and hand it to Recorder::Record.
// The class name is written as PTask::x with no ## operator: pasting "::" onto
// an identifier does not yield a single valid preprocessing token, so
// standard-conforming preprocessors (GCC, Clang) reject the pasted form.
#define INITRECORDER() Recorder::Initialize()
#define DESTROYRECORDER() Recorder::Destroy()
#define RECORDACTION(x,y,z,w) Recorder::Record(new PTask::x((y),(z),(w)))
#define RECORDACTION2P(x,y,z) Recorder::Record(new PTask::x((y),(z)))
#define RECORDACTION4P(x,y,z,w,t) Recorder::Record(new PTask::x((y),(z),(w),(t)))
|
||||
|
||||
#include "XMLWriter.h"
|
||||
#include "XMLReader.h"
|
||||
#include "port.h"
|
||||
|
||||
namespace PTask {
|
||||
|
||||
class Graph;
|
||||
class Task;
|
||||
|
||||
/// <summary> Type tag for a recorded action. Enumerators are numbered
///           consecutively from zero, in the order listed.
/// </summary>
typedef enum _recorded_action_type {
    BINDCONTROLPORT               = 0,
    BINDCONTROLPROPAGATIONCHANNEL = 1,
    BINDCONTROLPROPAGATIONPORT    = 2,
    BINDDESCRIPTORPORT            = 3,
    BINDITERATIONSCOPE            = 4,
    SETBLOCKANDGRIDSIZE           = 5,
    SETCOMPUTEGEOMETRY            = 6,
    SETPREDICATIONTYPE            = 7
} RECORDEDACTIONTYPE;
|
||||
|
||||
class RecordedAction {
|
||||
public:
|
||||
RecordedAction(RECORDEDACTIONTYPE type, std::string name);
|
||||
virtual void Write(XMLWriter * writer)=0;
|
||||
virtual void Read(XMLReader * reader)=0;
|
||||
virtual void Replay(XMLReader * reader)=0;
|
||||
const char * GetName();
|
||||
virtual ~RecordedAction() { }
|
||||
|
||||
protected:
|
||||
RECORDEDACTIONTYPE m_type;
|
||||
std::string m_name;
|
||||
};
|
||||
|
||||
class BindControlPort : public RecordedAction {
|
||||
public:
|
||||
BindControlPort();
|
||||
BindControlPort(
|
||||
Port * pController,
|
||||
Port * pGatedPort,
|
||||
BOOL bInitiallyOpen
|
||||
);
|
||||
virtual ~BindControlPort() { }
|
||||
|
||||
void Write(XMLWriter * writer);
|
||||
void Read(XMLReader * reader);
|
||||
void Replay(XMLReader * reader);
|
||||
|
||||
protected:
|
||||
UINT m_controllerPortUID;
|
||||
UINT m_gatedPortUID;
|
||||
BOOL m_initiallyOpen;
|
||||
};
|
||||
|
||||
class BindControlPropagationChannel : public RecordedAction {
|
||||
public:
|
||||
BindControlPropagationChannel();
|
||||
BindControlPropagationChannel(
|
||||
Port * pInputPort,
|
||||
Channel * pControlledChannel
|
||||
);
|
||||
virtual ~BindControlPropagationChannel() { }
|
||||
|
||||
void Write(XMLWriter * writer);
|
||||
void Read(XMLReader * reader);
|
||||
void Replay(XMLReader * reader);
|
||||
|
||||
protected:
|
||||
UINT m_inputPortUID;
|
||||
std::string m_controlledChannelName;
|
||||
};
|
||||
|
||||
class BindControlPropagationPort : public RecordedAction {
|
||||
public:
|
||||
BindControlPropagationPort();
|
||||
BindControlPropagationPort(
|
||||
Port * pInputPort,
|
||||
Port * pOutputPort
|
||||
);
|
||||
virtual ~BindControlPropagationPort() { }
|
||||
void Write(XMLWriter * writer);
|
||||
void Read(XMLReader * reader);
|
||||
void Replay(XMLReader * reader);
|
||||
|
||||
protected:
|
||||
UINT m_inputPortUID;
|
||||
UINT m_outputPortUID;
|
||||
};
|
||||
|
||||
class BindDescriptorPort : public RecordedAction {
|
||||
public:
|
||||
BindDescriptorPort();
|
||||
BindDescriptorPort(
|
||||
Port * pDescribedPort,
|
||||
Port * pDescriberPort,
|
||||
DESCRIPTORFUNC func
|
||||
);
|
||||
virtual ~BindDescriptorPort() { }
|
||||
|
||||
void Write(XMLWriter * writer);
|
||||
void Read(XMLReader * reader);
|
||||
void Replay(XMLReader * reader);
|
||||
|
||||
protected:
|
||||
UINT m_describedPortUID;
|
||||
UINT m_describerPortUID;
|
||||
DESCRIPTORFUNC m_func;
|
||||
};
|
||||
|
||||
class BindIterationScope : public RecordedAction {
|
||||
public:
|
||||
BindIterationScope();
|
||||
BindIterationScope(
|
||||
Port * pMetaPort,
|
||||
Port * pScopedPort
|
||||
);
|
||||
virtual ~BindIterationScope() { }
|
||||
|
||||
void Write(XMLWriter * writer);
|
||||
void Read(XMLReader * reader);
|
||||
void Replay(XMLReader * reader);
|
||||
|
||||
protected:
|
||||
UINT m_metaPortUID;
|
||||
UINT m_scopedPortUID;
|
||||
};
|
||||
|
||||
class SetBlockAndGridSize : public RecordedAction {
|
||||
public:
|
||||
SetBlockAndGridSize();
|
||||
SetBlockAndGridSize(
|
||||
Task * task,
|
||||
PTASKDIM3 grid,
|
||||
PTASKDIM3 block
|
||||
);
|
||||
virtual ~SetBlockAndGridSize() { }
|
||||
|
||||
void Write(XMLWriter * writer);
|
||||
void Read(XMLReader * reader);
|
||||
void Replay(XMLReader * reader);
|
||||
|
||||
protected:
|
||||
std::string m_taskName;
|
||||
PTASKDIM3 m_grid;
|
||||
PTASKDIM3 m_block;
|
||||
};
|
||||
|
||||
class SetComputeGeometry : public RecordedAction {
|
||||
public:
|
||||
SetComputeGeometry();
|
||||
SetComputeGeometry(
|
||||
Task * task,
|
||||
int tgx,
|
||||
int tgy,
|
||||
int tgz);
|
||||
virtual ~SetComputeGeometry() { }
|
||||
|
||||
void Write(XMLWriter * writer);
|
||||
void Read(XMLReader * reader);
|
||||
void Replay(XMLReader * reader);
|
||||
|
||||
protected:
|
||||
std::string m_taskName;
|
||||
int m_tgx;
|
||||
int m_tgy;
|
||||
int m_tgz;
|
||||
};
|
||||
|
||||
class SetPredicationType : public RecordedAction {
|
||||
public:
|
||||
SetPredicationType();
|
||||
SetPredicationType(
|
||||
Channel * pChannel,
|
||||
CHANNELENDPOINTTYPE eEndpoint,
|
||||
CHANNELPREDICATE eCanonicalPredicator
|
||||
);
|
||||
virtual ~SetPredicationType() { }
|
||||
|
||||
void Write(XMLWriter * writer);
|
||||
void Read(XMLReader * reader);
|
||||
void Replay(XMLReader * reader);
|
||||
|
||||
protected:
|
||||
std::string m_channelName;
|
||||
int m_endpointType;
|
||||
int m_canonicalPredicate;
|
||||
};
|
||||
|
||||
class Recorder {
|
||||
public:
|
||||
|
||||
// HACK: Recorder is a singleton for now.
|
||||
// TODO: Move to a Recorder per Graph, once can obain handle to Graph instance
|
||||
// from all methods which want to record (such as methods on Port and Channel).
|
||||
// One possible solution is to move all recordable actions to be methods on Graph.
|
||||
static Recorder * Instance();
|
||||
static void Record(RecordedAction * action);
|
||||
static void Initialize();
|
||||
static void Destroy();
|
||||
|
||||
RecordedAction * CreateAction(const char * actionName);
|
||||
std::vector<RecordedAction *>* GetRecordedActions();
|
||||
|
||||
protected:
|
||||
Recorder();
|
||||
virtual ~Recorder();
|
||||
Recorder(Recorder const&);
|
||||
Recorder& operator=(Recorder const&);
|
||||
void RecordAction(RecordedAction * action);
|
||||
|
||||
std::vector<RecordedAction *> m_vRecordedActions;
|
||||
static Recorder * s_pInstance;
|
||||
};
|
||||
|
||||
}; // namespace PTask
|
||||
#endif
|
||||
#endif
|
|
@ -1,91 +0,0 @@
|
|||
///-------------------------------------------------------------------------------------------------
|
||||
// file: RefCountProfiler.h
|
||||
//
|
||||
// summary: Declares the reference count profiler class
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
#ifndef __REFERENCE_COUNTED_PROFILER_H__
|
||||
#define __REFERENCE_COUNTED_PROFILER_H__
|
||||
|
||||
#include <Windows.h>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <stdio.h>
|
||||
#include <crtdbg.h>
|
||||
#include <set>
|
||||
#include "primitive_types.h"
|
||||
|
||||
namespace PTask {
|
||||
|
||||
class ReferenceCounted;
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Profiler class for reference counted objects
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/18/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
class ReferenceCountedProfiler
|
||||
{
|
||||
public:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Initializes the refcount profiler. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 2/24/2012. </remarks>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static BOOL Initialize(BOOL bEnable);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Deinitializes the refcount profiler. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 2/24/2012. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static void Deinitialize();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Dumps the refcount profiler leaks. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 2/24/2012. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static void Report(std::ostream& ss);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Profile allocation. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 2/24/2012. </remarks>
|
||||
///
|
||||
/// <param name="pBlock"> [in,out] If non-null, the item. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static void RecordAllocation(ReferenceCounted * pItem);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Profile deletion. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 2/24/2012. </remarks>
|
||||
///
|
||||
/// <param name="pBlock"> [in,out] If non-null, the item. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static void RecordDeletion(ReferenceCounted * pItem);
|
||||
|
||||
protected:
|
||||
|
||||
static LONG m_nRCAllocations;
|
||||
static LONG m_nRCDeletions;
|
||||
static LONG m_nRCProfilerInit;
|
||||
static LONG m_nRCProfilerEnable;
|
||||
static LONG m_nRCProfilerIDCount;
|
||||
static CRITICAL_SECTION m_csRCProfiler;
|
||||
static std::set<PTask::ReferenceCounted*> m_vAllAllocations;
|
||||
};
|
||||
};
|
||||
|
||||
#endif // __REFERENCE_COUNTED_PROFILER_H__
|
|
@ -1,172 +0,0 @@
|
|||
///-------------------------------------------------------------------------------------------------
|
||||
// file: ReferenceCounted.h
|
||||
//
|
||||
// summary: Declares the reference counted class
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
#ifndef __REFERENCE_COUNTED_H__
|
||||
#define __REFERENCE_COUNTED_H__
|
||||
|
||||
#include <Windows.h>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <stdio.h>
|
||||
#include <crtdbg.h>
|
||||
#include <set>
|
||||
#include "primitive_types.h"
|
||||
#include "Lockable.h"
|
||||
|
||||
namespace PTask {
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Reference counted super-class, allowing to share implementation of ref count
|
||||
/// management code.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/28/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
class ReferenceCounted : public Lockable
|
||||
{
|
||||
public:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Default constructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/27/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
ReferenceCounted();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Constructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/28/2011. </remarks>
|
||||
///
|
||||
/// <param name="lpszProtectedObjectName"> [in] non-null, name of the protected object. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
ReferenceCounted(char * lpszProtectedObjectName);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/27/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
virtual ~ReferenceCounted();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Adds a reference. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/22/2011. </remarks>
|
||||
///
|
||||
/// <returns> . </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual LONG AddRef();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Release a reference. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/22/2011. </remarks>
|
||||
///
|
||||
/// <returns> . </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual LONG Release();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the reference count. (for debugging only) </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/28/2011. </remarks>
|
||||
///
|
||||
/// <returns> current reference count for the object. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
LONG RefCount();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Datablock.toString() </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/27/2011. </remarks>
|
||||
///
|
||||
/// <param name="os"> [in,out] The operating system. </param>
|
||||
/// <param name="pChannel"> [in,out] If non-null, the channel. </param>
|
||||
///
|
||||
/// <returns> The shifted result. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
friend std::ostream& operator<<(std::ostream &os, ReferenceCounted * pBlock);
|
||||
|
||||
protected:
|
||||
|
||||
/// <summary> Number of outanding references to this object.
|
||||
/// When m_uiRefCount drops to zero, it will be
|
||||
/// garbage collected. NB: Ideally, the refcount would be private. However,
|
||||
/// class Datablock inherits from ReferenceCounted but has to override Release to return blocks to
|
||||
/// their block pools rather than deleting them (if they are pooled). Doing this requires the
|
||||
/// ability to do interlocked operations on the m_uiRefCount member of the super-class. A sad
|
||||
/// side effect of this is that we are forced to make m_uiRefCount protected rather than private.
|
||||
/// </summary>
|
||||
LONG m_uiRefCount;
|
||||
|
||||
public:
|
||||
|
||||
/// <summary> The unique id of this RC object. </summary>
|
||||
LONG m_uiUID;
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Initializes the refcount profiler. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 2/24/2012. </remarks>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static BOOL RCProfileInitialize(BOOL bEnable);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Dumps the refcount profiler leaks. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 2/24/2012. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static void RCProfileDumpLeaks();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Profile allocation. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 2/24/2012. </remarks>
|
||||
///
|
||||
/// <param name="pBlock"> [in,out] If non-null, the item. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static void RCProfileAllocation(ReferenceCounted * pItem);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Profile deletion. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 2/24/2012. </remarks>
|
||||
///
|
||||
/// <param name="pBlock"> [in,out] If non-null, the item. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static void RCProfileDeletion(ReferenceCounted * pItem);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets a string describing this refcount object. Allows subclasses to
|
||||
/// provide overrides that make leaks easier to find when detected by the
|
||||
/// rc profiler.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/9/2013. </remarks>
|
||||
///
|
||||
/// <returns> null if it fails, else the rectangle profile descriptor. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual std::string GetRCProfileDescriptor();
|
||||
|
||||
};
|
||||
};
|
||||
|
||||
#endif // __REFERENCE_COUNTED_H__
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -1,249 +0,0 @@
|
|||
///-------------------------------------------------------------------------------------------------
|
||||
// file: ScopedPoolManager.h
|
||||
//
|
||||
// summary: Declares the scoped pool manager class
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
#ifndef __SCOPED_POOL_MANAGER__
|
||||
#define __SCOPED_POOL_MANAGER__
|
||||
|
||||
#include <stdio.h>
|
||||
#include <crtdbg.h>
|
||||
#include <Windows.h>
|
||||
#include "datablock.h"
|
||||
#include "GlobalBlockPool.h"
|
||||
#include "ptlock.h"
|
||||
#include <deque>
|
||||
#include <map>
|
||||
#include <tuple>
|
||||
|
||||
namespace PTask {
|
||||
|
||||
class CompiledKernel;
|
||||
class Graph;
|
||||
class Channel;
|
||||
class Port;
|
||||
class Task;
|
||||
class Datablock;
|
||||
class DatablockTemplate;
|
||||
|
||||
class ScopedPoolManager : public Lockable {
|
||||
|
||||
typedef std::tuple<DatablockTemplate*, int, int, int, int> POOLDESCRIPTOR;
|
||||
|
||||
public:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Default constructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 3/27/2014. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
ScopedPoolManager(Graph * pScopedGraph);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 3/27/2014. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual ~ScopedPoolManager();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Require block pool. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/20/2013. </remarks>
|
||||
///
|
||||
/// <param name="pTemplate"> [in,out] If non-null, the template. </param>
|
||||
/// <param name="nDataSize"> Size of the data. </param>
|
||||
/// <param name="nMetaSize"> Size of the meta. </param>
|
||||
/// <param name="nTemplateSize"> Size of the template. </param>
|
||||
/// <param name="nBlocks"> (Optional) The blocks. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
BOOL
|
||||
RequireBlockPool(
|
||||
__in DatablockTemplate * pTemplate,
|
||||
__in int nDataSize,
|
||||
__in int nMetaSize,
|
||||
__in int nTemplateSize,
|
||||
__in int nBlocks=0
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Require block pool. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/20/2013. </remarks>
|
||||
///
|
||||
/// <param name="nDataSize"> Size of the data. </param>
|
||||
/// <param name="nMetaSize"> Size of the meta. </param>
|
||||
/// <param name="nTemplateSize"> Size of the template. </param>
|
||||
/// <param name="nBlocks"> (Optional) The blocks. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
BOOL
|
||||
RequireBlockPool(
|
||||
__in int nDataSize,
|
||||
__in int nMetaSize,
|
||||
__in int nTemplateSize,
|
||||
__in int nBlocks=0
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Require block pool. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/20/2013. </remarks>
|
||||
///
|
||||
/// <param name="pTemplate"> [in,out] If non-null, the template. </param>
|
||||
/// <param name="nBlocks"> (Optional) The blocks. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
BOOL
|
||||
RequireBlockPool(
|
||||
__in DatablockTemplate * pTemplate,
|
||||
__in int nBlocks=0
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Find a block pool for the block. If there is no good fit,
|
||||
/// create one if the bCreateIfNotFound flag is set.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/30/2013. </remarks>
|
||||
///
|
||||
/// <param name="pBlock"> [in,out] If non-null, the block. </param>
|
||||
/// <param name="bCreateIfNotFound"> The create if not found. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
BOOL
|
||||
AddBlockToBestFitPool(
|
||||
__in Datablock * pBlock,
|
||||
__in BOOL bCreateIfNotFound
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Determines if we can allocate pools. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/20/2013. </remarks>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
BOOL AllocatePools();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destroys the pools. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/20/2013. </remarks>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
BOOL DestroyPools();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Allocate datablock. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/20/2013. </remarks>
|
||||
///
|
||||
/// <param name="pTemplate"> [in,out] If non-null, the template. </param>
|
||||
/// <param name="uiDataSize"> Size of the data. </param>
|
||||
/// <param name="uiMetaSize"> Size of the meta. </param>
|
||||
/// <param name="uiTemplateSize"> Size of the template. </param>
|
||||
///
|
||||
/// <returns> null if it fails, else. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
Datablock *
|
||||
AllocateDatablock(
|
||||
__in DatablockTemplate * pTemplate,
|
||||
__in UINT uiDataSize,
|
||||
__in UINT uiMetaSize,
|
||||
__in UINT uiTemplateSize
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Request a pooled block. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/21/2013. </remarks>
|
||||
///
|
||||
/// <param name="pTemplate"> [in,out] If non-null, the template. </param>
|
||||
/// <param name="uiDataSize"> Size of the data. </param>
|
||||
/// <param name="uiMetaSize"> Size of the meta. </param>
|
||||
/// <param name="uiTemplateSize"> Size of the template. </param>
|
||||
///
|
||||
/// <returns> null if it fails, else. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
Datablock *
|
||||
RequestBlock(
|
||||
__in DatablockTemplate * pTemplate,
|
||||
__in UINT uiDataSize,
|
||||
__in UINT uiMetaSize,
|
||||
__in UINT uiTemplateSize
|
||||
);
|
||||
|
||||
protected:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Searches for the first matching pool. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/20/2013. </remarks>
|
||||
///
|
||||
/// <param name="pTemplate"> [in,out] If non-null, the template. </param>
|
||||
/// <param name="uiDataSize"> Size of the data. </param>
|
||||
/// <param name="uiMetaSize"> Size of the meta. </param>
|
||||
/// <param name="uiTemplateSize"> Size of the template. </param>
|
||||
/// <param name="uiBlockControlCode"> The block control code. </param>
|
||||
///
|
||||
/// <returns> null if it fails, else the found matching pool. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
GlobalBlockPool *
|
||||
FindMatchingPool(
|
||||
__in DatablockTemplate * pTemplate,
|
||||
__in UINT uiDataSize,
|
||||
__in UINT uiMetaSize,
|
||||
__in UINT uiTemplateSize
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Find a block pool for the block. If there is no good fit,
|
||||
/// create one if the bCreateIfNotFound flag is set.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/30/2013. </remarks>
|
||||
///
|
||||
/// <param name="pBlock"> [in,out] If non-null, the block. </param>
|
||||
/// <param name="bCreateIfNotFound"> TRUE to create a pool when no good fit is found. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
BOOL
|
||||
__AddBlockToBestFitPool(
|
||||
__in Datablock * pBlock,
|
||||
__in BOOL bCreateIfNotFound
|
||||
);
|
||||
|
||||
Graph * m_pGraph;
|
||||
BOOL m_bPoolsAllocated;
|
||||
BOOL m_bDestroyed;
|
||||
std::map<int, POOLDESCRIPTOR> m_vRequiredPoolsUntyped;
|
||||
std::map<DatablockTemplate*, POOLDESCRIPTOR> m_vRequiredPoolsTyped;
|
||||
std::map<int, GlobalBlockPool*> m_vUntypedBlockPools;
|
||||
std::map<DatablockTemplate*, GlobalBlockPool*> m_vTypedBlockPools;
|
||||
|
||||
};
|
||||
|
||||
};
|
||||
|
||||
#endif
|
|
@ -1,377 +0,0 @@
|
|||
//--------------------------------------------------------------------------------------
|
||||
// File: StickyPort.h
|
||||
// Maintainer: crossbac@microsoft.com
|
||||
//--------------------------------------------------------------------------------------
|
||||
#ifndef _STICKY_PORT_H_
|
||||
#define _STICKY_PORT_H_
|
||||
|
||||
#include "primitive_types.h"
|
||||
#include "port.h"
|
||||
#include <vector>
|
||||
#include <sstream>
|
||||
|
||||
namespace PTask {
|
||||
|
||||
class Channel;
|
||||
class Datablock;
|
||||
class Accelerator;
|
||||
class DatablockTemplate;
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Sticky port. A port that is bound to scalar values in kernel code, with read-only
|
||||
/// semantics. Typically these values are bound to constant memory on a device where
|
||||
/// specialized memories are available. A sticky port also retains its last value: if
|
||||
/// no new datablock is available on its incoming channel it will redeliver the last
|
||||
/// datablock pulled from it on the next call to Pull.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/11/2012. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
class StickyPort : public Port {
|
||||
public:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Default constructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/11/2012. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
StickyPort();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/11/2012. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual ~StickyPort();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if this object is occupied. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/11/2012. </remarks>
|
||||
///
|
||||
/// <returns> true if occupied, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL IsOccupied();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Pulls the next datablock from this port. Return the last datablock if no new
|
||||
/// block is available on the incoming channel.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/11/2012. </remarks>
|
||||
///
|
||||
/// <returns> null if it fails, else the next datablock (or the last one delivered, if no new block is available). </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual Datablock * Pull();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Returns the datablock that would be returned by the next call to Pull, without
|
||||
/// removing it from the port.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/11/2012. </remarks>
|
||||
///
|
||||
/// <returns> null if it fails, else the current top-of-stack object. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual Datablock * Peek();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> The push method is required by the abstract class Port, but has no meaning for
|
||||
/// sticky ports. This method is a no-op for StickyPort.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/11/2012. </remarks>
|
||||
///
|
||||
/// <param name="p"> [in,out] If non-null, the Datablock* to push. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL Push(Datablock* p);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the destination datablock for this port. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/11/2012. </remarks>
|
||||
///
|
||||
/// <param name="pAccelerator"> (optional) [in,out] If non-null, the accelerator. </param>
|
||||
///
|
||||
/// <returns> null if it fails, else the destination buffer. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual Datablock * GetDestinationBuffer(Accelerator * pAccelerator=NULL);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Sets a destination buffer. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/11/2012. </remarks>
|
||||
///
|
||||
/// <param name=""> [in,out] If non-null, the datablock to use as the destination buffer. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void SetDestinationBuffer(Datablock *);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Sets a block to be the permanently sticky block for this port. Obviously, only
|
||||
/// valid for certain kinds of ports (input varieties). Use for blocks that will have
|
||||
/// only one value for the lifetime of the graph, to avoid creating and managing an
|
||||
/// exposed channel or initializer channel that will only ever be used once. Do not
|
||||
/// connect an upstream channel to ports that have been configured with a permanent
|
||||
/// block.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/19/2011. </remarks>
|
||||
///
|
||||
/// <param name="p"> If non-null, the Datablock* to push. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void SetPermanentBlock(Datablock * p);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if this object has a block pool. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
|
||||
/// <returns> true if block pool, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL HasBlockPool();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Force block pooling for a port that has an up-stream allocator. In general, when
|
||||
/// we have an upstream allocator (meta) port, the runtime will not create a block
|
||||
/// pool for the corresponding output port. This turns out to put device-side
|
||||
/// allocation on the critical path in some cases, so we provide a way to override
|
||||
/// that behavior and allow a port to create a pool based on some size hints. When
|
||||
/// there is a block available with sufficient space in the pool, the meta port can
|
||||
/// avoid the allocation and draw from the pool.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 9/25/2012. </remarks>
|
||||
///
|
||||
/// <param name="nPoolSize"> Size of the block pool. </param>
|
||||
/// <param name="nStride"> The stride. </param>
|
||||
/// <param name="nDataBytes"> The data in bytes. </param>
|
||||
/// <param name="nMetaBytes"> The meta in bytes. </param>
|
||||
/// <param name="nTemplateBytes"> The template in bytes. </param>
|
||||
/// <param name="bPageLockHostViews"> (optional) the page lock host views. </param>
|
||||
/// <param name="bEagerDeviceMaterialize"> (optional) the eager device materialize. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void
|
||||
ForceBlockPoolHint(
|
||||
__in UINT nPoolSize,
|
||||
__in UINT nStride,
|
||||
__in UINT nDataBytes,
|
||||
__in UINT nMetaBytes,
|
||||
__in UINT nTemplateBytes,
|
||||
__in BOOL bPageLockHostViews=FALSE,
|
||||
__in BOOL bEagerDeviceMaterialize=FALSE
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Allocate block pool. Attempt to preallocate blocks on this port.
|
||||
///
|
||||
/// Allocation of data-blocks and platform-specific buffers can be a significant
|
||||
/// latency expense at dispatch time. We can actually preallocate output datablocks
|
||||
/// and create device- side buffers at graph construction time. For each node in the
|
||||
/// graph, allocate data blocks on any output ports, and create device-specific
|
||||
/// buffers for all accelerators capable of executing the node.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/15/2012. </remarks>
|
||||
///
|
||||
/// <param name="pAccelerators"> [in] If non-null, the accelerators on which views of blocks
|
||||
/// allocated in the pool may be required. </param>
|
||||
/// <param name="uiPoolSize"> [in] (optional) Size of the pool. If zero/defaulted,
|
||||
/// Runtime::GetICBlockPoolSize() will be used to determine the
|
||||
/// size of the pool. </param>
|
||||
///
|
||||
/// <returns> True if it succeeds, false if it fails. If a port type doesn't actually implement
|
||||
/// pooling, return false as well.
|
||||
/// </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
AllocateBlockPool(
|
||||
__in std::vector<Accelerator*>* pAccelerators,
|
||||
__in unsigned int uiPoolSize=0
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destroys the block pool. AddRef everything in the pool, set its owner
|
||||
/// to null, and then release it. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/17/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void
|
||||
DestroyBlockPool(
|
||||
VOID
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Queries if a block pool is active and able to deliver/return blocks. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/18/2013. </remarks>
|
||||
///
|
||||
/// <returns> true if a block pool is active, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
IsBlockPoolActive(
|
||||
VOID
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Allocate block pool. Attempt to preallocate blocks on this port.
|
||||
/// Asynchronous version. Only allocates device-space buffers
|
||||
/// in the first pass. Second pass queues all the copies.
|
||||
/// This function handles only the first pass.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/15/2012. </remarks>
|
||||
///
|
||||
/// <param name="pAccelerators"> [in] If non-null, the accelerators on which views of blocks
|
||||
/// allocated in the pool may be required. </param>
|
||||
/// <param name="uiPoolSize"> [in] (optional) Size of the pool. If zero/defaulted,
|
||||
/// Runtime::GetICBlockPoolSize() will be used to determine the
|
||||
/// size of the pool. </param>
|
||||
///
|
||||
/// <returns> True if it succeeds, false if it fails. If a port type doesn't actually implement
|
||||
/// pooling, return false as well.
|
||||
/// </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
AllocateBlockPoolAsync(
|
||||
__in std::vector<Accelerator*>* pAccelerators,
|
||||
__in unsigned int uiPoolSize=0
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Allocate block pool. Attempt to preallocate blocks on this port.
|
||||
/// Asynchronous version. Only allocates device-space buffers
|
||||
/// in the first pass. Second pass queues all the copies.
|
||||
/// This function handles the second pass.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/15/2012. </remarks>
|
||||
///
|
||||
/// <param name="pAccelerators"> [in] If non-null, the accelerators on which views of blocks
|
||||
/// allocated in the pool may be required. </param>
|
||||
/// <param name="uiPoolSize"> [in] (optional) Size of the pool. If zero/defaulted,
|
||||
/// Runtime::GetICBlockPoolSize() will be used to determine the
|
||||
/// size of the pool. </param>
|
||||
///
|
||||
/// <returns> True if it succeeds, false if it fails. If a port type doesn't actually implement
|
||||
/// pooling, return false as well.
|
||||
/// </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
FinalizeBlockPoolAsync(
|
||||
VOID
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> add a new block to the pool. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
|
||||
/// <param name="pBlock"> [in,out] If non-null, the block. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void AddNewBlock(Datablock * pBlock);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> return a block to the pool. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
|
||||
/// <param name="pBlock"> [in,out] If non-null, the block. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void ReturnToPool(Datablock * pBlock);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets pool size. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
|
||||
/// <returns> The pool size. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual UINT GetPoolSize();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Sets request page locked. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
|
||||
/// <param name="bPageLocked"> true to lock, false to unlock the page. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void SetRequestsPageLocked(BOOL bPageLocked);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets request page locked. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL GetRequestsPageLocked();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Creates this object. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/11/2012. </remarks>
|
||||
///
|
||||
/// <param name="pTemplate"> [in,out] If non-null, the template. </param>
|
||||
/// <param name="uiId"> The identifier. </param>
|
||||
/// <param name="lpszVariableBinding"> [in,out] If non-null, the variable binding. </param>
|
||||
/// <param name="nParmIdx"> Zero-based index of the n parm. </param>
|
||||
/// <param name="nInOutRouteIdx"> Zero-based index of the n in out route. </param>
|
||||
///
|
||||
/// <returns> null if it fails, else the new port. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static Port * Create(DatablockTemplate * pTemplate,
|
||||
UINT uiId,
|
||||
char * lpszVariableBinding,
|
||||
int nParmIdx,
|
||||
int nInOutRouteIdx
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Check type-specific semantics. Return true if all the structures are initialized
|
||||
/// for this port in a way that is consistent with a well-formed graph. Called by
|
||||
/// CheckSemantics()
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/27/2011. </remarks>
|
||||
///
|
||||
/// <param name="pos"> [in,out] output string stream. </param>
|
||||
/// <param name="pGraph"> [in,out] non-null, the graph. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL CheckTypeSpecificSemantics(std::ostream * pos,
|
||||
PTask::Graph * pGraph);
|
||||
protected:
|
||||
|
||||
/// <summary> The sticky datablock </summary>
|
||||
Datablock * m_pStickyDatablock;
|
||||
};
|
||||
|
||||
};
|
||||
#endif
|
|
@ -1,230 +0,0 @@
|
|||
///-------------------------------------------------------------------------------------------------
|
||||
// file: SyncPoint.h
|
||||
//
|
||||
// summary: Declares the synchronise point class
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
#ifndef __SYNC_POINT_H__
|
||||
#define __SYNC_POINT_H__
|
||||
|
||||
#include "primitive_types.h"
|
||||
#include "ReferenceCounted.h"
|
||||
|
||||
namespace PTask {
|
||||
|
||||
class AsyncContext;
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> A synchronization point, on which dependences may be created, so that other
|
||||
/// threads/downstream operations can wait until dependences on previous operations
|
||||
/// in this context have resolved.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/25/2012. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
class SyncPoint : public ReferenceCounted {
|
||||
public:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Constructor. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 5/25/2012. </remarks>
|
||||
///
|
||||
/// <param name="_pAsyncContext"> [in] If non-null, context for the asynchronous. </param>
|
||||
/// <param name="_pPlatformAsyncContextObject"> [in] non-null, the platform-specific asynchronous
|
||||
/// context object. E.g. the stream in CUDA, the
|
||||
/// ID3D11ImmediateContext object in DirectX and so
|
||||
/// on. </param>
|
||||
/// <param name="_pPlatformAsyncWaitObject"> [in] non-null, a platform-specific asynchronous
|
||||
/// wait object. E.g. a windows event or a cuda event
|
||||
/// object, etc. </param>
|
||||
/// <param name="_pPlatformParentSyncObject"> The platform parent synchronise object. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
SyncPoint(
|
||||
__in AsyncContext * _pAsyncContext,
|
||||
__in void * _pPlatformAsyncContextObject,
|
||||
__in void * _pPlatformAsyncWaitObject,
|
||||
__in void * _pPlatformParentSyncObject
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/25/2012. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual ~SyncPoint();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the platform context object. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/25/2012. </remarks>
|
||||
///
|
||||
/// <returns> null if it fails, else the platform context object. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void * GetPlatformContextObject();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the platform wait object. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/25/2012. </remarks>
|
||||
///
|
||||
/// <returns> null if it fails, else the platform wait object. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void * GetPlatformWaitObject();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the platform parent synchronisation object. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/25/2012. </remarks>
|
||||
///
|
||||
/// <returns> null if it fails, else the platform parent synchronisation object. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void * GetPlatformParentObject();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if this sync point is *definitely* resolved. If this returns false, then
|
||||
/// the sync point represents completed work and no lock is required to check this
|
||||
/// since the transition is monotonic. If it returns TRUE indicating the work is
|
||||
/// still outstanding, that doesn't mean the sync point hasn't resolved. It just
|
||||
/// means the caller should acquire locks and call QueryOutstanding to get a higher
|
||||
/// fidelity answer.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/25/2012. </remarks>
|
||||
///
|
||||
/// <returns> true if outstanding, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
BOOL
|
||||
QueryOutstandingFlag(
|
||||
VOID
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if this sync point represents outstanding work or work that has been
|
||||
/// completed.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/25/2012. </remarks>
|
||||
///
|
||||
/// <param name="pAsyncContext"> [in,out] If non-null, context for the asynchronous. </param>
|
||||
///
|
||||
/// <returns> true if outstanding, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
BOOL
|
||||
QueryOutstanding(
|
||||
__in AsyncContext * pAsyncContext
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if this sync point represents outstanding work or work that has been
|
||||
/// completed without blocking to acquire the locks needed to update async context
|
||||
/// and accelerator state when a state change on this sync point is detected.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/25/2012. </remarks>
|
||||
///
|
||||
/// <returns> true if outstanding, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
BOOL
|
||||
NonblockingQueryOutstanding(
|
||||
VOID
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Marks this sync point as retired, meaning all the ops preceding it
|
||||
/// are known to be complete. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/25/2012. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void
|
||||
MarkRetired(
|
||||
__in BOOL bContextSynchronized,
|
||||
__in BOOL bStatusQueried
|
||||
);
|
||||
|
||||
/////-------------------------------------------------------------------------------------------------
|
||||
///// <summary> Release by decrementing the refcount. We override the implementation inherited
|
||||
///// from ReferenceCounted so that we can figure out if the outstanding list
|
||||
///// for the containing async context can be garbage collected. If the refcount
|
||||
///// goes from 2 to 1, that *should* mean that its async context holds the only
|
||||
///// reference, and therefore we can retire it.
|
||||
///// </summary>
|
||||
/////
|
||||
///// <remarks> Crossbac, 12/19/2011. </remarks>
|
||||
/////
|
||||
///// <returns> . </returns>
|
||||
/////-------------------------------------------------------------------------------------------------
|
||||
|
||||
//virtual LONG Release();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets asynchronous context. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 5/1/2013. </remarks>
|
||||
///
|
||||
/// <returns> null if it fails, else the asynchronous context. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
AsyncContext * GetAsyncContext();
|
||||
|
||||
protected:
|
||||
|
||||
/// <summary> The platform-specific asynchronous context object.
|
||||
/// Maps loosely to the abstraction of an independent command
|
||||
/// queue for a given device context.
|
||||
/// </summary>
|
||||
void * m_pPlatformAsyncContextObject;
|
||||
|
||||
/// <summary> The platform-specific asynchronous wait object. </summary>
|
||||
void * m_pPlatformAsyncWaitObject;
|
||||
|
||||
/// <summary> The platform parent synchronisation object--not used by all platforms. </summary>
|
||||
void * m_pPlatformParentSyncObject;
|
||||
|
||||
/// <summary> Context for the outstanding asynchronous operations. </summary>
|
||||
AsyncContext * m_pAsyncContext;
|
||||
|
||||
/// <summary> true if ops preceding this sync-point are known to
|
||||
/// be outstanding (or rather, conservatively, not known
|
||||
/// to be complete). </summary>
|
||||
BOOL m_bOutstanding;
|
||||
|
||||
/// <summary> true if we queried the underlying event to figure out
|
||||
/// that the sync point was no longer outstanding. </summary>
|
||||
BOOL m_bStatusQueried;
|
||||
|
||||
/// <summary> true if the context was synchronized, causing the
|
||||
/// sync point to be no longer outstanding.
|
||||
/// </summary>
|
||||
BOOL m_bContextSynchronized;
|
||||
|
||||
friend class AsyncContext;
|
||||
friend class AsyncDependence;
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets a string describing this refcount object. Allows subclasses to
|
||||
/// provide overrides that make leaks easier to find when detected by the
|
||||
/// rc profiler.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/9/2013. </remarks>
|
||||
///
|
||||
/// <returns> A string describing this refcount (RC) object for the rc profiler. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual std::string GetRCProfileDescriptor();
|
||||
};
|
||||
|
||||
};
|
||||
|
||||
#endif
|
|
@ -1,347 +0,0 @@
|
|||
///-------------------------------------------------------------------------------------------------
|
||||
// file: ThreadPool.h
|
||||
//
|
||||
// summary: Declares the thread pool class
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
#ifndef __THREAD_POOL_H__
|
||||
#define __THREAD_POOL_H__
|
||||
|
||||
#include <stdio.h>
|
||||
#include <crtdbg.h>
|
||||
#include <Windows.h>
|
||||
#include <deque>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include "Lockable.h"
|
||||
#include "PTaskRuntime.h"
|
||||
|
||||
namespace PTask {
|
||||
|
||||
class ThreadPool;
|
||||
|
||||
class THREADDESC {
|
||||
public:
|
||||
CRITICAL_SECTION lock;
|
||||
HANDLE hThread;
|
||||
HANDLE hStartEvent;
|
||||
HANDLE hTerminateEvent;
|
||||
BOOL bRoutineValid;
|
||||
BOOL bTerminate;
|
||||
BOOL bActive;
|
||||
LPTHREAD_START_ROUTINE lpRoutine;
|
||||
LPVOID lpParameter;
|
||||
BOOL bDeleteOnThreadExit;
|
||||
BOOL bRemoveFromPoolOnThreadExit;
|
||||
ThreadPool * pThreadPool;
|
||||
THREADDESC(ThreadPool*pPool) {
|
||||
InitializeCriticalSection(&lock);
|
||||
hThread = INVALID_HANDLE_VALUE;
|
||||
hStartEvent = CreateEvent(NULL, FALSE, FALSE, NULL);
|
||||
hTerminateEvent = PTask::Runtime::GetRuntimeTerminateEvent();
|
||||
bRoutineValid = FALSE;
|
||||
bTerminate = FALSE;
|
||||
bActive = FALSE;
|
||||
lpRoutine = NULL;
|
||||
lpParameter = NULL;
|
||||
bDeleteOnThreadExit = FALSE;
|
||||
bRemoveFromPoolOnThreadExit = FALSE;
|
||||
pThreadPool = pPool;
|
||||
}
|
||||
~THREADDESC() {
|
||||
DeleteCriticalSection(&lock);
|
||||
}
|
||||
void Lock() { EnterCriticalSection(&lock); }
|
||||
void Unlock() { LeaveCriticalSection(&lock); }
|
||||
};
|
||||
|
||||
class ThreadPool : public Lockable {
|
||||
|
||||
static const int DEFGROWINC=2;
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Constructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <param name="nThreads"> The number of threads. </param>
|
||||
/// <param name="bPrimeThreads"> The prime threads. </param>
|
||||
/// <param name="bGrowable"> The growable. </param>
|
||||
/// <param name="uiGrowIncrement"> The grow increment. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
ThreadPool(
|
||||
__in UINT nThreads,
|
||||
__in BOOL bPrimeThreads,
|
||||
__in BOOL bGrowable,
|
||||
__in UINT uiGrowIncrement
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual ~ThreadPool();
|
||||
|
||||
public:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Creates this object. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/22/2013. </remarks>
|
||||
///
|
||||
/// <param name="uiThreads"> The threads. </param>
|
||||
/// <param name="bPrimeThreads"> The threads. </param>
|
||||
/// <param name="bGrowable"> The growable. </param>
|
||||
/// <param name="uiGrowIncrement"> The grow increment. </param>
|
||||
///
|
||||
/// <returns> null if it fails, else. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static ThreadPool *
|
||||
Create(
|
||||
__in UINT uiThreads,
|
||||
__in BOOL bPrimeThreads,
|
||||
__in BOOL bGrowable,
|
||||
__in UINT uiGrowIncrement=DEFGROWINC
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destroys this object. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/22/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static void Destroy();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets pool size. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
|
||||
/// <returns> The pool size. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual UINT GetPoolSize();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets pool size. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
|
||||
/// <returns> The pool size. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual UINT GetCurrentPoolSize();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets target pool size. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/22/2013. </remarks>
|
||||
///
|
||||
/// <returns> The target pool size. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual UINT GetTargetPoolSize();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Sets pool size. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/22/2013. </remarks>
|
||||
///
|
||||
/// <param name="uiThreads"> The threads. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void SetPoolSize(UINT uiThreads);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets grow increment. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/22/2013. </remarks>
|
||||
///
|
||||
/// <returns> The grow increment. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual UINT GetGrowIncrement();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Sets grow increment. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/22/2013. </remarks>
|
||||
///
|
||||
/// <param name="uiIncrement"> Amount to increment by. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void SetGrowIncrement(UINT uiIncrement);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Request thread. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/22/2013. </remarks>
|
||||
///
|
||||
/// <param name="lpRoutine"> The routine. </param>
|
||||
/// <param name="lpParameter"> The parameter. </param>
|
||||
/// <param name="bStartThread"> true if the thread can be signaled to start
|
||||
/// before returning from this call, false if the
|
||||
/// caller would prefer to signal it explicitly. </param>
|
||||
///
|
||||
/// <returns> The handle of the thread. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static HANDLE
|
||||
RequestThread(
|
||||
__in LPTHREAD_START_ROUTINE lpRoutine,
|
||||
__in LPVOID lpParameter,
|
||||
__in BOOL bStartThread
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Starts a thread: if a previous call to RequestThread was made with
|
||||
/// the bStartThread parameter set to false, this API signals the thread
|
||||
/// to begin. Otherwise, the call has no effect (returns FALSE). </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/29/2013. </remarks>
|
||||
///
|
||||
/// <param name="hThread"> The thread. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static BOOL
|
||||
StartThread(
|
||||
__in HANDLE hThread
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets a thread. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/22/2013. </remarks>
|
||||
///
|
||||
/// <param name="lpRoutine"> The routine. </param>
|
||||
/// <param name="lpParameter"> The parameter. </param>
|
||||
///
|
||||
/// <returns> The thread. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
HANDLE
|
||||
GetThread(
|
||||
__in LPTHREAD_START_ROUTINE lpRoutine,
|
||||
__in LPVOID lpParameter,
|
||||
__in BOOL bStartThread
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Starts a thread: if a previous call to RequestThread was made with
|
||||
/// the bStartThread parameter set to false, this API signals the thread
|
||||
/// to begin. Otherwise, the call has no effect (returns FALSE). </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/29/2013. </remarks>
|
||||
///
|
||||
/// <param name="hThread"> The thread. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
BOOL
|
||||
SignalThread(
|
||||
__in HANDLE hThread
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Thread pool proc. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/22/2011. </remarks>
|
||||
///
|
||||
/// <param name="pVoidCastGraph"> the graph object, typecast to void* </param>
|
||||
///
|
||||
/// <returns> DWORD: 0 on thread exit. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static DWORD WINAPI _ThreadPoolProc(LPVOID pDesc);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Thread pool proc. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/22/2013. </remarks>
|
||||
///
|
||||
/// <param name="pDesc"> The description. </param>
|
||||
///
|
||||
/// <returns> . </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
DWORD
|
||||
ThreadPoolProc(
|
||||
__in THREADDESC * pDesc
|
||||
);
|
||||
|
||||
protected:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Notifies a thread alive. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/22/2013. </remarks>
|
||||
///
|
||||
/// <param name="hThread"> Handle of the thread. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void NotifyThreadAlive(HANDLE hThread);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Notifies a thread exit. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/22/2013. </remarks>
|
||||
///
|
||||
/// <param name="hThread"> Handle of the thread. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void NotifyThreadExit(HANDLE hThread);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Wait threads alive. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/22/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void WaitThreadsAlive();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Starts the threads. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/22/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void StartThreads(UINT uiThreads, BOOL bWaitAlive);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Prime thread. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/22/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void PrimeThread();
|
||||
|
||||
std::map<HANDLE, THREADDESC*> m_vhThreadDescs;
|
||||
std::deque<HANDLE> m_vhAvailable;
|
||||
std::set<HANDLE> m_vhInFlight;
|
||||
std::set<HANDLE> m_vhWaitingStartSignal;
|
||||
std::set<THREADDESC*> m_vZombieThreadDescs;
|
||||
UINT m_uiThreads;
|
||||
UINT m_uiTargetSize;
|
||||
BOOL m_bPrimeThreads;
|
||||
BOOL m_bGrowable;
|
||||
UINT m_uiGrowIncrement;
|
||||
UINT m_uiThreadsAlive;
|
||||
HANDLE m_hAllThreadsAlive;
|
||||
HANDLE m_hAllThreadsExited;
|
||||
UINT m_uiAliveWaiters;
|
||||
UINT m_uiExitWaiters;
|
||||
BOOL m_bExiting;
|
||||
|
||||
static ThreadPool * g_pThreadPool;
|
||||
|
||||
};
|
||||
|
||||
};
|
||||
#endif
|
|
@ -1,46 +0,0 @@
|
|||
///-------------------------------------------------------------------------------------------------
|
||||
// file: Tracer.h
|
||||
//
|
||||
// summary: Declares the tracer class
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
#ifndef __PTASK_TRACER_H__
|
||||
#define __PTASK_TRACER_H__
|
||||
#include <Windows.h>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <wmistr.h>
|
||||
#include <evntrace.h>
|
||||
#include "PTaskRuntime.h"
|
||||
|
||||
namespace PTask {
|
||||
namespace Runtime {
|
||||
|
||||
// Dynamically linked etw logging function.
|
||||
typedef ULONG (WINAPI *LPETWSETMARK)( HANDLE, LPVOID, ULONG );
|
||||
#pragma prefast( suppress:__WARNING_ENCODE_GLOBAL_FUNCTION_POINTER, "This call needs to be performant" );
|
||||
static LPETWSETMARK gs_pEtwSetMark = NULL ;
|
||||
|
||||
#define TRACER_MAX_MSG_LEN 64
|
||||
typedef struct _ETW_SET_MARK_INFORMATION {
|
||||
ULONG Flag;
|
||||
CHAR Mark[TRACER_MAX_MSG_LEN];
|
||||
} ETW_SET_MARK_INFORMATION;
|
||||
|
||||
class Tracer
|
||||
{
|
||||
public:
|
||||
Tracer(void);
|
||||
virtual ~Tracer(void);
|
||||
|
||||
static VOID EtwSetMarkA(char *msg);
|
||||
static ULONG LogDispatchEvent(char * lpszTaskName, BOOL bStart, UINT uiAcceleratorId, UINT uiDispatchNumber);
|
||||
static ULONG LogBufferSyncEvent(void * pbufferInstance, BOOL bStart, void * parentDatablock, UINT uiAcceleratorId);
|
||||
private:
|
||||
static VOID Tracer::InitializeETW();
|
||||
};
|
||||
|
||||
};
|
||||
};
|
||||
|
||||
#endif
|
||||
|
|
@ -1,138 +0,0 @@
|
|||
//--------------------------------------------------------------------------------------
|
||||
// File: XMLReader.h
|
||||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
//--------------------------------------------------------------------------------------
|
||||
#ifndef _XMLREADER_H_
|
||||
#define _XMLREADER_H_
|
||||
|
||||
#ifdef XMLSUPPORT
|
||||
|
||||
#ifdef _DEBUG
|
||||
#define _CRTDBG_MAP_ALLOC
|
||||
#include <stdlib.h>
|
||||
#include <crtdbg.h>
|
||||
#endif
|
||||
|
||||
|
||||
#include <vector>
|
||||
#include <map>
|
||||
using namespace std;
|
||||
|
||||
#include <ole2.h>
|
||||
#include <xmllite.h>
|
||||
#include <stdio.h>
|
||||
#include <shlwapi.h>
|
||||
|
||||
/*#include "graph.h"
|
||||
#include "datablock.h"
|
||||
#include "datablocktemplate.h"
|
||||
#include "CompiledKernel.h"
|
||||
#include "primitive_types.h"
|
||||
*/
|
||||
|
||||
#include "primitive_types.h"
|
||||
#include "PTaskRuntime.h"
|
||||
#include "channel.h"
|
||||
|
||||
namespace PTask {
|
||||
|
||||
/// <summary> Exception type declared alongside XMLReader; adds nothing to
///           std::exception (presumably thrown on XML read failures -- confirm
///           against the XMLReader implementation). </summary>
class XMLReaderException
    : public std::exception
{
};
|
||||
|
||||
class XMLReader
|
||||
{
|
||||
public:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Constructor. </summary>
|
||||
///
|
||||
/// <remarks> jcurrey, 5/8/2013. </remarks>
|
||||
///
|
||||
/// <param name="filename"> The name of the file to read XML from. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
XMLReader(const char * filename);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destructor. </summary>
|
||||
///
|
||||
/// <remarks> jcurrey, 5/8/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual ~XMLReader();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the graph. </summary>
|
||||
///
|
||||
/// <remarks> jcurrey, originally </remarks>
|
||||
///
|
||||
/// <returns> null if it fails, else the graph. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
Graph * GetGraph();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets a port. </summary>
|
||||
///
|
||||
/// <remarks> jcurrey, originally </remarks>
|
||||
///
|
||||
/// <param name="portUID"> The port UID. </param>
|
||||
///
|
||||
/// <returns> null if it fails, else the port. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
Port * GetPort(UINT portUID);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Reads a graph. </summary>
|
||||
///
|
||||
/// <remarks> jcurrey, originally. </remarks>
|
||||
///
|
||||
/// <param name="pGraph"> [in,out] If non-null, the graph. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
BOOL ReadGraph(Graph * pGraph);
|
||||
|
||||
BOOL ReadStringElement(const char * elementName, std::string& cvalue);
|
||||
int ReadIntegerElement(const char * elementName);
|
||||
UINT ReadUINTElement(const char * elementName);
|
||||
bool ReadBooleanElement(const char * elementName);
|
||||
|
||||
|
||||
protected:
|
||||
|
||||
const char* ReadTextElement(const char * elementName);
|
||||
BOOL ReadTemplates();
|
||||
BOOL ReadKernels();
|
||||
BOOL ReadTasks();
|
||||
BOOL ReadChannels();
|
||||
BOOL ReadActions();
|
||||
DatablockTemplate * ReadDatablockTemplate();
|
||||
CompiledKernel * ReadCompiledKernel(int& kernelID);
|
||||
Task * ReadTask();
|
||||
Port * ReadPort();
|
||||
Channel * ReadChannel();
|
||||
BOOL ReadNextNode(XmlNodeType requiredType);
|
||||
BOOL ReadElementStartTag(const char * requiredElementName);
|
||||
BOOL ReadElementText(const char *& text);
|
||||
BOOL ReadElementEndTag(const char * requiredElementName);
|
||||
const wchar_t * AllocWideStringCopy(const char * str);
|
||||
const char * AllocStringCopy(LPCWSTR strW);
|
||||
void FreeWideString(const wchar_t * str);
|
||||
void FreeString(const char * str);
|
||||
|
||||
IStream * m_pInFileStream;
|
||||
IXmlReader * m_pReader;
|
||||
|
||||
Graph * m_pGraph;
|
||||
map<string, DatablockTemplate *> m_templateMap;
|
||||
map<int, CompiledKernel *> m_kernelMap;
|
||||
map<UINT, Port *> m_portMap;
|
||||
std::set<const wchar_t*> m_wAllocs;
|
||||
std::set<const char*> m_cAllocs;
|
||||
};
|
||||
|
||||
};
|
||||
#endif
|
||||
#endif
|
|
@ -1,91 +0,0 @@
|
|||
//--------------------------------------------------------------------------------------
|
||||
// File: XMLWriter.h
|
||||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
//--------------------------------------------------------------------------------------
|
||||
#ifndef _XMLWRITER_H_
|
||||
#define _XMLWRITER_H_
|
||||
|
||||
#ifdef XMLSUPPORT
|
||||
|
||||
#ifdef _DEBUG
|
||||
#define _CRTDBG_MAP_ALLOC
|
||||
#include <stdlib.h>
|
||||
#include <crtdbg.h>
|
||||
#endif
|
||||
|
||||
#include <vector>
|
||||
#include <map>
|
||||
|
||||
#include <ole2.h>
|
||||
#include <xmllite.h>
|
||||
#include <stdio.h>
|
||||
#include <shlwapi.h>
|
||||
|
||||
/*#include "graph.h"
|
||||
#include "datablock.h"
|
||||
#include "datablocktemplate.h"
|
||||
#include "CompiledKernel.h"
|
||||
#include "primitive_types.h"
|
||||
*/
|
||||
|
||||
#include "primitive_types.h"
|
||||
#include "PTaskRuntime.h"
|
||||
#include "channel.h"
|
||||
|
||||
namespace PTask {
|
||||
|
||||
/// <summary> Exception type declared alongside XMLWriter; adds nothing to
///           std::exception (presumably thrown on XML write failures -- confirm
///           against the XMLWriter implementation). </summary>
class XMLWriterException
    : public std::exception
{
};
|
||||
|
||||
class XMLWriter
|
||||
{
|
||||
public:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Constructor. </summary>
|
||||
///
|
||||
/// <remarks> jcurrey, 5/5/2013. </remarks>
|
||||
///
|
||||
/// <param name="filename"> The name of the file to write XML to. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
XMLWriter(const char * filename);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destructor. </summary>
|
||||
///
|
||||
/// <remarks> jcurrey, 5/5/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual ~XMLWriter();
|
||||
|
||||
void WriteElementStartTag(const char * elementName);
|
||||
void WriteElementText(const char * text);
|
||||
void WriteElementEndTag();
|
||||
void WriteComment(const char * comment);
|
||||
void WriteEndDocument();
|
||||
|
||||
void WriteElement(const char * elementName, const char * text);
|
||||
void WriteElement(const char * elementName, int elementValue);
|
||||
void WriteElement(const char * elementName, unsigned int elementValue);
|
||||
void WriteElement(const char * elementName, bool elementValue);
|
||||
|
||||
void WriteGraph(Graph * pGraph);
|
||||
void WriteDatablockTemplate(DatablockTemplate * pTemplate);
|
||||
void WriteCompiledKernel(CompiledKernel * pCompiledKernel, int kernelID);
|
||||
void WriteTask(Task * pTask, int kernelID);
|
||||
void WritePorts(std::map<UINT, Port*>* pPorts);
|
||||
void WritePort(Port * pPort);
|
||||
void WriteControlPropagationInfo(Port * pPort);
|
||||
void WriteChannel(Channel * pChannel);
|
||||
void WriteChannelEndpointPredication(Channel * pChannel, CHANNELENDPOINTTYPE eEndpoint);
|
||||
|
||||
protected:
|
||||
const wchar_t * ToWChar(const char * str);
|
||||
|
||||
IStream * m_pOutFileStream;
|
||||
IXmlWriter * m_pWriter;
|
||||
};
|
||||
|
||||
};
|
||||
#endif
|
||||
#endif
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -1,447 +0,0 @@
|
|||
//--------------------------------------------------------------------------------------
|
||||
// File: claccelerator.h
|
||||
// Accelerator built on OpenCL interface
|
||||
// Maintainer: crossbac@microsoft.com
|
||||
//--------------------------------------------------------------------------------------
|
||||
#ifndef _CLACCELERATOR_H_
|
||||
#define _CLACCELERATOR_H_
|
||||
#ifdef OPENCL_SUPPORT
|
||||
|
||||
#include "primitive_types.h"
|
||||
#include "oclhdr.h"
|
||||
#include "accelerator.h"
|
||||
#include <vector>
|
||||
|
||||
namespace PTask {
|
||||
|
||||
class CLAccelerator : public Accelerator
|
||||
{
|
||||
public:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Constructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/20/2011. </remarks>
|
||||
///
|
||||
/// <param name="lpszName"> [in,out] If non-null, the name. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
CLAccelerator(char * lpszName);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Constructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/20/2011. </remarks>
|
||||
///
|
||||
/// <param name="id"> The identifier. </param>
|
||||
/// <param name="lpszName"> [in,out] If non-null, the name. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
CLAccelerator(cl_device_id id, char * lpszName);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/20/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual ~CLAccelerator();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Open the OpenCL accelerator. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/20/2011. </remarks>
|
||||
///
|
||||
/// <returns> . </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
HRESULT Open();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Open the OpenCL accelerator. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/20/2011. </remarks>
|
||||
///
|
||||
/// <param name=""> (optional) the. </param>
|
||||
///
|
||||
/// <returns> . </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
HRESULT Open(cl_device_id=NULL);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the device. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/20/2011. </remarks>
|
||||
///
|
||||
/// <returns> null if it fails, else the device. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void* GetDevice();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the context. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/20/2011. </remarks>
|
||||
///
|
||||
/// <returns> null if it fails, else the context. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void* GetContext();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Creates an asynchronous context for the task. For this OpenCL accelerator the
|
||||
/// backing abstraction is presumably a command queue (see remarks) -- confirm.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 12/20/2011.
|
||||
///
|
||||
/// This method is required of all subclasses, and abstracts the work associated with
|
||||
/// managing whatever framework-level asynchrony abstractions are supported by the
|
||||
/// backend target. For example, CUDA supports the "stream", while DirectX supports
|
||||
/// an ID3D11ImmediateContext, OpenCL has command queues, and so on.
|
||||
/// </remarks>
|
||||
///
|
||||
/// <param name="pTask"> [in] non-null, the task for which the asynchronous
|
||||
/// context is created. </param>
|
||||
/// <param name="eAsyncContextType"> Type of the asynchronous context. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual AsyncContext *
|
||||
PlatformSpecificCreateAsyncContext(
|
||||
__in Task * pTask,
|
||||
__in ASYNCCONTEXTTYPE eAsyncContextType
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the OpenCL command queue. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/20/2011. </remarks>
|
||||
///
|
||||
/// <returns> The queue. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
cl_command_queue GetQueue();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Compiles. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/20/2011. </remarks>
|
||||
///
|
||||
/// <param name="lpszFileName"> [in,out] If non-null, filename of the file. </param>
|
||||
/// <param name="lpszKernelName"> [in,out] If non-null, name of the kernel. </param>
|
||||
/// <param name="ppPlatformSpecificBinary"> [in,out] If non-null, the platform specific binary. </param>
|
||||
/// <param name="ppPlatformSpecificModule"> [in,out] If non-null, the platform specific module. </param>
|
||||
/// <param name="lpszCompilerOutput"> (optional) [in,out] If non-null, the compiler output. </param>
|
||||
/// <param name="uiCompilerOutput"> (optional) the compiler output. </param>
|
||||
/// <param name="threadGroupSizeX"> (optional) the thread group size x coordinate. </param>
|
||||
/// <param name="threadGroupSizeY"> (optional) the thread group size y coordinate. </param>
|
||||
/// <param name="threadGroupSizeZ"> (optional) the thread group size z coordinate. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL Compile(
|
||||
char * lpszFileName,
|
||||
char * lpszKernelName,
|
||||
void ** ppPlatformSpecificBinary,
|
||||
void ** ppPlatformSpecificModule,
|
||||
char * lpszCompilerOutput=NULL,
|
||||
int uiCompilerOutput=0,
|
||||
int threadGroupSizeX=1,
|
||||
int threadGroupSizeY=1,
|
||||
int threadGroupSizeZ=1
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Compiles accelerator source code to create a PTask binary. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/17/2011.
|
||||
///
|
||||
/// The function accepts a string of source code and an operation in that source to
|
||||
/// build a binary for. This is a convenience for source code that may not be stored
|
||||
/// in files (e.g. dynamically generated code). On success the function will create
|
||||
/// platform- specific binary and module objects that can be later used by the
|
||||
/// runtime to invoke the shader code. The caller can provide a buffer for compiler
|
||||
/// output, which if present, the runtime will fill *iff* the compilation fails.
|
||||
///
|
||||
/// NB: Thread group dimensions are optional parameters here. This is because some
|
||||
/// runtimes require them statically, and some do not. DirectX requires thread-group
|
||||
/// sizes to be specified statically to enable compiler optimizations that cannot be
|
||||
/// used otherwise. CUDA and OpenCL allow runtime specification of these parameters.
|
||||
/// </remarks>
|
||||
///
|
||||
/// <param name="lpszShaderCode"> [in] actual source. cannot be null. </param>
|
||||
/// <param name="uiShaderCodeSize"> Size of the shader code. </param>
|
||||
/// <param name="lpszOperation"> [in] Function name in source file. cannot be null. </param>
|
||||
/// <param name="ppPlatformSpecificBinary"> [out] On success, a platform specific binary. </param>
|
||||
/// <param name="ppPlatformSpecificModule"> [out] On success, a platform specific module handle. </param>
|
||||
/// <param name="lpszCompilerOutput"> (optional) [in,out] On failure, the compiler output. </param>
|
||||
/// <param name="uiCompilerOutput"> (optional) [in] length of buffer supplied for
|
||||
/// compiler output. </param>
|
||||
/// <param name="nThreadGroupSizeX"> (optional) thread group X dimensions. (see remarks) </param>
|
||||
/// <param name="nThreadGroupSizeY"> (optional) thread group Y dimensions. (see remarks) </param>
|
||||
/// <param name="nThreadGroupSizeZ"> (optional) thread group Z dimensions. (see remarks) </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
Compile(
|
||||
__in char * lpszShaderCode,
|
||||
__in UINT uiShaderCodeSize,
|
||||
__in char * lpszOperation,
|
||||
__in void ** ppPlatformSpecificBinary,
|
||||
__in void ** ppPlatformSpecificModule,
|
||||
__in char * lpszCompilerOutput=NULL,
|
||||
__in int uiCompilerOutput=0,
|
||||
__in int nThreadGroupSizeX=1,
|
||||
__in int nThreadGroupSizeY=1,
|
||||
__in int nThreadGroupSizeZ=1
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the context current. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/20/2011. </remarks>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL IsDeviceContextCurrent();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Makes the context current. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/20/2011. </remarks>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL MakeDeviceContextCurrent();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Releases the current context. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/20/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void ReleaseCurrentDeviceContext();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Return true if this accelerator has some support for device to device transfer
|
||||
/// with the given accelerator. This allows us to skip a trip through host memory
|
||||
/// in many cases.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/25/2012. </remarks>
|
||||
///
|
||||
/// <param name="pAccelerator"> [in,out] If non-null, the accelerator. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL SupportsDeviceToDeviceTransfer(Accelerator * pAccelerator);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets whether device memcpy is supported. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/12/2012. </remarks>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL SupportsDeviceMemcpy();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the supports function arguments. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/20/2011. </remarks>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL SupportsFunctionArguments();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the supports byval arguments. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/20/2011. </remarks>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL SupportsByvalArguments();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Synchronizes the context. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/20/2011. </remarks>
|
||||
///
|
||||
/// <param name="pTask"> (optional) [in,out] If non-null, the task. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL Synchronize(Task*pTask=NULL);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if 'p' has accessible memory space. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/20/2011. </remarks>
|
||||
///
|
||||
/// <param name="p"> [in,out] If non-null, the p. </param>
|
||||
///
|
||||
/// <returns> true if accessible memory space, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL HasAccessibleMemorySpace(Accelerator*p);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the supports pinned host memory. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/20/2011. </remarks>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL SupportsPinnedHostMemory();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Allocate memory on the host. Some runtimes (esp. earlier versions of CUDA)
|
||||
/// require that CUDA APIs be used to allocate host-side buffers, or support
|
||||
/// specialized host allocators that can help improve DMA performance.
|
||||
/// AllocatePagelockedHostMemory wraps these APIs for accelerators that have runtime support
|
||||
/// for this, and uses normal system services (VirtualAlloc on Windows, malloc
|
||||
/// elsewhere) to satisfy requests.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/17/2011. </remarks>
|
||||
///
|
||||
/// <param name="uiBytes"> Number of bytes to allocate. </param>
|
||||
/// <param name="pbResultPageLocked"> [in,out] If non-null, the result of whether the
|
||||
/// allocated memory is page-locked is provided here. </param>
|
||||
///
|
||||
/// <returns> byte pointer on success, null on failure. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void * AllocatePagelockedHostMemory(UINT uiBytes, BOOL * pbResultPageLocked);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Free host memory. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/17/2011. </remarks>
|
||||
///
|
||||
/// <param name="pBuffer"> If non-null, the buffer. </param>
|
||||
/// <param name="bPageLocked"> true if the memory was allocated in the page-locked area. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void
|
||||
FreeHostMemory(
|
||||
void * pBuffer,
|
||||
BOOL bPageLocked
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the platform identifier. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/20/2011. </remarks>
|
||||
///
|
||||
/// <returns> The platform identifier. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual cl_platform_id GetPlatformId();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Enumerate accelerators. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/20/2011. </remarks>
|
||||
///
|
||||
/// <param name="devices"> [in,out] If non-null, the devices. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static void EnumerateAccelerators(std::vector<Accelerator*> &devices);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Enumerate platforms. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/20/2011. </remarks>
|
||||
///
|
||||
/// <param name="platforms"> [in,out] The platforms. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static void EnumeratePlatforms(std::vector<cl_platform_id> &platforms);
|
||||
|
||||
protected:
|
||||
|
||||
/// <summary> The device </summary>
|
||||
cl_device_id m_pDevice;
|
||||
/// <summary> The context </summary>
|
||||
cl_context m_pContext;
|
||||
/// <summary> Queue of open cl commands </summary>
|
||||
cl_command_queue m_cqCommandQueue;// OpenCL command queue
|
||||
/// <summary> The cp platform </summary>
|
||||
cl_platform_id m_cpPlatform; // OpenCL platform
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Creates a new platform specific buffer. This routine is called by CreateBuffer to
|
||||
/// get a new instance of whatever buffer type corresponds to the platform
|
||||
/// implementing this interface. For example, DXAccelerator will return a new
|
||||
/// PDXBuffer object, where PDXBuffer is a subclass of PBuffer. The Accelerator super-
|
||||
/// class can then perform the rest of the work required to initialize the PBuffer.
|
||||
///
|
||||
/// We only create PBuffers to provide 'physical' views of the 'logical' buffer
|
||||
/// abstraction provided by the Datablock. Datablocks can have up to three different
|
||||
/// channels (data, metadata, template), so consequently, each of which must be
|
||||
/// backed by its own PBuffer. A PBuffer should not have to know what channel it is
|
||||
/// backing, but we include that information in its creation to simplify the
|
||||
/// materialization of views between different subclasses of PBuffer.
|
||||
///
|
||||
/// The "proxy allocator" is present as parameter to handle two corner cases:
|
||||
///
|
||||
/// 1. Allocation of host-side buffers by the host-specific subclass of PBuffer
|
||||
/// (PHBuffer)--for example, we prefer to use a CUDA accelerator object to
|
||||
/// allocate host memory when a block will be touched by a CUDA-based PTask,
|
||||
/// because we can use the faster async APIs with memory we allocate using CUDA
|
||||
/// host allocation APIs. This requires that the HostAccelerator defer the host-
|
||||
/// side memory allocation to the CUDA accelerator.
|
||||
///
|
||||
/// 2. Communication between runtimes that provide some interop support (e.g. CUDA
|
||||
/// and DirectX can actually share texture objects, meaning there is no need to
|
||||
/// actually allocate a new buffer to back a CUDA view that already has a DirectX
|
||||
/// view, but the two accelerators must cooperate to assemble a PBuffer that
|
||||
/// shares the underlying shared object).
|
||||
///
|
||||
/// Case 1 is implemented, while case 2 is largely unimplemented. If no proxy
|
||||
/// accelerator is provided, allocation will proceed using the accelerator object
|
||||
/// whose member function is being called to allocate the PBuffer.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <param name="pLogicalParent"> [in] If non-null, the datablock that is the logical
|
||||
/// buffer using this 'physical' buffer to back a particular
|
||||
/// channel on this accelerator. </param>
|
||||
/// <param name="nDatblockChannelIndex"> Zero-based index of the channel being backed. Must be:
|
||||
/// * DBDATA_IDX = 0, OR
|
||||
/// * DBMETADATA_IDX = 1, OR
|
||||
/// * DBTEMPLATE_IDX = 2. </param>
|
||||
/// <param name="uiBufferAccessFlags"> Access flags determining what views to create. </param>
|
||||
/// <param name="pProxyAllocator"> [in,out] If non-null, the proxy allocator. </param>
|
||||
///
|
||||
/// <returns> null if it fails, else the new PBuffer. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual PBuffer* NewPlatformSpecificBuffer(Datablock * pLogicalParent,
|
||||
UINT nDatblockChannelIndex,
|
||||
BUFFERACCESSFLAGS uiBufferAccessFlags,
|
||||
Accelerator * pProxyAllocator
|
||||
);
|
||||
|
||||
};
|
||||
|
||||
};
|
||||
#endif // OPENCL_SUPPORT
|
||||
#endif
|
|
@ -1,269 +0,0 @@
|
|||
//--------------------------------------------------------------------------------------
|
||||
// File: CLTask.h
|
||||
//
|
||||
// Maintainer: crossbac@microsoft.com
|
||||
//--------------------------------------------------------------------------------------
|
||||
#ifndef _CL_PTASK_H_
|
||||
#define _CL_PTASK_H_
|
||||
#ifdef OPENCL_SUPPORT
|
||||
|
||||
#include "primitive_types.h"
|
||||
#include "cuaccelerator.h"
|
||||
#include "task.h"
|
||||
#include "channel.h"
|
||||
#include "CompiledKernel.h"
|
||||
#include "oclhdr.h"
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
namespace PTask {
|
||||
|
||||
class CLTask : public Task {
|
||||
|
||||
friend class XMLReader;
|
||||
friend class XMLWriter;
|
||||
|
||||
public:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Constructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/22/2011. </remarks>
|
||||
///
|
||||
/// <param name="hRuntimeTerminateEvt"> Handle of the runtime terminate event. </param>
|
||||
/// <param name="hGraphTeardownEvent"> Handle of the graph teardown event. </param>
|
||||
/// <param name="hGraphStopEvent"> Handle of the graph stop event. </param>
|
||||
/// <param name="hGraphRunningEvent"> The graph running event. </param>
|
||||
/// <param name="pCompiledKernel"> The CompiledKernel associated with this task. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
CLTask(
|
||||
__in HANDLE hRuntimeTerminateEvt,
|
||||
__in HANDLE hGraphTeardownEvent,
|
||||
__in HANDLE hGraphStopEvent,
|
||||
__in HANDLE hGraphRunningEvent,
|
||||
__in CompiledKernel * pCompiledKernel
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/22/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual ~CLTask();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Creates this object. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/22/2011. </remarks>
|
||||
///
|
||||
/// <param name="pAccelerators"> [in] non-null, the accelerators to compile for. </param>
|
||||
/// <param name="pKernel"> [in,out] If non-null, the kernel. </param>
|
||||
///
|
||||
/// <returns> HRESULT (use SUCCEEDED/FAILED macros) </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual HRESULT Create(std::set<Accelerator*>& pAccelerators,
|
||||
CompiledKernel * pKernel
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Runs this CLTask. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/22/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL PlatformSpecificDispatch();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Sets a compute geometry. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/22/2011. </remarks>
|
||||
///
|
||||
/// <param name="tgx"> (optional) the thread group x dimensions. </param>
|
||||
/// <param name="tgy"> (optional) the thread group y dimensions. </param>
|
||||
/// <param name="tgz"> (optional) the thread group z dimensions. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void SetComputeGeometry(int tgx=1, int tgy=1, int tgz=1 );
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Sets a block and grid size. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/22/2011. </remarks>
|
||||
///
|
||||
/// <param name="grid"> The grid. </param>
|
||||
/// <param name="block"> The block. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void SetBlockAndGridSize(PTASKDIM3 grid, PTASKDIM3 block);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> When the graph is complete, (indicated because Graph.Run was called), this method
|
||||
/// is called on every task to allow tasks to perform any one-time initializations
|
||||
/// that cannot be performed without knowing that the structure of the graph is now
|
||||
/// static. For example, computing parameter offset maps for dispatch.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/5/2012. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void PlatformSpecificOnGraphComplete();
|
||||
|
||||
protected:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Perform the platform-specific work required to bind an individual input parameter.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/22/2011. </remarks>
|
||||
///
|
||||
/// <param name="pPort"> [in,out] If non-null, the port. </param>
|
||||
/// <param name="ordinal"> The ordinal. </param>
|
||||
/// <param name="uiActualIndex"> Zero-based index of the user interface actual. </param>
|
||||
/// <param name="pBuffer"> [in,out] If non-null, the buffer. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL PlatformSpecificBindInput(Port * pPort,
|
||||
int ordinal,
|
||||
UINT uiActualIndex,
|
||||
PBuffer * pBuffer
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Perform the platform-specific work required to bind an individual output
|
||||
/// parameter.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/22/2011. </remarks>
|
||||
///
|
||||
/// <param name="pPort"> [in,out] If non-null, the port. </param>
|
||||
/// <param name="ordinal"> The ordinal. </param>
|
||||
/// <param name="uiActualIndex"> Zero-based index of the user interface actual. </param>
|
||||
/// <param name="pBuffer"> [in,out] If non-null, the buffer. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL PlatformSpecificBindOutput(Port * pPort,
|
||||
int ordinal,
|
||||
UINT uiActualIndex,
|
||||
PBuffer * pBuffer);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Perform the platform-specific work required to bind an individual constant parameter.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/22/2011. </remarks>
|
||||
///
|
||||
/// <param name="pPort"> [in,out] If non-null, the port. </param>
|
||||
/// <param name="ordinal"> The ordinal. </param>
|
||||
/// <param name="uiActualIndex"> Zero-based index of the user interface actual. </param>
|
||||
/// <param name="pBuffer"> [in,out] If non-null, the buffer. </param>
|
||||
/// <param name="bScalarBinding"> true to scalar binding. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL PlatformSpecificBindConstant(Port * pPort,
|
||||
int ordinal,
|
||||
UINT uiActualIndex,
|
||||
PBuffer * pBuffer
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the platform specific finalize bindings. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/5/2012. </remarks>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL PlatformSpecificFinalizeBindings();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Bind accelerator executable. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL BindExecutable();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Bind parameter. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/22/2011. </remarks>
|
||||
///
|
||||
/// <param name="pCS"> The OpenCL kernel (cl_kernel). </param>
|
||||
/// <param name="pBuffer"> [in,out] If non-null, the buffer. </param>
|
||||
/// <param name="pPort"> [in,out] If non-null, the port. </param>
|
||||
/// <param name="ordinal"> [in,out] The ordinal. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void BindParameter(cl_kernel pCS, PBuffer * pBuffer, Port * pPort, int &ordinal);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the estimate global size. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/22/2011. </remarks>
|
||||
///
|
||||
/// <returns> The estimated global size. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
UINT EstimateGlobalSize();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
/// <summary>   Loads source code from a file before compiling. </summary>
///
/// <remarks>   Crossbac, 12/22/2011.
///
///             Declared without a 'CLTask::' qualifier. An in-class member declaration that
///             names its own class is ill-formed C++: GCC and Clang reject it as an
///             "extra qualification", and MSVC only accepts it as an extension.
/// </remarks>
///
/// <param name="cFilename">        Filename of the file. </param>
/// <param name="cPreamble">        The preamble. </param>
/// <param name="szFinalLength">    [in,out] If non-null, length of the final source
///                                 (presumably written on return; confirm against the
///                                 definition). </param>
///
/// <returns>   null if it fails, else the source. </returns>
///-------------------------------------------------------------------------------------------------

char* LoadSource(
    const char* cFilename,
    const char* cPreamble,
    size_t* szFinalLength
    );
|
||||
|
||||
/// <summary> A map from accelerator to compiled kernel object,
|
||||
/// allowing the system to dispatch on arbitrary
|
||||
/// accelerators by selecting the right object
|
||||
/// for the dispatch accelerator.
|
||||
/// </summary>
|
||||
std::map<Accelerator*, cl_kernel> m_pCSMap;
|
||||
|
||||
/// <summary> A map from accelerator to module,
|
||||
/// allowing the system to dispatch on arbitrary
|
||||
/// accelerators by selecting the right object
|
||||
/// for the dispatch accelerator.
|
||||
/// </summary>
|
||||
std::map<Accelerator*, cl_program> m_pModuleMap;
|
||||
|
||||
/// <summary> The preferred x thread group size </summary>
|
||||
UINT m_nPreferredXDim;
|
||||
|
||||
/// <summary> The preferred y thread group size </summary>
|
||||
UINT m_nPreferredYDim;
|
||||
|
||||
/// <summary> The preferred z thread group size </summary>
|
||||
UINT m_nPreferredZDim;
|
||||
|
||||
/// <summary> true if the user explicitly set the thread
|
||||
/// group geometry with a call to
|
||||
/// Task->SetGeometry.
|
||||
/// </summary>
|
||||
BOOL m_bGeometryExplicit;
|
||||
};
|
||||
};
|
||||
#endif // OPENCL_SUPPORT
|
||||
#endif // _CL_PTASK_H_
|
|
@ -1,835 +0,0 @@
|
|||
//--------------------------------------------------------------------------------------
|
||||
// File: cuaccelerator.h
|
||||
// cuda-based accelerator
|
||||
// Maintainer: crossbac@microsoft.com
|
||||
//--------------------------------------------------------------------------------------
|
||||
#ifndef _CUDA_ACCELERATOR_H_
|
||||
#define _CUDA_ACCELERATOR_H_
|
||||
#ifdef CUDA_SUPPORT
|
||||
|
||||
#include "primitive_types.h"
|
||||
#include "accelerator.h"
|
||||
#include "task.h"
|
||||
#include "cuhdr.h"
|
||||
#include <vector>
|
||||
#include <set>
|
||||
|
||||
namespace PTask {
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> CUDA specific device attributes. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
typedef struct cudevparms_t {
|
||||
|
||||
/// <summary> The major version number for the device</summary>
|
||||
int major;
|
||||
|
||||
/// <summary> The minor version number for the device</summary>
|
||||
int minor;
|
||||
|
||||
/// <summary> Device id returned by cuda initialization </summary>
|
||||
CUdevice dev;
|
||||
|
||||
/// <summary> The driver version </summary>
|
||||
int driverVersion;
|
||||
|
||||
/// <summary> The total global memory in MB</summary>
|
||||
size_t totalGlobalMem;
|
||||
|
||||
/// <summary> Number of multi processors </summary>
|
||||
int multiProcessorCount;
|
||||
|
||||
/// <summary> The total constant memory in KB</summary>
|
||||
int totalConstantMemory;
|
||||
|
||||
/// <summary> The shared memory per block </summary>
|
||||
int sharedMemPerBlock;
|
||||
|
||||
/// <summary> The number of registers per block </summary>
|
||||
int regsPerBlock;
|
||||
|
||||
/// <summary> Size of the warp </summary>
|
||||
int warpSize;
|
||||
|
||||
/// <summary> The maximum threads per block </summary>
|
||||
int maxThreadsPerBlock;
|
||||
|
||||
/// <summary> The maximum block dimensions </summary>
|
||||
int maxBlockDim[3];
|
||||
|
||||
/// <summary> The maximum grid dimensions </summary>
|
||||
int maxGridDim[3];
|
||||
|
||||
/// <summary> The memory pitch </summary>
|
||||
int memPitch;
|
||||
|
||||
/// <summary> The texture alignment </summary>
|
||||
int textureAlign;
|
||||
|
||||
/// <summary> The clock rate </summary>
|
||||
int clockRate;
|
||||
|
||||
/// <summary> True if the device can overlap gpu
|
||||
/// computation with data transfer
|
||||
/// </summary>
|
||||
int gpuOverlap;
|
||||
|
||||
/// <summary> True if kernel execute timeout is enabled </summary>
|
||||
int kernelExecTimeoutEnabled;
|
||||
|
||||
/// <summary> True if the device is integrated,
|
||||
/// false if the device is connected on PCIe
|
||||
/// </summary>
|
||||
int integrated;
|
||||
|
||||
/// <summary> True if the runtime can map host memory
|
||||
/// for data transfers to/from this device
|
||||
/// </summary>
|
||||
int canMapHostMemory;
|
||||
|
||||
/// <summary> True if the device supports
|
||||
/// concurrent execution of multiple different
|
||||
/// kernels
|
||||
/// </summary>
|
||||
int concurrentKernels;
|
||||
|
||||
/// <summary> True if ecc is enabled for the device memory </summary>
|
||||
int eccEnabled;
|
||||
|
||||
/// <summary> True if the TCC driver is in use for this device </summary>
|
||||
int tccDriver;
|
||||
|
||||
/// <summary> True if the device supports unified addressing.
|
||||
/// Unified addressing means device and host pointers are equal
|
||||
/// for page-locked host-allocations.
|
||||
/// </summary>
|
||||
int unifiedAddressing;
|
||||
|
||||
/// <summary> Name of the device </summary>
|
||||
char deviceName[256];
|
||||
|
||||
} CUDA_DEVICE_ATTRIBUTES;
|
||||
|
||||
static const int MAXCTXTS = 16;
|
||||
static const int MAXCTXDEPTH = 32;
|
||||
|
||||
class CUAccelerator :
|
||||
public Accelerator
|
||||
{
|
||||
public:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Constructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <param name="attribs"> [in,out] If non-null, the attributes. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
CUAccelerator(CUDA_DEVICE_ATTRIBUTES * attribs);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual ~CUAccelerator();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Open the CUAccelerator. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <returns> HRESULT--use SUCCEEDED() and FAILED() macros to check. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
HRESULT Open();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Opens a CUAccelerator object for the CUDA
|
||||
/// device specified. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <param name="dev"> The device id. </param>
|
||||
///
|
||||
/// <returns> HRESULT--use SUCCEEDED() and FAILED() macros to check. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
HRESULT Open(CUdevice dev);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the device. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <returns> null if it fails, else the device. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void* GetDevice();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the context. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <returns> null if it fails, else the context. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void* GetContext();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Creates an asynchronous context for the task. Create the cuda stream for this
|
||||
/// ptask.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 12/20/2011.
|
||||
///
|
||||
/// This method is required of all subclasses, and abstracts the work associated with
|
||||
/// managing whatever framework-level asynchrony abstractions are supported by the
|
||||
/// backend target. For example, CUDA supports the "stream", while DirectX supports
|
||||
/// an ID3D11ImmediateContext, OpenCL has command queues, and so on.
|
||||
/// </remarks>
|
||||
///
|
||||
/// <param name="pTask"> [in] non-null, the task to which the new CUDA
|
||||
/// stream is bound. </param>
|
||||
/// <param name="eAsyncContextType"> Type of the asynchronous context. </param>
|
||||
///
|
||||
/// <returns> The asynchronous context created for the task (presumably null on failure; confirm against the implementation). </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual AsyncContext *
|
||||
PlatformSpecificCreateAsyncContext(
|
||||
__in Task * pTask,
|
||||
__in ASYNCCONTEXTTYPE eAsyncContextType
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Cache the shader and module objects associated with
|
||||
/// successful compilation of szFunction in szFile.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <param name="szFile"> [in] non-null, the file name. </param>
|
||||
/// <param name="szFunction"> [in] non-null, the function. </param>
|
||||
/// <param name="pCUDAFunction"> The cuda function. </param>
|
||||
/// <param name="pCUDAModule"> The cuda module. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void CachePutShader(char * szFile,
|
||||
char * szFunction,
|
||||
CUfunction pCUDAFunction,
|
||||
CUmodule pCUDAModule
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Check the cache for a compiled version of the
|
||||
/// function szFunction in the file szFile. If it
|
||||
/// is present, compilation can be skipped.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <param name="szFile"> [in] non-null, the file name. </param>
|
||||
/// <param name="szFunction"> [in] non-null, the function. </param>
|
||||
/// <param name="pCUDAFunction"> [out] The cuda function. </param>
|
||||
/// <param name="pCUDAModule"> [out] The cuda module. </param>
|
||||
///
|
||||
/// <returns> true if the shader is present in the cache, false otherwise. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
BOOL CacheGetShader(char * szFile,
|
||||
char * szFunction,
|
||||
CUfunction &pCUDAFunction,
|
||||
CUmodule &pCUDAModule
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Compiles CUDA code to create a new binary
|
||||
/// and module. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <param name="lpszFileName"> [in,out] If non-null, filename of the file. </param>
|
||||
/// <param name="lpszOperation"> [in,out] If non-null, the operation. </param>
|
||||
/// <param name="ppPlatformSpecificBinary"> [in,out] If non-null, the platform specific binary. </param>
|
||||
/// <param name="ppPlatformSpecificModule"> [in,out] If non-null, the platform specific module. </param>
|
||||
/// <param name="lpszCompilerOutput"> (optional) [in,out] If non-null, the compiler output. </param>
|
||||
/// <param name="uiCompilerOutput"> (optional) length of the buffer supplied for compiler output. </param>
|
||||
/// <param name="nThreadGroupSizeX"> (optional) the thread group size x coordinate. </param>
|
||||
/// <param name="nThreadGroupSizeY"> (optional) the thread group size y coordinate. </param>
|
||||
/// <param name="nThreadGroupSizeZ"> (optional) The thread group size z coordinate. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL Compile(
|
||||
char * lpszFileName,
|
||||
char * lpszOperation,
|
||||
void ** ppPlatformSpecificBinary,
|
||||
void ** ppPlatformSpecificModule,
|
||||
char * lpszCompilerOutput=NULL,
|
||||
int uiCompilerOutput=0,
|
||||
int nThreadGroupSizeX=1,
|
||||
int nThreadGroupSizeY=1,
|
||||
int nThreadGroupSizeZ=1
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Compiles accelerator source code to create a PTask binary. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/17/2011.
|
||||
///
|
||||
/// The function accepts a string of source code and an operation in that source to
|
||||
/// build a binary for. This is a convenience for source code that may not be stored
|
||||
/// in files (e.g. dynamically generated code). On success the function will create
|
||||
/// platform- specific binary and module objects that can be later used by the
|
||||
/// runtime to invoke the shader code. The caller can provide a buffer for compiler
|
||||
/// output, which if present, the runtime will fill *iff* the compilation fails.
|
||||
///
|
||||
/// NB: Thread group dimensions are optional parameters here. This is because some
|
||||
/// runtimes require them statically, and some do not. DirectX requires thread-group
|
||||
/// sizes to be specified statically to enable compiler optimizations that cannot be
|
||||
/// used otherwise. CUDA and OpenCL allow runtime specification of these parameters.
|
||||
/// </remarks>
|
||||
///
|
||||
/// <param name="lpszShaderCode"> [in] actual source. cannot be null. </param>
|
||||
/// <param name="uiShaderCodeSize"> Size of the shader code. </param>
|
||||
/// <param name="lpszOperation"> [in] Function name in source file. cannot be null. </param>
|
||||
/// <param name="ppPlatformSpecificBinary"> [out] On success, a platform specific binary. </param>
|
||||
/// <param name="ppPlatformSpecificModule"> [out] On success, a platform specific module handle. </param>
|
||||
/// <param name="lpszCompilerOutput"> (optional) [in,out] On failure, the compiler output. </param>
|
||||
/// <param name="uiCompilerOutput"> (optional) [in] length of buffer supplied for
|
||||
/// compiler output. </param>
|
||||
/// <param name="nThreadGroupSizeX"> (optional) thread group X dimensions. (see remarks) </param>
|
||||
/// <param name="nThreadGroupSizeY"> (optional) thread group Y dimensions. (see remarks) </param>
|
||||
/// <param name="nThreadGroupSizeZ"> (optional) thread group Z dimensions. (see remarks) </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
Compile(
|
||||
__in char * lpszShaderCode,
|
||||
__in UINT uiShaderCodeSize,
|
||||
__in char * lpszOperation,
|
||||
__in void ** ppPlatformSpecificBinary,
|
||||
__in void ** ppPlatformSpecificModule,
|
||||
__in char * lpszCompilerOutput=NULL,
|
||||
__in int uiCompilerOutput=0,
|
||||
__in int nThreadGroupSizeX=1,
|
||||
__in int nThreadGroupSizeY=1,
|
||||
__in int nThreadGroupSizeZ=1
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Return true if the context of this accelerator is current. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <returns> true if this accelerator's context is current, false otherwise. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL IsDeviceContextCurrent();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Makes the context current. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL MakeDeviceContextCurrent();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Releases the current context. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void ReleaseCurrentDeviceContext();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Synchronizes the context. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <param name="ctxt"> [in,out] If non-null, the ctxt. NOTE(review): no such parameter in the current signature; likely a stale entry. </param>
|
||||
/// <param name="pTask"> (optional) [in,out] If non-null, the task. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL Synchronize(Task*pTask=NULL);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if the cuda runtime has been initialized. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <returns> true if cuda initialized, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static BOOL IsCUDAInitialized();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Sets the flag recording whether CUDA has been initialized. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <param name="bCUDAInitialized"> true to indicate cuInit has been called. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static void SetCUDAInitialized(BOOL bCUDAInitialized);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Device to device transfer. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/25/2012. </remarks>
|
||||
///
|
||||
/// <param name="pDstBuffer"> [in,out] If non-null, the destination buffer. </param>
|
||||
/// <param name="pSrcBuffer"> [in,out] If non-null, buffer for source data. </param>
|
||||
/// <param name="pAsyncContext"> [in,out] If non-null, the asynchronous context for the transfer. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
DeviceToDeviceTransfer(
|
||||
__inout PBuffer * pDstBuffer,
|
||||
__in PBuffer * pSrcBuffer,
|
||||
__in AsyncContext * pAsyncContext
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Return true if this accelerator has support for unified addressing. Unified
|
||||
/// addressing means there is no distinction between device and host pointers (for
|
||||
/// page-locked memory). This is important because the datablock abstraction
|
||||
/// maintains a buffer per logical memory space, and if two memory spaces are
|
||||
/// logically the same (unified), but only for pointers to page-locked memory, a
|
||||
/// number of special cases arise for allocation, freeing, ownership, etc. Sadly,
|
||||
/// this complexity is required in the common case, because asynchronous transfers
|
||||
/// only work in CUDA when the host pointers are page-locked. We need to be able to
|
||||
/// tell when a page-locked buffer in the host-memory space is different from a
|
||||
/// device pointer in a CUAccelerator memory space.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/25/2012. </remarks>
|
||||
///
|
||||
/// <returns> true if the device supports unified addressing. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL SupportsUnifiedAddressing();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Return true if this accelerator has some support for device to device transfer
|
||||
/// with the given accelerator. This allows us to skip a trip through host memory
|
||||
/// in many cases.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/25/2012. </remarks>
|
||||
///
|
||||
/// <param name="pAccelerator"> [in,out] If non-null, the accelerator. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL SupportsDeviceToDeviceTransfer(Accelerator * pAccelerator);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query whether this accelerator supports device memcpy. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/12/2012. </remarks>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL SupportsDeviceMemcpy();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Return true if this accelerator supports top-level
|
||||
/// function arguments. This will always return true
|
||||
/// for CUDA accelerators.</summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <returns> true. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL SupportsFunctionArguments();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Return true if this platform supports passing
|
||||
/// structs by value as arguments to top-level kernel
|
||||
/// entry points. This will always return true for
|
||||
/// CUDA accelerators.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL SupportsByvalArguments();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if 'pOtherAccelerator' has an accessible memory space.
|
||||
/// The other accelerator's memory space is accessible if there
|
||||
/// is a way to transfer data between the two other than by
|
||||
/// copying to host-memory as a waypoint. For example, some
|
||||
/// CUDA accelerators support peer-to-peer copy over PCI,
|
||||
/// and DirectX has interop APIs with CUDA.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <param name="pOtherAccelerator"> [in,out] If non-null, the other accelerator. </param>
|
||||
///
|
||||
/// <returns> true if accessible memory space, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL HasAccessibleMemorySpace(Accelerator * pOtherAccelerator);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Return true if the accelerator supports pinned host memory. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL SupportsPinnedHostMemory();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Allocate memory on the host. Some runtimes (esp. earlier versions of CUDA)
|
||||
/// require that CUDA APIs be used to allocate host-side buffers, or support
|
||||
/// specialized host allocators that can help improve DMA performance.
|
||||
/// AllocatePagelockedHostMemory wraps these APIs for accelerators that have runtime support
|
||||
/// for this, and otherwise uses normal system services (VirtualAlloc on Windows, malloc
|
||||
/// elsewhere) to satisfy requests.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/17/2011. </remarks>
|
||||
///
|
||||
/// <param name="uiBytes"> Number of bytes to allocate. </param>
|
||||
/// <param name="pbResultPageLocked"> [in,out] If non-null, the result of whether the
|
||||
/// allocated memory is page-locked is provided here. </param>
|
||||
///
|
||||
/// <returns> byte pointer on success, null on failure. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void * AllocatePagelockedHostMemory(UINT uiBytes, BOOL * pbResultPageLocked);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Free host memory. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/17/2011. </remarks>
|
||||
///
|
||||
/// <param name="pBuffer"> If non-null, the buffer. </param>
|
||||
/// <param name="bPageLocked"> true if the memory was allocated in the page-locked area. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void
|
||||
FreeHostMemory(
|
||||
void * pBuffer,
|
||||
BOOL bPageLocked
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the device identifier. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <returns> The device identifier. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual int GetDeviceId();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the device attributes. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <returns> null if it fails, else the device attributes. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
CUDA_DEVICE_ATTRIBUTES* GetDeviceAttributes();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Return true if this accelerator encapsulates a backend framework that provides
|
||||
/// explicit APIs for managing outstanding (Asynchronous) operations. When this is
|
||||
/// the case, the corresponding AsyncContext subclass can manage outstanding
|
||||
/// dependences explicitly to increase concurrency and avoid syncing with the device.
|
||||
/// When it is *not* the case, we must synchronize when we move data to and from this
|
||||
/// accelerator context and contexts that *do* support an explicit async API. For
|
||||
/// example, CUDA supports the stream and event API to explicitly manage dependences
|
||||
/// and we use this feature heavily to allow task dispatch to get far ahead of device-
|
||||
/// side dispatch. However when data moves between CUAccelerators and other
|
||||
/// accelerator classes, we must use synchronous operations or provide a way to wait
|
||||
/// for outstanding dependences from those contexts to resolve. This method is used
|
||||
/// to tell us whether we can create an outstanding dependence after making calls
|
||||
/// that queue work, or whether we need to synchronize.
|
||||
///
|
||||
/// This override returns TRUE since this is the CUDA encapsulation class.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/25/2012. </remarks>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL SupportsExplicitAsyncOperations();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Enumerate accelerators. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <param name="devices"> [out] non-null, the accelerator objects supporting CUDA. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static void EnumerateAccelerators(std::vector<Accelerator*> &devices);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Initializes the thread local context. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 3/20/2014. </remarks>
|
||||
///
|
||||
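/// <param name="pDefaultAccelerator"> The accelerator that is the default device for the thread. </param>
|
||||
/// <param name="eRole"> The role. </param>
|
||||
/// <param name="bPooledThread"> Presumably true if the calling thread is a pooled thread; confirm against the implementation. </param>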
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static void
|
||||
InitializeTLSContextManagement(
|
||||
__in Accelerator * pDefaultAccelerator,
|
||||
__in PTTHREADROLE eRole,
|
||||
__in BOOL bPooledThread
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Deinitializes the thread local context. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/17/2014. </remarks>
|
||||
///
|
||||
/// <param name="eRole"> The role. NOTE(review): the current signature takes no parameters; likely a stale entry. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static void DeinitializeTLSContextManagement();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Determines whether this accelerator requires thread-local context initialization. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 3/20/2014. </remarks>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL UsesTLSContextManagement();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Initializes the thread local context. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 3/20/2014. </remarks>
|
||||
///
|
||||
/// <param name="eRole"> The role. </param>
|
||||
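/// <param name="bMakeDefault"> This device should be the default for the thread. </param>
|
||||
/// <param name="bPooledThread"> Presumably true if the calling thread is a pooled thread; confirm against the implementation. </param>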
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void
|
||||
InitializeTLSContext(
|
||||
__in PTTHREADROLE eRole,
|
||||
__in BOOL bMakeDefault,
|
||||
__in BOOL bPooledThread
|
||||
);
|
||||
|
||||
protected:
|
||||
|
||||
/// <summary> Checks context invariants. NOTE(review): declaration only; which invariants are
///           checked, and how a violation is reported, is not visible here--confirm against
///           the implementation. </summary>
void CheckContextInvariants();
|
||||
/// <summary> Checks that thread-local context state has been initialized (presumably the
///           s_bContextTLSInit flag--confirm against the implementation). </summary>
void CheckContextTLSInitialized();
|
||||
|
||||
/// <summary> true if CUDA has been initialized </summary>
|
||||
static BOOL s_bCUDAInitialized;
|
||||
|
||||
/// <summary> context of primary device. This is essentially the "primary" CUDA context, but
|
||||
/// might not actually be the primary if user code is also managing device contexts
|
||||
/// or using cuda runtime API calls.
|
||||
/// </summary>
|
||||
static CUcontext s_pRootContext;
|
||||
|
||||
/// <summary> true if the root context is valid </summary>
|
||||
static BOOL s_bRootContextValid;
|
||||
|
||||
/// <summary> device id for the root context. </summary>
|
||||
static CUdevice s_nRootContext;
|
||||
|
||||
/// <summary> known ptask contexts. these should be created
|
||||
/// at init time in single threaded code (Scheduler::CreateAccelerators),
|
||||
/// so we consider them immutable once the runtime is going, so
|
||||
/// we needn't synchronize them or make them TLS. </summary>
|
||||
static CUcontext s_vKnownPTaskContexts[MAXCTXTS];
|
||||
static UINT s_nKnownPTaskContexts;
|
||||
static CUcontext s_vKnownUserContexts[MAXCTXTS];
|
||||
static UINT s_nKnownUserContexts;
|
||||
|
||||
/// <summary> Thread-local storage for caching device contexts,
|
||||
/// enabling some heuristics to avoid unnecessary and occasionally
|
||||
/// expensive calls to cuCtx[Push|Pop]Current.
|
||||
/// Additional book-keeping is necessary to keep track of
|
||||
/// contexts that don't belong to us (e.g. "primary" contexts
|
||||
/// created in user code as a side effect of invoking cuda runtime
|
||||
/// apis like cudaFree()).
|
||||
/// </summary>
|
||||
__declspec(thread) static CUAccelerator * s_pDefaultDeviceCtxt;
|
||||
__declspec(thread) static CUAccelerator * s_pCurrentDeviceCtxt;
|
||||
__declspec(thread) static int s_vContextDepthMap[MAXCTXTS];
|
||||
__declspec(thread) static CUAccelerator ** s_pContextChangeMap[MAXCTXTS];
|
||||
__declspec(thread) static CUAccelerator * s_vContextChangeMap[MAXCTXTS*MAXCTXDEPTH];
|
||||
__declspec(thread) static CUcontext s_pUserStackTop;
|
||||
__declspec(thread) static BOOL s_bContextTLSInit;
|
||||
__declspec(thread) static BOOL s_bThreadPoolThread;
|
||||
__declspec(thread) static PTTHREADROLE s_eThreadRole;
|
||||
|
||||
/// <summary> Context-provenance helpers. Only the declarations are visible here, so the notes
///           below are inferred from names and signatures and should be confirmed against
///           the implementation. </summary>
/// Single-argument query on ctx (presumably "is this a PTask or user context we have seen"; confirm).
static BOOL IsKnownContext(CUcontext ctx);
|
||||
/// Overload that takes an explicit context array (pContexts) and its element count (uiCtxCount).
static BOOL IsKnownContext(CUcontext ctx, CUcontext * pContexts, UINT uiCtxCount);
|
||||
/// Takes the count by pointer, so presumably appends ctx and updates *puiCtxCount--TODO confirm.
static BOOL AddKnownContext(CUcontext ctx, CUcontext * pContexts, UINT * puiCtxCount);
|
||||
/// Classification queries for a context handle (user-owned vs. PTask-owned; assumed from the names).
static BOOL IsUserContext(CUcontext ctx);
|
||||
static BOOL IsPTaskContext(CUcontext ctx);
|
||||
/// Presumably look ctx up in s_vKnownPTaskContexts / s_vKnownUserContexts respectively--confirm.
static BOOL IsKnownPTaskContext(CUcontext ctx);
|
||||
static BOOL IsKnownUserContext(CUcontext ctx);
|
||||
/// Presumably record ctx in s_vKnownPTaskContexts / s_vKnownUserContexts respectively--confirm.
static BOOL AddKnownPTaskContext(CUcontext ctx);
|
||||
static BOOL AddKnownUserContext(CUcontext ctx);
|
||||
/// NOTE(review): the meaning of the BOOL result is not visible from the declaration; confirm.
static BOOL CheckContextProvenance(CUcontext ctx);
|
||||
|
||||
/// <summary> The device </summary>
|
||||
CUdevice m_pDevice;
|
||||
|
||||
/// <summary> The context </summary>
|
||||
CUcontext m_pContext;
|
||||
|
||||
/// <summary> true if this is also an application-level primary context.
|
||||
/// This means that PTask shares it with user code, does not
|
||||
/// own the context, and cannot make assumptions about context
|
||||
/// state on entry to PTask APIs on *application* threads.
|
||||
/// </summary>
|
||||
BOOL m_bApplicationPrimaryContext;
|
||||
|
||||
/// <summary> Identifier for the device </summary>
|
||||
int m_nDeviceId;
|
||||
|
||||
/// <summary> The device attributes </summary>
|
||||
CUDA_DEVICE_ATTRIBUTES *m_pDeviceAttributes;
|
||||
|
||||
/// <summary> The attributes </summary>
|
||||
CUDA_DEVICE_ATTRIBUTES m_attrs;
|
||||
|
||||
/// <summary> The set of accelerators that are known accessible for P2P transfers. </summary>
|
||||
std::set<Accelerator*> m_vP2PAccessible;
|
||||
|
||||
/// <summary> The set of accelerators that are known enabled for P2P transfers. </summary>
|
||||
std::set<Accelerator*> m_vP2PEnabled;
|
||||
|
||||
/// <summary> The set of accelerators that are *known inaccessible* for P2P transfers. </summary>
|
||||
std::set<Accelerator*> m_vP2PInaccessible;
|
||||
|
||||
/// <summary> The minimum stream priority. </summary>
|
||||
int m_nMinStreamPriority;
|
||||
|
||||
/// <summary> The maximum stream priority. </summary>
|
||||
int m_nMaxStreamPriority;
|
||||
|
||||
/// <summary> The maximum outstanding launches. </summary>
|
||||
int m_nMaxOutstadingLaunches;
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Creates a new platform specific buffer. This routine is called by CreateBuffer to
|
||||
/// get a new instance of whatever buffer type corresponds to the platform
|
||||
/// implementing this interface. For example, DXAccelerator will return a new
|
||||
/// PDXBuffer object, where PDXBuffer is a subclass of PBuffer. The Accelerator super-
|
||||
/// class can then perform the rest of the work required to initialize the PBuffer.
|
||||
///
|
||||
/// We only create PBuffers to provide 'physical' views of the 'logical' buffer
|
||||
/// abstraction provided by the Datablock. Datablocks can have up to three different
|
||||
/// channels (data, metadata, template), so consequently, each of which must be
|
||||
/// backed by its own PBuffer. A PBuffer should not have to know what channel it is
|
||||
/// backing, but we include that information in its creation to simplify the
|
||||
/// materialization of views between different subclasses of PBuffer.
|
||||
///
|
||||
/// The "proxy allocator" is present as parameter to handle two corner cases:
|
||||
///
|
||||
/// 1. Allocation of host-side buffers by the host-specific subclass of PBuffer
|
||||
/// (PHBuffer)--for example, we prefer to use a CUDA accelerator object to
|
||||
/// allocate host memory when a block will be touched by a CUDA-based PTask,
|
||||
/// because we can use the faster async APIs with memory we allocate using CUDA
|
||||
/// host allocation APIs. This requires that the HostAccelerator defer the host-
|
||||
/// side memory allocation to the CUDA accelerator.
|
||||
///
|
||||
/// 2. Communication between runtimes that provide some interop support (e.g. CUDA
|
||||
/// and DirectX can actually share texture objects, meaning there is no need to
|
||||
/// actually allocate a new buffer to back a CUDA view that already has a DirectX
|
||||
/// view, but the two accelerators must cooperate to assemble a PBuffer that
|
||||
/// shares the underlying shared object).
|
||||
///
|
||||
/// Case 1 is implemented, while case 2 is largely unimplemented. If no proxy
|
||||
/// accelerator is provided, allocation will proceed using the accelerator object
|
||||
/// whose member function is being called to allocate the PBuffer.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <param name="pLogicalParent"> [in] If non-null, the datablock that is the logical
|
||||
/// buffer using this 'physical' buffer to back a particular
|
||||
/// channel on this accelerator. </param>
|
||||
/// <param name="nDatblockChannelIndex"> Zero-based index of the channel being backed. Must be:
|
||||
/// * DBDATA_IDX = 0, OR
|
||||
/// * DBMETADATA_IDX = 1, OR
|
||||
/// * DBTEMPLATE_IDX = 2. </param>
|
||||
/// <param name="uiBufferAccessFlags"> Access flags determining what views to create. </param>
|
||||
/// <param name="pProxyAllocator"> [in,out] If non-null, the proxy allocator. </param>
|
||||
///
|
||||
/// <returns> null if it fails, else the new PBuffer. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual PBuffer* NewPlatformSpecificBuffer(Datablock * pLogicalParent,
|
||||
UINT nDatblockChannelIndex,
|
||||
BUFFERACCESSFLAGS uiBufferAccessFlags,
|
||||
Accelerator * pProxyAllocator
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Determine if we should attempt page locked allocation. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 9/24/2012. </remarks>
|
||||
///
|
||||
/// <param name="uiAllocBytes"> The allocation size in bytes. </param>
|
||||
///
|
||||
/// <returns> true if we should page-lock the requested buffer. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
BOOL ShouldAttemptPageLockedAllocation(UINT uiAllocBytes);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Determine if we can access a peer device through explicit peer APIs.
|
||||
/// Cache the result.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 7/2/2013. </remarks>
|
||||
///
|
||||
/// <param name="pAccelerator"> [in,out] If non-null, the accelerator. </param>
|
||||
///
|
||||
/// <returns> true if we can access peer, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
BOOL CanAccessPeer(Accelerator * pAccelerator);
|
||||
|
||||
friend class PCUBuffer;
|
||||
};
|
||||
};
|
||||
#endif
|
||||
#endif
|
||||
|
|
@ -1,12 +0,0 @@
|
|||
///-------------------------------------------------------------------------------------------------
|
||||
// file: cuhdr.h
|
||||
//
|
||||
// summary: Includes cuda.h when CUDA_SUPPORT is defined
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
#ifndef __CUHDR_H__
|
||||
#define __CUHDR_H__
|
||||
#ifdef CUDA_SUPPORT
|
||||
#include "cuda.h"
|
||||
#endif
|
||||
#endif
|
|
@ -1,329 +0,0 @@
|
|||
//--------------------------------------------------------------------------------------
|
||||
// File: CUTask.h
|
||||
// CUDA based task
|
||||
// Maintainer: crossbac@microsoft.com
|
||||
//--------------------------------------------------------------------------------------
|
||||
#ifndef _CUDA_TASK_H_
|
||||
#define _CUDA_TASK_H_
|
||||
#ifdef CUDA_SUPPORT
|
||||
|
||||
#include "primitive_types.h"
|
||||
#include "accelerator.h"
|
||||
#include "task.h"
|
||||
#include "cuhdr.h"
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include <list>
|
||||
|
||||
|
||||
namespace PTask {
|
||||
|
||||
class CompiledKernel;
|
||||
|
||||
class CUTask : public Task {
|
||||
|
||||
friend class GeometryEstimator;
|
||||
friend class XMLReader;
|
||||
friend class XMLWriter;
|
||||
|
||||
public:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Constructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/20/2011. </remarks>
|
||||
///
|
||||
/// <param name="hRuntimeTerminateEvt"> Handle of the global terminate event. </param>
|
||||
/// <param name="hGraphTeardownEvent"> Handle of the graph teardown event. </param>
|
||||
/// <param name="hGraphStopEvent"> Handle of the graph stop event. </param>
|
||||
/// <param name="hGraphRunningEvent"> The graph running event. </param>
|
||||
/// <param name="pCompiledKernel"> The CompiledKernel associated with this task. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
CUTask(
|
||||
__in HANDLE hRuntimeTerminateEvt,
|
||||
__in HANDLE hGraphTeardownEvent,
|
||||
__in HANDLE hGraphStopEvent,
|
||||
__in HANDLE hGraphRunningEvent,
|
||||
__in CompiledKernel * pCompiledKernel
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/20/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual ~CUTask();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Creates a PTask. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/20/2011. </remarks>
|
||||
///
|
||||
/// <param name="pAccelerators"> [in] non-null, list of accelerators this task might run on. </param>
|
||||
/// <param name="pCompiledKernel"> [in,out] If non-null, the compiled kernel. </param>
|
||||
///
|
||||
/// <returns> An HRESULT indicating success or failure. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual HRESULT Create(
|
||||
std::set<Accelerator*>& pAccelerators,
|
||||
CompiledKernel * pCompiledKernel
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Runs this ptask. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/20/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL PlatformSpecificDispatch();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Initializes instrumentation. </summary>
|
||||
///
|
||||
/// <remarks> t-nailaf, 06/10/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void InitializeInstrumentation();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Finalizes instrumentation. </summary>
|
||||
///
|
||||
/// <remarks> t-nailaf, 06/10/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void FinalizeInstrumentation();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Sets a compute geometry. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/20/2011. </remarks>
|
||||
///
|
||||
/// <param name="tgx"> (optional) the thread group X dimensions. </param>
|
||||
/// <param name="tgy"> (optional) the thread group Y dimensions. </param>
|
||||
/// <param name="tgz"> (optional) the thread group Z dimensions. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void SetComputeGeometry(int tgx=1, int tgy=1, int tgz=1 );
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Sets a block and grid size. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/20/2011. </remarks>
|
||||
///
|
||||
/// <param name="grid"> The grid. </param>
|
||||
/// <param name="block"> The block. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void SetBlockAndGridSize(PTASKDIM3 grid, PTASKDIM3 block);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets a synchronization timestamp. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/20/2011. </remarks>
|
||||
///
|
||||
/// <param name="p"> [in,out] If non-null, the accelerator. </param>
|
||||
///
|
||||
/// <returns> The synchronization timestamp. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual UINT GetSynchronizationTimestamp(Accelerator * p);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Increment synchronise timestamp. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/20/2011. </remarks>
|
||||
///
|
||||
/// <param name="p"> [in,out] If non-null, the accelerator. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void IncrementSyncTimestamp(Accelerator * p);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> When the graph is complete, (indicated because Graph.Run was called), this method
|
||||
/// is called on every task to allow tasks to perform any one-time initializations
|
||||
/// that cannot be performed without knowing that the structure of the graph is now
|
||||
/// static. For example, computing parameter offset maps for dispatch.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/5/2012. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void PlatformSpecificOnGraphComplete();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Estimate dispatch dimensions. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void EstimateDispatchDimensions();
|
||||
|
||||
protected:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Perform the platform-specific work required to bind an individual input parameter.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/22/2011. </remarks>
|
||||
///
|
||||
/// <param name="pPort"> [in,out] If non-null, the port. </param>
|
||||
/// <param name="ordinal"> The ordinal. </param>
|
||||
/// <param name="uiActualIndex"> The zero-based actual index. </param>
|
||||
/// <param name="pBuffer"> [in,out] If non-null, the buffer. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL PlatformSpecificBindInput(Port * pPort,
|
||||
int ordinal,
|
||||
UINT uiActualIndex,
|
||||
PBuffer * pBuffer
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Perform the platform-specific work required to bind an individual output
|
||||
/// parameter.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/22/2011. </remarks>
|
||||
///
|
||||
/// <param name="pPort"> [in,out] If non-null, the port. </param>
|
||||
/// <param name="ordinal"> The ordinal. </param>
|
||||
/// <param name="uiActualIndex"> The zero-based actual index. </param>
|
||||
/// <param name="pBuffer"> [in,out] If non-null, the buffer. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL PlatformSpecificBindOutput(Port * pPort,
|
||||
int ordinal,
|
||||
UINT uiActualIndex,
|
||||
PBuffer * pBuffer);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Perform the platform-specific work required to bind an individual constant
|
||||
/// parameter. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/22/2011. </remarks>
|
||||
///
|
||||
/// <param name="pPort"> [in,out] If non-null, the port. </param>
|
||||
/// <param name="ordinal"> The ordinal. </param>
|
||||
/// <param name="uiActualIndex"> The zero-based actual index. </param>
|
||||
/// <param name="pBuffer"> [in,out] If non-null, the buffer. </param>
|
||||
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL PlatformSpecificBindConstant(Port * pPort,
|
||||
int ordinal,
|
||||
UINT uiActualIndex,
|
||||
PBuffer * pBuffer
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Performs the platform-specific finalization of bindings. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/5/2012. </remarks>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL PlatformSpecificFinalizeBindings();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Bind accelerator executable. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL BindExecutable();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Calculates the parameter offsets. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/20/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void ComputeParameterOffsets();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Adds parameter indices to 'indexmap'. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/20/2011. </remarks>
|
||||
///
|
||||
/// <param name="portmap"> [in,out] The portmap. </param>
|
||||
/// <param name="indexmap"> [in,out] The indexmap. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void AddParameterIndeces(
|
||||
std::map<UINT, Port*>& portmap,
|
||||
std::map<UINT, Port*>& indexmap);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Collect migration resources. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/20/2011. </remarks>
|
||||
///
|
||||
/// <param name="vblocks"> [in,out] The vblocks. </param>
|
||||
/// <param name="vaccs"> [in,out] The vaccs. </param>
|
||||
/// <param name="vstreams"> [in,out] The vstreams. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
BOOL CollectMigrationResources(
|
||||
std::list<Datablock*> &vblocks,
|
||||
std::list<Accelerator*> &vaccs,
|
||||
std::list<CUstream> &vstreams);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Executes the ps dispatch enter action. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/20/2013. </remarks>
|
||||
///
|
||||
|
||||
/// <param name="hStream"> The stream (a CUstream handle). </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
BOOL OnPSDispatchEnter(CUstream hStream);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Executes the ps dispatch exit action. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/20/2013. </remarks>
|
||||
///
|
||||
|
||||
/// <param name="hStream"> The stream (a CUstream handle). </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
BOOL OnPSDispatchExit(CUstream hStream);
|
||||
|
||||
std::map<Accelerator*, CUfunction> m_pCSMap;
|
||||
std::map<Accelerator*, CUmodule> m_pModuleMap;
|
||||
std::map<Port*, UINT> m_pParameterOffsets;
|
||||
UINT m_uiParameterSize;
|
||||
BOOL m_bParameterOffsetsInitialized;
|
||||
UINT m_nPreferredXDim;
|
||||
UINT m_nPreferredYDim;
|
||||
UINT m_nPreferredZDim;
|
||||
BOOL m_bGeometryExplicit;
|
||||
BOOL m_bThreadBlockSizesExplicit;
|
||||
PTASKDIM3 m_pThreadBlockSize;
|
||||
PTASKDIM3 m_pGridSize;
|
||||
CUevent m_hPSDispatchStart;
|
||||
CUevent m_hPSDispatchEnd;
|
||||
BOOL m_bPSDispatchEventsValid;
|
||||
};
|
||||
};
|
||||
#endif // CUDA_SUPPORT
|
||||
#endif
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -1,591 +0,0 @@
|
|||
//--------------------------------------------------------------------------------------
|
||||
// File: datablocktemplate.h
|
||||
// Maintainer: crossbac@microsoft.com
|
||||
//--------------------------------------------------------------------------------------
|
||||
#ifndef _DATABLOCK_TEMPLATE_H_
|
||||
#define _DATABLOCK_TEMPLATE_H_
|
||||
|
||||
#include <stdio.h>
|
||||
#include <crtdbg.h>
|
||||
#include <Windows.h>
|
||||
#include "primitive_types.h"
|
||||
#include "datablock.h"
|
||||
#include "ReferenceCounted.h"
|
||||
|
||||
using namespace PTask;
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Values that represent the different points in the lifecycle of a datablock
|
||||
/// where the application context associated with a datablock can be managed
|
||||
/// via a callback. </summary>
|
||||
///
|
||||
/// <remarks> jcurrey, 5/1/2014. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
typedef enum applicationcontext_callback_point_t {
|
||||
|
||||
/// <summary> Point at which a datablock is created. </summary>
|
||||
CALLBACKPOINT_CREATE,
|
||||
|
||||
/// <summary> Point at which a datablock is cloned. </summary>
|
||||
CALLBACKPOINT_CLONE,
|
||||
|
||||
/// <summary> Point at which a datablock is destroyed. </summary>
|
||||
CALLBACKPOINT_DESTROY
|
||||
|
||||
} APPLICATIONCONTEXTCALLBACKPOINT;
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Function signature of callbacks used to manage the application context associated
|
||||
/// with datablocks. Set on a per-template basis, via
|
||||
/// DatablockTemplate::SetApplicationContextCallback().
|
||||
///
|
||||
/// If eCallbackPoint is CALLBACKPOINT_CREATE or CALLBACKPOINT_DESTROY,
|
||||
/// ppApplicationContext points to the application context of the datablock being
|
||||
/// created or destroyed.
|
||||
///
|
||||
/// If eCallbackPoint is CALLBACKPOINT_CLONE, ppApplicationContext points to the
|
||||
/// application context of the datablock clone being created. The application context
|
||||
/// of the datablock being cloned is accessible via pDatablock.
|
||||
///
|
||||
/// pDatablock is provided for information only. None of its state should be modified
|
||||
/// by the callback.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> jcurrey, 5/1/2014. </remarks>
|
||||
///
|
||||
/// <param name="eCallbackPoint"> [in] The point in the datablock's lifecycle at which the callback was called. </param>
|
||||
/// <param name="pDatablock"> [in] The datablock being created, cloned or destroyed. </param>
|
||||
/// <param name="ppApplicationContext"> [inout] The application context to be managed. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
typedef void (__stdcall *LPFNAPPLICATIONCONTEXTCALLBACK)(
|
||||
__in APPLICATIONCONTEXTCALLBACKPOINT eCallbackPoint,
|
||||
__in const Datablock * pDatablock,
|
||||
__inout void ** ppApplicationContext
|
||||
);
|
||||
|
||||
namespace PTask {
|
||||
|
||||
class DatablockTemplate : public ReferenceCounted
|
||||
{
|
||||
public:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Constructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <param name="lpszTemplateName"> [in] If non-null, name of the template. </param>
|
||||
/// <param name="uiElementStride"> [in] The element stride in bytes. </param>
|
||||
/// <param name="uiElementsX"> [in] Number of elements in X dimension. </param>
|
||||
/// <param name="uiElementsY"> [in] Number of elements in Y dimension. </param>
|
||||
/// <param name="uiElementsZ"> [in] Number of elements in Z dimension. </param>
|
||||
/// <param name="bIsRecordStream"> [in] true if this object is record stream. </param>
|
||||
/// <param name="bIsByteAddressable"> [in] true if this object is byte addressable. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
DatablockTemplate(
|
||||
__in char * lpszTemplateName,
|
||||
__in unsigned int uiElementStride,
|
||||
__in unsigned int uiElementsX,
|
||||
__in unsigned int uiElementsY,
|
||||
__in unsigned int uiElementsZ,
|
||||
__in bool bIsRecordStream,
|
||||
__in bool bIsByteAddressable
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Constructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <param name="lpszTemplateName"> [in] If non-null, name of the template. </param>
|
||||
/// <param name="uiElementStride"> [in] The element stride in bytes. </param>
|
||||
/// <param name="uiElementsX"> [in] Number of elements in X dimension. </param>
|
||||
/// <param name="uiElementsY"> [in] Number of elements in Y dimension. </param>
|
||||
/// <param name="uiElementsZ"> [in] Number of elements in Z dimension. </param>
|
||||
/// <param name="uiPitch"> [in] The row pitch. </param>
|
||||
/// <param name="bIsRecordStream"> [in] true if this object is record stream. </param>
|
||||
/// <param name="bIsByteAddressable"> [in] true if this object is byte addressable. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
DatablockTemplate(
|
||||
__in char * lpszTemplateName,
|
||||
__in unsigned int uiElementStride,
|
||||
__in unsigned int uiElementsX,
|
||||
__in unsigned int uiElementsY,
|
||||
__in unsigned int uiElementsZ,
|
||||
__in unsigned int uiPitch,
|
||||
__in bool bIsRecordStream,
|
||||
__in bool bIsByteAddressable
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Constructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <param name="lpszTemplateName"> [in] If non-null, name of the template. </param>
|
||||
/// <param name="pBufferDims"> [in] The buffer dimensions. </param>
|
||||
/// <param name="uiNumBufferDims"> [in] Number of buffer dimension entries (presumably the length of pBufferDims). </param>
|
||||
/// <param name="bIsRecordStream"> [in] true if this object is record stream. </param>
|
||||
/// <param name="bIsByteAddressable"> [in] true if this object is byte addressable. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
DatablockTemplate(
|
||||
__in char * lpszTemplateName,
|
||||
__in BUFFERDIMENSIONS * pBufferDims,
|
||||
__in unsigned int uiNumBufferDims,
|
||||
__in bool bIsRecordStream,
|
||||
__in bool bIsByteAddressable
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Constructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <param name="lpszTemplateName"> [in] If non-null, name of the template. </param>
|
||||
/// <param name="uiElementStride"> [in] The element stride in bytes. </param>
|
||||
/// <param name="describedParameterType"> [in] Type of the described parameter. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
DatablockTemplate(
|
||||
__in char * lpszTemplateName,
|
||||
__in unsigned int uiElementStride,
|
||||
__in PTASK_PARM_TYPE describedParameterType
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual ~DatablockTemplate();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the stride. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <returns> The stride. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
unsigned int GetStride(UINT uiChannelIndex=DBDATA_IDX);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the number of elements in X. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <returns> The number of elements in X. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
unsigned int GetXElementCount(UINT uiChannelIndex=DBDATA_IDX);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the number of elements in Y. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <returns> The number of elements in Y. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
unsigned int GetYElementCount(UINT uiChannelIndex=DBDATA_IDX);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the number of elements in Z. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <returns> The number of elements in Z. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
unsigned int GetZElementCount(UINT uiChannelIndex=DBDATA_IDX);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the total number of elements. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <returns> The total element count. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
unsigned int GetTotalElementCount(UINT uiChannelIndex=DBDATA_IDX);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the number of elements in the given dimension. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <returns> The element count for the given dimension. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
unsigned int GetDimensionElementCount(UINT uiDim, UINT uiChannelIndex=DBDATA_IDX);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the pitch. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 2/18/2013. </remarks>
|
||||
///
|
||||
/// <param name="uiChannelIndex"> (optional) zero-based index of the channel. </param>
|
||||
///
|
||||
/// <returns> The pitch. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
unsigned int GetPitch(UINT uiChannelIndex=DBDATA_IDX);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets buffer dimensions. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 2/18/2013. </remarks>
|
||||
///
|
||||
/// <param name="uiChannelIndex"> (optional) zero-based index of the channel. </param>
|
||||
///
|
||||
/// <returns> The buffer dimensions. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
BUFFERDIMENSIONS GetBufferDimensions(UINT uiChannelIndex=DBDATA_IDX);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Sets buffer dimensions. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 2/18/2013. </remarks>
|
||||
///
|
||||
/// <param name="dims"> The buffer dimensions to set. </param>
|
||||
/// <param name="uiChannelIndex"> (optional) zero-based index of the channel. </param>
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void SetBufferDimensions(BUFFERDIMENSIONS &dims, UINT uiChannelIndex=DBDATA_IDX);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the datablock byte count. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <returns> The datablock byte count. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
unsigned int GetDatablockByteCount(UINT nChannelIndex=0);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if this object is byte-addressable. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <returns> true if byte addressable, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual bool IsByteAddressable();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if this object is variable dimensioned. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <returns> true if variable dimensioned, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual bool IsVariableDimensioned();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Sets whether the template describes byte addressable blocks. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <param name="bIsByteAddressable"> [in] true if this object is byte addressable. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void
|
||||
SetByteAddressable(
|
||||
__in bool bIsByteAddressable
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Return true if this template describes blocks that
|
||||
/// comprise a record stream.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <returns> true if the template indicates a record stream. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL DescribesRecordStream();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Return true if this template describes blocks that
|
||||
/// are used as scalar parameter in kernel functions.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <returns> true if the template describes scalar parameter blocks. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL DescribesScalarParameter();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the parameter base type. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <returns> The parameter base type. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual PTASK_PARM_TYPE GetParameterBaseType();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Sets the default value. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <param name="lpvInitData"> [in] If non-null, the buffer holding the initial value data. </param>
|
||||
/// <param name="cbData"> [in] Size of the initial value data, in bytes. </param>
|
||||
/// <param name="nRecordCount"> [in] Number of records. </param>
|
||||
/// <param name="bExplicitlyEmpty"> [in] True if this initializer describes an explicitly empty
|
||||
/// initial value (0-length) We track this explicitly because
|
||||
/// creating resources based on such initial values that can
|
||||
/// actually be bound to device-side execution parameters
|
||||
/// necessitates the creation of non-zero-size buffers, whose
|
||||
/// logical length is still 0. Hence, we must decouple the
|
||||
/// tracking of the "empty" property from whether the init buffer
|
||||
/// is null or has no length in general. A null initializer does
|
||||
/// not necessarily mean no initializer has been set! </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void
|
||||
SetInitialValue(
|
||||
__in void * lpvInitData,
|
||||
__in UINT cbData,
|
||||
__in UINT nRecordCount,
|
||||
__in BOOL bExplicitlyEmpty=FALSE
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the initial value size. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <returns> The initial value size. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual UINT GetInitialValueSizeBytes();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the number of elements in the initial value. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <returns> The number of elements in the initial value. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual UINT GetInitialValueElements();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the initial value. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <returns> null if it fails, else the initial value. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual const void * GetInitialValue();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if this object has an initial value. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/15/2012. </remarks>
|
||||
///
|
||||
/// <returns> true if initial value, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL HasInitialValue();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if this object has an initial value that can be recreated easily
|
||||
/// using a memset (rather than a memcpy). The object is memsettable if it has
|
||||
/// an initial value whose size is less than 4 bytes, or whose initial value
|
||||
/// is identical for all elements when the value is interpreted as either a 4-byte
|
||||
/// int or an unsigned char.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/15/2012. </remarks>
|
||||
///
|
||||
/// <returns> true if the initial value is memsettable, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL IsInitialValueMemsettable(UINT szPrimitiveSize=0);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if this object has an initial value that can be recreated easily
|
||||
/// using a memset (rather than a memcpy), restricted to 8 bit objects.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/15/2012. </remarks>
|
||||
///
|
||||
/// <returns> true if the initial value is memsettable as 8-bit objects, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL IsInitialValueMemsettableD8();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the initial value memset stride. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 7/6/2012. </remarks>
|
||||
///
|
||||
/// <returns> The initial value memset stride. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual UINT GetInitialValueMemsetStride();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Queries if the initial value for this template is empty. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/15/2012. </remarks>
|
||||
///
|
||||
/// <returns> true if an initial value is empty, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL IsInitialValueEmpty();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the template name. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <returns> null if it fails, else the template name. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual char * GetTemplateName();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Set the application context callback function associated with this
|
||||
/// datablock template. </summary>
|
||||
///
|
||||
/// <remarks> jcurrey, 5/1/2014. </remarks>
|
||||
///
|
||||
/// <param name="pCallback"> [in] The callback function to associate with this template. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void SetApplicationContextCallback(LPFNAPPLICATIONCONTEXTCALLBACK pCallback);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Get the application context callback function associated with this
|
||||
/// datablock template. </summary>
|
||||
///
|
||||
/// <remarks> jcurrey, 5/1/2014. </remarks>
|
||||
///
|
||||
/// <returns> The callback function associated with this template. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual LPFNAPPLICATIONCONTEXTCALLBACK GetApplicationContextCallback();
|
||||
|
||||
protected:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Default initialize. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 7/9/2012. </remarks>
|
||||
///
|
||||
/// <param name="lpszTemplateName"> [in,out] If non-null, name of the template. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void DefaultInitialize(char * lpszTemplateName);
|
||||
|
||||
/// <summary> true if this template describes a
|
||||
/// record stream
|
||||
/// </summary>
|
||||
bool m_bRecordStream;
|
||||
|
||||
/// <summary> true if this template describes
|
||||
/// byte-addressable datablocks
|
||||
/// </summary>
|
||||
bool m_bByteAddressable;
|
||||
|
||||
/// <summary> true if this template describes blocks
|
||||
/// that are used as scalar parameters in
|
||||
/// kernel invocations </summary>
|
||||
bool m_bScalarParameter;
|
||||
|
||||
/// <summary> The parameter base type</summary>
|
||||
PTASK_PARM_TYPE m_bParameterBaseType;
|
||||
|
||||
/// <summary> The name of datablock template,
|
||||
/// user-supplied (in a hopefully
|
||||
/// descriptive way)
|
||||
/// </summary>
|
||||
char * m_lpszTemplateName;
|
||||
|
||||
#if 0
|
||||
/// <summary> The stride in bytes of a single
|
||||
/// element in a block created with
|
||||
/// this template.
|
||||
/// </summary>
|
||||
unsigned int m_uiStride;
|
||||
|
||||
/// <summary> The vui channel dimensions. </summary>
|
||||
unsigned int* m_pChannelDimensions[NUM_DATABLOCK_CHANNELS];
|
||||
|
||||
/// <summary> Sizes of the three dimensions of elements in blocks created with this template.
|
||||
/// </summary>
|
||||
unsigned int m_vuiDataDimensions[MAX_DATABLOCK_DIMENSIONS];
|
||||
|
||||
/// <summary> The vui meta dimensions. </summary>
|
||||
unsigned int m_vuiMetaDimensions[MAX_DATABLOCK_DIMENSIONS];
|
||||
|
||||
/// <summary> The vui template data dimensions. </summary>
|
||||
unsigned int m_vuiTemplateDataDimensions[MAX_DATABLOCK_DIMENSIONS];
|
||||
#endif
|
||||
|
||||
/// <summary> The channel dimensions, per channel type. </summary>
|
||||
BUFFERDIMENSIONS m_vChannelDimensions[NUM_DATABLOCK_CHANNELS];
|
||||
|
||||
/// <summary> An (optional) initial value. </summary>
|
||||
void * m_lpvInitialValue;
|
||||
|
||||
/// <summary> Size of the initial value buffer if such a buffer is extant. </summary>
|
||||
UINT m_cbInitialValue;
|
||||
|
||||
/// <summary> Number of records in the initial value. Generally speaking this
|
||||
/// value should be the same as m_cbInitialValue/stride, but we
|
||||
/// insist on this redundancy to enable sanity checking. </summary>
|
||||
UINT m_nInitialRecordCount;
|
||||
|
||||
/// <summary> true if the initial value is explicitly empty, meaning that a null
|
||||
/// m_lpvInitialValue pointer or 0-valued m_cbInitialValue does not indicate
|
||||
/// the absence of an initializer for this template.
|
||||
/// </summary>
|
||||
BOOL m_bExplicitlyEmptyInitialValue;
|
||||
|
||||
/// <summary> true if we have already checked whether this template
|
||||
/// has an initial value that can be created with a memset
|
||||
/// call (rather than a memcpy). </summary>
|
||||
BOOL m_bMemsetCheckComplete;
|
||||
|
||||
/// <summary> true if the initial value can be created with memset.
|
||||
/// Valid only if m_bMemsetCheckComplete is true.
|
||||
/// </summary>
|
||||
BOOL m_bMemsettableInitialValue;
|
||||
|
||||
/// <summary> true if the initial value can be created with a byte-granularity (8-bit) memset.
|
||||
/// Valid only if m_bMemsetCheckComplete is true.
|
||||
/// </summary>
|
||||
BOOL m_bMemsettableInitialValueD8;
|
||||
|
||||
/// <summary> The memsettable initial value (byte-granularity). </summary>
|
||||
unsigned char m_ucMemsettableInitialValueD8;
|
||||
|
||||
/// <summary> The memset initial value stride. </summary>
|
||||
UINT m_bMemsetInitialValueStride;
|
||||
|
||||
/// <summary> The application context callback. </summary>
|
||||
LPFNAPPLICATIONCONTEXTCALLBACK m_pApplicationContextCallback;
|
||||
|
||||
};
|
||||
|
||||
};
|
||||
#endif
|
|
@ -1,132 +0,0 @@
|
|||
///-------------------------------------------------------------------------------------------------
|
||||
// file: dispatchcounter.h
|
||||
//
|
||||
// summary: Declares the dispatchcounter class
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
#ifndef _DISPATCH_COUNTER_H_
|
||||
#define _DISPATCH_COUNTER_H_
|
||||
|
||||
#include "primitive_types.h"
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <set>
|
||||
|
||||
class CHighResolutionTimer;
|
||||
class CSharedPerformanceTimer;
|
||||
|
||||
namespace PTask {
|
||||
|
||||
class Task;
|
||||
class Port;
|
||||
|
||||
/// <summary> Diagnostics helper that counts task dispatches. Each instance is bound to
///           one Task and holds that task's expected and actual dispatch counts;
///           static members hold a class-wide map of dispatch counts keyed by task
///           name, a lock for that map, and an "initialized" flag.
///           </summary>
class DispatchCounter {
|
||||
|
||||
public:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Constructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/18/2013.
///           NOTE(review): this single-argument constructor is not marked explicit, so a
///           Task* converts implicitly to a DispatchCounter; consider making it explicit
///           if no caller relies on that conversion. </remarks>
|
||||
///
|
||||
/// <param name="pTask"> [in] The task this counter is associated with (presumably stored in m_pTask; constructor body not visible here). </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
DispatchCounter(Task * pTask);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destructor. Declared virtual, so deletion through a base pointer is safe. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/18/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual ~DispatchCounter();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Initialises the invocation counting diagnostics tool. This facility
|
||||
/// allows us to track the number of invocations per task and compare
|
||||
/// optionally against specified expected number. Useful for finding
|
||||
/// races or situations where tasks are firing when they shouldn't.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 2/28/2012.
///           Static: operates on class-wide state, not on a particular counter.
///           NOTE(review): presumably sets up m_csDispatchMap and
///           m_bDispatchCountingInitialized; confirm against the implementation. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static void Initialize();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Deinitialises the invocation counting diagnostics tool. This facility
|
||||
/// allows us to track the number of invocations per task and compare
|
||||
/// optionally against specified expected number. Useful for finding
|
||||
/// races or situations where tasks are firing when they shouldn't.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 2/28/2012.
///           Static counterpart of Initialize(). </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static void Deinitialize();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Dumps the dispatch counts for every task in the graph. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 2/28/2012.
///           NOTE(review): std::ostream (here) and std::string (in Verify and
///           m_vDispatchMap) are used, but this header directly includes only
///           primitive_types.h, vector, map and set; presumably the declarations
///           arrive transitively. Confirm. </remarks>
///
/// <param name="ss"> [in,out] The output stream that receives the report. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static void Report(std::ostream& ss);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Verify dispatch counts against a prediction for every task in the graph. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 2/28/2012. </remarks>
|
||||
///
|
||||
/// <param name="pvInvocationCounts"> [in] If non-null, the predicted invocation counts (presumably keyed by task name, like m_vDispatchMap; confirm). </param>
|
||||
///
|
||||
/// <returns> true if the actual and predicted match, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static BOOL Verify(std::map<std::string, UINT> * pvInvocationCounts);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Record the fact that a task dispatch has occurred. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 2/28/2012.
///           NOTE(review): presumably updates m_nActualDispatches and the shared
///           m_vDispatchMap for this counter's task; confirm against the implementation. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void RecordDispatch();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Sets the expected dispatch count for the given task. The runtime will assert if
|
||||
/// the actual number of dispatches for the task exceeds this number.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 2/28/2012. </remarks>
|
||||
///
|
||||
/// <param name="nDispatchCount"> The expected number of dispatches for this counter's task. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void SetExpectedDispatchCount(UINT nDispatchCount);
|
||||
|
||||
protected:
|
||||
|
||||
/// <summary> Lock for the dispatch count map (static: shared by all counters). </summary>
|
||||
static CRITICAL_SECTION m_csDispatchMap;
|
||||
|
||||
/// <summary> Number of dispatches per task. Keyed by name to
|
||||
/// be robust to graph deletion/runtime-cleanup </summary>
|
||||
static std::map<std::string, UINT> m_vDispatchMap;
|
||||
|
||||
/// <summary> true if dispatch counting initialized. </summary>
|
||||
static BOOL m_bDispatchCountingInitialized;
|
||||
|
||||
/// <summary> The task this counter is associated with (raw pointer). </summary>
|
||||
Task * m_pTask;
|
||||
|
||||
/// <summary> The expected number of dispatches for this task. </summary>
|
||||
UINT m_nExpectedDispatches;
|
||||
|
||||
/// <summary> The actual number of times this task has been dispatched. </summary>
|
||||
UINT m_nActualDispatches;
|
||||
|
||||
};
|
||||
};
|
||||
#endif
|
|
@ -1,695 +0,0 @@
|
|||
//--------------------------------------------------------------------------------------
|
||||
// File: dxaccelerator.h
|
||||
// direct x based accelerator
|
||||
// Maintainer: crossbac@microsoft.com
|
||||
//--------------------------------------------------------------------------------------
|
||||
#ifndef _DX_ACCELERATOR_H_
|
||||
#define _DX_ACCELERATOR_H_
|
||||
|
||||
#include "primitive_types.h"
|
||||
#include "ptdxhdr.h"
|
||||
#include "datablocktemplate.h"
|
||||
#include "dxcodecache.h"
|
||||
#include "accelerator.h"
|
||||
#include "CompiledKernel.h"
|
||||
#include <vector>
|
||||
|
||||
namespace PTask {
|
||||
|
||||
class DXAccelerator : public Accelerator {
|
||||
public:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Default constructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/19/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
DXAccelerator();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/19/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual ~DXAccelerator();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Opens this accelerator. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/19/2011. </remarks>
|
||||
///
|
||||
/// <returns> . </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
HRESULT Open();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Opens a DXAccelerator by
|
||||
/// associating the DXAccelerator object with an adapter
|
||||
/// and a live D3D11 device context </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/19/2011. </remarks>
|
||||
///
|
||||
/// <param name="pAdapter"> [in] If non-null, the adapter. </param>
|
||||
/// <param name="uiEnumerationIndex"> Zero-based index of the adapter when
|
||||
/// the OS enumerates it. This is necessary because
|
||||
/// the D3D11 APIs for creating a device are
|
||||
/// idiosyncratic in the presence of multiple
|
||||
/// adapters.</param>
|
||||
///
|
||||
/// <returns> S_OK on success, E_FAIL otherwise.
|
||||
/// Use windows SUCCEEDED() and FAILED() macros </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
HRESULT Open(IDXGIAdapter * pAdapter, UINT uiEnumerationIndex);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Opens a reference device.
|
||||
/// Should only be called if the programmer wants to work
|
||||
/// with the runtime in an environment where no DX11 hardware
|
||||
/// is present, since the reference device is very very slow.
|
||||
/// Use PTask::Runtime::SetUseReferenceDevices() to enable
|
||||
/// this feature.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/19/2011. </remarks>
|
||||
///
|
||||
/// <returns> . </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
HRESULT OpenReferenceDevice();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Return a pointer to the platform-specific device object. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/17/2011. </remarks>
|
||||
///
|
||||
/// <returns> null if it fails, else the device. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void* GetDevice();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Return a pointer to the platform-specific device context. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/17/2011. </remarks>
|
||||
///
|
||||
/// <returns> null if it fails, else the context. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void* GetContext();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Creates an asynchronous context for the task, wrapping whatever
|
||||
/// asynchrony abstraction this accelerator's backend supports (see remarks).
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 12/20/2011.
|
||||
///
|
||||
/// This method is required of all subclasses, and abstracts the work associated with
|
||||
/// managing whatever framework-level asynchrony abstractions are supported by the
|
||||
/// backend target. For example, CUDA supports the "stream", while DirectX supports
|
||||
/// an ID3D11ImmediateContext, OpenCL has command queues, and so on.
|
||||
/// </remarks>
|
||||
///
|
||||
/// <param name="pTask"> [in] non-null, the task for which the asynchronous
|
||||
/// context is created. </param>
|
||||
/// <param name="eAsyncContextType"> Type of the asynchronous context. </param>
|
||||
///
|
||||
/// <returns> The asynchronous context created for the task (presumably null on failure; confirm). </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual AsyncContext *
|
||||
PlatformSpecificCreateAsyncContext(
|
||||
__in Task * pTask,
|
||||
__in ASYNCCONTEXTTYPE eAsyncContextType
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Cache the DX objects created when a shader is compiled
|
||||
/// so that when subsequent calls are made to compile the
|
||||
/// same function, we can reuse the existing binaries. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/19/2011. </remarks>
|
||||
///
|
||||
/// <param name="szFile"> [in] non-null, the file name. </param>
|
||||
/// <param name="szFunc"> [in] non-null, the function name </param>
|
||||
/// <param name="p"> [in] non-null, a pointer to a ID3D11ComputeShader. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void CachePutShader(char * szFile, char * szFunc, ID3D11ComputeShader*p);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Check the shader cache for an existing binary made from the
|
||||
/// given HLSL file and function name. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/19/2011. </remarks>
|
||||
///
|
||||
/// <param name="szFile"> [in] If non-null, the file. </param>
|
||||
/// <param name="szFunc"> [in] If non-null, the func. </param>
|
||||
///
|
||||
/// <returns> null if it fails, else the shader binary. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
ID3D11ComputeShader* CacheGetShader(char * szFile, char * szFunc);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Compiles accelerator source code to create a PTask binary. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/17/2011.
|
||||
/// The function accepts a file name and an operation in the file
|
||||
/// to build a binary for. For example, "foo.hlsl" and "vectoradd" will
|
||||
/// compile the vectoradd() shader in foo.hlsl. On success the function
|
||||
/// will create platform-specific binary and module objects that can be
|
||||
/// later used by the runtime to invoke the shader code. The caller can
|
||||
/// provide a buffer for compiler output, which if present, the runtime
|
||||
/// will fill *iff* the compilation fails.
|
||||
/// ***
|
||||
/// NB: Thread group dimensions are optional parameters here but
|
||||
/// *must* be used for optimal performance because DirectX requires
|
||||
/// statically specified thread group sizes, and the default values
|
||||
/// of 1, 1, 1 are not likely to be a good performance combination.
|
||||
/// </remarks>
|
||||
///
|
||||
/// <param name="lpszFileName"> [in] filename+path of source. cannot be null.</param>
|
||||
/// <param name="lpszOperation"> [in] Function name in source file. cannot be null.</param>
|
||||
/// <param name="ppPlatformSpecificBinary"> [out] On success, a platform specific binary. </param>
|
||||
/// <param name="ppPlatformSpecificModule"> [out] On success, a platform specific module handle. </param>
|
||||
/// <param name="lpszCompilerOutput"> (optional) [in,out] On failure, the compiler output. </param>
|
||||
/// <param name="uiCompilerOutput"> (optional) [in] length of buffer supplied for
|
||||
/// compiler output. </param>
|
||||
/// <param name="tgx"> (optional) thread group X dimensions. (see remarks)</param>
|
||||
/// <param name="tgy"> (optional) thread group Y dimensions. (see remarks)</param>
|
||||
/// <param name="tgz"> (optional) thread group Z dimensions. (see remarks)</param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL Compile(
|
||||
char * lpszFileName,
|
||||
char * lpszOperation,
|
||||
void ** ppPlatformSpecificBinary,
|
||||
void ** ppPlatformSpecificModule,
|
||||
char * lpszCompilerOutput=NULL,
|
||||
int uiCompilerOutput=0,
|
||||
int tgx=1,
|
||||
int tgy=1,
|
||||
int tgz=1
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Compiles accelerator source code to create a PTask binary. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/17/2011.
|
||||
///
|
||||
/// The function accepts a string of source code and an operation in that source to
|
||||
/// build a binary for. This is a convenience for source code that may not be stored
|
||||
/// in files (e.g. dynamically generated code). On success the function will create
|
||||
/// platform- specific binary and module objects that can be later used by the
|
||||
/// runtime to invoke the shader code. The caller can provide a buffer for compiler
|
||||
/// output, which if present, the runtime will fill *iff* the compilation fails.
|
||||
///
|
||||
/// NB: Thread group dimensions are optional parameters here. This is because some
|
||||
/// runtimes require them statically, and some do not. DirectX requires thread-group
|
||||
/// sizes to be specified statically to enable compiler optimizations that cannot be
|
||||
/// used otherwise. CUDA and OpenCL allow runtime specification of these parameters.
|
||||
/// </remarks>
|
||||
///
|
||||
/// <param name="lpszShaderCode"> [in] actual source. cannot be null. </param>
|
||||
/// <param name="uiShaderCodeSize"> Size of the shader code. </param>
|
||||
/// <param name="lpszOperation"> [in] Function name in source file. cannot be null. </param>
|
||||
/// <param name="ppPlatformSpecificBinary"> [out] On success, a platform specific binary. </param>
|
||||
/// <param name="ppPlatformSpecificModule"> [out] On success, a platform specific module handle. </param>
|
||||
/// <param name="lpszCompilerOutput"> (optional) [in,out] On failure, the compiler output. </param>
|
||||
/// <param name="uiCompilerOutput"> (optional) [in] length of buffer supplied for
|
||||
/// compiler output. </param>
|
||||
/// <param name="nThreadGroupSizeX"> (optional) thread group X dimensions. (see remarks) </param>
|
||||
/// <param name="nThreadGroupSizeY"> (optional) thread group Y dimensions. (see remarks) </param>
|
||||
/// <param name="nThreadGroupSizeZ"> (optional) thread group Z dimensions. (see remarks) </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
BOOL
|
||||
Compile(
|
||||
__in char * lpszShaderCode,
|
||||
__in UINT uiShaderCodeSize,
|
||||
__in char * lpszOperation,
|
||||
__in void ** ppPlatformSpecificBinary,
|
||||
__in void ** ppPlatformSpecificModule,
|
||||
__in char * lpszCompilerOutput,
|
||||
__in int uiCompilerOutput,
|
||||
__in int nThreadGroupSizeX,
|
||||
__in int nThreadGroupSizeY,
|
||||
__in int nThreadGroupSizeZ
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Return true if this accelerator's device context is current. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/17/2011. </remarks>
|
||||
///
|
||||
/// <returns> true if the context is current. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL IsDeviceContextCurrent();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Makes the context current. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/19/2011. </remarks>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL MakeDeviceContextCurrent();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Releases the current context. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/19/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void ReleaseCurrentDeviceContext();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the D3D feature level for the hardware
|
||||
/// behind this accelerator object. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/19/2011. </remarks>
|
||||
///
|
||||
/// <returns> The feature level. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual D3D_FEATURE_LEVEL GetFeatureLevel();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Return true if this accelerator has some support for device to device transfer
|
||||
/// with the given accelerator. This allows us to skip a trip through host memory
|
||||
/// in many cases.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/25/2012. </remarks>
|
||||
///
|
||||
/// <param name="pAccelerator"> [in,out] If non-null, the accelerator. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL SupportsDeviceToDeviceTransfer(Accelerator * pAccelerator);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Device to device transfer. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/25/2012. </remarks>
|
||||
///
|
||||
/// <param name="pDstBuffer"> [in,out] If non-null, the accelerator. </param>
|
||||
/// <param name="pSrcBuffer"> [in,out] If non-null, buffer for source data. </param>
|
||||
/// <param name="pAsyncContext"> [in,out] If non-null, context for the asynchronous. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
DeviceToDeviceTransfer(
|
||||
__inout PBuffer * pDstBuffer,
|
||||
__in PBuffer * pSrcBuffer,
|
||||
__in AsyncContext * pAsyncContext
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Returns whether this accelerator supports device memcpy. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/12/2012. </remarks>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL SupportsDeviceMemcpy();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Return true if the front-end programming model
|
||||
/// supports function arguments for top-level kernel
|
||||
/// invocations. DirectX requires
|
||||
/// top-level invocations to find their inputs
|
||||
/// at global scope in constant buffers and
|
||||
/// *StructuredBuffers, etc. so this function
|
||||
/// always returns false for this class.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/17/2011. </remarks>
|
||||
///
|
||||
/// <returns> FALSE. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL SupportsFunctionArguments();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Return true if the underlying platform supports byval arguments
|
||||
/// for kernel invocations. If the platform does support this,
|
||||
/// PTask can elide explicit creation and population of
|
||||
/// buffers to back these arguments, which is a performance
|
||||
/// win when it is actually supported. DirectX does not
|
||||
/// support this sort of thing so we always return false.</summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/17/2011. </remarks>
|
||||
///
|
||||
/// <returns> FALSE </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL SupportsByvalArguments();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Synchronizes the context.
|
||||
/// We could force a synchronization using
|
||||
/// ID3D11Device functions (flush, end), but
|
||||
/// there is no need because any attempt to reference
|
||||
/// output from a PTask executed by a DXAccelerator will
|
||||
/// force the completion of any predecessor operations.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/19/2011. </remarks>
|
||||
///
|
||||
/// <param name="ctxt"> [in] non-null, the device ctxt. </param>
|
||||
/// <param name="pTask"> (optional) [in] If non-null, the task. </param>
|
||||
///
|
||||
/// <returns> true. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL Synchronize(Task*pTask=NULL);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Check whether the given accelerator has a memory
|
||||
/// space that is accessible from this accelerator without
|
||||
/// copying explicitly through host memory space. Currently,
|
||||
/// CUDA interop APIs make it the case that we should be able
|
||||
/// to migrate between CUDA and DirectX devices without
|
||||
/// necessarily going through the host.
|
||||
/// TODO: take advantage of these APIs.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/17/2011. </remarks>
|
||||
///
|
||||
/// <param name="p"> [in] non-null, a second accelerator. </param>
|
||||
///
|
||||
/// <returns> true if accessible memory space, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL HasAccessibleMemorySpace(Accelerator*p);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Return true if the runtime for this accelerator
|
||||
/// supports pinned host memory. DirectX does not expose this
|
||||
/// functionality through the API, so we always return false
|
||||
/// from DXAccelerator.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/17/2011. </remarks>
|
||||
///
|
||||
/// <returns> false </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL SupportsPinnedHostMemory();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Allocate memory on the host. Some runtimes (esp. earlier versions of CUDA)
|
||||
/// require that CUDA APIs be used to allocate host-side buffers, or support
|
||||
/// specialized host allocators that can help improve DMA performance.
|
||||
/// AllocatePagelockedHostMemory wraps these APIs for accelerators that have runtime support
|
||||
/// for this, and uses normal system services (VirtualAlloc on Windows, malloc
|
||||
/// elsewhere) to satisfy requests.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/17/2011. </remarks>
|
||||
///
|
||||
/// <param name="uiBytes"> Number of bytes to allocate. </param>
|
||||
/// <param name="pbResultPageLocked"> [in,out] If non-null, the result of whether the
|
||||
/// allocated memory is page-locked is provided here. </param>
|
||||
///
|
||||
/// <returns> byte pointer on success, null on failure. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void *
|
||||
AllocatePagelockedHostMemory(
|
||||
UINT uiBytes,
|
||||
BOOL * pbResultPageLocked
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Free host memory. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/17/2011. </remarks>
|
||||
///
|
||||
/// <param name="pBuffer"> If non-null, the buffer. </param>
|
||||
/// <param name="bPageLocked"> true if the memory was allocated in the page-locked area. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void
|
||||
FreeHostMemory(
|
||||
void * pBuffer,
|
||||
BOOL bPageLocked
|
||||
);
|
||||
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the adapter for this accelerator. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/19/2011. </remarks>
|
||||
///
|
||||
/// <returns> null if it fails, else the adapter. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual IDXGIAdapter* GetAdapter();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the adapter description. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/19/2011. </remarks>
|
||||
///
|
||||
/// <returns> null if it fails, else the adapter description. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual DXGI_ADAPTER_DESC* GetAdapterDesc();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Enumerate accelerators present on the current machine
|
||||
/// and populate a vector with opened Accelerator objects.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/19/2011. </remarks>
|
||||
///
|
||||
/// <param name="candidates"> [in] candidates list to populate </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static void EnumerateAccelerators(std::vector<Accelerator*> &candidates);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Creates a device. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/19/2011. </remarks>
|
||||
///
|
||||
/// <param name="pAdapter"> [in,out] If non-null, the adapter. </param>
|
||||
/// <param name="DriverType"> Type of the driver. </param>
|
||||
/// <param name="Software"> external software rasterizer (always NULL!). </param>
|
||||
/// <param name="Flags"> creation flags to pass to DX runtime. </param>
|
||||
/// <param name="pFeatureLevels"> Acceptable DX feature levels list. </param>
|
||||
/// <param name="FeatureLevels"> Number of entries in feature levels list. </param>
|
||||
/// <param name="SDKVersion"> The sdk version. </param>
|
||||
/// <param name="ppDevice"> [out] If non-null, the device. </param>
|
||||
/// <param name="pFeatureLevel"> [out] If non-null, the feature level of the device </param>
|
||||
/// <param name="ppImmediateContext"> [out] If non-null, context for the device. </param>
|
||||
///
|
||||
/// <returns> HRESULT--use SUCCEEDED() or FAILED() macros</returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static HRESULT WINAPI CreateDevice(
|
||||
IDXGIAdapter* pAdapter,
|
||||
D3D_DRIVER_TYPE DriverType,
|
||||
HMODULE Software,
|
||||
UINT32 Flags,
|
||||
CONST D3D_FEATURE_LEVEL* pFeatureLevels,
|
||||
UINT FeatureLevels,
|
||||
UINT32 SDKVersion,
|
||||
ID3D11Device** ppDevice,
|
||||
D3D_FEATURE_LEVEL* pFeatureLevel,
|
||||
ID3D11DeviceContext** ppImmediateContext
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Return true if this accelerator encapsulates a backend framework that provides
|
||||
/// explicit APIs for managing outstanding (Asynchronous) operations. When this is
|
||||
/// the case, the corresponding AsyncContext subclass can manage outstanding
|
||||
/// dependences explicitly to increase concurrency and avoid syncing with the device.
|
||||
/// When it is *not* the case, we must synchronize when we move data to and from this
|
||||
/// accelerator context and contexts that *do* support an explicit async API. For
|
||||
/// example, CUDA supports the stream and event API to explicitly manage dependences
|
||||
/// and we use this feature heavily to allow task dispatch to get far ahead of device-
|
||||
/// side dispatch. However when data moves between CUAccelerators and other
|
||||
/// accelerator classes, we must use synchronous operations or provide a way to wait
|
||||
/// for outstanding dependences from those contexts to resolve. This method is used
|
||||
/// to tell us whether we can create an outstanding dependence after making calls
|
||||
/// that queue work, or whether we need to synchronize.
|
||||
///
|
||||
/// The function is not abstract because most accelerator classes don't support async
|
||||
/// operations yet. In DirectX it is unnecessary because the DX runtime manages these
|
||||
/// dependences under the covers, and in OpenCL the API is present, but we do not
|
||||
/// yet take advantage of it. So it's simpler to override a default implementation
|
||||
/// that returns FALSE.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/25/2012. </remarks>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL SupportsExplicitAsyncOperations();
|
||||
|
||||
protected:
|
||||
/// <summary> A cache of compiled shader objects</summary>
|
||||
DXCodeCache * m_pCache;
|
||||
|
||||
|
||||
/// <summary> The ID3D11Device for this accelerator </summary>
|
||||
ID3D11Device* m_pDevice;
|
||||
|
||||
/// <summary> The device context for this accelerator</summary>
|
||||
ID3D11DeviceContext* m_pContext;
|
||||
|
||||
/// <summary> The 3d feature level of the backing device</summary>
|
||||
D3D_FEATURE_LEVEL m_d3dFeatureLevel;
|
||||
|
||||
/// <summary> The adapter backing this device</summary>
|
||||
IDXGIAdapter * m_pAdapter;
|
||||
|
||||
/// <summary> The description of the adapter provided by the OS</summary>
|
||||
DXGI_ADAPTER_DESC m_desc;
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Find a shader file to compile. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/19/2011. </remarks>
|
||||
///
|
||||
/// <param name="strDestPath"> shader file path </param>
|
||||
/// <param name="cchDest"> size of path buffer </param>
|
||||
/// <param name="strFilename"> file name </param>
|
||||
///
|
||||
/// <returns> The found dxsdk shader file cch. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static HRESULT
|
||||
FindDXSDKShaderFileCch(
|
||||
__in_ecount(cchDest) WCHAR* strDestPath,
|
||||
int cchDest,
|
||||
__in LPCWSTR strFilename );
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Creates a new platform specific buffer. This routine is called by CreateBuffer to
|
||||
/// get a new instance of whatever buffer type corresponds to the platform
|
||||
/// implementing this interface. For example, DXAccelerator will return a new
|
||||
/// PDXBuffer object, where PDXBuffer is a subclass of PBuffer. The Accelerator super-
|
||||
/// class can then perform the rest of the work required to initialize the PBuffer.
|
||||
///
|
||||
/// We only create PBuffers to provide 'physical' views of the 'logical' buffer
|
||||
/// abstraction provided by the Datablock. Datablocks can have up to three different
|
||||
/// channels (data, metadata, template), so consequently, each of which must be
|
||||
/// backed by its own PBuffer. A PBuffer should not have to know what channel it is
|
||||
/// backing, but we include that information in its creation to simplify the
|
||||
/// materialization of views between different subclasses of PBuffer.
|
||||
///
|
||||
/// The "proxy allocator" is present as parameter to handle two corner cases:
|
||||
///
|
||||
/// 1. Allocation of host-side buffers by the host-specific subclass of PBuffer
|
||||
/// (PHBuffer)--for example, we prefer to use a CUDA accelerator object to
|
||||
/// allocate host memory when a block will be touched by a CUDA-based PTask,
|
||||
/// because we can use the faster async APIs with memory we allocate using CUDA
|
||||
/// host allocation APIs. This requires that the HostAccelerator defer the host-
|
||||
/// side memory allocation to the CUDA accelerator.
|
||||
///
|
||||
/// 2. Communication between runtimes that provide some interop support (e.g. CUDA
|
||||
/// and DirectX can actually share texture objects, meaning there is no need to
|
||||
/// actually allocate a new buffer to back a CUDA view that already has a DirectX
|
||||
/// view, but the two accelerators must cooperate to assemble a PBuffer that
|
||||
/// shares the underlying shared object.
|
||||
///
|
||||
/// Case 1 is implemented, while case 2 is largely unimplemented. If no proxy
|
||||
/// accelerator is provided, allocation will proceed using the accelerator object
|
||||
/// whose member function is being called to allocate the PBuffer.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <param name="pLogicalParent"> [in] If non-null, the datablock that is the logical
|
||||
/// buffer using this 'physical' buffer to back a particular
|
||||
/// channel on this accelerator. </param>
|
||||
/// <param name="nDatblockChannelIndex"> Zero-based index of the channel being backed. Must be:
|
||||
/// * DBDATA_IDX = 0, OR
|
||||
/// * DBMETADATA_IDX = 1, OR
|
||||
/// * DBTEMPLATE_IDX = 2. </param>
|
||||
/// <param name="uiBufferAccessFlags"> Access flags determining what views to create. </param>
|
||||
/// <param name="pProxyAllocator"> [in,out] If non-null, the proxy allocator. </param>
|
||||
///
|
||||
/// <returns> null if it fails, else. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual PBuffer* NewPlatformSpecificBuffer(Datablock * pLogicalParent,
|
||||
UINT nDatblockChannelIndex,
|
||||
BUFFERACCESSFLAGS uiBufferAccessFlags,
|
||||
Accelerator * pProxyAllocator
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Compile with macros. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/19/2011. </remarks>
|
||||
///
|
||||
/// <param name="lpszShaderCode"> [in] filename+path of source. cannot be null. </param>
|
||||
/// <param name="uiShaderCodeSize"> Size of the shader code. </param>
|
||||
/// <param name="lpszOperation"> [in] Function name in source file. cannot be null. </param>
|
||||
/// <param name="ppPlatformSpecificBinary"> [out] On success, a platform specific binary. </param>
|
||||
/// <param name="ppPlatformSpecificModule"> [out] On success, a platform specific module handle. </param>
|
||||
/// <param name="lpszCompilerOutput"> (optional) [in,out] On failure, the compiler output. </param>
|
||||
/// <param name="uiCompilerOutput"> (optional) [in] length of buffer supplied for
|
||||
/// compiler output. </param>
|
||||
/// <param name="pMacroDefs"> (optional) the macro defs. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///
|
||||
/// ### <param name="uiCompilerOutput"> (optional) the compiler output. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
CompileWithMacros(
|
||||
__in char * lpszShaderCode,
|
||||
__in UINT uiShaderCodeSize,
|
||||
__in char * lpszOperation,
|
||||
__out void ** ppPlatformSpecificBinary,
|
||||
__out void ** ppPlatformSpecificModule,
|
||||
__inout char * lpszCompilerOutput,
|
||||
__in int uiCompilerOutput,
|
||||
__in const void * pMacroDefs=NULL // const D3D_SHADER_MACRO*
|
||||
);
|
||||
|
||||
private:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Warmup pipeline. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/28/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void WarmupPipeline();
|
||||
|
||||
/// <summary> true to enable, false to disable code paths that
|
||||
/// directly leverage DirectX asynchrony. </summary>
|
||||
static BOOL s_bEnableDirectXAsyncrony;
|
||||
|
||||
/// <summary> true to enable, false to disable code paths that
|
||||
/// try to use resource sharing support in DX11. </summary>
|
||||
static BOOL s_bEnableDirectXP2PAPIs;
|
||||
|
||||
};
|
||||
|
||||
};
|
||||
#endif
|
|
@ -1,68 +0,0 @@
|
|||
//--------------------------------------------------------------------------------------
|
||||
// File: dxcodecache.h
|
||||
// Maintainer: crossbac@microsoft.com
|
||||
//--------------------------------------------------------------------------------------
|
||||
#ifndef _DX_CODE_CACHE_H_
|
||||
#define _DX_CODE_CACHE_H_
|
||||
|
||||
#include "primitive_types.h"
|
||||
#include "ptdxhdr.h"
|
||||
#include <map>
|
||||
|
||||
namespace PTask {
|
||||
|
||||
class DXCodeCache {
|
||||
public:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Default constructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
DXCodeCache();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual ~DXCodeCache();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Cache get. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <param name="szFile"> [in,out] If non-null, the file. </param>
|
||||
/// <param name="szFunc"> [in,out] If non-null, the func. </param>
|
||||
///
|
||||
/// <returns> null if it fails, else. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
ID3D11ComputeShader* CacheGet(char * szFile, char * szFunc);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Cache put. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <param name="szFile"> [in,out] If non-null, the file. </param>
|
||||
/// <param name="szFunc"> [in,out] If non-null, the func. </param>
|
||||
/// <param name="p"> [in,out] If non-null, the p. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void CachePut(char * szFile, char * szFunc, ID3D11ComputeShader* p);
|
||||
protected:
|
||||
/// <summary> Strict-weak-ordering comparator used as the key comparison for the
///           shader cache map. Keys are ordered by strcmp() on their C-string
///           representation, so only the characters up to the first NUL
///           participate in the comparison.
///           Arguments are taken by const reference: the map invokes the
///           comparator several times per lookup/insert, and by-value
///           parameters would copy both strings on every invocation.
/// </summary>
/// <param name="s1"> left-hand key. </param>
/// <param name="s2"> right-hand key. </param>
/// <returns> true if s1 orders strictly before s2. </returns>
struct ltstr {
    bool operator()(const std::string& s1, const std::string& s2) const {
        return strcmp(s1.c_str(), s2.c_str()) < 0;
    }
};
|
||||
std::map<std::string, ID3D11ComputeShader*, ltstr> m_cache;
|
||||
};
|
||||
|
||||
};
|
||||
|
||||
#endif
|
|
@ -1,347 +0,0 @@
|
|||
//--------------------------------------------------------------------------------------
|
||||
// File: dxtask.h
|
||||
// directx based task
|
||||
// Maintainer: crossbac@microsoft.com
|
||||
//--------------------------------------------------------------------------------------
|
||||
#ifndef _DX_TASK_H_
|
||||
#define _DX_TASK_H_
|
||||
|
||||
#include "primitive_types.h"
|
||||
#include "ptdxhdr.h"
|
||||
#include "accelerator.h"
|
||||
#include "dxaccelerator.h"
|
||||
#include "task.h"
|
||||
#include "channel.h"
|
||||
#include "CompiledKernel.h"
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include <set>
|
||||
|
||||
namespace PTask {
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Task running accelerator code that
|
||||
/// is supported by the DirectX 11 runtime. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
class DXTask : public Task {
|
||||
|
||||
friend class XMLReader;
|
||||
friend class XMLWriter;
|
||||
|
||||
public:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Constructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <param name="hRuntimeTerminateEvt"> Handle of the runtime terminate event. </param>
|
||||
/// <param name="hGraphTeardownEvent"> Handle of the graph teardown event. </param>
|
||||
/// <param name="hGraphStopEvent"> Handle of the graph stop event. </param>
|
||||
/// <param name="hGraphRunningEvent"> The graph running event. </param>
|
||||
/// <param name="pCompiledKernel"> The CompiledKernel associated with this task. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
DXTask(
|
||||
__in HANDLE hRuntimeTerminateEvt,
|
||||
__in HANDLE hGraphTeardownEvent,
|
||||
__in HANDLE hGraphStopEvent,
|
||||
__in HANDLE hGraphRunningEvent,
|
||||
__in CompiledKernel * pCompiledKernel
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual ~DXTask();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Creates this object. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <param name="pAccelerators"> [in,out] [in,out] If non-null, the accelerators. </param>
|
||||
/// <param name="pKernel"> [in,out] If non-null, the kernel. </param>
|
||||
///
|
||||
/// <returns> . </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual HRESULT
|
||||
Create(
|
||||
__in std::set<Accelerator*>& pAccelerators,
|
||||
__in CompiledKernel * pKernel
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Perform platform-specific calls to dispatch the task. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL PlatformSpecificDispatch();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Sets a compute geometry. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <param name="nThreadGroupsX"> (optional) the thread groups in x. </param>
|
||||
/// <param name="nThreadGroupsY"> (optional) the thread groups in y. </param>
|
||||
/// <param name="nThreadGroupsZ"> (optional) the thread groups in z. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void SetComputeGeometry(int nThreadGroupsX=1, int nThreadGroupsY=1, int nThreadGroupsZ=1);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Sets a block and grid size. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <param name="grid"> The grid. </param>
|
||||
/// <param name="block"> The block. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void SetBlockAndGridSize(PTASKDIM3 grid, PTASKDIM3 block);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> When the graph is complete, (indicated because Graph.Run was called), this method
|
||||
///             is called on every task to allow tasks to perform any one-time initializations
|
||||
/// that cannot be performed without knowing that the structure of the graph is now
|
||||
/// static. For example, computing parameter offset maps for dispatch.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/5/2012. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void PlatformSpecificOnGraphComplete();
|
||||
|
||||
protected:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Perform the platform-specific work required to bind an
|
||||
/// individual input parameter. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/22/2011. </remarks>
|
||||
///
|
||||
/// <param name="ordinal"> [in,out] The ordinal. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL PlatformSpecificBindInput(Port * pPort, int ordinal, UINT uiActualIndex, PBuffer * pBuffer);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Perform the platform-specific work required to bind an
|
||||
/// individual output parameter. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/22/2011. </remarks>
|
||||
///
|
||||
/// <param name="ordinal"> [in,out] The ordinal. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL PlatformSpecificBindOutput(Port * pPort, int ordinal, UINT uiActualIndex, PBuffer * pBuffer);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Perform the platform-specific work required to bind an
|
||||
///             individual constant parameter. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/22/2011. </remarks>
|
||||
///
|
||||
/// <param name="ordinal"> [in,out] The ordinal. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL PlatformSpecificBindConstant(Port * pPort,
|
||||
int ordinal,
|
||||
UINT uiActualIndex,
|
||||
PBuffer * pBuffer
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the platform specific finalize bindings. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/5/2012. </remarks>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL PlatformSpecificFinalizeBindings();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Bind shader. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL BindExecutable();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Unbind shader. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void UnbindExecutable();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Unbind inputs. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void UnbindInputs();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Unbind outputs. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void UnbindOutputs();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Unbind constants. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void UnbindConstants();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Searches for a channel. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <param name="uiUID"> The uid. </param>
|
||||
/// <param name="p"> [in,out] If non-null, the p. </param>
|
||||
/// <param name="siz"> The siz. </param>
|
||||
///
|
||||
/// <returns> null if it fails, else the found channel. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
Channel * FindChannel(UINT uiUID, Channel ** p, int siz);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary>   Searches for the index of a given channel. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <param name="uiUID"> The uid. </param>
|
||||
/// <param name="p"> [in,out] If non-null, the p. </param>
|
||||
/// <param name="siz"> The siz. </param>
|
||||
///
|
||||
/// <returns> The found channel index. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
int FindChannelIndex(UINT uiUID, Channel ** p, int siz);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Removes the channel. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <param name="index">    Zero-based index of the channel to remove. </param>
|
||||
/// <param name="p"> [in,out] If non-null, the p. </param>
|
||||
/// <param name="psiz"> [in,out] If non-null, the psiz. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
BOOL RemoveChannel(int index, Channel ** p, UINT * psiz);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Releases the channels. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <param name="p"> [in,out] If non-null, the p. </param>
|
||||
/// <param name="psiz"> [in,out] If non-null, the psiz. </param>
|
||||
/// <param name="bDeallocate"> true to deallocate. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
BOOL ReleaseChannels(Channel ** p, UINT * psiz, BOOL bDeallocate);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Estimate dispatch dimensions. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <param name="pBlock"> [in,out] If non-null, the block. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void EstimateDispatchDimensions(Datablock * pBlock);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Estimate dispatch dimensions helper function. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <param name="pBlock"> [in,out] If non-null, the block. </param>
|
||||
/// <param name="x"> [in,out] The x coordinate. </param>
|
||||
/// <param name="y"> [in,out] The y coordinate. </param>
|
||||
/// <param name="z"> [in,out] The z coordinate. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void __estimateDispatchDimensions(Datablock * pBlock, UINT& x, UINT& y, UINT& z);
|
||||
|
||||
/// <summary> The compute shader map </summary>
|
||||
std::map<Accelerator*, ID3D11ComputeShader*> m_pCSMap;
|
||||
|
||||
/// <summary> The preferred number of thread
|
||||
/// groups to spawn in the X dimension
|
||||
/// </summary>
|
||||
UINT m_nPreferredXDim;
|
||||
|
||||
/// <summary> The preferred number of thread
|
||||
/// groups to spawn in the Y dimension
|
||||
/// </summary>
|
||||
UINT m_nPreferredYDim;
|
||||
|
||||
/// <summary> The preferred number of thread
|
||||
/// groups to spawn in the Z dimension
|
||||
/// </summary>
|
||||
UINT m_nPreferredZDim;
|
||||
|
||||
/// <summary> true if the compute geometry was
|
||||
/// explicitly set by a call from a
|
||||
/// user program. </summary>
|
||||
BOOL m_bGeometryExplicit;
|
||||
|
||||
/// <summary> true if we estimated the
|
||||
/// geometry based on datablock template
|
||||
/// or datablock properties.
|
||||
/// </summary>
|
||||
BOOL m_bGeometryEstimated;
|
||||
|
||||
/// <summary> Platform specific objects: a list of ShaderResourceView
|
||||
/// pointers, reused for binding inputs on every dispatch.
|
||||
/// </summary>
|
||||
ID3D11ShaderResourceView** m_ppInputSRVs;
|
||||
|
||||
/// <summary> Platform specific objects: a list of ID3D11UnorderedAccessView
|
||||
/// pointers, reused for binding outputs on every dispatch.
|
||||
/// </summary>
|
||||
ID3D11UnorderedAccessView ** m_ppOutputUAVs;
|
||||
|
||||
/// <summary> Platform specific objects: a list of ID3D11Buffer
|
||||
/// pointers, reused for binding constants on every dispatch.
|
||||
/// </summary>
|
||||
ID3D11Buffer** m_ppConstantBuffers;
|
||||
|
||||
/// <summary> The p 2 p dispatch input locks. </summary>
|
||||
std::set<PBuffer*> m_vP2PDispatchInputLocks;
|
||||
|
||||
/// <summary> The p 2 p dispatch output locks. </summary>
|
||||
std::set<PBuffer*> m_vP2PDispatchOutputLocks;
|
||||
|
||||
};
|
||||
|
||||
};
|
||||
#endif
|
|
@ -1,59 +0,0 @@
|
|||
///-------------------------------------------------------------------------------------------------
|
||||
// file: extremetrace.h
|
||||
//
|
||||
// summary: Macros for extreme trace mode
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
#ifndef __EXTREME_TRACE_H__
|
||||
#define __EXTREME_TRACE_H__
|
||||
|
||||
#ifdef EXTREME_TRACE
#include "PTaskRuntime.h"

// Size, in bytes, of the stack buffer each trace macro formats its message into.
#define MSGSIZE 256

// Extreme-trace macros.
//
// trace(x) prints the string x followed by a newline. traceN(x, ...) treats x
// as a printf-style format string and takes N-1 format arguments. Every macro
// emits only when PTask::Runtime::g_bExtremeTrace is set, and prefixes the
// message with the calling thread's id.
//
// The parameter counts below deliberately mirror the no-op definitions in the
// #else branch, so that a call site compiles identically whether or not
// EXTREME_TRACE is defined. (Previously this branch had no trace7 and a
// 7-parameter trace8, which could not agree with the disabled branch.)
//
// NOTE: each macro expands to a bare `if (...) { ... }` statement; do not use
// one as the unbraced body of an if/else.

#define trace(x) \
    if(PTask::Runtime::g_bExtremeTrace) {\
        char szMsg[MSGSIZE];\
        sprintf_s(szMsg, MSGSIZE, "%s\n", x);\
        printf("T[%4X]: %s", ::GetCurrentThreadId(), szMsg); }

#define trace2(x, y) \
    if(PTask::Runtime::g_bExtremeTrace) {\
        char szMsg[MSGSIZE];\
        sprintf_s(szMsg, MSGSIZE, x, y);\
        printf("T[%4X]: %s", ::GetCurrentThreadId(), szMsg); }

#define trace3(x, y, z) \
    if(PTask::Runtime::g_bExtremeTrace) {\
        char szMsg[MSGSIZE];\
        sprintf_s(szMsg, MSGSIZE, x, y, z);\
        printf("T[%4X]: %s", ::GetCurrentThreadId(), szMsg); }

#define trace4(x, y, z, w) \
    if(PTask::Runtime::g_bExtremeTrace) {\
        char szMsg[MSGSIZE];\
        sprintf_s(szMsg, MSGSIZE, x, y, z, w);\
        printf("T[%4X]: %s", ::GetCurrentThreadId(), szMsg); }

#define trace5(x, y, z, w, u) \
    if(PTask::Runtime::g_bExtremeTrace) {\
        char szMsg[MSGSIZE];\
        sprintf_s(szMsg, MSGSIZE, x, y, z, w, u);\
        printf("T[%4X]: %s", ::GetCurrentThreadId(), szMsg); }

#define trace6(x, y, z, w, u, t) \
    if(PTask::Runtime::g_bExtremeTrace) {\
        char szMsg[MSGSIZE];\
        sprintf_s(szMsg, MSGSIZE, x, y, z, w, u, t);\
        printf("T[%4X]: %s", ::GetCurrentThreadId(), szMsg); }

#define trace7(x, y, z, w, u, t, r) \
    if(PTask::Runtime::g_bExtremeTrace) {\
        char szMsg[MSGSIZE];\
        sprintf_s(szMsg, MSGSIZE, x, y, z, w, u, t, r);\
        printf("T[%4X]: %s", ::GetCurrentThreadId(), szMsg); }

#define trace8(x, y, z, w, u, t, r, s) \
    if(PTask::Runtime::g_bExtremeTrace) {\
        char szMsg[MSGSIZE];\
        sprintf_s(szMsg, MSGSIZE, x, y, z, w, u, t, r, s);\
        printf("T[%4X]: %s", ::GetCurrentThreadId(), szMsg); }

#else

// Tracing compiled out: every macro expands to nothing, so arguments are
// never evaluated.
#define trace(x)
#define trace2(x, y)
#define trace3(x, y, z)
#define trace4(x, y, z, w)
#define trace5(x, y, z, w, u)
#define trace6(x, y, z, w, u, v)
#define trace7(x, y, z, w, u, v, r)
#define trace8(x, y, z, w, u, v, r, s)

#endif
|
||||
|
||||
#endif
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -1,419 +0,0 @@
|
|||
//--------------------------------------------------------------------------------------
|
||||
// File: GraphInputChannel.h
|
||||
// Maintainer: crossbac@microsoft.com
|
||||
//--------------------------------------------------------------------------------------
|
||||
#ifndef _GRAPH_INPUT_CHANNEL_H_
|
||||
#define _GRAPH_INPUT_CHANNEL_H_
|
||||
|
||||
#include <stdio.h>
|
||||
#include <crtdbg.h>
|
||||
#include <Windows.h>
|
||||
#include "datablock.h"
|
||||
#include "BlockPoolOwner.h"
|
||||
#include "BlockPool.h"
|
||||
#include <deque>
|
||||
|
||||
namespace PTask {
|
||||
|
||||
class GraphInputChannel : public Channel, public BlockPoolOwner {
|
||||
public:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Constructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <param name="pGraph"> [in,out] If non-null, the graph. </param>
|
||||
/// <param name="pDatablockTemplate"> [in,out] If non-null, the p. </param>
|
||||
/// <param name="hRuntimeTerminateEvent"> Handle of the terminate. </param>
|
||||
/// <param name="hGraphTeardownEvt"> Handle of the stop. </param>
|
||||
/// <param name="hGraphStopEvent"> The graph stop event. </param>
|
||||
/// <param name="lpszChannelName"> [in,out] If non-null, name of the channel. </param>
|
||||
/// <param name="bHasBlockPool">            true if the channel has a block pool. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
GraphInputChannel(
|
||||
__in Graph * pGraph,
|
||||
__in DatablockTemplate * pDatablockTemplate,
|
||||
__in HANDLE hRuntimeTerminateEvent,
|
||||
__in HANDLE hGraphTeardownEvt,
|
||||
__in HANDLE hGraphStopEvent,
|
||||
__in char * lpszChannelName,
|
||||
__in BOOL bHasBlockPool
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if the channel is (or can be) connected to a data source or sink that can be
|
||||
/// streamed. Generally speaking, this is a property of the primitive whose IO
|
||||
/// resources are being exposed by this port; consequently this property must be set
|
||||
/// explicitly by the programmer when graph structures that are stateful are
|
||||
///             constructed. For example, in a sort primitive, the main input can be streamed
|
||||
/// (broken into multiple blocks) only if there is a merge network downstream of the
|
||||
/// node performing the sort. Code that feeds the main input port needs to know this
|
||||
///             to decide whether to grow blocks until all data is present, or to push partial
|
||||
/// input.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/20/2011. </remarks>
|
||||
///
|
||||
/// <returns> true if the port can stream data, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL CanStream();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual ~GraphInputChannel();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if this object has block pool. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
|
||||
/// <returns> true if block pool, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL HasBlockPool();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if this object is global pool. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/30/2013. </remarks>
|
||||
///
|
||||
/// <returns> true if global pool, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL BlockPoolIsGlobal();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Force block pooling for a port that has an up-stream allocator. In general, when
|
||||
/// we have an upstream allocator (meta) port, the runtime will not create a block
|
||||
/// pool for the corresponding output port. This turns out to put device-side
|
||||
/// allocation on the critical path in some cases, so we provide a way to override
|
||||
/// that behavior and allow a port to create a pool based on some size hints. When
|
||||
/// there is a block available with sufficient space in the pool, the meta port can
|
||||
/// avoid the allocation and draw from the pool.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 9/25/2012. </remarks>
|
||||
///
|
||||
/// <param name="nPoolSize"> Size of the block pool. </param>
|
||||
/// <param name="nStride"> The stride. </param>
|
||||
/// <param name="nDataBytes"> The data in bytes. </param>
|
||||
/// <param name="nMetaBytes"> The meta in bytes. </param>
|
||||
/// <param name="nTemplateBytes"> The template in bytes. </param>
|
||||
/// <param name="bPageLockHostViews"> (optional) the page lock host views. </param>
|
||||
/// <param name="bEagerDeviceMaterialize"> (optional) the eager device materialize. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void
|
||||
ForceBlockPoolHint(
|
||||
__in UINT nPoolSize,
|
||||
__in UINT nStride,
|
||||
__in UINT nDataBytes,
|
||||
__in UINT nMetaBytes,
|
||||
__in UINT nTemplateBytes,
|
||||
__in BOOL bPageLockHostViews=FALSE,
|
||||
__in BOOL bEagerDeviceMaterialize=FALSE
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Allocate block pool. Attempt to preallocate blocks on this port.
|
||||
///
|
||||
///             Allocation of data-blocks and platform-specific buffers can be a significant
|
||||
/// latency expense at dispatch time. We can actually preallocate output datablocks
|
||||
/// and create device- side buffers at graph construction time. For each node in the
|
||||
/// graph, allocate data blocks on any output ports, and create device-specific
|
||||
/// buffers for all accelerators capable of executing the node.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/15/2012. </remarks>
|
||||
///
|
||||
/// <param name="pAccelerators"> [in] If non-null, the accelerators on which views of blocks
|
||||
/// allocated in the pool may be required. </param>
|
||||
/// <param name="uiPoolSize"> [in] (optional) Size of the pool. If zero/defaulted,
|
||||
/// Runtime::GetICBlockPoolSize() will be used to determine the
|
||||
/// size of the pool. </param>
|
||||
///
|
||||
/// <returns> True if it succeeds, false if it fails. If a port type doesn't actually implement
|
||||
/// pooling, return false as well.
|
||||
/// </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
AllocateBlockPool(
|
||||
__in std::vector<Accelerator*>* pAccelerators,
|
||||
__in unsigned int uiPoolSize=0
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary>   Destroys the block pool. AddRef everything in the pool, set its owner
|
||||
/// to null, and then release it. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/17/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void
|
||||
DestroyBlockPool(
|
||||
VOID
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Queries if a block pool is active and able to deliver/return blocks. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/18/2013. </remarks>
|
||||
///
|
||||
/// <returns> true if a block pool is active, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
IsBlockPoolActive(
|
||||
VOID
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the owner name. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/18/2013. </remarks>
|
||||
///
|
||||
/// <returns> null if it fails, else the owner name. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual char *
|
||||
GetPoolOwnerName(
|
||||
VOID
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets high water mark. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/19/2013. </remarks>
|
||||
///
|
||||
/// <returns> The high water mark. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual UINT GetHighWaterMark();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the total number of blocks owned by the pool. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/19/2013. </remarks>
|
||||
///
|
||||
/// <returns> The total number of blocks owned by the pool (whether they are queued or not). </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual UINT GetOwnedBlockCount();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the low water mark. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/19/2013. </remarks>
|
||||
///
|
||||
/// <returns>   The low water mark. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual UINT GetLowWaterMark();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the currently available count. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/19/2013. </remarks>
|
||||
///
|
||||
/// <returns>   The number of currently available blocks. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual UINT GetAvailableBlockCount();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Allocate block pool. Attempt to preallocate blocks on this port.
|
||||
/// Asynchronous version. Only allocates device-space buffers
|
||||
/// in the first pass. Second pass queues all the copies.
|
||||
/// This function handles only the first pass.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/15/2012. </remarks>
|
||||
///
|
||||
/// <param name="pAccelerators"> [in] If non-null, the accelerators on which views of blocks
|
||||
/// allocated in the pool may be required. </param>
|
||||
/// <param name="uiPoolSize"> [in] (optional) Size of the pool. If zero/defaulted,
|
||||
/// Runtime::GetICBlockPoolSize() will be used to determine the
|
||||
/// size of the pool. </param>
|
||||
///
|
||||
/// <returns> True if it succeeds, false if it fails. If a port type doesn't actually implement
|
||||
/// pooling, return false as well.
|
||||
/// </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
AllocateBlockPoolAsync(
|
||||
__in std::vector<Accelerator*>* pAccelerators,
|
||||
__in unsigned int uiPoolSize=0
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Allocate block pool. Attempt to preallocate blocks on this port.
|
||||
/// Asynchronous version. Only allocates device-space buffers
|
||||
/// in the first pass. Second pass queues all the copies.
|
||||
/// This function handles the second pass.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/15/2012. </remarks>
|
||||
///
|
||||
/// <param name="pAccelerators"> [in] If non-null, the accelerators on which views of blocks
|
||||
/// allocated in the pool may be required. </param>
|
||||
/// <param name="uiPoolSize"> [in] (optional) Size of the pool. If zero/defaulted,
|
||||
/// Runtime::GetICBlockPoolSize() will be used to determine the
|
||||
/// size of the pool. </param>
|
||||
///
|
||||
/// <returns> True if it succeeds, false if it fails. If a port type doesn't actually implement
|
||||
/// pooling, return false as well.
|
||||
/// </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
FinalizeBlockPoolAsync(
|
||||
VOID
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> add a new block to the pool. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
|
||||
/// <param name="pBlock"> [in,out] If non-null, the block. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void AddNewBlock(Datablock * pBlock);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> return a block to the pool. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
|
||||
/// <param name="pBlock"> [in,out] If non-null, the block. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void ReturnToPool(Datablock * pBlock);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets pool size. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
|
||||
/// <returns> The pool size. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual UINT GetPoolSize();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Sets request page locked. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
|
||||
/// <param name="bPageLocked"> true to lock, false to unlock the page. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void SetRequestsPageLocked(BOOL bPageLocked);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets request page locked. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL GetRequestsPageLocked();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets pooled block. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/29/2013. </remarks>
|
||||
///
|
||||
/// <returns> null if it fails, else the pooled block. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual Datablock * GetPooledBlock();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Derives an initial value datablock for this channel based on its template,
|
||||
/// and pushes that datablock into this channel, blocking until there is capacity
|
||||
/// for an optional timeout in milliseconds. Default timeout is infinite.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/19/2011. </remarks>
|
||||
///
|
||||
/// <param name="dwTimeout"> (optional) the timeout in milliseconds. Use 0xFFFFFFFF for no
|
||||
/// timeout. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL PushInitializer(DWORD dwTimeout=0xFFFFFFFF);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets a destination buffer for a block with an upstream
|
||||
/// allocator. Succeeds only if the pool happens to have blocks
|
||||
/// backed by sufficient resources in all channels that are backed.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/20/2011. </remarks>
|
||||
///
|
||||
/// <param name="pAccelerator"> (optional) [in,out] If non-null, the accelerator. </param>
|
||||
///
|
||||
/// <returns> null if it fails, else the destination buffer. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual Datablock *
|
||||
GetBlockFromPool(
|
||||
__in Accelerator * pAccelerator=NULL,
|
||||
__in UINT uiDataBytes=0,
|
||||
__in UINT uiMetaBytes=0,
|
||||
__in UINT uiTemplateBytes=0
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if this channel has downstream writers. An output channel is
|
||||
/// considered a writer because we must conservatively assume consumed
|
||||
/// blocks will be written.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/15/2013. </remarks>
|
||||
///
|
||||
/// <returns> true if downstream writers, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL HasDownstreamWriters();
|
||||
|
||||
protected:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Check type-specific semantics. Return true if all the structures are initialized
|
||||
/// for this channel in a way that is consistent with a well-formed graph. Called by
|
||||
/// CheckSemantics()
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/27/2011. </remarks>
|
||||
///
|
||||
/// <param name="pos"> [in,out] output string stream. </param>
|
||||
/// <param name="pGraph"> [in,out] non-null, the graph. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL CheckTypeSpecificSemantics(std::ostream * pos,
|
||||
PTask::Graph * pGraph);
|
||||
|
||||
/// <summary> The block pool. </summary>
|
||||
BlockPool * m_pBlockPool;
|
||||
|
||||
/// <summary> true if this object has block pool. </summary>
|
||||
BOOL m_bHasBlockPool;
|
||||
};
|
||||
|
||||
};
|
||||
#endif
|
|
@ -1,167 +0,0 @@
|
|||
//--------------------------------------------------------------------------------------
|
||||
// File: GraphOutputChannel.h
|
||||
// Maintainer: crossbac@microsoft.com
|
||||
//--------------------------------------------------------------------------------------
|
||||
#ifndef _GRAPH_OUTPUT_CHANNEL_H_
|
||||
#define _GRAPH_OUTPUT_CHANNEL_H_
|
||||
|
||||
#include <stdio.h>
|
||||
#include <crtdbg.h>
|
||||
#include <Windows.h>
|
||||
#include "datablock.h"
|
||||
#include <deque>
|
||||
|
||||
namespace PTask {
|
||||
|
||||
class GraphOutputChannel : public Channel {
|
||||
public:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Constructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <param name="pGraph"> [in,out] If non-null, the graph. </param>
|
||||
/// <param name="pDatablockTemplate"> [in,out] If non-null, the datablock template. </param>
|
||||
/// <param name="hRuntimeTerminateEvent"> Handle of the runtime terminate event. </param>
|
||||
/// <param name="hGraphTeardownEvt"> Handle of the graph teardown event. </param>
|
||||
/// <param name="hGraphStopEvent"> The graph stop event. </param>
|
||||
/// <param name="lpszChannelName"> [in,out] If non-null, name of the channel. </param>
|
||||
/// <param name="bHasBlockPool"> TRUE if the channel has a block pool. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
GraphOutputChannel(
|
||||
__in Graph * pGraph,
|
||||
__in DatablockTemplate * pDatablockTemplate,
|
||||
__in HANDLE hRuntimeTerminateEvent,
|
||||
__in HANDLE hGraphTeardownEvt,
|
||||
__in HANDLE hGraphStopEvent,
|
||||
__in char * lpszChannelName,
|
||||
__in BOOL bHasBlockPool
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if the channel is (or can be) connected to a data source or sink that can be
|
||||
/// streamed. Generally speaking, this is a property of the primitive whose IO
|
||||
/// resources are being exposed by this port; consequently this property must be set
|
||||
/// explicitly by the programmer when graph structures that are stateful are
|
||||
/// constructed. For example, in a sort primitive, the main input can be streamed
|
||||
/// (broken into multiple blocks) only if there is a merge network downstream of the
|
||||
/// node performing the sort. Code that feeds the main input port needs to know this
|
||||
/// to decide whether to grow blocks until all data is present, or to push partial
|
||||
/// input.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/20/2011. </remarks>
|
||||
///
|
||||
/// <returns> true if the port can stream data, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL CanStream();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual ~GraphOutputChannel();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Populate a set of tasks that are bound to this channel as consumers. Because a
|
||||
/// channel may be an output channel or a multi-channel, the range of cardinality of
|
||||
/// this result is [0..n]. Return the number of such tasks. Note that we cache the
|
||||
/// result of this call: computing it requires a transitive closure over paths that
|
||||
/// can include multi-channels and in/out routing, which in turn means traversing the
|
||||
/// graph recursively. Since the result of this traversal cannot change, and the
|
||||
/// traversal requires locking parts of the graph, we prefer to avoid repeating work
|
||||
/// to recompute the same result.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 10/2/2012. </remarks>
|
||||
///
|
||||
/// <param name="pvTasks"> [in,out] non-null, the tasks. </param>
|
||||
///
|
||||
/// <returns> The number of downstream consuming tasks. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual UINT
|
||||
GetDownstreamTasks(
|
||||
__inout std::set<Task*>* pvTasks
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets memory spaces downstream of this channel that either *must* consume data
|
||||
/// that flows through this channel, or *may* consume it. The list is non-trivial
|
||||
/// because of different channel types and predication. For example, an output
|
||||
/// channel has no downstream consumers, while a multi-channel can have any number.
|
||||
/// Enumerating consumers is complicated by the following additional factors:
|
||||
///
|
||||
/// 1) The presence of channel predicates can ensure dynamically that a particular
|
||||
/// bound task never actually consumes a block flowing through it.
|
||||
///
|
||||
/// 2) If the channel is bound to In/out ports, then we need to analyze paths of
|
||||
/// length greater than 1. In fact, we need the transitive closure.
|
||||
///
|
||||
/// 3) A task's accelerator class may enable it to be bound to several different
|
||||
/// accelerators, meaning the list of potential consumers can be greater than 1 even
|
||||
/// if the channel binding structure is trivial.
|
||||
///
|
||||
/// Note that we cache the result of this call: computing it requires a transitive
|
||||
/// closure over paths that can include multi-channels and in/out routing, which in
|
||||
/// turn means traversing the graph recursively. Since the result of this traversal
|
||||
/// cannot change, and the traversal requires locking parts of the graph, we prefer
|
||||
/// to avoid repeating work to recompute the same result.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 10/2/2012. </remarks>
|
||||
///
|
||||
/// <param name="pvMandatoryAccelerators"> [in,out] If non-null, the mandatory accelerators. </param>
|
||||
/// <param name="pvPotentialAccelerators"> [in,out] If non-null, the potential accelerators. </param>
|
||||
///
|
||||
/// <returns> The downstream memory spaces. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
EnumerateDownstreamMemorySpaces(
|
||||
__inout std::set<Accelerator*>* pvMandatoryAccelerators,
|
||||
__inout std::set<Accelerator*>* pvPotentialAccelerators
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if this channel has downstream writers. An output channel is
|
||||
/// considered a writer because we must conservatively assume consumed
|
||||
/// blocks will be written.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/15/2013. </remarks>
|
||||
///
|
||||
/// <returns> true if downstream writers, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL HasDownstreamWriters();
|
||||
|
||||
|
||||
protected:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Check type-specific semantics. Return true if all the structures are initialized
|
||||
/// for this channel in a way that is consistent with a well-formed graph. Called by
|
||||
/// CheckSemantics()
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/27/2011. </remarks>
|
||||
///
|
||||
/// <param name="pos"> [in,out] output string stream. </param>
|
||||
/// <param name="pGraph"> [in,out] non-null, the graph. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL CheckTypeSpecificSemantics(std::ostream * pos,
|
||||
PTask::Graph * pGraph);
|
||||
|
||||
};
|
||||
|
||||
};
|
||||
#endif
|
|
@ -1,470 +0,0 @@
|
|||
//--------------------------------------------------------------------------------------
|
||||
// File: hostaccelerator.h
|
||||
// host "accelerator"
|
||||
// Maintainer: crossbac@microsoft.com
|
||||
//--------------------------------------------------------------------------------------
|
||||
#ifndef _HOST_ACCELERATOR_H_
|
||||
#define _HOST_ACCELERATOR_H_
|
||||
|
||||
#include "primitive_types.h"
|
||||
#include "datablocktemplate.h"
|
||||
#include "dxcodecache.h"
|
||||
#include "accelerator.h"
|
||||
#include "CompiledKernel.h"
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
namespace PTask {
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Host accelerator.
|
||||
///
|
||||
/// The host accelerator provides a way to execute ptask nodes
|
||||
/// on the CPU. Currently, where an accelerator-based ptask
|
||||
/// accepts source code and "compiles" a node with the resulting
|
||||
/// binary, the host accelerator expects a dll that exports the function to call.
|
||||
///
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
class HostAccelerator : public Accelerator {
|
||||
public:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Constructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/28/2011. </remarks>
|
||||
///
|
||||
/// <param name="cpuid"> The cpuid. </param>
|
||||
/// <param name="lpszName"> [in,out] If non-null, the name. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
HostAccelerator(int cpuid, char * lpszName);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/28/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual ~HostAccelerator();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Open the host accelerator. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <returns> . </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual HRESULT Open();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the device. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <returns> null if it fails, else the device. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void* GetDevice();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the context. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <returns> null if it fails, else the context. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void* GetContext();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Creates an asynchronous context for the task. Create the cuda stream for this
|
||||
/// ptask.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 12/20/2011.
|
||||
///
|
||||
/// This method is required of all subclasses, and abstracts the work associated with
|
||||
/// managing whatever framework-level asynchrony abstractions are supported by the
|
||||
/// backend target. For example, CUDA supports the "stream", while DirectX supports
|
||||
/// an ID3D11ImmediateContext, OpenCL has command queues, and so on.
|
||||
/// </remarks>
|
||||
///
|
||||
/// <param name="pTask"> [in] non-null, the CUDA-capable accelerator to which the
|
||||
/// stream is bound. </param>
|
||||
/// <param name="eAsyncContextType"> Type of the asynchronous context. </param>
|
||||
///
|
||||
/// <returns> The new asynchronous context (presumably null on failure). </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual AsyncContext *
|
||||
PlatformSpecificCreateAsyncContext(
|
||||
__in Task * pTask,
|
||||
__in ASYNCCONTEXTTYPE eAsyncContextType
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Cache a binary. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <param name="szFile"> [in,out] If non-null, the file. </param>
|
||||
/// <param name="szFunc"> [in,out] If non-null, the func. </param>
|
||||
/// <param name="lpfn"> The lpfn. </param>
|
||||
/// <param name="hModule"> The module. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void CachePutShader(char * szFile, char * szFunc, FARPROC lpfn, HMODULE hModule);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Check the cache for a binary. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <param name="szFile"> [in,out] If non-null, the file. </param>
|
||||
/// <param name="szFunc"> [in,out] If non-null, the func. </param>
|
||||
/// <param name="ppFunction"> [in,out] The function. </param>
|
||||
/// <param name="pModule"> [in,out] The module. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
BOOL CacheGetShader(char * szFile, char * szFunc, FARPROC &ppFunction, HMODULE &pModule);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Compiles accelerator source code to create a PTask binary. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/17/2011.
|
||||
/// </remarks>
|
||||
///
|
||||
/// <param name="lpszFileName"> [in] filename+path of source. cannot be null.</param>
|
||||
/// <param name="lpszOperation"> [in] Function name in source file. cannot be null.</param>
|
||||
/// <param name="ppPlatformSpecificBinary"> [out] On success, a platform specific binary. </param>
|
||||
/// <param name="ppPlatformSpecificModule"> [out] On success, a platform specific module handle. </param>
|
||||
/// <param name="lpszCompilerOutput"> (optional) [in,out] On failure, the compiler output. </param>
|
||||
/// <param name="uiCompilerOutput"> (optional) [in] length of buffer supplied for
|
||||
/// compiler output. </param>
|
||||
/// <param name="tgx"> (optional) thread group X dimensions. (see remarks)</param>
|
||||
/// <param name="tgy"> (optional) thread group Y dimensions. (see remarks)</param>
|
||||
/// <param name="tgz"> (optional) thread group Z dimensions. (see remarks)</param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL Compile(
|
||||
char * lpszFileName,
|
||||
char * lpszOperation,
|
||||
void ** ppPlatformSpecificBinary,
|
||||
void ** ppPlatformSpecificModule,
|
||||
char * lpszCompilerOutput=NULL,
|
||||
int uiCompilerOutput=0,
|
||||
int tgx=1,
|
||||
int tgy=1,
|
||||
int tgz=1
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Compiles accelerator source code to create a PTask binary. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/17/2011.
|
||||
///
|
||||
/// The function accepts a string of source code and an operation in that source to
|
||||
/// build a binary for.
|
||||
///
|
||||
/// Currently, this is not implemented for host tasks because this involves
|
||||
/// setting up infrastructure to choose a compiler and target a DLL, etc.
|
||||
/// </remarks>
|
||||
///
|
||||
/// <param name="lpszShaderCode"> [in] actual source. cannot be null. </param>
|
||||
/// <param name="uiShaderCodeSize"> Size of the shader code. </param>
|
||||
/// <param name="lpszOperation"> [in] Function name in source file. cannot be null. </param>
|
||||
/// <param name="ppPlatformSpecificBinary"> [out] On success, a platform specific binary. </param>
|
||||
/// <param name="ppPlatformSpecificModule"> [out] On success, a platform specific module handle. </param>
|
||||
/// <param name="lpszCompilerOutput"> (optional) [in,out] On failure, the compiler output. </param>
|
||||
/// <param name="uiCompilerOutput"> (optional) [in] length of buffer supplied for
|
||||
/// compiler output. </param>
|
||||
/// <param name="nThreadGroupSizeX"> (optional) thread group X dimensions. (see remarks) </param>
|
||||
/// <param name="nThreadGroupSizeY"> (optional) thread group Y dimensions. (see remarks) </param>
|
||||
/// <param name="nThreadGroupSizeZ"> (optional) thread group Z dimensions. (see remarks) </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
Compile(
|
||||
__in char * lpszShaderCode,
|
||||
__in UINT uiShaderCodeSize,
|
||||
__in char * lpszOperation,
|
||||
__in void ** ppPlatformSpecificBinary,
|
||||
__in void ** ppPlatformSpecificModule,
|
||||
__in char * lpszCompilerOutput=NULL,
|
||||
__in int uiCompilerOutput=0,
|
||||
__in int nThreadGroupSizeX=1,
|
||||
__in int nThreadGroupSizeY=1,
|
||||
__in int nThreadGroupSizeZ=1
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if the device context is current. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <returns> true if the device context is current, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL IsDeviceContextCurrent();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Makes the context current. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL MakeDeviceContextCurrent();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Releases the current context. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void ReleaseCurrentDeviceContext();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Return true if this accelerator has some support for device to device transfer
|
||||
/// with the given accelerator. This allows us to skip a trip through host memory
|
||||
/// in many cases.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/25/2012. </remarks>
|
||||
///
|
||||
/// <param name="pAccelerator"> [in,out] If non-null, the accelerator. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL SupportsDeviceToDeviceTransfer(Accelerator * pAccelerator);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if this accelerator supports device memcpy. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/12/2012. </remarks>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL SupportsDeviceMemcpy();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if this accelerator supports function arguments. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL SupportsFunctionArguments();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if this accelerator supports byval arguments. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL SupportsByvalArguments();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Synchronizes the context. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <param name="pTask"> (optional) [in,out] If non-null, the task. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL Synchronize(Task*pTask=NULL);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if 'p' has accessible memory space. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <param name="p"> [in,out] If non-null, the p. </param>
|
||||
///
|
||||
/// <returns> true if accessible memory space, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL HasAccessibleMemorySpace(Accelerator*p);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if this accelerator supports pinned host memory. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL SupportsPinnedHostMemory();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Allocate memory on the host. Some runtimes (esp. earlier versions of CUDA)
|
||||
/// require that CUDA APIs be used to allocate host-side buffers, or support
|
||||
/// specialized host allocators that can help improve DMA performance.
|
||||
/// AllocatePagelockedHostMemory wraps these APIs for accelerators that have runtime support
|
||||
/// for this, and uses normal system services (VirtualAlloc on Windows, malloc
|
||||
/// elsewhere) to satisfy requests.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/17/2011. </remarks>
|
||||
///
|
||||
/// <param name="uiBytes"> Number of bytes to allocate. </param>
|
||||
/// <param name="pbResultPageLocked"> [in,out] If non-null, the result of whether the
|
||||
/// allocated memory is page-locked is provided here. </param>
|
||||
///
|
||||
/// <returns> byte pointer on success, null on failure. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void * AllocatePagelockedHostMemory(UINT uiBytes, BOOL * pbResultPageLocked);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Free host memory. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/17/2011. </remarks>
|
||||
///
|
||||
/// <param name="pBuffer"> If non-null, the buffer. </param>
|
||||
/// <param name="bPageLocked"> true if the memory was allocated in the page-locked area. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void
|
||||
FreeHostMemory(
|
||||
void * pBuffer,
|
||||
BOOL bPageLocked
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the device identifier. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <returns> The device identifier. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual int GetDeviceId();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Enumerate accelerators. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <param name="devices"> [in,out] The devices. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static void EnumerateAccelerators(std::vector<Accelerator*> &devices);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Allocate memory extent. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/6/2012. </remarks>
|
||||
///
|
||||
/// <param name="ulNumberOfBytes"> The number of bytes to allocate. </param>
|
||||
/// <param name="ulFlags"> The ul flags. </param>
|
||||
///
|
||||
/// <returns> null if it fails, else the allocated memory extent. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static void * __stdcall AllocateMemoryExtent(ULONG ulNumberOfBytes, ULONG ulFlags);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Deallocate memory extent. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/6/2012. </remarks>
|
||||
///
|
||||
/// <param name="pvMemoryExtent"> The memory extent to deallocate. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static void __stdcall DeallocateMemoryExtent(void* pvMemoryExtent);
|
||||
|
||||
protected:
|
||||
|
||||
/// <summary> Identifier for the device </summary>
|
||||
int m_nDeviceId;
|
||||
|
||||
/// <summary> The code cache </summary>
|
||||
std::map<std::string, FARPROC> m_pCodeCache;
|
||||
|
||||
/// <summary> The module cache </summary>
|
||||
std::map<std::string, HMODULE> m_pModuleCache;
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Creates a new platform specific buffer. This routine is called by CreateBuffer to
|
||||
/// get a new instance of whatever buffer type corresponds to the platform
|
||||
/// implementing this interface. For example, DXAccelerator will return a new
|
||||
/// PDXBuffer object, where PDXBuffer is a subclass of PBuffer. The Accelerator super-
|
||||
/// class can then perform the rest of the work required to initialize the PBuffer.
|
||||
///
|
||||
/// We only create PBuffers to provide 'physical' views of the 'logical' buffer
|
||||
/// abstraction provided by the Datablock. Datablocks can have up to three different
|
||||
/// channels (data, metadata, template), so consequently, each of which must be
|
||||
/// backed by its own PBuffer. A PBuffer should not have to know what channel it is
|
||||
/// backing, but we include that information in its creation to simplify the
|
||||
/// materialization of views between different subclasses of PBuffer.
|
||||
///
|
||||
/// The "proxy allocator" is present as parameter to handle two corner cases:
|
||||
///
|
||||
/// 1. Allocation of host-side buffers by the host-specific subclass of PBuffer
|
||||
/// (PHBuffer)--for example, we prefer to use a CUDA accelerator object to
|
||||
/// allocate host memory when a block will be touched by a CUDA-based PTask,
|
||||
/// because we can use the faster async APIs with memory we allocate using CUDA
|
||||
/// host allocation APIs. This requires that the HostAccelerator defer the host-
|
||||
/// side memory allocation to the CUDA accelerator.
|
||||
///
|
||||
/// 2. Communication between runtimes that provide some interop support (e.g. CUDA
|
||||
/// and DirectX can actually share texture objects, meaning there is no need to
|
||||
/// actually allocate a new buffer to back a CUDA view that already has a DirectX
|
||||
/// view, but the two accelerators must cooperate to assemble a PBuffer that
|
||||
/// shares the underlying shared object.
|
||||
///
|
||||
/// Case 1 is implemented, while case 2 is largely unimplemented. If no proxy
|
||||
/// accelerator is provided, allocation will proceed using the accelerator object
|
||||
/// whose member function is being called to allocate the PBuffer.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <param name="pLogicalParent"> [in] If non-null, the datablock that is the logical
|
||||
/// buffer using this 'physical' buffer to back a particular
|
||||
/// channel on this accelerator. </param>
|
||||
/// <param name="nDatblockChannelIndex"> Zero-based index of the channel being backed. Must be:
|
||||
/// * DBDATA_IDX = 0, OR
|
||||
/// * DBMETADATA_IDX = 1, OR
|
||||
/// * DBTEMPLATE_IDX = 2. </param>
|
||||
/// <param name="uiBufferAccessFlags"> Access flags determining what views to create. </param>
|
||||
/// <param name="pProxyAllocator"> [in,out] If non-null, the proxy allocator. </param>
|
||||
///
|
||||
/// <returns> null if it fails, else the newly created PBuffer. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual PBuffer* NewPlatformSpecificBuffer(Datablock * pLogicalParent,
|
||||
UINT nDatblockChannelIndex,
|
||||
BUFFERACCESSFLAGS uiBufferAccessFlags,
|
||||
Accelerator * pProxyAllocator
|
||||
);
|
||||
|
||||
};
|
||||
};
|
||||
|
||||
#endif
|
|
@ -1,490 +0,0 @@
|
|||
//--------------------------------------------------------------------------------------
|
||||
// File: HostTask.h
|
||||
// Host based task
|
||||
// Maintainer: crossbac@microsoft.com
|
||||
//--------------------------------------------------------------------------------------
|
||||
#ifndef _HOST_TASK_H_
|
||||
#define _HOST_TASK_H_
|
||||
|
||||
#include "primitive_types.h"
|
||||
#include "accelerator.h"
|
||||
#include "cuaccelerator.h"
|
||||
#include "task.h"
|
||||
#include "channel.h"
|
||||
#include "datablock.h"
|
||||
#include "CompiledKernel.h"
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include <list>
|
||||
|
||||
using namespace PTask;
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> function signature for simple host tasks. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/16/2012. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
typedef void (__stdcall *LPFNHOSTTASK)(
|
||||
UINT nArguments,
|
||||
void **ppArguments
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> function signature for host tasks that have dependences on other accelerators.
|
||||
/// The BOOL array contains entries which are true if that entry corresponds to an
|
||||
/// input already materialized on the dependent device, false otherwise. The
|
||||
/// pvDeviceBindings array contains entries which are meaningful when the entry at
|
||||
/// the same index in the BOOL array is true, and is a platform-specific device id.
|
||||
/// Generated code must know how to use these IDs.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/16/2012. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
typedef void (__stdcall *LPFNDEPHOSTTASK)(
|
||||
UINT nArguments,
|
||||
void **ppArguments,
|
||||
BOOL * pbIsDependentBinding,
|
||||
void ** pvDeviceBindings,
|
||||
UINT nDeps,
|
||||
void ** pDeps);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Defines a structure for providing dependent accelerator context information
|
||||
/// to a host task. Moving from LPFNHOSTTASK and LPFNDEPHOSTTASK approach
|
||||
/// because we have to change the signature every time there is a new requirement.
|
||||
/// Using a descriptor struct instead allows us to grow the structure as needed
|
||||
/// without having to change a bunch of code. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 2/6/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
typedef struct _dependent_context_t {
|
||||
/*
|
||||
pbDependentBindings:
|
||||
pvDependentBindings:
|
||||
nDeps:
|
||||
pDepDevs:
|
||||
pStreams: a vector of length nDeps (always 1 for you), each member of which can be typecast (in your case) to type CUstream_t.
|
||||
|
||||
*/
|
||||
|
||||
/// <summary> The number of bytes in the dependent context
|
||||
/// descriptor structure.
|
||||
/// </summary>
|
||||
UINT cbDependentContext;
|
||||
|
||||
/// <summary> The number of arguments in the task argument list. </summary>
|
||||
UINT nArguments;
|
||||
|
||||
/// <summary> The number of dependent accelerators assigned. </summary>
|
||||
UINT nDependentAccelerators;
|
||||
|
||||
/// <summary> Reserved, pad to 16 bytes before pointer types. </summary>
|
||||
UINT uiReserved0;
|
||||
|
||||
/// <summary> The arguments, to be typecast according to what the
|
||||
/// task knows implicitly as well as the dependent accelerator
|
||||
/// binding information provided in the subsequent members
|
||||
/// of this structure.
|
||||
/// </summary>
|
||||
void **ppArguments;
|
||||
|
||||
/// <summary> A vector of length nArguments, specifying the datablock
|
||||
/// that each argument is associated with.
|
||||
/// </summary>
|
||||
Datablock ** ppDatablocks;
|
||||
|
||||
/// <summary> a vector of BOOL, of length nArguments. If a given member is TRUE, you can expect
|
||||
/// the data for the argument in question to be pre-materialized in device space.
|
||||
/// </summary>
|
||||
BOOL * pbIsDependentBinding;
|
||||
|
||||
/// <summary> a vector of length nArguments, whose members can be typecast to platform-specific
|
||||
/// device objects (e.g. CUdevice): if pbIsDependentBinding[i] is TRUE, then
|
||||
/// pvDeviceBindings[i] is a valid platform-specific object.
|
||||
/// </summary>
|
||||
void ** pvDeviceBindings;
|
||||
|
||||
/// <summary> a vector of length nDeps (always 1 for you), each member of which can be typecast
|
||||
/// to a platform-specific device object (e.g. type CUdevice).
|
||||
/// </summary>
|
||||
void ** pDependentDevices;
|
||||
|
||||
/// <summary> The streams: a vector of length nDependentAccelerators each member of which can
|
||||
/// be typecast to a platform-specific asynchronous context object (e.g. type
|
||||
/// CUstream_t).
|
||||
/// </summary>
|
||||
void ** pStreams;
|
||||
|
||||
/// <summary> A pointer to the PTask-assigned task name. Enables less ambiguous debug
|
||||
/// output for graphs that use the same host entry point in multiple
|
||||
/// places in the graph.
|
||||
/// </summary>
|
||||
char * lpszTaskName;
|
||||
|
||||
} DEPENDENTCONTEXT, *LPDEPENDENTCONTEXT;
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> function signature for host tasks that have dependences on other accelerators.
|
||||
/// The structure contains members which allow the task dispatch code to determine
|
||||
/// whether entries are already materialized on the dependent device, as well as
|
||||
/// enabling the code to get platform specific objects such as device ids and stream
|
||||
/// handles where needed. Generated code must know how to use this structure.
|
||||
/// Currently, if the task's BindDependentAcceleratorClass member is called
|
||||
/// with the bRequestPSObjects parameter == TRUE, the code assumes the host task
|
||||
/// entry point follows this form; otherwise the legacy versions above
|
||||
/// (LPFNDEPHOSTTASK, LPFNHOSTTASK) are used for backward compatibility.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 5/16/2012. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
typedef void (__stdcall *LPFNDEPHOSTTASKEX)(LPDEPENDENTCONTEXT);
|
||||
|
||||
namespace PTask {
|
||||
|
||||
static const int MAXARGS=64;
|
||||
|
||||
class HostTask : public Task {
|
||||
|
||||
friend class XMLReader;
|
||||
friend class XMLWriter;
|
||||
|
||||
public:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Constructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <param name="hRuntimeTerminateEvt"> Handle of the runtime terminate event. </param>
|
||||
/// <param name="hGraphTeardownEvent"> Handle of the graph teardown event. </param>
|
||||
/// <param name="hGraphStopEvent"> Handle of the graph stop event. </param>
|
||||
/// <param name="hGraphRunningEvent"> The graph running event. </param>
|
||||
/// <param name="pCompiledKernel"> The CompiledKernel associated with this task. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
HostTask(
|
||||
__in HANDLE hRuntimeTerminateEvt,
|
||||
__in HANDLE hGraphTeardownEvent,
|
||||
__in HANDLE hGraphStopEvent,
|
||||
__in HANDLE hGraphRunningEvent,
|
||||
__in CompiledKernel * pCompiledKernel
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual ~HostTask();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Creates this task. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <param name="pAccelerators"> [in,out] If non-null, the accelerators. </param>
|
||||
/// <param name="pKernel"> [in,out] If non-null, the kernel. </param>
|
||||
///
|
||||
/// <returns> . </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual HRESULT Create( std::set<Accelerator*>& pAccelerators, CompiledKernel * pKernel );
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Dispatches this task. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL PlatformSpecificDispatch();
|
||||
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Sets the compute geometry. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <param name="nThreadGroupsX"> (optional) the thread groups in x. </param>
|
||||
/// <param name="nThreadGroupsY"> (optional) the thread groups in y. </param>
|
||||
/// <param name="nThreadGroupsZ"> (optional) the thread groups in z. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void SetComputeGeometry(int nThreadGroupsX=1, int nThreadGroupsY=1, int nThreadGroupsZ=1);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Sets a block and grid size. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <param name="grid"> The grid. </param>
|
||||
/// <param name="block"> The block. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void SetBlockAndGridSize(PTASKDIM3 grid, PTASKDIM3 block);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets a synchronization timestamp. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <param name="p"> [in,out] If non-null, the p. </param>
|
||||
///
|
||||
/// <returns> The synchronization timestamp. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual UINT GetSynchronizationTimestamp(Accelerator * p);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Increment synchronise timestamp. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <param name="p"> [in,out] If non-null, the p. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void IncrementSyncTimestamp(Accelerator * p);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> When the graph is complete, (indicated because Graph.Run was called), this method
|
||||
/// is called on every task to allow tasks to perform any one-time initializations
|
||||
/// that cannot be performed without knowing that the structure of the graph is now
|
||||
/// static. For example, computing parameter offset maps for dispatch.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/5/2012. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void PlatformSpecificOnGraphComplete();
|
||||
|
||||
protected:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Perform the platform-specific work required to bind an individual input parameter.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/22/2011. </remarks>
|
||||
///
|
||||
/// <param name="pPort"> [in,out] If non-null, the port. </param>
|
||||
/// <param name="ordinal"> The ordinal. </param>
|
||||
/// <param name="uiActualIndex"> Zero-based index of the user interface actual. </param>
|
||||
/// <param name="pBuffer"> [in,out] If non-null, the buffer. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL PlatformSpecificBindInput(Port * pPort,
|
||||
int ordinal,
|
||||
UINT uiActualIndex,
|
||||
PBuffer * pBuffer
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Perform the platform-specific work required to bind an individual output
|
||||
/// parameter.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/22/2011. </remarks>
|
||||
///
|
||||
/// <param name="pPort"> [in,out] If non-null, the port. </param>
|
||||
/// <param name="ordinal"> The ordinal. </param>
|
||||
/// <param name="uiActualIndex"> Zero-based index of the user interface actual. </param>
|
||||
/// <param name="pBuffer"> [in,out] If non-null, the buffer. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL PlatformSpecificBindOutput(Port * pPort,
|
||||
int ordinal,
|
||||
UINT uiActualIndex,
|
||||
PBuffer * pBuffer);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Perform the platform-specific work required to bind an individual constant parameter.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/22/2011. </remarks>
|
||||
///
|
||||
/// <param name="pPort"> [in,out] If non-null, the port. </param>
|
||||
/// <param name="ordinal"> The ordinal. </param>
|
||||
/// <param name="uiActualIndex"> Zero-based index of the user interface actual. </param>
|
||||
/// <param name="pBuffer"> [in,out] If non-null, the buffer. </param>
|
||||
/// <param name="bScalarBinding"> true to scalar binding. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL PlatformSpecificBindConstant(Port * pPort,
|
||||
int ordinal,
|
||||
UINT uiActualIndex,
|
||||
PBuffer * pBuffer
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the platform specific finalize bindings. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/5/2012. </remarks>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL PlatformSpecificFinalizeBindings();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Bind accelerator executable. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL BindExecutable();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Collect migration resources. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <param name="vblocks"> [in,out] If non-null, the vblocks. </param>
|
||||
/// <param name="vaccs"> [in,out] If non-null, the vaccs. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
BOOL
|
||||
CollectMigrationResources(
|
||||
__inout std::list<Datablock*> &vblocks,
|
||||
__inout std::list<Accelerator*> &vaccs
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Platform-specific dispatch if the task has no dependences on other accelerators.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <param name="pCS"> The function pointer address for dispatch. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
BOOL
|
||||
PlatformSpecificDispatchNoDependences(
|
||||
__in FARPROC pCS
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Platform-specific dispatch if the task has dependences on other accelerators.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <param name="pCS"> The function pointer address for dispatch. </param>
|
||||
/// <param name="nDeps"> The number dependent assignments. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
BOOL
|
||||
PlatformSpecificDispatchWithDependences(
|
||||
__in FARPROC pCS,
|
||||
__in UINT nDeps
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Platform-specific dispatch if the task has dependences on other accelerators.
|
||||
/// This version extends the PlatformSpecificDispatchWithDependences version
|
||||
/// with the ability to provide other platform-specific objects such as stream
|
||||
/// handles through a struct/descriptor based interface. Currently, this is
|
||||
/// called if m_bRequestDependentPSObjects is true, otherwise, legacy versions
|
||||
/// are called.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <param name="pCS"> The function pointer address for dispatch. </param>
|
||||
/// <param name="nDeps"> The number dependent assignments. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
BOOL
|
||||
PlatformSpecificDispatchWithDependencesEx(
|
||||
__in FARPROC pCS,
|
||||
__in UINT nDeps
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Executes the ps dispatch enter action. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/20/2013. </remarks>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
BOOL OnPSDispatchEnter();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Executes the ps dispatch exit action. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/20/2013. </remarks>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
BOOL OnPSDispatchExit();
|
||||
|
||||
/// <summary> map of host-task invocation parameter index to value </summary>
|
||||
std::map<int, void*> m_pParameters;
|
||||
|
||||
/// <summary> map of host-task invocation parameter index to source port </summary>
|
||||
std::map<int, Port*> m_pParameterPorts;
|
||||
|
||||
/// <summary> map of host-task invocation parameter index to datablock </summary>
|
||||
std::map<int, Datablock*> m_pParameterDatablockMap;
|
||||
|
||||
/// <summary> map of function pointers </summary>
|
||||
std::map<Accelerator*, FARPROC> m_pCSMap;
|
||||
|
||||
/// <summary> map of HMODULE handles </summary>
|
||||
std::map<Accelerator*, HMODULE> m_pModuleMap;
|
||||
|
||||
/// <summary> The preferred x size </summary>
|
||||
UINT m_nPreferredXDim;
|
||||
|
||||
/// <summary> The preferred y size </summary>
|
||||
UINT m_nPreferredYDim;
|
||||
|
||||
/// <summary> The preferred z size </summary>
|
||||
UINT m_nPreferredZDim;
|
||||
|
||||
/// <summary> true if the user set the geometry
|
||||
/// explicitly with a call to
|
||||
/// SetComputeGeometry.</summary>
|
||||
BOOL m_bGeometryExplicit;
|
||||
|
||||
/// <summary> true if the user set the thread block
|
||||
/// sizes explicitly.
|
||||
/// </summary>
|
||||
BOOL m_bThreadBlockSizesExplicit;
|
||||
|
||||
/// <summary> Size of the thread block </summary>
|
||||
PTASKDIM3 m_pThreadBlockSize;
|
||||
|
||||
/// <summary> Size of the dispatch grid </summary>
|
||||
PTASKDIM3 m_pGridSize;
|
||||
|
||||
void* m_ppArgs[MAXARGS];
|
||||
Datablock* m_ppDatablocks[MAXARGS];
|
||||
void* m_ppDeps[MAXARGS];
|
||||
BOOL m_pbIsDependentBinding[MAXARGS];
|
||||
void* m_pvDeviceBindings[MAXARGS];
|
||||
void* m_ppStreams[MAXARGS];
|
||||
};
|
||||
};
|
||||
#endif
|
||||
|
|
@ -1,171 +0,0 @@
|
|||
/********************************************************
|
||||
* hrperft.h
|
||||
**********************************************************/
|
||||
|
||||
#ifndef _HRPERFT_H_
|
||||
#define _HRPERFT_H_
|
||||
|
||||
// performance timers are architecture and platform
|
||||
// specific. Need to define a routine to access
|
||||
// the perf counters on whatever processor is in use here:
|
||||
#include "windows.h"
|
||||
typedef double ctrtype;
|
||||
#define hpfresult(x) x.QuadPart
|
||||
#define query_hpc(x) QueryPerformanceCounter(x)
|
||||
#define query_freq(x) QueryPerformanceFrequency(x)
|
||||
typedef long (__stdcall *LPFNtQuerySystemTime)(PLARGE_INTEGER SystemTime);
|
||||
|
||||
typedef enum gran_t {
|
||||
gran_nanosec,
|
||||
gran_usec,
|
||||
gran_msec,
|
||||
gran_sec
|
||||
} hpf_granularity;
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> High resolution timer.
|
||||
/// For collecting performance measurements.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
class CHighResolutionTimer {
|
||||
public:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Constructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <param name="gran"> The granularity of the timer
|
||||
/// (an hpf_granularity value, e.g. seconds or milliseconds). </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
CHighResolutionTimer(hpf_granularity gran);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
~CHighResolutionTimer(void);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the tick frequency of the underlying
|
||||
/// counter primitive.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <returns> The tick frequency of the underlying counter primitive. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
double tickfreq();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the tick count. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <returns> The tick count. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
__int64 tickcnt();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Resets this timer. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void reset();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Return the time elapsed since the
|
||||
/// last reset. Optionally, reset the timer
|
||||
/// as a side-effect of the query. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <param name="reset"> true to reset. </param>
|
||||
///
|
||||
/// <returns> The time elapsed since the last reset. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
double elapsed(bool reset);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Queries the system time. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <param name="li"> The li. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
BOOL query_system_time(PLARGE_INTEGER li);
|
||||
|
||||
|
||||
protected:
|
||||
|
||||
/// <summary> The granularity of the timer,
|
||||
/// an hpf_granularity value (e.g. seconds or milliseconds)
|
||||
/// </summary>
|
||||
hpf_granularity m_gran;
|
||||
|
||||
/// <summary> the value of the underlying
|
||||
/// timing primitive at the time the
|
||||
/// timer was last reset.</summary>
|
||||
__int64 m_start;
|
||||
|
||||
/// <summary> The frequency of the underlying
|
||||
/// timing primitive </summary>
|
||||
double m_freq;
|
||||
|
||||
/// <summary> Module for windows DLL for querying
|
||||
/// system time getting perf counter
|
||||
/// frequency.
|
||||
/// </summary>
|
||||
HMODULE m_hModule;
|
||||
|
||||
/// <summary> Function pointer for querying
|
||||
/// system time
|
||||
/// </summary>
|
||||
LPFNtQuerySystemTime m_lpfnQuerySystemTime;
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Free resources allocated to support
|
||||
/// query of system time. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void free_query_system_time();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Initialises the query system time. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <returns> . </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
LPFNtQuerySystemTime init_query_system_time();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Return the difference in milliseconds. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <param name="lEarly"> The early. </param>
|
||||
/// <param name="lLate"> The late. </param>
|
||||
///
|
||||
/// <returns> The difference between lLate and lEarly, in milliseconds. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
DWORD delta_milliseconds(LARGE_INTEGER lEarly, LARGE_INTEGER lLate);};
|
||||
|
||||
#endif
|
|
@ -1,656 +0,0 @@
|
|||
///-------------------------------------------------------------------------------------------------
|
||||
// file: instrumenter.h
|
||||
//
|
||||
// summary: Declares the instrumenter class
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
#ifndef __PTASK_INSTRUMENTATION_H__
|
||||
#define __PTASK_INSTRUMENTATION_H__
|
||||
|
||||
#include "primitive_types.h"
|
||||
#include "Lockable.h"
|
||||
#include <stack>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
#include <tuple>
|
||||
|
||||
class CSharedPerformanceTimer;
|
||||
|
||||
namespace PTask {
|
||||
|
||||
class Instrumenter : public Lockable
|
||||
{
|
||||
public:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Initialize an the ad hoc instrumentation framework. Creates a singleton
|
||||
/// instrumenter object.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/23/2013. </remarks>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static BOOL Initialize();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Shutdown the ad hoc instrumentation framework, destroys the singleton
|
||||
/// instrumenter object.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/23/2013. </remarks>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static BOOL Destroy();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Reports all measured latencies and acknowledges any outstanding
|
||||
/// (incomplete) measurments . </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/23/2013. </remarks>
|
||||
///
|
||||
/// <param name="ss"> [in,out] The ss. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static void Report(std::ostream& ss);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Enables the instrumentation framework. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/23/2013. </remarks>
|
||||
///
|
||||
/// <param name="bEnable"> true to enable, false to disable. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static BOOL Enable(BOOL bEnable);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if the adhoc instrumentation framework is enabled. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/23/2013. </remarks>
|
||||
///
|
||||
/// <returns> true if enabled, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static BOOL IsEnabled();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if a measurement matching 'strEventName' is in flight. In flight
|
||||
/// means that a start sentinal has been pushed onto the outstanding stack
|
||||
/// that has not been matched yet by a corresponding completion. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/23/2013. </remarks>
|
||||
///
|
||||
/// <param name="strEventName"> [in,out] Name of the event. </param>
|
||||
///
|
||||
/// <returns> true if in flight, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static BOOL IsInFlight(std::string& strEventName);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Collect data point. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/12/2013. </remarks>
|
||||
///
|
||||
/// <param name="strEventName"> [in,out] Name of the event. </param>
|
||||
///
|
||||
/// <returns> . </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static double CollectDataPoint(std::string& strEventName);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Collect data point. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/12/2013. </remarks>
|
||||
///
|
||||
/// <param name="strEventName"> [in,out] Name of the event. </param>
|
||||
///
|
||||
/// <returns> . </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static double
|
||||
CollectDataPoint(
|
||||
__in std::string& strEventName,
|
||||
__out UINT &nSamples,
|
||||
__out double &dMin,
|
||||
__out double &dMax
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if a measurement matching 'strEventName' is complete. Note that
|
||||
/// because multiple measurements matching a given name can be tracked, it is
|
||||
/// possible for an event name to be both "in flight" and complete.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/23/2013. </remarks>
|
||||
///
|
||||
/// <param name="strEventName"> [in,out] Name of the event. </param>
|
||||
///
|
||||
/// <returns> true if complete, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static BOOL IsComplete(std::string& strEventName);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets nesting depth for the given event name. If the nest depth is 0 it means
|
||||
/// there are no measurements with the given name in flight. A depth greater than 1
|
||||
/// means there is a nested measurement with the same name. This idiom is likely best
|
||||
/// avoided in potentially concurrent code, since the instrumenter handles nesting
|
||||
/// with a stack, which makes it difficult to disambiguate end sentinels if they are
|
||||
/// not ordered explicitly by the program.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/23/2013. </remarks>
|
||||
///
|
||||
/// <param name="strEventName"> [in,out] Name of the event. </param>
|
||||
///
|
||||
/// <returns> The nesting depth. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static UINT GetNestingDepth(std::string& strEventName);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Record event start. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/23/2013. </remarks>
|
||||
///
|
||||
/// <param name="strEventName"> [in,out] Name of the event. </param>
|
||||
///
|
||||
/// <returns> the new nesting depth for events matching this name. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static UINT RecordEventStart(std::string& strEventName);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Record event complete. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/23/2013. </remarks>
|
||||
///
|
||||
/// <param name="strEventName"> [in,out] Name of the event. </param>
|
||||
///
|
||||
/// <returns> the new nesting depth for events matching this name. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static UINT RecordEventComplete(std::string& strEventName);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Record event start for an event that should have only one start sentinel,
|
||||
/// but for which concurrency implies non-determinism, so many threads may attempt
|
||||
/// to record the same event start. The primary example of this scenario is
|
||||
/// start of data processing in PTask, which occurs as soon as the first block
|
||||
/// is pushed by the user. It is simplest to record this by calling the instrumenter
|
||||
/// on every exposed call to Channel::Push, with all calls after the first ignored.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/23/2013. </remarks>
|
||||
///
|
||||
/// <param name="strEventName"> [in,out] Name of the event. </param>
|
||||
///
|
||||
/// <returns> the new nesting depth for events matching this name. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static UINT RecordSingletonEventStart(std::string& strEventName);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Record event complete an event that should have only one start sentinel, but for
|
||||
/// which concurrency implies non-determinism, so many threads may attempt to record
|
||||
/// the same event start. The primary example of this scenario is start of data
|
||||
/// processing in PTask, which occurs as soon as the first block is pushed by the
|
||||
/// user. It is simplest to record this by calling the instrumenter on every exposed
|
||||
/// call to Channel::Push, with all calls after the first ignored.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/23/2013. </remarks>
|
||||
///
|
||||
/// <param name="strEventName"> [in,out] Name of the event. </param>
|
||||
/// <param name="bRequireOutstanding"> (Optional) true to require an outstanding entry. Some
|
||||
/// stats (like first return-value materialization)
|
||||
/// are very difficult to capture unambiguously, because
|
||||
/// calls to record the event must be placed in common code
|
||||
/// paths. Calling with this parameter set to true allows the
|
||||
/// record call to fail without protest if the caller knows
|
||||
/// this to be such an event. </param>
|
||||
///
|
||||
/// <returns> the new nesting depth for events matching this name. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static UINT RecordSingletonEventComplete(std::string& strEventName, BOOL bRequireOutstanding=TRUE);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Record event start. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/23/2013. </remarks>
|
||||
///
|
||||
/// <param name="strEventName"> [in,out] Name of the event. </param>
|
||||
///
|
||||
/// <returns> the new nesting depth for events matching this name. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static UINT RecordEventStart(char * strEventName);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Record event complete. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/23/2013. </remarks>
|
||||
///
|
||||
/// <param name="strEventName"> [in,out] Name of the event. </param>
|
||||
///
|
||||
/// <returns> the new nesting depth for events matching this name. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static UINT RecordEventComplete(char * strEventName);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Increment externally measured latency for a cumulative event. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/23/2013. </remarks>
|
||||
///
|
||||
/// <param name="strEventName"> [in,out] Name of the event. </param>
|
||||
/// <param name="dIncrement"> Amount to increment by. </param>
|
||||
///
|
||||
/// <returns> the new nesting depth for events matching this name. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static UINT AccumulateEventLatency(char * strEventName, double dIncrement);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Record start for a cumulative event. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/23/2013. </remarks>
|
||||
///
|
||||
/// <param name="strEventName"> [in,out] Name of the event. </param>
|
||||
///
|
||||
/// <returns> the new nesting depth for events matching this name. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static UINT RecordCumulativeEventStart(char * strEventName);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Record cumulative event complete. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/23/2013. </remarks>
|
||||
///
|
||||
/// <param name="strEventName"> [in,out] Name of the event. </param>
|
||||
///
|
||||
/// <returns> the new nesting depth for events matching this name. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static UINT RecordCumulativeEventComplete(char * strEventName);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Record event start for an event that should have only one start sentinel,
|
||||
/// but for which concurrency implies non-determinism, so many threads may attempt
|
||||
/// to record the same event start. The primary example of this scenario is
|
||||
/// start of data processing in PTask, which occurs as soon as the first block
|
||||
/// is pushed by the user. It is simplest to record this by calling the instrumenter
|
||||
/// on every exposed call to Channel::Push, with all calls after the first ignored.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/23/2013. </remarks>
|
||||
///
|
||||
/// <param name="strEventName"> [in,out] Name of the event. </param>
|
||||
///
|
||||
/// <returns> the new nesting depth for events matching this name. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static UINT RecordSingletonEventStart(char * strEventName);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Record event complete an event that should have only one start sentinel, but for
|
||||
/// which concurrency implies non-determinism, so many threads may attempt to record
|
||||
/// the same event start. The primary example of this scenario is start of data
|
||||
/// processing in PTask, which occurs as soon as the first block is pushed by the
|
||||
/// user. It is simplest to record this by calling the instrumenter on every exposed
|
||||
/// call to Channel::Push, with all calls after the first ignored.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/23/2013. </remarks>
|
||||
///
|
||||
/// <param name="strEventName"> [in,out] Name of the event. </param>
|
||||
/// <param name="bRequireOutstanding"> (Optional) true to require an outstanding entry. Some
|
||||
/// stats (like first return-value materialization)
|
||||
/// are very difficult to capture unambiguously, because
|
||||
/// calls to record the event must be placed in common code
|
||||
/// paths. Calling with this parameter set to true allows the
|
||||
/// record call to fail without protest if the caller knows
|
||||
/// this to be such an event. </param>
|
||||
///
|
||||
/// <returns> the new nesting depth for events matching this name. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static UINT RecordSingletonEventComplete(char * strEventName, BOOL bRequireOutstanding=TRUE);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Resets this object. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/23/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static void Reset();
|
||||
|
||||
protected:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Default constructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/23/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
Instrumenter();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/23/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual ~Instrumenter();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Collect data point. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/12/2013. </remarks>
|
||||
///
|
||||
/// <param name="strEventName"> [in,out] Name of the event. </param>
|
||||
///
|
||||
/// <returns> . </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
double __CollectDataPoint(std::string& strEventName);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Collect data point. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/12/2013. </remarks>
|
||||
///
|
||||
/// <param name="strEventName"> [in,out] Name of the event. </param>
|
||||
///
|
||||
/// <returns> . </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
double
|
||||
__CollectDataPoint(
|
||||
__in std::string& strEventName,
|
||||
__out UINT &nSamples,
|
||||
__out double &dMin,
|
||||
__out double &dMax
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Reports all measured latencies and acknowledges any outstanding
|
||||
/// (incomplete) measurments . </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/23/2013. </remarks>
|
||||
///
|
||||
/// <param name="ss"> [in,out] The ss. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void __Report(std::ostream& ss);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Reports all measured latencies matching the given event name.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/23/2013. </remarks>
|
||||
///
|
||||
/// <param name="ss"> [in,out] The ss. </param>
|
||||
/// <param name="strEventName"> [in,out] Name of the event. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void __ReportComplete(std::ostream& ss, std::string& strEventName);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Reports any outstanding (incomplete)
|
||||
/// measurments matching the given event name.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/23/2013. </remarks>
|
||||
///
|
||||
/// <param name="ss"> [in,out] The ss. </param>
|
||||
/// <param name="strEventName"> [in,out] Name of the event. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void __ReportOutstanding(std::ostream& ss, std::string& strEventName);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Enables the instrumentation framework. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/23/2013. </remarks>
|
||||
///
|
||||
/// <param name="bEnable"> true to enable, false to disable. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
BOOL __Enable(BOOL bEnable);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if the adhoc instrumentation framework is enabled. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/23/2013. </remarks>
|
||||
///
|
||||
/// <returns> true if enabled, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
BOOL __IsEnabled();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if a measurement matching 'strEventName' is in flight. In flight
|
||||
/// means that a start sentinal has been pushed onto the outstanding stack
|
||||
/// that has not been matched yet by a corresponding completion. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/23/2013. </remarks>
|
||||
///
|
||||
/// <param name="strEventName"> [in,out] Name of the event. </param>
|
||||
///
|
||||
/// <returns> true if in flight, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
BOOL __IsInFlight(std::string& strEventName);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if a measurement matching 'strEventName' is complete. Note that
|
||||
/// because multiple measurements matching a given name can be tracked, it is
|
||||
/// possible for an event name to be both "in flight" and complete.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/23/2013. </remarks>
|
||||
///
|
||||
/// <param name="strEventName"> [in,out] Name of the event. </param>
|
||||
///
|
||||
/// <returns> true if complete, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
BOOL __IsComplete(std::string& strEventName);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets nesting depth for the given event name. If the nest depth is 0 it means
|
||||
/// there are no measurements with the given name in flight. A depth greater than 1
|
||||
/// means there is a nested measurement with the same name. This idiom is likely best
|
||||
/// avoided in potentially concurrent code, since the instrumenter handles nesting
|
||||
/// with a stack, which makes it difficult to disambiguate end sentinels if they are
|
||||
/// not ordered explicitly by the program.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/23/2013. </remarks>
|
||||
///
|
||||
/// <param name="strEventName"> [in,out] Name of the event. </param>
|
||||
///
|
||||
/// <returns> The nesting depth. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
UINT __GetNestingDepth(std::string& strEventName);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Record event start. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/23/2013. </remarks>
|
||||
///
|
||||
/// <param name="strEventName"> [in,out] Name of the event. </param>
|
||||
///
|
||||
/// <returns> the new nesting depth for events matching this name. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
UINT __RecordEventStart(std::string& strEventName);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Record event complete. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/23/2013. </remarks>
|
||||
///
|
||||
/// <param name="strEventName"> [in,out] Name of the event. </param>
|
||||
///
|
||||
/// <returns> the new nesting depth for events matching this name. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
UINT __RecordEventComplete(std::string& strEventName);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Record event start. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/23/2013. </remarks>
|
||||
///
|
||||
/// <param name="strEventName"> [in,out] Name of the event. </param>
|
||||
/// <param name="dIncrement"> Amount to increment by. </param>
|
||||
///
|
||||
/// <returns> the new nesting depth for events matching this name. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
UINT __AccumulateEventLatency(std::string& strEventName, double dIncrement);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Record event start. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/23/2013. </remarks>
|
||||
///
|
||||
/// <param name="strEventName"> [in,out] Name of the event. </param>
|
||||
///
|
||||
/// <returns> the new nesting depth for events matching this name. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
UINT __RecordCumulativeEventStart(std::string& strEventName);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Record event complete. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/23/2013. </remarks>
|
||||
///
|
||||
/// <param name="strEventName"> [in,out] Name of the event. </param>
|
||||
///
|
||||
/// <returns> the new nesting depth for events matching this name. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
UINT __RecordCumulativeEventComplete(std::string& strEventName);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Record event start. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/23/2013. </remarks>
|
||||
///
|
||||
/// <param name="strEventName"> [in,out] Name of the event. </param>
|
||||
///
|
||||
/// <returns> the new nesting depth for events matching this name. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
UINT __RecordSingletonEventStart(std::string& strEventName);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Record event complete. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/23/2013. </remarks>
|
||||
///
|
||||
/// <param name="strEventName"> [in,out] Name of the event. </param>
|
||||
/// <param name="bRequireOutstanding"> true to require outstanding. </param>
|
||||
///
|
||||
/// <returns> the new nesting depth for events matching this name. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
UINT __RecordSingletonEventComplete(std::string& strEventName, BOOL bRequireOutstanding);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Resets this object. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/23/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void __Reset();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Finalize singletons. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/23/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void __FinalizeSingletons();
|
||||
|
||||
typedef std::map<std::string, std::tuple<UINT, double, double, double>> CumulativeEventMap;
|
||||
|
||||
BOOL m_bEnabled;
|
||||
CSharedPerformanceTimer * m_pRTTimer;
|
||||
std::map<std::string, std::stack<double>> m_vOutstanding;
|
||||
std::map<std::string, std::vector<double>> m_vCompleted;
|
||||
std::map<std::string, double> m_vSingletonCompleted;
|
||||
CumulativeEventMap m_vCumulativeEvents;
|
||||
static UINT m_bInitialized;
|
||||
static Instrumenter * g_pInstrumenter;
|
||||
|
||||
};
|
||||
};
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
/// Ad hoc instrumentation hooks.
///
/// With ADHOC_STATS defined, these macros forward to the static Instrumenter members;
/// otherwise every hook expands to nothing and its arguments are not evaluated.
///
/// The expansions name Instrumenter without namespace qualification, while the class is
/// declared inside namespace PTask, so the use site must make that name visible.
///
/// NOTE(review): record_dispatch_entry/exit expand to a braced block, so a use such as
/// `if (c) record_dispatch_entry(); else ...` will not parse. The record_stream_agg_*
/// expansions carry their own trailing semicolon. Both are left unchanged so existing
/// call sites keep compiling.
///
/// The schedule / wait_acc / sort_q hooks are compiled out in both configurations; their
/// instrumented forms are preserved below as comments.
///-------------------------------------------------------------------------------------------------
#ifdef ADHOC_STATS

#define recordGraphDestroyStart()     Instrumenter::RecordEventStart("GraphDestroy")
#define recordGraphDestroyLatency()   Instrumenter::RecordEventComplete("GraphDestroy")
#define recordTeardownStart()         Instrumenter::RecordEventStart("Teardown")
#define recordTeardownLatency()       Instrumenter::RecordEventComplete("Teardown")
#define recordFirstPush()             Instrumenter::RecordSingletonEventStart("ProcessData")
#define recordMaterialize()           Instrumenter::RecordSingletonEventComplete("ProcessData", FALSE)
#define record_dispatch_entry()       {Instrumenter::RecordSingletonEventStart("DispatchPhase"); Instrumenter::RecordCumulativeEventStart("task-dispatch"); }
#define record_dispatch_exit()        {Instrumenter::RecordSingletonEventComplete("DispatchPhase"); Instrumenter::RecordCumulativeEventComplete("task-dispatch"); }
#define record_psdispatch_entry()     Instrumenter::RecordCumulativeEventStart("PSDispatch")
#define record_psdispatch_exit()      Instrumenter::RecordCumulativeEventComplete("PSDispatch")
#define record_psdispatch_latency(d)  Instrumenter::AccumulateEventLatency("PSDispatch", d)
#define record_stream_agg_entry(x)    Instrumenter::RecordCumulativeEventStart("SADispatch");
#define record_stream_agg_exit(x)     Instrumenter::RecordCumulativeEventComplete("SADispatch");

#define record_schedule_entry()
#define record_schedule_exit()
#define record_wait_acc_entry()
#define record_wait_acc_exit()
#define record_sort_q_entry()
#define record_sort_q_exit()
//#define record_schedule_entry()     Instrumenter::RecordCumulativeEventStart("Schedule")
//#define record_schedule_exit()      Instrumenter::RecordCumulativeEventComplete("Schedule")
//#define record_wait_acc_entry()     Instrumenter::RecordCumulativeEventStart("block-acc")
//#define record_wait_acc_exit()      Instrumenter::RecordCumulativeEventComplete("block-acc")
//#define record_sort_q_entry()       Instrumenter::RecordCumulativeEventStart("sortq")
//#define record_sort_q_exit()        Instrumenter::RecordCumulativeEventComplete("sortq")

#else

#define recordTeardownStart()
#define recordTeardownLatency()
#define recordGraphDestroyStart()
#define recordGraphDestroyLatency()
#define recordFirstPush()
#define recordMaterialize()
#define record_dispatch_entry()
#define record_dispatch_exit()
#define record_psdispatch_entry()
#define record_psdispatch_exit()
#define record_psdispatch_latency(d)
// The stream-aggregation hooks previously had no disabled-mode definition, so any call
// site failed to compile without ADHOC_STATS. They are no-ops here like the other hooks.
#define record_stream_agg_entry(x)
#define record_stream_agg_exit(x)
#define record_schedule_entry()
#define record_schedule_exit()
#define record_wait_acc_entry()
#define record_wait_acc_exit()
#define record_sort_q_entry()
#define record_sort_q_exit()

#endif
|
||||
|
||||
#endif
|
|
@ -1,112 +0,0 @@
|
|||
//--------------------------------------------------------------------------------------
|
||||
// File: InternalChannel.h
|
||||
// Maintainer: crossbac@microsoft.com
|
||||
//--------------------------------------------------------------------------------------
|
||||
#ifndef _INTERNAL_CHANNEL_H_
|
||||
#define _INTERNAL_CHANNEL_H_
|
||||
|
||||
#include <stdio.h>
|
||||
#include <crtdbg.h>
|
||||
#include <Windows.h>
|
||||
#include "datablock.h"
|
||||
#include "channel.h"
|
||||
#include <deque>
|
||||
|
||||
namespace PTask {
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> InternalChannel. Channel subclass specialized for Task-Task communication. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/28/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
class InternalChannel : public Channel {
|
||||
public:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Constructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/28/2011. </remarks>
|
||||
///
|
||||
/// <param name="pGraph"> [in,out] If non-null, the graph. </param>
|
||||
/// <param name="pDatablockTemplate"> [in,out] If non-null, the datablock template. </param>
|
||||
/// <param name="hRuntimeTerminateEvent"> Handle of the graph terminate event. </param>
|
||||
/// <param name="hGraphTeardownEvt"> The graph teardown event. </param>
|
||||
/// <param name="hGraphStopEvent"> Handle of the graph stop event. </param>
|
||||
/// <param name="lpszChannelName"> [in,out] If non-null, name of the channel. </param>
|
||||
/// <param name="bHasBlockPool"> the has block pool. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
InternalChannel(
|
||||
__in Graph * pGraph,
|
||||
__in DatablockTemplate * pDatablockTemplate,
|
||||
__in HANDLE hRuntimeTerminateEvent,
|
||||
__in HANDLE hGraphTeardownEvt,
|
||||
__in HANDLE hGraphStopEvent,
|
||||
__in char * lpszChannelName,
|
||||
__in BOOL bHasBlockPool
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/28/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual ~InternalChannel();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if the channel is (or can be) connected to a data source or sink that can be
|
||||
/// streamed. Generally speaking, this is a property of the primitive whose IO
|
||||
/// resources are being exposed by this port; consequently this property must be set
|
||||
/// explicitly by the programmer when graph structures that are stateful are
|
||||
/// constructed. For example, in a sort primitive, the main input can be streamed
|
||||
/// (broken into multiple blocks) only if there is a merge network downstream of the
|
||||
/// node performing the sort. Code that feeds the main input port needs to know this
|
||||
/// to decide whether to grow blocks until all data is present, or to push partial
|
||||
/// input.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/20/2011. </remarks>
|
||||
///
|
||||
/// <returns> true if the port can stream data, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL CanStream();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if this channel has downstream writers. An output channel is
|
||||
/// considered a writer because we must conservatively assume consumed
|
||||
/// blocks will be written.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/15/2013. </remarks>
|
||||
///
|
||||
/// <returns> true if downstream writers, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL HasDownstreamWriters();
|
||||
|
||||
protected:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Check type-specific semantics. Return true if all the structures are initialized
|
||||
/// for this channel in a way that is consistent with a well-formed graph. Called by
|
||||
/// CheckSemantics()
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/27/2011. </remarks>
|
||||
///
|
||||
/// <param name="pos"> [in,out] output string stream. </param>
|
||||
/// <param name="pGraph"> [in,out] non-null, the graph. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL CheckTypeSpecificSemantics(std::ostream * pos,
|
||||
PTask::Graph * pGraph);
|
||||
|
||||
};
|
||||
|
||||
};
|
||||
#endif
|
|
@ -1,435 +0,0 @@
|
|||
//--------------------------------------------------------------------------------------
|
||||
// File: multichannel.h
|
||||
// Maintainer: crossbac@microsoft.com
|
||||
//--------------------------------------------------------------------------------------
|
||||
#ifndef _MULTI_CHANNEL_H_
|
||||
#define _MULTI_CHANNEL_H_
|
||||
|
||||
#include <stdio.h>
|
||||
#include <crtdbg.h>
|
||||
#include <Windows.h>
|
||||
#include "datablock.h"
|
||||
#include "ReferenceCounted.h"
|
||||
#include "channel.h"
|
||||
#include "PTaskRuntime.h"
|
||||
#include <map>
|
||||
|
||||
namespace PTask {
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Bundled channel class. Any block pushed into this channel is pushed into
|
||||
/// multiple bundled channels.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/28/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
class MultiChannel : public Channel
|
||||
{
|
||||
public:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Constructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/19/2011. </remarks>
|
||||
///
|
||||
/// <param name="pDatablockTemplate"> [in] If non-null, the datablock template. </param>
|
||||
/// <param name="hRuntimeTerminateEvent"> Handle of the runtime terminate event. </param>
|
||||
/// <param name="hGraphTeardownEvt"> The graph teardown event. </param>
|
||||
/// <param name="hGraphStopEvent"> Handle of the graph stop event. </param>
|
||||
/// <param name="lpszChannelName"> [in] If non-null, name of the channel. </param>
|
||||
/// <param name="bHasBlockPool"> the has block pool. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
MultiChannel(
|
||||
__in Graph * pGraph,
|
||||
__in DatablockTemplate * pDatablockTemplate,
|
||||
__in HANDLE hRuntimeTerminateEvent,
|
||||
__in HANDLE hGraphTeardownEvt,
|
||||
__in HANDLE hGraphStopEvent,
|
||||
__in char * lpszChannelName,
|
||||
__in BOOL bHasBlockPool
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/19/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual ~MultiChannel();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if the channel is (or can be) connected to a data source or sink that can be
|
||||
/// streamed. Generally speaking, this is a property of the primitive whose IO
|
||||
/// resources are being exposed by this port; consequently this property must be set
|
||||
/// explicitly by the programmer when graph structures that are stateful are
|
||||
/// constructed. For example, in a sort primitive, the main input can be streamed
|
||||
/// (broken into multiple blocks) only if there is a merge network downstream of the
|
||||
/// node performing the sort. Code that feeds the main input port needs to know this
|
||||
/// to decide whether to grow blocks until all data is present, or to push partial
|
||||
/// input.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/20/2011. </remarks>
|
||||
///
|
||||
/// <returns> true if the port can stream data, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL CanStream();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if channel is ready. This has a different meaning depending on the channel
|
||||
/// subtype in question, but in general means "is the channel ready to produce or
|
||||
/// consume datablocks?".
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/19/2011. </remarks>
|
||||
///
|
||||
/// <param name="type"> (optional) the type of the channel. </param>
|
||||
///
|
||||
/// <returns> true if ready, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL IsReady(CHANNELENDPOINTTYPE type=CE_DST);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Pulls a datablock from the channel, potentially timing out after dwTimeout
|
||||
/// milliseconds.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/19/2011. </remarks>
|
||||
///
|
||||
/// <param name="dwTimeout"> (optional) the timeout in milliseconds. Use 0xFFFFFFFF for no
|
||||
/// timeout. </param>
|
||||
///
|
||||
/// <returns> null if it fails, else the datablock pulled from the channel. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual Datablock * Pull(DWORD dwTimeout=0xFFFFFFFF);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Returns the first available datablock on the channel without removing it. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/19/2011. </remarks>
|
||||
///
|
||||
/// <returns> null if it fails, else the currently available datablock object. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual Datablock * Peek();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Pushes a datablock into this channel, blocking until there is capacity
|
||||
/// for an optional timeout in milliseconds. Default timeout is infinite.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/19/2011. </remarks>
|
||||
///
|
||||
/// <param name="pBlock"> [in,out] If non-null, the block. </param>
|
||||
/// <param name="dwTimeout"> (optional) the timeout in milliseconds. Use 0xFFFFFFFF for no
|
||||
/// timeout. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL Push(Datablock* pBlock, DWORD dwTimeout=0xFFFFFFFF);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary>
|
||||
/// Sets the capacity of the channel, which is the maximum number of datablocks it can queue
|
||||
/// before subsequent calls to push will block.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/19/2011. </remarks>
|
||||
///
|
||||
/// <param name="nCapacity"> The capacity. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void SetCapacity(UINT nCapacity);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the capacity. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/10/2013. </remarks>
|
||||
///
|
||||
/// <returns> The capacity. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual UINT GetCapacity();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Bind this channel to a port. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/19/2011. </remarks>
|
||||
///
|
||||
/// <param name="pPort"> [in] non-null, the port to bind. </param>
|
||||
/// <param name="type"> (optional) the type of the channel. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void BindPort(Port * pPort, CHANNELENDPOINTTYPE type);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Unbind a port from this channel. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/19/2011. </remarks>
|
||||
///
|
||||
/// <param name="type"> (optional) the type of the channel. </param>
|
||||
///
|
||||
/// <returns> null if it fails, else the port (presumably the one that was unbound). </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual Port * UnbindPort(CHANNELENDPOINTTYPE type);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the port to which this channel is bound. Lock not required because we assume
|
||||
/// this is set at creation, rather than after the graph has entered the running
|
||||
/// state.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/19/2011. </remarks>
|
||||
///
|
||||
/// <param name="type"> (optional) the type of the channel. </param>
|
||||
///
|
||||
/// <returns> null if it fails, else the bound port. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual Port * GetBoundPort(CHANNELENDPOINTTYPE type);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the datablock template associated with this port. Lock not required because
|
||||
/// we assume this is set at creation, rather than after the graph has entered the
|
||||
/// running state.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/19/2011. </remarks>
|
||||
///
|
||||
/// <returns> null if it fails, else the template. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
DatablockTemplate * GetTemplate();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the current queue depth. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/19/2011. </remarks>
|
||||
///
|
||||
/// <returns> The queue depth. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual size_t GetQueueDepth();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary>
|
||||
/// Drains this channel's queue, releasing references to the blocks in the queue.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/19/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual void Drain();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Coalesce channel. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/20/2012. </remarks>
|
||||
///
|
||||
/// <param name="pChannel"> [in,out] If non-null, the channel. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void CoalesceChannel(Channel * pChannel);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the coalesced channel map. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 4/18/2012. </remarks>
|
||||
///
|
||||
/// <returns> null if it fails, else the coalesced channel map. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
std::map<UINT, Channel*>* GetCoalescedChannelMap();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Populate a set of tasks that are bound to this channel as consumers. Because a
|
||||
/// channel may be an output channel or a multi-channel, the range of cardinality of
|
||||
/// this result is [0..n]. Return the number of such tasks. Note that we cache the
|
||||
/// result of this call: computing it requires a transitive closure over paths that
|
||||
/// can include multi-channels and in/out routing, which in turn means traversing the
|
||||
/// graph recursively. Since the result of this traversal cannot change, and the
|
||||
/// traversal requires locking parts of the graph, we prefer to avoid repeating work
|
||||
/// to recompute the same result.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 10/2/2012. </remarks>
|
||||
///
|
||||
/// <param name="pvTasks"> [in,out] non-null, the tasks. </param>
|
||||
///
|
||||
/// <returns> The number of downstream consuming tasks. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual UINT
|
||||
GetDownstreamTasks(
|
||||
__inout std::set<Task*>* pvTasks
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets memory spaces downstream of this channel that either *must* consume data
|
||||
/// that flows through this channel, or *may* consume it. The list is non-trivial
|
||||
/// because of different channel types and predication. For example, an output
|
||||
/// channel has no downstream consumers, while a multi-channel can have any number.
|
||||
/// Enumerating consumers is complicated by the following additional factors:
|
||||
///
|
||||
/// 1) The presence of channel predicates can ensure dynamically that a particular
|
||||
/// bound task never actually consumes a block flowing through it.
|
||||
///
|
||||
/// 2) If the channel is bound to In/out ports, then we need to analyze paths of
|
||||
/// length greater than 1. In fact, we need the transitive closure.
|
||||
///
|
||||
/// 3) A task's accelerator class may enable it to be bound to several different
|
||||
/// accelerators, meaning the list of potential consumers can be greater than 1 even
|
||||
/// if the channel binding structure is trivial.
|
||||
///
|
||||
/// Note that we cache the result of this call: computing it requires a transitive
|
||||
/// closure over paths that can include multi-channels and in/out routing, which in
|
||||
/// turn means traversing the graph recursively. Since the result of this traversal
|
||||
/// cannot change, and the traversal requires locking parts of the graph, we prefer
|
||||
/// to avoid repeating work to recompute the same result.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 10/2/2012. </remarks>
|
||||
///
|
||||
/// <param name="pvMandatoryAccelerators"> [in,out] If non-null, the mandatory accelerators. </param>
|
||||
/// <param name="pvPotentialAccelerators"> [in,out] If non-null, the potential accelerators. </param>
|
||||
///
|
||||
/// <returns> The downstream memory spaces. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL
|
||||
EnumerateDownstreamMemorySpaces(
|
||||
__inout std::set<Accelerator*>* pvMandatoryAccelerators,
|
||||
__inout std::set<Accelerator*>* pvPotentialAccelerators
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if this channel has downstream writers. An output channel is
|
||||
/// considered a writer because we must conservatively assume consumed
|
||||
/// blocks will be written.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/15/2013. </remarks>
|
||||
///
|
||||
/// <returns> true if downstream writers, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL HasDownstreamWriters();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Find the maximal capacity downstream port/channel path starting at this channel.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 1/3/2014. </remarks>
|
||||
///
|
||||
/// <param name="vTasksVisited"> [in,out] The set of tasks visited. </param>
|
||||
/// <param name="vPath"> [in,out] list of channels along the maximal path. </param>
|
||||
///
|
||||
/// <returns> The found maximal downstream capacity. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual UINT
|
||||
FindMaximalDownstreamCapacity(
|
||||
__inout std::set<Task*>& vTasksVisited,
|
||||
__inout std::vector<Channel*>& vPath
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if this channel has any non trivial predicates. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 7/3/2014. </remarks>
|
||||
///
|
||||
/// <returns> true if non trivial predicate, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL HasNonTrivialPredicate();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Return the super-set of all "control signals of interest" for this graph object.
|
||||
/// A control signal is "of interest" if the behavior of this object is predicated
|
||||
/// in some way by the presence or absence of a given signal. This function returns
|
||||
/// the bit-wise OR of all such signals.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 7/7/2014. </remarks>
|
||||
///
|
||||
/// <returns> The bitwise OR of all found control signals of interest. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual CONTROLSIGNAL GetControlSignalsOfInterest();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if this multi-channel has an exposed component channel. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 7/7/2014. </remarks>
|
||||
///
|
||||
/// <returns> true if exposed component channel, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
BOOL HasExposedComponentChannel();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Channel.toString() </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/27/2011. </remarks>
|
||||
///
|
||||
/// <param name="os"> [in,out] The output stream. </param>
|
||||
/// <param name="pChannel"> [in,out] If non-null, the channel. </param>
|
||||
///
|
||||
/// <returns> The shifted result. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
friend std::ostream& operator<<(std::ostream &os, Channel * pChannel);
|
||||
|
||||
protected:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Check type-specific semantics. Return true if all the structures are initialized
|
||||
/// for this channel in a way that is consistent with a well-formed graph. Called by
|
||||
/// CheckSemantics()
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/27/2011. </remarks>
|
||||
///
|
||||
/// <param name="pos"> [in,out] output string stream. </param>
|
||||
/// <param name="pGraph"> [in,out] non-null, the graph. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual BOOL CheckTypeSpecificSemantics(std::ostream * pos,
|
||||
PTask::Graph * pGraph);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the downstream readonly port count. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 2/6/2012. </remarks>
|
||||
///
|
||||
/// <returns> The downstream readonly port count. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
UINT GetDownstreamReadonlyPortCount();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the downstream writer port count. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 2/6/2012. </remarks>
|
||||
///
|
||||
/// <returns> The downstream writer port count. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
UINT GetDownstreamWriterPortCount();
|
||||
|
||||
/// <summary> The channel map. </summary>
|
||||
std::map<UINT, Channel*> m_pChannelMap;
|
||||
|
||||
};
|
||||
|
||||
};
|
||||
#endif
|
|
@ -1,54 +0,0 @@
|
|||
///-------------------------------------------------------------------------------------------------
|
||||
// file: nvtxmacros.h
|
||||
//
|
||||
// summary: Declares the NVTX profiling macros (no-ops unless NVPROFILE and CUDA_SUPPORT are defined)
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
#ifndef __NVTX_MACROS_H__
|
||||
#define __NVTX_MACROS_H__
|
||||
|
||||
|
||||
|
||||
#if defined(NVPROFILE) && defined(CUDA_SUPPORT)
|
||||
#include "nvToolsExt.h"
|
||||
|
||||
extern BOOL gbnvtxldok;
|
||||
extern BOOL gbvntxinit;
|
||||
|
||||
#define DECLARE_NVTX_GLOBALS() \
|
||||
BOOL gbnvtxldok = FALSE; \
|
||||
BOOL gbvntxinit = FALSE;
|
||||
|
||||
#define INITNVTX() initnvtx()
|
||||
#define MARKEVENT(x) if(gbnvtxldok) nvtxMark(x)
|
||||
#define NAMETHREAD(x) if(gbnvtxldok) nvtxNameOsThread(GetCurrentThreadId(),(x))
|
||||
#define MARKRANGEENTER(x) if(gbnvtxldok) nvtxRangePush(x)
|
||||
#define MARKRANGEEXIT() if(gbnvtxldok) nvtxRangePop()
|
||||
#define MARKTASKENTER(x) if(gbnvtxldok) nvtxRangePushA(x)
|
||||
#define MARKTASKEXIT() if(gbnvtxldok) nvtxRangePop()
|
||||
|
||||
// Expands to the definition of initnvtx(). On its first call (gbvntxinit still FALSE) it attempts
// to load nvToolsExt64_1.dll and records the outcome in gbnvtxldok; gbvntxinit is then set so
// later calls skip the probe. gbnvtxldok is raised BEFORE MARKEVENT because MARKEVENT is gated
// on that flag -- with the opposite order the "initnvtx" mark could never be emitted.
#define DECLARE_NVTX_INIT() \
|
||||
void initnvtx() { \
|
||||
if(!gbvntxinit) { \
|
||||
gbnvtxldok = FALSE; \
|
||||
HANDLE hNVTXlib = LoadLibrary(L"nvToolsExt64_1.dll"); \
|
||||
if(hNVTXlib != NULL) { \
|
||||
gbnvtxldok = TRUE; \
|
||||
MARKEVENT(L"initnvtx"); \
|
||||
} \
|
||||
gbvntxinit = TRUE; \
|
||||
} \
|
||||
}
|
||||
|
||||
#else
|
||||
#define DECLARE_NVTX_GLOBALS()
|
||||
#define DECLARE_NVTX_INIT()
|
||||
#define INITNVTX()
|
||||
#define MARKEVENT(x)
|
||||
#define NAMETHREAD(x)
|
||||
#define MARKRANGEENTER(x)
|
||||
#define MARKRANGEEXIT()
|
||||
#define MARKTASKENTER(x)
|
||||
#define MARKTASKEXIT()
|
||||
#endif
|
||||
#endif
|
|
@ -1,12 +0,0 @@
|
|||
///-------------------------------------------------------------------------------------------------
|
||||
// file: oclhdr.h
|
||||
//
|
||||
// summary: Conditionally includes the OpenCL header (only when OPENCL_SUPPORT is defined)
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
#ifndef __OCLHDR_H__
|
||||
#define __OCLHDR_H__
|
||||
#ifdef OPENCL_SUPPORT
|
||||
// Forward slash keeps the path portable: a backslash inside a header name has
// implementation-defined meaning and is not treated as a directory separator off Windows.
#include "CL/cl.h"
|
||||
#endif
|
||||
#endif
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -1,33 +0,0 @@
|
|||
///-------------------------------------------------------------------------------------------------
|
||||
// file: ptaskapi.h
|
||||
//
|
||||
// summary: Includes of public headers for ptask. Do not include this within the
|
||||
// ptask implementation!
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
#ifndef __PTASK_PUBLIC_API_H__
|
||||
#define __PTASK_PUBLIC_API_H__
|
||||
|
||||
#include "primitive_types.h"
|
||||
#include "PTaskRuntime.h"
|
||||
#include "ptaskutils.h"
|
||||
#include "accelerator.h"
|
||||
#include "graph.h"
|
||||
#include "datablock.h"
|
||||
#include "datablocktemplate.h"
|
||||
#include "CompiledKernel.h"
|
||||
#include "hrperft.h"
|
||||
#include "task.h"
|
||||
#include "graphInputChannel.h"
|
||||
#include "graphOutputChannel.h"
|
||||
#include "internalChannel.h"
|
||||
#include "InitializerChannel.h"
|
||||
#include "multichannel.h"
|
||||
#include "InputPort.h"
|
||||
#include "OutputPort.h"
|
||||
#include "StickyPort.h"
|
||||
#include "MetaPort.h"
|
||||
#include "hrperft.h"
|
||||
#include "shrperft.h"
|
||||
|
||||
#endif
|
|
@ -1,34 +0,0 @@
|
|||
///-------------------------------------------------------------------------------------------------
|
||||
// file: ptasklynx.h
|
||||
//
|
||||
// summary: Declares the lynx conditional compilation macros
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
#ifndef __PTASK_LYNX_H__
|
||||
// Include guard: the name defined here must match the one tested by the preceding #ifndef,
// otherwise the guard never takes effect on re-inclusion.
#define __PTASK_LYNX_H__
|
||||
|
||||
#ifdef PTASK_LYNX_INSTRUMENTATION
|
||||
#include "lynx.h"
|
||||
#define init_task_code_instrumentation(x) (x)->InitializeInstrumentation()
|
||||
#define finalize_task_code_instrumentation(x) (x)->FinalizeInstrumentation()
|
||||
#else
|
||||
#define init_task_code_instrumentation(x)
|
||||
#define finalize_task_code_instrumentation(x)
|
||||
#endif
|
||||
#ifdef REPORT_TIMING
|
||||
#include "shrperft.h"
|
||||
#define ptasklynx_start_timer() \
|
||||
CSharedPerformanceTimer * timer = new CSharedPerformanceTimer(gran_msec, true); \
|
||||
double start = timer->elapsed(false);
|
||||
#define ptasklynx_stop_timer() \
|
||||
error = cuCtxSynchronize(); \
|
||||
PTASSERT(error == CUDA_SUCCESS); \
|
||||
double end = timer->elapsed(false); \
|
||||
double runtime = end - start; \
|
||||
std::cout << m_lpszTaskName << "\t" << runtime << std::endl; \
|
||||
delete timer;
|
||||
#else
|
||||
#define ptasklynx_start_timer()
|
||||
#define ptasklynx_stop_timer()
|
||||
#endif
|
||||
#endif
|
|
@ -1,197 +0,0 @@
|
|||
//--------------------------------------------------------------------------------------
|
||||
// File: ptaskutils.h
|
||||
// Maintainer: crossbac@microsoft.com
|
||||
//--------------------------------------------------------------------------------------
|
||||
#ifndef _PTASK_UTILS_H_
|
||||
#define _PTASK_UTILS_H_
|
||||
#include <Windows.h>
|
||||
#include "primitive_types.h"
|
||||
|
||||
namespace PTask {
|
||||
|
||||
static const unsigned int DEFAULT_GROUP_SIZE = 256;
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Values that represent how to interpret raw buffer contents when using
|
||||
/// DUMP_INTERMEDIATE_BLOCKS for debugging.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/11/2012. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
typedef enum dumptype_t {
|
||||
dt_raw = 0,
|
||||
dt_float = 1,
|
||||
dt_int = 2,
|
||||
dt_double = 3
|
||||
} DEBUGDUMPTYPE;
|
||||
|
||||
class ptaskutils
|
||||
{
|
||||
public:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> derive the best group size for dispatch. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/11/2012. </remarks>
|
||||
///
|
||||
/// <param name="group_size"> Size of the group. </param>
|
||||
/// <param name="global_size"> Size of the global. </param>
|
||||
///
|
||||
/// <returns> . </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static size_t
|
||||
roundup( // declared unqualified: a class-qualified name is ill-formed inside the class body
|
||||
int group_size,
|
||||
int global_size
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Return a unique integer identifier. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/11/2012. </remarks>
|
||||
///
|
||||
/// <returns> . </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static unsigned int
|
||||
nextuid( // declared unqualified: a class-qualified name is ill-formed inside the class body
|
||||
void
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Select the accelerator class for the given file, assumed to contain shader/kernel
|
||||
/// code.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/11/2012. </remarks>
|
||||
///
|
||||
/// <param name="szFile"> The file. </param>
|
||||
///
|
||||
/// <returns> . </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static ACCELERATOR_CLASS
|
||||
SelectAcceleratorClass( // declared unqualified: a class-qualified name is ill-formed inside the class body
|
||||
const char * szFile
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Loads file into memory. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/28/2013. </remarks>
|
||||
///
|
||||
/// <param name="hFile"> The file. </param>
|
||||
/// <param name="ppMemory"> [in,out] If non-null, the memory. </param>
|
||||
/// <param name="puiBytes"> [in,out] If non-null, the pui in bytes. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static BOOL
|
||||
LoadFileIntoMemory(
|
||||
const HANDLE hFile,
|
||||
void ** ppMemory,
|
||||
UINT * puiBytes
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Loads file into memory. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/28/2013. </remarks>
|
||||
///
|
||||
/// <param name="szFile"> The file. </param>
|
||||
/// <param name="ppMemory"> [in,out] If non-null, the memory. </param>
|
||||
/// <param name="puiBytes"> [in,out] If non-null, the pui in bytes. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static BOOL
|
||||
LoadFileIntoMemory(
|
||||
const char * szFile,
|
||||
void ** ppMemory,
|
||||
UINT * puiBytes
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Loads file into memory. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/28/2013. </remarks>
|
||||
///
|
||||
/// <param name="pwszFile"> The file (wide-character path string). </param>
|
||||
/// <param name="ppMemory"> [in,out] If non-null, the memory. </param>
|
||||
/// <param name="puiBytes"> [in,out] If non-null, the pui in bytes. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static BOOL
|
||||
LoadFileIntoMemory(
|
||||
const WCHAR * pwszFile,
|
||||
void ** ppMemory,
|
||||
UINT * puiBytes
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Returns the number of set signal codes in a control signal. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 2/14/2013. </remarks>
|
||||
///
|
||||
/// <param name="luiSignalWord"> The lui signal word. </param>
|
||||
///
|
||||
/// <returns> . </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static UINT
|
||||
SignalCount(
|
||||
__in CONTROLSIGNAL luiSignalWord
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> get the index of the first set signal if any. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 2/14/2013. </remarks>
|
||||
///
|
||||
/// <param name="luiSignalWord"> The lui signal word. </param>
|
||||
///
|
||||
/// <returns> . </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static int
|
||||
GetFirstSignalIndex(
|
||||
__in CONTROLSIGNAL luiSignalWord
|
||||
);
|
||||
|
||||
protected:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Initializes utils. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/11/2012. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static void initialize();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Cleans up utils. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/11/2012. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static void cleanup();
|
||||
|
||||
/// <summary> Unique id lock </summary>
|
||||
static CRITICAL_SECTION m_csUIDLock;
|
||||
|
||||
/// <summary> The uid counter </summary>
|
||||
static unsigned int m_uiUIDCounter;
|
||||
|
||||
/// <summary> true if utils is initialized </summary>
|
||||
static BOOL m_bInitialized;
|
||||
};
|
||||
|
||||
};
|
||||
#endif
|
||||
|
|
@ -1,23 +0,0 @@
|
|||
///-------------------------------------------------------------------------------------------------
|
||||
// file: ptdxhdr.h
|
||||
//
|
||||
// summary: include DirectX headers required for given build environment
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
#pragma once
|
||||
|
||||
#if (_MSC_VER > 1600)
|
||||
// apparently d3dx11.h is obsolete in win8
|
||||
#include <d3dcommon.h>
|
||||
#include <d3d11.h>
|
||||
#include <d3dcompiler.h>
|
||||
#else
|
||||
#include <d3dcommon.h>
|
||||
#include <d3d11.h>
|
||||
#ifdef DIRECTXCOMPILERSUPPORT
|
||||
#include <d3dcompiler.h>
|
||||
#include <d3dx11.h>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
|
|
@ -1,274 +0,0 @@
|
|||
//--------------------------------------------------------------------------------------
|
||||
// File: ptgc.h
|
||||
// Maintainer: crossbac@microsoft.com
|
||||
//--------------------------------------------------------------------------------------
|
||||
#ifndef _PTGC_H_
|
||||
#define _PTGC_H_
|
||||
#include <deque>
|
||||
#include "Lockable.h"
|
||||
|
||||
namespace PTask {
|
||||
|
||||
class Datablock;
|
||||
|
||||
static const UINT DEFAULT_DATABLOCK_GC_THREADS = 1;
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Datablock garbage collector. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/28/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
class GarbageCollector : public Lockable
|
||||
{
|
||||
public:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Default constructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/28/2011. </remarks>
|
||||
///
|
||||
/// <param name="nGCThreads"> (optional) the gc threads. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
GarbageCollector(UINT nGCThreads=DEFAULT_DATABLOCK_GC_THREADS);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/28/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual ~GarbageCollector();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Force GC. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/17/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static void ForceGC();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Force a GC sweep that is targeted at a particular memory space. Can be called under
|
||||
/// low-mem conditions by a failing attempt to allocate device memory. Forcing a
|
||||
/// full GC sweep from that calling context is impractical because a full sweep
|
||||
/// requires locks we cannot acquire without breaking the lock-ordering discipline.
|
||||
/// However a device-specific allocation context can be assumed to hold a lock on the
|
||||
/// accelerator for which we are allocating, making it safe to sweep the GC queue
|
||||
/// and free device buffers for that memspace *only* without deleting the parent blocks.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/17/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static void ForceGC(UINT uiMemSpaceId);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Queue a datablock for garbage collection. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/28/2011. </remarks>
|
||||
///
|
||||
/// <param name="pBlock"> [in,out] If non-null, the block. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static void QueueForGC(Datablock * pBlock);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destroys the GC. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 3/18/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static void DestroyGC();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Creates the GC. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 3/18/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static void CreateGC();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Reports the current state of the queue to the console in some detail.
|
||||
/// If we are getting tight on memory, this can be a handy tool for checking
|
||||
/// whether more aggressive GC would help the workload.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 9/7/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static void Report();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Shuts down this object and frees any resources it is using. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 3/1/2012. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void Shutdown();
|
||||
|
||||
#ifdef DEBUG
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Notifies an allocation. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/1/2013. </remarks>
|
||||
///
|
||||
/// <param name="pNewBlock"> [in,out] If non-null, the new block. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static void NotifyAllocation(Datablock * pNewBlock);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Notifies an allocation. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/1/2013. </remarks>
|
||||
///
|
||||
/// <param name="pNewBlock"> [in,out] If non-null, the new block. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void __NotifyAllocation(Datablock * pNewBlock);
|
||||
#endif
|
||||
|
||||
protected:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Force GC. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/17/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void _ForceGC();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Force a GC sweep that is targeted at a particular memory space. Can be called under
|
||||
/// low-mem conditions by a failing attempt to allocate device memory. Forcing a
|
||||
/// full GC sweep from that calling context is impractical because a full sweep
|
||||
/// requires locks we cannot acquire without breaking the lock-ordering discipline.
|
||||
/// However a device-specific allocation context can be assumed to hold a lock on the
|
||||
/// accelerator for which we are allocating, making it safe to sweep the GC queue
|
||||
/// and free device buffers for that memspace *only* without deleting the parent blocks.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/17/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void _ForceGC(UINT uiMemSpaceId);
|
||||
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Queue a datablock for garbage collection. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/28/2011. </remarks>
|
||||
///
|
||||
/// <param name="pBlock"> [in,out] If non-null, the block. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void _QueueForGC(Datablock * pBlock);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Reports the current state of the queue to the console in some detail.
|
||||
/// If we are getting tight on memory, this can be a handy tool for checking
|
||||
/// whether more aggressive GC would help the workload.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 9/7/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void _Report();
|
||||
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> GC thread. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/28/2011. </remarks>
|
||||
///
|
||||
/// <param name="p"> The p. </param>
|
||||
///
|
||||
/// <returns> . </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static DWORD WINAPI PTaskGCThread(LPVOID p);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> The garbage collector thread proc. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/28/2011. </remarks>
|
||||
///
|
||||
/// <returns> . </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
DWORD GarbageCollectorThread();
|
||||
|
||||
/// <summary> The queue of blocks to delete </summary>
|
||||
std::deque<Datablock*> m_vQ;
|
||||
|
||||
/// <summary> Handle of the work available event. Set when the queue is non-empty. </summary>
|
||||
HANDLE m_hWorkAvailable;
|
||||
|
||||
/// <summary> Handle of the quiescent event--set when a sweep is not in progress. </summary>
|
||||
HANDLE m_hQuiescent;
|
||||
|
||||
/// <summary> Handle of the gc threads </summary>
|
||||
HANDLE * m_vGCThreads;
|
||||
|
||||
/// <summary> The number of gc threads. </summary>
|
||||
UINT m_nGCThreads;
|
||||
|
||||
/// <summary> Handle of the gc global shutdown event. </summary>
|
||||
HANDLE m_hGCShutdown;
|
||||
|
||||
/// <summary> Handle of the global shutdown event. </summary>
|
||||
HANDLE m_hRuntimeTerminateEvent;
|
||||
|
||||
/// <summary> true if the GC thread is alive </summary>
|
||||
BOOL m_bAlive;
|
||||
|
||||
/// <summary> true if a shutdown is in progress. </summary>
|
||||
BOOL m_bShutdownInProgress;
|
||||
|
||||
/// <summary> true if shutdown is complete. </summary>
|
||||
BOOL m_bShutdownComplete;
|
||||
|
||||
/// <summary> true if the collector is quiescent. </summary>
|
||||
BOOL m_bQuiescent;
|
||||
|
||||
#ifdef DEBUG
|
||||
/// <summary> Debug mode--keep a list of
|
||||
/// things that have already been queued or
|
||||
/// deleted to ensure we don't double free. </summary>
|
||||
std::set<Datablock*> m_vQueued;
|
||||
std::set<Datablock*> m_vDeleted;
|
||||
CRITICAL_SECTION m_csGCTracker;
|
||||
#define ptgc_init() InitializeCriticalSection(&m_csGCTracker);
|
||||
#define ptgc_deinit() DeleteCriticalSection(&m_csGCTracker);
|
||||
#define ptgc_lock() EnterCriticalSection(&m_csGCTracker);
|
||||
#define ptgc_unlock() LeaveCriticalSection(&m_csGCTracker);
|
||||
#define ptgc_check_double_q(x) assert(m_vQueued.find(x)==m_vQueued.end())
|
||||
#define ptgc_check_double_free(x) assert(m_vDeleted.find(x)==m_vDeleted.end())
|
||||
#define ptgc_record_q(x) m_vQueued.insert(x)
|
||||
#define ptgc_record_free(x) { m_vDeleted.insert(x); m_vQueued.erase(x); }
|
||||
#define ptgc_reset() { ptgc_lock(); m_vQueued.clear(); m_vDeleted.clear(); ptgc_unlock(); }
|
||||
#define ptgc_new(x) { GarbageCollector::NotifyAllocation(x); }
|
||||
#else
|
||||
#define ptgc_init()
|
||||
#define ptgc_deinit()
|
||||
#define ptgc_lock()
|
||||
#define ptgc_unlock()
|
||||
#define ptgc_check_double_q(x)
|
||||
#define ptgc_check_double_free(x)
|
||||
#define ptgc_record_q(x)
|
||||
#define ptgc_record_free(x)
|
||||
#define ptgc_reset()
|
||||
#define ptgc_new(x)
|
||||
#endif
|
||||
|
||||
static CRITICAL_SECTION m_csGlobalGCPtr;
|
||||
|
||||
static GarbageCollector * g_pGarbageCollector;
|
||||
};
|
||||
|
||||
};
|
||||
#endif
|
|
@ -1,132 +0,0 @@
|
|||
///-------------------------------------------------------------------------------------------------
|
||||
// file: ptlock.h
|
||||
//
|
||||
// summary: Declares the ptlock class
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
#ifndef __PTLOCK_H__
|
||||
#define __PTLOCK_H__
|
||||
|
||||
#include <stdio.h>
|
||||
#include <crtdbg.h>
|
||||
#include "Lockable.h"
|
||||
#include <assert.h>
|
||||
|
||||
namespace PTask {
|
||||
|
||||
class PTLock : public Lockable {
|
||||
public:
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Default constructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/27/2011. </remarks>
|
||||
///
|
||||
/// <param name="lpszProtectedObjectName"> [in] If non-null, name of the protected object. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
PTLock(char * lpszProtectedObjectName) :
|
||||
Lockable(lpszProtectedObjectName),
|
||||
m_nReaders(0),
|
||||
m_nWriters(0) { }
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Reader lock. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/20/2013. </remarks>
|
||||
///
|
||||
/// <returns> The lock. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
int LockRO() {
|
||||
int nDepth = Lock(); // take the underlying lock so the reader/writer counters can be inspected
|
||||
if(nDepth > 1) { // presumably a nested acquisition by the calling thread -- TODO confirm Lock() depth semantics
|
||||
assert(m_nReaders > 0);
|
||||
assert(m_nWriters == 0);
|
||||
Unlock(); // undo the Lock() above; the reader count is left unchanged on this path
|
||||
return nDepth;
|
||||
}
|
||||
while(m_nWriters > 0) { // poll until no writer is registered, releasing the lock between checks
|
||||
Unlock();
|
||||
Sleep(1);
|
||||
Lock();
|
||||
}
|
||||
assert(m_nWriters == 0);
|
||||
m_nReaders++;
|
||||
int nReaders = m_nReaders; // snapshot while the lock is held; reading the member after Unlock() is unsynchronized
|
||||
Unlock(); // readers are tracked by the counter only; the underlying lock is not held on return
|
||||
return nReaders;
|
||||
}
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Releases a reader (read-only) lock. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/20/2013. </remarks>
|
||||
///
|
||||
/// <returns> . </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
int UnlockRO() {
|
||||
int nDepth = Lock(); // take the underlying lock so the reader count can be updated
|
||||
assert(m_nReaders > 0);
|
||||
assert(m_nWriters == 0);
|
||||
if(nDepth == 1 && m_nReaders) { // only decrement at depth 1, and never below zero
|
||||
m_nReaders--;
|
||||
}
|
||||
int nReaders = m_nReaders; // snapshot while the lock is held; reading the member after Unlock() is unsynchronized
|
||||
Unlock();
|
||||
return nReaders;
|
||||
}
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Writer lock. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/20/2013. </remarks>
|
||||
///
|
||||
/// <returns> . </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
int LockRW() {
|
||||
int nDepth = Lock(); // acquire the underlying lock; every return path below leaves it held
|
||||
if(nDepth > 1) { // presumably a nested acquisition by the thread that already holds the write lock -- TODO confirm
|
||||
assert(m_nReaders == 0);
|
||||
assert(m_nWriters == 1);
|
||||
return nDepth; // writer count is not incremented again for nested acquisitions
|
||||
}
|
||||
while(m_nReaders > 0) { // poll until the reader count drains to zero, releasing the lock between checks
|
||||
Unlock();
|
||||
Sleep(1);
|
||||
Lock();
|
||||
}
|
||||
assert(m_nReaders == 0);
|
||||
assert(m_nWriters == 0);
|
||||
m_nWriters++;
|
||||
return m_nWriters; // read while the lock is still held
|
||||
}
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> release a write lock. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 8/20/2013. </remarks>
|
||||
///
|
||||
/// <returns> . </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
int UnlockRW() {
|
||||
assert(m_nWriters == 1); // NOTE(review): counters are read without calling Lock(); assumes the caller still holds the lock from LockRW()
|
||||
assert(m_nReaders == 0);
|
||||
if(GetLockDepth() > 1) // nested hold: release one level and keep the writer registered
|
||||
return Unlock();
|
||||
m_nWriters--; // outermost release: clear the writer count before dropping the lock
|
||||
return Unlock(); // returns whatever Unlock() reports
|
||||
}
|
||||
|
||||
protected:
|
||||
|
||||
/// <summary> The readers. </summary>
|
||||
int m_nReaders;
|
||||
|
||||
/// <summary> The writers. </summary>
|
||||
int m_nWriters;
|
||||
|
||||
};
|
||||
};
|
||||
|
||||
#endif
|
|
@ -1,65 +0,0 @@
|
|||
///-------------------------------------------------------------------------------------------------
|
||||
// file: ptprofsupport.h
|
||||
//
|
||||
// summary: macros for dealing with conditionally compiled runtime monitoring modes.
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
#ifndef __PTASK_PROFSUPPORT_H__
|
||||
#define __PTASK_PROFSUPPORT_H__
|
||||
|
||||
#ifdef PROFILE_REFCOUNT_OBJECTS
|
||||
#include <sstream>
|
||||
#endif
|
||||
|
||||
#ifdef PROFILE_PBUFFERS
|
||||
#include <sstream>
|
||||
#include "PBuffer.h"
|
||||
#endif
|
||||
|
||||
namespace PTask {
|
||||
|
||||
namespace Runtime {
|
||||
|
||||
extern int g_bTPProfilingSupported;
|
||||
extern int g_bRCProfilingSupported;
|
||||
extern int g_bDBProfilingSupported;
|
||||
extern int g_bCTProfilingSupported;
|
||||
extern int g_bPBufferProfilingSupported;
|
||||
extern int g_bInvocationCountingSupported;
|
||||
extern int g_bBlockPoolProfilingSupported;
|
||||
extern int g_bChannelProfilingSupported;
|
||||
extern int g_bAdhocInstrumentationSupported;
|
||||
extern int g_bSignalProfilingSupported;
|
||||
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
#ifndef DEBUG
|
||||
// warn PTask users if a release build supports a profiling mode
|
||||
// that likely impacts performance (they all pretty much do)
|
||||
#define WARN_PROFILE_SUPPORT(bSupport, bReqState) \
|
||||
if((bSupport) && (bReqState)) { /* arguments parenthesized so operator expressions expand with the intended precedence */ \
|
||||
MandatoryInform("XXXX: PERFORMANCE: Using %s(%d) support in release build!\n", \
|
||||
__FUNCTION__, \
|
||||
(bReqState)); \
|
||||
}
|
||||
#else
|
||||
#define WARN_PROFILE_SUPPORT(bSupport, bReqState)
|
||||
#endif
|
||||
|
||||
#define SET_PROFILER_MODE(bSupport, bReqState, bTarget) { /* sets bTarget from bReqState, forcing FALSE when the build lacks support */ \
|
||||
if(!(bSupport)) { \
|
||||
if(bReqState) { /* the mode was requested but is not compiled in: report it */ \
|
||||
MandatoryInform("%s(%d) called, not supported in build!\n", \
|
||||
__FUNCTION__, \
|
||||
bReqState); \
|
||||
} \
|
||||
bTarget = FALSE; \
|
||||
} else { \
|
||||
WARN_PROFILE_SUPPORT(bSupport, bReqState); /* expands to nothing when DEBUG is defined */ \
|
||||
bTarget = bReqState; /* NOTE: bReqState is evaluated more than once by this macro */ \
|
||||
} }
|
||||
|
||||
|
||||
#endif
|
|
@ -1,16 +0,0 @@
|
|||
//{{NO_DEPENDENCIES}}
|
||||
// Microsoft Visual C++ generated include file.
|
||||
// Used by ptask.rc
|
||||
//
|
||||
#define IDI_MAIN_ICON 101
|
||||
|
||||
// Next default values for new objects
|
||||
//
|
||||
#ifdef APSTUDIO_INVOKED
|
||||
#ifndef APSTUDIO_READONLY_SYMBOLS
|
||||
#define _APS_NEXT_RESOURCE_VALUE 113
|
||||
#define _APS_NEXT_COMMAND_VALUE 40029
|
||||
#define _APS_NEXT_CONTROL_VALUE 1000
|
||||
#define _APS_NEXT_SYMED_VALUE 101
|
||||
#endif
|
||||
#endif
|
|
@ -1,170 +0,0 @@
|
|||
///-------------------------------------------------------------------------------------------------
|
||||
// file: shrperft.h
|
||||
//
|
||||
// summary: Declares a thread-safe high resolution timer utility
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
#ifndef _SHRPERFT_H_
|
||||
#define _SHRPERFT_H_
|
||||
#include "hrperft.h"
|
||||
|
||||
// performance timers are architecture and platform
|
||||
// specific. The CHighResolutionTimer class defined in
|
||||
// hrperft.h is lightweight but not thread-safe.
|
||||
// This version is thread-safe, but will have higher
|
||||
// overheads due to synchronization...Use this only for
|
||||
// cases where measurements require a global time line
|
||||
// across multiple threads.
|
||||
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> High resolution timer.
|
||||
/// For collecting performance measurements.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
class CSharedPerformanceTimer {
|
||||
public:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Constructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <param name="gran"> The granularity of the timer
|
||||
/// (seconds, milliseconds, micro-seconds). </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
CSharedPerformanceTimer(hpf_granularity gran, bool bStart);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
~CSharedPerformanceTimer(void);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Resets this timer. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void reset();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Return the time elapsed since the
|
||||
/// last reset. For compatibility with hrperft, the reset parameter is
|
||||
/// present, but will assert. Objects of this class should never be reset.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <param name="reset"> true to reset. (ignored, will assert if true in debug mode)</param>
|
||||
///
|
||||
/// <returns> The elapsed time since the timer started </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
double elapsed(bool reset=false);
|
||||
|
||||
protected:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the tick count. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <returns> . </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
__int64 tickcnt();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Queries the system time. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <param name="li"> The li. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
BOOL query_system_time(PLARGE_INTEGER li);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets the tick frequency of the underlying
|
||||
/// counter primitive.
|
||||
/// </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <returns> . </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
double tickfreq();
|
||||
|
||||
/// <summary> lock. </summary>
|
||||
CRITICAL_SECTION m_cs;
|
||||
|
||||
/// <summary> The granularity of the timer,
|
||||
/// either seconds or milliseconds
|
||||
/// </summary>
|
||||
hpf_granularity m_gran;
|
||||
|
||||
/// <summary> the value of the underlying
|
||||
/// timing primitive at the time the
|
||||
/// timer was last reset.</summary>
|
||||
__int64 m_start;
|
||||
|
||||
/// <summary> The frequency of the underlying
|
||||
/// timing primitive </summary>
|
||||
double m_freq;
|
||||
|
||||
/// <summary> Module for windows DLL for querying
|
||||
/// system time getting perf counter
|
||||
/// frequency.
|
||||
/// </summary>
|
||||
HMODULE m_hModule;
|
||||
|
||||
/// <summary> Function pointer for querying
|
||||
/// system time
|
||||
/// </summary>
|
||||
LPFNtQuerySystemTime m_lpfnQuerySystemTime;
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Free resources allocated to support
|
||||
/// query of system time. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void free_query_system_time();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Initialises the query system time. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <returns> . </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
LPFNtQuerySystemTime init_query_system_time();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Return the difference in milliseconds. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 12/23/2011. </remarks>
|
||||
///
|
||||
/// <param name="lEarly"> The early. </param>
|
||||
/// <param name="lLate"> The late. </param>
|
||||
///
|
||||
/// <returns> . </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
DWORD delta_milliseconds(LARGE_INTEGER lEarly, LARGE_INTEGER lLate);
|
||||
};
|
||||
|
||||
#endif
|
|
@ -1,608 +0,0 @@
|
|||
///-------------------------------------------------------------------------------------------------
|
||||
// file: signalprofiler.h
|
||||
//
|
||||
// summary: Declares the signalprofiler class
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
#ifndef __SIGNAL_PROFILER_H__
|
||||
#define __SIGNAL_PROFILER_H__
|
||||
|
||||
#include "primitive_types.h"
|
||||
#include "channel.h"
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
#include "Lockable.h"
|
||||
#include <assert.h>
|
||||
#include "datablock.h"
|
||||
#include "task.h"
|
||||
#include "port.h"
|
||||
|
||||
class CHighResolutionTimer;
|
||||
class CSharedPerformanceTimer;
|
||||
|
||||
namespace PTask {
|
||||
|
||||
class ReferenceCounted;
|
||||
class Task;
|
||||
class Port;
|
||||
class Channel;
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
/// <summary>   Type of a recorded signal event. The numeric values matter:
///             SigEventTypeString uses them as indices into g_lpszSigEventTypeStrings,
///             so the two must be kept in the same order. </summary>
///
/// <remarks>   crossbac, 6/30/2014. </remarks>
///-------------------------------------------------------------------------------------------------

typedef enum sigevttype {
    SIGEVT_UNSPECIFIED  = 0,    // value used by default-constructed observations
    SIGEVT_INGRESS      = 1,
    SIGEVT_EGRESS       = 2
} SIGEVTTYPE;
|
||||
|
||||
/// <summary>   Printable names for signal event types, indexed by the numeric value of
///             the SIGEVTTYPE enumerators (0, 1, 2). </summary>
static const char * g_lpszSigEventTypeStrings[] = {
    "SIGEVT_UNSPECIFIED",
    "SIGEVT_INGRESS",
    "SIGEVT_EGRESS"
};

/// <summary>   Maps an event type to its printable name. The argument is parenthesized
///             before the cast so that a compound expression is converted as a whole
///             rather than only its first operand. No bounds check is performed: the
///             argument must be a valid index into g_lpszSigEventTypeStrings. </summary>
#define SigEventTypeString(e) (g_lpszSigEventTypeStrings[(int)(e)])
|
||||
|
||||
/// <summary>   Concrete kind of the graph object that witnessed a signal, as computed by
///             SignalObservation_t::GetWitnessType. </summary>
typedef enum witnesstype_t {
    wtport      = 0,
    wttask      = 1,
    wtchannel   = 2,
    wtunknown   = 3     // witness is none of Port, Task or Channel
} WITNESSTYPE;
|
||||
|
||||
/// <summary>   Signal activity state, as returned by SignalProfiler::GetSignalActivityState
///             and consumed by SignalProfiler::GetChannelCodedColor. </summary>
typedef enum channelsigactivitystate_t {
    cas_none        = 0,
    cas_unexercised = 1,
    cas_exercised   = 2
} CHANNELACTIVITYSTATE;
|
||||
|
||||
/// <summary>   Predication state, as returned by
///             SignalProfiler::GetChannelSignalPredicationState and consumed by
///             SignalProfiler::GetChannelCodedColor. </summary>
typedef enum channelpredicationstate_t {
    cps_na      = 0,
    cps_open    = 1,
    cps_closed  = 2
} CHANNELPREDICATIONSTATE;
|
||||
|
||||
typedef struct SignalObservation_t {
|
||||
SIGEVTTYPE eType;
|
||||
double dTimestamp;
|
||||
Lockable * pWitness;
|
||||
WITNESSTYPE wType;
|
||||
CONTROLSIGNAL luiRawSignal;
|
||||
Datablock * pBlock;
|
||||
UINT uiDBUID;
|
||||
BOOL bTookRef;
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Signal observation t. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/30/2014. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
struct SignalObservation_t() :
|
||||
eType(SIGEVT_UNSPECIFIED),
|
||||
dTimestamp(0.0),
|
||||
pWitness(NULL),
|
||||
luiRawSignal(0),
|
||||
pBlock(NULL),
|
||||
uiDBUID(0),
|
||||
bTookRef(FALSE),
|
||||
wType(wtunknown) {}
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Signal observation t. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/30/2014. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void
|
||||
SignalObservation_t::Initialize(
|
||||
__in SIGEVTTYPE _eType,
|
||||
__in double _dTimestamp,
|
||||
__in Lockable * _pWitness,
|
||||
__in WITNESSTYPE _wType,
|
||||
__in CONTROLSIGNAL _luiSignal,
|
||||
__in Datablock * _pBlock,
|
||||
__in UINT _uiDBUID,
|
||||
__in BOOL _bTakeRef
|
||||
)
|
||||
{
|
||||
eType = _eType;
|
||||
dTimestamp = _dTimestamp;
|
||||
pWitness = _pWitness;
|
||||
luiRawSignal = _luiSignal;
|
||||
pBlock = _pBlock;
|
||||
wType = _wType;
|
||||
uiDBUID = _uiDBUID;
|
||||
bTookRef = _bTakeRef;
|
||||
if(_bTakeRef)
|
||||
pBlock->AddRef();
|
||||
}
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets witness type. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/30/2014. </remarks>
|
||||
///
|
||||
/// <param name="pWitness"> [in,out] If non-null, the witness. </param>
|
||||
///
|
||||
/// <returns> The witness type. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static WITNESSTYPE
|
||||
GetWitnessType(
|
||||
Lockable* pWitness
|
||||
) {
|
||||
Channel * pChannel = dynamic_cast<Channel*>(pWitness);
|
||||
Port * pPort = dynamic_cast<Port*>(pWitness);
|
||||
Task * pTask = dynamic_cast<Task*>(pWitness);
|
||||
int nVPointerCount = 0;
|
||||
nVPointerCount += pChannel ? 1 : 0;
|
||||
nVPointerCount += pPort ? 1 : 0;
|
||||
nVPointerCount += pTask ? 1 : 0;
|
||||
assert(nVPointerCount == 1);
|
||||
return pChannel ? wtchannel : (pPort ? wtport : (pTask ? wttask : wtunknown));
|
||||
}
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets witness type. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/30/2014. </remarks>
|
||||
///
|
||||
/// <param name="pWitness"> [in,out] If non-null, the witness. </param>
|
||||
///
|
||||
/// <returns> The witness type. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static char *
|
||||
GetWitnessName(
|
||||
Lockable* pWitness
|
||||
) {
|
||||
Channel * pChannel = dynamic_cast<Channel*>(pWitness);
|
||||
Port * pPort = dynamic_cast<Port*>(pWitness);
|
||||
Task * pTask = dynamic_cast<Task*>(pWitness);
|
||||
int nVPointerCount = 0;
|
||||
nVPointerCount += pChannel ? 1 : 0;
|
||||
nVPointerCount += pPort ? 1 : 0;
|
||||
nVPointerCount += pTask ? 1 : 0;
|
||||
assert(nVPointerCount == 1);
|
||||
return pChannel ? pChannel->GetName() : (pPort ? pPort->GetVariableBinding() : (pTask ? pTask->GetTaskName() : "wtunknown"));
|
||||
}
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Signal observation t. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/30/2014. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
struct SignalObservation_t(
|
||||
__in SIGEVTTYPE _eType,
|
||||
__in double _dTimestamp,
|
||||
__in Lockable * _pWitness,
|
||||
__in Datablock * _pBlock,
|
||||
__in BOOL _bTakeRef=FALSE
|
||||
)
|
||||
{
|
||||
CONTROLSIGNAL _luiSignal = _pBlock ? _pBlock->__getControlSignals() : DBCTLC_NONE;
|
||||
UINT _uiDBUID = _pBlock ? _pBlock->GetDBUID() : 0;
|
||||
WITNESSTYPE _wType = GetWitnessType(_pWitness);
|
||||
Initialize(_eType, _dTimestamp, _pWitness, _wType, _luiSignal, _pBlock, _uiDBUID, _bTakeRef);
|
||||
}
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destructor. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/30/2014. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
~SignalObservation_t() {
|
||||
if(bTookRef && pBlock)
|
||||
pBlock->Release();
|
||||
}
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Stream insertion operator. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/30/2014. </remarks>
|
||||
///
|
||||
/// <param name="os"> [in,out] The operating system. </param>
|
||||
/// <param name="pObservation"> [in,out] If non-null, the observation. </param>
|
||||
///
|
||||
/// <returns> The shifted result. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
friend std::ostream& operator<<(
|
||||
std::ostream &os,
|
||||
SignalObservation_t* pObservation
|
||||
)
|
||||
{
|
||||
os << pObservation->dTimestamp << ": "
|
||||
<< ControlSignalString(pObservation->luiRawSignal) << " "
|
||||
<< SigEventTypeString(pObservation->eType) << " DB#"
|
||||
<< pObservation->uiDBUID << " "
|
||||
<< GetWitnessName(pObservation->pWitness);
|
||||
return os;
|
||||
}
|
||||
|
||||
} SIGOBSERVATION;
|
||||
|
||||
class SignalProfiler {
|
||||
|
||||
public:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if 'luiControlSignal' is under profile. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/27/2014. </remarks>
|
||||
///
|
||||
/// <param name="luiControlSignal"> The lui control signal. </param>
|
||||
///
|
||||
/// <returns> true if under profile, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static BOOL
|
||||
IsUnderProfile(
|
||||
__in CONTROLSIGNAL luiControlSignal
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if control signals on this block are under profile. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/27/2014. </remarks>
|
||||
///
|
||||
/// <param name="luiControlSignal"> The lui control signal. </param>
|
||||
///
|
||||
/// <returns> true if under profile, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static BOOL
|
||||
IsUnderProfile(
|
||||
__in Datablock * pBlock
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Registers the signal as being one "of interest" to the profiler. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/27/2014. </remarks>
|
||||
///
|
||||
/// <param name="luiControlSignal"> The lui control signal. </param>
|
||||
/// <param name="bEnable"> (Optional) the enable. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static void
|
||||
RegisterSignal(
|
||||
__in CONTROLSIGNAL luiControlSignal,
|
||||
__in BOOL bEnable=TRUE
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Initializes control signal profiling. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/17/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static void Initialize();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Deinitialize control signal profiling. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/17/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static void Deinitialize();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Dumps profile statistics. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/17/2013. </remarks>
|
||||
///
|
||||
/// <param name="ss"> [in,out] The ss. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static void Report(std::ostream& ss);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets signal history for a particular graph object. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/17/2013. </remarks>
|
||||
///
|
||||
/// <returns> null if it fails, else the task dispatch history. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static std::stringstream* GetHistory();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Record signal transit. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/27/2014. </remarks>
|
||||
///
|
||||
/// <param name="pWitness"> [in,out] If non-null, the witness. </param>
|
||||
/// <param name="pBlock"> [in,out] The lui control signal. </param>
|
||||
/// <param name="eSigEventType"> Type of the signal event. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static void
|
||||
RecordSignalTransit(
|
||||
__in Lockable * pWitness,
|
||||
__in Datablock * pBlock,
|
||||
__in SIGEVTTYPE eSigEventType
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> return true if the given graph object ever bore witness to
|
||||
/// the given control signal. </summary>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static BOOL
|
||||
SignalTrafficOccurred(
|
||||
__in Lockable * pWitness,
|
||||
__in CONTROLSIGNAL luiControlSignal,
|
||||
__in SIGEVTTYPE eType
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> return true if the given graph object ever bore witness to
|
||||
/// the given control signal. </summary>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static BOOL
|
||||
BalancedSignalTrafficOccurred(
|
||||
__in Lockable * pWitness,
|
||||
__in CONTROLSIGNAL luiControlSignal
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> return true if the given graph object ever bore witness to
|
||||
/// the given control signal. </summary>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static BOOL
|
||||
SuppressedSignalTrafficOccurred(
|
||||
__in Lockable * pWitness,
|
||||
__in CONTROLSIGNAL luiControlSignal
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> return true if the given graph object ever bore witness to
|
||||
/// the given control signal. </summary>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static BOOL
|
||||
ProfiledSignalTrafficOccurred(
|
||||
__in Lockable * pWitness,
|
||||
__in SIGEVTTYPE eType
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> return true if the given graph object ever bore witness to
|
||||
/// the given control signal. </summary>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static BOOL
|
||||
AnyProfiledSignalTrafficOccurred(
|
||||
__in Lockable * pWitness
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> return true if the given graph object ever bore witness to
|
||||
/// the given control signal. </summary>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static BOOL
|
||||
SignalIngressOccurred(
|
||||
__in Lockable * pWitness,
|
||||
__in CONTROLSIGNAL luiControlSignal
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> return true if the given graph object ever bore witness to
|
||||
/// the given control signal. </summary>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static BOOL
|
||||
SignalEgressOccurred(
|
||||
__in Lockable * pWitness,
|
||||
__in CONTROLSIGNAL luiControlSignal
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> return true if the given graph object ever bore witness to
|
||||
/// the given control signal. </summary>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static BOOL
|
||||
ProfiledSignalIngressOccurred(
|
||||
__in Lockable * pWitness
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> return true if the given graph object ever bore witness to
|
||||
/// the given control signal. </summary>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static BOOL
|
||||
ProfiledSignalEgressOccurred(
|
||||
__in Lockable * pWitness
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Profiled signal transit suppressed. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/27/2014. </remarks>
|
||||
///
|
||||
/// <param name="pWitness"> [in,out] If non-null, the witness. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static BOOL
|
||||
BalancedProfiledSignalTrafficOccurred(
|
||||
__in Lockable * pWitness
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Profiled signal transit suppressed. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/27/2014. </remarks>
|
||||
///
|
||||
/// <param name="pWitness"> [in,out] If non-null, the witness. </param>
|
||||
///
|
||||
/// <returns> true if it succeeds, false if it fails. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static BOOL
|
||||
SuppressedProfiledSignalTrafficOccurred(
|
||||
__in Lockable * pWitness
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Query if 'pWitness' has relevant predicate. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 6/27/2014. </remarks>
|
||||
///
|
||||
/// <param name="pWitness"> [in,out] If non-null, the witness. </param>
|
||||
///
|
||||
/// <returns> true if relevant predicate, false if not. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static BOOL
|
||||
HasRelevantPredicate(
|
||||
__in Lockable * pWitness
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets signal activity state. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 7/1/2014. </remarks>
|
||||
///
|
||||
/// <param name="pLockable"> [in,out] If non-null, the lockable. </param>
|
||||
///
|
||||
/// <returns> The signal activity state. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static CHANNELACTIVITYSTATE
|
||||
GetSignalActivityState(
|
||||
__in Lockable * pLockable
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets channel signal predication state. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 7/1/2014. </remarks>
|
||||
///
|
||||
/// <param name="pLockable"> [in,out] If non-null, the lockable. </param>
|
||||
///
|
||||
/// <returns> The channel signal predication state. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static CHANNELPREDICATIONSTATE
|
||||
GetChannelSignalPredicationState(
|
||||
__in Lockable * pLockable
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets channel coded color. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 7/1/2014. </remarks>
|
||||
///
|
||||
/// <param name="eActivityState"> State of the activity. </param>
|
||||
/// <param name="ePredicationState"> State of the predication. </param>
|
||||
///
|
||||
/// <returns> null if it fails, else the channel coded color. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static char *
|
||||
GetChannelCodedColor(
|
||||
__in CHANNELACTIVITYSTATE eActivityState,
|
||||
__in CHANNELPREDICATIONSTATE ePredicationState
|
||||
);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets channel coded name. </summary>
|
||||
///
|
||||
/// <remarks> crossbac, 7/1/2014. </remarks>
|
||||
///
|
||||
/// <param name="pLockable"> [in,out] If non-null, the lockable. </param>
|
||||
/// <param name="bBlocked"> The blocked. </param>
|
||||
///
|
||||
/// <returns> The channel coded name. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static std::string
|
||||
GetChannelCodedName(
|
||||
__in Lockable * pLockable,
|
||||
__in BOOL bBlocked
|
||||
);
|
||||
|
||||
/// <summary> true if signal profiler is initialized. </summary>
|
||||
static BOOL s_bSignalProfilerInit;
|
||||
|
||||
protected:
|
||||
|
||||
static BOOL IsRelevantPredicate(CHANNELPREDICATE ePredicate);
|
||||
|
||||
#ifdef PROFILE_CONTROLSIGNALS
|
||||
|
||||
/// <summary> The control signal history. key is the raw control signal value
|
||||
/// number, value is a vector of timestamps at which the raw signal was observed.
|
||||
/// Since a raw signal may be the bitwise or multiple individual signals, we also
|
||||
/// maintain a map for bitwise signal values.
|
||||
/// </summary>
|
||||
static std::map<Lockable*, std::set<SIGOBSERVATION*>> s_vWitnessToSignalMap;
|
||||
static std::map<CONTROLSIGNAL, std::set<SIGOBSERVATION*>> s_vSignalToWitnessMap;
|
||||
static std::map<double, std::set<SIGOBSERVATION*>> s_vSignalHistory;
|
||||
static BOOL s_bFilterProfiledSignals;
|
||||
static CONTROLSIGNAL s_luiSignalsOfInterest;
|
||||
static CRITICAL_SECTION s_csSignalProfiler;
|
||||
static CSharedPerformanceTimer * s_pGlobalProfileTimer;
|
||||
static char * s_lpszChannelColors[3][3];
|
||||
static void Lock();
|
||||
static void Unlock();
|
||||
static BOOL IsLocked();
|
||||
static BOOL Enabled();
|
||||
|
||||
#pragma warning(disable:4127)
|
||||
#define ctlpon() (PTask::Runtime::GetControlSignalProfileMode()&&(s_bSignalProfile!=NULL))
|
||||
#define ctlptimer() (s_pGlobalProfileTimer)
|
||||
#define ctlpdeclegressctr() UINT uiEgressCounter = 0
|
||||
#define ctlpingress(l,b) SignalProfiler::RecordSignalTransit((l), (b), SIGEVTTYPE::SIGEVT_INGRESS)
|
||||
#define ctlpegress(l,b) SignalProfiler::RecordSignalTransit((l), (b), SIGEVTTYPE::SIGEVT_EGRESS)
|
||||
#define ctlpopegress(l,b) { ctlpcondegress(uiEgressCounter == 0, l, b); uiEgressCounter++; }
|
||||
#define ctlpcondingress(c,l,b) if(c) { ctlpingress((l),(b)); }
|
||||
#define ctlpcondegress(c,l,b) if(c) { ctlpegress((l),(b)); }
|
||||
#define ctlpwasactive(x) SignalProfiler::AnyProfiledSignalTrafficOccurred(x)
|
||||
#define ctlpwasbalanced(x) SignalProfiler::BalancedProfiledSignalTrafficOccurred((x))
|
||||
#define ctlpwassuppresed(x) SignalProfiler::SuppressedProfiledSignalTrafficOccurred((x))
|
||||
#define ctlphasrelevantpredicate(x) SignalProfiler::HasRelevantPredicate(x)
|
||||
#define ctlpgetchactstate(x) SignalProfiler::GetSignalActivityState(x)
|
||||
#define ctlpgetchpredstate(x) SignalProfiler::GetChannelSignalPredicationState(x)
|
||||
#define ctlpgetchcolor(x,y) SignalProfiler::GetChannelCodedColor((x),(y))
|
||||
#define ctlpgetchname(x,y) SignalProfiler::GetChannelCodedName((x),(y))
|
||||
#else
|
||||
#define ctlpon()
|
||||
#define ctlptimer()
|
||||
#define ctlpdeclegressctr()
|
||||
#define ctlpingress(l,b)
|
||||
#define ctlpegress(l,b)
|
||||
#define ctlpopegress(l,b)
|
||||
#define ctlpcondingress(c,l,b)
|
||||
#define ctlpcondegress(c,l,b)
|
||||
#define ctlpwasactive(x) FALSE
|
||||
#define ctlpwasbalanced(x) FALSE
|
||||
#define ctlpwassuppresed(x) FALSE
|
||||
#define ctlphasrelevantpredicate(x) FALSE
|
||||
#define ctlpgetchactstate(x) cas_none
|
||||
#define ctlpgetchpredstate(x) cps_na
|
||||
#define ctlpgetchcolor(x,y) "gray60"
|
||||
#define ctlpgetchname(x,y) "channel"
|
||||
#endif
|
||||
|
||||
};
|
||||
|
||||
};
|
||||
#endif
|
|
@ -1,13 +0,0 @@
|
|||
//--------------------------------------------------------------------------------------
|
||||
// File: symbiostypes.h
|
||||
//
|
||||
// Maintainer: crossbac@microsoft.com
|
||||
//--------------------------------------------------------------------------------------
|
||||
#ifndef _SYMBIOS_TYPES_H_
|
||||
#define _SYMBIOS_TYPES_H_
|
||||
|
||||
// #include <d3dcommon.h>
|
||||
//#include "ptdxhdr.h"
|
||||
//#include "accelerator.h"
|
||||
|
||||
#endif
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -1,311 +0,0 @@
|
|||
///-------------------------------------------------------------------------------------------------
|
||||
// file: taskprofiler.h
|
||||
//
|
||||
// summary: Declares the taskprofiler class
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
#ifndef _TASK_PROFILER_H_
|
||||
#define _TASK_PROFILER_H_
|
||||
|
||||
#include "primitive_types.h"
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
|
||||
class CHighResolutionTimer;
|
||||
class CSharedPerformanceTimer;
|
||||
|
||||
namespace PTask {
|
||||
|
||||
class Task;
|
||||
|
||||
class TaskProfile {
|
||||
|
||||
public:
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Default constructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/17/2013. </remarks>
|
||||
///
|
||||
/// <param name="pTask"> [in,out] If non-null, the task. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
TaskProfile(Task * pTask);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Destructor. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/17/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
virtual ~TaskProfile();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Print migration stats. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 1/11/2012. </remarks>
|
||||
///
|
||||
/// <param name="ss"> [in,out] The ss. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static void MigrationReport(std::ostream& ss);
|
||||
|
||||
/// <summary> The task. </summary>
|
||||
Task * m_pTask;
|
||||
|
||||
/// <summary> The dispatch accelerator history. key is the dispatch
|
||||
/// number, value is the accelerator upon which the dispatch
|
||||
/// took place. </summary>
|
||||
std::map<UINT, UINT> m_vDispatchAcceleratorHistory;
|
||||
|
||||
/// <summary> The dependent dispatch accelerator history. key is the dispatch
|
||||
/// number, value is the accelerator used in the dependent binding.
|
||||
/// Note that this object model assumes 1 depacc binding per task,
|
||||
/// which is less general than what much of the code appears to allow
|
||||
/// (in terms of binding cardinality, heterogeneity), but is in line with
|
||||
/// the de facto limitations on dependent bindings at present.
|
||||
/// </summary>
|
||||
std::map<UINT, UINT> m_vDependentAcceleratorHistory;
|
||||
|
||||
std::map<std::string,
|
||||
std::map<int,
|
||||
std::vector<double>*>&> m_vEnterProfileMap;
|
||||
std::map<std::string,
|
||||
std::map<int,
|
||||
std::vector<double>*>&> m_vExitProfileMap;
|
||||
std::map<int, std::vector<double>*> m_vEnterAcquireDispatchResourceLocks;
|
||||
std::map<int, std::vector<double>*> m_vEnterReleaseDispatchResourceLocks;
|
||||
std::map<int, std::vector<double>*> m_vEnterMigrateInputs;
|
||||
std::map<int, std::vector<double>*> m_vEnterAssembleIOLockList;
|
||||
std::map<int, std::vector<double>*> m_vEnterSchedule;
|
||||
std::map<int, std::vector<double>*> m_vEnterBlockedOnReadyQ;
|
||||
std::map<int, std::vector<double>*> m_vEnterBlockedNotReady;
|
||||
std::map<int, std::vector<double>*> m_vEnterPropagateDataflow;
|
||||
std::map<int, std::vector<double>*> m_vEnterReleaseInflightDatablocks;
|
||||
std::map<int, std::vector<double>*> m_vEnterRIBMaterializeViews;
|
||||
std::map<int, std::vector<double>*> m_vEnterRIBSyncHost;
|
||||
std::map<int, std::vector<double>*> m_vEnterBindMetaPorts;
|
||||
std::map<int, std::vector<double>*> m_vEnterDispatch;
|
||||
std::map<int, std::vector<double>*> m_vEnterPSDispatch;
|
||||
std::map<int, std::vector<double>*> m_vEnterBindConstants;
|
||||
std::map<int, std::vector<double>*> m_vEnterBindOutputs;
|
||||
std::map<int, std::vector<double>*> m_vEnterBindInputs;
|
||||
std::map<int, std::vector<double>*> m_vEnterAssignDependentAccelerator;
|
||||
std::map<int, std::vector<double>*> m_vEnterDispatchTeardown;
|
||||
std::map<int, std::vector<double>*> m_vExitAcquireDispatchResourceLocks;
|
||||
std::map<int, std::vector<double>*> m_vExitReleaseDispatchResourceLocks;
|
||||
std::map<int, std::vector<double>*> m_vExitMigrateInputs;
|
||||
std::map<int, std::vector<double>*> m_vExitAssembleIOLockList;
|
||||
std::map<int, std::vector<double>*> m_vExitSchedule;
|
||||
std::map<int, std::vector<double>*> m_vExitBlockedOnReadyQ;
|
||||
std::map<int, std::vector<double>*> m_vExitBlockedNotReady;
|
||||
std::map<int, std::vector<double>*> m_vExitPropagateDataflow;
|
||||
std::map<int, std::vector<double>*> m_vExitReleaseInflightDatablocks;
|
||||
std::map<int, std::vector<double>*> m_vExitRIBMaterializeViews;
|
||||
std::map<int, std::vector<double>*> m_vExitRIBSyncHost;
|
||||
std::map<int, std::vector<double>*> m_vExitBindMetaPorts;
|
||||
std::map<int, std::vector<double>*> m_vExitDispatch;
|
||||
std::map<int, std::vector<double>*> m_vExitPSDispatch;
|
||||
std::map<int, std::vector<double>*> m_vExitBindConstants;
|
||||
std::map<int, std::vector<double>*> m_vExitBindOutputs;
|
||||
std::map<int, std::vector<double>*> m_vExitBindInputs;
|
||||
std::map<int, std::vector<double>*> m_vExitAssignDependentAccelerator;
|
||||
std::map<int, std::vector<double>*> m_vExitDispatchTeardown;
|
||||
CRITICAL_SECTION m_csTiming;
|
||||
static UINT m_nMetrics;
|
||||
static std::map<std::string, std::string> m_vMetricNickNames;
|
||||
static std::map<UINT, std::string> m_vMetricOrder;
|
||||
static std::stringstream m_ssTaskStats;
|
||||
static std::stringstream m_ssTaskDispatchHistory;
|
||||
static CRITICAL_SECTION m_csTaskProfiler;
|
||||
static BOOL m_bProfilerOutputTabular;
|
||||
static BOOL m_bTaskProfilerInit;
|
||||
static CSharedPerformanceTimer * m_pGlobalProfileTimer;
|
||||
static ULONG m_nInputBindEvents;
|
||||
static ULONG m_nInputMigrations;
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Initializes the task profiling. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/17/2013. </remarks>
|
||||
///
|
||||
/// <param name="bTabular"> true for tabular output. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static void Initialize(BOOL bTabular=TRUE);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Deinitialize task profiling. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/17/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static void Deinitialize();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Dumps task profile statistics. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/17/2013. </remarks>
|
||||
///
|
||||
/// <param name="ss"> [in,out] The output stream to write to. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
static void Report(std::ostream& ss);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Merge task instance statistics. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/17/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void MergeTaskInstanceStatistics();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Initializes the task instance profiling. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/17/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void InitializeInstanceProfile();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Deinitialize task instance profiling. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/17/2013. </remarks>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void DeinitializeInstanceProfile();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Dumps task instance profile statistics. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/17/2013. </remarks>
|
||||
///
|
||||
/// <param name="ss"> [in,out] The output stream to write to. </param>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
void DumpTaskProfile(std::ostream& ss);
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets task dispatch history. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/17/2013. </remarks>
|
||||
///
|
||||
/// <returns> null if it fails, else the task dispatch history. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
std::stringstream* GetDispatchHistory();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets task instance profile statistics. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/17/2013. </remarks>
|
||||
///
|
||||
/// <returns> null if it fails, else the task instance profile statistics. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
std::stringstream* GetTaskProfile();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets task instance profile statistics columnar. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/17/2013. </remarks>
|
||||
///
|
||||
/// <returns> null if it fails, else the task instance profile statistics columnar. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
std::stringstream* GetTaskProfileColumnar();
|
||||
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
/// <summary> Gets task instance profile statistics tabular. </summary>
|
||||
///
|
||||
/// <remarks> Crossbac, 7/17/2013. </remarks>
|
||||
///
|
||||
/// <returns> null if it fails, else the task instance profile statistics tabular. </returns>
|
||||
///-------------------------------------------------------------------------------------------------
|
||||
|
||||
std::stringstream* GetTaskProfileTabular();
|
||||
|
||||
|
||||
#if (defined(GRAPH_DIAGNOSTICS) || defined(PROFILE_TASKS))
|
||||
#define log_dispacc(x,y,z,b) { if(m_pTaskProfile) { \
|
||||
m_pTaskProfile->m_vDispatchAcceleratorHistory[x] = y; \
|
||||
if(b) { m_pTaskProfile->m_vDependentAcceleratorHistory[x] = z; } } }
|
||||
#define PTR_LD Runtime::Tracer::LogDispatchEvent
|
||||
#define PTR_EN() Runtime::GetDispatchTracingEnabled()
|
||||
#define dispaccid() m_pDispatchAccelerator->GetAcceleratorId()
|
||||
#define hasdepacc() (GetDependentBindingClassCount()!=0)
|
||||
#define depaccid() ((hasdepacc())?(m_vDependentAcceleratorAssignments.begin()->second->at(0)->GetAcceleratorId()):0)
|
||||
#define log_dispatch(bEnter) { if(PTR_EN()) { PTR_LD(m_lpszTaskName, (bEnter), dispaccid(), m_nDispatchNumber); } \
|
||||
if(bEnter) { log_dispacc(m_nDispatchNumber, dispaccid(), depaccid(), hasdepacc()); } }
|
||||
#define log_dispatch_enter() log_dispatch(TRUE)
|
||||
#define log_dispatch_exit() log_dispatch(FALSE)
|
||||
#else
|
||||
#define log_dispatch_enter()
|
||||
#define log_dispatch_exit()
|
||||
#endif
|
||||
|
||||
#ifdef PROFILE_TASKS
|
||||
#pragma warning(disable:4127)
|
||||
#define tpon() (PTask::Runtime::GetTaskProfileMode()&&(m_pTaskProfile!=NULL))
|
||||
#define tptimer() (m_pTaskProfile->m_pGlobalProfileTimer)
|
||||
#define tpqtimer() (tpon()?tptimer()->elapsed(false):0.0)
|
||||
#define tpprofile_enter(x) \
|
||||
double dTPStart_##x = tpqtimer(); \
|
||||
if(tpon()) { \
|
||||
std::map<int, std::vector<double>*>::iterator xxmiTP_##x; \
|
||||
xxmiTP_##x = m_pTaskProfile->m_vEnter##x.find(m_nDispatchNumber); \
|
||||
if(xxmiTP_##x!=m_pTaskProfile->m_vEnter##x.end()) { \
|
||||
xxmiTP_##x->second->push_back(dTPStart_##x); \
|
||||
} else { \
|
||||
std::vector<double>* l = new std::vector<double>(); \
|
||||
l->push_back(dTPStart_##x); \
|
||||
m_pTaskProfile->m_vEnter##x[m_nDispatchNumber] = l; \
|
||||
}}
|
||||
#define tpprofile_exit(x) \
|
||||
double dTPExit_##x = tpqtimer(); \
|
||||
if(tpon()) { \
|
||||
std::map<int, std::vector<double>*>::iterator xxmiTP_##x; \
|
||||
xxmiTP_##x = m_pTaskProfile->m_vExit##x.find(m_nDispatchNumber); \
|
||||
if(xxmiTP_##x!=m_pTaskProfile->m_vExit##x.end()) { \
|
||||
xxmiTP_##x->second->push_back(dTPExit_##x); \
|
||||
} else { \
|
||||
std::vector<double>* l = new std::vector<double>(); \
|
||||
l->push_back(dTPExit_##x); \
|
||||
m_pTaskProfile->m_vExit##x[m_nDispatchNumber] = l; \
|
||||
}}
|
||||
#define tpprofile_destroy(x) \
|
||||
{ \
|
||||
std::map<int, std::vector<double>*>::iterator xxmiTP_##x; \
|
||||
for(xxmiTP_##x = m_vExit##x.begin(); \
|
||||
xxmiTP_##x != m_vExit##x.end(); \
|
||||
xxmiTP_##x++) { \
|
||||
if(xxmiTP_##x->second) { \
|
||||
delete xxmiTP_##x->second; \
|
||||
} \
|
||||
} \
|
||||
for(xxmiTP_##x = m_vEnter##x.begin(); \
|
||||
xxmiTP_##x != m_vEnter##x.end(); \
|
||||
xxmiTP_##x++) { \
|
||||
if(xxmiTP_##x->second) { \
|
||||
delete xxmiTP_##x->second; \
|
||||
} \
|
||||
} \
|
||||
m_vEnter##x.clear(); \
|
||||
m_vExit##x.clear(); \
|
||||
}
|
||||
#else
|
||||
#define tpprofile_enter(x)
|
||||
#define tpprofile_exit(x)
|
||||
#define tpprofile_init_map(x)
|
||||
#define tpprofile_init_map_nickname(a,x,y)
|
||||
#define tpprofile_destroy(x)
|
||||
#endif
|
||||
|
||||
};
|
||||
|
||||
};
|
||||
#endif
|
|
@ -1,45 +0,0 @@
|
|||
@echo off
|
||||
|
||||
if not exist version.txt (
|
||||
echo version.txt not found in current directory. Please run from \Common\PTask in your CNTK tree. Exiting.
|
||||
goto:end
|
||||
)
|
||||
|
||||
if [%DANDELION_ROOT%] == [] (
|
||||
echo DANDELION_ROOT environment variable must be set. Exiting.
|
||||
goto:end
|
||||
)
|
||||
|
||||
echo Checking out existing PTask files...
|
||||
tf checkout . /r
|
||||
|
||||
echo Copying PTask release artifacts from %DANDELION_ROOT% ...
|
||||
copy /Y %DANDELION_ROOT%\ptask\ptask\bin\x64\Release\ptask.lib lib\Release\ptask.lib
|
||||
copy /Y %DANDELION_ROOT%\ptask\ptask\bin\x64\Release\ptask.pdb lib\Release\ptask.pdb
|
||||
copy /Y %DANDELION_ROOT%\ptask\ptask\bin\x64\Debug\ptask.lib lib\Debug\ptask.lib
|
||||
copy /Y %DANDELION_ROOT%\ptask\ptask\bin\x64\Debug\ptask.pdb lib\Debug\ptask.pdb
|
||||
copy /Y %DANDELION_ROOT%\ptask\ptask\*.h include
|
||||
|
||||
echo Making sure any new files are added to the repository...
|
||||
tf add . /r
|
||||
echo.
|
||||
echo ** Safe to ignore any warnings above about items already having pending changes **
|
||||
|
||||
echo.
|
||||
echo Once you are ready to check in an update to PTask, perfrom the following steps:
|
||||
echo.
|
||||
echo // Note a timestamp that the PTask repository could be rolled back to to re-build this version of PTask.
|
||||
echo notepad version.txt
|
||||
echo.
|
||||
echo // Perform checkin - automatically omits any files identical to their latest checked in version.
|
||||
echo tf checkin
|
||||
echo.
|
||||
echo // View the contents of the checkin.
|
||||
echo tf changeset nnnnn
|
||||
echo.
|
||||
echo // Check which files are still checked out.
|
||||
echo tf status . /r
|
||||
echo.
|
||||
echo // Revert any files still checked out.
|
||||
echo tf undo . /r
|
||||
:end
|
|
@ -1,13 +0,0 @@
|
|||
Current
|
||||
Version from PTask git repository at 17:30 on 7/8/2014.
|
||||
|
||||
Previous
|
||||
Version from PTask git repository at 23:00 on 6/20/2014.
|
||||
Version from MSR-SV Source Depot at 17:00 on 6/10/2014.
|
||||
Version from MSR-SV Source Depot at 10:00 on 6/4/2014.
|
||||
Version from MSR-SV Source Depot at 16:00 on 5/21/2014.
|
||||
Version from MSR-SV Source Depot at 01:45 on 5/16/2014.
|
||||
Version from MSR-SV Source Depot at 22:10 on 3/21/2014.
|
||||
Version from MSR-SV Source Depot at 22:00 on 2/25/2014.
|
||||
Version from MSR-SV Source Depot at 16:45 on 12/18/2013.
|
||||
Version from MSR-SV Source Depot at 11:00 on 11/12/2013.
|
|
@ -394,6 +394,7 @@ private:
|
|||
size_t m_epochStartSample; // the starting sample for the epoch
|
||||
size_t m_totalSamples; // number of samples in the dataset
|
||||
bool m_partialMinibatch; // a partial minibatch is allowed
|
||||
MBLayoutPtr m_pMBLayout;
|
||||
|
||||
int m_traceLevel;
|
||||
vector<SectionFile*> m_secFiles;
|
||||
|
@ -414,14 +415,14 @@ private:
|
|||
public:
|
||||
virtual void Init(const ConfigParameters& config);
|
||||
virtual void Destroy();
|
||||
BinaryReader() { }
|
||||
BinaryReader() : m_pMBLayout(make_shared<MBLayout>()) { }
|
||||
virtual ~BinaryReader();
|
||||
virtual void StartMinibatchLoop(size_t mbSize, size_t epoch, size_t requestedEpochSamples=requestDataSize);
|
||||
virtual bool GetMinibatch(std::map<std::wstring, Matrix<ElemType>*>& matrices);
|
||||
|
||||
size_t GetNumParallelSequences() { return 1 ;}
|
||||
void SetNumParallelSequences(const size_t) { };
|
||||
void CopyMBLayoutTo(MBLayoutPtr) {};
|
||||
void CopyMBLayoutTo(MBLayoutPtr pMBLayout) { pMBLayout->CopyFrom(m_pMBLayout); NOT_IMPLEMENTED; }
|
||||
virtual const std::map<LabelIdType, LabelType>& GetLabelMapping(const std::wstring& sectionName);
|
||||
virtual void SetLabelMapping(const std::wstring& sectionName, const std::map<typename BinaryReader<ElemType>::LabelIdType, typename BinaryReader<ElemType>::LabelType>& labelMapping);
|
||||
virtual bool GetData(const std::wstring& sectionName, size_t numRecords, void* data, size_t& dataBufferSize, size_t recordStart=0);
|
||||
|
|
|
@ -93,6 +93,7 @@ private:
|
|||
bool m_partialMinibatch; // a partial minibatch is allowed
|
||||
LabelKind m_labelType; // labels are categories, create mapping table
|
||||
msra::dbn::randomordering m_randomordering; // randomizing class
|
||||
MBLayoutPtr m_pMBLayout;
|
||||
|
||||
std::wstring m_labelsName;
|
||||
std::wstring m_featuresName;
|
||||
|
@ -136,14 +137,14 @@ private:
|
|||
public:
|
||||
virtual void Init(const ConfigParameters& config);
|
||||
virtual void Destroy();
|
||||
DSSMReader() { m_qfeaturesBuffer = NULL; m_dfeaturesBuffer = NULL; m_labelsBuffer = NULL; }
|
||||
DSSMReader() : m_pMBLayout(make_shared<MBLayout>()) { m_qfeaturesBuffer = NULL; m_dfeaturesBuffer = NULL; m_labelsBuffer = NULL; }
|
||||
virtual ~DSSMReader();
|
||||
virtual void StartMinibatchLoop(size_t mbSize, size_t epoch, size_t requestedEpochSamples=requestDataSize);
|
||||
virtual bool GetMinibatch(std::map<std::wstring, Matrix<ElemType>*>& matrices);
|
||||
|
||||
size_t GetNumParallelSequences() { return 1 ;}
|
||||
void SetNumParallelSequences(const size_t) { };
|
||||
void CopyMBLayoutTo(MBLayoutPtr) {};
|
||||
void CopyMBLayoutTo(MBLayoutPtr pMBLayout) { pMBLayout->CopyFrom(m_pMBLayout); NOT_IMPLEMENTED; }
|
||||
|
||||
virtual const std::map<LabelIdType, LabelType>& GetLabelMapping(const std::wstring& sectionName);
|
||||
virtual void SetLabelMapping(const std::wstring& sectionName, const std::map<LabelIdType, typename LabelType>& labelMapping);
|
||||
|
|
|
@ -17,10 +17,6 @@
|
|||
#include "msra_mgram.h" // for unigram scores of ground-truth path in sequence training
|
||||
|
||||
#include "rollingwindowsource.h" // minibatch sources
|
||||
#include "utterancesource.h"
|
||||
#ifdef _WIN32
|
||||
#include "readaheadsource.h"
|
||||
#endif
|
||||
#include "chunkevalsource.h"
|
||||
#define DATAREADER_EXPORTS
|
||||
#include "DataReader.h"
|
||||
|
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -19,19 +19,14 @@ private:
|
|||
const static size_t m_htkRandomizeAuto = 0;
|
||||
const static size_t m_htkRandomizeDisable = (size_t)-1;
|
||||
|
||||
msra::dbn::minibatchiterator* m_mbiter;
|
||||
msra::dbn::minibatchsource* m_frameSource;
|
||||
#ifdef _WIN32
|
||||
msra::dbn::minibatchreadaheadsource* m_readAheadSource;
|
||||
#endif
|
||||
msra::dbn::FileEvalSource* m_fileEvalSource;
|
||||
msra::dbn::latticesource* m_lattices;
|
||||
map<wstring,msra::lattices::lattice::htkmlfwordsequence> m_latticeMap;
|
||||
unique_ptr<msra::dbn::minibatchiterator> m_mbiter;
|
||||
unique_ptr<msra::dbn::minibatchsource> m_frameSource;
|
||||
unique_ptr<msra::dbn::FileEvalSource> m_fileEvalSource;
|
||||
unique_ptr<msra::dbn::latticesource> m_lattices;
|
||||
map<wstring, msra::lattices::lattice::htkmlfwordsequence> m_latticeMap;
|
||||
|
||||
vector<bool> m_sentenceEnd;
|
||||
bool m_readAhead;
|
||||
bool m_truncated;
|
||||
bool m_fullutt; //read full utterance every time
|
||||
bool m_framemode;
|
||||
vector<size_t> m_processedFrame;
|
||||
intargvector m_numberOfuttsPerMinibatchForAllEpochs;
|
||||
|
@ -40,9 +35,9 @@ private:
|
|||
size_t m_mbSize;
|
||||
vector<size_t> m_toProcess;
|
||||
vector<size_t> m_switchFrame;
|
||||
vector<size_t> m_validFrame; //valid frame number in each channel
|
||||
vector<size_t> m_extraUttsPerMinibatch;
|
||||
size_t m_extraUttNum;
|
||||
vector<size_t> m_validFrame; //valid frame number in each channel
|
||||
vector<size_t> m_extraUttsPerMinibatch;
|
||||
size_t m_extraUttNum;
|
||||
bool m_noData;
|
||||
bool m_trainOrTest; // if false, in file writing mode
|
||||
using LabelType = typename IDataReader<ElemType>::LabelType;
|
||||
|
@ -52,33 +47,36 @@ private:
|
|||
|
||||
bool m_partialMinibatch; // allow partial minibatches?
|
||||
|
||||
std::vector<ElemType*> m_featuresBufferMultiUtt;
|
||||
std::vector<std::shared_ptr<ElemType>> m_featuresBufferMultiUtt;
|
||||
std::vector<size_t> m_featuresBufferAllocatedMultiUtt;
|
||||
std::vector<ElemType*> m_labelsBufferMultiUtt;
|
||||
std::vector<std::shared_ptr<ElemType>> m_labelsBufferMultiUtt;
|
||||
std::vector<size_t> m_labelsBufferAllocatedMultiUtt;
|
||||
std::vector<size_t> m_featuresStartIndexMultiUtt;
|
||||
std::vector<size_t> m_labelsStartIndexMultiUtt;
|
||||
|
||||
CUDAPageLockedMemAllocator* m_cudaAllocator;
|
||||
unique_ptr<CUDAPageLockedMemAllocator> m_cudaAllocator;
|
||||
std::vector<std::shared_ptr<ElemType>> m_featuresBufferMultiIO;
|
||||
std::vector<size_t> m_featuresBufferAllocatedMultiIO;
|
||||
std::vector<std::shared_ptr<ElemType>> m_labelsBufferMultiIO;
|
||||
std::vector<size_t> m_labelsBufferAllocatedMultiIO;
|
||||
//for lattice uids and phoneboundaries
|
||||
std::vector<shared_ptr<const msra::dbn::latticesource::latticepair>> m_latticeBufferMultiUtt;
|
||||
std::vector<std::vector<size_t>> m_labelsIDBufferMultiUtt;
|
||||
std::vector<std::vector<size_t>> m_phoneboundaryIDBufferMultiUtt;
|
||||
std::vector<shared_ptr<const msra::dbn::latticesource::latticepair>> m_extraLatticeBufferMultiUtt;
|
||||
std::vector<std::vector<size_t>> m_extraLabelsIDBufferMultiUtt;
|
||||
std::vector<std::vector<size_t>> m_extraPhoneboundaryIDBufferMultiUtt;
|
||||
//hmm
|
||||
msra::asr::simplesenonehmm m_hset;
|
||||
|
||||
//for lattice uids and phoneboundaries
|
||||
std::vector<shared_ptr<const msra::dbn::latticesource::latticepair>> m_latticeBufferMultiUtt;
|
||||
std::vector<std::vector<size_t>> m_labelsIDBufferMultiUtt;
|
||||
std::vector<std::vector<size_t>> m_phoneboundaryIDBufferMultiUtt;
|
||||
std::vector<shared_ptr<const msra::dbn::latticesource::latticepair>> m_extraLatticeBufferMultiUtt;
|
||||
std::vector<std::vector<size_t>> m_extraLabelsIDBufferMultiUtt;
|
||||
std::vector<std::vector<size_t>> m_extraPhoneboundaryIDBufferMultiUtt;
|
||||
|
||||
//hmm
|
||||
msra::asr::simplesenonehmm m_hset;
|
||||
|
||||
std::map<std::wstring,size_t> m_featureNameToIdMap;
|
||||
std::map<std::wstring,size_t> m_labelNameToIdMap;
|
||||
std::map<std::wstring,size_t> m_nameToTypeMap;
|
||||
std::map<std::wstring,size_t> m_featureNameToDimMap;
|
||||
std::map<std::wstring,size_t> m_labelNameToDimMap;
|
||||
|
||||
// for writing outputs to files (standard single input/output network) - deprecate eventually
|
||||
bool m_checkDictionaryKeys;
|
||||
bool m_convertLabelsToTargets;
|
||||
|
@ -95,8 +93,8 @@ private:
|
|||
void PrepareForWriting(const ConfigParameters& config);
|
||||
|
||||
bool GetMinibatchToTrainOrTest(std::map<std::wstring, Matrix<ElemType>*>&matrices);
|
||||
bool GetMinibatch4SEToTrainOrTest(std::vector<shared_ptr<const msra::dbn::latticesource::latticepair>> & latticeinput, vector<size_t> &uids, vector<size_t> &boundaries, std::vector<size_t> &extrauttmap);
|
||||
void fillOneUttDataforParallelmode(std::map<std::wstring, Matrix<ElemType>*>& matrices, size_t startFr, size_t framenum, size_t channelIndex, size_t sourceChannelIndex);
|
||||
bool GetMinibatch4SEToTrainOrTest(std::vector<shared_ptr<const msra::dbn::latticesource::latticepair>> & latticeinput, vector<size_t> &uids, vector<size_t> &boundaries, std::vector<size_t> &extrauttmap);
|
||||
void fillOneUttDataforParallelmode(std::map<std::wstring, Matrix<ElemType>*>& matrices, size_t startFr, size_t framenum, size_t channelIndex, size_t sourceChannelIndex);
|
||||
bool GetMinibatchToWrite(std::map<std::wstring, Matrix<ElemType>*>&matrices);
|
||||
|
||||
void StartMinibatchLoopToTrainOrTest(size_t mbSize, size_t epoch, size_t subsetNum, size_t numSubsets, size_t requestedEpochSamples = requestDataSize);
|
||||
|
@ -104,11 +102,11 @@ private:
|
|||
|
||||
bool ReNewBufferForMultiIO(size_t i);
|
||||
|
||||
size_t GetNumParallelSequences() { return m_numberOfuttsPerMinibatch; }
|
||||
size_t GetNumParallelSequences();
|
||||
void SetNumParallelSequences(const size_t) { };
|
||||
|
||||
void GetDataNamesFromConfig(const ConfigParameters& readerConfig, std::vector<std::wstring>& features, std::vector<std::wstring>& labels,
|
||||
std::vector<std::wstring>& hmms, std::vector<std::wstring>& lattices);
|
||||
void GetDataNamesFromConfig(const ConfigParameters& readerConfig, std::vector<std::wstring>& features, std::vector<std::wstring>& labels,
|
||||
std::vector<std::wstring>& hmms, std::vector<std::wstring>& lattices);
|
||||
|
||||
size_t ReadLabelToTargetMappingFile (const std::wstring& labelToTargetMappingFile, const std::wstring& labelListFile, std::vector<std::vector<ElemType>>& labelToTargetMap);
|
||||
|
||||
|
@ -122,42 +120,9 @@ private:
|
|||
};
|
||||
|
||||
private:
|
||||
CUDAPageLockedMemAllocator* GetCUDAAllocator(int deviceID)
|
||||
{
|
||||
if (m_cudaAllocator != nullptr)
|
||||
{
|
||||
if (m_cudaAllocator->GetDeviceId() != deviceID)
|
||||
{
|
||||
delete m_cudaAllocator;
|
||||
m_cudaAllocator = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
if (m_cudaAllocator == nullptr)
|
||||
{
|
||||
m_cudaAllocator = new CUDAPageLockedMemAllocator(deviceID);
|
||||
}
|
||||
|
||||
return m_cudaAllocator;
|
||||
}
|
||||
|
||||
std::shared_ptr<ElemType> AllocateIntermediateBuffer(int deviceID, size_t numElements)
|
||||
{
|
||||
if (deviceID >= 0)
|
||||
{
|
||||
// Use pinned memory for GPU devices for better copy performance
|
||||
size_t totalSize = sizeof(ElemType) * numElements;
|
||||
return std::shared_ptr<ElemType>((ElemType*)GetCUDAAllocator(deviceID)->Malloc(totalSize), [this, deviceID](ElemType* p) {
|
||||
this->GetCUDAAllocator(deviceID)->Free((char*)p);
|
||||
});
|
||||
}
|
||||
else
|
||||
{
|
||||
return std::shared_ptr<ElemType>(new ElemType[numElements], [](ElemType* p) {
|
||||
delete[] p;
|
||||
});
|
||||
}
|
||||
}
|
||||
// Helper functions
|
||||
unique_ptr<CUDAPageLockedMemAllocator>& GetCUDAAllocator(int deviceID);
|
||||
std::shared_ptr<ElemType> AllocateIntermediateBuffer(int deviceID, size_t numElements);
|
||||
|
||||
public:
|
||||
MBLayoutPtr m_pMBLayout;
|
||||
|
@ -173,8 +138,8 @@ public:
|
|||
{
|
||||
}
|
||||
virtual void Init(const ConfigParameters& config);
|
||||
virtual void Destroy() {delete this;}
|
||||
virtual ~HTKMLFReader();
|
||||
virtual void Destroy() { delete this; }
|
||||
virtual ~HTKMLFReader() { }
|
||||
|
||||
virtual void StartMinibatchLoop(size_t mbSize, size_t epoch, size_t requestedEpochSamples = requestDataSize)
|
||||
{
|
||||
|
@ -192,8 +157,8 @@ public:
|
|||
virtual const std::map<LabelIdType, LabelType>& GetLabelMapping(const std::wstring& sectionName);
|
||||
virtual void SetLabelMapping(const std::wstring& sectionName, const std::map<LabelIdType, LabelType>& labelMapping);
|
||||
virtual bool GetData(const std::wstring& sectionName, size_t numRecords, void* data, size_t& dataBufferSize, size_t recordStart=0);
|
||||
virtual bool GetMinibatch4SE(std::vector<shared_ptr<const msra::dbn::latticesource::latticepair>> & latticeinput, vector<size_t> &uids, vector<size_t> &boundaries, vector<size_t> &extrauttmap);
|
||||
virtual bool GetHmmData(msra::asr::simplesenonehmm * hmm);
|
||||
virtual bool GetMinibatch4SE(std::vector<shared_ptr<const msra::dbn::latticesource::latticepair>> & latticeinput, vector<size_t> &uids, vector<size_t> &boundaries, vector<size_t> &extrauttmap);
|
||||
virtual bool GetHmmData(msra::asr::simplesenonehmm * hmm);
|
||||
|
||||
virtual bool DataEnd(EndDataType endDataType);
|
||||
void CopyMBLayoutTo(MBLayoutPtr);
|
||||
|
|
|
@ -108,23 +108,12 @@
|
|||
<ClInclude Include="htkfeatio.h" />
|
||||
<ClInclude Include="HTKMLFReader.h" />
|
||||
<ClInclude Include="HTKMLFWriter.h" />
|
||||
<ClInclude Include="latticearchive.h" />
|
||||
<ClInclude Include="latticestorage.h" />
|
||||
<ClInclude Include="minibatchiterator.h" />
|
||||
<ClInclude Include="minibatchsourcehelpers.h" />
|
||||
<ClInclude Include="msra_mgram.h" />
|
||||
<ClInclude Include="numahelpers.h" />
|
||||
<ClInclude Include="pplhelpers.h" />
|
||||
<ClInclude Include="readaheadsource.h" />
|
||||
<ClInclude Include="rollingwindowsource.h" />
|
||||
<ClInclude Include="simplesenonehmm.h" />
|
||||
<ClInclude Include="simplethread.h" />
|
||||
<ClInclude Include="simple_checked_arrays.h" />
|
||||
<ClInclude Include="ssefloat4.h" />
|
||||
<ClInclude Include="ssematrix.h" />
|
||||
<ClInclude Include="stdafx.h" />
|
||||
<ClInclude Include="targetver.h" />
|
||||
<ClInclude Include="utterancesource.h" />
|
||||
<ClInclude Include="utterancesourcemulti.h" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
|
|
Некоторые файлы не были показаны из-за слишком большого количества измененных файлов Показать больше
Загрузка…
Ссылка в новой задаче